From 51fbd77c171936aed07cf5081741d8e3437d683b Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 19 Jun 2015 13:49:06 +0200 Subject: [PATCH 0001/2091] x86/boot: Fix overflow warning with 32-bit binutils commit 04c17341b42699a5859a8afa05e64ba08a4e5235 upstream. When building the kernel with 32-bit binutils built with support only for the i386 target, we get the following warning: arch/x86/kernel/head_32.S:66: Warning: shift count out of range (32 is not between 0 and 31) The problem is that in that case, binutils' internal type representation is 32-bit wide and the shift range overflows. In order to fix this, manipulate the shift expression which creates the 4GiB constant to not overflow the shift count. Suggested-by: Michael Matz Reported-and-tested-by: Enrico Mioso Signed-off-by: Borislav Petkov Cc: Andrew Morton Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/head_32.S | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 53eeb226657ca..7e429c99c7285 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -62,9 +62,16 @@ #define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD) #endif -/* Number of possible pages in the lowmem region */ -LOWMEM_PAGES = (((1<<32) - __PAGE_OFFSET) >> PAGE_SHIFT) - +/* + * Number of possible pages in the lowmem region. + * + * We shift 2 by 31 instead of 1 by 32 to the left in order to avoid a + * gas warning about overflowing shift count when gas has been compiled + * with only a host target support using a 32-bit type for internal + * representation. + */ +LOWMEM_PAGES = (((2<<31) - __PAGE_OFFSET) >> PAGE_SHIFT) + /* Enough space to fit pagetables for the low memory linear map */ MAPPING_BEYOND_END = PAGE_TABLE_SIZE(LOWMEM_PAGES) << PAGE_SHIFT From 12709f95fd8ca75ec1c4dbed945751b4d0d1d05f Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 30 May 2015 22:04:25 +0200 Subject: [PATCH 0002/2091] perf: Fix ring_buffer_attach() RCU sync, again commit 2f993cf093643b98477c421fa2b9a98dcc940323 upstream. While looking for other users of get_state/cond_sync. I Found ring_buffer_attach() and it looks obviously buggy? Don't we need to ensure that we have "synchronize" _between_ list_del() and list_add() ? IOW. Suppose that ring_buffer_attach() preempts right_after get_state_synchronize_rcu() and gp completes before spin_lock(). In this case cond_synchronize_rcu() does nothing and we reuse ->rb_entry without waiting for gp in between? It also moves the ->rcu_pending check under "if (rb)", to make it more readable imo. Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dave@stgolabs.net Cc: der.herr@hofr.at Cc: josh@joshtriplett.org Cc: tj@kernel.org Fixes: b69cf53640da ("perf: Fix a race between ring_buffer_detach() and ring_buffer_attach()") Link: http://lkml.kernel.org/r/20150530200425.GA15748@redhat.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/events/core.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index eddf1ed4155ea..0ceb386777ae7 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -4331,20 +4331,20 @@ static void ring_buffer_attach(struct perf_event *event, WARN_ON_ONCE(event->rcu_pending); old_rb = event->rb; - event->rcu_batches = get_state_synchronize_rcu(); - event->rcu_pending = 1; - spin_lock_irqsave(&old_rb->event_lock, flags); list_del_rcu(&event->rb_entry); spin_unlock_irqrestore(&old_rb->event_lock, flags); - } - if (event->rcu_pending && rb) { - cond_synchronize_rcu(event->rcu_batches); - event->rcu_pending = 0; + event->rcu_batches = get_state_synchronize_rcu(); + event->rcu_pending = 1; } if (rb) { + if (event->rcu_pending) { + cond_synchronize_rcu(event->rcu_batches); + event->rcu_pending = 0; + } + spin_lock_irqsave(&rb->event_lock, flags); list_add_rcu(&event->rb_entry, &rb->event_list); spin_unlock_irqrestore(&rb->event_lock, flags); From 0432ca1ad0d399d40db32c4364143b1d741e18c5 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 11 Jun 2015 13:52:22 -0700 Subject: [PATCH 0003/2091] perf/x86: Add more Broadwell model numbers commit 4b36f1a4139c9284df74c0f5d7655603d67807df upstream. This patch adds additional model numbers for Broadwell to perf. Support for Broadwell with Iris Pro (Intel Core i7-57xxC) and support for Broadwell Server Xeon. Signed-off-by: Andi Kleen Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1434055942-28253-1-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/perf_event_intel.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index a1e35c9f06b95..afdd7c08d0fcb 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -3253,6 +3253,8 @@ __init int intel_pmu_init(void) case 61: /* 14nm Broadwell Core-M */ case 86: /* 14nm Broadwell Xeon D */ + case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */ + case 79: /* 14nm Broadwell Server */ x86_pmu.late_ack = true; memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); From 55c9e52cf3b056109ab84de8d2a5b4b6e0a4cefa Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Thu, 11 Jun 2015 15:13:56 +0300 Subject: [PATCH 0004/2091] perf/x86/intel/bts: Fix DS area sharing with x86_pmu events commit 6b099d9b040b0f3d0aec05b560d7caf879af5077 upstream. Currently, the intel_bts driver relies on the DS area allocated by the x86_pmu code in its event_init() path, which is a bug: creating a BTS event while no x86_pmu events are present results in a NULL pointer dereference. 
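As an aside, the fix described in the following paragraphs works by letting intel_bts take its own reference on the shared DS allocation instead of assuming x86_pmu already holds one. A rough sketch of that "first user allocates, last user frees" idiom, in plain userspace C with made-up names (reserve_hardware()/release_hardware() stand in for x86_reserve_hardware()/x86_release_hardware(); this is not code from the patch):

  #include <pthread.h>
  #include <stdatomic.h>
  #include <stdbool.h>

  static atomic_int active_users;          /* how many events hold the resource */
  static pthread_mutex_t reserve_mutex = PTHREAD_MUTEX_INITIALIZER;
  static bool ds_allocated;                /* stands in for the shared DS area */

  static int reserve_hardware(void)
  {
          int cur = atomic_load(&active_users);

          /* fast path: someone already set things up, just take a reference */
          while (cur > 0)
                  if (atomic_compare_exchange_weak(&active_users, &cur, cur + 1))
                          return 0;

          /* slow path: the first user allocates, serialized by a mutex */
          pthread_mutex_lock(&reserve_mutex);
          if (atomic_load(&active_users) == 0)
                  ds_allocated = true;     /* reserve counters + DS buffers here */
          atomic_fetch_add(&active_users, 1);
          pthread_mutex_unlock(&reserve_mutex);
          return 0;
  }

  static void release_hardware(void)
  {
          pthread_mutex_lock(&reserve_mutex);
          if (atomic_fetch_sub(&active_users, 1) == 1)
                  ds_allocated = false;    /* last user frees everything */
          pthread_mutex_unlock(&reserve_mutex);
  }

(The kernel version additionally propagates an error when the counter reservation fails; that is omitted here for brevity.)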
The same DS area is also used by PEBS sampling, which makes it quite a bit trickier to have a separate one for intel_bts' purposes. This patch makes intel_bts driver use the same DS allocation and reference counting code as x86_pmu to make sure it is always present when either intel_bts or x86_pmu need it. Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: acme@infradead.org Cc: adrian.hunter@intel.com Link: http://lkml.kernel.org/r/1434024837-9916-2-git-send-email-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/perf_event.c | 52 ++++++++++++++-------- arch/x86/kernel/cpu/perf_event.h | 4 ++ arch/x86/kernel/cpu/perf_event_intel_bts.c | 9 ++++ 3 files changed, 46 insertions(+), 19 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 4f7001f28936f..aa4e3a74e5410 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -270,11 +270,7 @@ static bool check_hw_exists(void) static void hw_perf_event_destroy(struct perf_event *event) { - if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) { - release_pmc_hardware(); - release_ds_buffers(); - mutex_unlock(&pmc_reserve_mutex); - } + x86_release_hardware(); } void hw_perf_lbr_event_destroy(struct perf_event *event) @@ -324,6 +320,35 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event) return x86_pmu_extra_regs(val, event); } +int x86_reserve_hardware(void) +{ + int err = 0; + + if (!atomic_inc_not_zero(&active_events)) { + mutex_lock(&pmc_reserve_mutex); + if (atomic_read(&active_events) == 0) { + if (!reserve_pmc_hardware()) + err = -EBUSY; + else + reserve_ds_buffers(); + } + if (!err) + atomic_inc(&active_events); + mutex_unlock(&pmc_reserve_mutex); + } + + return err; +} + +void x86_release_hardware(void) +{ + if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) { + release_pmc_hardware(); + release_ds_buffers(); + mutex_unlock(&pmc_reserve_mutex); + } +} + /* * Check if we can create event of a certain type (that no conflicting events * are present). 
@@ -336,9 +361,10 @@ int x86_add_exclusive(unsigned int what) return 0; mutex_lock(&pmc_reserve_mutex); - for (i = 0; i < ARRAY_SIZE(x86_pmu.lbr_exclusive); i++) + for (i = 0; i < ARRAY_SIZE(x86_pmu.lbr_exclusive); i++) { if (i != what && atomic_read(&x86_pmu.lbr_exclusive[i])) goto out; + } atomic_inc(&x86_pmu.lbr_exclusive[what]); ret = 0; @@ -527,19 +553,7 @@ static int __x86_pmu_event_init(struct perf_event *event) if (!x86_pmu_initialized()) return -ENODEV; - err = 0; - if (!atomic_inc_not_zero(&active_events)) { - mutex_lock(&pmc_reserve_mutex); - if (atomic_read(&active_events) == 0) { - if (!reserve_pmc_hardware()) - err = -EBUSY; - else - reserve_ds_buffers(); - } - if (!err) - atomic_inc(&active_events); - mutex_unlock(&pmc_reserve_mutex); - } + err = x86_reserve_hardware(); if (err) return err; diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index ef78516850fb0..f068695eaca06 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -703,6 +703,10 @@ int x86_add_exclusive(unsigned int what); void x86_del_exclusive(unsigned int what); +int x86_reserve_hardware(void); + +void x86_release_hardware(void); + void hw_perf_lbr_event_destroy(struct perf_event *event); int x86_setup_perfctr(struct perf_event *event); diff --git a/arch/x86/kernel/cpu/perf_event_intel_bts.c b/arch/x86/kernel/cpu/perf_event_intel_bts.c index ac1f0c55f3796..7795f3f8b1d57 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_bts.c +++ b/arch/x86/kernel/cpu/perf_event_intel_bts.c @@ -483,17 +483,26 @@ static int bts_event_add(struct perf_event *event, int mode) static void bts_event_destroy(struct perf_event *event) { + x86_release_hardware(); x86_del_exclusive(x86_lbr_exclusive_bts); } static int bts_event_init(struct perf_event *event) { + int ret; + if (event->attr.type != bts_pmu.type) return -ENOENT; if (x86_add_exclusive(x86_lbr_exclusive_bts)) return -EBUSY; + ret = x86_reserve_hardware(); + if (ret) { + x86_del_exclusive(x86_lbr_exclusive_bts); + return ret; + } + event->destroy = bts_event_destroy; return 0; From b675101824a103b3cf8f05c407a53ad50f987730 Mon Sep 17 00:00:00 2001 From: "Palik, Imre" Date: Mon, 8 Jun 2015 14:46:49 +0200 Subject: [PATCH 0005/2091] perf/x86: Honor the architectural performance monitoring version commit 2c33645d366d13b969d936b68b9f4875b1fdddea upstream. Architectural performance monitoring, version 1, doesn't support fixed counters. Currently, even if a hypervisor advertises support for architectural performance monitoring version 1, perf may still try to use the fixed counters, as the constraints are set up based on the CPU model. This patch ensures that perf honors the architectural performance monitoring version returned by CPUID, and it only uses the fixed counters for version 2 and above. (Some of the ideas in this patch came from Peter Zijlstra.) Signed-off-by: Imre Palik Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Andy Lutomirski Cc: Anthony Liguori Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1433767609-1039-1-git-send-email-imrep.amz@gmail.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/perf_event_intel.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index afdd7c08d0fcb..2813ea0f142e9 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -3324,13 +3324,13 @@ __init int intel_pmu_init(void) * counter, so do not extend mask to generic counters */ for_each_event_constraint(c, x86_pmu.event_constraints) { - if (c->cmask != FIXED_EVENT_FLAGS - || c->idxmsk64 == INTEL_PMC_MSK_FIXED_REF_CYCLES) { - continue; + if (c->cmask == FIXED_EVENT_FLAGS + && c->idxmsk64 != INTEL_PMC_MSK_FIXED_REF_CYCLES) { + c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1; } - - c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1; - c->weight += x86_pmu.num_counters; + c->idxmsk64 &= + ~(~0UL << (INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed)); + c->weight = hweight64(c->idxmsk64); } } From c7bd5d27d2cb7796a0142f53a5033498f266ec07 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Thu, 18 Jun 2015 13:00:32 +0200 Subject: [PATCH 0006/2091] perf tools: Fix build breakage if prefix= is specified commit 75e84ab906ef8935cff3df3d8929f1bafea81599 upstream. Invoking Makefile.perf with prefix= breaks the build since Makefile.perf hands that variable down to Makefile.build where it overrides prefix := $(subst ./,,$(OUTPUT)$(dir)/) leading to errors like this: No rule to make target '/usrabspath.o', needed by '/usrlibperf-in.o' Signed-off-by: Lukas Wunner Acked-by: Jiri Olsa Cc: David Ahern Fixes: c819e2cf2eb6f65d3208d195d7a0edef6108d5 Link: http://lkml.kernel.org/r/5582c48a.84a22b0a.a918.5285SMTPIN_ADDED_MISSING@mx.google.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/build/Makefile.build | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/build/Makefile.build b/tools/build/Makefile.build index 10df57237a66d..98cfc388ea330 100644 --- a/tools/build/Makefile.build +++ b/tools/build/Makefile.build @@ -94,12 +94,12 @@ obj-y := $(patsubst %/, %/$(obj)-in.o, $(obj-y)) subdir-obj-y := $(filter %/$(obj)-in.o, $(obj-y)) # '$(OUTPUT)/dir' prefix to all objects -prefix := $(subst ./,,$(OUTPUT)$(dir)/) -obj-y := $(addprefix $(prefix),$(obj-y)) -subdir-obj-y := $(addprefix $(prefix),$(subdir-obj-y)) +objprefix := $(subst ./,,$(OUTPUT)$(dir)/) +obj-y := $(addprefix $(objprefix),$(obj-y)) +subdir-obj-y := $(addprefix $(objprefix),$(subdir-obj-y)) # Final '$(obj)-in.o' object -in-target := $(prefix)$(obj)-in.o +in-target := $(objprefix)$(obj)-in.o PHONY += $(subdir-y) From 754bad2c5cb0475a3dd1fc65887eddcf48c3c8f0 Mon Sep 17 00:00:00 2001 From: Dmitry Tunin Date: Thu, 18 Jun 2015 20:41:51 +0300 Subject: [PATCH 0007/2091] Bluetooth: ath3k: Add support of 04ca:300d AR3012 device commit 7e730c7f3d1f39c25cf5f7cf70c0ff4c28d7bec7 upstream. BugLink: https://bugs.launchpad.net/bugs/1394368 This device requires new firmware files AthrBT_0x11020100.dfu and ramps_0x11020100_40.dfu added to /lib/firmware/ar3k/ that are not included in linux-firmware yet. 
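For context, the driver resolves those file names through the kernel's firmware loader at probe time, which is why the images must be installed under /lib/firmware. A minimal sketch of that API, with a placeholder helper name and device pointer (not ath3k code):

  #include <linux/firmware.h>
  #include <linux/device.h>

  static int load_dfu_image(struct device *dev, const char *name)
  {
          const struct firmware *fw;
          int ret;

          /* e.g. name = "ar3k/AthrBT_0x11020100.dfu", looked up under /lib/firmware */
          ret = request_firmware(&fw, name, dev);
          if (ret)
                  return ret;      /* typically -ENOENT when the blob is not installed */

          /* ... transfer fw->data / fw->size to the device ... */

          release_firmware(fw);
          return 0;
  }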
T: Bus=02 Lev=01 Prnt=01 Port=04 Cnt=03 Dev#= 5 Spd=12 MxCh= 0 D: Ver= 1.10 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=04ca ProdID=300d Rev= 0.01 C:* #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=100mA I:* If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=81(I) Atr=03(Int.) MxPS= 16 Ivl=1ms E: Ad=82(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 0 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 0 Ivl=1ms I: If#= 1 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 9 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 9 Ivl=1ms I: If#= 1 Alt= 2 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 17 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 17 Ivl=1ms I: If#= 1 Alt= 3 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 25 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 25 Ivl=1ms I: If#= 1 Alt= 4 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 33 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 33 Ivl=1ms I: If#= 1 Alt= 5 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 49 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 49 Ivl=1ms Signed-off-by: Dmitry Tunin Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/ath3k.c | 2 ++ drivers/bluetooth/btusb.c | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c index 8c81af6dbe063..12bac167641f3 100644 --- a/drivers/bluetooth/ath3k.c +++ b/drivers/bluetooth/ath3k.c @@ -88,6 +88,7 @@ static const struct usb_device_id ath3k_table[] = { { USB_DEVICE(0x04CA, 0x3007) }, { USB_DEVICE(0x04CA, 0x3008) }, { USB_DEVICE(0x04CA, 0x300b) }, + { USB_DEVICE(0x04CA, 0x300d) }, { USB_DEVICE(0x04CA, 0x300f) }, { USB_DEVICE(0x04CA, 0x3010) }, { USB_DEVICE(0x0930, 0x0219) }, @@ -145,6 +146,7 @@ static const struct usb_device_id ath3k_blist_tbl[] = { { USB_DEVICE(0x04ca, 0x3007), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3008), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x300b), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x04ca, 0x300d), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x300f), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3010), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0930, 0x0219), .driver_info = BTUSB_ATH3012 }, diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 3c10d4dfe9a79..eef15d2c882a2 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -186,6 +186,7 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x04ca, 0x3007), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3008), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x300b), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x04ca, 0x300d), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x300f), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3010), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0930, 0x0219), .driver_info = BTUSB_ATH3012 }, From a621d2c97a591fd811c192af007c0ae0d796dc30 Mon Sep 17 00:00:00 2001 From: Dmitry Tunin Date: Sat, 6 Jun 2015 20:25:40 +0300 Subject: [PATCH 0008/2091] ath3k: Add support of 0489:e076 AR3012 device commit 692c062e7c282164fd7cda68077f79dafd176eaf upstream. 
BugLink: https://bugs.launchpad.net/bugs/1462614 This device requires new firmware files AthrBT_0x11020100.dfu and ramps_0x11020100_40.dfu added to /lib/firmware/ar3k/ that are not included in linux-firmware yet. T: Bus=03 Lev=01 Prnt=01 Port=09 Cnt=06 Dev#= 7 Spd=12 MxCh= 0 D: Ver= 1.10 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=0489 ProdID=e076 Rev= 0.01 C:* #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=100mA I:* If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=(none) E: Ad=81(I) Atr=03(Int.) MxPS= 16 Ivl=1ms E: Ad=82(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=(none) E: Ad=83(I) Atr=01(Isoc) MxPS= 0 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 0 Ivl=1ms I: If#= 1 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=(none) E: Ad=83(I) Atr=01(Isoc) MxPS= 9 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 9 Ivl=1ms I: If#= 1 Alt= 2 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=(none) E: Ad=83(I) Atr=01(Isoc) MxPS= 17 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 17 Ivl=1ms I: If#= 1 Alt= 3 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=(none) E: Ad=83(I) Atr=01(Isoc) MxPS= 25 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 25 Ivl=1ms I: If#= 1 Alt= 4 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=(none) E: Ad=83(I) Atr=01(Isoc) MxPS= 33 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 33 Ivl=1ms I: If#= 1 Alt= 5 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=(none) E: Ad=83(I) Atr=01(Isoc) MxPS= 49 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 49 Ivl=1ms Signed-off-by: Dmitry Tunin Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/ath3k.c | 2 ++ drivers/bluetooth/btusb.c | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c index 12bac167641f3..0e1c6fe9b91e3 100644 --- a/drivers/bluetooth/ath3k.c +++ b/drivers/bluetooth/ath3k.c @@ -80,6 +80,7 @@ static const struct usb_device_id ath3k_table[] = { { USB_DEVICE(0x0489, 0xe057) }, { USB_DEVICE(0x0489, 0xe056) }, { USB_DEVICE(0x0489, 0xe05f) }, + { USB_DEVICE(0x0489, 0xe076) }, { USB_DEVICE(0x0489, 0xe078) }, { USB_DEVICE(0x04c5, 0x1330) }, { USB_DEVICE(0x04CA, 0x3004) }, @@ -138,6 +139,7 @@ static const struct usb_device_id ath3k_blist_tbl[] = { { USB_DEVICE(0x0489, 0xe056), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0489, 0xe057), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0489, 0xe05f), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x0489, 0xe076), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0489, 0xe078), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04c5, 0x1330), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3004), .driver_info = BTUSB_ATH3012 }, diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index eef15d2c882a2..09527e5cb3765 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -178,6 +178,7 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x0489, 0xe056), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0489, 0xe057), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0489, 0xe05f), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x0489, 0xe076), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0489, 0xe078), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04c5, 0x1330), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3004), .driver_info = BTUSB_ATH3012 }, From 46c266ff8435422631402284e2a3b62ef1560141 Mon Sep 17 00:00:00 2001 From: Dmitry Tunin Date: Sat, 6 Jun 2015 20:29:25 +0300 Subject: [PATCH 0009/2091] ath3k: add support of 
13d3:3474 AR3012 device commit 0d0cef6183aec0fb6d0c9f00a09ff51ee086bbe2 upstream. BugLink: https://bugs.launchpad.net/bugs/1427680 This device requires new firmware files AthrBT_0x11020100.dfu and ramps_0x11020100_40.dfu added to /lib/firmware/ar3k/ that are not included in linux-firmware yet. T: Bus=01 Lev=01 Prnt=01 Port=04 Cnt=01 Dev#= 4 Spd=12 MxCh= 0 D: Ver= 1.10 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=13d3 ProdID=3474 Rev=00.01 C: #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=100mA I: If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb I: If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb Signed-off-by: Dmitry Tunin Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/ath3k.c | 2 ++ drivers/bluetooth/btusb.c | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c index 0e1c6fe9b91e3..e527a3e13939c 100644 --- a/drivers/bluetooth/ath3k.c +++ b/drivers/bluetooth/ath3k.c @@ -115,6 +115,7 @@ static const struct usb_device_id ath3k_table[] = { { USB_DEVICE(0x13d3, 0x3408) }, { USB_DEVICE(0x13d3, 0x3423) }, { USB_DEVICE(0x13d3, 0x3432) }, + { USB_DEVICE(0x13d3, 0x3474) }, /* Atheros AR5BBU12 with sflash firmware */ { USB_DEVICE(0x0489, 0xE02C) }, @@ -174,6 +175,7 @@ static const struct usb_device_id ath3k_blist_tbl[] = { { USB_DEVICE(0x13d3, 0x3408), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3423), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3432), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x13d3, 0x3474), .driver_info = BTUSB_ATH3012 }, /* Atheros AR5BBU22 with sflash firmware */ { USB_DEVICE(0x0489, 0xE036), .driver_info = BTUSB_ATH3012 }, diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 09527e5cb3765..420cc9f3eb76f 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -213,6 +213,7 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x13d3, 0x3408), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3423), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3432), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x13d3, 0x3474), .driver_info = BTUSB_ATH3012 }, /* Atheros AR5BBU12 with sflash firmware */ { USB_DEVICE(0x0489, 0xe02c), .driver_info = BTUSB_IGNORE }, From d0a2a8df991b409174b5cc663791de0f95d875c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Sat, 6 Jun 2015 22:45:59 +0200 Subject: [PATCH 0010/2091] b43: fix support for 14e4:4321 PCI dev with BCM4321 chipset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 90f91b129810c3f169e443252be30ed7c0130326 upstream. It seems Broadcom released two devices with conflicting device id. There are for sure 14e4:4321 PCI devices with BCM4321 (N-PHY) chipset, they can be found in routers, e.g. Netgear WNR834Bv2. However, according to Broadcom public sources 0x4321 is also used for 5 GHz BCM4306 (G-PHY). It's unsure if they meant PCI device id, or "virtual" id (from SPROM). To distinguish these devices lets check PHY type (G vs. N). 
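A hedged illustration of that disambiguation pattern, with simplified types and band handling rather than b43's real tables: the PCI ID alone would give the wrong answer for one of the two chips, so a property read back from the probed hardware breaks the tie.

  enum phy_type { PHY_TYPE_G, PHY_TYPE_N };

  static void guess_bands(unsigned int pci_dev_id, enum phy_type phy, int *have_5ghz)
  {
          *have_5ghz = 0;

          switch (pci_dev_id) {
          case 0x4321:
                  /* same ID is also used by 2.4 GHz N-PHY boards; don't claim 5 GHz there */
                  if (phy != PHY_TYPE_G)
                          break;
                  /* fall through: 5 GHz G-PHY BCM4306 */
          case 0x432a:
                  *have_5ghz = 1;
                  break;
          }
  }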
Signed-off-by: Rafał Miłecki Cc: # 3.16+ Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/b43/main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c index b2f9521fe551a..4cdac7801c8be 100644 --- a/drivers/net/wireless/b43/main.c +++ b/drivers/net/wireless/b43/main.c @@ -5365,6 +5365,10 @@ static void b43_supported_bands(struct b43_wldev *dev, bool *have_2ghz_phy, *have_5ghz_phy = true; return; case 0x4321: /* BCM4306 */ + /* There are 14e4:4321 PCI devs with 2.4 GHz BCM4321 (N-PHY) */ + if (dev->phy.type != B43_PHYTYPE_G) + break; + /* fall through */ case 0x4313: /* BCM4311 */ case 0x431a: /* BCM4318 */ case 0x432a: /* BCM4321 */ From 81440341dbbb9655ba666c62325cceb17dec4621 Mon Sep 17 00:00:00 2001 From: Alexey Sokolov Date: Tue, 2 Jun 2015 11:49:30 +0300 Subject: [PATCH 0011/2091] cdc-acm: Add support of ATOL FPrint fiscal printers commit 15bf722e6f6c0b884521a0363204532e849deb7f upstream. ATOL FPrint fiscal printers require usb_clear_halt to be executed to work properly. Add quirk to fix the issue. Signed-off-by: Alexey Sokolov Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 9 +++++++++ drivers/usb/class/cdc-acm.h | 1 + 2 files changed, 10 insertions(+) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 5c8f58114677d..a086e1d69bc70 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -1477,6 +1477,11 @@ static int acm_probe(struct usb_interface *intf, goto alloc_fail8; } + if (quirks & CLEAR_HALT_CONDITIONS) { + usb_clear_halt(usb_dev, usb_rcvbulkpipe(usb_dev, epread->bEndpointAddress)); + usb_clear_halt(usb_dev, usb_sndbulkpipe(usb_dev, epwrite->bEndpointAddress)); + } + return 0; alloc_fail8: if (acm->country_codes) { @@ -1756,6 +1761,10 @@ static const struct usb_device_id acm_ids[] = { .driver_info = NO_UNION_NORMAL, /* reports zero length descriptor */ }, + { USB_DEVICE(0x2912, 0x0001), /* ATOL FPrint */ + .driver_info = CLEAR_HALT_CONDITIONS, + }, + /* Nokia S60 phones expose two ACM channels. The first is * a modem and is picked up by the standard AT-command * information below. The second is 'vendor-specific' but diff --git a/drivers/usb/class/cdc-acm.h b/drivers/usb/class/cdc-acm.h index ffeb3c83941f5..b3b6c9db6fe5b 100644 --- a/drivers/usb/class/cdc-acm.h +++ b/drivers/usb/class/cdc-acm.h @@ -133,3 +133,4 @@ struct acm { #define NO_DATA_INTERFACE BIT(4) #define IGNORE_DEVICE BIT(5) #define QUIRK_CONTROL_LINE_STATE BIT(6) +#define CLEAR_HALT_CONDITIONS BIT(7) From 6a010c0abd49388a49af3d5a5bfc00e0d5767607 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 29 Jun 2015 12:55:35 -0700 Subject: [PATCH 0012/2091] Linux 4.1.1 --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index f5c8983aeeb7f..1caf4ad3eb8a7 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,8 @@ VERSION = 4 PATCHLEVEL = 1 -SUBLEVEL = 0 +SUBLEVEL = 1 EXTRAVERSION = -NAME = Hurr durr I'ma sheep +NAME = Series 4800 # *DOCUMENTATION* # To see a list of typical targets execute "make help" From 9bdbb8968e9e7e6b9619076d1867da1c0cfec2c7 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 8 Mar 2015 21:53:42 +0100 Subject: [PATCH 0013/2091] ARM: dts: sunxi: Adjust touchscreen compatible for sun5i and later commit 8bf1b9b3d90194a174493febc731f7783f2adf1a upstream. 
The touchscreen controller in the A13 and later has a different temperature curve than the one in the original A10, change the compatible for the A13 and later so that the kernel will use the correct curve. Reported-by: Tong Zhang Signed-off-by: Hans de Goede Signed-off-by: Maxime Ripard Signed-off-by: Hans de Goede Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/sun5i-a10s.dtsi | 2 +- arch/arm/boot/dts/sun5i-a13.dtsi | 2 +- arch/arm/boot/dts/sun7i-a20.dtsi | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/boot/dts/sun5i-a10s.dtsi b/arch/arm/boot/dts/sun5i-a10s.dtsi index 2fd8988f310c6..3794ca16499d6 100644 --- a/arch/arm/boot/dts/sun5i-a10s.dtsi +++ b/arch/arm/boot/dts/sun5i-a10s.dtsi @@ -573,7 +573,7 @@ }; rtp: rtp@01c25000 { - compatible = "allwinner,sun4i-a10-ts"; + compatible = "allwinner,sun5i-a13-ts"; reg = <0x01c25000 0x100>; interrupts = <29>; #thermal-sensor-cells = <0>; diff --git a/arch/arm/boot/dts/sun5i-a13.dtsi b/arch/arm/boot/dts/sun5i-a13.dtsi index 883cb4873688f..5098185abde67 100644 --- a/arch/arm/boot/dts/sun5i-a13.dtsi +++ b/arch/arm/boot/dts/sun5i-a13.dtsi @@ -555,7 +555,7 @@ }; rtp: rtp@01c25000 { - compatible = "allwinner,sun4i-a10-ts"; + compatible = "allwinner,sun5i-a13-ts"; reg = <0x01c25000 0x100>; interrupts = <29>; #thermal-sensor-cells = <0>; diff --git a/arch/arm/boot/dts/sun7i-a20.dtsi b/arch/arm/boot/dts/sun7i-a20.dtsi index fdd181792b4be..2b4847c7cbd41 100644 --- a/arch/arm/boot/dts/sun7i-a20.dtsi +++ b/arch/arm/boot/dts/sun7i-a20.dtsi @@ -1042,7 +1042,7 @@ }; rtp: rtp@01c25000 { - compatible = "allwinner,sun4i-a10-ts"; + compatible = "allwinner,sun5i-a13-ts"; reg = <0x01c25000 0x100>; interrupts = ; #thermal-sensor-cells = <0>; From 9d281572f2be11afa36e1534da43890cb9a130cc Mon Sep 17 00:00:00 2001 From: Sebastien Szymanski Date: Wed, 20 May 2015 16:30:37 +0200 Subject: [PATCH 0014/2091] ARM: clk-imx6q: refine sata's parent commit da946aeaeadcd24ff0cda9984c6fb8ed2bfd462a upstream. According to IMX6D/Q RM, table 18-3, sata clock's parent is ahb, not ipg. Signed-off-by: Sebastien Szymanski Reviewed-by: Fabio Estevam Signed-off-by: Shawn Guo [dirk.behme: Adjust moved file] Signed-off-by: Dirk Behme Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-imx/clk-imx6q.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-imx/clk-imx6q.c b/arch/arm/mach-imx/clk-imx6q.c index 469a150bf98f9..a2e8ef3c0bd9f 100644 --- a/arch/arm/mach-imx/clk-imx6q.c +++ b/arch/arm/mach-imx/clk-imx6q.c @@ -443,7 +443,7 @@ static void __init imx6q_clocks_init(struct device_node *ccm_node) clk[IMX6QDL_CLK_GPMI_IO] = imx_clk_gate2("gpmi_io", "enfc", base + 0x78, 28); clk[IMX6QDL_CLK_GPMI_APB] = imx_clk_gate2("gpmi_apb", "usdhc3", base + 0x78, 30); clk[IMX6QDL_CLK_ROM] = imx_clk_gate2("rom", "ahb", base + 0x7c, 0); - clk[IMX6QDL_CLK_SATA] = imx_clk_gate2("sata", "ipg", base + 0x7c, 4); + clk[IMX6QDL_CLK_SATA] = imx_clk_gate2("sata", "ahb", base + 0x7c, 4); clk[IMX6QDL_CLK_SDMA] = imx_clk_gate2("sdma", "ahb", base + 0x7c, 6); clk[IMX6QDL_CLK_SPBA] = imx_clk_gate2("spba", "ipg", base + 0x7c, 12); clk[IMX6QDL_CLK_SPDIF] = imx_clk_gate2("spdif", "spdif_podf", base + 0x7c, 14); From aa0b8c72f5c1e0208bfafb383b535bdc9d3b2c6b Mon Sep 17 00:00:00 2001 From: Bandan Das Date: Thu, 11 Jun 2015 02:05:33 -0400 Subject: [PATCH 0015/2091] KVM: nSVM: Check for NRIPS support before updating control field commit f104765b4f81fd74d69e0eb161e89096deade2db upstream. 
If hardware doesn't support DecodeAssist - a feature that provides more information about the intercept in the VMCB, KVM decodes the instruction and then updates the next_rip vmcb control field. However, NRIP support itself depends on cpuid Fn8000_000A_EDX[NRIPS]. Since skip_emulated_instruction() doesn't verify nrip support before accepting control.next_rip as valid, avoid writing this field if support isn't present. Signed-off-by: Bandan Das Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 9afa233b5482f..4911bf19122b4 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -511,8 +511,10 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - if (svm->vmcb->control.next_rip != 0) + if (svm->vmcb->control.next_rip != 0) { + WARN_ON(!static_cpu_has(X86_FEATURE_NRIPS)); svm->next_rip = svm->vmcb->control.next_rip; + } if (!svm->next_rip) { if (emulate_instruction(vcpu, EMULTYPE_SKIP) != @@ -4317,7 +4319,9 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu, break; } - vmcb->control.next_rip = info->next_rip; + /* TODO: Advertise NRIPS to guest hypervisor unconditionally */ + if (static_cpu_has(X86_FEATURE_NRIPS)) + vmcb->control.next_rip = info->next_rip; vmcb->control.exit_code = icpt_info.exit_code; vmexit = nested_svm_exit_handled(svm); From 1a2cddd17ebe5e76a796c25e11cdd5518310eabc Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Sun, 21 Jun 2015 18:50:44 +0200 Subject: [PATCH 0016/2091] can: fix loss of CAN frames in raw_rcv commit 36c01245eb8046c16eee6431e7dbfbb302635fa8 upstream. As reported by Manfred Schlaegl here http://marc.info/?l=linux-netdev&m=143482089824232&w=2 commit 514ac99c64b "can: fix multiple delivery of a single CAN frame for overlapping CAN filters" requires the skb->tstamp to be set to check for identical CAN skbs. As net timestamping is influenced by several players (netstamp_needed and netdev_tstamp_prequeue) Manfred missed a proper timestamp which leads to CAN frame loss. As skb timestamping became now mandatory for CAN related skbs this patch makes sure that received CAN skbs always have a proper timestamp set. Maybe there's a better solution in the future but this patch fixes the CAN frame loss so far. 
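Concretely, the guard added in each receive path can be sketched as the helper below (the helper name is invented, this is not a hunk from the patch). In this kernel version skb->tstamp is a ktime_t whose .tv64 field stays zero until the skb has been stamped, and __net_timestamp() fills it with the current wall-clock time, which the duplicate-delivery check in raw_rcv() relies on as described above.

  #include <linux/skbuff.h>

  /* illustrative helper: stamp a CAN skb exactly once, before netif_rx() */
  static inline void can_skb_set_timestamp(struct sk_buff *skb)
  {
          if (!(skb->tstamp.tv64))
                  __net_timestamp(skb);    /* sets skb->tstamp = ktime_get_real() */
  }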
Reported-by: Manfred Schlaegl Signed-off-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/dev.c | 5 +++++ drivers/net/can/slcan.c | 1 + drivers/net/can/vcan.c | 3 +++ net/can/af_can.c | 6 +++++- 4 files changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index b0f69248cb71c..e9b1810d319f0 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -440,6 +440,9 @@ unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx) struct can_frame *cf = (struct can_frame *)skb->data; u8 dlc = cf->can_dlc; + if (!(skb->tstamp.tv64)) + __net_timestamp(skb); + netif_rx(priv->echo_skb[idx]); priv->echo_skb[idx] = NULL; @@ -575,6 +578,7 @@ struct sk_buff *alloc_can_skb(struct net_device *dev, struct can_frame **cf) if (unlikely(!skb)) return NULL; + __net_timestamp(skb); skb->protocol = htons(ETH_P_CAN); skb->pkt_type = PACKET_BROADCAST; skb->ip_summed = CHECKSUM_UNNECESSARY; @@ -603,6 +607,7 @@ struct sk_buff *alloc_canfd_skb(struct net_device *dev, if (unlikely(!skb)) return NULL; + __net_timestamp(skb); skb->protocol = htons(ETH_P_CANFD); skb->pkt_type = PACKET_BROADCAST; skb->ip_summed = CHECKSUM_UNNECESSARY; diff --git a/drivers/net/can/slcan.c b/drivers/net/can/slcan.c index c837eb91d43e3..f64f5290d6f8f 100644 --- a/drivers/net/can/slcan.c +++ b/drivers/net/can/slcan.c @@ -207,6 +207,7 @@ static void slc_bump(struct slcan *sl) if (!skb) return; + __net_timestamp(skb); skb->dev = sl->dev; skb->protocol = htons(ETH_P_CAN); skb->pkt_type = PACKET_BROADCAST; diff --git a/drivers/net/can/vcan.c b/drivers/net/can/vcan.c index 674f367087c54..0ce868de855df 100644 --- a/drivers/net/can/vcan.c +++ b/drivers/net/can/vcan.c @@ -78,6 +78,9 @@ static void vcan_rx(struct sk_buff *skb, struct net_device *dev) skb->dev = dev; skb->ip_summed = CHECKSUM_UNNECESSARY; + if (!(skb->tstamp.tv64)) + __net_timestamp(skb); + netif_rx_ni(skb); } diff --git a/net/can/af_can.c b/net/can/af_can.c index 32d710eaf1fc9..689c818ed0077 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -310,8 +310,12 @@ int can_send(struct sk_buff *skb, int loop) return err; } - if (newskb) + if (newskb) { + if (!(newskb->tstamp.tv64)) + __net_timestamp(newskb); + netif_rx_ni(newskb); + } /* update statistics */ can_stats.tx_frames++; From bd0a0d20ebd08f250af9023530b5de4bc433ebaa Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Fri, 12 Jun 2015 10:16:41 -0300 Subject: [PATCH 0017/2091] sctp: fix ASCONF list handling [ Upstream commit 2d45a02d0166caf2627fe91897c6ffc3b19514c4 ] ->auto_asconf_splist is per namespace and mangled by functions like sctp_setsockopt_auto_asconf() which doesn't guarantee any serialization. Also, the call to inet_sk_copy_descendant() was backuping ->auto_asconf_list through the copy but was not honoring ->do_auto_asconf, which could lead to list corruption if it was different between both sockets. This commit thus fixes the list handling by using ->addr_wq_lock spinlock to protect the list. A special handling is done upon socket creation and destruction for that. Error handlig on sctp_init_sock() will never return an error after having initialized asconf, so sctp_destroy_sock() can be called without addrq_wq_lock. The lock now will be take on sctp_close_sock(), before locking the socket, so we don't do it in inverse order compared to sctp_addr_wq_timeout_handler(). 
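The ordering rule stated above can be sketched as follows, in userspace C with illustrative names rather than the sctp code: every path that needs both locks takes the address-queue lock first and the socket lock second, matching the timer handler, so an ABBA deadlock cannot form.

  #include <pthread.h>

  static pthread_mutex_t addr_wq_lock = PTHREAD_MUTEX_INITIALIZER; /* net->sctp.addr_wq_lock stand-in */
  static pthread_mutex_t sock_lock    = PTHREAD_MUTEX_INITIALIZER; /* per-socket lock stand-in */

  static void timer_side(void)    /* like sctp_addr_wq_timeout_handler() */
  {
          pthread_mutex_lock(&addr_wq_lock);   /* 1st */
          pthread_mutex_lock(&sock_lock);      /* 2nd */
          /* walk auto_asconf_splist and queue ASCONF work */
          pthread_mutex_unlock(&sock_lock);
          pthread_mutex_unlock(&addr_wq_lock);
  }

  static void close_side(void)    /* like sctp_close() after this patch */
  {
          pthread_mutex_lock(&addr_wq_lock);   /* same order: 1st */
          pthread_mutex_lock(&sock_lock);      /* 2nd */
          /* unlink the socket from auto_asconf_splist and release it */
          pthread_mutex_unlock(&sock_lock);
          pthread_mutex_unlock(&addr_wq_lock);
  }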
Instead of taking the lock on sctp_sock_migrate() for copying and restoring the list values, it's preferred to avoid rewritting it by implementing sctp_copy_descendant(). Issue was found with a test application that kept flipping sysctl default_auto_asconf on and off, but one could trigger it by issuing simultaneous setsockopt() calls on multiple sockets or by creating/destroying sockets fast enough. This is only triggerable locally. Fixes: 9f7d653b67ae ("sctp: Add Auto-ASCONF support (core).") Reported-by: Ji Jianwen Suggested-by: Neil Horman Suggested-by: Hannes Frederic Sowa Acked-by: Hannes Frederic Sowa Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/netns/sctp.h | 1 + include/net/sctp/structs.h | 4 ++++ net/sctp/socket.c | 43 ++++++++++++++++++++++++++++---------- 3 files changed, 37 insertions(+), 11 deletions(-) diff --git a/include/net/netns/sctp.h b/include/net/netns/sctp.h index 3573a81815ad9..8ba379f9e4678 100644 --- a/include/net/netns/sctp.h +++ b/include/net/netns/sctp.h @@ -31,6 +31,7 @@ struct netns_sctp { struct list_head addr_waitq; struct timer_list addr_wq_timer; struct list_head auto_asconf_splist; + /* Lock that protects both addr_waitq and auto_asconf_splist */ spinlock_t addr_wq_lock; /* Lock that protects the local_addr_list writers */ diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 2bb2fcf5b11f0..495c87e367b3f 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -223,6 +223,10 @@ struct sctp_sock { atomic_t pd_mode; /* Receive to here while partial delivery is in effect. */ struct sk_buff_head pd_lobby; + + /* These must be the last fields, as they will skipped on copies, + * like on accept and peeloff operations + */ struct list_head auto_asconf_list; int do_auto_asconf; }; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index f09de7fac2e6a..5f6c4e61325b6 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1528,8 +1528,10 @@ static void sctp_close(struct sock *sk, long timeout) /* Supposedly, no process has access to the socket, but * the net layers still may. + * Also, sctp_destroy_sock() needs to be called with addr_wq_lock + * held and that should be grabbed before socket lock. 
*/ - local_bh_disable(); + spin_lock_bh(&net->sctp.addr_wq_lock); bh_lock_sock(sk); /* Hold the sock, since sk_common_release() will put sock_put() @@ -1539,7 +1541,7 @@ static void sctp_close(struct sock *sk, long timeout) sk_common_release(sk); bh_unlock_sock(sk); - local_bh_enable(); + spin_unlock_bh(&net->sctp.addr_wq_lock); sock_put(sk); @@ -3580,6 +3582,7 @@ static int sctp_setsockopt_auto_asconf(struct sock *sk, char __user *optval, if ((val && sp->do_auto_asconf) || (!val && !sp->do_auto_asconf)) return 0; + spin_lock_bh(&sock_net(sk)->sctp.addr_wq_lock); if (val == 0 && sp->do_auto_asconf) { list_del(&sp->auto_asconf_list); sp->do_auto_asconf = 0; @@ -3588,6 +3591,7 @@ static int sctp_setsockopt_auto_asconf(struct sock *sk, char __user *optval, &sock_net(sk)->sctp.auto_asconf_splist); sp->do_auto_asconf = 1; } + spin_unlock_bh(&sock_net(sk)->sctp.addr_wq_lock); return 0; } @@ -4121,18 +4125,28 @@ static int sctp_init_sock(struct sock *sk) local_bh_disable(); percpu_counter_inc(&sctp_sockets_allocated); sock_prot_inuse_add(net, sk->sk_prot, 1); + + /* Nothing can fail after this block, otherwise + * sctp_destroy_sock() will be called without addr_wq_lock held + */ if (net->sctp.default_auto_asconf) { + spin_lock(&sock_net(sk)->sctp.addr_wq_lock); list_add_tail(&sp->auto_asconf_list, &net->sctp.auto_asconf_splist); sp->do_auto_asconf = 1; - } else + spin_unlock(&sock_net(sk)->sctp.addr_wq_lock); + } else { sp->do_auto_asconf = 0; + } + local_bh_enable(); return 0; } -/* Cleanup any SCTP per socket resources. */ +/* Cleanup any SCTP per socket resources. Must be called with + * sock_net(sk)->sctp.addr_wq_lock held if sp->do_auto_asconf is true + */ static void sctp_destroy_sock(struct sock *sk) { struct sctp_sock *sp; @@ -7195,6 +7209,19 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk, newinet->mc_list = NULL; } +static inline void sctp_copy_descendant(struct sock *sk_to, + const struct sock *sk_from) +{ + int ancestor_size = sizeof(struct inet_sock) + + sizeof(struct sctp_sock) - + offsetof(struct sctp_sock, auto_asconf_list); + + if (sk_from->sk_family == PF_INET6) + ancestor_size += sizeof(struct ipv6_pinfo); + + __inet_sk_copy_descendant(sk_to, sk_from, ancestor_size); +} + /* Populate the fields of the newsk from the oldsk and migrate the assoc * and its messages to the newsk. */ @@ -7209,7 +7236,6 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, struct sk_buff *skb, *tmp; struct sctp_ulpevent *event; struct sctp_bind_hashbucket *head; - struct list_head tmplist; /* Migrate socket buffer sizes and all the socket level options to the * new socket. @@ -7217,12 +7243,7 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, newsk->sk_sndbuf = oldsk->sk_sndbuf; newsk->sk_rcvbuf = oldsk->sk_rcvbuf; /* Brute force copy old sctp opt. */ - if (oldsp->do_auto_asconf) { - memcpy(&tmplist, &newsp->auto_asconf_list, sizeof(tmplist)); - inet_sk_copy_descendant(newsk, oldsk); - memcpy(&newsp->auto_asconf_list, &tmplist, sizeof(tmplist)); - } else - inet_sk_copy_descendant(newsk, oldsk); + sctp_copy_descendant(newsk, oldsk); /* Restore the ep value that was overwritten with the above structure * copy. 
From 08be544ef5d8453b7778bd57f3da8eeebcf1cd65 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Mon, 15 Jun 2015 20:28:51 +0300 Subject: [PATCH 0018/2091] bridge: fix br_stp_set_bridge_priority race conditions [ Upstream commit 2dab80a8b486f02222a69daca6859519e05781d9 ] After the ->set() spinlocks were removed br_stp_set_bridge_priority was left running without any protection when used via sysfs. It can race with port add/del and could result in use-after-free cases and corrupted lists. Tested by running port add/del in a loop with stp enabled while setting priority in a loop, crashes are easily reproducible. The spinlocks around sysfs ->set() were removed in commit: 14f98f258f19 ("bridge: range check STP parameters") There's also a race condition in the netlink priority support that is fixed by this change, but it was introduced recently and the fixes tag covers it, just in case it's needed the commit is: af615762e972 ("bridge: add ageing_time, stp_state, priority over netlink") Signed-off-by: Nikolay Aleksandrov Fixes: 14f98f258f19 ("bridge: range check STP parameters") Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_ioctl.c | 2 -- net/bridge/br_stp_if.c | 4 +++- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c index a9a4a1b7863d1..8d423bc649b9c 100644 --- a/net/bridge/br_ioctl.c +++ b/net/bridge/br_ioctl.c @@ -247,9 +247,7 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN)) return -EPERM; - spin_lock_bh(&br->lock); br_stp_set_bridge_priority(br, args[1]); - spin_unlock_bh(&br->lock); return 0; case BRCTL_SET_PORT_PRIORITY: diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index 41146872c1b47..7832d07f48f66 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -243,12 +243,13 @@ bool br_stp_recalculate_bridge_id(struct net_bridge *br) return true; } -/* called under bridge lock */ +/* Acquires and releases bridge lock */ void br_stp_set_bridge_priority(struct net_bridge *br, u16 newprio) { struct net_bridge_port *p; int wasroot; + spin_lock_bh(&br->lock); wasroot = br_is_root_bridge(br); list_for_each_entry(p, &br->port_list, list) { @@ -266,6 +267,7 @@ void br_stp_set_bridge_priority(struct net_bridge *br, u16 newprio) br_port_state_selection(br); if (br_is_root_bridge(br) && !wasroot) br_become_root_bridge(br); + spin_unlock_bh(&br->lock); } /* called under bridge lock */ From d7884e43677ba8618ae2f1ead7b96b215b409e20 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 16 Jun 2015 07:59:11 -0700 Subject: [PATCH 0019/2091] packet: read num_members once in packet_rcv_fanout() [ Upstream commit f98f4514d07871da7a113dd9e3e330743fd70ae4 ] We need to tell compiler it must not read f->num_members multiple times. Otherwise testing if num is not zero is flaky, and we could attempt an invalid divide by 0 in fanout_demux_cpu() Note bug was present in packet_rcv_fanout_hash() and packet_rcv_fanout_lb() but final 3.1 had a simple location after commit 95ec3eb417115fb ("packet: Add 'cpu' fanout policy.") Fixes: dc99f600698dc ("packet: Add fanout support.") Signed-off-by: Eric Dumazet Cc: Willem de Bruijn Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/packet/af_packet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index b5989c6ee5513..131545a06f050 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1353,7 +1353,7 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct packet_fanout *f = pt->af_packet_priv; - unsigned int num = f->num_members; + unsigned int num = READ_ONCE(f->num_members); struct packet_sock *po; unsigned int idx; From 2c330edb4b31fdf18c6a6c4d1d9b482632c3925b Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 17 Jun 2015 15:59:34 -0400 Subject: [PATCH 0020/2091] packet: avoid out of bounds read in round robin fanout [ Upstream commit 468479e6043c84f5a65299cc07cb08a22a28c2b1 ] PACKET_FANOUT_LB computes f->rr_cur such that it is modulo f->num_members. It returns the old value unconditionally, but f->num_members may have changed since the last store. Ensure that the return value is always < num. When modifying the logic, simplify it further by replacing the loop with an unconditional atomic increment. Fixes: dc99f600698d ("packet: Add fanout support.") Suggested-by: Eric Dumazet Signed-off-by: Willem de Bruijn Acked-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/packet/af_packet.c | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 131545a06f050..fe1610ddeacf7 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1272,16 +1272,6 @@ static void packet_sock_destruct(struct sock *sk) sk_refcnt_debug_dec(sk); } -static int fanout_rr_next(struct packet_fanout *f, unsigned int num) -{ - int x = atomic_read(&f->rr_cur) + 1; - - if (x >= num) - x = 0; - - return x; -} - static unsigned int fanout_demux_hash(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) @@ -1293,13 +1283,9 @@ static unsigned int fanout_demux_lb(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) { - int cur, old; + unsigned int val = atomic_inc_return(&f->rr_cur); - cur = atomic_read(&f->rr_cur); - while ((old = atomic_cmpxchg(&f->rr_cur, cur, - fanout_rr_next(f, num))) != cur) - cur = old; - return cur; + return val % num; } static unsigned int fanout_demux_cpu(struct packet_fanout *f, From 914b0ef228c97dfea025091428f6c7809c9d38ad Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Tue, 16 Jun 2015 22:56:39 +0300 Subject: [PATCH 0021/2091] neigh: do not modify unlinked entries [ Upstream commit 2c51a97f76d20ebf1f50fef908b986cb051fdff9 ] The lockless lookups can return entry that is unlinked. Sometimes they get reference before last neigh_cleanup_and_release, sometimes they do not need reference. Later, any modification attempts may result in the following problems: 1. entry is not destroyed immediately because neigh_update can start the timer for dead entry, eg. on change to NUD_REACHABLE state. As result, entry lives for some time but is invisible and out of control. 2. __neigh_event_send can run in parallel with neigh_destroy while refcnt=0 but if timer is started and expired refcnt can reach 0 for second time leading to second neigh_destroy and possible crash. Thanks to Eric Dumazet and Ying Xue for their work and analyze on the __neigh_event_send change. 
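The common shape of the fix, re-checking a "dead" flag under the entry's own lock before any state change or timer re-arm, can be sketched like this (illustrative userspace C with simplified types, not the neighbour code itself):

  #include <errno.h>
  #include <pthread.h>

  struct entry {
          pthread_rwlock_t lock;
          int dead;           /* set when the entry is unlinked from the table */
          int state;
  };

  static int entry_update(struct entry *e, int new_state)
  {
          int rc = -EINVAL;

          pthread_rwlock_wrlock(&e->lock);
          if (e->dead)        /* lockless lookup raced with unlink: leave it alone */
                  goto out;
          e->state = new_state;   /* safe: entry is still linked and visible */
          rc = 0;
  out:
          pthread_rwlock_unlock(&e->lock);
          return rc;
  }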
Fixes: 767e97e1e0db ("neigh: RCU conversion of struct neighbour") Fixes: a263b3093641 ("ipv4: Make neigh lookups directly in output packet path.") Fixes: 6fd6ce2056de ("ipv6: Do not depend on rt->n in ip6_finish_output2().") Cc: Eric Dumazet Cc: Ying Xue Signed-off-by: Julian Anastasov Acked-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/neighbour.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 3de6542560288..2237c1b3cdd20 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -957,6 +957,8 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) rc = 0; if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE)) goto out_unlock_bh; + if (neigh->dead) + goto out_dead; if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) { if (NEIGH_VAR(neigh->parms, MCAST_PROBES) + @@ -1013,6 +1015,13 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) write_unlock(&neigh->lock); local_bh_enable(); return rc; + +out_dead: + if (neigh->nud_state & NUD_STALE) + goto out_unlock_bh; + write_unlock_bh(&neigh->lock); + kfree_skb(skb); + return 1; } EXPORT_SYMBOL(__neigh_event_send); @@ -1076,6 +1085,8 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, if (!(flags & NEIGH_UPDATE_F_ADMIN) && (old & (NUD_NOARP | NUD_PERMANENT))) goto out; + if (neigh->dead) + goto out; if (!(new & NUD_VALID)) { neigh_del_timer(neigh); @@ -1225,6 +1236,8 @@ EXPORT_SYMBOL(neigh_update); */ void __neigh_set_probe_once(struct neighbour *neigh) { + if (neigh->dead) + return; neigh->updated = jiffies; if (!(neigh->nud_state & NUD_FAILED)) return; From 80b856db9f9e1dbaf20a24020f8ade95c14f7477 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 17 Jun 2015 13:54:54 +0200 Subject: [PATCH 0022/2091] mac80211: fix locking in update_vlan_tailroom_need_count() [ Upstream commit 51f458d9612177f69c2e2c437034ae15f93078e7 ] Unfortunately, Michal's change to fix AP_VLAN crypto tailroom caused a locking issue that was reported by lockdep, but only in a few cases - the issue was a classic ABBA deadlock caused by taking the mtx after the key_mtx, where normally they're taken the other way around. As the key mutex protects the field in question (I'm adding a few annotations to make that clear) only the iteration needs to be protected, but we can also iterate the interface list with just RCU protection while holding the key mutex. Fixes: f9dca80b98ca ("mac80211: fix AP_VLAN crypto tailroom calculation") Signed-off-by: Johannes Berg Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/mac80211/key.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/net/mac80211/key.c b/net/mac80211/key.c index a907f2d5c12d8..81e9785f38bc2 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -66,12 +66,15 @@ update_vlan_tailroom_need_count(struct ieee80211_sub_if_data *sdata, int delta) if (sdata->vif.type != NL80211_IFTYPE_AP) return; - mutex_lock(&sdata->local->mtx); + /* crypto_tx_tailroom_needed_cnt is protected by this */ + assert_key_lock(sdata->local); + + rcu_read_lock(); - list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) + list_for_each_entry_rcu(vlan, &sdata->u.ap.vlans, u.vlan.list) vlan->crypto_tx_tailroom_needed_cnt += delta; - mutex_unlock(&sdata->local->mtx); + rcu_read_unlock(); } static void increment_tailroom_need_count(struct ieee80211_sub_if_data *sdata) @@ -95,6 +98,8 @@ static void increment_tailroom_need_count(struct ieee80211_sub_if_data *sdata) * http://mid.gmane.org/1308590980.4322.19.camel@jlt3.sipsolutions.net */ + assert_key_lock(sdata->local); + update_vlan_tailroom_need_count(sdata, 1); if (!sdata->crypto_tx_tailroom_needed_cnt++) { @@ -109,6 +114,8 @@ static void increment_tailroom_need_count(struct ieee80211_sub_if_data *sdata) static void decrease_tailroom_need_count(struct ieee80211_sub_if_data *sdata, int delta) { + assert_key_lock(sdata->local); + WARN_ON_ONCE(sdata->crypto_tx_tailroom_needed_cnt < delta); update_vlan_tailroom_need_count(sdata, -delta); From 1bc31b1e80a716b903cf40ae4d679c820f71d518 Mon Sep 17 00:00:00 2001 From: Stas Sergeev Date: Thu, 18 Jun 2015 18:36:03 +0300 Subject: [PATCH 0023/2091] mvneta: add forgotten initialization of autonegotiation bits [ Upstream commit 538761b794c1542f1c6e31eadd9d7aae118889f7 ] The commit 898b2970e2c9 ("mvneta: implement SGMII-based in-band link state signaling") changed mvneta_adjust_link() so that it does not clear the auto-negotiation bits in MVNETA_GMAC_AUTONEG_CONFIG register. This was necessary for auto-negotiation mode to work. Unfortunately I haven't checked if these bits are ever initialized. It appears they are not. This patch adds the missing initialization of the auto-negotiation bits in the MVNETA_GMAC_AUTONEG_CONFIG register. It fixes the following regression: https://www.mail-archive.com/netdev@vger.kernel.org/msg67928.html Since the patch was tested to fix a regression, it should be applied to stable tree. Tested-by: Arnaud Ebalard CC: Thomas Petazzoni CC: Florian Fainelli CC: netdev@vger.kernel.org CC: linux-kernel@vger.kernel.org CC: stable@vger.kernel.org Signed-off-by: Stas Sergeev Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/marvell/mvneta.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index ce5f7f9cff060..74176ec4f39de 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -1013,6 +1013,12 @@ static void mvneta_defaults_set(struct mvneta_port *pp) val = mvreg_read(pp, MVNETA_GMAC_CLOCK_DIVIDER); val |= MVNETA_GMAC_1MS_CLOCK_ENABLE; mvreg_write(pp, MVNETA_GMAC_CLOCK_DIVIDER, val); + } else { + val = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG); + val &= ~(MVNETA_GMAC_INBAND_AN_ENABLE | + MVNETA_GMAC_AN_SPEED_EN | + MVNETA_GMAC_AN_DUPLEX_EN); + mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, val); } mvneta_set_ucast_table(pp, -1); From c31967d447989b85b631dda39487a319df21e03a Mon Sep 17 00:00:00 2001 From: Christoph Paasch Date: Thu, 18 Jun 2015 09:15:34 -0700 Subject: [PATCH 0024/2091] tcp: Do not call tcp_fastopen_reset_cipher from interrupt context [ Upstream commit dfea2aa654243f70dc53b8648d0bbdeec55a7df1 ] tcp_fastopen_reset_cipher really cannot be called from interrupt context. It allocates the tcp_fastopen_context with GFP_KERNEL and calls crypto_alloc_cipher, which allocates all kind of stuff with GFP_KERNEL. Thus, we might sleep when the key-generation is triggered by an incoming TFO cookie-request which would then happen in interrupt- context, as shown by enabling CONFIG_DEBUG_ATOMIC_SLEEP: [ 36.001813] BUG: sleeping function called from invalid context at mm/slub.c:1266 [ 36.003624] in_atomic(): 1, irqs_disabled(): 0, pid: 1016, name: packetdrill [ 36.004859] CPU: 1 PID: 1016 Comm: packetdrill Not tainted 4.1.0-rc7 #14 [ 36.006085] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.7.5-0-ge51488c-20140602_164612-nilsson.home.kraxel.org 04/01/2014 [ 36.008250] 00000000000004f2 ffff88007f8838a8 ffffffff8171d53a ffff880075a084a8 [ 36.009630] ffff880075a08000 ffff88007f8838c8 ffffffff810967d3 ffff88007f883928 [ 36.011076] 0000000000000000 ffff88007f8838f8 ffffffff81096892 ffff88007f89be00 [ 36.012494] Call Trace: [ 36.012953] [] dump_stack+0x4f/0x6d [ 36.014085] [] ___might_sleep+0x103/0x170 [ 36.015117] [] __might_sleep+0x52/0x90 [ 36.016117] [] kmem_cache_alloc_trace+0x47/0x190 [ 36.017266] [] ? tcp_fastopen_reset_cipher+0x42/0x130 [ 36.018485] [] tcp_fastopen_reset_cipher+0x42/0x130 [ 36.019679] [] tcp_fastopen_init_key_once+0x61/0x70 [ 36.020884] [] __tcp_fastopen_cookie_gen+0x1c/0x60 [ 36.022058] [] tcp_try_fastopen+0x58f/0x730 [ 36.023118] [] tcp_conn_request+0x3e8/0x7b0 [ 36.024185] [] ? __module_text_address+0x12/0x60 [ 36.025327] [] tcp_v4_conn_request+0x51/0x60 [ 36.026410] [] tcp_rcv_state_process+0x190/0xda0 [ 36.027556] [] ? __inet_lookup_established+0x47/0x170 [ 36.028784] [] tcp_v4_do_rcv+0x16d/0x3d0 [ 36.029832] [] ? security_sock_rcv_skb+0x16/0x20 [ 36.030936] [] tcp_v4_rcv+0x77a/0x7b0 [ 36.031875] [] ? iptable_filter_hook+0x33/0x70 [ 36.032953] [] ip_local_deliver_finish+0x92/0x1f0 [ 36.034065] [] ip_local_deliver+0x9a/0xb0 [ 36.035069] [] ? 
ip_rcv+0x3d0/0x3d0 [ 36.035963] [] ip_rcv_finish+0x119/0x330 [ 36.036950] [] ip_rcv+0x2e7/0x3d0 [ 36.037847] [] __netif_receive_skb_core+0x552/0x930 [ 36.038994] [] __netif_receive_skb+0x27/0x70 [ 36.040033] [] process_backlog+0xd2/0x1f0 [ 36.041025] [] net_rx_action+0x122/0x310 [ 36.042007] [] __do_softirq+0x103/0x2f0 [ 36.042978] [] do_softirq_own_stack+0x1c/0x30 This patch moves the call to tcp_fastopen_init_key_once to the places where a listener socket creates its TFO-state, which always happens in user-context (either from the setsockopt, or implicitly during the listen()-call) Cc: Eric Dumazet Cc: Hannes Frederic Sowa Fixes: 222e83d2e0ae ("tcp: switch tcp_fastopen key generation to net_get_random_once") Signed-off-by: Christoph Paasch Acked-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/af_inet.c | 2 ++ net/ipv4/tcp.c | 7 +++++-- net/ipv4/tcp_fastopen.c | 2 -- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 8b47a4d79d040..a5aa54ea65336 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -228,6 +228,8 @@ int inet_listen(struct socket *sock, int backlog) err = 0; if (err) goto out; + + tcp_fastopen_init_key_once(true); } err = inet_csk_listen_start(sk, backlog); if (err) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f1377f2a0472e..bb2ce74f60049 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2545,10 +2545,13 @@ static int do_tcp_setsockopt(struct sock *sk, int level, case TCP_FASTOPEN: if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE | - TCPF_LISTEN))) + TCPF_LISTEN))) { + tcp_fastopen_init_key_once(true); + err = fastopen_init_queue(sk, val); - else + } else { err = -EINVAL; + } break; case TCP_TIMESTAMP: if (!tp->repair) diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 46b087a27503a..f9c0fb84e4352 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -78,8 +78,6 @@ static bool __tcp_fastopen_cookie_gen(const void *path, struct tcp_fastopen_context *ctx; bool ok = false; - tcp_fastopen_init_key_once(true); - rcu_read_lock(); ctx = rcu_dereference(tcp_fastopen_ctx); if (ctx) { From 6fc8b947b364ceb6d91e5b6f3e3d22cd9a013ac0 Mon Sep 17 00:00:00 2001 From: "Palik, Imre" Date: Fri, 19 Jun 2015 14:21:51 +0200 Subject: [PATCH 0025/2091] xen-netback: fix a BUG() during initialization [ Upstream commit 12b322ac85208de564ecf23aa754d796a91de21f ] Commit edafc132baac ("xen-netback: making the bandwidth limiter runtime settable") introduced the capability to change the bandwidth rate limit at runtime. But it also introduced a possible crashing bug. If netback receives two XenbusStateConnected without getting the hotplug-status watch firing in between, then it will try to register the watches for the rate limiter again. But this triggers a BUG() in the watch registration code. The fix modifies connect() to remove the possibly existing packet-rate watches before trying to install those watches. This behaviour is in line with how connect() deals with the hotplug-status watch. Signed-off-by: Imre Palik Cc: Matt Wilson Acked-by: Wei Liu Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/xen-netback/xenbus.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index 968787abf78d4..ec383b0f54435 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -681,6 +681,9 @@ static int xen_register_watchers(struct xenbus_device *dev, struct xenvif *vif) char *node; unsigned maxlen = strlen(dev->nodename) + sizeof("/rate"); + if (vif->credit_watch.node) + return -EADDRINUSE; + node = kmalloc(maxlen, GFP_KERNEL); if (!node) return -ENOMEM; @@ -770,6 +773,7 @@ static void connect(struct backend_info *be) } xen_net_read_rate(dev, &credit_bytes, &credit_usec); + xen_unregister_watchers(be->vif); xen_register_watchers(dev, be->vif); read_xenbus_vif_flags(be); From 66634bb1c4f1eda70583eddaf8c5e980f05a8fb9 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Tue, 23 Jun 2015 08:34:39 +0300 Subject: [PATCH 0026/2091] ip: report the original address of ICMP messages [ Upstream commit 34b99df4e6256ddafb663c6de0711dceceddfe0e ] ICMP messages can trigger ICMP and local errors. In this case serr->port is 0 and starting from Linux 4.0 we do not return the original target address to the error queue readers. Add function to define which errors provide addr_offset. With this fix my ping command is not silent anymore. Fixes: c247f0534cc5 ("ip: fix error queue empty skb handling") Signed-off-by: Julian Anastasov Acked-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_sockglue.c | 11 ++++++++++- net/ipv6/datagram.c | 12 +++++++++++- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 7cfb0893f2636..6ddde89996f44 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -432,6 +432,15 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 inf kfree_skb(skb); } +/* For some errors we have valid addr_offset even with zero payload and + * zero port. Also, addr_offset should be supported if port is set. + */ +static inline bool ipv4_datagram_support_addr(struct sock_exterr_skb *serr) +{ + return serr->ee.ee_origin == SO_EE_ORIGIN_ICMP || + serr->ee.ee_origin == SO_EE_ORIGIN_LOCAL || serr->port; +} + /* IPv4 supports cmsg on all imcp errors and some timestamps * * Timestamp code paths do not initialize the fields expected by cmsg: @@ -498,7 +507,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) serr = SKB_EXT_ERR(skb); - if (sin && serr->port) { + if (sin && ipv4_datagram_support_addr(serr)) { sin->sin_family = AF_INET; sin->sin_addr.s_addr = *(__be32 *)(skb_network_header(skb) + serr->addr_offset); diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 762a58c772b81..62d908e64eeb5 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -325,6 +325,16 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu) kfree_skb(skb); } +/* For some errors we have valid addr_offset even with zero payload and + * zero port. Also, addr_offset should be supported if port is set. + */ +static inline bool ipv6_datagram_support_addr(struct sock_exterr_skb *serr) +{ + return serr->ee.ee_origin == SO_EE_ORIGIN_ICMP6 || + serr->ee.ee_origin == SO_EE_ORIGIN_ICMP || + serr->ee.ee_origin == SO_EE_ORIGIN_LOCAL || serr->port; +} + /* IPv6 supports cmsg on all origins aside from SO_EE_ORIGIN_LOCAL. 
* * At one point, excluding local errors was a quick test to identify icmp/icmp6 @@ -389,7 +399,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) serr = SKB_EXT_ERR(skb); - if (sin && serr->port) { + if (sin && ipv6_datagram_support_addr(serr)) { const unsigned char *nh = skb_network_header(skb); sin->sin6_family = AF_INET6; sin->sin6_flowinfo = 0; From f3f6617f6b90f2e27ee7362f8a2f4063c7eac6a7 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Thu, 25 Jun 2015 11:29:41 +0300 Subject: [PATCH 0027/2091] net/mlx4_en: Release TX QP when destroying TX ring [ Upstream commit 0eb08514fdbdcd16fd6870680cd638f203662e9d ] TX ring QP wasn't released at mlx4_en_destroy_tx_ring. Instead, the code used the deprecated base_tx_qpn field. Move TX QP release to mlx4_en_destroy_tx_ring and remove the base_tx_qpn field. Fixes: ddae0349fdb7 ('net/mlx4: Change QP allocation scheme') Signed-off-by: Eran Ben Elisha Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 4 ---- drivers/net/ethernet/mellanox/mlx4/en_tx.c | 1 + drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 1 - 3 files changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index cf467a9f6cc78..a5a0b8420d26b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1973,10 +1973,6 @@ void mlx4_en_free_resources(struct mlx4_en_priv *priv) mlx4_en_destroy_cq(priv, &priv->rx_cq[i]); } - if (priv->base_tx_qpn) { - mlx4_qp_release_range(priv->mdev->dev, priv->base_tx_qpn, priv->tx_ring_num); - priv->base_tx_qpn = 0; - } } int mlx4_en_alloc_resources(struct mlx4_en_priv *priv) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 7bed3a88579fa..0ab298f036a8f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -180,6 +180,7 @@ void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv, mlx4_bf_free(mdev->dev, &ring->bf); mlx4_qp_remove(mdev->dev, &ring->qp); mlx4_qp_free(mdev->dev, &ring->qp); + mlx4_qp_release_range(priv->mdev->dev, ring->qpn, 1); mlx4_en_unmap_buffer(&ring->wqres.buf); mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size); kfree(ring->bounce_buf); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index d021f079f181b..9a4b3807eb0a6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -579,7 +579,6 @@ struct mlx4_en_priv { int vids[128]; bool wol; struct device *ddev; - int base_tx_qpn; struct hlist_head mac_hash[MLX4_EN_MAC_HASH_SIZE]; struct hwtstamp_config hwtstamp_config; From 7a9aa8ab0c706b3d2770a3996c9e63f08074c855 Mon Sep 17 00:00:00 2001 From: Ido Shamay Date: Thu, 25 Jun 2015 11:29:42 +0300 Subject: [PATCH 0028/2091] net/mlx4_en: Wake TX queues only when there's enough room [ Upstream commit 488a9b48e398b157703766e2cd91ea45ac6997c5 ] Indication of a single completed packet, marked by txbbs_skipped being bigger than zero, is not enough to wake up a stopped TX queue. The completed packet may contain a single TXBB, while the next packet to be sent (after the wake-up) may have multiple TXBBs (LSO/TSO packets, for example), causing an overflow in the queue followed by WQE corruption and a TX queue timeout.
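For illustration only, a minimal standalone C sketch of that failure mode (the numbers below are made up; in the driver, full_size corresponds to ring->size - HEADROOM - MAX_DESC_TXBBS):

/*
 * Illustrative only: models the TX ring as free-running producer/consumer
 * counters. The concrete sizes are invented for the example.
 */
#include <stdbool.h>
#include <stdio.h>

struct ring { unsigned int prod, cons, full_size; };

/* Room test: true while a worst-case (maximum sized) WQE does not fit. */
static bool ring_full(const struct ring *r)
{
	return r->prod - r->cons > r->full_size;
}

int main(void)
{
	struct ring r = { .prod = 250, .cons = 0, .full_size = 240 };

	/* A single-TXBB packet completes: cons advances by just one... */
	r.cons++;

	/* ...so waking the queue on "something completed" is premature. */
	printf("one TXBB freed, ring %s\n",
	       ring_full(&r) ? "still full: keep queue stopped"
			     : "has room: safe to wake");
	return 0;
}

With the patch, the same ring-full test is used both when stopping the queue in mlx4_en_xmit() and when deciding to wake it after completions, so the two sides cannot disagree about whether a worst-case packet fits.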
Instead, wake the stopped queue only when there's enough room for the worst case (maximum sized WQE) packet that we should need to handle after the queue is opened again. Also created an helper routine - mlx4_en_is_tx_ring_full, which checks if the current TX ring is full or not. It provides better code readability and removes code duplication. Signed-off-by: Ido Shamay Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx4/en_tx.c | 19 +++++++++++-------- drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 1 + 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 0ab298f036a8f..c10d98f6ad967 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -66,6 +66,7 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, ring->size = size; ring->size_mask = size - 1; ring->stride = stride; + ring->full_size = ring->size - HEADROOM - MAX_DESC_TXBBS; tmp = size * sizeof(struct mlx4_en_tx_info); ring->tx_info = kmalloc_node(tmp, GFP_KERNEL | __GFP_NOWARN, node); @@ -232,6 +233,11 @@ void mlx4_en_deactivate_tx_ring(struct mlx4_en_priv *priv, MLX4_QP_STATE_RST, NULL, 0, 0, &ring->qp); } +static inline bool mlx4_en_is_tx_ring_full(struct mlx4_en_tx_ring *ring) +{ + return ring->prod - ring->cons > ring->full_size; +} + static void mlx4_en_stamp_wqe(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring, int index, u8 owner) @@ -474,11 +480,10 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev, netdev_tx_completed_queue(ring->tx_queue, packets, bytes); - /* - * Wakeup Tx queue if this stopped, and at least 1 packet - * was completed + /* Wakeup Tx queue if this stopped, and ring is not full. 
*/ - if (netif_tx_queue_stopped(ring->tx_queue) && txbbs_skipped > 0) { + if (netif_tx_queue_stopped(ring->tx_queue) && + !mlx4_en_is_tx_ring_full(ring)) { netif_tx_wake_queue(ring->tx_queue); ring->wake_queue++; } @@ -922,8 +927,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) skb_tx_timestamp(skb); /* Check available TXBBs And 2K spare for prefetch */ - stop_queue = (int)(ring->prod - ring_cons) > - ring->size - HEADROOM - MAX_DESC_TXBBS; + stop_queue = mlx4_en_is_tx_ring_full(ring); if (unlikely(stop_queue)) { netif_tx_stop_queue(ring->tx_queue); ring->queue_stopped++; @@ -992,8 +996,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) smp_rmb(); ring_cons = ACCESS_ONCE(ring->cons); - if (unlikely(((int)(ring->prod - ring_cons)) <= - ring->size - HEADROOM - MAX_DESC_TXBBS)) { + if (unlikely(!mlx4_en_is_tx_ring_full(ring))) { netif_tx_wake_queue(ring->tx_queue); ring->wake_queue++; } diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 9a4b3807eb0a6..909fcf803c542 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -279,6 +279,7 @@ struct mlx4_en_tx_ring { u32 size; /* number of TXBBs */ u32 size_mask; u16 stride; + u32 full_size; u16 cqn; /* index of port CQ associated with this ring */ u32 buf_size; __be32 doorbell_qpn; From 1b74080050336e29a82abc2a9b2e1f901485a362 Mon Sep 17 00:00:00 2001 From: Ido Shamay Date: Thu, 25 Jun 2015 11:29:43 +0300 Subject: [PATCH 0029/2091] net/mlx4_en: Fix wrong csum complete report when rxvlan offload is disabled [ Upstream commit 79a258526ce1051cb9684018c25a89d51ac21be8 ] The check_csum() function relied on hwtstamp_rx_filter to know if rxvlan offload is disabled. This is wrong since rxvlan offload can be switched on/off regardless of hwtstamp_rx_filter. Also moved check_csum to query CQE information to identify VLAN packets and removed the check of IP packets, since it has been validated before. Fixes: f8c6455bb04b ('net/mlx4_en: Extend checksum offloading by CHECKSUM COMPLETE') Signed-off-by: Ido Shamay Signed-off-by: Or Gerlitz Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 2a77a6b191216..eab4e080ebd2d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -723,7 +723,7 @@ static int get_fixed_ipv6_csum(__wsum hw_checksum, struct sk_buff *skb, } #endif static int check_csum(struct mlx4_cqe *cqe, struct sk_buff *skb, void *va, - int hwtstamp_rx_filter) + netdev_features_t dev_features) { __wsum hw_checksum = 0; @@ -731,14 +731,8 @@ static int check_csum(struct mlx4_cqe *cqe, struct sk_buff *skb, void *va, hw_checksum = csum_unfold((__force __sum16)cqe->checksum); - if (((struct ethhdr *)va)->h_proto == htons(ETH_P_8021Q) && - hwtstamp_rx_filter != HWTSTAMP_FILTER_NONE) { - /* next protocol non IPv4 or IPv6 */ - if (((struct vlan_hdr *)hdr)->h_vlan_encapsulated_proto - != htons(ETH_P_IP) && - ((struct vlan_hdr *)hdr)->h_vlan_encapsulated_proto - != htons(ETH_P_IPV6)) - return -1; + if (cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_VLAN_PRESENT_MASK) && + !(dev_features & NETIF_F_HW_VLAN_CTAG_RX)) { hw_checksum = get_fixed_vlan_csum(hw_checksum, hdr); hdr += sizeof(struct vlan_hdr); } @@ -901,7 +895,8 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud if (ip_summed == CHECKSUM_COMPLETE) { void *va = skb_frag_address(skb_shinfo(gro_skb)->frags); - if (check_csum(cqe, gro_skb, va, ring->hwtstamp_rx_filter)) { + if (check_csum(cqe, gro_skb, va, + dev->features)) { ip_summed = CHECKSUM_NONE; ring->csum_none++; ring->csum_complete--; @@ -956,7 +951,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud } if (ip_summed == CHECKSUM_COMPLETE) { - if (check_csum(cqe, skb, skb->data, ring->hwtstamp_rx_filter)) { + if (check_csum(cqe, skb, skb->data, dev->features)) { ip_summed = CHECKSUM_NONE; ring->csum_complete--; ring->csum_none++; From 62a9ad17a245002cc611fc4667c2919ef422d8ee Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 25 Jun 2015 11:29:44 +0300 Subject: [PATCH 0030/2091] mlx4: Disable HA for SRIOV PF RoCE devices [ Upstream commit 7254acffeeec3c0a75b9c5364c29a6eb00014930 ] When in HA mode, the driver exposes an IB (RoCE) device instance with only one port. Under SRIOV, the existing implementation doesn't go well with the PF RoCE driver's role of Special QPs Para-Virtualization, etc. As such, disable HA for the mlx4 PF RoCE device in SRIOV mode. Fixes: a57500903093 ('IB/mlx4: Add port aggregation support') Signed-off-by: Or Gerlitz Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx4/intf.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/intf.c b/drivers/net/ethernet/mellanox/mlx4/intf.c index 6fce587188372..0d80aed590437 100644 --- a/drivers/net/ethernet/mellanox/mlx4/intf.c +++ b/drivers/net/ethernet/mellanox/mlx4/intf.c @@ -93,8 +93,14 @@ int mlx4_register_interface(struct mlx4_interface *intf) mutex_lock(&intf_mutex); list_add_tail(&intf->list, &intf_list); - list_for_each_entry(priv, &dev_list, dev_list) + list_for_each_entry(priv, &dev_list, dev_list) { + if (mlx4_is_mfunc(&priv->dev) && (intf->flags & MLX4_INTFF_BONDING)) { + mlx4_dbg(&priv->dev, + "SRIOV, disabling HA mode for intf proto %d\n", intf->protocol); + intf->flags &= ~MLX4_INTFF_BONDING; + } mlx4_add_device(intf, priv); + } mutex_unlock(&intf_mutex); From 6c10c84170c40bf3d4a9953ae5a2ffe59ad736d4 Mon Sep 17 00:00:00 2001 From: Mugunthan V N Date: Thu, 25 Jun 2015 22:21:02 +0530 Subject: [PATCH 0031/2091] net: phy: fix phy link up when limiting speed via device tree [ Upstream commit eb686231fce3770299760f24fdcf5ad041f44153 ] When limiting phy link speed using "max-speed" to 100mbps or less on a giga bit phy, phy never completes auto negotiation and phy state machine is held in PHY_AN. Fixing this issue by comparing the giga bit advertise though phydev->supported doesn't have it but phy has BMSR_ESTATEN set. So that auto negotiation is restarted as old and new advertise are different and link comes up fine. Signed-off-by: Mugunthan V N Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/phy_device.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index bdfe51fc3a650..d551df62e61a3 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -796,10 +796,11 @@ static int genphy_config_advert(struct phy_device *phydev) if (phydev->supported & (SUPPORTED_1000baseT_Half | SUPPORTED_1000baseT_Full)) { adv |= ethtool_adv_to_mii_ctrl1000_t(advertise); - if (adv != oldadv) - changed = 1; } + if (adv != oldadv) + changed = 1; + err = phy_write(phydev, MII_CTRL1000, adv); if (err < 0) return err; From 7e2a3d667c4c9ca494c73f5930365240a31c4eb0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 26 Jun 2015 07:32:29 +0200 Subject: [PATCH 0032/2091] bnx2x: fix lockdep splat [ Upstream commit d53c66a5b80698620f7c9ba2372fff4017e987b8 ] Michel reported following lockdep splat [ 44.718117] INFO: trying to register non-static key. [ 44.723081] the code is fine but needs lockdep annotation. [ 44.728559] turning off the locking correctness validator. [ 44.734036] CPU: 8 PID: 5483 Comm: ethtool Not tainted 4.1.0 [ 44.770289] Call Trace: [ 44.772741] [] dump_stack+0x4c/0x65 [ 44.777879] [] ? console_unlock+0x1f1/0x510 [ 44.783708] [] __lock_acquire+0x1d05/0x1f10 [ 44.789538] [] ? mark_held_locks+0x6a/0x90 [ 44.795276] [] ? trace_hardirqs_on_caller+0x105/0x1d0 [ 44.801967] [] ? trace_hardirqs_on+0xd/0x10 [ 44.807793] [] ? hrtimer_try_to_cancel+0x4a/0x250 [ 44.814142] [] lock_acquire+0xb6/0x290 [ 44.819537] [] ? flush_work+0x5/0x280 [ 44.824844] [] flush_work+0x3d/0x280 [ 44.830061] [] ? flush_work+0x5/0x280 [ 44.835366] [] ? schedule_hrtimeout_range+0x13/0x20 [ 44.841889] [] ? usleep_range+0x4b/0x50 [ 44.847365] [] ? mark_held_locks+0x6a/0x90 [ 44.853102] [] ? __cancel_work_timer+0x105/0x1c0 [ 44.859359] [] ? 
trace_hardirqs_on_caller+0x105/0x1d0 [ 44.866045] [] __cancel_work_timer+0x9f/0x1c0 [ 44.872048] [] ? bnx2x_func_stop+0x42/0x90 [bnx2x] [ 44.878481] [] cancel_work_sync+0x10/0x20 [ 44.884134] [] bnx2x_chip_cleanup+0x245/0x730 [bnx2x] [ 44.890829] [] ? up+0x32/0x50 [ 44.895439] [] ? del_timer_sync+0x5/0xd0 [ 44.901005] [] bnx2x_nic_unload+0x20d/0x8e0 [bnx2x] [ 44.907527] [] ? might_fault+0x5f/0xb0 [ 44.912921] [] bnx2x_reload_if_running+0x2c/0x50 [bnx2x] [ 44.919879] [] bnx2x_set_ringparam+0x2b5/0x460 [bnx2x] [ 44.926664] [] dev_ethtool+0x55b/0x1c40 [ 44.932148] [] ? rtnl_lock+0x17/0x20 [ 44.937364] [] dev_ioctl+0x17b/0x630 [ 44.942582] [] sock_do_ioctl+0x5d/0x70 [ 44.947972] [] sock_ioctl+0x73/0x280 [ 44.953192] [] do_vfs_ioctl+0x88/0x5b0 [ 44.958587] [] ? up_read+0x23/0x40 [ 44.963631] [] ? __fget_light+0x6c/0xa0 [ 44.969105] [] SyS_ioctl+0x91/0xb0 [ 44.974149] [] system_call_fastpath+0x12/0x6f As bnx2x_init_ptp() is only called if bp->flags contains PTP_SUPPORTED, we also need to guard bnx2x_stop_ptp() with same condition, otherwise ptp_task workqueue is not initialized and kernel barfs on cancel_work_sync() Fixes: eeed018cbfa30 ("bnx2x: Add timestamping and PTP hardware clock support") Reported-by: Michel Lespinasse Signed-off-by: Eric Dumazet Cc: Michal Kalderon Cc: Ariel Elior Cc: Yuval Mintz Cc: David Decotigny Acked-by: Sony Chacko Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 33501bcddc48e..8a97d28f3d650 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -9323,7 +9323,8 @@ void bnx2x_chip_cleanup(struct bnx2x *bp, int unload_mode, bool keep_link) * function stop ramrod is sent, since as part of this ramrod FW access * PTP registers. */ - bnx2x_stop_ptp(bp); + if (bp->flags & PTP_SUPPORTED) + bnx2x_stop_ptp(bp); /* Disable HW interrupts, NAPI */ bnx2x_netif_stop(bp, 1); From 67866a8c30f4707cef895635d5d08cb8105d2f5e Mon Sep 17 00:00:00 2001 From: Alexander Sverdlin Date: Mon, 29 Jun 2015 10:41:03 +0200 Subject: [PATCH 0033/2091] sctp: Fix race between OOTB responce and route removal [ Upstream commit 29c4afc4e98f4dc0ea9df22c631841f9c220b944 ] There is NULL pointer dereference possible during statistics update if the route used for OOTB responce is removed at unfortunate time. If the route exists when we receive OOTB packet and we finally jump into sctp_packet_transmit() to send ABORT, but in the meantime route is removed under our feet, we take "no_route" path and try to update stats with IP_INC_STATS(sock_net(asoc->base.sk), ...). But sctp_ootb_pkt_new() used to prepare responce packet doesn't call sctp_transport_set_owner() and therefore there is no asoc associated with this packet. Probably temporary asoc just for OOTB responces is overkill, so just introduce a check like in all other places in sctp_packet_transmit(), where "asoc" is dereferenced. To reproduce this, one needs to 0. ensure that sctp module is loaded (otherwise ABORT is not generated) 1. remove default route on the machine 2. while true; do ip route del [interface-specific route] ip route add [interface-specific route] done 3. send enough OOTB packets (i.e. 
HB REQs) from another host to trigger ABORT responce On x86_64 the crash looks like this: BUG: unable to handle kernel NULL pointer dereference at 0000000000000020 IP: [] sctp_packet_transmit+0x63c/0x730 [sctp] PGD 0 Oops: 0000 [#1] PREEMPT SMP Modules linked in: ... CPU: 0 PID: 0 Comm: swapper/0 Tainted: G O 4.0.5-1-ARCH #1 Hardware name: ... task: ffffffff818124c0 ti: ffffffff81800000 task.ti: ffffffff81800000 RIP: 0010:[] [] sctp_packet_transmit+0x63c/0x730 [sctp] RSP: 0018:ffff880127c037b8 EFLAGS: 00010296 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 00000015ff66b480 RDX: 00000015ff66b400 RSI: ffff880127c17200 RDI: ffff880123403700 RBP: ffff880127c03888 R08: 0000000000017200 R09: ffffffff814625af R10: ffffea00047e4680 R11: 00000000ffffff80 R12: ffff8800b0d38a28 R13: ffff8800b0d38a28 R14: ffff8800b3e88000 R15: ffffffffa05f24e0 FS: 0000000000000000(0000) GS:ffff880127c00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 0000000000000020 CR3: 00000000c855b000 CR4: 00000000000007f0 Stack: ffff880127c03910 ffff8800b0d38a28 ffffffff8189d240 ffff88011f91b400 ffff880127c03828 ffffffffa05c94c5 0000000000000000 ffff8800baa1c520 0000000000000000 0000000000000001 0000000000000000 0000000000000000 Call Trace: [] ? sctp_sf_tabort_8_4_8.isra.20+0x85/0x140 [sctp] [] ? sctp_transport_put+0x52/0x80 [sctp] [] sctp_do_sm+0xb8c/0x19a0 [sctp] [] ? trigger_load_balance+0x90/0x210 [] ? update_process_times+0x59/0x60 [] ? timerqueue_add+0x60/0xb0 [] ? enqueue_hrtimer+0x29/0xa0 [] ? read_tsc+0x9/0x10 [] ? put_page+0x55/0x60 [] ? clockevents_program_event+0x6d/0x100 [] ? skb_free_head+0x58/0x80 [] ? chksum_update+0x1b/0x27 [crc32c_generic] [] ? crypto_shash_update+0xce/0xf0 [] sctp_endpoint_bh_rcv+0x113/0x280 [sctp] [] sctp_inq_push+0x46/0x60 [sctp] [] sctp_rcv+0x880/0x910 [sctp] [] ? sctp_packet_transmit_chunk+0xb0/0xb0 [sctp] [] ? sctp_csum_update+0x20/0x20 [sctp] [] ? ip_route_input_noref+0x235/0xd30 [] ? ack_ioapic_level+0x7b/0x150 [] ip_local_deliver_finish+0xae/0x210 [] ip_local_deliver+0x35/0x90 [] ip_rcv_finish+0xf5/0x370 [] ip_rcv+0x2b8/0x3a0 [] __netif_receive_skb_core+0x763/0xa50 [] __netif_receive_skb+0x18/0x60 [] netif_receive_skb_internal+0x40/0xd0 [] napi_gro_receive+0xe8/0x120 [] rtl8169_poll+0x2da/0x660 [r8169] [] net_rx_action+0x21a/0x360 [] __do_softirq+0xe1/0x2d0 [] irq_exit+0xad/0xb0 [] do_IRQ+0x58/0xf0 [] common_interrupt+0x6d/0x6d [] ? hrtimer_start+0x18/0x20 [] ? sctp_transport_destroy_rcu+0x29/0x30 [sctp] [] ? mwait_idle+0x60/0xa0 [] arch_cpu_idle+0xf/0x20 [] cpu_startup_entry+0x3ec/0x480 [] rest_init+0x85/0x90 [] start_kernel+0x48b/0x4ac [] ? early_idt_handlers+0x120/0x120 [] x86_64_start_reservations+0x2a/0x2c [] x86_64_start_kernel+0x161/0x184 Code: 90 48 8b 80 b8 00 00 00 48 89 85 70 ff ff ff 48 83 bd 70 ff ff ff 00 0f 85 cd fa ff ff 48 89 df 31 db e8 18 63 e7 e0 48 8b 45 80 <48> 8b 40 20 48 8b 40 30 48 8b 80 68 01 00 00 65 48 ff 40 78 e9 RIP [] sctp_packet_transmit+0x63c/0x730 [sctp] RSP CR2: 0000000000000020 ---[ end trace 5aec7fd2dc983574 ]--- Kernel panic - not syncing: Fatal exception in interrupt Kernel Offset: 0x0 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffff9fffffff) drm_kms_helper: panic occurred, switching back to text console ---[ end Kernel panic - not syncing: Fatal exception in interrupt Signed-off-by: Alexander Sverdlin Acked-by: Neil Horman Acked-by: Marcelo Ricardo Leitner Acked-by: Vlad Yasevich Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/output.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/sctp/output.c b/net/sctp/output.c index fc5e45b8a832d..abe7c2db24120 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -599,7 +599,9 @@ int sctp_packet_transmit(struct sctp_packet *packet) return err; no_route: kfree_skb(nskb); - IP_INC_STATS(sock_net(asoc->base.sk), IPSTATS_MIB_OUTNOROUTES); + + if (asoc) + IP_INC_STATS(sock_net(asoc->base.sk), IPSTATS_MIB_OUTNOROUTES); /* FIXME: Returning the 'err' will effect all the associations * associated with a socket, although only one of the paths of the From 8c6e5415f83cb5ba0da1321ec743b650c8a16764 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 29 Jun 2015 11:22:12 -0500 Subject: [PATCH 0034/2091] amd-xgbe: Add the __GFP_NOWARN flag to Rx buffer allocation [ Upstream commit 472cfe7127760d68b819cf35a26e5a1b44b30f4e ] When allocating Rx related buffers, alloc_pages is called using an order number that is decreased until successful. A system under stress can experience failures during this allocation process resulting in a warning being issued. This message can be of concern to end users even though the failure is not fatal. Since the failure is not fatal and can occur multiple times, the driver should include the __GFP_NOWARN flag to suppress the warning message from being issued. Signed-off-by: Tom Lendacky Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/amd/xgbe/xgbe-desc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c index d81fc6bd47590..5c92fb71b37ed 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c @@ -263,7 +263,7 @@ static int xgbe_alloc_pages(struct xgbe_prv_data *pdata, int ret; /* Try to obtain pages, decreasing order if necessary */ - gfp |= __GFP_COLD | __GFP_COMP; + gfp |= __GFP_COLD | __GFP_COMP | __GFP_NOWARN; while (order >= 0) { pages = alloc_pages(gfp, order); if (pages) From b5aded8311788994641bd1d04b4b922f9c202b8f Mon Sep 17 00:00:00 2001 From: Simon Guinot Date: Tue, 30 Jun 2015 16:20:20 +0200 Subject: [PATCH 0035/2091] net: mvneta: introduce compatible string "marvell,armada-xp-neta" [ Upstream commit f522a975a8101895a85354b9c143f41b8248e71a ] The mvneta driver supports the Ethernet IP found in the Armada 370, XP, 380 and 385 SoCs. Since at least one more hardware feature is available for the Armada XP SoCs, a way to identify them is needed. This patch introduces a new compatible string "marvell,armada-xp-neta". Signed-off-by: Simon Guinot Fixes: c5aff18204da ("net: mvneta: driver for Marvell Armada 370/XP network unit") Cc: # v3.8+ Acked-by: Gregory CLEMENT Acked-by: Thomas Petazzoni Signed-off-by: David S.
Miller Signed-off-by: Greg Kroah-Hartman --- .../devicetree/bindings/net/marvell-armada-370-neta.txt | 2 +- drivers/net/ethernet/marvell/mvneta.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt b/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt index 750d577e8083e..f5a8ca29aff06 100644 --- a/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt +++ b/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt @@ -1,7 +1,7 @@ * Marvell Armada 370 / Armada XP Ethernet Controller (NETA) Required properties: -- compatible: should be "marvell,armada-370-neta". +- compatible: "marvell,armada-370-neta" or "marvell,armada-xp-neta". - reg: address and length of the register set for the device. - interrupts: interrupt for the device - phy: See ethernet.txt file in the same directory. diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 74176ec4f39de..4fb27eac6ce59 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -3185,6 +3185,7 @@ static int mvneta_remove(struct platform_device *pdev) static const struct of_device_id mvneta_match[] = { { .compatible = "marvell,armada-370-neta" }, + { .compatible = "marvell,armada-xp-neta" }, { } }; MODULE_DEVICE_TABLE(of, mvneta_match); From 5c40e8bf8f8239a5e1fe85ceeb8c6d930f28ef8e Mon Sep 17 00:00:00 2001 From: Simon Guinot Date: Tue, 30 Jun 2015 16:20:21 +0200 Subject: [PATCH 0036/2091] ARM: mvebu: update Ethernet compatible string for Armada XP [ Upstream commit ea3b55fe83b5fcede82d183164b9d6831b26e33b ] This patch updates the Ethernet DT nodes for Armada XP SoCs with the compatible string "marvell,armada-xp-neta". Signed-off-by: Simon Guinot Fixes: 77916519cba3 ("arm: mvebu: Armada XP MV78230 has only three Ethernet interfaces") Cc: # v3.8+ Acked-by: Gregory CLEMENT Reviewed-by: Thomas Petazzoni Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/armada-370-xp.dtsi | 2 -- arch/arm/boot/dts/armada-370.dtsi | 8 ++++++++ arch/arm/boot/dts/armada-xp-mv78260.dtsi | 2 +- arch/arm/boot/dts/armada-xp-mv78460.dtsi | 2 +- arch/arm/boot/dts/armada-xp.dtsi | 10 +++++++++- 5 files changed, 19 insertions(+), 5 deletions(-) diff --git a/arch/arm/boot/dts/armada-370-xp.dtsi b/arch/arm/boot/dts/armada-370-xp.dtsi index ec96f0b363465..06a2f2ae9d1e4 100644 --- a/arch/arm/boot/dts/armada-370-xp.dtsi +++ b/arch/arm/boot/dts/armada-370-xp.dtsi @@ -270,7 +270,6 @@ }; eth0: ethernet@70000 { - compatible = "marvell,armada-370-neta"; reg = <0x70000 0x4000>; interrupts = <8>; clocks = <&gateclk 4>; @@ -286,7 +285,6 @@ }; eth1: ethernet@74000 { - compatible = "marvell,armada-370-neta"; reg = <0x74000 0x4000>; interrupts = <10>; clocks = <&gateclk 3>; diff --git a/arch/arm/boot/dts/armada-370.dtsi b/arch/arm/boot/dts/armada-370.dtsi index 00b50db57c9c0..ca4257b2f77dc 100644 --- a/arch/arm/boot/dts/armada-370.dtsi +++ b/arch/arm/boot/dts/armada-370.dtsi @@ -307,6 +307,14 @@ dmacap,memset; }; }; + + ethernet@70000 { + compatible = "marvell,armada-370-neta"; + }; + + ethernet@74000 { + compatible = "marvell,armada-370-neta"; + }; }; }; }; diff --git a/arch/arm/boot/dts/armada-xp-mv78260.dtsi b/arch/arm/boot/dts/armada-xp-mv78260.dtsi index 8479fdc9e9c24..c5fdc99f0dbeb 100644 --- a/arch/arm/boot/dts/armada-xp-mv78260.dtsi +++ b/arch/arm/boot/dts/armada-xp-mv78260.dtsi @@ -318,7 +318,7 @@ }; eth3: ethernet@34000 { - compatible = "marvell,armada-370-neta"; + compatible = "marvell,armada-xp-neta"; reg = <0x34000 0x4000>; interrupts = <14>; clocks = <&gateclk 1>; diff --git a/arch/arm/boot/dts/armada-xp-mv78460.dtsi b/arch/arm/boot/dts/armada-xp-mv78460.dtsi index 661d54c815802..0e24f1a38540e 100644 --- a/arch/arm/boot/dts/armada-xp-mv78460.dtsi +++ b/arch/arm/boot/dts/armada-xp-mv78460.dtsi @@ -356,7 +356,7 @@ }; eth3: ethernet@34000 { - compatible = "marvell,armada-370-neta"; + compatible = "marvell,armada-xp-neta"; reg = <0x34000 0x4000>; interrupts = <14>; clocks = <&gateclk 1>; diff --git a/arch/arm/boot/dts/armada-xp.dtsi b/arch/arm/boot/dts/armada-xp.dtsi index 013d63f69e361..8fdd6d7c0ab12 100644 --- a/arch/arm/boot/dts/armada-xp.dtsi +++ b/arch/arm/boot/dts/armada-xp.dtsi @@ -177,7 +177,7 @@ }; eth2: ethernet@30000 { - compatible = "marvell,armada-370-neta"; + compatible = "marvell,armada-xp-neta"; reg = <0x30000 0x4000>; interrupts = <12>; clocks = <&gateclk 2>; @@ -220,6 +220,14 @@ }; }; + ethernet@70000 { + compatible = "marvell,armada-xp-neta"; + }; + + ethernet@74000 { + compatible = "marvell,armada-xp-neta"; + }; + xor@f0900 { compatible = "marvell,orion-xor"; reg = <0xF0900 0x100 From bfa06e6258be556d44aad030fe1babeed4f92240 Mon Sep 17 00:00:00 2001 From: Simon Guinot Date: Tue, 30 Jun 2015 16:20:22 +0200 Subject: [PATCH 0037/2091] net: mvneta: disable IP checksum with jumbo frames for Armada 370 [ Upstream commit b65657fc240ae6c1d2a1e62db9a0e61ac9631d7a ] The Ethernet controller found in the Armada 370, 380 and 385 SoCs don't support TCP/IP checksumming with frame sizes larger than 1600 bytes. This patch fixes the issue by disabling the features NETIF_F_IP_CSUM and NETIF_F_TSO for the Armada 370 and compatibles SoCs when the MTU is set to a value greater than 1600 bytes. Signed-off-by: Simon Guinot Fixes: c5aff18204da ("net: mvneta: driver for Marvell Armada 370/XP network unit") Cc: # v3.8+ Acked-by: Thomas Petazzoni Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/marvell/mvneta.c | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 4fb27eac6ce59..74d0389bf2332 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -310,6 +310,7 @@ struct mvneta_port { unsigned int link; unsigned int duplex; unsigned int speed; + unsigned int tx_csum_limit; int use_inband_status:1; }; @@ -2508,8 +2509,10 @@ static int mvneta_change_mtu(struct net_device *dev, int mtu) dev->mtu = mtu; - if (!netif_running(dev)) + if (!netif_running(dev)) { + netdev_update_features(dev); return 0; + } /* The interface is running, so we have to force a * reallocation of the queues @@ -2538,9 +2541,26 @@ static int mvneta_change_mtu(struct net_device *dev, int mtu) mvneta_start_dev(pp); mvneta_port_up(pp); + netdev_update_features(dev); + return 0; } +static netdev_features_t mvneta_fix_features(struct net_device *dev, + netdev_features_t features) +{ + struct mvneta_port *pp = netdev_priv(dev); + + if (pp->tx_csum_limit && dev->mtu > pp->tx_csum_limit) { + features &= ~(NETIF_F_IP_CSUM | NETIF_F_TSO); + netdev_info(dev, + "Disable IP checksum for MTU greater than %dB\n", + pp->tx_csum_limit); + } + + return features; +} + /* Get mac address */ static void mvneta_get_mac_addr(struct mvneta_port *pp, unsigned char *addr) { @@ -2862,6 +2882,7 @@ static const struct net_device_ops mvneta_netdev_ops = { .ndo_set_rx_mode = mvneta_set_rx_mode, .ndo_set_mac_address = mvneta_set_mac_addr, .ndo_change_mtu = mvneta_change_mtu, + .ndo_fix_features = mvneta_fix_features, .ndo_get_stats64 = mvneta_get_stats64, .ndo_do_ioctl = mvneta_ioctl, }; @@ -3107,6 +3128,9 @@ static int mvneta_probe(struct platform_device *pdev) } } + if (of_device_is_compatible(dn, "marvell,armada-370-neta")) + pp->tx_csum_limit = 1600; + pp->tx_ring_size = MVNETA_MAX_TXD; pp->rx_ring_size = MVNETA_MAX_RXD; From 1e4205d4e0b0579b906de8ec7c5920968bfb32a9 Mon Sep 17 00:00:00 2001 From: Rui Miguel Silva Date: Wed, 20 May 2015 14:52:40 +0100 Subject: [PATCH 0038/2091] usb: gadget: f_fs: add extra check before unregister_gadget_item commit f14e9ad17f46051b02bffffac2036486097de19e upstream. ffs_closed can race with configfs_rmdir which will call config_item_release, so add an extra check to avoid calling the unregister_gadget_item with an null gadget item. 
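For illustration only, a simplified standalone sketch of the guarded teardown this change adds; the structure and field names below are stand-ins for the real f_fs/configfs types, not the kernel API:

/*
 * Illustrative only: mirrors the shape of the new checks (opts present,
 * registered via configfs, item still linked, refcount not yet dropped).
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

struct fake_opts {
	bool no_configfs;       /* not registered through configfs */
	void *ci_parent;        /* parent of the configfs item, NULL once unlinked */
	atomic_int ci_refcount; /* stand-in for ci_kref.refcount */
};

/* Return true only when unregistering the gadget item is still safe. */
static bool safe_to_unregister(struct fake_opts *opts)
{
	if (!opts || opts->no_configfs || !opts->ci_parent)
		return false;
	/* rmdir in progress: the item's last reference is already gone. */
	return atomic_load(&opts->ci_refcount) != 0;
}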
Signed-off-by: Rui Miguel Silva Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_fs.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 3507f880eb742..45b8c8b338df7 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -3435,6 +3435,7 @@ static int ffs_ready(struct ffs_data *ffs) static void ffs_closed(struct ffs_data *ffs) { struct ffs_dev *ffs_obj; + struct f_fs_opts *opts; ENTER(); ffs_dev_lock(); @@ -3449,8 +3450,13 @@ static void ffs_closed(struct ffs_data *ffs) ffs_obj->ffs_closed_callback) ffs_obj->ffs_closed_callback(ffs); - if (!ffs_obj->opts || ffs_obj->opts->no_configfs - || !ffs_obj->opts->func_inst.group.cg_item.ci_parent) + if (ffs_obj->opts) + opts = ffs_obj->opts; + else + goto done; + + if (opts->no_configfs || !opts->func_inst.group.cg_item.ci_parent + || !atomic_read(&opts->func_inst.group.cg_item.ci_kref.refcount)) goto done; unregister_gadget_item(ffs_obj->opts-> From 0e566fe9dea0750713fa267b5a0b23f0b78b26ea Mon Sep 17 00:00:00 2001 From: Horia Geant? Date: Mon, 11 May 2015 20:03:24 +0300 Subject: [PATCH 0039/2091] crypto: talitos - avoid memleak in talitos_alg_alloc() commit 5fa7dadc898567ce14d6d6d427e7bd8ce6eb5d39 upstream. Fixes: 1d11911a8c57 ("crypto: talitos - fix warning: 'alg' may be used uninitialized in this function") Signed-off-by: Horia Geanta Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/talitos.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index 857414afa29a8..c52b4b32bcbd1 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -2561,6 +2561,7 @@ static struct talitos_crypto_alg *talitos_alg_alloc(struct device *dev, break; default: dev_err(dev, "unknown algorithm type %d\n", t_alg->algt.type); + kfree(t_alg); return ERR_PTR(-EINVAL); } From 44cb6ff1db11f2a147cccc943927a5ca20d1bb81 Mon Sep 17 00:00:00 2001 From: Horia Geant? Date: Mon, 11 May 2015 20:04:49 +0300 Subject: [PATCH 0040/2091] Revert "crypto: talitos - convert to use be16_add_cpu()" commit 69d9cd8c592f1abce820dbce7181bbbf6812cfbd upstream. This reverts commit 7291a932c6e27d9768e374e9d648086636daf61c. The conversion to be16_add_cpu() is incorrect in case cryptlen is negative due to premature (i.e. before addition / subtraction) implicit conversion of cryptlen (int -> u16) leading to sign loss. Cc: Wei Yongjun Signed-off-by: Horia Geanta Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/talitos.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index c52b4b32bcbd1..f062158d4dc9e 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -925,7 +925,8 @@ static int sg_to_link_tbl(struct scatterlist *sg, int sg_count, sg_count--; link_tbl_ptr--; } - be16_add_cpu(&link_tbl_ptr->len, cryptlen); + link_tbl_ptr->len = cpu_to_be16(be16_to_cpu(link_tbl_ptr->len) + + cryptlen); /* tag end of link table */ link_tbl_ptr->j_extent = DESC_PTR_LNKTBL_RETURN; From 72e09509a225e553f3736b3822a91f99f8c4777c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 29 Jun 2015 17:47:42 +0100 Subject: [PATCH 0041/2091] iommu/arm-smmu: Fix broken ATOS check commit d38f0ff9ab35414644995bae187d015c31aae19c upstream. 
Commit 83a60ed8f0b5 ("iommu/arm-smmu: fix ARM_SMMU_FEAT_TRANS_OPS condition") accidentally negated the ID0_ATOSNS predicate in the ATOS feature check, causing the driver to attempt ATOS requests on SMMUv2 hardware without the ATOS feature implemented. This patch restores the predicate to the correct value. Reported-by: Varun Sethi Signed-off-by: Will Deacon Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/arm-smmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 66a803b9dd3af..65075ef75e2a3 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -1567,7 +1567,7 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) return -ENODEV; } - if ((id & ID0_S1TS) && ((smmu->version == 1) || (id & ID0_ATOSNS))) { + if ((id & ID0_S1TS) && ((smmu->version == 1) || !(id & ID0_ATOSNS))) { smmu->features |= ARM_SMMU_FEAT_TRANS_OPS; dev_notice(smmu->dev, "\taddress translation ops\n"); } From 396887ba3a77ee7f8b17ea85afa8501262e230b4 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 18 Jun 2015 10:48:34 +0200 Subject: [PATCH 0042/2091] iommu/amd: Handle large pages correctly in free_pagetable commit 0b3fff54bc01e8e6064d222a33e6fa7adabd94cd upstream. Make sure that we are skipping over large PTEs while walking the page-table tree. Fixes: 5c34c403b723 ("iommu/amd: Fix memory leak in free_pagetable") Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/amd_iommu.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index e1c7e9e510451..ca9f4edbb9409 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -1869,9 +1869,15 @@ static void free_pt_##LVL (unsigned long __pt) \ pt = (u64 *)__pt; \ \ for (i = 0; i < 512; ++i) { \ + /* PTE present? */ \ if (!IOMMU_PTE_PRESENT(pt[i])) \ continue; \ \ + /* Large PTE? */ \ + if (PM_PTE_LEVEL(pt[i]) == 0 || \ + PM_PTE_LEVEL(pt[i]) == 7) \ + continue; \ + \ p = (unsigned long)IOMMU_PTE_PAGE(pt[i]); \ FN(p); \ } \ From 4b81f9f8194ffa58b27d269233c9f778b758cb2b Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Fri, 12 Jun 2015 11:45:02 +0200 Subject: [PATCH 0043/2091] mmc: sdhci: fix low memory corruption commit 62a7f368ffbc13d9aedfdd7aeae711b177db69ac upstream. When dma mapping (dma_map_sg) fails in sdhci_pre_dma_transfer, -EINVAL is returned. There are 3 callers of sdhci_pre_dma_transfer: * sdhci_pre_req and sdhci_adma_table_pre: handle negative return * sdhci_prepare_data: handles 0 (error) and "else" (good) only sdhci_prepare_data is therefore broken. When it receives -EINVAL from sdhci_pre_dma_transfer, it assumes 1 sg mapping was mapped. Later, this non-existent mapping with address 0 is kmap'ped and written to: Corrupted low memory at ffff880000001000 (1000 phys) = 22b7d67df2f6d1cf Corrupted low memory at ffff880000001008 (1008 phys) = 63848a5216b7dd95 Corrupted low memory at ffff880000001010 (1010 phys) = 330eb7ddef39e427 Corrupted low memory at ffff880000001018 (1018 phys) = 8017ac7295039bda Corrupted low memory at ffff880000001020 (1020 phys) = 8ce039eac119074f ... So teach sdhci_prepare_data to understand negative return values from sdhci_pre_dma_transfer and disable DMA in that case, as well as for zero. It was introduced in 348487cb28e66b032bae1b38424d81bf5b444408 (mmc: sdhci: use pipeline mmc requests to improve performance). 
The commit seems to be suspicious also by assigning host->sg_count both in sdhci_pre_dma_transfer and sdhci_adma_table_pre. Signed-off-by: Jiri Slaby Fixes: 348487cb28e6 Cc: Ulf Hansson Cc: Haibo Chen Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index c80287a027356..9231cdfe27575 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -848,7 +848,7 @@ static void sdhci_prepare_data(struct sdhci_host *host, struct mmc_command *cmd) int sg_cnt; sg_cnt = sdhci_pre_dma_transfer(host, data, NULL); - if (sg_cnt == 0) { + if (sg_cnt <= 0) { /* * This only happens when someone fed * us an invalid request. From cd430d3e58e79d93597c26ee684730302940ae8c Mon Sep 17 00:00:00 2001 From: Joe Konno Date: Tue, 12 May 2015 07:59:42 -0700 Subject: [PATCH 0044/2091] intel_pstate: set BYT MSR with wrmsrl_on_cpu() commit 0dd23f94251f49da99a6cbfb22418b2d757d77d6 upstream. Commit 007bea098b86 (intel_pstate: Add setting voltage value for baytrail P states.) introduced byt_set_pstate() with the assumption that it would always be run by the CPU whose MSR is to be written by it. It turns out, however, that is not always the case in practice, so modify byt_set_pstate() to enforce the MSR write done by it to always happen on the right CPU. Fixes: 007bea098b86 (intel_pstate: Add setting voltage value for baytrail P states.) Signed-off-by: Joe Konno Acked-by: Kristen Carlson Accardi Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/intel_pstate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 6414661ac1c46..c45d274a75c8b 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -535,7 +535,7 @@ static void byt_set_pstate(struct cpudata *cpudata, int pstate) val |= vid; - wrmsrl(MSR_IA32_PERF_CTL, val); + wrmsrl_on_cpu(cpudata->cpu, MSR_IA32_PERF_CTL, val); } #define BYT_BCLK_FREQS 5 From 805f18e0bcbf894c4412e3069c7ac4c3e1feb9bf Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 4 Jun 2015 15:57:25 -0400 Subject: [PATCH 0045/2091] selinux: fix setting of security labels on NFS commit 9fc2b4b436cff7d8403034676014f1be9d534942 upstream. Before calling into the filesystem, vfs_setxattr calls security_inode_setxattr, which ends up calling selinux_inode_setxattr in our case. That returns -EOPNOTSUPP whenever SBLABEL_MNT is not set. SBLABEL_MNT was supposed to be set by sb_finish_set_opts, which sets it only if selinux_is_sblabel_mnt returns true. The selinux_is_sblabel_mnt logic was broken by eadcabc697e9 "SELinux: do all flags twiddling in one place", which didn't take into the account the SECURITY_FS_USE_NATIVE behavior that had been introduced for nfs with eb9ae686507b "SELinux: Add new labeling type native labels". This caused setxattr's of security labels over NFSv4.2 to fail. Cc: Eric Paris Cc: David Quigley Reported-by: Richard Chan Signed-off-by: J. 
Bruce Fields Acked-by: Stephen Smalley [PM: added the stable dependency] Signed-off-by: Paul Moore Signed-off-by: Greg Kroah-Hartman --- security/selinux/hooks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 7dade28affba5..212070e1de1ac 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -403,6 +403,7 @@ static int selinux_is_sblabel_mnt(struct super_block *sb) return sbsec->behavior == SECURITY_FS_USE_XATTR || sbsec->behavior == SECURITY_FS_USE_TRANS || sbsec->behavior == SECURITY_FS_USE_TASK || + sbsec->behavior == SECURITY_FS_USE_NATIVE || /* Special handling. Genfs but also in-core setxattr handler */ !strcmp(sb->s_type->name, "sysfs") || !strcmp(sb->s_type->name, "pstore") || From c8bdf091472978ecfc884148b50be5f93761fdf9 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 16 Mar 2015 10:59:43 +0000 Subject: [PATCH 0046/2091] arm: KVM: force execution of HCPTR access on VM exit commit 85e84ba31039595995dae80b277378213602891b upstream. On VM entry, we disable access to the VFP registers in order to perform a lazy save/restore of these registers. On VM exit, we restore access, test if we did enable them before, and save/restore the guest/host registers if necessary. In this sequence, the FPEXC register is always accessed, irrespective of the trapping configuration. If the guest didn't touch the VFP registers, then the HCPTR access has now enabled such access, but we're missing a barrier to ensure architectural execution of the new HCPTR configuration. If the HCPTR access has been delayed/reordered, the subsequent access to FPEXC will cause a trap, which we aren't prepared to handle at all. The same condition exists when trapping to enable VFP for the guest. The fix is to introduce a barrier after enabling VFP access. In the vmexit case, it can be relaxed to only takes place if the guest hasn't accessed its view of the VFP registers, making the access to FPEXC safe. The set_hcptr macro is modified to deal with both vmenter/vmexit and vmtrap operations, and now takes an optional label that is branched to when the guest hasn't touched the VFP registers. Reported-by: Vikram Sethi Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- arch/arm/kvm/interrupts.S | 10 ++++------ arch/arm/kvm/interrupts_head.S | 20 ++++++++++++++++++-- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S index 79caf79b304a0..f7db3a5d80e3b 100644 --- a/arch/arm/kvm/interrupts.S +++ b/arch/arm/kvm/interrupts.S @@ -170,13 +170,9 @@ __kvm_vcpu_return: @ Don't trap coprocessor accesses for host kernel set_hstr vmexit set_hdcr vmexit - set_hcptr vmexit, (HCPTR_TTA | HCPTR_TCP(10) | HCPTR_TCP(11)) + set_hcptr vmexit, (HCPTR_TTA | HCPTR_TCP(10) | HCPTR_TCP(11)), after_vfp_restore #ifdef CONFIG_VFPv3 - @ Save floating point registers we if let guest use them. - tst r2, #(HCPTR_TCP(10) | HCPTR_TCP(11)) - bne after_vfp_restore - @ Switch VFP/NEON hardware state to the host's add r7, vcpu, #VCPU_VFP_GUEST store_vfp_state r7 @@ -188,6 +184,8 @@ after_vfp_restore: @ Restore FPEXC_EN which we clobbered on entry pop {r2} VFPFMXR FPEXC, r2 +#else +after_vfp_restore: #endif @ Reset Hyp-role @@ -483,7 +481,7 @@ switch_to_guest_vfp: push {r3-r7} @ NEON/VFP used. Turn on VFP access. 
- set_hcptr vmexit, (HCPTR_TCP(10) | HCPTR_TCP(11)) + set_hcptr vmtrap, (HCPTR_TCP(10) | HCPTR_TCP(11)) @ Switch VFP/NEON hardware state to the guest's add r7, r0, #VCPU_VFP_HOST diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S index 35e4a3a0c476c..48efe2ee452cf 100644 --- a/arch/arm/kvm/interrupts_head.S +++ b/arch/arm/kvm/interrupts_head.S @@ -591,8 +591,13 @@ ARM_BE8(rev r6, r6 ) .endm /* Configures the HCPTR (Hyp Coprocessor Trap Register) on entry/return - * (hardware reset value is 0). Keep previous value in r2. */ -.macro set_hcptr operation, mask + * (hardware reset value is 0). Keep previous value in r2. + * An ISB is emited on vmexit/vmtrap, but executed on vmexit only if + * VFP wasn't already enabled (always executed on vmtrap). + * If a label is specified with vmexit, it is branched to if VFP wasn't + * enabled. + */ +.macro set_hcptr operation, mask, label = none mrc p15, 4, r2, c1, c1, 2 ldr r3, =\mask .if \operation == vmentry @@ -601,6 +606,17 @@ ARM_BE8(rev r6, r6 ) bic r3, r2, r3 @ Don't trap defined coproc-accesses .endif mcr p15, 4, r3, c1, c1, 2 + .if \operation != vmentry + .if \operation == vmexit + tst r2, #(HCPTR_TCP(10) | HCPTR_TCP(11)) + beq 1f + .endif + isb + .if \label != none + b \label + .endif +1: + .endif .endm /* Configures the HDCR (Hyp Debug Configuration Register) on entry/return From 3f3587c4ff8c828aac436237aeca8694a26defd3 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Wed, 10 Jun 2015 15:19:24 +0100 Subject: [PATCH 0047/2091] ARM: kvm: psci: fix handling of unimplemented functions commit e2d997366dc5b6c9d14035867f73957f93e7578c upstream. According to the PSCI specification and the SMC/HVC calling convention, PSCI function_ids that are not implemented must return NOT_SUPPORTED as return value. Current KVM implementation takes an unhandled PSCI function_id as an error and injects an undefined instruction into the guest if PSCI implementation is called with a function_id that is not handled by the resident PSCI version (ie it is not implemented), which is not the behaviour expected by a guest when calling a PSCI function_id that is not implemented. This patch fixes this issue by returning NOT_SUPPORTED whenever the kvm PSCI call is executed for a function_id that is not implemented by the PSCI kvm layer. 
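For illustration only, a hypothetical guest-side helper showing the behaviour a guest relies on here; invoke_psci() and the helper name are assumptions rather than code from this patch, and PSCI_RET_NOT_SUPPORTED is -1 per the PSCI specification:

/* Assumed to issue the actual SMC/HVC and return the r0/x0 result. */
extern long invoke_psci(unsigned int fn_id, unsigned long a0,
			unsigned long a1, unsigned long a2);

#define PSCI_RET_NOT_SUPPORTED	(-1)

static int psci_fn_supported(unsigned int fn_id)
{
	long ret = invoke_psci(fn_id, 0, 0, 0);

	/*
	 * With this fix an unimplemented fn_id is reported as
	 * NOT_SUPPORTED here instead of an undefined instruction
	 * being injected into the guest.
	 */
	return ret != PSCI_RET_NOT_SUPPORTED;
}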
Cc: Christoffer Dall Acked-by: Sudeep Holla Signed-off-by: Lorenzo Pieralisi Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- arch/arm/kvm/psci.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c index 02fa8eff6ae1d..531e922486b2f 100644 --- a/arch/arm/kvm/psci.c +++ b/arch/arm/kvm/psci.c @@ -230,10 +230,6 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) case PSCI_0_2_FN64_AFFINITY_INFO: val = kvm_psci_vcpu_affinity_info(vcpu); break; - case PSCI_0_2_FN_MIGRATE: - case PSCI_0_2_FN64_MIGRATE: - val = PSCI_RET_NOT_SUPPORTED; - break; case PSCI_0_2_FN_MIGRATE_INFO_TYPE: /* * Trusted OS is MP hence does not require migration @@ -242,10 +238,6 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) */ val = PSCI_0_2_TOS_MP; break; - case PSCI_0_2_FN_MIGRATE_INFO_UP_CPU: - case PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU: - val = PSCI_RET_NOT_SUPPORTED; - break; case PSCI_0_2_FN_SYSTEM_OFF: kvm_psci_system_off(vcpu); /* @@ -271,7 +263,8 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) ret = 0; break; default: - return -EINVAL; + val = PSCI_RET_NOT_SUPPORTED; + break; } *vcpu_reg(vcpu, 0) = val; @@ -291,12 +284,9 @@ static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu) case KVM_PSCI_FN_CPU_ON: val = kvm_psci_vcpu_on(vcpu); break; - case KVM_PSCI_FN_CPU_SUSPEND: - case KVM_PSCI_FN_MIGRATE: + default: val = PSCI_RET_NOT_SUPPORTED; break; - default: - return -EINVAL; } *vcpu_reg(vcpu, 0) = val; From 3544f27efa25890516158c8e13d21e878969d125 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 15 Jan 2015 13:58:57 +0300 Subject: [PATCH 0048/2091] ARM: tegra20: Store CPU "resettable" status in IRAM commit 4d48edb3c3e1234d6b3fcdfb9ac24d7c6de449cb upstream. Commit 7232398abc6a ("ARM: tegra: Convert PMC to a driver") changed tegra_resume() location storing from late to early and, as a result, broke suspend on Tegra20. PMC scratch register 41 is used by tegra LP1 resume code for retrieving stored physical memory address of common resume function and in the same time used by tegra20_cpu_shutdown() (shared by Tegra20 cpuidle driver and platform SMP code), which is storing CPU1 "resettable" status. It implies strict order of scratch register usage, otherwise resume function address is lost on Tegra20 after disabling non-boot CPU's on suspend. Fix it by storing "resettable" status in IRAM instead of PMC scratch register. 
Signed-off-by: Dmitry Osipenko Fixes: 7232398abc6a (ARM: tegra: Convert PMC to a driver) Signed-off-by: Thierry Reding Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-tegra/cpuidle-tegra20.c | 5 ++-- arch/arm/mach-tegra/reset-handler.S | 10 +++++--- arch/arm/mach-tegra/reset.h | 4 +++ arch/arm/mach-tegra/sleep-tegra20.S | 37 +++++++++++++++------------ arch/arm/mach-tegra/sleep.h | 4 +++ 5 files changed, 38 insertions(+), 22 deletions(-) diff --git a/arch/arm/mach-tegra/cpuidle-tegra20.c b/arch/arm/mach-tegra/cpuidle-tegra20.c index 88de2dce2e872..7469347b17493 100644 --- a/arch/arm/mach-tegra/cpuidle-tegra20.c +++ b/arch/arm/mach-tegra/cpuidle-tegra20.c @@ -34,6 +34,7 @@ #include "iomap.h" #include "irq.h" #include "pm.h" +#include "reset.h" #include "sleep.h" #ifdef CONFIG_PM_SLEEP @@ -70,15 +71,13 @@ static struct cpuidle_driver tegra_idle_driver = { #ifdef CONFIG_PM_SLEEP #ifdef CONFIG_SMP -static void __iomem *pmc = IO_ADDRESS(TEGRA_PMC_BASE); - static int tegra20_reset_sleeping_cpu_1(void) { int ret = 0; tegra_pen_lock(); - if (readl(pmc + PMC_SCRATCH41) == CPU_RESETTABLE) + if (readb(tegra20_cpu1_resettable_status) == CPU_RESETTABLE) tegra20_cpu_shutdown(1); else ret = -EINVAL; diff --git a/arch/arm/mach-tegra/reset-handler.S b/arch/arm/mach-tegra/reset-handler.S index 71be4af5e975b..e3070fdab80b8 100644 --- a/arch/arm/mach-tegra/reset-handler.S +++ b/arch/arm/mach-tegra/reset-handler.S @@ -169,10 +169,10 @@ after_errata: cmp r6, #TEGRA20 bne 1f /* If not CPU0, don't let CPU0 reset CPU1 now that CPU1 is coming up. */ - mov32 r5, TEGRA_PMC_BASE - mov r0, #0 + mov32 r5, TEGRA_IRAM_BASE + TEGRA_IRAM_RESET_HANDLER_OFFSET + mov r0, #CPU_NOT_RESETTABLE cmp r10, #0 - strne r0, [r5, #PMC_SCRATCH41] + strneb r0, [r5, #__tegra20_cpu1_resettable_status_offset] 1: #endif @@ -281,6 +281,10 @@ __tegra_cpu_reset_handler_data: .rept TEGRA_RESET_DATA_SIZE .long 0 .endr + .globl __tegra20_cpu1_resettable_status_offset + .equ __tegra20_cpu1_resettable_status_offset, \ + . 
- __tegra_cpu_reset_handler_start + .byte 0 .align L1_CACHE_SHIFT ENTRY(__tegra_cpu_reset_handler_end) diff --git a/arch/arm/mach-tegra/reset.h b/arch/arm/mach-tegra/reset.h index 76a93434c6ee0..29c3dec0126a4 100644 --- a/arch/arm/mach-tegra/reset.h +++ b/arch/arm/mach-tegra/reset.h @@ -35,6 +35,7 @@ extern unsigned long __tegra_cpu_reset_handler_data[TEGRA_RESET_DATA_SIZE]; void __tegra_cpu_reset_handler_start(void); void __tegra_cpu_reset_handler(void); +void __tegra20_cpu1_resettable_status_offset(void); void __tegra_cpu_reset_handler_end(void); void tegra_secondary_startup(void); @@ -47,6 +48,9 @@ void tegra_secondary_startup(void); (IO_ADDRESS(TEGRA_IRAM_BASE + TEGRA_IRAM_RESET_HANDLER_OFFSET + \ ((u32)&__tegra_cpu_reset_handler_data[TEGRA_RESET_MASK_LP2] - \ (u32)__tegra_cpu_reset_handler_start))) +#define tegra20_cpu1_resettable_status \ + (IO_ADDRESS(TEGRA_IRAM_BASE + TEGRA_IRAM_RESET_HANDLER_OFFSET + \ + (u32)__tegra20_cpu1_resettable_status_offset)) #endif #define tegra_cpu_reset_handler_offset \ diff --git a/arch/arm/mach-tegra/sleep-tegra20.S b/arch/arm/mach-tegra/sleep-tegra20.S index be4bc5f853f5c..e6b684e14322c 100644 --- a/arch/arm/mach-tegra/sleep-tegra20.S +++ b/arch/arm/mach-tegra/sleep-tegra20.S @@ -97,9 +97,10 @@ ENDPROC(tegra20_hotplug_shutdown) ENTRY(tegra20_cpu_shutdown) cmp r0, #0 reteq lr @ must not be called for CPU 0 - mov32 r1, TEGRA_PMC_VIRT + PMC_SCRATCH41 + mov32 r1, TEGRA_IRAM_RESET_BASE_VIRT + ldr r2, =__tegra20_cpu1_resettable_status_offset mov r12, #CPU_RESETTABLE - str r12, [r1] + strb r12, [r1, r2] cpu_to_halt_reg r1, r0 ldr r3, =TEGRA_FLOW_CTRL_VIRT @@ -182,38 +183,41 @@ ENDPROC(tegra_pen_unlock) /* * tegra20_cpu_clear_resettable(void) * - * Called to clear the "resettable soon" flag in PMC_SCRATCH41 when + * Called to clear the "resettable soon" flag in IRAM variable when * it is expected that the secondary CPU will be idle soon. */ ENTRY(tegra20_cpu_clear_resettable) - mov32 r1, TEGRA_PMC_VIRT + PMC_SCRATCH41 + mov32 r1, TEGRA_IRAM_RESET_BASE_VIRT + ldr r2, =__tegra20_cpu1_resettable_status_offset mov r12, #CPU_NOT_RESETTABLE - str r12, [r1] + strb r12, [r1, r2] ret lr ENDPROC(tegra20_cpu_clear_resettable) /* * tegra20_cpu_set_resettable_soon(void) * - * Called to set the "resettable soon" flag in PMC_SCRATCH41 when + * Called to set the "resettable soon" flag in IRAM variable when * it is expected that the secondary CPU will be idle soon. */ ENTRY(tegra20_cpu_set_resettable_soon) - mov32 r1, TEGRA_PMC_VIRT + PMC_SCRATCH41 + mov32 r1, TEGRA_IRAM_RESET_BASE_VIRT + ldr r2, =__tegra20_cpu1_resettable_status_offset mov r12, #CPU_RESETTABLE_SOON - str r12, [r1] + strb r12, [r1, r2] ret lr ENDPROC(tegra20_cpu_set_resettable_soon) /* * tegra20_cpu_is_resettable_soon(void) * - * Returns true if the "resettable soon" flag in PMC_SCRATCH41 has been + * Returns true if the "resettable soon" flag in IRAM variable has been * set because it is expected that the secondary CPU will be idle soon. 
*/ ENTRY(tegra20_cpu_is_resettable_soon) - mov32 r1, TEGRA_PMC_VIRT + PMC_SCRATCH41 - ldr r12, [r1] + mov32 r1, TEGRA_IRAM_RESET_BASE_VIRT + ldr r2, =__tegra20_cpu1_resettable_status_offset + ldrb r12, [r1, r2] cmp r12, #CPU_RESETTABLE_SOON moveq r0, #1 movne r0, #0 @@ -256,9 +260,10 @@ ENTRY(tegra20_sleep_cpu_secondary_finish) mov r0, #TEGRA_FLUSH_CACHE_LOUIS bl tegra_disable_clean_inv_dcache - mov32 r0, TEGRA_PMC_VIRT + PMC_SCRATCH41 + mov32 r0, TEGRA_IRAM_RESET_BASE_VIRT + ldr r4, =__tegra20_cpu1_resettable_status_offset mov r3, #CPU_RESETTABLE - str r3, [r0] + strb r3, [r0, r4] bl tegra_cpu_do_idle @@ -274,10 +279,10 @@ ENTRY(tegra20_sleep_cpu_secondary_finish) bl tegra_pen_lock - mov32 r3, TEGRA_PMC_VIRT - add r0, r3, #PMC_SCRATCH41 + mov32 r0, TEGRA_IRAM_RESET_BASE_VIRT + ldr r4, =__tegra20_cpu1_resettable_status_offset mov r3, #CPU_NOT_RESETTABLE - str r3, [r0] + strb r3, [r0, r4] bl tegra_pen_unlock diff --git a/arch/arm/mach-tegra/sleep.h b/arch/arm/mach-tegra/sleep.h index 92d46ec1361ab..0d59360d891da 100644 --- a/arch/arm/mach-tegra/sleep.h +++ b/arch/arm/mach-tegra/sleep.h @@ -18,6 +18,7 @@ #define __MACH_TEGRA_SLEEP_H #include "iomap.h" +#include "irammap.h" #define TEGRA_ARM_PERIF_VIRT (TEGRA_ARM_PERIF_BASE - IO_CPU_PHYS \ + IO_CPU_VIRT) @@ -29,6 +30,9 @@ + IO_APB_VIRT) #define TEGRA_PMC_VIRT (TEGRA_PMC_BASE - IO_APB_PHYS + IO_APB_VIRT) +#define TEGRA_IRAM_RESET_BASE_VIRT (IO_IRAM_VIRT + \ + TEGRA_IRAM_RESET_HANDLER_OFFSET) + /* PMC_SCRATCH37-39 and 41 are used for tegra_pen_lock and idle */ #define PMC_SCRATCH37 0x130 #define PMC_SCRATCH38 0x134 From 301773b62355e8a6028a8023be2d04c7317915d9 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Tue, 16 Jun 2015 14:12:57 +0200 Subject: [PATCH 0049/2091] ARM: mvebu: fix suspend to RAM on big-endian configurations commit 2f5bc307be2480ba89e4c5d118f406f04a4a7299 upstream. The current Armada XP suspend to RAM implementation, as added in commit 27432825ae19f ("ARM: mvebu: Armada XP GP specific suspend/resume code") does not handle big-endian configurations properly: the small bit of assembly code putting the DRAM in self-refresh and toggling the GPIOs to turn off power forgets to convert the values to little-endian. This commit fixes that by making sure the two values we will write to the DRAM controller register and GPIO register are already in little-endian before entering the critical assembly code. Signed-off-by: Thomas Petazzoni Fixes: 27432825ae19f ("ARM: mvebu: Armada XP GP specific suspend/resume code") Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-mvebu/pm-board.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm/mach-mvebu/pm-board.c b/arch/arm/mach-mvebu/pm-board.c index 6dfd4ab97b2aa..301ab38d38ba8 100644 --- a/arch/arm/mach-mvebu/pm-board.c +++ b/arch/arm/mach-mvebu/pm-board.c @@ -43,6 +43,9 @@ static void mvebu_armada_xp_gp_pm_enter(void __iomem *sdram_reg, u32 srcmd) for (i = 0; i < ARMADA_XP_GP_PIC_NR_GPIOS; i++) ackcmd |= BIT(pic_raw_gpios[i]); + srcmd = cpu_to_le32(srcmd); + ackcmd = cpu_to_le32(ackcmd); + /* * Wait a while, the PIC needs quite a bit of time between the * two GPIO commands. From b9d118e11a1b94c155a1cc985f1a2d3a9e03da2f Mon Sep 17 00:00:00 2001 From: preeti Date: Wed, 24 Jun 2015 01:48:01 -0500 Subject: [PATCH 0050/2091] tick/idle/powerpc: Do not register idle states with CPUIDLE_FLAG_TIMER_STOP set in periodic mode commit cc5a2f7b8f39e7db559778f7913a2410257b3e50 upstream. On some archs, the local clockevent device stops in deep cpuidle states. 
The broadcast framework is used to wakeup cpus in these idle states, in which either an external clockevent device is used to send wakeup ipis or the hrtimer broadcast framework kicks in in the absence of such a device. One cpu is nominated as the broadcast cpu and this cpu sends wakeup ipis to sleeping cpus at the appropriate time. This is the implementation in the oneshot mode of broadcast. In periodic mode of broadcast however, the presence of such cpuidle states results in the cpuidle driver calling tick_broadcast_enable() which shuts down the local clockevent devices of all the cpus and appoints the tick broadcast device as the clockevent device for each of them. This works on those archs where the tick broadcast device is a real clockevent device. But on archs which depend on the hrtimer mode of broadcast, the tick broadcast device hapens to be a pseudo device. The consequence is that the local clockevent devices of all cpus are shutdown and the kernel hangs at boot time in periodic mode. Let us thus not register the cpuidle states which have CPUIDLE_FLAG_TIMER_STOP flag set, on archs which depend on the hrtimer mode of broadcast in periodic mode. This patch takes care of doing this on powerpc. The cpus would not have entered into such deep cpuidle states in periodic mode on powerpc anyway. So there is no loss here. Signed-off-by: Preeti U Murthy Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/cpuidle/cpuidle-powernv.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c index 59372077ec7c1..3442764a52938 100644 --- a/drivers/cpuidle/cpuidle-powernv.c +++ b/drivers/cpuidle/cpuidle-powernv.c @@ -60,6 +60,8 @@ static int nap_loop(struct cpuidle_device *dev, return index; } +/* Register for fastsleep only in oneshot mode of broadcast */ +#ifdef CONFIG_TICK_ONESHOT static int fastsleep_loop(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) @@ -83,7 +85,7 @@ static int fastsleep_loop(struct cpuidle_device *dev, return index; } - +#endif /* * States for dedicated partition case. */ @@ -209,7 +211,14 @@ static int powernv_add_idle_states(void) powernv_states[nr_idle_states].flags = 0; powernv_states[nr_idle_states].target_residency = 100; powernv_states[nr_idle_states].enter = &nap_loop; - } else if (flags[i] & OPAL_PM_SLEEP_ENABLED || + } + + /* + * All cpuidle states with CPUIDLE_FLAG_TIMER_STOP set must come + * within this config dependency check. + */ +#ifdef CONFIG_TICK_ONESHOT + if (flags[i] & OPAL_PM_SLEEP_ENABLED || flags[i] & OPAL_PM_SLEEP_ENABLED_ER1) { /* Add FASTSLEEP state */ strcpy(powernv_states[nr_idle_states].name, "FastSleep"); @@ -218,7 +227,7 @@ static int powernv_add_idle_states(void) powernv_states[nr_idle_states].target_residency = 300000; powernv_states[nr_idle_states].enter = &fastsleep_loop; } - +#endif powernv_states[nr_idle_states].exit_latency = ((unsigned int)latency_ns[i]) / 1000; From f6707abd213039644950edb760d5c46503f065fa Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 26 May 2015 15:10:24 +1000 Subject: [PATCH 0051/2091] powerpc/perf: Fix book3s kernel to userspace backtraces commit 72e349f1124a114435e599479c9b8d14bfd1ebcd upstream. When we take a PMU exception or a software event we call perf_read_regs(). This overloads regs->result with a boolean that describes if we should use the sampled instruction address register (SIAR) or the regs. 
If the exception is in kernel, we start with the kernel regs and backtrace through the kernel stack. At this point we switch to the userspace regs and backtrace the user stack with perf_callchain_user(). Unfortunately these regs have not got the perf_read_regs() treatment, so regs->result could be anything. If it is non zero, perf_instruction_pointer() decides to use the SIAR, and we get issues like this: 0.11% qemu-system-ppc [kernel.kallsyms] [k] _raw_spin_lock_irqsave | ---_raw_spin_lock_irqsave | |--52.35%-- 0 | | | |--46.39%-- __hrtimer_start_range_ns | | kvmppc_run_core | | kvmppc_vcpu_run_hv | | kvmppc_vcpu_run | | kvm_arch_vcpu_ioctl_run | | kvm_vcpu_ioctl | | do_vfs_ioctl | | sys_ioctl | | system_call | | | | | |--67.08%-- _raw_spin_lock_irqsave <--- hi mum | | | | | | | --100.00%-- 0x7e714 | | | 0x7e714 Notice the bogus _raw_spin_irqsave when we transition from kernel (system_call) to userspace (0x7e714). We inserted what was in the SIAR. Add a check in regs_use_siar() to check that the regs in question are from a PMU exception. With this fix the backtrace makes sense: 0.47% qemu-system-ppc [kernel.vmlinux] [k] _raw_spin_lock_irqsave | ---_raw_spin_lock_irqsave | |--53.83%-- 0 | | | |--44.73%-- hrtimer_try_to_cancel | | kvmppc_start_thread | | kvmppc_run_core | | kvmppc_vcpu_run_hv | | kvmppc_vcpu_run | | kvm_arch_vcpu_ioctl_run | | kvm_vcpu_ioctl | | do_vfs_ioctl | | sys_ioctl | | system_call | | __ioctl | | 0x7e714 | | 0x7e714 Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/perf/core-book3s.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 12b638425bb9b..d90893b76e7ce 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -131,7 +131,16 @@ static void pmao_restore_workaround(bool ebb) { } static bool regs_use_siar(struct pt_regs *regs) { - return !!regs->result; + /* + * When we take a performance monitor exception the regs are setup + * using perf_read_regs() which overloads some fields, in particular + * regs->result to tell us whether to use SIAR. + * + * However if the regs are from another exception, eg. a syscall, then + * they have not been setup using perf_read_regs() and so regs->result + * is something random. + */ + return ((TRAP(regs) == 0xf00) && regs->result); } /* From 9dc6d43528e147f9d243df8da6ccf54c7ee244de Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 9 Jun 2015 17:31:38 -0500 Subject: [PATCH 0052/2091] x86/PCI: Use host bridge _CRS info on systems with >32 bit addressing commit 3d9fecf6bfb8b12bc2f9a4c7109895a2a2bb9436 upstream. We enable _CRS on all systems from 2008 and later. On older systems, we ignore _CRS and assume the whole physical address space (excluding RAM and other devices) is available for PCI devices, but on systems that support physical address spaces larger than 4GB, it's doubtful that the area above 4GB is really available for PCI. After d56dbf5bab8c ("PCI: Allocate 64-bit BARs above 4G when possible"), we try to use that space above 4GB *first*, so we're more likely to put a device there. On Juan's Toshiba Satellite Pro U200, BIOS left the graphics, sound, 1394, and card reader devices unassigned (but only after Windows had been booted). Only the sound device had a 64-bit BAR, so it was the only device placed above 4GB, and hence the only device that didn't work. 
Keep _CRS enabled even on pre-2008 systems if they support physical address space larger than 4GB. Fixes: d56dbf5bab8c ("PCI: Allocate 64-bit BARs above 4G when possible") Reported-and-tested-by: Juan Dayer Reported-and-tested-by: Alan Horsfield Link: https://bugzilla.kernel.org/show_bug.cgi?id=99221 Link: https://bugzilla.opensuse.org/show_bug.cgi?id=907092 Signed-off-by: Bjorn Helgaas Signed-off-by: Greg Kroah-Hartman --- arch/x86/pci/acpi.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 14a63ed6fe092..546f18ad09f07 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -121,8 +121,10 @@ void __init pci_acpi_crs_quirks(void) { int year; - if (dmi_get_date(DMI_BIOS_DATE, &year, NULL, NULL) && year < 2008) - pci_use_crs = false; + if (dmi_get_date(DMI_BIOS_DATE, &year, NULL, NULL) && year < 2008) { + if (iomem_resource.end <= 0xffffffff) + pci_use_crs = false; + } dmi_check_system(pci_crs_quirks); From b6f2faffa0a189c28a2f2242c0b23fb031ba3075 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 9 Jun 2015 18:54:07 -0500 Subject: [PATCH 0053/2091] x86/PCI: Use host bridge _CRS info on Foxconn K8M890-8237A commit 1dace0116d0b05c967d94644fc4dfe96be2ecd3d upstream. The Foxconn K8M890-8237A has two PCI host bridges, and we can't assign resources correctly without the information from _CRS that tells us which address ranges are claimed by which bridge. In the bugs mentioned below, we incorrectly assign a sound card address (this example is from 1033299): bus: 00 index 2 [mem 0x80000000-0xfcffffffff] ACPI: PCI Root Bridge [PCI0] (domain 0000 [bus 00-7f]) pci_root PNP0A08:00: host bridge window [mem 0x80000000-0xbfefffff] (ignored) pci_root PNP0A08:00: host bridge window [mem 0xc0000000-0xdfffffff] (ignored) pci_root PNP0A08:00: host bridge window [mem 0xf0000000-0xfebfffff] (ignored) ACPI: PCI Root Bridge [PCI1] (domain 0000 [bus 80-ff]) pci_root PNP0A08:01: host bridge window [mem 0xbff00000-0xbfffffff] (ignored) pci 0000:80:01.0: [1106:3288] type 0 class 0x000403 pci 0000:80:01.0: reg 10: [mem 0xbfffc000-0xbfffffff 64bit] pci 0000:80:01.0: address space collision: [mem 0xbfffc000-0xbfffffff 64bit] conflicts with PCI Bus #00 [mem 0x80000000-0xfcffffffff] pci 0000:80:01.0: BAR 0: assigned [mem 0xfd00000000-0xfd00003fff 64bit] BUG: unable to handle kernel paging request at ffffc90000378000 IP: [] azx_create+0x37c/0x822 [snd_hda_intel] We assigned 0xfd_0000_0000, but that is not in any of the host bridge windows, and the sound card doesn't work. Turn on pci=use_crs automatically for this system. 
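For readers unfamiliar with how these board quirks are wired up: pci_use_crs is toggled from a DMI match table in arch/x86/pci/acpi.c, which the diff below extends. The callback body shown here is a reconstruction under that assumption, not quoted kernel source; the table entry only mirrors the shape of the Foxconn K8M890-8237A entry added by this patch.

#include <linux/dmi.h>
#include <linux/init.h>

/* Hedged sketch of a "use _CRS on this board" DMI quirk. */
static bool pci_use_crs = true;

static int set_use_crs(const struct dmi_system_id *id)
{
	pci_use_crs = true;	/* assumed body; the real one lives in arch/x86/pci/acpi.c */
	return 0;
}

static const struct dmi_system_id example_crs_quirks[] __initconst = {
	{
		.callback = set_use_crs,
		.ident = "Example board needing _CRS",
		.matches = {
			DMI_MATCH(DMI_BOARD_VENDOR, "ExampleVendor"),
			DMI_MATCH(DMI_BOARD_NAME, "Example-1234"),
		},
	},
	{}
};
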
Link: https://bugs.launchpad.net/ubuntu/+source/alsa-driver/+bug/931368 Link: https://bugs.launchpad.net/ubuntu/+source/alsa-driver/+bug/1033299 Signed-off-by: Bjorn Helgaas Signed-off-by: Greg Kroah-Hartman --- arch/x86/pci/acpi.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 546f18ad09f07..ff9911707160a 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -81,6 +81,17 @@ static const struct dmi_system_id pci_crs_quirks[] __initconst = { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies, LTD"), }, }, + /* https://bugs.launchpad.net/ubuntu/+source/alsa-driver/+bug/931368 */ + /* https://bugs.launchpad.net/ubuntu/+source/alsa-driver/+bug/1033299 */ + { + .callback = set_use_crs, + .ident = "Foxconn K8M890-8237A", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Foxconn"), + DMI_MATCH(DMI_BOARD_NAME, "K8M890-8237A"), + DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies, LTD"), + }, + }, /* Now for the blacklist.. */ From dff1316f4f15d9c3ff3ff9c11f2c92e0a2040a49 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 18 May 2015 08:35:43 +0200 Subject: [PATCH 0054/2091] KVM: mips: use id_to_memslot correctly commit 69a1220060c1523fd0515216eaa29e22f133b894 upstream. The argument to KVM_GET_DIRTY_LOG is a memslot id; it may not match the position in the memslots array, which is sorted by gfn. Reviewed-by: James Hogan Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/mips/kvm/mips.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index bb68e8d520e83..52f205ae12815 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -982,7 +982,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) /* If nothing is dirty, don't bother messing with page tables. */ if (is_dirty) { - memslot = &kvm->memslots->memslots[log->slot]; + memslot = id_to_memslot(kvm->memslots, log->slot); ga = memslot->base_gfn << PAGE_SHIFT; ga_end = ga + (memslot->npages << PAGE_SHIFT); From 4f3d3bc20372afb29708307608946e2bb56a5a64 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Mon, 27 Apr 2015 15:07:16 +0100 Subject: [PATCH 0055/2091] MIPS: Fix KVM guest fixmap address commit 8e748c8d09a9314eedb5c6367d9acfaacddcdc88 upstream. KVM guest kernels for trap & emulate run in user mode, with a modified set of kernel memory segments. However the fixmap address is still in the normal KSeg3 region at 0xfffe0000 regardless, causing problems when cache alias handling makes use of them when handling copy on write. Therefore define FIXADDR_TOP as 0x7ffe0000 in the guest kernel mapped region when CONFIG_KVM_GUEST is defined. 
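The address choice follows directly from the MIPS32 memory map: a trap-and-emulate guest kernel runs in user mode, so it can only reach user-mapped addresses below 0x80000000, while the default fixmap top sits in the kernel-only kseg3 region. A standalone arithmetic check, assuming the conventional MIPS32 segment boundaries (not kernel code):

#include <stdio.h>

#define FIXADDR_TOP_DEFAULT	0xfffe0000u	/* in kseg3: faults for a user-mode guest */
#define FIXADDR_TOP_KVM_GUEST	0x7ffe0000u	/* in useg: reachable by the guest kernel */
#define USEG_LIMIT		0x80000000u	/* useg is 0x00000000-0x7fffffff */

int main(void)
{
	printf("default fixmap usable by guest?   %s\n",
	       FIXADDR_TOP_DEFAULT < USEG_LIMIT ? "yes" : "no");	/* no  */
	printf("CONFIG_KVM_GUEST fixmap usable?   %s\n",
	       FIXADDR_TOP_KVM_GUEST < USEG_LIMIT ? "yes" : "no");	/* yes */
	return 0;
}
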
Signed-off-by: James Hogan Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/9887/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/include/asm/mach-generic/spaces.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/mips/include/asm/mach-generic/spaces.h b/arch/mips/include/asm/mach-generic/spaces.h index 9488fa5f88660..afc96ecb90042 100644 --- a/arch/mips/include/asm/mach-generic/spaces.h +++ b/arch/mips/include/asm/mach-generic/spaces.h @@ -94,7 +94,11 @@ #endif #ifndef FIXADDR_TOP +#ifdef CONFIG_KVM_GUEST +#define FIXADDR_TOP ((unsigned long)(long)(int)0x7ffe0000) +#else #define FIXADDR_TOP ((unsigned long)(long)(int)0xfffe0000) #endif +#endif #endif /* __ASM_MACH_GENERIC_SPACES_H */ From 14fe2f14d73609cfdb19b71467d085e5347f7353 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 30 Apr 2015 13:33:59 +0200 Subject: [PATCH 0056/2091] KVM: s390: fix external call injection without sigp interpretation commit b938eacea0b6881f2116a061e6da3ec840e75137 upstream. Commit ea5f49692575 ("KVM: s390: only one external call may be pending at a time") introduced a bug on machines that don't have SIGP interpretation facility installed. The injection of an external call will now always fail with -EBUSY (if none is already pending). This leads to the following symptoms: - An external call will be injected but with the wrong "src cpu id", as this id will not be remembered. - The target vcpu will not be woken up, therefore the guest will hang if it cannot deal with unexpected failures of the SIGP EXTERNAL CALL instruction. - If an external call is already pending, -EBUSY will not be reported. Reviewed-by: Christian Borntraeger Reviewed-by: Jens Freimann Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger Signed-off-by: Greg Kroah-Hartman --- arch/s390/kvm/interrupt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 9de47265ef73d..2836b25f63d3b 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -1061,7 +1061,7 @@ static int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) if (sclp_has_sigpif()) return __inject_extcall_sigpif(vcpu, src_id); - if (!test_and_set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs)) + if (test_and_set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs)) return -EBUSY; *extcall = irq->u.extcall; atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); From 534c9f9886c25221d4f4dd396cd78b67214c1796 Mon Sep 17 00:00:00 2001 From: Jens Freimann Date: Mon, 22 Jun 2015 13:20:12 +0200 Subject: [PATCH 0057/2091] KVM: s390: clear floating interrupt bitmap and parameters commit f2ae45edbca7ba5324eef01719ede0151dc5cead upstream. commit 6d3da24141 ("KVM: s390: deliver floating interrupts in order of priority") introduced a regression for the reset handling. We don't clear the bitmap of pending floating interrupts and interrupt parameters. This could result in stale interrupts even after a reset. Let's fix this by clearing the pending bitmap and the parameters for service and machine check interrupts. 
Signed-off-by: Jens Freimann Signed-off-by: Christian Borntraeger Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/s390/kvm/interrupt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 2836b25f63d3b..b745a109bfc15 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -1606,6 +1606,9 @@ void kvm_s390_clear_float_irqs(struct kvm *kvm) int i; spin_lock(&fi->lock); + fi->pending_irqs = 0; + memset(&fi->srv_signal, 0, sizeof(fi->srv_signal)); + memset(&fi->mchk, 0, sizeof(fi->mchk)); for (i = 0; i < FIRQ_LIST_COUNT; i++) clear_irq_list(&fi->lists[i]); for (i = 0; i < FIRQ_MAX_COUNT; i++) From 17de07163877d0f1fdd9185130ac4111f5f340f7 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Thu, 21 May 2015 15:39:31 +0200 Subject: [PATCH 0058/2091] s390/bpf: Fix backward jumps commit b035b60ded132592055c0f9bd1cc280259c7de4b upstream. Currently all backward jumps crash for JITed s390x eBPF programs with an illegal instruction program check and kernel panic. Because for negative values the opcode of the jump instruction is overridden by the negative branch offset, an illegal instruction is generated by the JIT: 000003ff802da378: c01100000002 lgfi %r1,2 000003ff802da37e: fffffff52065 unknown <-- illegal instruction 000003ff802da384: b904002e lgr %r2,%r14 So fix this and mask the offset in order not to damage the opcode. Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/net/bpf_jit_comp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 55423d8be5801..9afb9d602f841 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -227,7 +227,7 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1) ({ \ /* Branch instruction needs 6 bytes */ \ int rel = (addrs[i + off + 1] - (addrs[i + 1] - 6)) / 2;\ - _EMIT6(op1 | reg(b1, b2) << 16 | rel, op2 | mask); \ + _EMIT6(op1 | reg(b1, b2) << 16 | (rel & 0xffff), op2 | mask); \ REG_SET_SEEN(b1); \ REG_SET_SEEN(b2); \ }) From 2c7a81c60df147a14735ca3c0f039cf4507b65fc Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Thu, 11 Jun 2015 19:59:04 +0200 Subject: [PATCH 0059/2091] s390/kdump: fix REGSET_VX_LOW vector register ELF notes commit 3c8e5105e759e7b2d88ea8a85b1285e535bc7500 upstream. The REGSET_VX_LOW ELF notes should contain the lower 64 bit halves of the first sixteen 128 bit vector registers. Unfortunately currently we copy the upper halves. Fix this and correctly copy the lower halves.
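The small-looking change hides a byte-order detail: on big-endian s390, the low 64 bits of a 128-bit vector register are its last two 32-bit words. Assuming __vector128 is laid out as four 32-bit words, the standalone demo below (not kernel code) shows why &vx_regs[i].u[2] is the right source for the 8-byte copy.

#include <stdint.h>
#include <string.h>
#include <stdio.h>

/* Assumed layout: four 32-bit words, big-endian register image. */
typedef struct { uint32_t u[4]; } vector128_demo;

int main(void)
{
	vector128_demo v = { { 0x11111111, 0x22222222, 0x33333333, 0x44444444 } };
	uint64_t low;

	memcpy(&low, &v.u[2], 8);	/* what the fixed code copies */
	/* On a big-endian host this prints 3333333344444444; copying from
	 * &v.u[0] (the old, buggy source) would print 1111111122222222. */
	printf("%016llx\n", (unsigned long long)low);
	return 0;
}
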
Fixes: a62bc0739253 ("s390/kdump: add support for vector extension") Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/kernel/crash_dump.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index 9f73c80590229..49b74454d7ee6 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -415,7 +415,7 @@ static void *nt_s390_vx_low(void *ptr, __vector128 *vx_regs) ptr += len; /* Copy lower halves of SIMD registers 0-15 */ for (i = 0; i < 16; i++) { - memcpy(ptr, &vx_regs[i], 8); + memcpy(ptr, &vx_regs[i].u[2], 8); ptr += 8; } return ptr; From aae3d9fecc22c1c7565c94f0818704a1908543ce Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Mon, 29 Jun 2015 16:44:01 +0200 Subject: [PATCH 0060/2091] KVM: s390: virtio-ccw: don't overwrite config space values commit 431dae778aea4eed31bd12e5ee82edc571cd4d70 upstream. Eric noticed problems with vhost-scsi and virtio-ccw: vhost-scsi complained about overwriting values in the config space, which was triggered by a broken implementation of virtio-ccw's config get/set routines. It was probably sheer luck that we did not hit this before. When writing a value to the config space, the WRITE_CONF ccw will always write from the beginning of the config space up to and including the value to be set. If the config space up to the value has not yet been retrieved from the device, however, we'll end up overwriting values. Keep track of the known config space and update if needed to avoid this. Moreover, READ_CONF will only read the number of bytes it has been instructed to retrieve, so we must not copy more than that to the buffer, or we might overwrite trailing values. Reported-by: Eric Farman Signed-off-by: Cornelia Huck Reviewed-by: Eric Farman Tested-by: Eric Farman Signed-off-by: Christian Borntraeger Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- drivers/s390/kvm/virtio_ccw.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/s390/kvm/virtio_ccw.c b/drivers/s390/kvm/virtio_ccw.c index 6f1fa1773e764..f8d8fdb26b72a 100644 --- a/drivers/s390/kvm/virtio_ccw.c +++ b/drivers/s390/kvm/virtio_ccw.c @@ -65,6 +65,7 @@ struct virtio_ccw_device { bool is_thinint; bool going_away; bool device_lost; + unsigned int config_ready; void *airq_info; }; @@ -833,8 +834,11 @@ static void virtio_ccw_get_config(struct virtio_device *vdev, if (ret) goto out_free; - memcpy(vcdev->config, config_area, sizeof(vcdev->config)); - memcpy(buf, &vcdev->config[offset], len); + memcpy(vcdev->config, config_area, offset + len); + if (buf) + memcpy(buf, &vcdev->config[offset], len); + if (vcdev->config_ready < offset + len) + vcdev->config_ready = offset + len; out_free: kfree(config_area); @@ -857,6 +861,9 @@ static void virtio_ccw_set_config(struct virtio_device *vdev, if (!config_area) goto out_free; + /* Make sure we don't overwrite fields. */ + if (vcdev->config_ready < offset) + virtio_ccw_get_config(vdev, 0, NULL, offset); memcpy(&vcdev->config[offset], buf, len); /* Write the config area to the host. */ memcpy(config_area, vcdev->config, sizeof(vcdev->config)); From da75b45af7778af80c7c1d9853868e9bb4b75065 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 17 Jun 2015 14:43:35 +0100 Subject: [PATCH 0061/2091] KVM: arm/arm64: vgic: Avoid injecting reserved IRQ numbers commit 4839ddc27b7212ec58874f62c97da7400c8523be upstream. 
Commit fd1d0ddf2ae9 (KVM: arm/arm64: check IRQ number on userland injection) rightly limited the range of interrupts userspace can inject in a guest, but failed to consider the (unlikely) case where a guest is configured with 1024 interrupts. In this case, interrupts ranging from 1020 to 1023 are unusable, as they have a special meaning for the GIC CPU interface. Make sure that these numbers cannot be used as an IRQ. Also delete a redundant (and similarly buggy) check in kvm_set_irq. Reported-by: Peter Maydell Cc: Andre Przywara Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- virt/kvm/arm/vgic.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 78fb8201014f7..950064a0942dd 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1561,7 +1561,7 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, goto out; } - if (irq_num >= kvm->arch.vgic.nr_irqs) + if (irq_num >= min(kvm->arch.vgic.nr_irqs, 1020)) return -EINVAL; vcpu_id = vgic_update_irq_pending(kvm, cpuid, irq_num, level); @@ -2161,10 +2161,7 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, BUG_ON(!vgic_initialized(kvm)); - if (spi > kvm->arch.vgic.nr_irqs) - return -EINVAL; return kvm_vgic_inject_irq(kvm, 0, spi, level); - } /* MSI not implemented yet */ From 58382447b9a9989da551a7b17e72756f6e238bb8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Tue, 30 Jun 2015 22:19:17 +0200 Subject: [PATCH 0062/2091] KVM: x86: properly restore LVT0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit db1385624c686fe99fe2d1b61a36e1537b915d08 upstream. Legacy NMI watchdog didn't work after migration/resume, because vapics_in_nmi_mode was left at 0. Signed-off-by: Radim Krčmář Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/lapic.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 4c7deb4f78a14..556397ee48096 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1808,6 +1808,7 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu, apic_update_ppr(apic); hrtimer_cancel(&apic->lapic_timer.timer); apic_update_lvtt(apic); + apic_manage_nmi_watchdog(apic, kvm_apic_get_reg(apic, APIC_LVT0)); update_divide_count(apic); start_apic_timer(apic); apic->irr_pending = true; From 8ed8b759437fadfd18202be9bb65a7f80c3c6d63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Wed, 1 Jul 2015 15:31:49 +0200 Subject: [PATCH 0063/2091] KVM: x86: make vapics_in_nmi_mode atomic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 42720138b06301cc8a7ee8a495a6d021c4b6a9bc upstream. Writes were a bit racy, but hard to turn into a bug at the same time. (Particularly because modern Linux doesn't use this feature anymore.) Signed-off-by: Radim Krčmář [Actually the next patch makes it much, much easier to trigger the race so I'm including this one for stable@ as well.
- Paolo] Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/i8254.c | 2 +- arch/x86/kvm/lapic.c | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f4a555beef190..41b06fca39f70 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -591,7 +591,7 @@ struct kvm_arch { struct kvm_pic *vpic; struct kvm_ioapic *vioapic; struct kvm_pit *vpit; - int vapics_in_nmi_mode; + atomic_t vapics_in_nmi_mode; struct mutex apic_map_lock; struct kvm_apic_map *apic_map; diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 4dce6f8b6129e..f90952f64e796 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -305,7 +305,7 @@ static void pit_do_work(struct kthread_work *work) * LVT0 to NMI delivery. Other PIC interrupts are just sent to * VCPU0, and only if its LVT0 is in EXTINT mode. */ - if (kvm->arch.vapics_in_nmi_mode > 0) + if (atomic_read(&kvm->arch.vapics_in_nmi_mode) > 0) kvm_for_each_vcpu(i, vcpu, kvm) kvm_apic_nmi_wd_deliver(vcpu); } diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 556397ee48096..67d07e0514368 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1250,10 +1250,10 @@ static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val) if (!nmi_wd_enabled) { apic_debug("Receive NMI setting on APIC_LVT0 " "for cpu %d\n", apic->vcpu->vcpu_id); - apic->vcpu->kvm->arch.vapics_in_nmi_mode++; + atomic_inc(&apic->vcpu->kvm->arch.vapics_in_nmi_mode); } } else if (nmi_wd_enabled) - apic->vcpu->kvm->arch.vapics_in_nmi_mode--; + atomic_dec(&apic->vcpu->kvm->arch.vapics_in_nmi_mode); } static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) From 4c1215740834bef3dec9e1ef9c4f5bbe86f1f6cb Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 21 May 2015 16:05:52 +0200 Subject: [PATCH 0064/2091] fs: Fix S_NOSEC handling commit 2426f3910069ed47c0cc58559a6d088af7920201 upstream. file_remove_suid() could mistakenly set S_NOSEC inode bit when root was modifying the file. As a result following writes to the file by ordinary user would avoid clearing suid or sgid bits. Fix the bug by checking actual mode bits before setting S_NOSEC. Signed-off-by: Jan Kara Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/inode.c b/fs/inode.c index ea37cd17b53f0..6e342cadef81b 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1693,8 +1693,8 @@ int file_remove_suid(struct file *file) error = security_inode_killpriv(dentry); if (!error && killsuid) error = __remove_suid(dentry, killsuid); - if (!error && (inode->i_sb->s_flags & MS_NOSEC)) - inode->i_flags |= S_NOSEC; + if (!error) + inode_has_no_xattr(inode); return error; } From 60c92e3205a6eb78c7970bf370cc9c550bd0c132 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Wed, 10 Jun 2015 10:09:32 +1000 Subject: [PATCH 0065/2091] fs/ufs: revert "ufs: fix deadlocks introduced by sb mutex merge" commit 13b987ea275840d74d9df9a44326632fab1894da upstream. This reverts commit 9ef7db7f38d0 ("ufs: fix deadlocks introduced by sb mutex merge") That patch tried to solve commit 0244756edc4b98c ("ufs: sb mutex merge + mutex_destroy") which is itself partially reverted due to multiple deadlocks. 
Signed-off-by: Fabian Frederick Suggested-by: Jan Kara Cc: Ian Campbell Cc: Evgeniy Dushistov Cc: Alexey Khoroshilov Cc: Roger Pau Monne Cc: Ian Jackson Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/ufs/inode.c | 5 ++++- fs/ufs/namei.c | 14 ++++++++------ 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index be7d42c7d9382..2d93ab07da8a6 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -902,6 +902,9 @@ void ufs_evict_inode(struct inode * inode) invalidate_inode_buffers(inode); clear_inode(inode); - if (want_delete) + if (want_delete) { + lock_ufs(inode->i_sb); ufs_free_inode(inode); + unlock_ufs(inode->i_sb); + } } diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c index e491a93a7e9af..1f5223c9e1e2c 100644 --- a/fs/ufs/namei.c +++ b/fs/ufs/namei.c @@ -128,12 +128,12 @@ static int ufs_symlink (struct inode * dir, struct dentry * dentry, if (l > sb->s_blocksize) goto out_notlocked; + lock_ufs(dir->i_sb); inode = ufs_new_inode(dir, S_IFLNK | S_IRWXUGO); err = PTR_ERR(inode); if (IS_ERR(inode)) - goto out_notlocked; + goto out; - lock_ufs(dir->i_sb); if (l > UFS_SB(sb)->s_uspi->s_maxsymlinklen) { /* slow symlink */ inode->i_op = &ufs_symlink_inode_operations; @@ -184,9 +184,13 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode) struct inode * inode; int err; + lock_ufs(dir->i_sb); + inode_inc_link_count(dir); + inode = ufs_new_inode(dir, S_IFDIR|mode); + err = PTR_ERR(inode); if (IS_ERR(inode)) - return PTR_ERR(inode); + goto out_dir; inode->i_op = &ufs_dir_inode_operations; inode->i_fop = &ufs_dir_operations; @@ -194,9 +198,6 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode) inode_inc_link_count(inode); - lock_ufs(dir->i_sb); - inode_inc_link_count(dir); - err = ufs_make_empty(inode, dir); if (err) goto out_fail; @@ -215,6 +216,7 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode) inode_dec_link_count(inode); unlock_new_inode(inode); iput (inode); +out_dir: inode_dec_link_count(dir); unlock_ufs(dir->i_sb); goto out; From 0da5a72210cbe177ab1043be058403ca49eb7a2b Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Wed, 10 Jun 2015 10:09:32 +1000 Subject: [PATCH 0066/2091] fs/ufs: restore s_lock mutex commit cdd9eefdf905e92e7fc6cc393314efe68dc6ff66 upstream. Commit 0244756edc4b98c ("ufs: sb mutex merge + mutex_destroy") generated deadlocks in read/write mode on mkdir. 
This patch partially reverts it keeping fixes by Andrew Morton and mutex_destroy() [AV: fixed a missing bit in ufs_remount()] Signed-off-by: Fabian Frederick Reported-by: Ian Campbell Suggested-by: Jan Kara Cc: Ian Campbell Cc: Evgeniy Dushistov Cc: Alexey Khoroshilov Cc: Roger Pau Monne Cc: Ian Jackson Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/ufs/balloc.c | 34 +++++++++++++++++----------------- fs/ufs/ialloc.c | 16 ++++++++-------- fs/ufs/super.c | 10 ++++++++++ fs/ufs/ufs.h | 1 + 4 files changed, 36 insertions(+), 25 deletions(-) diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c index 2c1036080d527..a7106eda50241 100644 --- a/fs/ufs/balloc.c +++ b/fs/ufs/balloc.c @@ -51,8 +51,8 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count) if (ufs_fragnum(fragment) + count > uspi->s_fpg) ufs_error (sb, "ufs_free_fragments", "internal error"); - - lock_ufs(sb); + + mutex_lock(&UFS_SB(sb)->s_lock); cgno = ufs_dtog(uspi, fragment); bit = ufs_dtogd(uspi, fragment); @@ -115,13 +115,13 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count) if (sb->s_flags & MS_SYNCHRONOUS) ubh_sync_block(UCPI_UBH(ucpi)); ufs_mark_sb_dirty(sb); - - unlock_ufs(sb); + + mutex_unlock(&UFS_SB(sb)->s_lock); UFSD("EXIT\n"); return; failed: - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); UFSD("EXIT (FAILED)\n"); return; } @@ -151,7 +151,7 @@ void ufs_free_blocks(struct inode *inode, u64 fragment, unsigned count) goto failed; } - lock_ufs(sb); + mutex_lock(&UFS_SB(sb)->s_lock); do_more: overflow = 0; @@ -211,12 +211,12 @@ void ufs_free_blocks(struct inode *inode, u64 fragment, unsigned count) } ufs_mark_sb_dirty(sb); - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); UFSD("EXIT\n"); return; failed_unlock: - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); failed: UFSD("EXIT (FAILED)\n"); return; @@ -357,7 +357,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, usb1 = ubh_get_usb_first(uspi); *err = -ENOSPC; - lock_ufs(sb); + mutex_lock(&UFS_SB(sb)->s_lock); tmp = ufs_data_ptr_to_cpu(sb, p); if (count + ufs_fragnum(fragment) > uspi->s_fpb) { @@ -378,19 +378,19 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, "fragment %llu, tmp %llu\n", (unsigned long long)fragment, (unsigned long long)tmp); - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); return INVBLOCK; } if (fragment < UFS_I(inode)->i_lastfrag) { UFSD("EXIT (ALREADY ALLOCATED)\n"); - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); return 0; } } else { if (tmp) { UFSD("EXIT (ALREADY ALLOCATED)\n"); - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); return 0; } } @@ -399,7 +399,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, * There is not enough space for user on the device */ if (!capable(CAP_SYS_RESOURCE) && ufs_freespace(uspi, UFS_MINFREE) <= 0) { - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); UFSD("EXIT (FAILED)\n"); return 0; } @@ -424,7 +424,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, ufs_clear_frags(inode, result + oldcount, newcount - oldcount, locked_page != NULL); } - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); UFSD("EXIT, result %llu\n", (unsigned long long)result); return result; } @@ -439,7 +439,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, fragment + count); ufs_clear_frags(inode, result + oldcount, newcount - oldcount, locked_page != NULL); - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); 
UFSD("EXIT, result %llu\n", (unsigned long long)result); return result; } @@ -477,7 +477,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, *err = 0; UFS_I(inode)->i_lastfrag = max(UFS_I(inode)->i_lastfrag, fragment + count); - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); if (newcount < request) ufs_free_fragments (inode, result + newcount, request - newcount); ufs_free_fragments (inode, tmp, oldcount); @@ -485,7 +485,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, return result; } - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); UFSD("EXIT (FAILED)\n"); return 0; } diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c index 7caa016528883..fd0203ce1f7fd 100644 --- a/fs/ufs/ialloc.c +++ b/fs/ufs/ialloc.c @@ -69,11 +69,11 @@ void ufs_free_inode (struct inode * inode) ino = inode->i_ino; - lock_ufs(sb); + mutex_lock(&UFS_SB(sb)->s_lock); if (!((ino > 1) && (ino < (uspi->s_ncg * uspi->s_ipg )))) { ufs_warning(sb, "ufs_free_inode", "reserved inode or nonexistent inode %u\n", ino); - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); return; } @@ -81,7 +81,7 @@ void ufs_free_inode (struct inode * inode) bit = ufs_inotocgoff (ino); ucpi = ufs_load_cylinder (sb, cg); if (!ucpi) { - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); return; } ucg = ubh_get_ucg(UCPI_UBH(ucpi)); @@ -115,7 +115,7 @@ void ufs_free_inode (struct inode * inode) ubh_sync_block(UCPI_UBH(ucpi)); ufs_mark_sb_dirty(sb); - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); UFSD("EXIT\n"); } @@ -193,7 +193,7 @@ struct inode *ufs_new_inode(struct inode *dir, umode_t mode) sbi = UFS_SB(sb); uspi = sbi->s_uspi; - lock_ufs(sb); + mutex_lock(&sbi->s_lock); /* * Try to place the inode in its parent directory @@ -331,21 +331,21 @@ struct inode *ufs_new_inode(struct inode *dir, umode_t mode) sync_dirty_buffer(bh); brelse(bh); } - unlock_ufs(sb); + mutex_unlock(&sbi->s_lock); UFSD("allocating inode %lu\n", inode->i_ino); UFSD("EXIT\n"); return inode; fail_remove_inode: - unlock_ufs(sb); + mutex_unlock(&sbi->s_lock); clear_nlink(inode); unlock_new_inode(inode); iput(inode); UFSD("EXIT (FAILED): err %d\n", err); return ERR_PTR(err); failed: - unlock_ufs(sb); + mutex_unlock(&sbi->s_lock); make_bad_inode(inode); iput (inode); UFSD("EXIT (FAILED): err %d\n", err); diff --git a/fs/ufs/super.c b/fs/ufs/super.c index b3bc3e7ae79db..afe9955654c87 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -694,6 +694,7 @@ static int ufs_sync_fs(struct super_block *sb, int wait) unsigned flags; lock_ufs(sb); + mutex_lock(&UFS_SB(sb)->s_lock); UFSD("ENTER\n"); @@ -711,6 +712,7 @@ static int ufs_sync_fs(struct super_block *sb, int wait) ufs_put_cstotal(sb); UFSD("EXIT\n"); + mutex_unlock(&UFS_SB(sb)->s_lock); unlock_ufs(sb); return 0; @@ -1277,6 +1279,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) sync_filesystem(sb); lock_ufs(sb); + mutex_lock(&UFS_SB(sb)->s_lock); uspi = UFS_SB(sb)->s_uspi; flags = UFS_SB(sb)->s_flags; usb1 = ubh_get_usb_first(uspi); @@ -1290,6 +1293,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) new_mount_opt = 0; ufs_set_opt (new_mount_opt, ONERROR_LOCK); if (!ufs_parse_options (data, &new_mount_opt)) { + mutex_unlock(&UFS_SB(sb)->s_lock); unlock_ufs(sb); return -EINVAL; } @@ -1297,12 +1301,14 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) new_mount_opt |= ufstype; } else if ((new_mount_opt & UFS_MOUNT_UFSTYPE) != ufstype) { pr_err("ufstype can't be changed during remount\n"); 
+ mutex_unlock(&UFS_SB(sb)->s_lock); unlock_ufs(sb); return -EINVAL; } if ((*mount_flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) { UFS_SB(sb)->s_mount_opt = new_mount_opt; + mutex_unlock(&UFS_SB(sb)->s_lock); unlock_ufs(sb); return 0; } @@ -1326,6 +1332,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) */ #ifndef CONFIG_UFS_FS_WRITE pr_err("ufs was compiled with read-only support, can't be mounted as read-write\n"); + mutex_unlock(&UFS_SB(sb)->s_lock); unlock_ufs(sb); return -EINVAL; #else @@ -1335,11 +1342,13 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) ufstype != UFS_MOUNT_UFSTYPE_SUNx86 && ufstype != UFS_MOUNT_UFSTYPE_UFS2) { pr_err("this ufstype is read-only supported\n"); + mutex_unlock(&UFS_SB(sb)->s_lock); unlock_ufs(sb); return -EINVAL; } if (!ufs_read_cylinder_structures(sb)) { pr_err("failed during remounting\n"); + mutex_unlock(&UFS_SB(sb)->s_lock); unlock_ufs(sb); return -EPERM; } @@ -1347,6 +1356,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) #endif } UFS_SB(sb)->s_mount_opt = new_mount_opt; + mutex_unlock(&UFS_SB(sb)->s_lock); unlock_ufs(sb); return 0; } diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h index 2a07396d5f9eb..cf6368d42d4ab 100644 --- a/fs/ufs/ufs.h +++ b/fs/ufs/ufs.h @@ -30,6 +30,7 @@ struct ufs_sb_info { int work_queued; /* non-zero if the delayed work is queued */ struct delayed_work sync_work; /* FS sync delayed work */ spinlock_t work_lock; /* protects sync_work and work_queued */ + struct mutex s_lock; }; struct ufs_inode_info { From 1eda16d166170124b56e4090075ce6997c3d43af Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 24 May 2015 09:25:00 -0500 Subject: [PATCH 0067/2091] vfs: Remove incorrect debugging WARN in prepend_path commit 93e3bce6287e1fb3e60d3324ed08555b5bbafa89 upstream. The warning message in prepend_path is unclear and outdated. It was added as a warning that the mechanism for generating names of pseudo files had been removed from prepend_path and d_dname should be used instead. Unfortunately the warning reads like a general warning, making it unclear what to do with it. Remove the warning. The transition it was added to warn about is long over, and I added code several years ago which in rare cases causes the warning to fire on legitimate code, and the warning is now firing and scaring people for no good reason. Reported-by: Ivan Delalande Reported-by: Omar Sandoval Fixes: f48cfddc6729e ("vfs: In d_path don't call d_dname on a mount point") Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- fs/dcache.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 37b5afdaf6989..50bb3c207621c 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2927,17 +2927,6 @@ static int prepend_path(const struct path *path, vfsmnt = &mnt->mnt; continue; } - /* - * Filesystems needing to implement special "root names" - * should do so with ->d_dname() - */ - if (IS_ROOT(dentry) && - (dentry->d_name.len != 1 || - dentry->d_name.name[0] != '/')) { - WARN(1, "Root dentry has weird name <%.*s>\n", - (int) dentry->d_name.len, - dentry->d_name.name); - } if (!error) error = is_mounted(vfsmnt) ? 1 : 2; break; From c89d4319ae55186496c43b7a6e510aa1d09dd387 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 7 Jan 2015 08:10:09 -0600 Subject: [PATCH 0068/2091] vfs: Ignore unlocked mounts in fs_fully_visible commit ceeb0e5d39fcdf4dca2c997bf225c7fc49200b37 upstream. 
Limit the mounts fs_fully_visible considers to locked mounts. Unlocked can always be unmounted so considering them adds hassle but no security benefit. Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- fs/namespace.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 1b9e11167baed..1d4a97c573e03 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -3185,11 +3185,15 @@ bool fs_fully_visible(struct file_system_type *type) if (mnt->mnt.mnt_root != mnt->mnt.mnt_sb->s_root) continue; - /* This mount is not fully visible if there are any child mounts - * that cover anything except for empty directories. + /* This mount is not fully visible if there are any + * locked child mounts that cover anything except for + * empty directories. */ list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) { struct inode *inode = child->mnt_mountpoint->d_inode; + /* Only worry about locked mounts */ + if (!(mnt->mnt.mnt_flags & MNT_LOCKED)) + continue; if (!S_ISDIR(inode->i_mode)) goto next; if (inode->i_nlink > 2) From 918ef5dc2e9761f4cf34e8059e74cc2f834234ca Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 1 Jun 2015 14:52:04 +0200 Subject: [PATCH 0069/2091] ufs: Fix warning from unlock_new_inode() commit 12ecbb4b1d765a5076920999298d9625439dbe58 upstream. Commit e4502c63f56aeca88 (ufs: deal with nfsd/iget races) introduced unlock_new_inode() call into ufs_add_nondir(). However that function gets called also from ufs_link() which hands it already initialized inode and thus unlock_new_inode() complains. The problem is harmless but annoying. Fix the problem by opencoding necessary stuff in ufs_link() Fixes: e4502c63f56aeca887ced37f24e0def1ef11cec8 Signed-off-by: Jan Kara Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/ufs/namei.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c index 1f5223c9e1e2c..2346b83fa12ba 100644 --- a/fs/ufs/namei.c +++ b/fs/ufs/namei.c @@ -174,7 +174,12 @@ static int ufs_link (struct dentry * old_dentry, struct inode * dir, inode_inc_link_count(inode); ihold(inode); - error = ufs_add_nondir(dentry, inode); + error = ufs_add_link(dentry, inode); + if (error) { + inode_dec_link_count(inode); + iput(inode); + } else + d_instantiate(dentry, inode); unlock_ufs(dir->i_sb); return error; } From b94332a90b6572713d0a4ca92563e9655a7dd33d Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 2 Jun 2015 11:26:34 +0200 Subject: [PATCH 0070/2091] ufs: Fix possible deadlock when looking up directories commit 514d748f69c97a51a2645eb198ac5c6218f22ff9 upstream. Commit e4502c63f56aeca88 (ufs: deal with nfsd/iget races) made ufs create inodes with I_NEW flag set. However ufs_mkdir() never cleared this flag. Thus if someone ever tried to lookup the directory by inode number, he would deadlock waiting for I_NEW to be cleared. Luckily this mostly happens only if the filesystem is exported over NFS since otherwise we have the inode attached to dentry and don't look it up by inode number. In rare cases dentry can get freed without inode being freed and then we'd hit the deadlock even without NFS export. Fix the problem by clearing I_NEW before instantiating new directory inode. 
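For context on why one missing call can hang a lookup: newly created in-core inodes carry I_NEW, and iget_locked() callers (including the NFS export path that looks inodes up by number) sleep until it is cleared by unlock_new_inode(). The sketch below assumes only this generic VFS contract and is not a copy of ufs code.

#include <linux/fs.h>
#include <linux/err.h>

/*
 * Hedged sketch of the generic pattern: an inode returned with I_NEW set must
 * be published via unlock_new_inode() once initialised, otherwise later
 * iget_locked() callers wait forever on I_NEW.
 */
static struct inode *example_get_dir_inode(struct super_block *sb, unsigned long ino)
{
	struct inode *inode = iget_locked(sb, ino);

	if (!inode)
		return ERR_PTR(-ENOMEM);
	if (!(inode->i_state & I_NEW))
		return inode;		/* already initialised and in the icache */

	/* ... fill in i_mode, i_op, i_fop, link count, etc. ... */

	unlock_new_inode(inode);	/* clears I_NEW and wakes waiters -- the
					 * step ufs_mkdir() was missing */
	return inode;
}
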
Fixes: e4502c63f56aeca887ced37f24e0def1ef11cec8 Reported-by: Fabian Frederick Signed-off-by: Jan Kara Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/ufs/namei.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c index 2346b83fa12ba..60ee32249b726 100644 --- a/fs/ufs/namei.c +++ b/fs/ufs/namei.c @@ -212,6 +212,7 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode) goto out_fail; unlock_ufs(dir->i_sb); + unlock_new_inode(inode); d_instantiate(dentry, inode); out: return err; From 4e1fc88c6112cb9b691807cc4fe0b6bfa66b1e60 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Wed, 17 Jun 2015 18:15:45 +0200 Subject: [PATCH 0071/2091] fs/ufs: restore s_lock mutex_init() commit e4f95517f18271b1da36cfc5d700e46844396d6e upstream. Add last missing line in commit "cdd9eefdf905" ("fs/ufs: restore s_lock mutex") Signed-off-by: Fabian Frederick Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/ufs/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ufs/super.c b/fs/ufs/super.c index afe9955654c87..dc33f94163404 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -801,6 +801,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent) UFSD("flag %u\n", (int)(sb->s_flags & MS_RDONLY)); mutex_init(&sbi->mutex); + mutex_init(&sbi->s_lock); spin_lock_init(&sbi->work_lock); INIT_DELAYED_WORK(&sbi->sync_work, delayed_sync_fs); /* From 5cf9896dc5c72a6c68c36140568b755f697f7760 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 10 Jul 2015 09:50:06 -0700 Subject: [PATCH 0072/2091] Linux 4.1.2 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 1caf4ad3eb8a7..cef84c061f027 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 1 -SUBLEVEL = 1 +SUBLEVEL = 2 EXTRAVERSION = NAME = Series 4800 From c2f633b99857d27333de18f53d123a180672c52b Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 9 May 2015 15:54:49 -0500 Subject: [PATCH 0073/2091] fs: Add helper functions for permanently empty directories. commit fbabfd0f4ee2e8847bf56edf481249ad1bb8c44d upstream. To ensure it is safe to mount proc and sysfs I need to check if filesystems that are mounted on top of them are mounted on truly empty directories. Given that some directories can gain entries over time, knowing that a directory is empty right now is insufficient. Therefore add supporting infrastructure for permantently empty directories that proc and sysfs can use when they create mount points for filesystems and fs_fully_visible can use to test for permanently empty directories to ensure that nothing will be gained by mounting a fresh copy of proc or sysfs. Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- fs/libfs.c | 96 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/fs.h | 2 + 2 files changed, 98 insertions(+) diff --git a/fs/libfs.c b/fs/libfs.c index cb1fb4b9b6377..02813592e1216 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -1093,3 +1093,99 @@ simple_nosetlease(struct file *filp, long arg, struct file_lock **flp, return -EINVAL; } EXPORT_SYMBOL(simple_nosetlease); + + +/* + * Operations for a permanently empty directory. 
+ */ +static struct dentry *empty_dir_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) +{ + return ERR_PTR(-ENOENT); +} + +static int empty_dir_getattr(struct vfsmount *mnt, struct dentry *dentry, + struct kstat *stat) +{ + struct inode *inode = d_inode(dentry); + generic_fillattr(inode, stat); + return 0; +} + +static int empty_dir_setattr(struct dentry *dentry, struct iattr *attr) +{ + return -EPERM; +} + +static int empty_dir_setxattr(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) +{ + return -EOPNOTSUPP; +} + +static ssize_t empty_dir_getxattr(struct dentry *dentry, const char *name, + void *value, size_t size) +{ + return -EOPNOTSUPP; +} + +static int empty_dir_removexattr(struct dentry *dentry, const char *name) +{ + return -EOPNOTSUPP; +} + +static ssize_t empty_dir_listxattr(struct dentry *dentry, char *list, size_t size) +{ + return -EOPNOTSUPP; +} + +static const struct inode_operations empty_dir_inode_operations = { + .lookup = empty_dir_lookup, + .permission = generic_permission, + .setattr = empty_dir_setattr, + .getattr = empty_dir_getattr, + .setxattr = empty_dir_setxattr, + .getxattr = empty_dir_getxattr, + .removexattr = empty_dir_removexattr, + .listxattr = empty_dir_listxattr, +}; + +static loff_t empty_dir_llseek(struct file *file, loff_t offset, int whence) +{ + /* An empty directory has two entries . and .. at offsets 0 and 1 */ + return generic_file_llseek_size(file, offset, whence, 2, 2); +} + +static int empty_dir_readdir(struct file *file, struct dir_context *ctx) +{ + dir_emit_dots(file, ctx); + return 0; +} + +static const struct file_operations empty_dir_operations = { + .llseek = empty_dir_llseek, + .read = generic_read_dir, + .iterate = empty_dir_readdir, + .fsync = noop_fsync, +}; + + +void make_empty_dir_inode(struct inode *inode) +{ + set_nlink(inode, 2); + inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; + inode->i_uid = GLOBAL_ROOT_UID; + inode->i_gid = GLOBAL_ROOT_GID; + inode->i_rdev = 0; + inode->i_size = 2; + inode->i_blkbits = PAGE_SHIFT; + inode->i_blocks = 0; + + inode->i_op = &empty_dir_inode_operations; + inode->i_fop = &empty_dir_operations; +} + +bool is_empty_dir_inode(struct inode *inode) +{ + return (inode->i_fop == &empty_dir_operations) && + (inode->i_op == &empty_dir_inode_operations); +} diff --git a/include/linux/fs.h b/include/linux/fs.h index 35ec87e490b1a..0d5ae7d5dc53f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2780,6 +2780,8 @@ extern struct dentry *simple_lookup(struct inode *, struct dentry *, unsigned in extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *); extern const struct file_operations simple_dir_operations; extern const struct inode_operations simple_dir_inode_operations; +extern void make_empty_dir_inode(struct inode *inode); +extern bool is_empty_dir_inode(struct inode *inode); struct tree_descr { char *name; const struct file_operations *ops; int mode; }; struct dentry *d_alloc_name(struct dentry *, const char *); extern int simple_fill_super(struct super_block *, unsigned long, struct tree_descr *); From bdbdf7ee9d561da7b4b840435c963344061953d6 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 9 May 2015 22:09:14 -0500 Subject: [PATCH 0074/2091] sysctl: Allow creating permanently empty directories that serve as mountpoints. commit f9bd6733d3f11e24f3949becf277507d422ee1eb upstream. Add a magic sysctl table sysctl_mount_point that when used to create a directory forces that directory to be permanently empty. 
Update the code to use make_empty_dir_inode when accessing permanently empty directories. Update the code to not allow adding to permanently empty directories. Update /proc/sys/fs/binfmt_misc to be a permanently empty directory. Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- fs/proc/proc_sysctl.c | 37 +++++++++++++++++++++++++++++++++++++ include/linux/sysctl.h | 3 +++ kernel/sysctl.c | 8 +------- 3 files changed, 41 insertions(+), 7 deletions(-) diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index fea2561d773bb..fdda62e6115e1 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -19,6 +19,28 @@ static const struct inode_operations proc_sys_inode_operations; static const struct file_operations proc_sys_dir_file_operations; static const struct inode_operations proc_sys_dir_operations; +/* Support for permanently empty directories */ + +struct ctl_table sysctl_mount_point[] = { + { } +}; + +static bool is_empty_dir(struct ctl_table_header *head) +{ + return head->ctl_table[0].child == sysctl_mount_point; +} + +static void set_empty_dir(struct ctl_dir *dir) +{ + dir->header.ctl_table[0].child = sysctl_mount_point; +} + +static void clear_empty_dir(struct ctl_dir *dir) + +{ + dir->header.ctl_table[0].child = NULL; +} + void proc_sys_poll_notify(struct ctl_table_poll *poll) { if (!poll) @@ -187,6 +209,17 @@ static int insert_header(struct ctl_dir *dir, struct ctl_table_header *header) struct ctl_table *entry; int err; + /* Is this a permanently empty directory? */ + if (is_empty_dir(&dir->header)) + return -EROFS; + + /* Am I creating a permanently empty directory? */ + if (header->ctl_table == sysctl_mount_point) { + if (!RB_EMPTY_ROOT(&dir->root)) + return -EINVAL; + set_empty_dir(dir); + } + dir->header.nreg++; header->parent = dir; err = insert_links(header); @@ -202,6 +235,8 @@ static int insert_header(struct ctl_dir *dir, struct ctl_table_header *header) erase_header(header); put_links(header); fail_links: + if (header->ctl_table == sysctl_mount_point) + clear_empty_dir(dir); header->parent = NULL; drop_sysctl_table(&dir->header); return err; @@ -419,6 +454,8 @@ static struct inode *proc_sys_make_inode(struct super_block *sb, inode->i_mode |= S_IFDIR; inode->i_op = &proc_sys_dir_operations; inode->i_fop = &proc_sys_dir_file_operations; + if (is_empty_dir(head)) + make_empty_dir_inode(inode); } out: return inode; diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 795d5fea56977..fa7bc29925c92 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -188,6 +188,9 @@ struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path, void unregister_sysctl_table(struct ctl_table_header * table); extern int sysctl_init(void); + +extern struct ctl_table sysctl_mount_point[]; + #else /* CONFIG_SYSCTL */ static inline struct ctl_table_header *register_sysctl_table(struct ctl_table * table) { diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 2082b1a88fb9a..c3eee4c6d6c17 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1531,12 +1531,6 @@ static struct ctl_table vm_table[] = { { } }; -#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE) -static struct ctl_table binfmt_misc_table[] = { - { } -}; -#endif - static struct ctl_table fs_table[] = { { .procname = "inode-nr", @@ -1690,7 +1684,7 @@ static struct ctl_table fs_table[] = { { .procname = "binfmt_misc", .mode = 0555, - .child = binfmt_misc_table, + .child = sysctl_mount_point, }, #endif { From a2020b02c1ec4fffddb77785773dff533428f814 Mon 
Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 11 May 2015 16:44:25 -0500 Subject: [PATCH 0075/2091] proc: Allow creating permanently empty directories that serve as mount points commit eb6d38d5427b3ad42f5268da0f1dd31bb0af1264 upstream. Add a new function proc_create_mount_point that when used to creates a directory that can not be added to. Add a new function is_empty_pde to test if a function is a mount point. Update the code to use make_empty_dir_inode when reporting a permanently empty directory to the vfs. Update the code to not allow adding to permanently empty directories. Update /proc/openprom and /proc/fs/nfsd to be permanently empty directories. Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- fs/proc/generic.c | 23 +++++++++++++++++++++++ fs/proc/inode.c | 4 ++++ fs/proc/internal.h | 6 ++++++ fs/proc/root.c | 4 ++-- 4 files changed, 35 insertions(+), 2 deletions(-) diff --git a/fs/proc/generic.c b/fs/proc/generic.c index df6327a2b8650..e5dee5c3188eb 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -373,6 +373,10 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, WARN(1, "create '/proc/%s' by hand\n", qstr.name); return NULL; } + if (is_empty_pde(*parent)) { + WARN(1, "attempt to add to permanently empty directory"); + return NULL; + } ent = kzalloc(sizeof(struct proc_dir_entry) + qstr.len + 1, GFP_KERNEL); if (!ent) @@ -455,6 +459,25 @@ struct proc_dir_entry *proc_mkdir(const char *name, } EXPORT_SYMBOL(proc_mkdir); +struct proc_dir_entry *proc_create_mount_point(const char *name) +{ + umode_t mode = S_IFDIR | S_IRUGO | S_IXUGO; + struct proc_dir_entry *ent, *parent = NULL; + + ent = __proc_create(&parent, name, mode, 2); + if (ent) { + ent->data = NULL; + ent->proc_fops = NULL; + ent->proc_iops = NULL; + if (proc_register(parent, ent) < 0) { + kfree(ent); + parent->nlink--; + ent = NULL; + } + } + return ent; +} + struct proc_dir_entry *proc_create_data(const char *name, umode_t mode, struct proc_dir_entry *parent, const struct file_operations *proc_fops, diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 8272aaba1bb06..e3eb5524639fb 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -423,6 +423,10 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de) inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; PROC_I(inode)->pde = de; + if (is_empty_pde(de)) { + make_empty_dir_inode(inode); + return inode; + } if (de->mode) { inode->i_mode = de->mode; inode->i_uid = de->uid; diff --git a/fs/proc/internal.h b/fs/proc/internal.h index c835b94c0cd3a..aa2781095bd15 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -191,6 +191,12 @@ static inline struct proc_dir_entry *pde_get(struct proc_dir_entry *pde) } extern void pde_put(struct proc_dir_entry *); +static inline bool is_empty_pde(const struct proc_dir_entry *pde) +{ + return S_ISDIR(pde->mode) && !pde->proc_iops; +} +struct proc_dir_entry *proc_create_mount_point(const char *name); + /* * inode.c */ diff --git a/fs/proc/root.c b/fs/proc/root.c index b7fa4bfe896a2..3d987dfdef830 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -182,10 +182,10 @@ void __init proc_root_init(void) #endif proc_mkdir("fs", NULL); proc_mkdir("driver", NULL); - proc_mkdir("fs/nfsd", NULL); /* somewhere for the nfsd filesystem to be mounted */ + proc_create_mount_point("fs/nfsd"); /* somewhere for the nfsd filesystem to be mounted */ #if defined(CONFIG_SUN_OPENPROMFS) || defined(CONFIG_SUN_OPENPROMFS_MODULE) /* just give it a 
mountpoint */ - proc_mkdir("openprom", NULL); + proc_create_mount_point("openprom"); #endif proc_tty_init(); proc_mkdir("bus", NULL); From 80c298105bf80e33ae494eaeba51f00352b5373c Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 13 May 2015 16:09:29 -0500 Subject: [PATCH 0076/2091] kernfs: Add support for always empty directories. commit ea015218f2f7ace2dad9cedd21ed95bdba2886d7 upstream. Add a new function kernfs_create_empty_dir that can be used to create directory that can not be modified. Update the code to use make_empty_dir_inode when reporting a permanently empty directory to the vfs. Update the code to not allow adding to permanently empty directories. Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/dir.c | 38 +++++++++++++++++++++++++++++++++++++- fs/kernfs/inode.c | 2 ++ include/linux/kernfs.h | 3 +++ 3 files changed, 42 insertions(+), 1 deletion(-) diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index fffca9517321c..2d48d28e16401 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -592,6 +592,9 @@ int kernfs_add_one(struct kernfs_node *kn) goto out_unlock; ret = -ENOENT; + if (parent->flags & KERNFS_EMPTY_DIR) + goto out_unlock; + if ((parent->flags & KERNFS_ACTIVATED) && !kernfs_active(parent)) goto out_unlock; @@ -783,6 +786,38 @@ struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent, return ERR_PTR(rc); } +/** + * kernfs_create_empty_dir - create an always empty directory + * @parent: parent in which to create a new directory + * @name: name of the new directory + * + * Returns the created node on success, ERR_PTR() value on failure. + */ +struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent, + const char *name) +{ + struct kernfs_node *kn; + int rc; + + /* allocate */ + kn = kernfs_new_node(parent, name, S_IRUGO|S_IXUGO|S_IFDIR, KERNFS_DIR); + if (!kn) + return ERR_PTR(-ENOMEM); + + kn->flags |= KERNFS_EMPTY_DIR; + kn->dir.root = parent->dir.root; + kn->ns = NULL; + kn->priv = NULL; + + /* link in */ + rc = kernfs_add_one(kn); + if (!rc) + return kn; + + kernfs_put(kn); + return ERR_PTR(rc); +} + static struct dentry *kernfs_iop_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) @@ -1254,7 +1289,8 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, mutex_lock(&kernfs_mutex); error = -ENOENT; - if (!kernfs_active(kn) || !kernfs_active(new_parent)) + if (!kernfs_active(kn) || !kernfs_active(new_parent) || + (new_parent->flags & KERNFS_EMPTY_DIR)) goto out; error = 0; diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c index 2da8493a380b8..756dd56aaf60a 100644 --- a/fs/kernfs/inode.c +++ b/fs/kernfs/inode.c @@ -296,6 +296,8 @@ static void kernfs_init_inode(struct kernfs_node *kn, struct inode *inode) case KERNFS_DIR: inode->i_op = &kernfs_dir_iops; inode->i_fop = &kernfs_dir_fops; + if (kn->flags & KERNFS_EMPTY_DIR) + make_empty_dir_inode(inode); break; case KERNFS_FILE: inode->i_size = kn->attr.size; diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 71ecdab1671b8..29d1896c3ba55 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -45,6 +45,7 @@ enum kernfs_node_flag { KERNFS_LOCKDEP = 0x0100, KERNFS_SUICIDAL = 0x0400, KERNFS_SUICIDED = 0x0800, + KERNFS_EMPTY_DIR = 0x1000, }; /* @flags for kernfs_create_root() */ @@ -285,6 +286,8 @@ void kernfs_destroy_root(struct kernfs_root *root); struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent, const char *name, umode_t mode, void *priv, const void *ns); +struct 
kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent, + const char *name); struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent, const char *name, umode_t mode, loff_t size, From 9924f6e89823a41bfd272ab759636276b9f9ee9c Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 13 May 2015 16:31:40 -0500 Subject: [PATCH 0077/2091] sysfs: Add support for permanently empty directories to serve as mount points. commit 87d2846fcf88113fae2341da1ca9a71f0d916f2c upstream. Add two functions sysfs_create_mount_point and sysfs_remove_mount_point that hang a permanently empty directory off of a kobject or remove a permanently emptpy directory hanging from a kobject. Export these new functions so modular filesystems can use them. Acked-by: Greg Kroah-Hartman Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- fs/sysfs/dir.c | 34 ++++++++++++++++++++++++++++++++++ include/linux/sysfs.h | 15 +++++++++++++++ 2 files changed, 49 insertions(+) diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 0b45ff42f3741..94374e4350259 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -121,3 +121,37 @@ int sysfs_move_dir_ns(struct kobject *kobj, struct kobject *new_parent_kobj, return kernfs_rename_ns(kn, new_parent, kn->name, new_ns); } + +/** + * sysfs_create_mount_point - create an always empty directory + * @parent_kobj: kobject that will contain this always empty directory + * @name: The name of the always empty directory to add + */ +int sysfs_create_mount_point(struct kobject *parent_kobj, const char *name) +{ + struct kernfs_node *kn, *parent = parent_kobj->sd; + + kn = kernfs_create_empty_dir(parent, name); + if (IS_ERR(kn)) { + if (PTR_ERR(kn) == -EEXIST) + sysfs_warn_dup(parent, name); + return PTR_ERR(kn); + } + + return 0; +} +EXPORT_SYMBOL_GPL(sysfs_create_mount_point); + +/** + * sysfs_remove_mount_point - remove an always empty directory. + * @parent_kobj: kobject that will contain this always empty directory + * @name: The name of the always empty directory to remove + * + */ +void sysfs_remove_mount_point(struct kobject *parent_kobj, const char *name) +{ + struct kernfs_node *parent = parent_kobj->sd; + + kernfs_remove_by_name_ns(parent, name, NULL); +} +EXPORT_SYMBOL_GPL(sysfs_remove_mount_point); diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 99382c0df17eb..9f65758311a4e 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -210,6 +210,10 @@ int __must_check sysfs_rename_dir_ns(struct kobject *kobj, const char *new_name, int __must_check sysfs_move_dir_ns(struct kobject *kobj, struct kobject *new_parent_kobj, const void *new_ns); +int __must_check sysfs_create_mount_point(struct kobject *parent_kobj, + const char *name); +void sysfs_remove_mount_point(struct kobject *parent_kobj, + const char *name); int __must_check sysfs_create_file_ns(struct kobject *kobj, const struct attribute *attr, @@ -298,6 +302,17 @@ static inline int sysfs_move_dir_ns(struct kobject *kobj, return 0; } +static inline int sysfs_create_mount_point(struct kobject *parent_kobj, + const char *name) +{ + return 0; +} + +static inline void sysfs_remove_mount_point(struct kobject *parent_kobj, + const char *name) +{ +} + static inline int sysfs_create_file_ns(struct kobject *kobj, const struct attribute *attr, const void *ns) From 28dd1f346b2f0fc2ab8285046ed0bd91e9b808d3 Mon Sep 17 00:00:00 2001 From: "Eric W. 
Biederman" Date: Wed, 13 May 2015 17:35:41 -0500 Subject: [PATCH 0078/2091] sysfs: Create mountpoints with sysfs_create_mount_point commit f9bb48825a6b5d02f4cabcc78967c75db903dcdc upstream. This allows for better documentation in the code and it allows for a simpler and fully correct version of fs_fully_visible to be written. The mount points converted and their filesystems are: /sys/hypervisor/s390/ s390_hypfs /sys/kernel/config/ configfs /sys/kernel/debug/ debugfs /sys/firmware/efi/efivars/ efivarfs /sys/fs/fuse/connections/ fusectl /sys/fs/pstore/ pstore /sys/kernel/tracing/ tracefs /sys/fs/cgroup/ cgroup /sys/kernel/security/ securityfs /sys/fs/selinux/ selinuxfs /sys/fs/smackfs/ smackfs Acked-by: Greg Kroah-Hartman Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- arch/s390/hypfs/inode.c | 12 ++++-------- drivers/firmware/efi/efi.c | 6 ++---- fs/configfs/mount.c | 10 ++++------ fs/debugfs/inode.c | 11 ++++------- fs/fuse/inode.c | 9 +++------ fs/pstore/inode.c | 12 ++++-------- fs/tracefs/inode.c | 6 ++---- kernel/cgroup.c | 10 ++++------ security/inode.c | 10 ++++------ security/selinux/selinuxfs.c | 11 +++++------ security/smack/smackfs.c | 8 ++++---- 11 files changed, 40 insertions(+), 65 deletions(-) diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index d3f896a35b981..2eeb0a0f506d5 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -456,8 +456,6 @@ static const struct super_operations hypfs_s_ops = { .show_options = hypfs_show_options, }; -static struct kobject *s390_kobj; - static int __init hypfs_init(void) { int rc; @@ -481,18 +479,16 @@ static int __init hypfs_init(void) rc = -ENODATA; goto fail_hypfs_sprp_exit; } - s390_kobj = kobject_create_and_add("s390", hypervisor_kobj); - if (!s390_kobj) { - rc = -ENOMEM; + rc = sysfs_create_mount_point(hypervisor_kobj, "s390"); + if (rc) goto fail_hypfs_diag0c_exit; - } rc = register_filesystem(&hypfs_type); if (rc) goto fail_filesystem; return 0; fail_filesystem: - kobject_put(s390_kobj); + sysfs_remove_mount_point(hypervisor_kobj, "s390"); fail_hypfs_diag0c_exit: hypfs_diag0c_exit(); fail_hypfs_sprp_exit: @@ -510,7 +506,7 @@ static int __init hypfs_init(void) static void __exit hypfs_exit(void) { unregister_filesystem(&hypfs_type); - kobject_put(s390_kobj); + sysfs_remove_mount_point(hypervisor_kobj, "s390"); hypfs_diag0c_exit(); hypfs_sprp_exit(); hypfs_vm_exit(); diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 3061bb8629dc3..e14363d126906 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -65,7 +65,6 @@ static int __init parse_efi_cmdline(char *str) early_param("efi", parse_efi_cmdline); static struct kobject *efi_kobj; -static struct kobject *efivars_kobj; /* * Let's not leave out systab information that snuck into @@ -212,10 +211,9 @@ static int __init efisubsys_init(void) goto err_remove_group; /* and the standard mountpoint for efivarfs */ - efivars_kobj = kobject_create_and_add("efivars", efi_kobj); - if (!efivars_kobj) { + error = sysfs_create_mount_point(efi_kobj, "efivars"); + if (error) { pr_err("efivars: Subsystem registration failed.\n"); - error = -ENOMEM; goto err_remove_group; } diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c index 5373567420912..a8f3b589a2dfe 100644 --- a/fs/configfs/mount.c +++ b/fs/configfs/mount.c @@ -129,8 +129,6 @@ void configfs_release_fs(void) } -static struct kobject *config_kobj; - static int __init configfs_init(void) { int err = -ENOMEM; @@ -141,8 +139,8 @@ static int __init 
configfs_init(void) if (!configfs_dir_cachep) goto out; - config_kobj = kobject_create_and_add("config", kernel_kobj); - if (!config_kobj) + err = sysfs_create_mount_point(kernel_kobj, "config"); + if (err) goto out2; err = register_filesystem(&configfs_fs_type); @@ -152,7 +150,7 @@ static int __init configfs_init(void) return 0; out3: pr_err("Unable to register filesystem!\n"); - kobject_put(config_kobj); + sysfs_remove_mount_point(kernel_kobj, "config"); out2: kmem_cache_destroy(configfs_dir_cachep); configfs_dir_cachep = NULL; @@ -163,7 +161,7 @@ static int __init configfs_init(void) static void __exit configfs_exit(void) { unregister_filesystem(&configfs_fs_type); - kobject_put(config_kobj); + sysfs_remove_mount_point(kernel_kobj, "config"); kmem_cache_destroy(configfs_dir_cachep); configfs_dir_cachep = NULL; } diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index c1e7ffb0dab65..12756040ca201 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -716,20 +716,17 @@ bool debugfs_initialized(void) } EXPORT_SYMBOL_GPL(debugfs_initialized); - -static struct kobject *debug_kobj; - static int __init debugfs_init(void) { int retval; - debug_kobj = kobject_create_and_add("debug", kernel_kobj); - if (!debug_kobj) - return -EINVAL; + retval = sysfs_create_mount_point(kernel_kobj, "debug"); + if (retval) + return retval; retval = register_filesystem(&debug_fs_type); if (retval) - kobject_put(debug_kobj); + sysfs_remove_mount_point(kernel_kobj, "debug"); else debugfs_registered = true; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 082ac1c97f397..18dacf9ed8ff3 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1238,7 +1238,6 @@ static void fuse_fs_cleanup(void) } static struct kobject *fuse_kobj; -static struct kobject *connections_kobj; static int fuse_sysfs_init(void) { @@ -1250,11 +1249,9 @@ static int fuse_sysfs_init(void) goto out_err; } - connections_kobj = kobject_create_and_add("connections", fuse_kobj); - if (!connections_kobj) { - err = -ENOMEM; + err = sysfs_create_mount_point(fuse_kobj, "connections"); + if (err) goto out_fuse_unregister; - } return 0; @@ -1266,7 +1263,7 @@ static int fuse_sysfs_init(void) static void fuse_sysfs_cleanup(void) { - kobject_put(connections_kobj); + sysfs_remove_mount_point(fuse_kobj, "connections"); kobject_put(fuse_kobj); } diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index dc43b5f29305e..3adcc4669faca 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -461,22 +461,18 @@ static struct file_system_type pstore_fs_type = { .kill_sb = pstore_kill_sb, }; -static struct kobject *pstore_kobj; - static int __init init_pstore_fs(void) { - int err = 0; + int err; /* Create a convenient mount point for people to access pstore */ - pstore_kobj = kobject_create_and_add("pstore", fs_kobj); - if (!pstore_kobj) { - err = -ENOMEM; + err = sysfs_create_mount_point(fs_kobj, "pstore"); + if (err) goto out; - } err = register_filesystem(&pstore_fs_type); if (err < 0) - kobject_put(pstore_kobj); + sysfs_remove_mount_point(fs_kobj, "pstore"); out: return err; diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index d92bdf3b079a7..a43df11a163f1 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -631,14 +631,12 @@ bool tracefs_initialized(void) return tracefs_registered; } -static struct kobject *trace_kobj; - static int __init tracefs_init(void) { int retval; - trace_kobj = kobject_create_and_add("tracing", kernel_kobj); - if (!trace_kobj) + retval = sysfs_create_mount_point(kernel_kobj, "tracing"); + if (retval) return -EINVAL; 
retval = register_filesystem(&trace_fs_type); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 469dd547770ca..e8a5491be756a 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1924,8 +1924,6 @@ static struct file_system_type cgroup_fs_type = { .kill_sb = cgroup_kill_sb, }; -static struct kobject *cgroup_kobj; - /** * task_cgroup_path - cgroup path of a task in the first cgroup hierarchy * @task: target task @@ -5044,13 +5042,13 @@ int __init cgroup_init(void) ss->bind(init_css_set.subsys[ssid]); } - cgroup_kobj = kobject_create_and_add("cgroup", fs_kobj); - if (!cgroup_kobj) - return -ENOMEM; + err = sysfs_create_mount_point(fs_kobj, "cgroup"); + if (err) + return err; err = register_filesystem(&cgroup_fs_type); if (err < 0) { - kobject_put(cgroup_kobj); + sysfs_remove_mount_point(fs_kobj, "cgroup"); return err; } diff --git a/security/inode.c b/security/inode.c index 91503b79c5f8f..0e37e4fba8fac 100644 --- a/security/inode.c +++ b/security/inode.c @@ -215,19 +215,17 @@ void securityfs_remove(struct dentry *dentry) } EXPORT_SYMBOL_GPL(securityfs_remove); -static struct kobject *security_kobj; - static int __init securityfs_init(void) { int retval; - security_kobj = kobject_create_and_add("security", kernel_kobj); - if (!security_kobj) - return -EINVAL; + retval = sysfs_create_mount_point(kernel_kobj, "security"); + if (retval) + return retval; retval = register_filesystem(&fs_type); if (retval) - kobject_put(security_kobj); + sysfs_remove_mount_point(kernel_kobj, "security"); return retval; } diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index d2787cca1fcb9..3d22014130289 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -1853,7 +1853,6 @@ static struct file_system_type sel_fs_type = { }; struct vfsmount *selinuxfs_mount; -static struct kobject *selinuxfs_kobj; static int __init init_sel_fs(void) { @@ -1862,13 +1861,13 @@ static int __init init_sel_fs(void) if (!selinux_enabled) return 0; - selinuxfs_kobj = kobject_create_and_add("selinux", fs_kobj); - if (!selinuxfs_kobj) - return -ENOMEM; + err = sysfs_create_mount_point(fs_kobj, "selinux"); + if (err) + return err; err = register_filesystem(&sel_fs_type); if (err) { - kobject_put(selinuxfs_kobj); + sysfs_remove_mount_point(fs_kobj, "selinux"); return err; } @@ -1887,7 +1886,7 @@ __initcall(init_sel_fs); #ifdef CONFIG_SECURITY_SELINUX_DISABLE void exit_sel_fs(void) { - kobject_put(selinuxfs_kobj); + sysfs_remove_mount_point(fs_kobj, "selinux"); kern_unmount(selinuxfs_mount); unregister_filesystem(&sel_fs_type); } diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c index d9682985349e6..ac4cac7c661a3 100644 --- a/security/smack/smackfs.c +++ b/security/smack/smackfs.c @@ -2241,16 +2241,16 @@ static const struct file_operations smk_revoke_subj_ops = { .llseek = generic_file_llseek, }; -static struct kset *smackfs_kset; /** * smk_init_sysfs - initialize /sys/fs/smackfs * */ static int smk_init_sysfs(void) { - smackfs_kset = kset_create_and_add("smackfs", NULL, fs_kobj); - if (!smackfs_kset) - return -ENOMEM; + int err; + err = sysfs_create_mount_point(fs_kobj, "smackfs"); + if (err) + return err; return 0; } From 8e7c56b6f14d1388d5aa2feb5b28dd6360199cef Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 13 May 2015 20:51:09 -0500 Subject: [PATCH 0079/2091] mnt: Update fs_fully_visible to test for permanently empty directories commit 7236c85e1be51a9e25ba0f6e087a66ca89605a49 upstream. 
fs_fully_visible attempts to make fresh mounts of proc and sysfs give the mounter no more access to proc and sysfs than if they could have by creating a bind mount. One aspect of proc and sysfs that makes this particularly tricky is that there are other filesystems that typically mount on top of proc and sysfs. As those filesystems are mounted on empty directories in practice it is safe to ignore them. However testing to ensure filesystems are mounted on empty directories has not been something the in kernel data structures have supported so the current test for an empty directory which checks to see if nlink <= 2 is a bit lacking. proc and sysfs have recently been modified to use the new empty_dir infrastructure to create all of their dedicated mount points. Instead of testing for S_ISDIR(inode->i_mode) && i_nlink <= 2 to see if a directory is empty, test for is_empty_dir_inode(inode). That small change guaranteess mounts found on proc and sysfs really are safe to ignore, because the directories are not only empty but nothing can ever be added to them. This guarantees there is nothing to worry about when mounting proc and sysfs. Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- fs/namespace.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 1d4a97c573e03..c40b48cd76472 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -3194,9 +3194,8 @@ bool fs_fully_visible(struct file_system_type *type) /* Only worry about locked mounts */ if (!(mnt->mnt.mnt_flags & MNT_LOCKED)) continue; - if (!S_ISDIR(inode->i_mode)) - goto next; - if (inode->i_nlink > 2) + /* Is the directory permanetly empty? */ + if (!is_empty_dir_inode(inode)) goto next; } visible = true; From b5eb51f2ee063044401492650e9e01bb35974870 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 8 May 2015 23:22:29 -0500 Subject: [PATCH 0080/2091] mnt: Refactor the logic for mounting sysfs and proc in a user namespace commit 1b852bceb0d111e510d1a15826ecc4a19358d512 upstream. Fresh mounts of proc and sysfs are a very special case that works very much like a bind mount. Unfortunately the current structure can not preserve the MNT_LOCK... mount flags. Therefore refactor the logic into a form that can be modified to preserve those lock bits. Add a new filesystem flag FS_USERNS_VISIBLE that requires some mount of the filesystem be fully visible in the current mount namespace, before the filesystem may be mounted. Move the logic for calling fs_fully_visible from proc and sysfs into fs/namespace.c where it has greater access to mount namespace state. Signed-off-by: "Eric W. 
Biederman" Signed-off-by: Greg Kroah-Hartman --- fs/namespace.c | 8 +++++++- fs/proc/root.c | 5 +---- fs/sysfs/mount.c | 5 +---- include/linux/fs.h | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index c40b48cd76472..50a0d950404c7 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2332,6 +2332,8 @@ static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags) return err; } +static bool fs_fully_visible(struct file_system_type *fs_type); + /* * create a new mount for userspace and request it to be added into the * namespace's tree @@ -2363,6 +2365,10 @@ static int do_new_mount(struct path *path, const char *fstype, int flags, flags |= MS_NODEV; mnt_flags |= MNT_NODEV | MNT_LOCK_NODEV; } + if (type->fs_flags & FS_USERNS_VISIBLE) { + if (!fs_fully_visible(type)) + return -EPERM; + } } mnt = vfs_kern_mount(type, flags, name, data); @@ -3164,7 +3170,7 @@ bool current_chrooted(void) return chrooted; } -bool fs_fully_visible(struct file_system_type *type) +static bool fs_fully_visible(struct file_system_type *type) { struct mnt_namespace *ns = current->nsproxy->mnt_ns; struct mount *mnt; diff --git a/fs/proc/root.c b/fs/proc/root.c index 3d987dfdef830..68feb0f70e635 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -112,9 +112,6 @@ static struct dentry *proc_mount(struct file_system_type *fs_type, ns = task_active_pid_ns(current); options = data; - if (!capable(CAP_SYS_ADMIN) && !fs_fully_visible(fs_type)) - return ERR_PTR(-EPERM); - /* Does the mounter have privilege over the pid namespace? */ if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) return ERR_PTR(-EPERM); @@ -159,7 +156,7 @@ static struct file_system_type proc_fs_type = { .name = "proc", .mount = proc_mount, .kill_sb = proc_kill_sb, - .fs_flags = FS_USERNS_MOUNT, + .fs_flags = FS_USERNS_VISIBLE | FS_USERNS_MOUNT, }; void __init proc_root_init(void) diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index 8a49486bf30c9..1c6ac6fcee9fb 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -31,9 +31,6 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type, bool new_sb; if (!(flags & MS_KERNMOUNT)) { - if (!capable(CAP_SYS_ADMIN) && !fs_fully_visible(fs_type)) - return ERR_PTR(-EPERM); - if (!kobj_ns_current_may_mount(KOBJ_NS_TYPE_NET)) return ERR_PTR(-EPERM); } @@ -58,7 +55,7 @@ static struct file_system_type sysfs_fs_type = { .name = "sysfs", .mount = sysfs_mount, .kill_sb = sysfs_kill_sb, - .fs_flags = FS_USERNS_MOUNT, + .fs_flags = FS_USERNS_VISIBLE | FS_USERNS_MOUNT, }; int __init sysfs_init(void) diff --git a/include/linux/fs.h b/include/linux/fs.h index 0d5ae7d5dc53f..571aab91bfc03 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1897,6 +1897,7 @@ struct file_system_type { #define FS_HAS_SUBTYPE 4 #define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */ #define FS_USERNS_DEV_MOUNT 16 /* A userns mount does not imply MNT_NODEV */ +#define FS_USERNS_VISIBLE 32 /* FS must already be visible */ #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. 
*/ struct dentry *(*mount) (struct file_system_type *, int, const char *, void *); @@ -1984,7 +1985,6 @@ extern int vfs_ustat(dev_t, struct kstatfs *); extern int freeze_super(struct super_block *super); extern int thaw_super(struct super_block *super); extern bool our_mnt(struct vfsmount *mnt); -extern bool fs_fully_visible(struct file_system_type *); extern int current_umask(void); From 51c2c47ef6349d49e49002054f8c0d11d3b5646e Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 8 May 2015 23:49:47 -0500 Subject: [PATCH 0081/2091] mnt: Modify fs_fully_visible to deal with locked ro nodev and atime commit 8c6cf9cc829fcd0b179b59f7fe288941d0e31108 upstream. Ignore an existing mount if the locked readonly, nodev or atime attributes are less permissive than the desired attributes of the new mount. On success ensure the new mount locks all of the same readonly, nodev and atime attributes as the old mount. The nosuid and noexec attributes are not checked here as this change is destined for stable and enforcing those attributes causes a regression in lxc and libvirt-lxc where those applications will not start and there are no known executables on sysfs or proc and no known way to create exectuables without code modifications Fixes: e51db73532955 ("userns: Better restrictions on when proc and sysfs can be mounted") Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- fs/namespace.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 50a0d950404c7..02c6875dd9457 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2332,7 +2332,7 @@ static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags) return err; } -static bool fs_fully_visible(struct file_system_type *fs_type); +static bool fs_fully_visible(struct file_system_type *fs_type, int *new_mnt_flags); /* * create a new mount for userspace and request it to be added into the @@ -2366,7 +2366,7 @@ static int do_new_mount(struct path *path, const char *fstype, int flags, mnt_flags |= MNT_NODEV | MNT_LOCK_NODEV; } if (type->fs_flags & FS_USERNS_VISIBLE) { - if (!fs_fully_visible(type)) + if (!fs_fully_visible(type, &mnt_flags)) return -EPERM; } } @@ -3170,9 +3170,10 @@ bool current_chrooted(void) return chrooted; } -static bool fs_fully_visible(struct file_system_type *type) +static bool fs_fully_visible(struct file_system_type *type, int *new_mnt_flags) { struct mnt_namespace *ns = current->nsproxy->mnt_ns; + int new_flags = *new_mnt_flags; struct mount *mnt; bool visible = false; @@ -3191,6 +3192,19 @@ static bool fs_fully_visible(struct file_system_type *type) if (mnt->mnt.mnt_root != mnt->mnt.mnt_sb->s_root) continue; + /* Verify the mount flags are equal to or more permissive + * than the proposed new mount. + */ + if ((mnt->mnt.mnt_flags & MNT_LOCK_READONLY) && + !(new_flags & MNT_READONLY)) + continue; + if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) && + !(new_flags & MNT_NODEV)) + continue; + if ((mnt->mnt.mnt_flags & MNT_LOCK_ATIME) && + ((mnt->mnt.mnt_flags & MNT_ATIME_MASK) != (new_flags & MNT_ATIME_MASK))) + continue; + /* This mount is not fully visible if there are any * locked child mounts that cover anything except for * empty directories. 
@@ -3204,6 +3218,10 @@ static bool fs_fully_visible(struct file_system_type *type) if (!is_empty_dir_inode(inode)) goto next; } + /* Preserve the locked attributes */ + *new_mnt_flags |= mnt->mnt.mnt_flags & (MNT_LOCK_READONLY | \ + MNT_LOCK_NODEV | \ + MNT_LOCK_ATIME); visible = true; goto found; next: ; From 03c29ef2e86669e356493153d80b5c4ed0fc2f84 Mon Sep 17 00:00:00 2001 From: Aaron Lu Date: Thu, 28 May 2015 10:58:49 +0800 Subject: [PATCH 0082/2091] gpio: crystalcove: set IRQCHIP_SKIP_SET_WAKE for the irqchip commit 61e749d7e1627d375156553ea0ae83c4f6bb5a9b upstream. The CrystalCove GPIO irqchip doesn't have irq_set_wake callback defined so we should set IRQCHIP_SKIP_SET_WAKE for it or it would cause an irq desc's wake_depth unbalanced warning during system resume phase from the gpio_keys driver, which is the driver for the power button of the ASUS T100 laptop. Signed-off-by: Aaron Lu Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpio-crystalcove.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpio/gpio-crystalcove.c b/drivers/gpio/gpio-crystalcove.c index 91a7ffe831350..ab457fc00e755 100644 --- a/drivers/gpio/gpio-crystalcove.c +++ b/drivers/gpio/gpio-crystalcove.c @@ -255,6 +255,7 @@ static struct irq_chip crystalcove_irqchip = { .irq_set_type = crystalcove_irq_type, .irq_bus_lock = crystalcove_bus_lock, .irq_bus_sync_unlock = crystalcove_bus_sync_unlock, + .flags = IRQCHIP_SKIP_SET_WAKE, }; static irqreturn_t crystalcove_gpio_irq_handler(int irq, void *data) From 9b553d64d949da26b63759e4ae9af863676bad37 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 21 May 2015 13:21:37 +0200 Subject: [PATCH 0083/2091] gpio: rcar: Check for irq_set_irq_wake() failures commit 501ef0f95a57e7c32138733c468394a52244c85b upstream. If an interrupt controller doesn't support wake-up configuration, irq_set_irq_wake() returns an error code. Then any subsequent call trying to deconfigure wake-up will cause an imbalance, and a warning will be printed: WARNING: CPU: 1 PID: 1341 at kernel/irq/manage.c:540 irq_set_irq_wake+0x9c/0xf8() Unbalanced IRQ 26 wake disable To fix this, refrain from any further parent interrupt controller (de)configuration if irq_set_irq_wake() failed. Alternative fixes would be: - calling "gic_set_irqchip_flags(IRQCHIP_SKIP_SET_WAKE)" from the platform code, - setting "gic_chip.flags = IRQCHIP_SKIP_SET_WAKE" in the GIC driver code, but these were withheld as the GIC hardware doesn't really support wake-up interrupts. 
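To make the resulting pattern concrete, here is a minimal sketch of such an ->irq_set_wake() handler for a hypothetical GPIO driver (the names are made up; only the error handling mirrors the fix below): on failure the parent interrupt is forgotten, so no unbalanced wake disable is attempted on resume.

#include <linux/device.h>
#include <linux/interrupt.h>
#include <linux/irq.h>

struct example_gpio_priv {
	struct device *dev;
	unsigned int irq_parent;	/* cleared once wake-up is known unsupported */
};

static int example_gpio_irq_set_wake(struct irq_data *d, unsigned int on)
{
	struct example_gpio_priv *p = irq_data_get_irq_chip_data(d);
	int error;

	if (p->irq_parent) {
		error = irq_set_irq_wake(p->irq_parent, on);
		if (error) {
			dev_dbg(p->dev, "irq %u doesn't support irq_set_wake\n",
				p->irq_parent);
			/* skip any further (de)configuration of the parent */
			p->irq_parent = 0;
		}
	}
	return 0;
}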
Fixes: ab82fa7da4dce5c7 ("gpio: rcar: Prevent module clock disable when wake-up is enabled") Signed-off-by: Geert Uytterhoeven Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpio-rcar.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpio-rcar.c b/drivers/gpio/gpio-rcar.c index fd39774659484..1e14a6c74ed13 100644 --- a/drivers/gpio/gpio-rcar.c +++ b/drivers/gpio/gpio-rcar.c @@ -177,8 +177,17 @@ static int gpio_rcar_irq_set_wake(struct irq_data *d, unsigned int on) struct gpio_chip *gc = irq_data_get_irq_chip_data(d); struct gpio_rcar_priv *p = container_of(gc, struct gpio_rcar_priv, gpio_chip); - - irq_set_irq_wake(p->irq_parent, on); + int error; + + if (p->irq_parent) { + error = irq_set_irq_wake(p->irq_parent, on); + if (error) { + dev_dbg(&p->pdev->dev, + "irq %u doesn't support irq_set_wake\n", + p->irq_parent); + p->irq_parent = 0; + } + } if (!p->clk) return 0; From a6556a506ea1c737e8adafd59015786448ffbf3f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 11 May 2015 11:13:05 -0700 Subject: [PATCH 0084/2091] rcu: Correctly handle non-empty Tiny RCU callback list with none ready commit 6e91f8cb138625be96070b778d9ba71ce520ea7e upstream. If, at the time __rcu_process_callbacks() is invoked, there are callbacks in Tiny RCU's callback list, but none of them are ready to be invoked, the current list-management code will knit the non-ready callbacks out of the list. This can result in hangs and possibly worse. This commit therefore inserts a check for there being no callbacks that can be invoked immediately. This bug is unlikely to occur -- you have to get a new callback between the time rcu_sched_qs() or rcu_bh_qs() was called, but before we get to __rcu_process_callbacks(). It was detected by the addition of RCU-bh testing to rcutorture, which in turn was instigated by Iftekhar Ahmed's mutation testing. Although this bug was made much more likely by 915e8a4fe45e (rcu: Remove fastpath from __rcu_process_callbacks()), this did not cause the bug, but rather made it much more probable. That said, it takes more than 40 hours of rcutorture testing, on average, for this bug to appear, so this fix cannot be considered an emergency. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett Signed-off-by: Greg Kroah-Hartman --- kernel/rcu/tiny.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index 069742d61c688..ec3086879cb51 100644 --- a/kernel/rcu/tiny.c +++ b/kernel/rcu/tiny.c @@ -170,6 +170,11 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) /* Move the ready-to-invoke callbacks to a local list. */ local_irq_save(flags); + if (rcp->donetail == &rcp->rcucblist) { + /* No callbacks ready, so just leave. */ + local_irq_restore(flags); + return; + } RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, rcp->qlen, -1)); list = rcp->rcucblist; rcp->rcucblist = *rcp->donetail; From 77d10175396e764488886923eeaec58c3ff1fb7b Mon Sep 17 00:00:00 2001 From: Brian King Date: Wed, 13 May 2015 08:50:27 -0500 Subject: [PATCH 0085/2091] ipr: Increase default adapter init stage change timeout commit 45c44b5ff9caa743ed9c2bfd44307c536c9caf1e upstream. Increase the default init stage change timeout from 15 seconds to 30 seconds. This resolves issues we have seen with some adapters not transitioning to the first init stage within 15 seconds, which results in adapter initialization failures. 
Signed-off-by: Brian King Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/ipr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/ipr.h b/drivers/scsi/ipr.h index 47412cf4eaac5..73790a1d09690 100644 --- a/drivers/scsi/ipr.h +++ b/drivers/scsi/ipr.h @@ -272,7 +272,7 @@ #define IPR_RUNTIME_RESET 0x40000000 #define IPR_IPL_INIT_MIN_STAGE_TIME 5 -#define IPR_IPL_INIT_DEFAULT_STAGE_TIME 15 +#define IPR_IPL_INIT_DEFAULT_STAGE_TIME 30 #define IPR_IPL_INIT_STAGE_UNKNOWN 0x0 #define IPR_IPL_INIT_STAGE_TRANSOP 0xB0000000 #define IPR_IPL_INIT_STAGE_MASK 0xff000000 From 37100f76f93236e99a89c0f0c5531eff03920725 Mon Sep 17 00:00:00 2001 From: Ryan Underwood Date: Sun, 25 Jan 2015 16:07:09 -0800 Subject: [PATCH 0086/2091] Disable write buffering on Toshiba ToPIC95 commit 2fb22a8042fe96b4220843f79241c116d90922c4 upstream. Disable write buffering on the Toshiba ToPIC95 if it is enabled by somebody (it is not supposed to be a power-on default according to the datasheet). On the ToPIC95, practically no 32-bit Cardbus card will work under heavy load without locking up the whole system if this is left enabled. I tried about a dozen. It does not affect 16-bit cards. This is similar to the O2 bugs in early controller revisions it seems. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=55961 Signed-off-by: Ryan C. Underwood Signed-off-by: Dominik Brodowski Signed-off-by: Greg Kroah-Hartman --- drivers/pcmcia/topic.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/pcmcia/topic.h b/drivers/pcmcia/topic.h index 615a45a8fe867..582688fe75054 100644 --- a/drivers/pcmcia/topic.h +++ b/drivers/pcmcia/topic.h @@ -104,6 +104,9 @@ #define TOPIC_EXCA_IF_CONTROL 0x3e /* 8 bit */ #define TOPIC_EXCA_IFC_33V_ENA 0x01 +#define TOPIC_PCI_CFG_PPBCN 0x3e /* 16-bit */ +#define TOPIC_PCI_CFG_PPBCN_WBEN 0x0400 + static void topic97_zoom_video(struct pcmcia_socket *sock, int onoff) { struct yenta_socket *socket = container_of(sock, struct yenta_socket, socket); @@ -138,6 +141,7 @@ static int topic97_override(struct yenta_socket *socket) static int topic95_override(struct yenta_socket *socket) { u8 fctrl; + u16 ppbcn; /* enable 3.3V support for 16bit cards */ fctrl = exca_readb(socket, TOPIC_EXCA_IF_CONTROL); @@ -146,6 +150,18 @@ static int topic95_override(struct yenta_socket *socket) /* tell yenta to use exca registers to power 16bit cards */ socket->flags |= YENTA_16BIT_POWER_EXCA | YENTA_16BIT_POWER_DF; + /* Disable write buffers to prevent lockups under load with numerous + Cardbus cards, observed on Tecra 500CDT and reported elsewhere on the + net. This is not a power-on default according to the datasheet + but some BIOSes seem to set it. */ + if (pci_read_config_word(socket->dev, TOPIC_PCI_CFG_PPBCN, &ppbcn) == 0 + && socket->dev->revision <= 7 + && (ppbcn & TOPIC_PCI_CFG_PPBCN_WBEN)) { + ppbcn &= ~TOPIC_PCI_CFG_PPBCN_WBEN; + pci_write_config_word(socket->dev, TOPIC_PCI_CFG_PPBCN, ppbcn); + dev_info(&socket->dev->dev, "Disabled ToPIC95 Cardbus write buffers.\n"); + } + return 0; } From 08e394684b82b0987295d96201071f52504c83f5 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 23 Jun 2015 11:56:22 +0200 Subject: [PATCH 0087/2091] ALSA: pcm: Fix pcm_class sysfs output commit 60b93030b44a8c2cd015cebe5624fd7552ec67ec upstream. The pcm_class sysfs of each PCM substream gives only "none" since the recent code change to embed the struct device. Fix the code to point directly to the embedded device object properly. 
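In isolation the pattern looks like this (hypothetical structure and attribute names, for illustration only): when the struct device is embedded and no drvdata has been set, the sysfs callback must recover its container with container_of().

#include <linux/device.h>
#include <linux/kernel.h>

struct example_stream {
	int dev_class;
	struct device dev;		/* embedded, no dev_set_drvdata() */
};

static ssize_t example_class_show(struct device *dev,
				  struct device_attribute *attr, char *buf)
{
	struct example_stream *s = container_of(dev, struct example_stream, dev);

	return sprintf(buf, "%d\n", s->dev_class);
}
static DEVICE_ATTR(example_class, 0444, example_class_show, NULL);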
Fixes: ef46c7af93f9 ('ALSA: pcm: Embed struct device') Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/pcm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/core/pcm.c b/sound/core/pcm.c index b25bcf5b86446..dfed728d8c872 100644 --- a/sound/core/pcm.c +++ b/sound/core/pcm.c @@ -1027,7 +1027,8 @@ void snd_pcm_detach_substream(struct snd_pcm_substream *substream) static ssize_t show_pcm_class(struct device *dev, struct device_attribute *attr, char *buf) { - struct snd_pcm *pcm; + struct snd_pcm_str *pstr = container_of(dev, struct snd_pcm_str, dev); + struct snd_pcm *pcm = pstr->pcm; const char *str; static const char *strs[SNDRV_PCM_CLASS_LAST + 1] = { [SNDRV_PCM_CLASS_GENERIC] = "generic", @@ -1036,8 +1037,7 @@ static ssize_t show_pcm_class(struct device *dev, [SNDRV_PCM_CLASS_DIGITIZER] = "digitizer", }; - if (! (pcm = dev_get_drvdata(dev)) || - pcm->dev_class > SNDRV_PCM_CLASS_LAST) + if (pcm->dev_class > SNDRV_PCM_CLASS_LAST) str = "none"; else str = strs[pcm->dev_class]; From 704ffc4cf721867e35cb922b12cae3210d6f7e67 Mon Sep 17 00:00:00 2001 From: David Henningsson Date: Wed, 24 Jun 2015 10:46:33 +0200 Subject: [PATCH 0088/2091] ALSA: hda - Fix Dock Headphone on Thinkpad X250 seen as a Line Out commit ec56af67a10a0d82b79027878a81fce08d002d50 upstream. Thinkpad X250, when attached to a dock, has two headphone outs but no line out. Make sure we don't try to turn this into one headphone and one line out (since that disables the headphone amp on the dock). Alsa-info at http://www.alsa-project.org/db/?f=36f8764e1d782397928feec715d0ef90dfddd4c1 Signed-off-by: David Henningsson Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 6d010452c1f5c..9b0f1ed02cc95 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4505,6 +4505,7 @@ enum { ALC255_FIXUP_HEADSET_MODE_NO_HP_MIC, ALC293_FIXUP_DELL1_MIC_NO_PRESENCE, ALC292_FIXUP_TPT440_DOCK, + ALC292_FIXUP_TPT440_DOCK2, ALC283_FIXUP_BXBT2807_MIC, ALC255_FIXUP_DELL_WMI_MIC_MUTE_LED, ALC282_FIXUP_ASPIRE_V5_PINS, @@ -4953,6 +4954,12 @@ static const struct hda_fixup alc269_fixups[] = { .chain_id = ALC269_FIXUP_HEADSET_MODE }, [ALC292_FIXUP_TPT440_DOCK] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc269_fixup_pincfg_no_hp_to_lineout, + .chained = true, + .chain_id = ALC292_FIXUP_TPT440_DOCK2 + }, + [ALC292_FIXUP_TPT440_DOCK2] = { .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { { 0x16, 0x21211010 }, /* dock headphone */ From 167bdde510b17d86767b71282c91036ab671340f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 24 Jun 2015 14:37:18 -0400 Subject: [PATCH 0089/2091] ALSA: hda - set proper caps for newer AMD hda audio in KB/KV commit 650474fb737c3e0ea0f6ab8e43c2cd161080ce5c upstream. Fixes audio problems on newer asics. 
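For reference, the general shape of such an ID-table addition in a PCI driver is shown below; the vendor/device pair and driver_data value are placeholders, not the IDs added by this patch.

#include <linux/module.h>
#include <linux/pci.h>

static const struct pci_device_id example_ids[] = {
	{ PCI_DEVICE(0x1002, 0x1234),		/* hypothetical device ID */
	  .driver_data = 0x1 },			/* per-device capability flags */
	{ }					/* terminating entry */
};
MODULE_DEVICE_TABLE(pci, example_ids);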
Noticed by: Kelly Anderson Signed-off-by: Alex Deucher Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_intel.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index b6db25b23dd31..c403dd10d126c 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2054,6 +2054,8 @@ static const struct pci_device_id azx_ids[] = { { PCI_DEVICE(0x1022, 0x780d), .driver_data = AZX_DRIVER_GENERIC | AZX_DCAPS_PRESET_ATI_SB }, /* ATI HDMI */ + { PCI_DEVICE(0x1002, 0x1308), + .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS }, { PCI_DEVICE(0x1002, 0x793b), .driver_data = AZX_DRIVER_ATIHDMI | AZX_DCAPS_PRESET_ATI_HDMI }, { PCI_DEVICE(0x1002, 0x7919), @@ -2062,6 +2064,8 @@ static const struct pci_device_id azx_ids[] = { .driver_data = AZX_DRIVER_ATIHDMI | AZX_DCAPS_PRESET_ATI_HDMI }, { PCI_DEVICE(0x1002, 0x970f), .driver_data = AZX_DRIVER_ATIHDMI | AZX_DCAPS_PRESET_ATI_HDMI }, + { PCI_DEVICE(0x1002, 0x9840), + .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS }, { PCI_DEVICE(0x1002, 0xaa00), .driver_data = AZX_DRIVER_ATIHDMI | AZX_DCAPS_PRESET_ATI_HDMI }, { PCI_DEVICE(0x1002, 0xaa08), From c69c5674d87690f87e48a4267bc68e62fc605d9c Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 25 Jun 2015 08:48:54 +0200 Subject: [PATCH 0090/2091] ALSA: hda - Disable widget power-save for VIA codecs commit 735c75cf4d434862e38c01dcfb2ce8d2fcb9035f upstream. The widget power-save that was enabled in 4.1 kernel seems resulting in the silent output on VIA codecs by some reason. Some widgets get wrong power states. As a quick fix, turn this flag off while keeping power_down_unused flag. This will bring back to the state of 4.0.x. Fixes: 688b12cc3ca8 ('ALSA: hda - Use the new power control for VIA codecs') Reported-and-tested-by: Harald Dunkel Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_via.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sound/pci/hda/patch_via.c b/sound/pci/hda/patch_via.c index bab6c04932aa0..0baeecc2213c5 100644 --- a/sound/pci/hda/patch_via.c +++ b/sound/pci/hda/patch_via.c @@ -238,7 +238,9 @@ static int via_pin_power_ctl_get(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { struct hda_codec *codec = snd_kcontrol_chip(kcontrol); - ucontrol->value.enumerated.item[0] = codec->power_save_node; + struct via_spec *spec = codec->spec; + + ucontrol->value.enumerated.item[0] = spec->gen.power_down_unused; return 0; } @@ -249,9 +251,9 @@ static int via_pin_power_ctl_put(struct snd_kcontrol *kcontrol, struct via_spec *spec = codec->spec; bool val = !!ucontrol->value.enumerated.item[0]; - if (val == codec->power_save_node) + if (val == spec->gen.power_down_unused) return 0; - codec->power_save_node = val; + /* codec->power_save_node = val; */ /* widget PM seems yet broken */ spec->gen.power_down_unused = val; analog_low_current_mode(codec); return 1; From d097fff2c1fbeef22c20f0d6a9a9ed236baabba7 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Fri, 26 Jun 2015 12:35:17 +0800 Subject: [PATCH 0091/2091] ALSA: hda - restore the MIC FIXUP for some Dell machines commit 831bfdf9520e389357cfeee42a6174a73ce7bdb7 upstream. Those FIXUPs were applied to the machines through pin quirks, but recently the PCI_QUIRK makes them can't apply to the machines. 
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=99851 Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 9b0f1ed02cc95..5e5e40a66e928 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4516,6 +4516,8 @@ enum { ALC288_FIXUP_DELL_HEADSET_MODE, ALC288_FIXUP_DELL1_MIC_NO_PRESENCE, ALC288_FIXUP_DELL_XPS_13_GPIO6, + ALC288_FIXUP_DELL_XPS_13, + ALC288_FIXUP_DISABLE_AAMIX, ALC292_FIXUP_DELL_E7X, ALC292_FIXUP_DISABLE_AAMIX, }; @@ -5046,9 +5048,23 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC288_FIXUP_DELL1_MIC_NO_PRESENCE }, + [ALC288_FIXUP_DISABLE_AAMIX] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc_fixup_disable_aamix, + .chained = true, + .chain_id = ALC288_FIXUP_DELL_XPS_13_GPIO6 + }, + [ALC288_FIXUP_DELL_XPS_13] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc_fixup_dell_xps13, + .chained = true, + .chain_id = ALC288_FIXUP_DISABLE_AAMIX + }, [ALC292_FIXUP_DISABLE_AAMIX] = { .type = HDA_FIXUP_FUNC, .v.func = alc_fixup_disable_aamix, + .chained = true, + .chain_id = ALC269_FIXUP_DELL2_MIC_NO_PRESENCE }, [ALC292_FIXUP_DELL_E7X] = { .type = HDA_FIXUP_FUNC, @@ -5079,7 +5095,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x0638, "Dell Inspiron 5439", ALC290_FIXUP_MONO_SPEAKERS_HSJACK), SND_PCI_QUIRK(0x1028, 0x064a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x064b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), - SND_PCI_QUIRK(0x1028, 0x0665, "Dell XPS 13", ALC292_FIXUP_DELL_E7X), + SND_PCI_QUIRK(0x1028, 0x0665, "Dell XPS 13", ALC288_FIXUP_DELL_XPS_13), SND_PCI_QUIRK(0x1028, 0x06c7, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x06d9, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x06da, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), From 4b461d112cdbca4183f4b2f0d67a081a650269ab Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sat, 27 Jun 2015 10:21:13 +0200 Subject: [PATCH 0092/2091] ALSA: hda - Add headset support to Acer Aspire V5 commit 7819717b11346b8a5420b223b46600e394049c66 upstream. Acer Aspire V5 with ALC282 codec needs the similar quirk like Dell laptops to support the headset mic. The headset mic pin is 0x19 and it's not exposed by BIOS, thus we need to fix the pincfg as well. 
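A condensed sketch of the two pieces involved, assuming the HDA driver's internal definitions from sound/pci/hda/hda_local.h (struct hda_fixup, struct hda_pintbl, SND_PCI_QUIRK); the enum name and the 0x0abc subsystem ID are hypothetical, while the pin value is the one the fixup below uses.

enum {
	EXAMPLE_FIXUP_HEADSET_MIC,
};

static const struct hda_fixup example_fixups[] = {
	[EXAMPLE_FIXUP_HEADSET_MIC] = {
		.type = HDA_FIXUP_PINS,
		.v.pins = (const struct hda_pintbl[]) {
			{ 0x19, 0x01a1913c },	/* expose headset mic, no jack detect */
			{ }
		},
	},
};

static const struct snd_pci_quirk example_fixup_tbl[] = {
	SND_PCI_QUIRK(0x1025, 0x0abc, "Hypothetical Acer model",
		      EXAMPLE_FIXUP_HEADSET_MIC),
	{ }
};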
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=96201 Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 5e5e40a66e928..fc67aeddd80ed 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4478,6 +4478,7 @@ enum { ALC269_FIXUP_DELL3_MIC_NO_PRESENCE, ALC269_FIXUP_HEADSET_MODE, ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC, + ALC269_FIXUP_ASPIRE_HEADSET_MIC, ALC269_FIXUP_ASUS_X101_FUNC, ALC269_FIXUP_ASUS_X101_VERB, ALC269_FIXUP_ASUS_X101, @@ -4754,6 +4755,15 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc_fixup_headset_mode_no_hp_mic, }, + [ALC269_FIXUP_ASPIRE_HEADSET_MIC] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x19, 0x01a1913c }, /* headset mic w/o jack detect */ + { } + }, + .chained = true, + .chain_id = ALC269_FIXUP_HEADSET_MODE, + }, [ALC286_FIXUP_SONY_MIC_NO_PRESENCE] = { .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { @@ -5079,6 +5089,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x029b, "Acer 1810TZ", ALC269_FIXUP_INV_DMIC), SND_PCI_QUIRK(0x1025, 0x0349, "Acer AOD260", ALC269_FIXUP_INV_DMIC), SND_PCI_QUIRK(0x1025, 0x047c, "Acer AC700", ALC269_FIXUP_ACER_AC700), + SND_PCI_QUIRK(0x1025, 0x072d, "Acer Aspire V5-571G", ALC269_FIXUP_ASPIRE_HEADSET_MIC), + SND_PCI_QUIRK(0x1025, 0x080d, "Acer Aspire V5-122P", ALC269_FIXUP_ASPIRE_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x0740, "Acer AO725", ALC271_FIXUP_HP_GATE_MIC_JACK), SND_PCI_QUIRK(0x1025, 0x0742, "Acer AO756", ALC271_FIXUP_HP_GATE_MIC_JACK), SND_PCI_QUIRK(0x1025, 0x0775, "Acer Aspire E1-572", ALC271_FIXUP_HP_GATE_MIC_JACK_E1_572), From f367c3b94b1a20a27eb51a2646d7409a1da2d761 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 29 Jun 2015 08:38:02 +0200 Subject: [PATCH 0093/2091] ALSA: hda - Fix the dock headphone output on Fujitsu Lifebook E780 commit 4df3fd1700abbb53bd874143dfd1f9ac9e7cbf4b upstream. Fujitsu Lifebook E780 sets the sequence number 0x0f to only only of the two headphones, thus the driver tries to assign another as the line-out, and this results in the inconsistent mapping between the created jack ctl and the actual I/O. Due to this, PulseAudio doesn't handle it properly and gets the silent output. The fix is to ignore the non-HP sequencer checks. 
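For comparison with pin-table fixups, a function-type fixup entry has this shape (hypothetical callback and pin values; the same internal HDA definitions as in the sketch above are assumed): the callback runs at HDA_FIXUP_ACT_PRE_PROBE time, before the auto-parser maps pins to outputs.

static void example_fixup_pincfg(struct hda_codec *codec,
				 const struct hda_fixup *fix, int action)
{
	if (action == HDA_FIXUP_ACT_PRE_PROBE)
		snd_hda_codec_set_pincfg(codec, 0x1b, 0x21211010);
}

static const struct hda_fixup example_func_fixup[] = {
	[0] = {
		.type = HDA_FIXUP_FUNC,
		.v.func = example_fixup_pincfg,
	},
};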
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=99681 Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index fc67aeddd80ed..944a87b395b88 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4458,6 +4458,7 @@ enum { ALC269_FIXUP_LIFEBOOK, ALC269_FIXUP_LIFEBOOK_EXTMIC, ALC269_FIXUP_LIFEBOOK_HP_PIN, + ALC269_FIXUP_LIFEBOOK_NO_HP_TO_LINEOUT, ALC269_FIXUP_AMIC, ALC269_FIXUP_DMIC, ALC269VB_FIXUP_AMIC, @@ -4627,6 +4628,10 @@ static const struct hda_fixup alc269_fixups[] = { { } }, }, + [ALC269_FIXUP_LIFEBOOK_NO_HP_TO_LINEOUT] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc269_fixup_pincfg_no_hp_to_lineout, + }, [ALC269_FIXUP_AMIC] = { .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { @@ -5191,6 +5196,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x104d, 0x9084, "Sony VAIO", ALC275_FIXUP_SONY_HWEQ), SND_PCI_QUIRK(0x104d, 0x9099, "Sony VAIO S13", ALC275_FIXUP_SONY_DISABLE_AAMIX), SND_PCI_QUIRK(0x10cf, 0x1475, "Lifebook", ALC269_FIXUP_LIFEBOOK), + SND_PCI_QUIRK(0x10cf, 0x159f, "Lifebook E780", ALC269_FIXUP_LIFEBOOK_NO_HP_TO_LINEOUT), SND_PCI_QUIRK(0x10cf, 0x15dc, "Lifebook T731", ALC269_FIXUP_LIFEBOOK_HP_PIN), SND_PCI_QUIRK(0x10cf, 0x1757, "Lifebook E752", ALC269_FIXUP_LIFEBOOK_HP_PIN), SND_PCI_QUIRK(0x10cf, 0x1845, "Lifebook U904", ALC269_FIXUP_LIFEBOOK_EXTMIC), From 13e888e7678c544cf4460eb997f6b9f660193563 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 29 Jun 2015 10:56:53 +0200 Subject: [PATCH 0094/2091] ALSA: hda - Add a fixup for Dell E7450 commit 4275554dccdf0afac07b2b638ba7456095629558 upstream. Dell E7450 [0128:062e] needs the same quirk as other E7xx models. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=100571 Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 944a87b395b88..0e75998db39fc 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5109,6 +5109,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x05f6, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0615, "Dell Vostro 5470", ALC290_FIXUP_SUBWOOFER_HSJACK), SND_PCI_QUIRK(0x1028, 0x0616, "Dell Vostro 5470", ALC290_FIXUP_SUBWOOFER_HSJACK), + SND_PCI_QUIRK(0x1028, 0x062e, "Dell Latitude E7450", ALC292_FIXUP_DELL_E7X), SND_PCI_QUIRK(0x1028, 0x0638, "Dell Inspiron 5439", ALC290_FIXUP_MONO_SPEAKERS_HSJACK), SND_PCI_QUIRK(0x1028, 0x064a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x064b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), From 9e6004867b1a9e4ededb8d56edb827de67073291 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 10 Jun 2015 01:33:36 +0200 Subject: [PATCH 0095/2091] ACPI / init: Switch over platform to the ACPI mode later commit b064a8fa77dfead647564c46ac8fc5b13bd1ab73 upstream. Commit 73f7d1ca3263 "ACPI / init: Run acpi_early_init() before timekeeping_init()" moved the ACPI subsystem initialization, including the ACPI mode enabling, to an earlier point in the initialization sequence, to allow the timekeeping subsystem use ACPI early. 
Unfortunately, that resulted in boot regressions on some systems and the early ACPI initialization was moved toward its original position in the kernel initialization code by commit c4e1acbb35e4 "ACPI / init: Invoke early ACPI initialization later". However, that turns out to be insufficient, as boot is still broken on the Tyan S8812 mainboard. To fix that issue, split the ACPI early initialization code into two pieces so the majority of it still located in acpi_early_init() and the part switching over the platform into the ACPI mode goes into a new function, acpi_subsystem_init(), executed at the original early ACPI initialization spot. That fixes the Tyan S8812 boot problem, but still allows ACPI tables to be loaded earlier which is useful to the EFI code in efi_enter_virtual_mode(). Link: https://bugzilla.kernel.org/show_bug.cgi?id=97141 Fixes: 73f7d1ca3263 "ACPI / init: Run acpi_early_init() before timekeeping_init()" Reported-and-tested-by: Marius Tolzmann Signed-off-by: Rafael J. Wysocki Acked-by: Toshi Kani Reviewed-by: Hanjun Guo Reviewed-by: Lee, Chun-Yi Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/bus.c | 56 ++++++++++++++++++++++++++++++++------------ include/linux/acpi.h | 2 ++ init/main.c | 1 + 3 files changed, 44 insertions(+), 15 deletions(-) diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index c412fdb28d344..513e7230e3d04 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -470,6 +470,16 @@ static int __init acpi_bus_init_irq(void) return 0; } +/** + * acpi_early_init - Initialize ACPICA and populate the ACPI namespace. + * + * The ACPI tables are accessible after this, but the handling of events has not + * been initialized and the global lock is not available yet, so AML should not + * be executed at this point. + * + * Doing this before switching the EFI runtime services to virtual mode allows + * the EfiBootServices memory to be freed slightly earlier on boot. + */ void __init acpi_early_init(void) { acpi_status status; @@ -533,26 +543,42 @@ void __init acpi_early_init(void) acpi_gbl_FADT.sci_interrupt = acpi_sci_override_gsi; } #endif + return; + + error0: + disable_acpi(); +} + +/** + * acpi_subsystem_init - Finalize the early initialization of ACPI. + * + * Switch over the platform to the ACPI mode (if possible), initialize the + * handling of ACPI events, install the interrupt and global lock handlers. + * + * Doing this too early is generally unsafe, but at the same time it needs to be + * done before all things that really depend on ACPI. The right spot appears to + * be before finalizing the EFI initialization. + */ +void __init acpi_subsystem_init(void) +{ + acpi_status status; + + if (acpi_disabled) + return; status = acpi_enable_subsystem(~ACPI_NO_ACPI_ENABLE); if (ACPI_FAILURE(status)) { printk(KERN_ERR PREFIX "Unable to enable ACPI\n"); - goto error0; + disable_acpi(); + } else { + /* + * If the system is using ACPI then we can be reasonably + * confident that any regulators are managed by the firmware + * so tell the regulator core it has everything it needs to + * know. + */ + regulator_has_full_constraints(); } - - /* - * If the system is using ACPI then we can be reasonably - * confident that any regulators are managed by the firmware - * so tell the regulator core it has everything it needs to - * know. 
- */ - regulator_has_full_constraints(); - - return; - - error0: - disable_acpi(); - return; } static int __init acpi_bus_init(void) diff --git a/include/linux/acpi.h b/include/linux/acpi.h index e4da5e35e29cd..4550be3bb63b5 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -440,6 +440,7 @@ extern acpi_status acpi_pci_osc_control_set(acpi_handle handle, #define ACPI_OST_SC_INSERT_NOT_SUPPORTED 0x82 extern void acpi_early_init(void); +extern void acpi_subsystem_init(void); extern int acpi_nvs_register(__u64 start, __u64 size); @@ -494,6 +495,7 @@ static inline const char *acpi_dev_name(struct acpi_device *adev) } static inline void acpi_early_init(void) { } +static inline void acpi_subsystem_init(void) { } static inline int early_acpi_boot_init(void) { diff --git a/init/main.c b/init/main.c index 2115055faeac9..2a89545e0a5d6 100644 --- a/init/main.c +++ b/init/main.c @@ -664,6 +664,7 @@ asmlinkage __visible void __init start_kernel(void) check_bugs(); + acpi_subsystem_init(); sfi_init_late(); if (efi_enabled(EFI_RUNTIME_SERVICES)) { From 5ab4a6010600c24c37f5178d6da981e18cbb090a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 10 Jun 2015 01:32:38 +0200 Subject: [PATCH 0096/2091] ACPI / PM: Add missing pm_generic_complete() invocation commit 3d56402d3fa8d10749eeb36293dd1992bd5ad0c3 upstream. Add missing invocation of pm_generic_complete() to acpi_subsys_complete() to allow ->complete callbacks provided by the drivers of devices using the ACPI PM domain to be executed during system resume. Fixes: f25c0ae2b4c4 (ACPI / PM: Avoid resuming devices in ACPI PM domain during system suspend) Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/device_pm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c index 735db11a9b001..8217e0bda60f6 100644 --- a/drivers/acpi/device_pm.c +++ b/drivers/acpi/device_pm.c @@ -953,6 +953,7 @@ EXPORT_SYMBOL_GPL(acpi_subsys_prepare); */ void acpi_subsys_complete(struct device *dev) { + pm_generic_complete(dev); /* * If the device had been runtime-suspended before the system went into * the sleep state it is going out of and it has never been resumed till From c75c95bb4bb656d1d2914caf17d71317941100f2 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 18 Jun 2015 18:32:02 +0200 Subject: [PATCH 0097/2091] ACPI / PNP: Avoid conflicting resource reservations commit 0f1b414d190724617eb1cdd615592fa8cd9d0b50 upstream. Commit b9a5e5e18fbf "ACPI / init: Fix the ordering of acpi_reserve_resources()" overlooked the fact that the memory and/or I/O regions reserved by acpi_reserve_resources() may conflict with those reserved by the PNP "system" driver. If that conflict actually takes place, it causes the reservations made by the "system" driver to fail while before commit b9a5e5e18fbf all reservations made by it and by acpi_reserve_resources() would be successful. In turn, that allows the resources that haven't been reserved by the "system" driver to be used by others (e.g. PCI) which sometimes leads to functional problems (up to and including boot failures). To fix that issue, introduce a common resource reservation routine, acpi_reserve_region(), to be used by both acpi_reserve_resources() and the "system" driver, that will track all resources reserved by it and avoid making conflicting requests. 
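For orientation, a minimal usage sketch of the helper introduced by this patch follows; the port address, length and description string are invented for illustration, while the IORESOURCE_BUSY flag mirrors how the PNP "system" driver uses the call below:

	/*
	 * Hypothetical caller: reserve a 32-byte I/O window so that later
	 * reservations see it as already accounted for instead of failing
	 * with a conflict.
	 */
	if (acpi_reserve_region(0x800, 0x20, ACPI_ADR_SPACE_SYSTEM_IO,
				IORESOURCE_BUSY, "ACPI example") < 0)
		pr_warn("example: ACPI I/O region could not be reserved\n");

A return value of 0 means all non-overlapping parts of the requested range were reserved; a negative value reports the first error hit while attempting that.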
Link: https://bugzilla.kernel.org/show_bug.cgi?id=99831 Link: http://marc.info/?t=143389402600001&r=1&w=2 Fixes: b9a5e5e18fbf "ACPI / init: Fix the ordering of acpi_reserve_resources()" Reported-by: Roland Dreier Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/osl.c | 6 +- drivers/acpi/resource.c | 160 ++++++++++++++++++++++++++++++++++++++++ drivers/pnp/system.c | 35 ++++++--- include/linux/acpi.h | 10 +++ 4 files changed, 197 insertions(+), 14 deletions(-) diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index 7ccba395c9ddb..5226a8b921ae1 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -175,11 +175,7 @@ static void __init acpi_request_region (struct acpi_generic_address *gas, if (!addr || !length) return; - /* Resources are never freed */ - if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_IO) - request_region(addr, length, desc); - else if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) - request_mem_region(addr, length, desc); + acpi_reserve_region(addr, length, gas->space_id, 0, desc); } static void __init acpi_reserve_resources(void) diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index 8244f013f2109..fcb7807ea8b73 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #ifdef CONFIG_X86 @@ -621,3 +622,162 @@ int acpi_dev_filter_resource_type(struct acpi_resource *ares, return (type & types) ? 0 : 1; } EXPORT_SYMBOL_GPL(acpi_dev_filter_resource_type); + +struct reserved_region { + struct list_head node; + u64 start; + u64 end; +}; + +static LIST_HEAD(reserved_io_regions); +static LIST_HEAD(reserved_mem_regions); + +static int request_range(u64 start, u64 end, u8 space_id, unsigned long flags, + char *desc) +{ + unsigned int length = end - start + 1; + struct resource *res; + + res = space_id == ACPI_ADR_SPACE_SYSTEM_IO ? + request_region(start, length, desc) : + request_mem_region(start, length, desc); + if (!res) + return -EIO; + + res->flags &= ~flags; + return 0; +} + +static int add_region_before(u64 start, u64 end, u8 space_id, + unsigned long flags, char *desc, + struct list_head *head) +{ + struct reserved_region *reg; + int error; + + reg = kmalloc(sizeof(*reg), GFP_KERNEL); + if (!reg) + return -ENOMEM; + + error = request_range(start, end, space_id, flags, desc); + if (error) + return error; + + reg->start = start; + reg->end = end; + list_add_tail(®->node, head); + return 0; +} + +/** + * acpi_reserve_region - Reserve an I/O or memory region as a system resource. + * @start: Starting address of the region. + * @length: Length of the region. + * @space_id: Identifier of address space to reserve the region from. + * @flags: Resource flags to clear for the region after requesting it. + * @desc: Region description (for messages). + * + * Reserve an I/O or memory region as a system resource to prevent others from + * using it. If the new region overlaps with one of the regions (in the given + * address space) already reserved by this routine, only the non-overlapping + * parts of it will be reserved. + * + * Returned is either 0 (success) or a negative error code indicating a resource + * reservation problem. It is the code of the first encountered error, but the + * routine doesn't abort until it has attempted to request all of the parts of + * the new region that don't overlap with other regions reserved previously. + * + * The resources requested by this routine are never released. 
+ */ +int acpi_reserve_region(u64 start, unsigned int length, u8 space_id, + unsigned long flags, char *desc) +{ + struct list_head *regions; + struct reserved_region *reg; + u64 end = start + length - 1; + int ret = 0, error = 0; + + if (space_id == ACPI_ADR_SPACE_SYSTEM_IO) + regions = &reserved_io_regions; + else if (space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) + regions = &reserved_mem_regions; + else + return -EINVAL; + + if (list_empty(regions)) + return add_region_before(start, end, space_id, flags, desc, regions); + + list_for_each_entry(reg, regions, node) + if (reg->start == end + 1) { + /* The new region can be prepended to this one. */ + ret = request_range(start, end, space_id, flags, desc); + if (!ret) + reg->start = start; + + return ret; + } else if (reg->start > end) { + /* No overlap. Add the new region here and get out. */ + return add_region_before(start, end, space_id, flags, + desc, ®->node); + } else if (reg->end == start - 1) { + goto combine; + } else if (reg->end >= start) { + goto overlap; + } + + /* The new region goes after the last existing one. */ + return add_region_before(start, end, space_id, flags, desc, regions); + + overlap: + /* + * The new region overlaps an existing one. + * + * The head part of the new region immediately preceding the existing + * overlapping one can be combined with it right away. + */ + if (reg->start > start) { + error = request_range(start, reg->start - 1, space_id, flags, desc); + if (error) + ret = error; + else + reg->start = start; + } + + combine: + /* + * The new region is adjacent to an existing one. If it extends beyond + * that region all the way to the next one, it is possible to combine + * all three of them. + */ + while (reg->end < end) { + struct reserved_region *next = NULL; + u64 a = reg->end + 1, b = end; + + if (!list_is_last(®->node, regions)) { + next = list_next_entry(reg, node); + if (next->start <= end) + b = next->start - 1; + } + error = request_range(a, b, space_id, flags, desc); + if (!error) { + if (next && next->start == b + 1) { + reg->end = next->end; + list_del(&next->node); + kfree(next); + } else { + reg->end = end; + break; + } + } else if (next) { + if (!ret) + ret = error; + + reg = next; + } else { + break; + } + } + + return ret ? ret : error; +} +EXPORT_SYMBOL_GPL(acpi_reserve_region); diff --git a/drivers/pnp/system.c b/drivers/pnp/system.c index 49c1720df59a8..515f33882ab89 100644 --- a/drivers/pnp/system.c +++ b/drivers/pnp/system.c @@ -7,6 +7,7 @@ * Bjorn Helgaas */ +#include #include #include #include @@ -22,25 +23,41 @@ static const struct pnp_device_id pnp_dev_table[] = { {"", 0} }; +#ifdef CONFIG_ACPI +static bool __reserve_range(u64 start, unsigned int length, bool io, char *desc) +{ + u8 space_id = io ? ACPI_ADR_SPACE_SYSTEM_IO : ACPI_ADR_SPACE_SYSTEM_MEMORY; + return !acpi_reserve_region(start, length, space_id, IORESOURCE_BUSY, desc); +} +#else +static bool __reserve_range(u64 start, unsigned int length, bool io, char *desc) +{ + struct resource *res; + + res = io ? 
request_region(start, length, desc) : + request_mem_region(start, length, desc); + if (res) { + res->flags &= ~IORESOURCE_BUSY; + return true; + } + return false; +} +#endif + static void reserve_range(struct pnp_dev *dev, struct resource *r, int port) { char *regionid; const char *pnpid = dev_name(&dev->dev); resource_size_t start = r->start, end = r->end; - struct resource *res; + bool reserved; regionid = kmalloc(16, GFP_KERNEL); if (!regionid) return; snprintf(regionid, 16, "pnp %s", pnpid); - if (port) - res = request_region(start, end - start + 1, regionid); - else - res = request_mem_region(start, end - start + 1, regionid); - if (res) - res->flags &= ~IORESOURCE_BUSY; - else + reserved = __reserve_range(start, end - start + 1, !!port, regionid); + if (!reserved) kfree(regionid); /* @@ -49,7 +66,7 @@ static void reserve_range(struct pnp_dev *dev, struct resource *r, int port) * have double reservations. */ dev_info(&dev->dev, "%pR %s reserved\n", r, - res ? "has been" : "could not be"); + reserved ? "has been" : "could not be"); } static void reserve_resources_of_dev(struct pnp_dev *dev) diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 4550be3bb63b5..5da2d2e9d38e3 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -332,6 +332,9 @@ int acpi_check_region(resource_size_t start, resource_size_t n, int acpi_resources_are_enforced(void); +int acpi_reserve_region(u64 start, unsigned int length, u8 space_id, + unsigned long flags, char *desc); + #ifdef CONFIG_HIBERNATION void __init acpi_no_s4_hw_signature(void); #endif @@ -527,6 +530,13 @@ static inline int acpi_check_region(resource_size_t start, resource_size_t n, return 0; } +static inline int acpi_reserve_region(u64 start, unsigned int length, + u8 space_id, unsigned long flags, + char *desc) +{ + return -ENXIO; +} + struct acpi_table_header; static inline int acpi_table_parse(char *id, int (*handler)(struct acpi_table_header *)) From 8841d6439b1d6504d9c7498ebfdee1408e09460e Mon Sep 17 00:00:00 2001 From: Antonio Ospite Date: Mon, 4 May 2015 11:13:03 +0200 Subject: [PATCH 0098/2091] iio: accel: kxcjk-1013: add the "KXCJ9000" ACPI id commit 61e2c70da9cfc79e8485eafa0f98b5919b04bbe1 upstream. This id has been seen in the DSDT of the Teclast X98 Air 3G tablet based on Intel Bay Trail. Signed-off-by: Antonio Ospite Cc: Bastien Nocera Reviewed-by: Daniel Baluta Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/accel/kxcjk-1013.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/accel/kxcjk-1013.c b/drivers/iio/accel/kxcjk-1013.c index 51da3692d5613..5b7a860df524b 100644 --- a/drivers/iio/accel/kxcjk-1013.c +++ b/drivers/iio/accel/kxcjk-1013.c @@ -1418,6 +1418,7 @@ static const struct dev_pm_ops kxcjk1013_pm_ops = { static const struct acpi_device_id kx_acpi_match[] = { {"KXCJ1013", KXCJK1013}, {"KXCJ1008", KXCJ91008}, + {"KXCJ9000", KXCJ91008}, {"KXTJ1009", KXTJ21009}, {"SMO8500", KXCJ91008}, { }, From f3ff4345ef597115869a227dbf738dde157f8521 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 14 May 2015 16:55:18 -0300 Subject: [PATCH 0099/2091] tools selftests: Fix 'clean' target with make 3.81 commit 60df4642a83546fa6ea8286f5094ce8c0906c3ec upstream. Make 3.81 doesn't have the 'undefine' command. Using undefine to clear LDFLAGS fails when make version 3.81 is used. Fix it to use override to clear LDFLAGS. 
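For context, the reason the one-line change below is needed: the 'undefine' directive only exists in GNU make 3.82 and later, so make 3.81 rejects it, while an empty 'override' assignment is understood by both versions and also overrides any LDFLAGS passed on the command line. A small illustrative sketch of the two forms (not part of the patch):

ifeq (1,$(MAKELEVEL))
# 'undefine LDFLAGS' requires GNU make >= 3.82 and breaks the build with 3.81.
# An empty 'override' assignment clears the variable on both versions and
# still takes precedence over command-line definitions:
override LDFLAGS =
override MAKEFLAGS =
endif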
Tested-by: Shuah Khan Cc: David Ahern Cc: Ingo Molnar Cc: Michael Ellerman Link: http://lkml.kernel.org/r/20150514151225.GH23588@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Shuah Khan Cc: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 95abddcd78397..f76830643086e 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -27,7 +27,7 @@ TARGETS_HOTPLUG += memory-hotplug # Makefile to avoid test build failures when test # Makefile doesn't have explicit build rules. ifeq (1,$(MAKELEVEL)) -undefine LDFLAGS +override LDFLAGS = override MAKEFLAGS = endif From eb1eecd100ce48d4f8368a0c475ecb937abd40ec Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 20 Nov 2014 15:42:09 +0530 Subject: [PATCH 0100/2091] ARC: add smp barriers around atomics per Documentation/atomic_ops.txt commit 2576c28e3f623ed401db7e6197241865328620ef upstream. - arch_spin_lock/unlock were lacking the ACQUIRE/RELEASE barriers. Since ARCv2 only provides load/load, store/store and all/all, we need the full barrier. - LLOCK/SCOND based atomics, bitops, cmpxchg, which return modified values were lacking the explicit smp barriers. - Non LLOCK/SCOND variants don't need the explicit barriers since that is implicitly provided by the spin locks used to implement the critical section (the spin lock barriers in turn are also fixed in this commit as explained above). Cc: Paul E. McKenney Acked-by: Peter Zijlstra (Intel) Signed-off-by: Vineet Gupta Signed-off-by: Greg Kroah-Hartman --- arch/arc/include/asm/atomic.h | 21 +++++++++++++++++++++ arch/arc/include/asm/bitops.h | 19 +++++++++++++++++++ arch/arc/include/asm/cmpxchg.h | 17 +++++++++++++++++ arch/arc/include/asm/spinlock.h | 32 ++++++++++++++++++++++++++++++++ 4 files changed, 89 insertions(+) diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h index 9917a45fc430d..20b7dc17979ea 100644 --- a/arch/arc/include/asm/atomic.h +++ b/arch/arc/include/asm/atomic.h @@ -43,6 +43,12 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ { \ unsigned int temp; \ \ + /* \ + * Explicit full memory barrier needed before/after as \ + * LLOCK/SCOND thmeselves don't provide any such semantics \ + */ \ + smp_mb(); \ + \ __asm__ __volatile__( \ "1: llock %0, [%1] \n" \ " " #asm_op " %0, %0, %2 \n" \ @@ -52,6 +58,8 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ : "r"(&v->counter), "ir"(i) \ : "cc"); \ \ + smp_mb(); \ \ return temp; \ } @@ -105,6 +113,9 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ unsigned long flags; \ unsigned long temp; \ \ + /* \ + * spin lock/unlock provides the needed smp_mb() before/after \ + */ \ atomic_ops_lock(flags); \ temp = v->counter; \ temp c_op i; \ @@ -142,9 +153,19 @@ ATOMIC_OP(and, &=, and) #define __atomic_add_unless(v, a, u) \ ({ \ int c, old; \ + \ + /* \ + * Explicit full memory barrier needed before/after as \ + * LLOCK/SCOND thmeselves don't provide any such semantics \ + */ \ + smp_mb(); \ + \ c = atomic_read(v); \ while (c != (u) && (old = atomic_cmpxchg((v), c, c + (a))) != c)\ c = old; \ + \ + smp_mb(); \ + \ c; \ }) diff --git a/arch/arc/include/asm/bitops.h b/arch/arc/include/asm/bitops.h index 4051e9525939f..624a9d048ca9c 100644 --- a/arch/arc/include/asm/bitops.h +++ b/arch/arc/include/asm/bitops.h @@ -117,6 +117,12 @@ static inline int test_and_set_bit(unsigned
long nr, volatile unsigned long *m) if (__builtin_constant_p(nr)) nr &= 0x1f; + /* + * Explicit full memory barrier needed before/after as + * LLOCK/SCOND themselves don't provide any such semantics + */ + smp_mb(); + __asm__ __volatile__( "1: llock %0, [%2] \n" " bset %1, %0, %3 \n" @@ -126,6 +132,8 @@ static inline int test_and_set_bit(unsigned long nr, volatile unsigned long *m) : "r"(m), "ir"(nr) : "cc"); + smp_mb(); + return (old & (1 << nr)) != 0; } @@ -139,6 +147,8 @@ test_and_clear_bit(unsigned long nr, volatile unsigned long *m) if (__builtin_constant_p(nr)) nr &= 0x1f; + smp_mb(); + __asm__ __volatile__( "1: llock %0, [%2] \n" " bclr %1, %0, %3 \n" @@ -148,6 +158,8 @@ test_and_clear_bit(unsigned long nr, volatile unsigned long *m) : "r"(m), "ir"(nr) : "cc"); + smp_mb(); + return (old & (1 << nr)) != 0; } @@ -161,6 +173,8 @@ test_and_change_bit(unsigned long nr, volatile unsigned long *m) if (__builtin_constant_p(nr)) nr &= 0x1f; + smp_mb(); + __asm__ __volatile__( "1: llock %0, [%2] \n" " bxor %1, %0, %3 \n" @@ -170,6 +184,8 @@ test_and_change_bit(unsigned long nr, volatile unsigned long *m) : "r"(m), "ir"(nr) : "cc"); + smp_mb(); + return (old & (1 << nr)) != 0; } @@ -249,6 +265,9 @@ static inline int test_and_set_bit(unsigned long nr, volatile unsigned long *m) if (__builtin_constant_p(nr)) nr &= 0x1f; + /* + * spin lock/unlock provide the needed smp_mb() before/after + */ bitops_lock(flags); old = *m; diff --git a/arch/arc/include/asm/cmpxchg.h b/arch/arc/include/asm/cmpxchg.h index 03cd6894855d6..c9b1f461a587d 100644 --- a/arch/arc/include/asm/cmpxchg.h +++ b/arch/arc/include/asm/cmpxchg.h @@ -10,6 +10,8 @@ #define __ASM_ARC_CMPXCHG_H #include + +#include #include #ifdef CONFIG_ARC_HAS_LLSC @@ -19,6 +21,12 @@ __cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new) { unsigned long prev; + /* + * Explicit full memory barrier needed before/after as + * LLOCK/SCOND thmeselves don't provide any such semantics + */ + smp_mb(); + __asm__ __volatile__( "1: llock %0, [%1] \n" " brne %0, %2, 2f \n" @@ -30,6 +38,8 @@ __cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new) "r"(new) /* can't be "ir". scond can't take limm for "b" */ : "cc"); + smp_mb(); + return prev; } @@ -42,6 +52,9 @@ __cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new) int prev; volatile unsigned long *p = ptr; + /* + * spin lock/unlock provide the needed smp_mb() before/after + */ atomic_ops_lock(flags); prev = *p; if (prev == expected) @@ -77,12 +90,16 @@ static inline unsigned long __xchg(unsigned long val, volatile void *ptr, switch (size) { case 4: + smp_mb(); + __asm__ __volatile__( " ex %0, [%1] \n" : "+r"(val) : "r"(ptr) : "memory"); + smp_mb(); + return val; } return __xchg_bad_pointer(); diff --git a/arch/arc/include/asm/spinlock.h b/arch/arc/include/asm/spinlock.h index b6a8c2dfbe6e4..e1651df6a93d5 100644 --- a/arch/arc/include/asm/spinlock.h +++ b/arch/arc/include/asm/spinlock.h @@ -22,24 +22,46 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) { unsigned int tmp = __ARCH_SPIN_LOCK_LOCKED__; + /* + * This smp_mb() is technically superfluous, we only need the one + * after the lock for providing the ACQUIRE semantics. 
+ * However doing the "right" thing was regressing hackbench + * so keeping this, pending further investigation + */ + smp_mb(); + __asm__ __volatile__( "1: ex %0, [%1] \n" " breq %0, %2, 1b \n" : "+&r" (tmp) : "r"(&(lock->slock)), "ir"(__ARCH_SPIN_LOCK_LOCKED__) : "memory"); + + /* + * ACQUIRE barrier to ensure load/store after taking the lock + * don't "bleed-up" out of the critical section (leak-in is allowed) + * http://www.spinics.net/lists/kernel/msg2010409.html + * + * ARCv2 only has load-load, store-store and all-all barrier + * thus need the full all-all barrier + */ + smp_mb(); } static inline int arch_spin_trylock(arch_spinlock_t *lock) { unsigned int tmp = __ARCH_SPIN_LOCK_LOCKED__; + smp_mb(); + __asm__ __volatile__( "1: ex %0, [%1] \n" : "+r" (tmp) : "r"(&(lock->slock)) : "memory"); + smp_mb(); + return (tmp == __ARCH_SPIN_LOCK_UNLOCKED__); } @@ -47,12 +69,22 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) { unsigned int tmp = __ARCH_SPIN_LOCK_UNLOCKED__; + /* + * RELEASE barrier: given the instructions avail on ARCv2, full barrier + * is the only option + */ + smp_mb(); + __asm__ __volatile__( " ex %0, [%1] \n" : "+r" (tmp) : "r"(&(lock->slock)) : "memory"); + /* + * superfluous, but keeping for now - see pairing version in + * arch_spin_lock above + */ smp_mb(); } From 3e43ff498fb1a9f8a58c7746c207b8ffd9a9a87d Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 13 Nov 2014 15:54:01 +0530 Subject: [PATCH 0101/2091] ARC: add compiler barrier to LLSC based cmpxchg commit d57f727264f1425a94689bafc7e99e502cb135b5 upstream. When auditing cmpxchg call sites, Chuck noted that gcc was optimizing away some of the desired LDs. | do { | new = old = *ipi_data_ptr; | new |= 1U << msg; | } while (cmpxchg(ipi_data_ptr, old, new) != old); was generating to below | 8015cef8: ld r2,[r4,0] <-- First LD | 8015cefc: bset r1,r2,r1 | | 8015cf00: llock r3,[r4] <-- atomic op | 8015cf04: brne r3,r2,8015cf10 | 8015cf08: scond r1,[r4] | 8015cf0c: bnz 8015cf00 | | 8015cf10: brne r3,r2,8015cf00 <-- Branch doesn't go to orig LD Although this was fixed by adding a ACCESS_ONCE in this call site, it seems safer (for now at least) to add compiler barrier to LLSC based cmpxchg Reported-by: Chuck Jordan Acked-by: Peter Zijlstra (Intel) Signed-off-by: Vineet Gupta Signed-off-by: Greg Kroah-Hartman --- arch/arc/include/asm/cmpxchg.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/arc/include/asm/cmpxchg.h b/arch/arc/include/asm/cmpxchg.h index c9b1f461a587d..44fd531f4d7b9 100644 --- a/arch/arc/include/asm/cmpxchg.h +++ b/arch/arc/include/asm/cmpxchg.h @@ -33,10 +33,11 @@ __cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new) " scond %3, [%1] \n" " bnz 1b \n" "2: \n" - : "=&r"(prev) - : "r"(ptr), "ir"(expected), - "r"(new) /* can't be "ir". scond can't take limm for "b" */ - : "cc"); + : "=&r"(prev) /* Early clobber, to prevent reg reuse */ + : "r"(ptr), /* Not "m": llock only supports reg direct addr mode */ + "ir"(expected), + "r"(new) /* can't be "ir". scond can't take LIMM for "b" */ + : "cc", "memory"); /* so that gcc knows memory is being written here */ smp_mb(); From 16e860b30b2c3703d92a6cd701a6e602d897f481 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Sun, 14 Jun 2015 02:09:06 +0300 Subject: [PATCH 0102/2091] arc: fix use of uninitialized arc_pmu commit 7002f77541f877a5590615ceb3da32b114f14b62 upstream. 
static arc_pmu in the arch/arc/kernel/perf_event.c is not initialized as it's shadowed by a local variable of the same name in the arc_pmu_device_probe. Signed-off-by: Max Filippov Fixes: 03c94fcf954d "ARC: perf: make @arc_pmu static global" Signed-off-by: Vineet Gupta Signed-off-by: Greg Kroah-Hartman --- arch/arc/kernel/perf_event.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c index fd2ec50102f20..57b58f52d825b 100644 --- a/arch/arc/kernel/perf_event.c +++ b/arch/arc/kernel/perf_event.c @@ -266,7 +266,6 @@ static int arc_pmu_add(struct perf_event *event, int flags) static int arc_pmu_device_probe(struct platform_device *pdev) { - struct arc_pmu *arc_pmu; struct arc_reg_pct_build pct_bcr; struct arc_reg_cc_build cc_bcr; int i, j, ret; From 9f8e1f603600b300906e695d106a0e5cc39dcf82 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 19 May 2015 16:13:01 +0900 Subject: [PATCH 0103/2091] power_supply: Fix NULL pointer dereference during bq27x00_battery probe commit 8e59c7f23410d5ca6b350a178b861a3d68c49edf upstream. Power supply is often registered during probe of a driver. The power_supply_register() returns pointer to newly allocated structure as return value. However before returning the power_supply_register() calls back the get_property() method provided by the driver through uevent. In that time the driver probe is still in progress and driver did not assigned pointer to power supply to its local variables. This leads to NULL pointer dereference from get_property() function. Starting from bq27x00_battery_probe(): di->bat = power_supply_register() device_add() kobject_uevent() power_supply_uevent() power_supply_show_property() power_supply_get_property() bq27x00_battery_get_property() dereference of (di->bat) which is NULL here The first uevent of power supply (the one coming from device creation) should not call back to the driver. To prevent that from happening, increment the atomic use counter at the end of power_supply_register(). This means that power_supply_get_property() will return -ENODEV. IMPORTANT: The patch has impact on this first uevent sent from power supply because it will not contain properties from power supply. The uevent with properties will be sent later after indicating that power supply has changed. This also has a race now, but will be fixed in other patches. Reported-by: H. Nikolaus Schaller Signed-off-by: Krzysztof Kozlowski Fixes: 297d716f6260 ("power_supply: Change ownership from driver to core") Tested-By: Dr. H. Nikolaus Schaller Signed-off-by: Sebastian Reichel Signed-off-by: Greg Kroah-Hartman --- drivers/power/power_supply_core.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/power/power_supply_core.c b/drivers/power/power_supply_core.c index 2ed4a4a6b3c5c..15da277e0e8d9 100644 --- a/drivers/power/power_supply_core.c +++ b/drivers/power/power_supply_core.c @@ -659,7 +659,6 @@ __power_supply_register(struct device *parent, dev->release = power_supply_dev_release; dev_set_drvdata(dev, psy); psy->desc = desc; - atomic_inc(&psy->use_cnt); if (cfg) { psy->drv_data = cfg->drv_data; psy->of_node = cfg->of_node; @@ -700,6 +699,16 @@ __power_supply_register(struct device *parent, if (rc) goto create_triggers_failed; + /* + * Update use_cnt after any uevents (most notably from device_add()). + * We are here still during driver's probe but + * the power_supply_uevent() calls back driver's get_property + * method so: + * 1. 
Driver did not assigned the returned struct power_supply, + * 2. Driver could not finish initialization (anything in its probe + * after calling power_supply_register()). + */ + atomic_inc(&psy->use_cnt); power_supply_changed(psy); return psy; From f6795f11a4dfb7fbbd4b34668271a553141c0aa7 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 19 May 2015 16:13:02 +0900 Subject: [PATCH 0104/2091] power_supply: Fix possible NULL pointer dereference on early uevent commit 7f1a57fdd6cb6e7be2ed31878a34655df38e1861 upstream. Don't call the power_supply_changed() from power_supply_register() when parent is still probing because it may lead to accessing parent too early. In bq27x00_battery this caused a NULL pointer exception because uevent of power_supply_changed called back the get_property() method provided by the driver. The get_property() method accessed the pointer which should be returned by power_supply_register(). Starting from bq27x00_battery_probe(): di->bat = power_supply_register() power_supply_changed() kobject_uevent() power_supply_uevent() power_supply_show_property() power_supply_get_property() bq27x00_battery_get_property() dereference of di->bat which is NULL here The dereference of di->bat (value returned by power_supply_register()) is the currently visible problem. However calling back the methods provided by driver before ending the probe may lead to accessing other driver-related data which is not yet initialized. The call to power_supply_changed() is postponed till probing ends - mutex of parent device is released. Reported-by: H. Nikolaus Schaller Signed-off-by: Krzysztof Kozlowski Fixes: 297d716f6260 ("power_supply: Change ownership from driver to core") Tested-By: Dr. H. Nikolaus Schaller Signed-off-by: Sebastian Reichel Signed-off-by: Greg Kroah-Hartman --- drivers/power/power_supply_core.c | 50 +++++++++++++++++++++++++++---- include/linux/power_supply.h | 1 + 2 files changed, 46 insertions(+), 5 deletions(-) diff --git a/drivers/power/power_supply_core.c b/drivers/power/power_supply_core.c index 15da277e0e8d9..4bc0c7f459a52 100644 --- a/drivers/power/power_supply_core.c +++ b/drivers/power/power_supply_core.c @@ -30,6 +32,8 @@ EXPORT_SYMBOL_GPL(power_supply_notifier); static struct device_type power_supply_dev_type; +#define POWER_SUPPLY_DEFERRED_REGISTER_TIME msecs_to_jiffies(10) + static bool __power_supply_is_supplied_by(struct power_supply *supplier, struct power_supply *supply) { @@ -121,6 +123,30 @@ void power_supply_changed(struct power_supply *psy) } EXPORT_SYMBOL_GPL(power_supply_changed); +/* + * Notify that power supply was registered after parent finished the probing. + * + * Often power supply is registered from driver's probe function. However + * calling power_supply_changed() directly from power_supply_register() + * would lead to execution of get_property() function provided by the driver + * too early - before the probe ends. + * + * Avoid that by waiting on parent's mutex.
+ */ +static void power_supply_deferred_register_work(struct work_struct *work) +{ + struct power_supply *psy = container_of(work, struct power_supply, + deferred_register_work.work); + + if (psy->dev.parent) + mutex_lock(&psy->dev.parent->mutex); + + power_supply_changed(psy); + + if (psy->dev.parent) + mutex_unlock(&psy->dev.parent->mutex); +} + #ifdef CONFIG_OF #include @@ -645,6 +671,10 @@ __power_supply_register(struct device *parent, struct power_supply *psy; int rc; + if (!parent) + pr_warn("%s: Expected proper parent device for '%s'\n", + __func__, desc->name); + psy = kzalloc(sizeof(*psy), GFP_KERNEL); if (!psy) return ERR_PTR(-ENOMEM); @@ -671,6 +701,8 @@ __power_supply_register(struct device *parent, goto dev_set_name_failed; INIT_WORK(&psy->changed_work, power_supply_changed_work); + INIT_DELAYED_WORK(&psy->deferred_register_work, + power_supply_deferred_register_work); rc = power_supply_check_supplies(psy); if (rc) { @@ -709,7 +741,10 @@ __power_supply_register(struct device *parent, * after calling power_supply_register()). */ atomic_inc(&psy->use_cnt); - power_supply_changed(psy); + + queue_delayed_work(system_power_efficient_wq, + &psy->deferred_register_work, + POWER_SUPPLY_DEFERRED_REGISTER_TIME); return psy; @@ -729,7 +764,8 @@ __power_supply_register(struct device *parent, /** * power_supply_register() - Register new power supply - * @parent: Device to be a parent of power supply's device + * @parent: Device to be a parent of power supply's device, usually + * the device which probe function calls this * @desc: Description of power supply, must be valid through whole * lifetime of this power supply * @cfg: Run-time specific configuration accessed during registering, @@ -750,7 +786,8 @@ EXPORT_SYMBOL_GPL(power_supply_register); /** * power_supply_register() - Register new non-waking-source power supply - * @parent: Device to be a parent of power supply's device + * @parent: Device to be a parent of power supply's device, usually + * the device which probe function calls this * @desc: Description of power supply, must be valid through whole * lifetime of this power supply * @cfg: Run-time specific configuration accessed during registering, @@ -779,7 +816,8 @@ static void devm_power_supply_release(struct device *dev, void *res) /** * power_supply_register() - Register managed power supply - * @parent: Device to be a parent of power supply's device + * @parent: Device to be a parent of power supply's device, usually + * the device which probe function calls this * @desc: Description of power supply, must be valid through whole * lifetime of this power supply * @cfg: Run-time specific configuration accessed during registering, @@ -814,7 +852,8 @@ EXPORT_SYMBOL_GPL(devm_power_supply_register); /** * power_supply_register() - Register managed non-waking-source power supply - * @parent: Device to be a parent of power supply's device + * @parent: Device to be a parent of power supply's device, usually + * the device which probe function calls this * @desc: Description of power supply, must be valid through whole * lifetime of this power supply * @cfg: Run-time specific configuration accessed during registering, @@ -858,6 +897,7 @@ void power_supply_unregister(struct power_supply *psy) { WARN_ON(atomic_dec_return(&psy->use_cnt)); cancel_work_sync(&psy->changed_work); + cancel_delayed_work_sync(&psy->deferred_register_work); sysfs_remove_link(&psy->dev.kobj, "powers"); power_supply_remove_triggers(psy); psy_unregister_cooler(psy); diff --git a/include/linux/power_supply.h 
b/include/linux/power_supply.h index 75a1dd8dc56ee..a80f1fd01ddb6 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -237,6 +237,7 @@ struct power_supply { /* private */ struct device dev; struct work_struct changed_work; + struct delayed_work deferred_register_work; spinlock_t changed_lock; bool changed; atomic_t use_cnt; From 5acb6674291b15ea45d5bd65baff96a896f11ec6 Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Sat, 13 Jun 2015 08:51:17 +0300 Subject: [PATCH 0105/2091] mei: me: wait for power gating exit confirmation commit 3dc196eae1db548f05e53e5875ff87b8ff79f249 upstream. Fix the hbm power gating state machine so it will wait till it receives confirmation interrupt for the PG_ISOLATION_EXIT message. In process of the suspend flow the devices first have to exit from the power gating state (runtime pm resume). If we do not handle the confirmation interrupt after sending PG_ISOLATION_EXIT message, we may receive it already after the suspend flow has changed the device state and interrupt will be interpreted as a spurious event, consequently link reset will be invoked which will prevent the device from completing the suspend flow kernel: [6603] mei_reset:136: mei_me 0000:00:16.0: powering down: end of reset kernel: [476] mei_me_irq_thread_handler:643: mei_me 0000:00:16.0: function called after ISR to handle the interrupt processing. kernel: mei_me 0000:00:16.0: FW not ready: resetting Cc: Gabriele Mazzotta Link: https://bugzilla.kernel.org/show_bug.cgi?id=86241 Link: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=770397 Tested-by: Gabriele Mazzotta Signed-off-by: Alexander Usyskin Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/client.c | 2 +- drivers/misc/mei/hw-me.c | 59 +++++++++++++++++++++++++++++++++++--- drivers/misc/mei/hw-txe.c | 13 +++++++++ drivers/misc/mei/mei_dev.h | 11 +++++++ 4 files changed, 80 insertions(+), 5 deletions(-) diff --git a/drivers/misc/mei/client.c b/drivers/misc/mei/client.c index 1e99ef6a54a2b..b2b9f4382d771 100644 --- a/drivers/misc/mei/client.c +++ b/drivers/misc/mei/client.c @@ -699,7 +699,7 @@ void mei_host_client_init(struct work_struct *work) bool mei_hbuf_acquire(struct mei_device *dev) { if (mei_pg_state(dev) == MEI_PG_ON || - dev->pg_event == MEI_PG_EVENT_WAIT) { + mei_pg_in_transition(dev)) { dev_dbg(dev->dev, "device is in pg\n"); return false; } diff --git a/drivers/misc/mei/hw-me.c b/drivers/misc/mei/hw-me.c index 6fb75e62a764c..43d7101ff9933 100644 --- a/drivers/misc/mei/hw-me.c +++ b/drivers/misc/mei/hw-me.c @@ -663,17 +663,46 @@ int mei_me_pg_exit_sync(struct mei_device *dev) mutex_lock(&dev->device_lock); reply: - if (dev->pg_event == MEI_PG_EVENT_RECEIVED) - ret = mei_hbm_pg(dev, MEI_PG_ISOLATION_EXIT_RES_CMD); + if (dev->pg_event != MEI_PG_EVENT_RECEIVED) { + ret = -ETIME; + goto out; + } + + dev->pg_event = MEI_PG_EVENT_INTR_WAIT; + ret = mei_hbm_pg(dev, MEI_PG_ISOLATION_EXIT_RES_CMD); + if (ret) + return ret; + + mutex_unlock(&dev->device_lock); + wait_event_timeout(dev->wait_pg, + dev->pg_event == MEI_PG_EVENT_INTR_RECEIVED, timeout); + mutex_lock(&dev->device_lock); + + if (dev->pg_event == MEI_PG_EVENT_INTR_RECEIVED) + ret = 0; else ret = -ETIME; +out: dev->pg_event = MEI_PG_EVENT_IDLE; hw->pg_state = MEI_PG_OFF; return ret; } +/** + * mei_me_pg_in_transition - is device now in pg transition + * + * @dev: the device structure + * + * Return: true if in pg transition, false otherwise + */ +static bool mei_me_pg_in_transition(struct mei_device *dev) +{ + return 
dev->pg_event >= MEI_PG_EVENT_WAIT && + dev->pg_event <= MEI_PG_EVENT_INTR_WAIT; +} + /** * mei_me_pg_is_enabled - detect if PG is supported by HW * @@ -704,6 +733,24 @@ static bool mei_me_pg_is_enabled(struct mei_device *dev) return false; } +/** + * mei_me_pg_intr - perform pg processing in interrupt thread handler + * + * @dev: the device structure + */ +static void mei_me_pg_intr(struct mei_device *dev) +{ + struct mei_me_hw *hw = to_me_hw(dev); + + if (dev->pg_event != MEI_PG_EVENT_INTR_WAIT) + return; + + dev->pg_event = MEI_PG_EVENT_INTR_RECEIVED; + hw->pg_state = MEI_PG_OFF; + if (waitqueue_active(&dev->wait_pg)) + wake_up(&dev->wait_pg); +} + /** * mei_me_irq_quick_handler - The ISR of the MEI device * @@ -761,6 +808,8 @@ irqreturn_t mei_me_irq_thread_handler(int irq, void *dev_id) goto end; } + mei_me_pg_intr(dev); + /* check if we need to start the dev */ if (!mei_host_is_ready(dev)) { if (mei_hw_is_ready(dev)) { @@ -797,9 +846,10 @@ irqreturn_t mei_me_irq_thread_handler(int irq, void *dev_id) /* * During PG handshake only allowed write is the replay to the * PG exit message, so block calling write function - * if the pg state is not idle + * if the pg event is in PG handshake */ - if (dev->pg_event == MEI_PG_EVENT_IDLE) { + if (dev->pg_event != MEI_PG_EVENT_WAIT && + dev->pg_event != MEI_PG_EVENT_RECEIVED) { rets = mei_irq_write_handler(dev, &complete_list); dev->hbuf_is_ready = mei_hbuf_is_ready(dev); } @@ -824,6 +874,7 @@ static const struct mei_hw_ops mei_me_hw_ops = { .hw_config = mei_me_hw_config, .hw_start = mei_me_hw_start, + .pg_in_transition = mei_me_pg_in_transition, .pg_is_enabled = mei_me_pg_is_enabled, .intr_clear = mei_me_intr_clear, diff --git a/drivers/misc/mei/hw-txe.c b/drivers/misc/mei/hw-txe.c index 7abafe7d120d8..964136b357334 100644 --- a/drivers/misc/mei/hw-txe.c +++ b/drivers/misc/mei/hw-txe.c @@ -301,6 +301,18 @@ int mei_txe_aliveness_set_sync(struct mei_device *dev, u32 req) return 0; } +/** + * mei_txe_pg_in_transition - is device now in pg transition + * + * @dev: the device structure + * + * Return: true if in pg transition, false otherwise + */ +static bool mei_txe_pg_in_transition(struct mei_device *dev) +{ + return dev->pg_event == MEI_PG_EVENT_WAIT; +} + /** * mei_txe_pg_is_enabled - detect if PG is supported by HW * @@ -1138,6 +1150,7 @@ static const struct mei_hw_ops mei_txe_hw_ops = { .hw_config = mei_txe_hw_config, .hw_start = mei_txe_hw_start, + .pg_in_transition = mei_txe_pg_in_transition, .pg_is_enabled = mei_txe_pg_is_enabled, .intr_clear = mei_txe_intr_clear, diff --git a/drivers/misc/mei/mei_dev.h b/drivers/misc/mei/mei_dev.h index f066ecd719393..f84c39ee28a8c 100644 --- a/drivers/misc/mei/mei_dev.h +++ b/drivers/misc/mei/mei_dev.h @@ -271,6 +271,7 @@ struct mei_cl { * @fw_status : get fw status registers * @pg_state : power gating state of the device + * @pg_in_transition : is device now in pg transition * @pg_is_enabled : is power gating enabled * @intr_clear : clear pending interrupts @@ -300,6 +301,7 @@ struct mei_hw_ops { int (*fw_status)(struct mei_device *dev, struct mei_fw_status *fw_sts); enum mei_pg_state (*pg_state)(struct mei_device *dev); + bool (*pg_in_transition)(struct mei_device *dev); bool (*pg_is_enabled)(struct mei_device *dev); void (*intr_clear)(struct mei_device *dev); @@ -398,11 +400,15 @@ struct mei_cl_device { * @MEI_PG_EVENT_IDLE: the driver is not in power gating transition * @MEI_PG_EVENT_WAIT: the driver is waiting for a pg event to complete * @MEI_PG_EVENT_RECEIVED: the driver received pg event + * 
@MEI_PG_EVENT_INTR_WAIT: the driver is waiting for a pg event interrupt + * @MEI_PG_EVENT_INTR_RECEIVED: the driver received pg event interrupt */ enum mei_pg_event { MEI_PG_EVENT_IDLE, MEI_PG_EVENT_WAIT, MEI_PG_EVENT_RECEIVED, + MEI_PG_EVENT_INTR_WAIT, + MEI_PG_EVENT_INTR_RECEIVED, }; /** @@ -717,6 +723,11 @@ static inline enum mei_pg_state mei_pg_state(struct mei_device *dev) return dev->ops->pg_state(dev); } +static inline bool mei_pg_in_transition(struct mei_device *dev) +{ + return dev->ops->pg_in_transition(dev); +} + static inline bool mei_pg_is_enabled(struct mei_device *dev) { return dev->ops->pg_is_enabled(dev); From 6bc62fd9b8493bd6a07bb73a5d63ed28fea40b78 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Tue, 14 Apr 2015 10:27:26 +0300 Subject: [PATCH 0106/2091] mei: txe: reduce suspend/resume time commit fe292283c23329218e384bffc6cb4bfa3fd92277 upstream. HW has to be in known state before the initialisation sequence is started. The polling step for settling aliveness was set to 200ms while in practise this can be done in up to 30msecs. Signed-off-by: Tomas Winkler Signed-off-by: Barak Yoresh Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/hw-txe.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/misc/mei/hw-txe.c b/drivers/misc/mei/hw-txe.c index 964136b357334..bae680c648ffc 100644 --- a/drivers/misc/mei/hw-txe.c +++ b/drivers/misc/mei/hw-txe.c @@ -16,6 +16,7 @@ #include #include +#include #include #include #include @@ -218,26 +219,25 @@ static u32 mei_txe_aliveness_get(struct mei_device *dev) * * Polls for HICR_HOST_ALIVENESS_RESP.ALIVENESS_RESP to be set * - * Return: > 0 if the expected value was received, -ETIME otherwise + * Return: 0 if the expected value was received, -ETIME otherwise */ static int mei_txe_aliveness_poll(struct mei_device *dev, u32 expected) { struct mei_txe_hw *hw = to_txe_hw(dev); - int t = 0; + ktime_t stop, start; + start = ktime_get(); + stop = ktime_add(start, ms_to_ktime(SEC_ALIVENESS_WAIT_TIMEOUT)); do { hw->aliveness = mei_txe_aliveness_get(dev); if (hw->aliveness == expected) { dev->pg_event = MEI_PG_EVENT_IDLE; - dev_dbg(dev->dev, - "aliveness settled after %d msecs\n", t); - return t; + dev_dbg(dev->dev, "aliveness settled after %lld usecs\n", + ktime_to_us(ktime_sub(ktime_get(), start))); + return 0; } - mutex_unlock(&dev->device_lock); - msleep(MSEC_PER_SEC / 5); - mutex_lock(&dev->device_lock); - t += MSEC_PER_SEC / 5; - } while (t < SEC_ALIVENESS_WAIT_TIMEOUT); + usleep_range(20, 50); + } while (ktime_before(ktime_get(), stop)); dev->pg_event = MEI_PG_EVENT_IDLE; dev_err(dev->dev, "aliveness timed out\n"); From eeac30f17f468292f1141c6f5d95afc38a18260f Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 12 Jun 2015 11:24:41 +0100 Subject: [PATCH 0107/2091] arm64: Do not attempt to use init_mm in reset_context() commit 565630d503ef24e44c252bed55571b3a0d68455f upstream. After secondary CPU boot or hotplug, the active_mm of the idle thread is &init_mm. The init_mm.pgd (swapper_pg_dir) is only meant for TTBR1_EL1 and must not be set in TTBR0_EL1. Since when active_mm == &init_mm the TTBR0_EL1 is already set to the reserved value, there is no need to perform any context reset. 
Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/mm/context.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index baa758d370210..76c1e6cd36fc4 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -92,6 +92,14 @@ static void reset_context(void *info) unsigned int cpu = smp_processor_id(); struct mm_struct *mm = current->active_mm; + /* + * current->active_mm could be init_mm for the idle thread immediately + * after secondary CPU boot or hotplug. TTBR0_EL1 is already set to + * the reserved value, so no need to reset any context. + */ + if (mm == &init_mm) + return; + smp_rmb(); asid = cpu_last_asid + cpu; From f6b01e505aa5785eaeda028fc0f151599f90b557 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 15 Jun 2015 16:40:27 +0100 Subject: [PATCH 0108/2091] arm64: entry: fix context tracking for el0_sp_pc commit 46b0567c851cf85d6ba6f23eef385ec9111d09bc upstream. Commit 6c81fe7925cc4c42 ("arm64: enable context tracking") did not update el0_sp_pc to use ct_user_exit, but this appears to have been unintentional. In commit 6ab6463aeb5fbc75 ("arm64: adjust el0_sync so that a function can be called") we made x0 available, and in the return to userspace we call ct_user_enter in the kernel_exit macro. Due to this, we currently don't correctly inform RCU of the user->kernel transition, and may erroneously account for time spent in the kernel as if we were in an extended quiescent state when CONFIG_CONTEXT_TRACKING is enabled. As we do record the kernel->user transition, a userspace application making accesses from an unaligned stack pointer can demonstrate the imbalance, provoking the following warning: ------------[ cut here ]------------ WARNING: CPU: 2 PID: 3660 at kernel/context_tracking.c:75 context_tracking_enter+0xd8/0xe4() Modules linked in: CPU: 2 PID: 3660 Comm: a.out Not tainted 4.1.0-rc7+ #8 Hardware name: ARM Juno development board (r0) (DT) Call trace: [] dump_backtrace+0x0/0x124 [] show_stack+0x10/0x1c [] dump_stack+0x84/0xc8 [] warn_slowpath_common+0x98/0xd0 [] warn_slowpath_null+0x14/0x20 [] context_tracking_enter+0xd4/0xe4 [] preempt_schedule_irq+0xd4/0x114 [] el1_preempt+0x4/0x28 [] exit_files+0x38/0x4c [] do_exit+0x430/0x978 [] do_group_exit+0x40/0xd4 [] get_signal+0x23c/0x4f4 [] do_signal+0x1ac/0x518 [] do_notify_resume+0x5c/0x68 ---[ end trace 963c192600337066 ]--- This patch adds the missing ct_user_exit to the el0_sp_pc entry path, correcting the context tracking for this case. Signed-off-by: Mark Rutland Acked-by: Will Deacon Fixes: 6c81fe7925cc ("arm64: enable context tracking") Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/entry.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 959fe87335609..bddd04d031db7 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -517,6 +517,7 @@ el0_sp_pc: mrs x26, far_el1 // enable interrupts before calling the main handler enable_dbg_and_irq + ct_user_exit mov x0, x26 mov x1, x25 mov x2, sp From da8de4cde423fcd62233467ff9c067137beda0e8 Mon Sep 17 00:00:00 2001 From: Dave P Martin Date: Tue, 16 Jun 2015 17:38:47 +0100 Subject: [PATCH 0109/2091] arm64: mm: Fix freeing of the wrong memmap entries with !SPARSEMEM_VMEMMAP commit b9bcc919931611498e856eae9bf66337330d04cc upstream. The memmap freeing code in free_unused_memmap() computes the end of each memblock by adding the memblock size onto the base. 
However, if SPARSEMEM is enabled then the value (start) used for the base may already have been rounded downwards to work out which memmap entries to free after the previous memblock. This may cause memmap entries that are in use to get freed. In general, you're not likely to hit this problem unless there are at least 2 memblocks and one of them is not aligned to a sparsemem section boundary. Note that carve-outs can increase the number of memblocks by splitting the regions listed in the device tree. This problem doesn't occur with SPARSEMEM_VMEMMAP, because the vmemmap code deals with freeing the unused regions of the memmap instead of requiring the arch code to do it. This patch gets the memblock base out of the memblock directly when computing the block end address to ensure the correct value is used. Signed-off-by: Dave Martin Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/mm/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 597831bdddf3d..ad87ce826cce4 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -262,7 +262,7 @@ static void __init free_unused_memmap(void) * memmap entries are valid from the bank end aligned to * MAX_ORDER_NR_PAGES. */ - prev_end = ALIGN(start + __phys_to_pfn(reg->size), + prev_end = ALIGN(__phys_to_pfn(reg->base + reg->size), MAX_ORDER_NR_PAGES); } From e3334dca73de24e5798759b14ed9e4f58e241fbd Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 19 Jun 2015 13:56:33 +0100 Subject: [PATCH 0110/2091] arm64: vdso: work-around broken ELF toolchains in Makefile commit 6f1a6ae87c0c60d7c462ef8fd071f291aa7a9abb upstream. When building the kernel with a bare-metal (ELF) toolchain, the -shared option may not be passed down to collect2, resulting in silent corruption of the vDSO image (in particular, the DYNAMIC section is omitted). The effect of this corruption is that the dynamic linker fails to find the vDSO symbols and libc is instead used for the syscalls that we intended to optimise (e.g. gettimeofday). Functionally, there is no issue as the sigreturn trampoline is still intact and located by the kernel. This patch fixes the problem by explicitly passing -shared to the linker when building the vDSO. Reported-by: Szabolcs Nagy Reported-by: James Greenlaigh Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/vdso/Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index ff3bddea482dd..f6fe17d88da55 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -15,6 +15,10 @@ ccflags-y := -shared -fno-common -fno-builtin ccflags-y += -nostdlib -Wl,-soname=linux-vdso.so.1 \ $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) +# Workaround for bare-metal (ELF) toolchains that neglect to pass -shared +# down to collect2, resulting in silent corruption of the vDSO image. +ccflags-y += -Wl,-shared + obj-y += vdso.o extra-y += vdso.lds vdso-offsets.h CPPFLAGS_vdso.lds += -P -C -U$(ARCH) From 3baf726f001b69454f3eb18a589c508992622be9 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 24 Jun 2015 16:58:26 -0700 Subject: [PATCH 0111/2091] mm: kmemleak: allow safe memory scanning during kmemleak disabling commit c5f3b1a51a591c18c8b33983908e7fdda6ae417e upstream. The kmemleak scanning thread can run for minutes. 
Callbacks like kmemleak_free() are allowed during this time, the race being taken care of by the object->lock spinlock. Such lock also prevents a memory block from being freed or unmapped while it is being scanned by blocking the kmemleak_free() -> ... -> __delete_object() function until the lock is released in scan_object(). When a kmemleak error occurs (e.g. it fails to allocate its metadata), kmemleak_enabled is set and __delete_object() is no longer called on freed objects. If kmemleak_scan is running at the same time, kmemleak_free() no longer waits for the object scanning to complete, allowing the corresponding memory block to be freed or unmapped (in the case of vfree()). This leads to kmemleak_scan potentially triggering a page fault. This patch separates the kmemleak_free() enabling/disabling from the overall kmemleak_enabled knob so that we can defer the disabling of the object freeing tracking until the scanning thread has completed. The kmemleak_free_part() is deliberately ignored by this patch since this is only called during boot before the scanning thread started. Signed-off-by: Catalin Marinas Reported-by: Vignesh Radhakrishnan Tested-by: Vignesh Radhakrishnan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/kmemleak.c | 19 ++++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/mm/kmemleak.c b/mm/kmemleak.c index f0fe4f2c1fa7a..41df5b8efd25f 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -195,6 +195,8 @@ static struct kmem_cache *scan_area_cache; /* set if tracing memory operations is enabled */ static int kmemleak_enabled; +/* same as above but only for the kmemleak_free() callback */ +static int kmemleak_free_enabled; /* set in the late_initcall if there were no errors */ static int kmemleak_initialized; /* enables or disables early logging of the memory operations */ @@ -942,7 +944,7 @@ void __ref kmemleak_free(const void *ptr) { pr_debug("%s(0x%p)\n", __func__, ptr); - if (kmemleak_enabled && ptr && !IS_ERR(ptr)) + if (kmemleak_free_enabled && ptr && !IS_ERR(ptr)) delete_object_full((unsigned long)ptr); else if (kmemleak_early_log) log_early(KMEMLEAK_FREE, ptr, 0, 0); @@ -982,7 +984,7 @@ void __ref kmemleak_free_percpu(const void __percpu *ptr) pr_debug("%s(0x%p)\n", __func__, ptr); - if (kmemleak_enabled && ptr && !IS_ERR(ptr)) + if (kmemleak_free_enabled && ptr && !IS_ERR(ptr)) for_each_possible_cpu(cpu) delete_object_full((unsigned long)per_cpu_ptr(ptr, cpu)); @@ -1750,6 +1752,13 @@ static void kmemleak_do_cleanup(struct work_struct *work) mutex_lock(&scan_mutex); stop_scan_thread(); + /* + * Once the scan thread has stopped, it is safe to no longer track + * object freeing. Ordering of the scan thread stopping and the memory + * accesses below is guaranteed by the kthread_stop() function.
+ */ + kmemleak_free_enabled = 0; + if (!kmemleak_found_leaks) __kmemleak_do_cleanup(); else @@ -1776,6 +1785,8 @@ static void kmemleak_disable(void) /* check whether it is too early for a kernel thread */ if (kmemleak_initialized) schedule_work(&cleanup_work); + else + kmemleak_free_enabled = 0; pr_info("Kernel memory leak detector disabled\n"); } @@ -1840,8 +1851,10 @@ void __init kmemleak_init(void) if (kmemleak_error) { local_irq_restore(flags); return; - } else + } else { kmemleak_enabled = 1; + kmemleak_free_enabled = 1; + } local_irq_restore(flags); /* From 03445a4c2324f4adddd6b6c9b92879c1c754238a Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Wed, 24 Jun 2015 16:58:51 -0700 Subject: [PATCH 0112/2091] mm: kmemleak_alloc_percpu() should follow the gfp from per_alloc() commit 8a8c35fadfaf55629a37ef1a8ead1b8fb32581d2 upstream. Beginning at commit d52d3997f843 ("ipv6: Create percpu rt6_info"), the following INFO splat is logged: =============================== [ INFO: suspicious RCU usage. ] 4.1.0-rc7-next-20150612 #1 Not tainted ------------------------------- kernel/sched/core.c:7318 Illegal context switch in RCU-bh read-side critical section! other info that might help us debug this: rcu_scheduler_active = 1, debug_locks = 0 3 locks held by systemd/1: #0: (rtnl_mutex){+.+.+.}, at: [] rtnetlink_rcv+0x1f/0x40 #1: (rcu_read_lock_bh){......}, at: [] ipv6_add_addr+0x62/0x540 #2: (addrconf_hash_lock){+...+.}, at: [] ipv6_add_addr+0x184/0x540 stack backtrace: CPU: 0 PID: 1 Comm: systemd Not tainted 4.1.0-rc7-next-20150612 #1 Hardware name: TOSHIBA TECRA A50-A/TECRA A50-A, BIOS Version 4.20 04/17/2014 Call Trace: dump_stack+0x4c/0x6e lockdep_rcu_suspicious+0xe7/0x120 ___might_sleep+0x1d5/0x1f0 __might_sleep+0x4d/0x90 kmem_cache_alloc+0x47/0x250 create_object+0x39/0x2e0 kmemleak_alloc_percpu+0x61/0xe0 pcpu_alloc+0x370/0x630 Additional backtrace lines are truncated. In addition, the above splat is followed by several "BUG: sleeping function called from invalid context at mm/slub.c:1268" outputs. As suggested by Martin KaFai Lau, these are the clue to the fix. Routine kmemleak_alloc_percpu() always uses GFP_KERNEL for its allocations, whereas it should follow the gfp from its callers. 
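As an illustration (a minimal sketch, not taken from the ipv6 or percpu code; the names below are made up), the problematic pattern is a percpu allocation performed in atomic context: the caller's GFP_ATOMIC reaches pcpu_alloc(), but before this change the nested kmemleak metadata allocation still used GFP_KERNEL and could sleep:

	/* illustrative sketch only */
	struct demo_stats __percpu *stats;

	spin_lock_bh(&demo_lock);			/* atomic context */
	stats = alloc_percpu_gfp(struct demo_stats, GFP_ATOMIC);
	/*
	 * With this patch, pcpu_alloc() forwards the caller's gfp, so
	 * kmemleak_alloc_percpu(ptr, size, GFP_ATOMIC) allocates its
	 * tracking metadata without sleeping.
	 */
	spin_unlock_bh(&demo_lock);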
Reviewed-by: Catalin Marinas Reviewed-by: Kamalesh Babulal Acked-by: Martin KaFai Lau Signed-off-by: Larry Finger Cc: Martin KaFai Lau Cc: Catalin Marinas Cc: Tejun Heo Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/kmemleak.h | 6 ++++-- mm/kmemleak.c | 9 +++++---- mm/percpu.c | 2 +- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/include/linux/kmemleak.h b/include/linux/kmemleak.h index e705467ddb478..d0a1f99e24e3e 100644 --- a/include/linux/kmemleak.h +++ b/include/linux/kmemleak.h @@ -28,7 +28,8 @@ extern void kmemleak_init(void) __ref; extern void kmemleak_alloc(const void *ptr, size_t size, int min_count, gfp_t gfp) __ref; -extern void kmemleak_alloc_percpu(const void __percpu *ptr, size_t size) __ref; +extern void kmemleak_alloc_percpu(const void __percpu *ptr, size_t size, + gfp_t gfp) __ref; extern void kmemleak_free(const void *ptr) __ref; extern void kmemleak_free_part(const void *ptr, size_t size) __ref; extern void kmemleak_free_percpu(const void __percpu *ptr) __ref; @@ -71,7 +72,8 @@ static inline void kmemleak_alloc_recursive(const void *ptr, size_t size, gfp_t gfp) { } -static inline void kmemleak_alloc_percpu(const void __percpu *ptr, size_t size) +static inline void kmemleak_alloc_percpu(const void __percpu *ptr, size_t size, + gfp_t gfp) { } static inline void kmemleak_free(const void *ptr) diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 41df5b8efd25f..3716cdb8ba420 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -909,12 +909,13 @@ EXPORT_SYMBOL_GPL(kmemleak_alloc); * kmemleak_alloc_percpu - register a newly allocated __percpu object * @ptr: __percpu pointer to beginning of the object * @size: size of the object + * @gfp: flags used for kmemleak internal memory allocations * * This function is called from the kernel percpu allocator when a new object - * (memory block) is allocated (alloc_percpu). It assumes GFP_KERNEL - * allocation. + * (memory block) is allocated (alloc_percpu). */ -void __ref kmemleak_alloc_percpu(const void __percpu *ptr, size_t size) +void __ref kmemleak_alloc_percpu(const void __percpu *ptr, size_t size, + gfp_t gfp) { unsigned int cpu; @@ -927,7 +928,7 @@ void __ref kmemleak_alloc_percpu(const void __percpu *ptr, size_t size) if (kmemleak_enabled && ptr && !IS_ERR(ptr)) for_each_possible_cpu(cpu) create_object((unsigned long)per_cpu_ptr(ptr, cpu), - size, 0, GFP_KERNEL); + size, 0, gfp); else if (kmemleak_early_log) log_early(KMEMLEAK_ALLOC_PERCPU, ptr, size, 0); } diff --git a/mm/percpu.c b/mm/percpu.c index dfd02484e8de1..2dd74487a0aff 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1030,7 +1030,7 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved, memset((void *)pcpu_chunk_addr(chunk, cpu, 0) + off, 0, size); ptr = __addr_to_pcpu_ptr(chunk->base_addr + off); - kmemleak_alloc_percpu(ptr, size); + kmemleak_alloc_percpu(ptr, size, gfp); return ptr; fail_unlock: From 1021c97205005db4f101e7fa55095ec13118ac03 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Wed, 24 Jun 2015 16:58:48 -0700 Subject: [PATCH 0113/2091] mm, thp: respect MPOL_PREFERRED policy with non-local node commit 0867a57c4f80a566dda1bac975b42fcd857cb489 upstream. Since commit 077fcf116c8c ("mm/thp: allocate transparent hugepages on local node"), we handle THP allocations on page fault in a special way - for non-interleave memory policies, the allocation is only attempted on the node local to the current CPU, if the policy's nodemask allows the node. 
This is motivated by the assumption that THP benefits cannot offset the cost of remote accesses, so it's better to fallback to base pages on the local node (which might still be available, while huge pages are not due to fragmentation) than to allocate huge pages on a remote node. The nodemask check prevents us from violating e.g. MPOL_BIND policies where the local node is not among the allowed nodes. However, the current implementation can still give surprising results for the MPOL_PREFERRED policy when the preferred node is different than the current CPU's local node. In such case we should honor the preferred node and not use the local node, which is what this patch does. If hugepage allocation on the preferred node fails, we fall back to base pages and don't try other nodes, with the same motivation as is done for the local node hugepage allocations. The patch also moves the MPOL_INTERLEAVE check around to simplify the hugepage specific test. The difference can be demonstrated using in-tree transhuge-stress test on the following 2-node machine where half memory on one node was occupied to show the difference. > numactl --hardware available: 2 nodes (0-1) node 0 cpus: 0 1 2 3 4 5 6 7 8 9 10 11 24 25 26 27 28 29 30 31 32 33 34 35 node 0 size: 7878 MB node 0 free: 3623 MB node 1 cpus: 12 13 14 15 16 17 18 19 20 21 22 23 36 37 38 39 40 41 42 43 44 45 46 47 node 1 size: 8045 MB node 1 free: 7818 MB node distances: node 0 1 0: 10 21 1: 21 10 Before the patch: > numactl -p0 -C0 ./transhuge-stress transhuge-stress: 2.197 s/loop, 0.276 ms/page, 7249.168 MiB/s 7962 succeed, 0 failed, 1786 different pages > numactl -p0 -C12 ./transhuge-stress transhuge-stress: 2.962 s/loop, 0.372 ms/page, 5376.172 MiB/s 7962 succeed, 0 failed, 3873 different pages Number of successful THP allocations corresponds to free memory on node 0 in the first case and node 1 in the second case, i.e. -p parameter is ignored and cpu binding "wins". After the patch: > numactl -p0 -C0 ./transhuge-stress transhuge-stress: 2.183 s/loop, 0.274 ms/page, 7295.516 MiB/s 7962 succeed, 0 failed, 1760 different pages > numactl -p0 -C12 ./transhuge-stress transhuge-stress: 2.878 s/loop, 0.361 ms/page, 5533.638 MiB/s 7962 succeed, 0 failed, 1750 different pages > numactl -p1 -C0 ./transhuge-stress transhuge-stress: 4.628 s/loop, 0.581 ms/page, 3440.893 MiB/s 7962 succeed, 0 failed, 3918 different pages The -p parameter is respected regardless of cpu binding. > numactl -C0 ./transhuge-stress transhuge-stress: 2.202 s/loop, 0.277 ms/page, 7230.003 MiB/s 7962 succeed, 0 failed, 1750 different pages > numactl -C12 ./transhuge-stress transhuge-stress: 3.020 s/loop, 0.379 ms/page, 5273.324 MiB/s 7962 succeed, 0 failed, 3916 different pages Without -p parameter, hugepage restriction to CPU-local node works as before. Fixes: 077fcf116c8c ("mm/thp: allocate transparent hugepages on local node") Signed-off-by: Vlastimil Babka Cc: Aneesh Kumar K.V Acked-by: David Rientjes Cc: Kirill A. 
Shutemov Cc: Andrea Arcangeli Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/mempolicy.c | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 747743237d9f4..99d4c1d0b8583 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1972,35 +1972,41 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma, pol = get_vma_policy(vma, addr); cpuset_mems_cookie = read_mems_allowed_begin(); - if (unlikely(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hugepage && - pol->mode != MPOL_INTERLEAVE)) { + if (pol->mode == MPOL_INTERLEAVE) { + unsigned nid; + + nid = interleave_nid(pol, vma, addr, PAGE_SHIFT + order); + mpol_cond_put(pol); + page = alloc_page_interleave(gfp, order, nid); + goto out; + } + + if (unlikely(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hugepage)) { + int hpage_node = node; + /* * For hugepage allocation and non-interleave policy which - * allows the current node, we only try to allocate from the - * current node and don't fall back to other nodes, as the - * cost of remote accesses would likely offset THP benefits. + * allows the current node (or other explicitly preferred + * node) we only try to allocate from the current/preferred + * node and don't fall back to other nodes, as the cost of + * remote accesses would likely offset THP benefits. * * If the policy is interleave, or does not allow the current * node in its nodemask, we allocate the standard way. */ + if (pol->mode == MPOL_PREFERRED && + !(pol->flags & MPOL_F_LOCAL)) + hpage_node = pol->v.preferred_node; + nmask = policy_nodemask(gfp, pol); - if (!nmask || node_isset(node, *nmask)) { + if (!nmask || node_isset(hpage_node, *nmask)) { mpol_cond_put(pol); - page = alloc_pages_exact_node(node, + page = alloc_pages_exact_node(hpage_node, gfp | __GFP_THISNODE, order); goto out; } } - if (pol->mode == MPOL_INTERLEAVE) { - unsigned nid; - - nid = interleave_nid(pol, vma, addr, PAGE_SHIFT + order); - mpol_cond_put(pol); - page = alloc_page_interleave(gfp, order, nid); - goto out; - } - nmask = policy_nodemask(gfp, pol); zl = policy_zonelist(gfp, pol, node); mpol_cond_put(pol); From 88822cdb258714ce2fdc9a6eed87e7dd7d205a6b Mon Sep 17 00:00:00 2001 From: Arun Chandran Date: Mon, 15 Jun 2015 15:59:02 +0530 Subject: [PATCH 0114/2091] regmap: Fix regmap_bulk_read in BE mode commit 15b8d2c41fe5839582029f65c5f7004db451cc2b upstream. In big endian mode regmap_bulk_read gives incorrect data for byte reads. This is because memcpy of a single byte from an address after full word read gives different results when endianness differs. ie. we get little-end in LE and big-end in BE. 
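A small sketch (not part of the patch) of why a single-byte memcpy from a full word differs between endiannesses:

	/* illustrative sketch only */
	unsigned int ival = 0x12;	/* value read back from the register */
	u8 out;

	memcpy(&out, &ival, 1);
	/*
	 * little-endian memory layout: 12 00 00 00 -> out == 0x12 (wanted byte)
	 * big-endian memory layout:    00 00 00 12 -> out == 0x00 (wrong byte)
	 * Using map->format.format_val(), as in the hunk below, writes the
	 * value out in the regmap's configured format on both.
	 */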
Signed-off-by: Arun Chandran Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/base/regmap/regmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 6273ff072f3ea..9f7f78ede01b1 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -2318,7 +2318,7 @@ int regmap_bulk_read(struct regmap *map, unsigned int reg, void *val, &ival); if (ret != 0) return ret; - memcpy(val + (i * val_bytes), &ival, val_bytes); + map->format.format_val(val + (i * val_bytes), ival, 0); } } From 6489f7a496378983980e8219dfaeaf66d581590b Mon Sep 17 00:00:00 2001 From: Maxime Coquelin Date: Tue, 16 Jun 2015 13:53:19 +0200 Subject: [PATCH 0115/2091] regmap: Fix possible shift overflow in regmap_field_init() commit 921cc29473a0d7c109105c1876ddb432f4a4be7d upstream. The way the mask is generated in regmap_field_init() is wrong. Indeed, a field initialized with msb = 31 and lsb = 0 provokes a shift overflow while calculating the mask field. On some 32 bits architectures, such as x86, the generated mask is 0, instead of the expected 0xffffffff. This patch uses GENMASK() to fix the problem, as this macro is already safe regarding shift overflow. Signed-off-by: Maxime Coquelin Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/base/regmap/regmap.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 9f7f78ede01b1..1c76dcb502cf3 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -945,11 +945,10 @@ EXPORT_SYMBOL_GPL(devm_regmap_init); static void regmap_field_init(struct regmap_field *rm_field, struct regmap *regmap, struct reg_field reg_field) { - int field_bits = reg_field.msb - reg_field.lsb + 1; rm_field->regmap = regmap; rm_field->reg = reg_field.reg; rm_field->shift = reg_field.lsb; - rm_field->mask = ((BIT(field_bits) - 1) << reg_field.lsb); + rm_field->mask = GENMASK(reg_field.msb, reg_field.lsb); rm_field->id_size = reg_field.id_size; rm_field->id_offset = reg_field.id_offset; } From 1a9850fbeb65734827a7dd0088ea357b0f22a242 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 18 May 2015 10:01:03 -0700 Subject: [PATCH 0116/2091] regulator: max77686: fix gpio_enabled shift wrapping bug commit c53403a37cf083ce85da720f18918f73580d0064 upstream. The code should handle more than 32 bits here because "id" can be a value up to MAX77686_REGULATORS (currently 34). Convert the gpio_enabled type to DECLARE_BITMAP and use test_bit/set_bit. Fixes: 3307e9025d29 ("regulator: max77686: Add GPIO control") Reported-by: Dan Carpenter Signed-off-by: Joe Perches Reviewed-by: Krzysztof Kozlowski Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/regulator/max77686.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/regulator/max77686.c b/drivers/regulator/max77686.c index 15fb1416bfbde..c064e32fb3b92 100644 --- a/drivers/regulator/max77686.c +++ b/drivers/regulator/max77686.c @@ -88,7 +88,7 @@ enum max77686_ramp_rate { }; struct max77686_data { - u64 gpio_enabled:MAX77686_REGULATORS; + DECLARE_BITMAP(gpio_enabled, MAX77686_REGULATORS); /* Array indexed by regulator id */ unsigned int opmode[MAX77686_REGULATORS]; @@ -121,7 +121,7 @@ static unsigned int max77686_map_normal_mode(struct max77686_data *max77686, case MAX77686_BUCK8: case MAX77686_BUCK9: case MAX77686_LDO20 ... 
MAX77686_LDO22: - if (max77686->gpio_enabled & (1 << id)) + if (test_bit(id, max77686->gpio_enabled)) return MAX77686_GPIO_CONTROL; } @@ -277,7 +277,7 @@ static int max77686_of_parse_cb(struct device_node *np, } if (gpio_is_valid(config->ena_gpio)) { - max77686->gpio_enabled |= (1 << desc->id); + set_bit(desc->id, max77686->gpio_enabled); return regmap_update_bits(config->regmap, desc->enable_reg, desc->enable_mask, From c17210c30c65355713afb618da1e24b970fa69c8 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Tue, 9 Jun 2015 20:09:42 +0000 Subject: [PATCH 0117/2091] regulator: core: fix constraints output buffer commit a7068e3932eee8268c4ce4e080a338ee7b8a27bf upstream. The buffer used to print the constraints debug info isn't big enough to hold the output in all cases. So fix this issue by increasing the buffer size. Signed-off-by: Stefan Wahren Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/regulator/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 443eaab933fcf..8a28116b58058 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -779,7 +779,7 @@ static int suspend_prepare(struct regulator_dev *rdev, suspend_state_t state) static void print_constraints(struct regulator_dev *rdev) { struct regulation_constraints *constraints = rdev->constraints; - char buf[80] = ""; + char buf[160] = ""; int count = 0; int ret; From 9da8e034daa3670221442392ce9ea17474591c34 Mon Sep 17 00:00:00 2001 From: Miroslav Benes Date: Mon, 1 Jun 2015 17:48:37 +0200 Subject: [PATCH 0118/2091] livepatch: add module locking around kallsyms calls commit 9a1bd63cdae4b623494c4ebaf723a91c35ec49fb upstream. The list of loaded modules is walked through in module_kallsyms_on_each_symbol (called by kallsyms_on_each_symbol). The module_mutex lock should be acquired to prevent potential corruption of the list. This was uncovered with new lockdep asserts in module code introduced by the commit 0be964be0d45 ("module: Sanitize RCU usage and locking") in recent linux-next trees.
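For context, a rough sketch (approximated, not part of this patch) of the constraint introduced by 0be964be0d45: the module list walk now asserts that module_mutex is held, which is why every kallsyms_on_each_symbol() caller, including the livepatch ones below, must take the mutex:

	/* approximate sketch of the kernel/module.c behaviour */
	int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *,
						     struct module *, unsigned long),
					   void *data)
	{
		struct module *mod;

		lockdep_assert_held(&module_mutex);	/* the new assert */
		list_for_each_entry(mod, &modules, list) {
			/* ... walk each module's symbol table, calling fn() ... */
		}
		return 0;
	}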
Signed-off-by: Miroslav Benes Acked-by: Josh Poimboeuf Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- kernel/livepatch/core.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index 284e2691e3807..9ec555732f1a9 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -179,7 +179,9 @@ static int klp_find_object_symbol(const char *objname, const char *name, .count = 0 }; + mutex_lock(&module_mutex); kallsyms_on_each_symbol(klp_find_callback, &args); + mutex_unlock(&module_mutex); if (args.count == 0) pr_err("symbol '%s' not found in symbol table\n", name); @@ -219,13 +221,19 @@ static int klp_verify_vmlinux_symbol(const char *name, unsigned long addr) .name = name, .addr = addr, }; + int ret; - if (kallsyms_on_each_symbol(klp_verify_callback, &args)) - return 0; + mutex_lock(&module_mutex); + ret = kallsyms_on_each_symbol(klp_verify_callback, &args); + mutex_unlock(&module_mutex); - pr_err("symbol '%s' not found at specified address 0x%016lx, kernel mismatch?\n", - name, addr); - return -EINVAL; + if (!ret) { + pr_err("symbol '%s' not found at specified address 0x%016lx, kernel mismatch?\n", + name, addr); + return -EINVAL; + } + + return 0; } static int klp_find_verify_func_addr(struct klp_object *obj, From 927973d93be4524ad25518e0e3412cd27d425e29 Mon Sep 17 00:00:00 2001 From: Martin Sperl Date: Sun, 10 May 2015 07:50:45 +0000 Subject: [PATCH 0119/2091] spi: fix race freeing dummy_tx/rx before it is unmapped MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 8e76ef88f607174082023f50b87fe12dcdbe5db5 upstream. Fix a race (with some kernel configurations) where a queued master->pump_messages runs and frees dummy_tx/rx before spi_unmap_msg is running (or is finished). This results in the following messages: BUG: Bad page state in process page:db7ba030 count:0 mapcount:0 mapping: (null) index:0x0 flags: 0x200(arch_1) page dumped because: PAGE_FLAGS_CHECK_AT_PREP flag set ... Reported-by: Noralf Trønnes Suggested-by: Noralf Trønnes Tested-by: Noralf Trønnes Signed-off-by: Martin Sperl Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 50910d85df5af..d35c1a13217c5 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -988,9 +988,6 @@ void spi_finalize_current_message(struct spi_master *master) spin_lock_irqsave(&master->queue_lock, flags); mesg = master->cur_msg; - master->cur_msg = NULL; - - queue_kthread_work(&master->kworker, &master->pump_messages); spin_unlock_irqrestore(&master->queue_lock, flags); spi_unmap_msg(master, mesg); @@ -1003,9 +1000,13 @@ void spi_finalize_current_message(struct spi_master *master) } } - trace_spi_message_done(mesg); - + spin_lock_irqsave(&master->queue_lock, flags); + master->cur_msg = NULL; master->cur_msg_prepared = false; + queue_kthread_work(&master->kworker, &master->pump_messages); + spin_unlock_irqrestore(&master->queue_lock, flags); + + trace_spi_message_done(mesg); mesg->state = NULL; if (mesg->complete) From 46afcceeebddd0f6e1d328c9c9c93059adffdf08 Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT Date: Tue, 26 May 2015 11:44:42 +0200 Subject: [PATCH 0120/2091] spi: orion: Fix maximum baud rates for Armada 370/XP commit ce2f6ea1cbd41d78224f703af980a6ceeb0eb56a upstream. 
The commit df59fa7f4bca "spi: orion: support armada extended baud rates" was too optimistic for the maximum baud rate that the Armada SoCs can support. According to the hardware datasheet the maximum frequency supported by the Armada 370 SoC is tclk/4. But for the Armada XP, Armada 38x and Armada 39x SoCs the limitation is 50MHz and for the Armada 375 it is tclk/15. Currently the armada-370-spi compatible is only used by the Armada 370 and the Armada XP device tree. On Armada 370, tclk cannot be higher than 200MHz. In order to be able to handle both SoCs, we can take the minimum of 50MHz and tclk/4. A proper solution is adding a compatible string for each SoC, but it can't be done as a fix for compatibility reason (we can't modify device tree that have been already released) and it will be part of a separate patch. Fixes: df59fa7f4bca (spi: orion: support armada extended baud rates) Reported-by: Kostya Porotchkin Signed-off-by: Gregory CLEMENT Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-orion.c | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-orion.c b/drivers/spi/spi-orion.c index 861664776672c..ff97cabdaa81f 100644 --- a/drivers/spi/spi-orion.c +++ b/drivers/spi/spi-orion.c @@ -61,6 +61,12 @@ enum orion_spi_type { struct orion_spi_dev { enum orion_spi_type typ; + /* + * min_divisor and max_hz should be exclusive, the only we can + * have both is for managing the armada-370-spi case with old + * device tree + */ + unsigned long max_hz; unsigned int min_divisor; unsigned int max_divisor; u32 prescale_mask; @@ -387,8 +393,9 @@ static const struct orion_spi_dev orion_spi_dev_data = { static const struct orion_spi_dev armada_spi_dev_data = { .typ = ARMADA_SPI, - .min_divisor = 1, + .min_divisor = 4, .max_divisor = 1920, + .max_hz = 50000000, .prescale_mask = ARMADA_SPI_CLK_PRESCALE_MASK, }; @@ -454,7 +461,21 @@ static int orion_spi_probe(struct platform_device *pdev) goto out; tclk_hz = clk_get_rate(spi->clk); - master->max_speed_hz = DIV_ROUND_UP(tclk_hz, devdata->min_divisor); + + /* + * With old device tree, armada-370-spi could be used with + * Armada XP, however for this SoC the maximum frequency is + * 50MHz instead of tclk/4. On Armada 370, tclk cannot be + * higher than 200MHz. So, in order to be able to handle both + * SoCs, we can take the minimum of 50MHz and tclk/4. + */ + if (of_device_is_compatible(pdev->dev.of_node, + "marvell,armada-370-spi")) + master->max_speed_hz = min(devdata->max_hz, + DIV_ROUND_UP(tclk_hz, devdata->min_divisor)); + else + master->max_speed_hz = + DIV_ROUND_UP(tclk_hz, devdata->min_divisor); master->min_speed_hz = DIV_ROUND_UP(tclk_hz, devdata->max_divisor); r = platform_get_resource(pdev, IORESOURCE_MEM, 0); From bcd201e2ae1c3e431a89a02a48c05112f19eb65e Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Mon, 11 May 2015 12:20:18 -0300 Subject: [PATCH 0121/2091] spi: pl022: Specify 'num-cs' property as required in devicetree binding commit ea6055c46eda1e19e02209814955e13f334bbe1b upstream. Since commit 39a6ac11df65 ("spi/pl022: Devicetree support w/o platform data") the 'num-cs' parameter cannot be passed through platform data when probing with devicetree. Instead, it's a required devicetree property. Fix the binding documentation so the property is properly specified. 
Fixes: 39a6ac11df65 ("spi/pl022: Devicetree support w/o platform data") Signed-off-by: Ezequiel Garcia Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/spi/spi_pl022.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/spi/spi_pl022.txt b/Documentation/devicetree/bindings/spi/spi_pl022.txt index 22ed6797216d7..4d1673ca8cf80 100644 --- a/Documentation/devicetree/bindings/spi/spi_pl022.txt +++ b/Documentation/devicetree/bindings/spi/spi_pl022.txt @@ -4,9 +4,9 @@ Required properties: - compatible : "arm,pl022", "arm,primecell" - reg : Offset and length of the register set for the device - interrupts : Should contain SPI controller interrupt +- num-cs : total number of chipselects Optional properties: -- num-cs : total number of chipselects - cs-gpios : should specify GPIOs used for chipselects. The gpios will be referred to as reg = in the SPI child nodes. If unspecified, a single SPI device without a chip select can be used. From 3705ac339307a3f6b5a84e48cb82be0167b4e633 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 18 May 2015 13:22:19 +0200 Subject: [PATCH 0122/2091] scsi_transport_srp: Introduce srp_wait_for_queuecommand() commit be34c62ddf39d1931780b07a6f4241393e4ba2ee upstream. Introduce the helper function srp_wait_for_queuecommand(). Move the definition of scsi_request_fn_active(). Add a comment above srp_wait_for_queuecommand() that support for scsi-mq needs to be added. This patch does not change any functionality. A second call to srp_wait_for_queuecommand() will be introduced in the next patch. Signed-off-by: Bart Van Assche Cc: James Bottomley Cc: Sagi Grimberg Cc: Sebastian Parschauer Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/scsi_transport_srp.c | 54 ++++++++++++++++++------------- 1 file changed, 31 insertions(+), 23 deletions(-) diff --git a/drivers/scsi/scsi_transport_srp.c b/drivers/scsi/scsi_transport_srp.c index ae45bd99baed7..e05cd7e2d6d33 100644 --- a/drivers/scsi/scsi_transport_srp.c +++ b/drivers/scsi/scsi_transport_srp.c @@ -396,6 +396,36 @@ static void srp_reconnect_work(struct work_struct *work) } } +/** + * scsi_request_fn_active() - number of kernel threads inside scsi_request_fn() + * @shost: SCSI host for which to count the number of scsi_request_fn() callers. + * + * To do: add support for scsi-mq in this function. + */ +static int scsi_request_fn_active(struct Scsi_Host *shost) +{ + struct scsi_device *sdev; + struct request_queue *q; + int request_fn_active = 0; + + shost_for_each_device(sdev, shost) { + q = sdev->request_queue; + + spin_lock_irq(q->queue_lock); + request_fn_active += q->request_fn_active; + spin_unlock_irq(q->queue_lock); + } + + return request_fn_active; +} + +/* Wait until ongoing shost->hostt->queuecommand() calls have finished. */ +static void srp_wait_for_queuecommand(struct Scsi_Host *shost) +{ + while (scsi_request_fn_active(shost)) + msleep(20); +} + static void __rport_fail_io_fast(struct srp_rport *rport) { struct Scsi_Host *shost = rport_to_shost(rport); @@ -503,27 +533,6 @@ void srp_start_tl_fail_timers(struct srp_rport *rport) } EXPORT_SYMBOL(srp_start_tl_fail_timers); -/** - * scsi_request_fn_active() - number of kernel threads inside scsi_request_fn() - * @shost: SCSI host for which to count the number of scsi_request_fn() callers. 
- */ -static int scsi_request_fn_active(struct Scsi_Host *shost) -{ - struct scsi_device *sdev; - struct request_queue *q; - int request_fn_active = 0; - - shost_for_each_device(sdev, shost) { - q = sdev->request_queue; - - spin_lock_irq(q->queue_lock); - request_fn_active += q->request_fn_active; - spin_unlock_irq(q->queue_lock); - } - - return request_fn_active; -} - /** * srp_reconnect_rport() - reconnect to an SRP target port * @rport: SRP target port. @@ -559,8 +568,7 @@ int srp_reconnect_rport(struct srp_rport *rport) if (res) goto out; scsi_target_block(&shost->shost_gendev); - while (scsi_request_fn_active(shost)) - msleep(20); + srp_wait_for_queuecommand(shost); res = rport->state != SRP_RPORT_LOST ? i->f->reconnect(rport) : -ENODEV; pr_debug("%s (state %d): transport.reconnect() returned %d\n", dev_name(&shost->shost_gendev), rport->state, res); From 3646ac368739a302008cc7bf33a985da2cfa1a17 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 18 May 2015 13:22:44 +0200 Subject: [PATCH 0123/2091] scsi_transport_srp: Fix a race condition commit 535fb906225fb7436cb658144d0c0cea14a26f3e upstream. Avoid that srp_terminate_io() can get invoked while srp_queuecommand() is in progress. This patch avoids that an I/O timeout can trigger the following kernel warning: WARNING: at drivers/infiniband/ulp/srp/ib_srp.c:1447 srp_terminate_io+0xef/0x100 [ib_srp]() Call Trace: [] dump_stack+0x4e/0x68 [] warn_slowpath_common+0x81/0xa0 [] warn_slowpath_null+0x1a/0x20 [] srp_terminate_io+0xef/0x100 [ib_srp] [] __rport_fail_io_fast+0xba/0xc0 [scsi_transport_srp] [] rport_fast_io_fail_timedout+0xe0/0xf0 [scsi_transport_srp] [] process_one_work+0x1db/0x780 [] worker_thread+0x11b/0x450 [] kthread+0xe4/0x100 [] ret_from_fork+0x7c/0xb0 See also patch "scsi_transport_srp: Add transport layer error handling" (commit ID 29c17324803c). Signed-off-by: Bart Van Assche Cc: James Bottomley Cc: Sagi Grimberg Cc: Sebastian Parschauer Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/scsi_transport_srp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_transport_srp.c b/drivers/scsi/scsi_transport_srp.c index e05cd7e2d6d33..f115f67a6ba58 100644 --- a/drivers/scsi/scsi_transport_srp.c +++ b/drivers/scsi/scsi_transport_srp.c @@ -439,8 +439,10 @@ static void __rport_fail_io_fast(struct srp_rport *rport) /* Involve the LLD if possible to terminate all I/O on the rport. */ i = to_srp_internal(shost->transportt); - if (i->f->terminate_rport_io) + if (i->f->terminate_rport_io) { + srp_wait_for_queuecommand(shost); i->f->terminate_rport_io(rport); + } } /** From 5e72c7cc287d9415debd623e96ac275f20d37645 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 18 May 2015 13:23:14 +0200 Subject: [PATCH 0124/2091] IB/srp: Remove an extraneous scsi_host_put() from an error path commit fb49c8bbaae70b14fea2b4590a90a21539f88526 upstream. Fix a scsi_get_host() / scsi_host_put() imbalance in the error path of srp_create_target(). See also patch "IB/srp: Avoid that I/O hangs due to a cable pull during LUN scanning" (commit ID 34aa654ecb8e). 
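In other words (a simplified sketch; the exit-path details are paraphrased rather than quoted from ib_srp.c): srp_create_target() already drops its SCSI host reference once on the common exit path, so jumping to the removed error label dropped it a second time:

	/* simplified sketch only */
	out:
		/* ... common cleanup ... */
		scsi_host_put(target_host);	/* the one legitimate put */
		return ret;

	err:					/* label removed by this patch */
		scsi_host_put(target_host);	/* extra put: reference dropped twice */
		goto out;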
Signed-off-by: Bart Van Assche Reviewed-by: Sagi Grimberg Cc: Sebastian Parschauer Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/srp/ib_srp.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 918814cd0f806..5ce6cfd86476e 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -3172,11 +3172,11 @@ static ssize_t srp_create_target(struct device *dev, ret = srp_parse_options(buf, target); if (ret) - goto err; + goto out; ret = scsi_init_shared_tag_map(target_host, target_host->can_queue); if (ret) - goto err; + goto out; target->req_ring_size = target->queue_size - SRP_TSK_MGMT_SQ_SIZE; @@ -3187,7 +3187,7 @@ static ssize_t srp_create_target(struct device *dev, be64_to_cpu(target->ioc_guid), be64_to_cpu(target->initiator_ext)); ret = -EEXIST; - goto err; + goto out; } if (!srp_dev->has_fmr && !srp_dev->has_fr && !target->allow_ext_sg && @@ -3208,7 +3208,7 @@ static ssize_t srp_create_target(struct device *dev, spin_lock_init(&target->lock); ret = ib_query_gid(ibdev, host->port, 0, &target->sgid); if (ret) - goto err; + goto out; ret = -ENOMEM; target->ch_count = max_t(unsigned, num_online_nodes(), @@ -3219,7 +3219,7 @@ static ssize_t srp_create_target(struct device *dev, target->ch = kcalloc(target->ch_count, sizeof(*target->ch), GFP_KERNEL); if (!target->ch) - goto err; + goto out; node_idx = 0; for_each_online_node(node) { @@ -3315,9 +3315,6 @@ static ssize_t srp_create_target(struct device *dev, } kfree(target->ch); - -err: - scsi_host_put(target_host); goto out; } From 86e4f5b10e79205f5eb254a3c60d111931f2a274 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 18 May 2015 13:23:36 +0200 Subject: [PATCH 0125/2091] IB/srp: Fix a connection setup race commit 8de9fe3a1d4ac8c3e4953fa4b7d81f863f5196ad upstream. Avoid that receiving a DREQ while RDMA channels are being established causes target->qp_in_error to be reset. Signed-off-by: Bart Van Assche Cc: Sagi Grimberg Cc: Sebastian Parschauer Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/srp/ib_srp.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 5ce6cfd86476e..f2daabd1e91d2 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -993,8 +993,6 @@ static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich) WARN_ON_ONCE(!multich && target->connected); - target->qp_in_error = false; - ret = srp_lookup_path(ch); if (ret) return ret; @@ -1243,6 +1241,9 @@ static int srp_rport_reconnect(struct srp_rport *rport) for (j = 0; j < target->queue_size; ++j) list_add(&ch->tx_ring[j]->list, &ch->free_tx); } + + target->qp_in_error = false; + for (i = 0; i < target->ch_count; i++) { ch = &target->ch[i]; if (ret || !ch->target) { From c1ab680046ba170050e91ec49577699de1a24e1a Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 18 May 2015 13:23:57 +0200 Subject: [PATCH 0126/2091] IB/srp: Fix connection state tracking commit c014c8cd31b161e12deb81c0f7f477811bd1eddc upstream. Reception of a DREQ message only causes the state of a single channel to change. Hence move the 'connected' member variable from the target to the channel data structure. 
This patch avoids that following false positive warning can be reported by srp_destroy_qp(): WARNING: at drivers/infiniband/ulp/srp/ib_srp.c:617 srp_destroy_qp+0xa6/0x120 [ib_srp]() Call Trace: [] warn_slowpath_common+0x7f/0xc0 [] warn_slowpath_null+0x1a/0x20 [] srp_destroy_qp+0xa6/0x120 [ib_srp] [] srp_free_ch_ib+0x82/0x1e0 [ib_srp] [] srp_create_target+0x7ab/0x998 [ib_srp] [] dev_attr_store+0x20/0x30 [] sysfs_write_file+0xef/0x170 [] vfs_write+0xc8/0x190 [] sys_write+0x51/0x90 Signed-off-by: Bart Van Assche Cc: Sagi Grimberg Cc: Sebastian Parschauer Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/srp/ib_srp.c | 60 ++++++++++++++--------------- drivers/infiniband/ulp/srp/ib_srp.h | 2 +- 2 files changed, 30 insertions(+), 32 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index f2daabd1e91d2..c418d1fde1edd 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -465,14 +465,13 @@ static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target) */ static void srp_destroy_qp(struct srp_rdma_ch *ch) { - struct srp_target_port *target = ch->target; static struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; static struct ib_recv_wr wr = { .wr_id = SRP_LAST_WR_ID }; struct ib_recv_wr *bad_wr; int ret; /* Destroying a QP and reusing ch->done is only safe if not connected */ - WARN_ON_ONCE(target->connected); + WARN_ON_ONCE(ch->connected); ret = ib_modify_qp(ch->qp, &attr, IB_QP_STATE); WARN_ONCE(ret, "ib_cm_init_qp_attr() returned %d\n", ret); @@ -811,35 +810,19 @@ static bool srp_queue_remove_work(struct srp_target_port *target) return changed; } -static bool srp_change_conn_state(struct srp_target_port *target, - bool connected) -{ - bool changed = false; - - spin_lock_irq(&target->lock); - if (target->connected != connected) { - target->connected = connected; - changed = true; - } - spin_unlock_irq(&target->lock); - - return changed; -} - static void srp_disconnect_target(struct srp_target_port *target) { struct srp_rdma_ch *ch; int i; - if (srp_change_conn_state(target, false)) { - /* XXX should send SRP_I_LOGOUT request */ + /* XXX should send SRP_I_LOGOUT request */ - for (i = 0; i < target->ch_count; i++) { - ch = &target->ch[i]; - if (ch->cm_id && ib_send_cm_dreq(ch->cm_id, NULL, 0)) { - shost_printk(KERN_DEBUG, target->scsi_host, - PFX "Sending CM DREQ failed\n"); - } + for (i = 0; i < target->ch_count; i++) { + ch = &target->ch[i]; + ch->connected = false; + if (ch->cm_id && ib_send_cm_dreq(ch->cm_id, NULL, 0)) { + shost_printk(KERN_DEBUG, target->scsi_host, + PFX "Sending CM DREQ failed\n"); } } } @@ -986,12 +969,26 @@ static void srp_rport_delete(struct srp_rport *rport) srp_queue_remove_work(target); } +/** + * srp_connected_ch() - number of connected channels + * @target: SRP target port. 
+ */ +static int srp_connected_ch(struct srp_target_port *target) +{ + int i, c = 0; + + for (i = 0; i < target->ch_count; i++) + c += target->ch[i].connected; + + return c; +} + static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich) { struct srp_target_port *target = ch->target; int ret; - WARN_ON_ONCE(!multich && target->connected); + WARN_ON_ONCE(!multich && srp_connected_ch(target) > 0); ret = srp_lookup_path(ch); if (ret) @@ -1014,7 +1011,7 @@ static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich) */ switch (ch->status) { case 0: - srp_change_conn_state(target, true); + ch->connected = true; return 0; case SRP_PORT_REDIRECT: @@ -1930,7 +1927,7 @@ static void srp_handle_qp_err(u64 wr_id, enum ib_wc_status wc_status, return; } - if (target->connected && !target->qp_in_error) { + if (ch->connected && !target->qp_in_error) { if (wr_id & LOCAL_INV_WR_ID_MASK) { shost_printk(KERN_ERR, target->scsi_host, PFX "LOCAL_INV failed with status %d\n", @@ -2368,7 +2365,7 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) case IB_CM_DREQ_RECEIVED: shost_printk(KERN_WARNING, target->scsi_host, PFX "DREQ received - connection closed\n"); - srp_change_conn_state(target, false); + ch->connected = false; if (ib_send_cm_drep(cm_id, NULL, 0)) shost_printk(KERN_ERR, target->scsi_host, PFX "Sending CM DREP failed\n"); @@ -2424,7 +2421,7 @@ static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, struct srp_iu *iu; struct srp_tsk_mgmt *tsk_mgmt; - if (!target->connected || target->qp_in_error) + if (!ch->connected || target->qp_in_error) return -1; init_completion(&ch->tsk_mgmt_done); @@ -2798,7 +2795,8 @@ static int srp_add_target(struct srp_host *host, struct srp_target_port *target) scsi_scan_target(&target->scsi_host->shost_gendev, 0, target->scsi_id, SCAN_WILD_CARD, 0); - if (!target->connected || target->qp_in_error) { + if (srp_connected_ch(target) < target->ch_count || + target->qp_in_error) { shost_printk(KERN_INFO, target->scsi_host, PFX "SCSI scan failed - removing SCSI host\n"); srp_queue_remove_work(target); diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index a611556406ac0..e690847a46dd3 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ b/drivers/infiniband/ulp/srp/ib_srp.h @@ -170,6 +170,7 @@ struct srp_rdma_ch { struct completion tsk_mgmt_done; u8 tsk_mgmt_status; + bool connected; }; /** @@ -214,7 +215,6 @@ struct srp_target_port { __be16 pkey; u32 rq_tmo_jiffies; - bool connected; int zero_req_lim; From b14524edc36d494d75c760743cc9435ef4c12a2f Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 18 May 2015 13:24:17 +0200 Subject: [PATCH 0127/2091] IB/srp: Fix reconnection failure handling commit a44074f14ba1ea0747ea737026eb929b81993dc3 upstream. Although it is possible to let SRP I/O continue if a reconnect results in a reduction of the number of channels, the current code does not handle this scenario correctly. Instead of making the reconnect code more complex, consider this as a reconnection failure. 
Signed-off-by: Bart Van Assche Cc: Sagi Grimberg Cc: Sebastian Parschauer Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/srp/ib_srp.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index c418d1fde1edd..75c01b27bd0bd 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -1243,11 +1243,8 @@ static int srp_rport_reconnect(struct srp_rport *rport) for (i = 0; i < target->ch_count; i++) { ch = &target->ch[i]; - if (ret || !ch->target) { - if (i > 1) - ret = 0; + if (ret || !ch->target) break; - } ret = srp_connect_ch(ch, multich); multich = true; } From 25d8f169eee423940158a0ab33ed5dd1dd995cf9 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Mon, 11 May 2015 17:02:58 +0800 Subject: [PATCH 0128/2091] genirq: devres: Fix testing return value of request_any_context_irq() commit 63781394c540dd9e666a6b21d70b64dd52bce76e upstream. request_any_context_irq() returns a negative value on failure. It returns either IRQC_IS_HARDIRQ or IRQC_IS_NESTED on success. So fix testing return value of request_any_context_irq(). Also fixup the return value of devm_request_any_context_irq() to make it consistent with request_any_context_irq(). Fixes: 0668d3065128 ("genirq: Add devm_request_any_context_irq()") Signed-off-by: Axel Lin Reviewed-by: Stephen Boyd Link: http://lkml.kernel.org/r/1431334978.17783.4.camel@ingics.com Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- kernel/irq/devres.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/irq/devres.c b/kernel/irq/devres.c index d5d0f7345c545..74d90a7542688 100644 --- a/kernel/irq/devres.c +++ b/kernel/irq/devres.c @@ -104,7 +104,7 @@ int devm_request_any_context_irq(struct device *dev, unsigned int irq, return -ENOMEM; rc = request_any_context_irq(irq, handler, irqflags, devname, dev_id); - if (rc) { + if (rc < 0) { devres_free(dr); return rc; } @@ -113,7 +113,7 @@ int devm_request_any_context_irq(struct device *dev, unsigned int irq, dr->dev_id = dev_id; devres_add(dev, dr); - return 0; + return rc; } EXPORT_SYMBOL(devm_request_any_context_irq); From ab12dcd70c11074e6ed28b0304f059a075e33db0 Mon Sep 17 00:00:00 2001 From: Liu Ying Date: Fri, 3 Apr 2015 12:51:05 +0800 Subject: [PATCH 0129/2091] video: mxsfb: Make sure axi clock is enabled when accessing registers commit 2fa3b4c4a78a5db3502ab9e32630ea660ff923d0 upstream. The LCDIF engines embedded in i.MX6sl and i.MX6sx SoCs need the axi clock as the engine's system clock. The clock should be enabled when accessing LCDIF registers, otherwise the kernel would hang up. We should also keep the clock enabled when the engine is being active to scan out frames from memory. This patch makes sure the axi clock is enabled when accessing registers so that the kernel hang up issue can be fixed. 
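The rule the patch enforces can be distilled to the following pattern (abbreviated from the hunks below): bracket every LCDIF register access with the axi clock so the bus access cannot hang:

	/* distilled pattern, see the probe() hunk below */
	mxsfb_enable_axi_clk(host);		/* clock the register interface */
	writel(0, host->base + LCDC_CTRL);	/* safe: axi clock is running */
	mxsfb_disable_axi_clk(host);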
Reported-by: Peter Chen Tested-by: Peter Chen Signed-off-by: Liu Ying Signed-off-by: Tomi Valkeinen Signed-off-by: Greg Kroah-Hartman --- drivers/video/fbdev/mxsfb.c | 68 +++++++++++++++++++++++++++++-------- 1 file changed, 54 insertions(+), 14 deletions(-) diff --git a/drivers/video/fbdev/mxsfb.c b/drivers/video/fbdev/mxsfb.c index f8ac4a452f26d..0f64165b01474 100644 --- a/drivers/video/fbdev/mxsfb.c +++ b/drivers/video/fbdev/mxsfb.c @@ -316,6 +316,18 @@ static int mxsfb_check_var(struct fb_var_screeninfo *var, return 0; } +static inline void mxsfb_enable_axi_clk(struct mxsfb_info *host) +{ + if (host->clk_axi) + clk_prepare_enable(host->clk_axi); +} + +static inline void mxsfb_disable_axi_clk(struct mxsfb_info *host) +{ + if (host->clk_axi) + clk_disable_unprepare(host->clk_axi); +} + static void mxsfb_enable_controller(struct fb_info *fb_info) { struct mxsfb_info *host = to_imxfb_host(fb_info); @@ -333,14 +345,13 @@ static void mxsfb_enable_controller(struct fb_info *fb_info) } } - if (host->clk_axi) - clk_prepare_enable(host->clk_axi); - if (host->clk_disp_axi) clk_prepare_enable(host->clk_disp_axi); clk_prepare_enable(host->clk); clk_set_rate(host->clk, PICOS2KHZ(fb_info->var.pixclock) * 1000U); + mxsfb_enable_axi_clk(host); + /* if it was disabled, re-enable the mode again */ writel(CTRL_DOTCLK_MODE, host->base + LCDC_CTRL + REG_SET); @@ -380,11 +391,11 @@ static void mxsfb_disable_controller(struct fb_info *fb_info) reg = readl(host->base + LCDC_VDCTRL4); writel(reg & ~VDCTRL4_SYNC_SIGNALS_ON, host->base + LCDC_VDCTRL4); + mxsfb_disable_axi_clk(host); + clk_disable_unprepare(host->clk); if (host->clk_disp_axi) clk_disable_unprepare(host->clk_disp_axi); - if (host->clk_axi) - clk_disable_unprepare(host->clk_axi); host->enabled = 0; @@ -421,6 +432,8 @@ static int mxsfb_set_par(struct fb_info *fb_info) mxsfb_disable_controller(fb_info); } + mxsfb_enable_axi_clk(host); + /* clear the FIFOs */ writel(CTRL1_FIFO_CLEAR, host->base + LCDC_CTRL1 + REG_SET); @@ -438,6 +451,7 @@ static int mxsfb_set_par(struct fb_info *fb_info) ctrl |= CTRL_SET_WORD_LENGTH(3); switch (host->ld_intf_width) { case STMLCDIF_8BIT: + mxsfb_disable_axi_clk(host); dev_err(&host->pdev->dev, "Unsupported LCD bus width mapping\n"); return -EINVAL; @@ -451,6 +465,7 @@ static int mxsfb_set_par(struct fb_info *fb_info) writel(CTRL1_SET_BYTE_PACKAGING(0x7), host->base + LCDC_CTRL1); break; default: + mxsfb_disable_axi_clk(host); dev_err(&host->pdev->dev, "Unhandled color depth of %u\n", fb_info->var.bits_per_pixel); return -EINVAL; @@ -504,6 +519,8 @@ static int mxsfb_set_par(struct fb_info *fb_info) fb_info->fix.line_length * fb_info->var.yoffset, host->base + host->devdata->next_buf); + mxsfb_disable_axi_clk(host); + if (reenable) mxsfb_enable_controller(fb_info); @@ -582,10 +599,14 @@ static int mxsfb_pan_display(struct fb_var_screeninfo *var, offset = fb_info->fix.line_length * var->yoffset; + mxsfb_enable_axi_clk(host); + /* update on next VSYNC */ writel(fb_info->fix.smem_start + offset, host->base + host->devdata->next_buf); + mxsfb_disable_axi_clk(host); + return 0; } @@ -608,13 +629,17 @@ static int mxsfb_restore_mode(struct mxsfb_info *host, unsigned line_count; unsigned period; unsigned long pa, fbsize; - int bits_per_pixel, ofs; + int bits_per_pixel, ofs, ret = 0; u32 transfer_count, vdctrl0, vdctrl2, vdctrl3, vdctrl4, ctrl; + mxsfb_enable_axi_clk(host); + /* Only restore the mode when the controller is running */ ctrl = readl(host->base + LCDC_CTRL); - if (!(ctrl & CTRL_RUN)) - return -EINVAL; + if (!(ctrl & 
CTRL_RUN)) { + ret = -EINVAL; + goto err; + } vdctrl0 = readl(host->base + LCDC_VDCTRL0); vdctrl2 = readl(host->base + LCDC_VDCTRL2); @@ -635,7 +660,8 @@ static int mxsfb_restore_mode(struct mxsfb_info *host, break; case 1: default: - return -EINVAL; + ret = -EINVAL; + goto err; } fb_info->var.bits_per_pixel = bits_per_pixel; @@ -673,10 +699,14 @@ static int mxsfb_restore_mode(struct mxsfb_info *host, pa = readl(host->base + host->devdata->cur_buf); fbsize = fb_info->fix.line_length * vmode->yres; - if (pa < fb_info->fix.smem_start) - return -EINVAL; - if (pa + fbsize > fb_info->fix.smem_start + fb_info->fix.smem_len) - return -EINVAL; + if (pa < fb_info->fix.smem_start) { + ret = -EINVAL; + goto err; + } + if (pa + fbsize > fb_info->fix.smem_start + fb_info->fix.smem_len) { + ret = -EINVAL; + goto err; + } ofs = pa - fb_info->fix.smem_start; if (ofs) { memmove(fb_info->screen_base, fb_info->screen_base + ofs, fbsize); @@ -689,7 +719,11 @@ static int mxsfb_restore_mode(struct mxsfb_info *host, clk_prepare_enable(host->clk); host->enabled = 1; - return 0; +err: + if (ret) + mxsfb_disable_axi_clk(host); + + return ret; } static int mxsfb_init_fbinfo_dt(struct mxsfb_info *host, @@ -915,7 +949,9 @@ static int mxsfb_probe(struct platform_device *pdev) } if (!host->enabled) { + mxsfb_enable_axi_clk(host); writel(0, host->base + LCDC_CTRL); + mxsfb_disable_axi_clk(host); mxsfb_set_par(fb_info); mxsfb_enable_controller(fb_info); } @@ -954,11 +990,15 @@ static void mxsfb_shutdown(struct platform_device *pdev) struct fb_info *fb_info = platform_get_drvdata(pdev); struct mxsfb_info *host = to_imxfb_host(fb_info); + mxsfb_enable_axi_clk(host); + /* * Force stop the LCD controller as keeping it running during reboot * might interfere with the BootROM's boot mode pads sampling. */ writel(CTRL_RUN, host->base + LCDC_CTRL + REG_CLR); + + mxsfb_disable_axi_clk(host); } static struct platform_driver mxsfb_driver = { From 2c6f129c8fcf59946e62216792e162b9d9f0dc8e Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Fri, 24 Apr 2015 14:57:10 +0300 Subject: [PATCH 0130/2091] leds / PM: fix hibernation on arm when gpio-led used with CPU led trigger commit 084609bf727981c7a2e6e69aefe0052c9d793300 upstream. Setting a dev_pm_ops suspend/resume pair of callbacks but not a set of hibernation callbacks means those pm functions will not be called upon hibernation - that leads to system crash on ARM during freezing if gpio-led is used in combination with CPU led trigger. It may happen after freeze_noirq stage (GPIO is suspended) and before syscore_suspend stage (CPU led trigger is suspended) - usually when disable_nonboot_cpus() is called. Log: PM: noirq freeze of devices complete after 1.425 msecs Disabling non-boot CPUs ... ^ system may crash or stuck here with message (TI AM572x) WARNING: CPU: 0 PID: 3100 at drivers/bus/omap_l3_noc.c:148 l3_interrupt_handler+0x22c/0x370() 44000000.ocp:L3 Custom Error: MASTER MPU TARGET L4_PER1_P3 (Idle): Data Access in Supervisor mode during Functional access CPU1: shutdown ^ or here Fix this by using SIMPLE_DEV_PM_OPS, which appropriately assigns the suspend and hibernation callbacks and move led_suspend/led_resume under CONFIG_PM_SLEEP to avoid build warnings. Fixes: 73e1ab41a80d (leds: Convert led class driver from legacy pm ops to dev_pm_ops) Signed-off-by: Grygorii Strashko Acked-by: Jacek Anaszewski Signed-off-by: Rafael J. 
Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/leds/led-class.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c index 728681debdbe4..7fb2a19ac649c 100644 --- a/drivers/leds/led-class.c +++ b/drivers/leds/led-class.c @@ -187,6 +187,7 @@ void led_classdev_resume(struct led_classdev *led_cdev) } EXPORT_SYMBOL_GPL(led_classdev_resume); +#ifdef CONFIG_PM_SLEEP static int led_suspend(struct device *dev) { struct led_classdev *led_cdev = dev_get_drvdata(dev); @@ -206,11 +207,9 @@ static int led_resume(struct device *dev) return 0; } +#endif -static const struct dev_pm_ops leds_class_dev_pm_ops = { - .suspend = led_suspend, - .resume = led_resume, -}; +static SIMPLE_DEV_PM_OPS(leds_class_dev_pm_ops, led_suspend, led_resume); static int match_name(struct device *dev, const void *data) { From 857814ee65dbc942b18b2dc713124ffff043035e Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Thu, 7 May 2015 17:55:16 -0700 Subject: [PATCH 0131/2091] mtd: fix: avoid race condition when accessing mtd->usecount commit 073db4a51ee43ccb827f54a4261c0583b028d5ab upstream. On A MIPS 32-cores machine a BUG_ON was triggered because some acesses to mtd->usecount were done without taking mtd_table_mutex. kernel: Call Trace: kernel: [] __put_mtd_device+0x20/0x50 kernel: [] blktrans_release+0x8c/0xd8 kernel: [] __blkdev_put+0x1a8/0x200 kernel: [] blkdev_close+0x1c/0x30 kernel: [] __fput+0xac/0x250 kernel: [] task_work_run+0xd8/0x120 kernel: [] work_notifysig+0x10/0x18 kernel: kernel: Code: 2442ffff ac8202d8 000217fe <00020336> dc820128 10400003 00000000 0040f809 00000000 kernel: ---[ end trace 080fbb4579b47a73 ]--- Fixed by taking the mutex in blktrans_open and blktrans_release. Note that this locking is already suggested in include/linux/mtd/blktrans.h: struct mtd_blktrans_ops { ... /* Called with mtd_table_mutex held; no race with add/remove */ int (*open)(struct mtd_blktrans_dev *dev); void (*release)(struct mtd_blktrans_dev *dev); ... }; But we weren't following it. Originally reported by (and patched by) Zhang and Giuseppe, independently. Improved and rewritten. Reported-by: Zhang Xingcai Reported-by: Giuseppe Cantavenera Tested-by: Giuseppe Cantavenera Acked-by: Alexander Sverdlin Signed-off-by: Brian Norris Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/mtd_blkdevs.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index 2b0c528709997..df7c6c70757a6 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -197,6 +197,7 @@ static int blktrans_open(struct block_device *bdev, fmode_t mode) return -ERESTARTSYS; /* FIXME: busy loop! 
-arnd*/ mutex_lock(&dev->lock); + mutex_lock(&mtd_table_mutex); if (dev->open) goto unlock; @@ -220,6 +221,7 @@ static int blktrans_open(struct block_device *bdev, fmode_t mode) unlock: dev->open++; + mutex_unlock(&mtd_table_mutex); mutex_unlock(&dev->lock); blktrans_dev_put(dev); return ret; @@ -230,6 +232,7 @@ static int blktrans_open(struct block_device *bdev, fmode_t mode) error_put: module_put(dev->tr->owner); kref_put(&dev->ref, blktrans_dev_release); + mutex_unlock(&mtd_table_mutex); mutex_unlock(&dev->lock); blktrans_dev_put(dev); return ret; @@ -243,6 +246,7 @@ static void blktrans_release(struct gendisk *disk, fmode_t mode) return; mutex_lock(&dev->lock); + mutex_lock(&mtd_table_mutex); if (--dev->open) goto unlock; @@ -256,6 +260,7 @@ static void blktrans_release(struct gendisk *disk, fmode_t mode) __put_mtd_device(dev->mtd); } unlock: + mutex_unlock(&mtd_table_mutex); mutex_unlock(&dev->lock); blktrans_dev_put(dev); } From 4aa339cddbcc05b7f8ff4f0960550929aa77213e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Thu, 28 May 2015 10:22:10 +0200 Subject: [PATCH 0132/2091] mtd: dc21285: use raw spinlock functions for nw_gpio_lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit e5babdf928e5d0c432a8d4b99f20421ce14d1ab6 upstream. Since commit bd31b85960a7 (which is in 3.2-rc1) nw_gpio_lock is a raw spinlock that needs usage of the corresponding raw functions. This fixes: drivers/mtd/maps/dc21285.c: In function 'nw_en_write': drivers/mtd/maps/dc21285.c:41:340: warning: passing argument 1 of 'spinlock_check' from incompatible pointer type spin_lock_irqsave(&nw_gpio_lock, flags); In file included from include/linux/seqlock.h:35:0, from include/linux/time.h:5, from include/linux/stat.h:18, from include/linux/module.h:10, from drivers/mtd/maps/dc21285.c:8: include/linux/spinlock.h:299:102: note: expected 'struct spinlock_t *' but argument is of type 'struct raw_spinlock_t *' static inline raw_spinlock_t *spinlock_check(spinlock_t *lock) ^ drivers/mtd/maps/dc21285.c:43:25: warning: passing argument 1 of 'spin_unlock_irqrestore' from incompatible pointer type spin_unlock_irqrestore(&nw_gpio_lock, flags); ^ In file included from include/linux/seqlock.h:35:0, from include/linux/time.h:5, from include/linux/stat.h:18, from include/linux/module.h:10, from drivers/mtd/maps/dc21285.c:8: include/linux/spinlock.h:370:91: note: expected 'struct spinlock_t *' but argument is of type 'struct raw_spinlock_t *' static inline void spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags) Fixes: bd31b85960a7 ("locking, ARM: Annotate low level hw locks as raw") Signed-off-by: Uwe Kleine-König Signed-off-by: Brian Norris Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/maps/dc21285.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/maps/dc21285.c b/drivers/mtd/maps/dc21285.c index f8a7dd14cee0c..70a3db3ab856f 100644 --- a/drivers/mtd/maps/dc21285.c +++ b/drivers/mtd/maps/dc21285.c @@ -38,9 +38,9 @@ static void nw_en_write(void) * we want to write a bit pattern XXX1 to Xilinx to enable * the write gate, which will be open for about the next 2ms. */ - spin_lock_irqsave(&nw_gpio_lock, flags); + raw_spin_lock_irqsave(&nw_gpio_lock, flags); nw_cpld_modify(CPLD_FLASH_WR_ENABLE, CPLD_FLASH_WR_ENABLE); - spin_unlock_irqrestore(&nw_gpio_lock, flags); + raw_spin_unlock_irqrestore(&nw_gpio_lock, flags); /* * let the ISA bus to catch on... 
From 7044198591216ee701f98eeefecabc9d0ad6c2ef Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 13 Apr 2015 16:23:36 +0200 Subject: [PATCH 0133/2091] PCI: Propagate the "ignore hotplug" setting to parent commit 0824965140fff1bf640a987dc790d1594a8e0699 upstream. Refine the mechanism introduced by commit f244d8b623da ("ACPIPHP / radeon / nouveau: Fix VGA switcheroo problem related to hotplug") to propagate the ignore_hotplug setting of the device to its parent bridge in case hotplug notifications related to the graphics adapter switching are given for the bridge rather than for the device itself (they need to be ignored in both cases). Link: https://bugzilla.kernel.org/show_bug.cgi?id=61891 Link: https://bugs.freedesktop.org/show_bug.cgi?id=88927 Fixes: b440bde74f04 ("PCI: Add pci_ignore_hotplug() to ignore hotplug events for a device") Reported-and-tested-by: tiagdtd-lava Signed-off-by: Rafael J. Wysocki Signed-off-by: Bjorn Helgaas Signed-off-by: Greg Kroah-Hartman --- drivers/pci/pci.c | 11 +++++++++++ include/linux/pci.h | 6 +----- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index acc4b6ef78c43..c44393f26fd37 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -4324,6 +4324,17 @@ bool pci_device_is_present(struct pci_dev *pdev) } EXPORT_SYMBOL_GPL(pci_device_is_present); +void pci_ignore_hotplug(struct pci_dev *dev) +{ + struct pci_dev *bridge = dev->bus->self; + + dev->ignore_hotplug = 1; + /* Propagate the "ignore hotplug" setting to the parent bridge. */ + if (bridge) + bridge->ignore_hotplug = 1; +} +EXPORT_SYMBOL_GPL(pci_ignore_hotplug); + #define RESOURCE_ALIGNMENT_PARAM_SIZE COMMAND_LINE_SIZE static char resource_alignment_param[RESOURCE_ALIGNMENT_PARAM_SIZE] = {0}; static DEFINE_SPINLOCK(resource_alignment_lock); diff --git a/include/linux/pci.h b/include/linux/pci.h index 353db8dc4c6e0..ef45ffe9ca882 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1006,6 +1006,7 @@ int __must_check pci_assign_resource(struct pci_dev *dev, int i); int __must_check pci_reassign_resource(struct pci_dev *dev, int i, resource_size_t add_size, resource_size_t align); int pci_select_bars(struct pci_dev *dev, unsigned long flags); bool pci_device_is_present(struct pci_dev *pdev); +void pci_ignore_hotplug(struct pci_dev *dev); /* ROM control related routines */ int pci_enable_rom(struct pci_dev *pdev); @@ -1043,11 +1044,6 @@ bool pci_dev_run_wake(struct pci_dev *dev); bool pci_check_pme_status(struct pci_dev *dev); void pci_pme_wakeup_bus(struct pci_bus *bus); -static inline void pci_ignore_hotplug(struct pci_dev *dev) -{ - dev->ignore_hotplug = 1; -} - static inline int pci_enable_wake(struct pci_dev *dev, pci_power_t state, bool enable) { From 30e8a1821385dbd85830b407103f28d989764911 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 27 May 2015 17:23:51 -0700 Subject: [PATCH 0134/2091] PCI: Add pci_bus_addr_t commit 3a9ad0b4fdcd57f775d3615004c8c64c021a9e7d upstream. David Ahern reported that d63e2e1f3df9 ("sparc/PCI: Clip bridge windows to fit in upstream windows") fails to boot on sparc/T5-8: pci 0000:06:00.0: reg 0x184: can't handle BAR above 4GB (bus address 0x110204000) The problem is that sparc64 assumed that dma_addr_t only needed to hold DMA addresses, i.e., bus addresses returned via the DMA API (dma_map_single(), etc.), while the PCI core assumed dma_addr_t could hold *any* bus address, including raw BAR values. On sparc64, all DMA addresses fit in 32 bits, so dma_addr_t is a 32-bit type. 
However, BAR values can be 64 bits wide, so they don't fit in a dma_addr_t. d63e2e1f3df9 added new checking that tripped over this mismatch. Add pci_bus_addr_t, which is wide enough to hold any PCI bus address, including both raw BAR values and DMA addresses. This will be 64 bits on 64-bit platforms and on platforms with a 64-bit dma_addr_t. Then dma_addr_t only needs to be wide enough to hold addresses from the DMA API. [bhelgaas: changelog, bugzilla, Kconfig to ensure pci_bus_addr_t is at least as wide as dma_addr_t, documentation] Fixes: d63e2e1f3df9 ("sparc/PCI: Clip bridge windows to fit in upstream windows") Fixes: 23b13bc76f35 ("PCI: Fail safely if we can't handle BARs larger than 4GB") Link: http://lkml.kernel.org/r/CAE9FiQU1gJY1LYrxs+ma5LCTEEe4xmtjRG0aXJ9K_Tsu+m9Wuw@mail.gmail.com Link: http://lkml.kernel.org/r/1427857069-6789-1-git-send-email-yinghai@kernel.org Link: https://bugzilla.kernel.org/show_bug.cgi?id=96231 Reported-by: David Ahern Tested-by: David Ahern Signed-off-by: Yinghai Lu Signed-off-by: Bjorn Helgaas Acked-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- Documentation/DMA-API-HOWTO.txt | 29 +++++++++++++++++------------ Documentation/DMA-API.txt | 30 +++++++++++++++--------------- drivers/pci/Kconfig | 4 ++++ drivers/pci/bus.c | 10 +++++----- drivers/pci/probe.c | 12 ++++++------ include/linux/pci.h | 12 +++++++++--- include/linux/types.h | 12 ++++++++++-- 7 files changed, 66 insertions(+), 43 deletions(-) diff --git a/Documentation/DMA-API-HOWTO.txt b/Documentation/DMA-API-HOWTO.txt index 0f7afb2bb442e..aef8cc5a677bd 100644 --- a/Documentation/DMA-API-HOWTO.txt +++ b/Documentation/DMA-API-HOWTO.txt @@ -25,13 +25,18 @@ physical addresses. These are the addresses in /proc/iomem. The physical address is not directly useful to a driver; it must use ioremap() to map the space and produce a virtual address. -I/O devices use a third kind of address: a "bus address" or "DMA address". -If a device has registers at an MMIO address, or if it performs DMA to read -or write system memory, the addresses used by the device are bus addresses. -In some systems, bus addresses are identical to CPU physical addresses, but -in general they are not. IOMMUs and host bridges can produce arbitrary +I/O devices use a third kind of address: a "bus address". If a device has +registers at an MMIO address, or if it performs DMA to read or write system +memory, the addresses used by the device are bus addresses. In some +systems, bus addresses are identical to CPU physical addresses, but in +general they are not. IOMMUs and host bridges can produce arbitrary mappings between physical and bus addresses. +From a device's point of view, DMA uses the bus address space, but it may +be restricted to a subset of that space. For example, even if a system +supports 64-bit addresses for main memory and PCI BARs, it may use an IOMMU +so devices only need to use 32-bit DMA addresses. + Here's a picture and some examples: CPU CPU Bus @@ -72,11 +77,11 @@ can use virtual address X to access the buffer, but the device itself cannot because DMA doesn't go through the CPU virtual memory system. In some simple systems, the device can do DMA directly to physical address -Y. But in many others, there is IOMMU hardware that translates bus +Y. But in many others, there is IOMMU hardware that translates DMA addresses to physical addresses, e.g., it translates Z to Y. 
This is part of the reason for the DMA API: the driver can give a virtual address X to an interface like dma_map_single(), which sets up any required IOMMU -mapping and returns the bus address Z. The driver then tells the device to +mapping and returns the DMA address Z. The driver then tells the device to do DMA to Z, and the IOMMU maps it to the buffer at address Y in system RAM. @@ -98,7 +103,7 @@ First of all, you should make sure #include is in your driver, which provides the definition of dma_addr_t. This type -can hold any valid DMA or bus address for the platform and should be used +can hold any valid DMA address for the platform and should be used everywhere you hold a DMA address returned from the DMA mapping functions. What memory is DMA'able? @@ -316,7 +321,7 @@ There are two types of DMA mappings: Think of "consistent" as "synchronous" or "coherent". The current default is to return consistent memory in the low 32 - bits of the bus space. However, for future compatibility you should + bits of the DMA space. However, for future compatibility you should set the consistent mask even if this default is fine for your driver. @@ -403,7 +408,7 @@ dma_alloc_coherent() returns two values: the virtual address which you can use to access it from the CPU and dma_handle which you pass to the card. -The CPU virtual address and the DMA bus address are both +The CPU virtual address and the DMA address are both guaranteed to be aligned to the smallest PAGE_SIZE order which is greater than or equal to the requested size. This invariant exists (for example) to guarantee that if you allocate a chunk @@ -645,8 +650,8 @@ PLEASE NOTE: The 'nents' argument to the dma_unmap_sg call must be dma_map_sg call. Every dma_map_{single,sg}() call should have its dma_unmap_{single,sg}() -counterpart, because the bus address space is a shared resource and -you could render the machine unusable by consuming all bus addresses. +counterpart, because the DMA address space is a shared resource and +you could render the machine unusable by consuming all DMA addresses. If you need to use the same streaming DMA region multiple times and touch the data in between the DMA transfers, the buffer needs to be synced diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt index 52088408668a8..7eba542eff7c8 100644 --- a/Documentation/DMA-API.txt +++ b/Documentation/DMA-API.txt @@ -18,10 +18,10 @@ Part I - dma_ API To get the dma_ API, you must #include . This provides dma_addr_t and the interfaces described below. -A dma_addr_t can hold any valid DMA or bus address for the platform. It -can be given to a device to use as a DMA source or target. A CPU cannot -reference a dma_addr_t directly because there may be translation between -its physical address space and the bus address space. +A dma_addr_t can hold any valid DMA address for the platform. It can be +given to a device to use as a DMA source or target. A CPU cannot reference +a dma_addr_t directly because there may be translation between its physical +address space and the DMA address space. Part Ia - Using large DMA-coherent buffers ------------------------------------------ @@ -42,7 +42,7 @@ It returns a pointer to the allocated region (in the processor's virtual address space) or NULL if the allocation failed. It also returns a which may be cast to an unsigned integer the -same width as the bus and given to the device as the bus address base of +same width as the bus and given to the device as the DMA address base of the region. 
Note: consistent memory can be expensive on some platforms, and the @@ -193,7 +193,7 @@ dma_map_single(struct device *dev, void *cpu_addr, size_t size, enum dma_data_direction direction) Maps a piece of processor virtual memory so it can be accessed by the -device and returns the bus address of the memory. +device and returns the DMA address of the memory. The direction for both APIs may be converted freely by casting. However the dma_ API uses a strongly typed enumerator for its @@ -212,20 +212,20 @@ contiguous piece of memory. For this reason, memory to be mapped by this API should be obtained from sources which guarantee it to be physically contiguous (like kmalloc). -Further, the bus address of the memory must be within the +Further, the DMA address of the memory must be within the dma_mask of the device (the dma_mask is a bit mask of the -addressable region for the device, i.e., if the bus address of -the memory ANDed with the dma_mask is still equal to the bus +addressable region for the device, i.e., if the DMA address of +the memory ANDed with the dma_mask is still equal to the DMA address, then the device can perform DMA to the memory). To ensure that the memory allocated by kmalloc is within the dma_mask, the driver may specify various platform-dependent flags to restrict -the bus address range of the allocation (e.g., on x86, GFP_DMA -guarantees to be within the first 16MB of available bus addresses, +the DMA address range of the allocation (e.g., on x86, GFP_DMA +guarantees to be within the first 16MB of available DMA addresses, as required by ISA devices). Note also that the above constraints on physical contiguity and dma_mask may not apply if the platform has an IOMMU (a device which -maps an I/O bus address to a physical memory address). However, to be +maps an I/O DMA address to a physical memory address). However, to be portable, device driver writers may *not* assume that such an IOMMU exists. @@ -296,7 +296,7 @@ reduce current DMA mapping usage or delay and try again later). dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction) -Returns: the number of bus address segments mapped (this may be shorter +Returns: the number of DMA address segments mapped (this may be shorter than passed in if some elements of the scatter/gather list are physically or virtually adjacent and an IOMMU maps them with a single entry). @@ -340,7 +340,7 @@ must be the same as those and passed in to the scatter/gather mapping API. Note: must be the number you passed in, *not* the number of -bus address entries returned. +DMA address entries returned. void dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size, @@ -507,7 +507,7 @@ it's asked for coherent memory for this device. phys_addr is the CPU physical address to which the memory is currently assigned (this will be ioremapped so the CPU can access the region). -device_addr is the bus address the device needs to be programmed +device_addr is the DMA address the device needs to be programmed with to actually address this memory (this will be handed out as the dma_addr_t in dma_alloc_coherent()). 
diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig index 7a8f1c5e65af1..73de4efcbe6ed 100644 --- a/drivers/pci/Kconfig +++ b/drivers/pci/Kconfig @@ -1,6 +1,10 @@ # # PCI configuration # +config PCI_BUS_ADDR_T_64BIT + def_bool y if (ARCH_DMA_ADDR_T_64BIT || 64BIT) + depends on PCI + config PCI_MSI bool "Message Signaled Interrupts (MSI and MSI-X)" depends on PCI diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index 90fa3a78fb7ce..6fbd3f2b5992a 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -92,11 +92,11 @@ void pci_bus_remove_resources(struct pci_bus *bus) } static struct pci_bus_region pci_32_bit = {0, 0xffffffffULL}; -#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT +#ifdef CONFIG_PCI_BUS_ADDR_T_64BIT static struct pci_bus_region pci_64_bit = {0, - (dma_addr_t) 0xffffffffffffffffULL}; -static struct pci_bus_region pci_high = {(dma_addr_t) 0x100000000ULL, - (dma_addr_t) 0xffffffffffffffffULL}; + (pci_bus_addr_t) 0xffffffffffffffffULL}; +static struct pci_bus_region pci_high = {(pci_bus_addr_t) 0x100000000ULL, + (pci_bus_addr_t) 0xffffffffffffffffULL}; #endif /* @@ -200,7 +200,7 @@ int pci_bus_alloc_resource(struct pci_bus *bus, struct resource *res, resource_size_t), void *alignf_data) { -#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT +#ifdef CONFIG_PCI_BUS_ADDR_T_64BIT int rc; if (res->flags & IORESOURCE_MEM_64) { diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 6675a7a1b9fc6..c91185721345b 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -254,8 +254,8 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, } if (res->flags & IORESOURCE_MEM_64) { - if ((sizeof(dma_addr_t) < 8 || sizeof(resource_size_t) < 8) && - sz64 > 0x100000000ULL) { + if ((sizeof(pci_bus_addr_t) < 8 || sizeof(resource_size_t) < 8) + && sz64 > 0x100000000ULL) { res->flags |= IORESOURCE_UNSET | IORESOURCE_DISABLED; res->start = 0; res->end = 0; @@ -264,7 +264,7 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, goto out; } - if ((sizeof(dma_addr_t) < 8) && l) { + if ((sizeof(pci_bus_addr_t) < 8) && l) { /* Above 32-bit boundary; try to reallocate */ res->flags |= IORESOURCE_UNSET; res->start = 0; @@ -399,7 +399,7 @@ static void pci_read_bridge_mmio_pref(struct pci_bus *child) struct pci_dev *dev = child->self; u16 mem_base_lo, mem_limit_lo; u64 base64, limit64; - dma_addr_t base, limit; + pci_bus_addr_t base, limit; struct pci_bus_region region; struct resource *res; @@ -426,8 +426,8 @@ static void pci_read_bridge_mmio_pref(struct pci_bus *child) } } - base = (dma_addr_t) base64; - limit = (dma_addr_t) limit64; + base = (pci_bus_addr_t) base64; + limit = (pci_bus_addr_t) limit64; if (base != base64) { dev_err(&dev->dev, "can't handle bridge window above 4GB (bus address %#010llx)\n", diff --git a/include/linux/pci.h b/include/linux/pci.h index ef45ffe9ca882..3ef3a52068df4 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -577,9 +577,15 @@ int raw_pci_read(unsigned int domain, unsigned int bus, unsigned int devfn, int raw_pci_write(unsigned int domain, unsigned int bus, unsigned int devfn, int reg, int len, u32 val); +#ifdef CONFIG_PCI_BUS_ADDR_T_64BIT +typedef u64 pci_bus_addr_t; +#else +typedef u32 pci_bus_addr_t; +#endif + struct pci_bus_region { - dma_addr_t start; - dma_addr_t end; + pci_bus_addr_t start; + pci_bus_addr_t end; }; struct pci_dynids { @@ -1124,7 +1130,7 @@ int __must_check pci_bus_alloc_resource(struct pci_bus *bus, int pci_remap_iospace(const struct resource *res, phys_addr_t phys_addr); -static inline dma_addr_t pci_bus_address(struct 
pci_dev *pdev, int bar) +static inline pci_bus_addr_t pci_bus_address(struct pci_dev *pdev, int bar) { struct pci_bus_region region; diff --git a/include/linux/types.h b/include/linux/types.h index 59698be034908..8715287c3b1f6 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -139,12 +139,20 @@ typedef unsigned long blkcnt_t; */ #define pgoff_t unsigned long -/* A dma_addr_t can hold any valid DMA or bus address for the platform */ +/* + * A dma_addr_t can hold any valid DMA address, i.e., any address returned + * by the DMA API. + * + * If the DMA API only uses 32-bit addresses, dma_addr_t need only be 32 + * bits wide. Bus addresses, e.g., PCI BARs, may be wider than 32 bits, + * but drivers do memory-mapped I/O to ioremapped kernel virtual addresses, + * so they don't care about the size of the actual bus addresses. + */ #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT typedef u64 dma_addr_t; #else typedef u32 dma_addr_t; -#endif /* dma_addr_t */ +#endif typedef unsigned __bitwise__ gfp_t; typedef unsigned __bitwise__ fmode_t; From 5224e2a708f617fb69ea7cb56613838af40af188 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Mon, 8 Jun 2015 17:10:50 -0600 Subject: [PATCH 0135/2091] PCI: pciehp: Wait for hotplug command completion where necessary commit a5dd4b4b0570b3bf880d563969b245dfbd170c1e upstream. The commit referenced below deferred waiting for command completion until the start of the next command, allowing hardware to do the latching asynchronously. Unfortunately, being ready to accept a new command is the only indication we have that the previous command is completed. In cases where we need that state change to be enabled, we must still wait for completion. For instance, pciehp_reset_slot() attempts to disable anything that might generate a surprise hotplug on slots that support presence detection. If we don't wait for those settings to latch before the secondary bus reset, we negate any value in attempting to prevent the spurious hotplug. Create a base function with optional wait and helper functions so that pcie_write_cmd() turns back into the "safe" interface which waits before and after issuing a command and add pcie_write_cmd_nowait(), which eliminates the trailing wait for asynchronous completion. The following functions are returned to their previous behavior: pciehp_power_on_slot pciehp_power_off_slot pcie_disable_notification pciehp_reset_slot The rationale is that pciehp_power_on_slot() enables the link and therefore relies on completion of power-on. pciehp_power_off_slot() and pcie_disable_notification() need a wait because data structures may be freed after these calls and continued signaling from the device would be unexpected. And, of course, pciehp_reset_slot() needs to wait for the scenario outlined above. 
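To make the issue-command-then-wait rationale above concrete, here is a minimal userspace C sketch of the pattern (illustrative only, not the pciehp driver; fake_ctrl, do_write_cmd() and wait_cmd() are invented names). A command whose effect the caller depends on, such as masking hotplug events before a bus reset, takes the trailing wait; a cosmetic command such as an LED update can let the hardware latch lazily.

/*
 * Standalone sketch (userspace, invented names): a command whose side
 * effect must be visible before the caller's next step needs a trailing
 * wait; a purely cosmetic command can be latched lazily by the hardware.
 */
#include <stdbool.h>
#include <stdio.h>

struct fake_ctrl {
	int unlatched;			/* commands issued but not yet latched */
};

static void wait_cmd(struct fake_ctrl *ctrl)
{
	/* stand-in for polling the controller's "command completed" bit */
	ctrl->unlatched = 0;
}

static void do_write_cmd(struct fake_ctrl *ctrl, unsigned cmd, bool wait)
{
	wait_cmd(ctrl);			/* always flush the previous command */
	ctrl->unlatched++;		/* "issue" the new command */
	printf("issued %#x%s\n", cmd, wait ? " (waited)" : "");
	if (wait)
		wait_cmd(ctrl);		/* don't return until it has latched */
}

int main(void)
{
	struct fake_ctrl ctrl = { 0 };

	do_write_cmd(&ctrl, 0x1, false);	/* e.g. blink an indicator LED */
	do_write_cmd(&ctrl, 0x2, true);		/* e.g. mask hotplug events
						 * before a bus reset */
	return ctrl.unlatched;			/* 0: nothing left pending */
}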
Fixes: 3461a068661c ("PCI: pciehp: Wait for hotplug command completion lazily") Signed-off-by: Alex Williamson Signed-off-by: Bjorn Helgaas Signed-off-by: Greg Kroah-Hartman --- drivers/pci/hotplug/pciehp_hpc.c | 52 +++++++++++++++++++++++--------- 1 file changed, 38 insertions(+), 14 deletions(-) diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 0ebf754fc1775..6d6868811e56e 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -176,20 +176,17 @@ static void pcie_wait_cmd(struct controller *ctrl) jiffies_to_msecs(jiffies - ctrl->cmd_started)); } -/** - * pcie_write_cmd - Issue controller command - * @ctrl: controller to which the command is issued - * @cmd: command value written to slot control register - * @mask: bitmask of slot control register to be modified - */ -static void pcie_write_cmd(struct controller *ctrl, u16 cmd, u16 mask) +static void pcie_do_write_cmd(struct controller *ctrl, u16 cmd, + u16 mask, bool wait) { struct pci_dev *pdev = ctrl_dev(ctrl); u16 slot_ctrl; mutex_lock(&ctrl->ctrl_lock); - /* Wait for any previous command that might still be in progress */ + /* + * Always wait for any previous command that might still be in progress + */ pcie_wait_cmd(ctrl); pcie_capability_read_word(pdev, PCI_EXP_SLTCTL, &slot_ctrl); @@ -201,9 +198,33 @@ static void pcie_write_cmd(struct controller *ctrl, u16 cmd, u16 mask) ctrl->cmd_started = jiffies; ctrl->slot_ctrl = slot_ctrl; + /* + * Optionally wait for the hardware to be ready for a new command, + * indicating completion of the above issued command. + */ + if (wait) + pcie_wait_cmd(ctrl); + mutex_unlock(&ctrl->ctrl_lock); } +/** + * pcie_write_cmd - Issue controller command + * @ctrl: controller to which the command is issued + * @cmd: command value written to slot control register + * @mask: bitmask of slot control register to be modified + */ +static void pcie_write_cmd(struct controller *ctrl, u16 cmd, u16 mask) +{ + pcie_do_write_cmd(ctrl, cmd, mask, true); +} + +/* Same as above without waiting for the hardware to latch */ +static void pcie_write_cmd_nowait(struct controller *ctrl, u16 cmd, u16 mask) +{ + pcie_do_write_cmd(ctrl, cmd, mask, false); +} + bool pciehp_check_link_active(struct controller *ctrl) { struct pci_dev *pdev = ctrl_dev(ctrl); @@ -422,7 +443,7 @@ void pciehp_set_attention_status(struct slot *slot, u8 value) default: return; } - pcie_write_cmd(ctrl, slot_cmd, PCI_EXP_SLTCTL_AIC); + pcie_write_cmd_nowait(ctrl, slot_cmd, PCI_EXP_SLTCTL_AIC); ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__, pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, slot_cmd); } @@ -434,7 +455,8 @@ void pciehp_green_led_on(struct slot *slot) if (!PWR_LED(ctrl)) return; - pcie_write_cmd(ctrl, PCI_EXP_SLTCTL_PWR_IND_ON, PCI_EXP_SLTCTL_PIC); + pcie_write_cmd_nowait(ctrl, PCI_EXP_SLTCTL_PWR_IND_ON, + PCI_EXP_SLTCTL_PIC); ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__, pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, PCI_EXP_SLTCTL_PWR_IND_ON); @@ -447,7 +469,8 @@ void pciehp_green_led_off(struct slot *slot) if (!PWR_LED(ctrl)) return; - pcie_write_cmd(ctrl, PCI_EXP_SLTCTL_PWR_IND_OFF, PCI_EXP_SLTCTL_PIC); + pcie_write_cmd_nowait(ctrl, PCI_EXP_SLTCTL_PWR_IND_OFF, + PCI_EXP_SLTCTL_PIC); ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__, pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, PCI_EXP_SLTCTL_PWR_IND_OFF); @@ -460,7 +483,8 @@ void pciehp_green_led_blink(struct slot *slot) if (!PWR_LED(ctrl)) return; - pcie_write_cmd(ctrl, 
PCI_EXP_SLTCTL_PWR_IND_BLINK, PCI_EXP_SLTCTL_PIC); + pcie_write_cmd_nowait(ctrl, PCI_EXP_SLTCTL_PWR_IND_BLINK, + PCI_EXP_SLTCTL_PIC); ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__, pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, PCI_EXP_SLTCTL_PWR_IND_BLINK); @@ -613,7 +637,7 @@ void pcie_enable_notification(struct controller *ctrl) PCI_EXP_SLTCTL_HPIE | PCI_EXP_SLTCTL_CCIE | PCI_EXP_SLTCTL_DLLSCE); - pcie_write_cmd(ctrl, cmd, mask); + pcie_write_cmd_nowait(ctrl, cmd, mask); ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__, pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, cmd); } @@ -664,7 +688,7 @@ int pciehp_reset_slot(struct slot *slot, int probe) pci_reset_bridge_secondary_bus(ctrl->pcie->port); pcie_capability_write_word(pdev, PCI_EXP_SLTSTA, stat_mask); - pcie_write_cmd(ctrl, ctrl_mask, ctrl_mask); + pcie_write_cmd_nowait(ctrl, ctrl_mask, ctrl_mask); ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__, pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, ctrl_mask); if (pciehp_poll_mode) From d389ad7c0e6c02efaa3fe7992efb2452022e4b63 Mon Sep 17 00:00:00 2001 From: Zhichang Yuan Date: Fri, 24 Apr 2015 17:05:09 +0800 Subject: [PATCH 0136/2091] of/pci: Fix pci_address_to_pio() conversion of CPU address to I/O port commit 5dbb4c6167229c8d4f528e8ec26699a7305000a3 upstream. 41f8bba7f555 ("of/pci: Add pci_register_io_range() and pci_pio_to_address()") added support for systems with several I/O ranges described by OF bindings. It modified pci_address_to_pio() look up the io_range for a given CPU physical address, but the conversion was wrong. Fix the conversion of address to I/O port. [bhelgaas: changelog] Fixes: 41f8bba7f555 ("of/pci: Add pci_register_io_range() and pci_pio_to_address()") Signed-off-by: Zhichang Yuan Signed-off-by: Bjorn Helgaas Acked-by: Liviu Dudau Signed-off-by: Greg Kroah-Hartman --- drivers/of/address.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/of/address.c b/drivers/of/address.c index 78a7dcbec7d89..6906a3f61bd86 100644 --- a/drivers/of/address.c +++ b/drivers/of/address.c @@ -765,7 +765,7 @@ unsigned long __weak pci_address_to_pio(phys_addr_t address) spin_lock(&io_range_lock); list_for_each_entry(res, &io_range_list, list) { if (address >= res->start && address < res->start + res->size) { - addr = res->start - address + offset; + addr = address - res->start + offset; break; } offset += res->size; From 7890602ea4fa5793f5ecad24bcca0049fa7ca06d Mon Sep 17 00:00:00 2001 From: Frodo Lai Date: Tue, 16 Jun 2015 15:03:53 -0700 Subject: [PATCH 0137/2091] Input: pixcir_i2c_ts - fix receive error commit 469d7d22cea146e40efe8c330e5164b4d8f13934 upstream. The i2c_master_recv() uses readsize to receive data from i2c but compares to size of rdbuf which is always 27. This would cause problem when the max_fingers is not 5. Change the comparison value to readsize instead. 
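As a self-contained illustration of the bug class described above, the sketch below compares a short read against sizeof() of a fixed buffer versus the requested length. fake_recv() is a made-up stand-in for i2c_master_recv(), and the buffer and readsize values are only examples, not the driver's actual report layout.

/*
 * Checking a partial read against a fixed buffer size instead of the
 * number of bytes actually requested (illustrative userspace code only).
 */
#include <stdio.h>
#include <string.h>

/* stand-in for i2c_master_recv(): "reads" len bytes, returns that count */
static int fake_recv(unsigned char *buf, int len)
{
	memset(buf, 0xab, len);
	return len;
}

int main(void)
{
	unsigned char rdbuf[27];
	int readsize = 12;		/* a request smaller than the 27-byte buffer */
	int ret = fake_recv(rdbuf, readsize);

	if (ret != (int)sizeof(rdbuf))	/* old check: always expects 27 */
		printf("old check: spurious failure, ret=%d\n", ret);
	if (ret == readsize)		/* fixed check: expects what was asked for */
		printf("new check: ok, ret=%d\n", ret);
	return 0;
}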
Fixes: 36874c7e219 ("Input: pixcir_i2c_ts - support up to 5 fingers and hardware tracking IDs:) Signed-off-by: Frodo Lai Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/touchscreen/pixcir_i2c_ts.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/touchscreen/pixcir_i2c_ts.c b/drivers/input/touchscreen/pixcir_i2c_ts.c index 2c2107147319e..8f3e243a62bf3 100644 --- a/drivers/input/touchscreen/pixcir_i2c_ts.c +++ b/drivers/input/touchscreen/pixcir_i2c_ts.c @@ -78,7 +78,7 @@ static void pixcir_ts_parse(struct pixcir_i2c_ts_data *tsdata, } ret = i2c_master_recv(tsdata->client, rdbuf, readsize); - if (ret != sizeof(rdbuf)) { + if (ret != readsize) { dev_err(&tsdata->client->dev, "%s: i2c_master_recv failed(), ret=%d\n", __func__, ret); From c8bde72f9af412de57f0ceae218d648640118b0b Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 21 Jul 2015 10:10:33 -0700 Subject: [PATCH 0138/2091] Linux 4.1.3 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index cef84c061f027..e3cdec4898be2 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 1 -SUBLEVEL = 2 +SUBLEVEL = 3 EXTRAVERSION = NAME = Series 4800 From fc25a0db97a0417990c4f380f04ecf0d0ca095a6 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 20 May 2015 17:42:30 +0900 Subject: [PATCH 0139/2091] pinctrl: zynq: fix DEFINE_ZYNQ_PINMUX_FUNCTION_MUX macro MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 4f652cea020aac42972cb7c9788b470ed45aa228 upstream. The offset to the mux register is missing. Fixes: add958cee967 "pinctrl: Add driver for Zynq" Signed-off-by: Masahiro Yamada Reviewed-by: Sören Brinkmann Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/pinctrl-zynq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/pinctrl-zynq.c b/drivers/pinctrl/pinctrl-zynq.c index 22280bddb9e26..6ec64b36c5b40 100644 --- a/drivers/pinctrl/pinctrl-zynq.c +++ b/drivers/pinctrl/pinctrl-zynq.c @@ -714,12 +714,13 @@ static const char * const gpio0_groups[] = {"gpio0_0_grp", .mux_val = mval, \ } -#define DEFINE_ZYNQ_PINMUX_FUNCTION_MUX(fname, mval, mux, mask, shift) \ +#define DEFINE_ZYNQ_PINMUX_FUNCTION_MUX(fname, mval, offset, mask, shift)\ [ZYNQ_PMUX_##fname] = { \ .name = #fname, \ .groups = fname##_groups, \ .ngroups = ARRAY_SIZE(fname##_groups), \ .mux_val = mval, \ + .mux = offset, \ .mux_mask = mask, \ .mux_shift = shift, \ } From 09ed2dbb1a7e827c09115e46e59dfe03b8ccac94 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 20 May 2015 17:42:31 +0900 Subject: [PATCH 0140/2091] pinctrl: zynq: fix offset address for {SD0,SD1}_WP_CD_SEL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 5cf021d52026c0e998756a3bdb475aae2e8a68a4 upstream. The address for SD0_WP_CD_SEL, SD1_WP_CD_SEL is 0xf8000830, 0xf8000834, respectively. Each offset address must be prefixed with 0x. 
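The decimal-versus-hex confusion is easy to see with a trivial standalone snippet: 0x130 and 0x134 are the offsets used in the corrected driver below, while plain 130 and 134 are parsed as decimal and point at entirely different locations.

/* Demonstrates why the 0x prefix matters for the register offsets. */
#include <stdio.h>

int main(void)
{
	printf("130   -> byte offset %d (decimal, wrong location)\n", 130);
	printf("0x130 -> byte offset %d (offset used for SD0_WP_CD_SEL)\n", 0x130);
	printf("0x134 -> byte offset %d (offset used for SD1_WP_CD_SEL)\n", 0x134);
	return 0;
}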
Fixes: add958cee967 "pinctrl: Add driver for Zynq" Signed-off-by: Masahiro Yamada Reviewed-by: Sören Brinkmann Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/pinctrl-zynq.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/pinctrl/pinctrl-zynq.c b/drivers/pinctrl/pinctrl-zynq.c index 6ec64b36c5b40..8c51a3c65513a 100644 --- a/drivers/pinctrl/pinctrl-zynq.c +++ b/drivers/pinctrl/pinctrl-zynq.c @@ -745,15 +745,15 @@ static const struct zynq_pinmux_function zynq_pmux_functions[] = { DEFINE_ZYNQ_PINMUX_FUNCTION(spi1, 0x50), DEFINE_ZYNQ_PINMUX_FUNCTION(sdio0, 0x40), DEFINE_ZYNQ_PINMUX_FUNCTION(sdio0_pc, 0xc), - DEFINE_ZYNQ_PINMUX_FUNCTION_MUX(sdio0_wp, 0, 130, ZYNQ_SDIO_WP_MASK, + DEFINE_ZYNQ_PINMUX_FUNCTION_MUX(sdio0_wp, 0, 0x130, ZYNQ_SDIO_WP_MASK, ZYNQ_SDIO_WP_SHIFT), - DEFINE_ZYNQ_PINMUX_FUNCTION_MUX(sdio0_cd, 0, 130, ZYNQ_SDIO_CD_MASK, + DEFINE_ZYNQ_PINMUX_FUNCTION_MUX(sdio0_cd, 0, 0x130, ZYNQ_SDIO_CD_MASK, ZYNQ_SDIO_CD_SHIFT), DEFINE_ZYNQ_PINMUX_FUNCTION(sdio1, 0x40), DEFINE_ZYNQ_PINMUX_FUNCTION(sdio1_pc, 0xc), - DEFINE_ZYNQ_PINMUX_FUNCTION_MUX(sdio1_wp, 0, 134, ZYNQ_SDIO_WP_MASK, + DEFINE_ZYNQ_PINMUX_FUNCTION_MUX(sdio1_wp, 0, 0x134, ZYNQ_SDIO_WP_MASK, ZYNQ_SDIO_WP_SHIFT), - DEFINE_ZYNQ_PINMUX_FUNCTION_MUX(sdio1_cd, 0, 134, ZYNQ_SDIO_CD_MASK, + DEFINE_ZYNQ_PINMUX_FUNCTION_MUX(sdio1_cd, 0, 0x134, ZYNQ_SDIO_CD_MASK, ZYNQ_SDIO_CD_SHIFT), DEFINE_ZYNQ_PINMUX_FUNCTION(smc0_nor, 4), DEFINE_ZYNQ_PINMUX_FUNCTION(smc0_nor_cs1, 8), From f1a1a4b4cf987c0609aba4f45ddd44d7719d03e0 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Tue, 9 Jun 2015 18:46:54 +0200 Subject: [PATCH 0141/2091] pinctrl: mvebu: armada-370: fix spi0 pin description commit 438881dfddb9107ef0eb30b49368e91e092f0b3e upstream. Due to a mistake, the CS0 and CS1 SPI0 functions were incorrectly named "spi0-1" instead of just "spi0". This commit fixes that. This DT binding change does not affect any of the in-tree users. 
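Why the bare function-name string matters is sketched below in a standalone, simplified form: assuming the pinmux core collects functions by their name string (which is how such tables are generally consumed), the stray "-1" suffix would make CS1/CS2 look like a separate function rather than additional settings of "spi0". The grouping code is illustrative only and is not the mvebu pinctrl core; the pin numbers and strings mirror the diff below.

/* Counts distinct function-name strings in a simplified pinmux table. */
#include <stdio.h>
#include <string.h>

struct entry { int pin; const char *func; const char *setting; };

static void count_functions(const struct entry *e, int n)
{
	const char *seen[16];
	int nseen = 0;

	for (int i = 0; i < n; i++) {
		int dup = 0;
		for (int j = 0; j < nseen; j++)
			if (!strcmp(seen[j], e[i].func))
				dup = 1;
		if (!dup)
			seen[nseen++] = e[i].func;
	}
	printf("%d distinct function name(s)\n", nseen);
}

int main(void)
{
	const struct entry broken[] = {
		{ 63, "spi0",   "sck" },
		{ 64, "spi0-1", "cs1" },	/* typo'd name */
		{ 65, "spi0-1", "cs2" },
	};
	const struct entry fixed[] = {
		{ 63, "spi0", "sck" },
		{ 64, "spi0", "cs1" },
		{ 65, "spi0", "cs2" },
	};

	count_functions(broken, 3);	/* 2: "spi0" plus a bogus "spi0-1" */
	count_functions(fixed, 3);	/* 1: just "spi0" */
	return 0;
}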
Signed-off-by: Thomas Petazzoni Fixes: 5f597bb2be57 ("pinctrl: mvebu: add pinctrl driver for Armada 370") Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- .../bindings/pinctrl/marvell,armada-370-pinctrl.txt | 4 ++-- drivers/pinctrl/mvebu/pinctrl-armada-370.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/devicetree/bindings/pinctrl/marvell,armada-370-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/marvell,armada-370-pinctrl.txt index adda2a8d1d529..e357b020861d6 100644 --- a/Documentation/devicetree/bindings/pinctrl/marvell,armada-370-pinctrl.txt +++ b/Documentation/devicetree/bindings/pinctrl/marvell,armada-370-pinctrl.txt @@ -92,5 +92,5 @@ mpp61 61 gpo, dev(wen1), uart1(txd), audio(rclk) mpp62 62 gpio, dev(a2), uart1(cts), tdm(drx), pcie(clkreq0), audio(mclk), uart0(cts) mpp63 63 gpo, spi0(sck), tclk -mpp64 64 gpio, spi0(miso), spi0-1(cs1) -mpp65 65 gpio, spi0(mosi), spi0-1(cs2) +mpp64 64 gpio, spi0(miso), spi0(cs1) +mpp65 65 gpio, spi0(mosi), spi0(cs2) diff --git a/drivers/pinctrl/mvebu/pinctrl-armada-370.c b/drivers/pinctrl/mvebu/pinctrl-armada-370.c index 03aa58c4cb85b..1eb084c3b0c96 100644 --- a/drivers/pinctrl/mvebu/pinctrl-armada-370.c +++ b/drivers/pinctrl/mvebu/pinctrl-armada-370.c @@ -370,11 +370,11 @@ static struct mvebu_mpp_mode mv88f6710_mpp_modes[] = { MPP_MODE(64, MPP_FUNCTION(0x0, "gpio", NULL), MPP_FUNCTION(0x1, "spi0", "miso"), - MPP_FUNCTION(0x2, "spi0-1", "cs1")), + MPP_FUNCTION(0x2, "spi0", "cs1")), MPP_MODE(65, MPP_FUNCTION(0x0, "gpio", NULL), MPP_FUNCTION(0x1, "spi0", "mosi"), - MPP_FUNCTION(0x2, "spi0-1", "cs2")), + MPP_FUNCTION(0x2, "spi0", "cs2")), }; static struct mvebu_pinctrl_soc_info armada_370_pinctrl_info; From 2893c1e916f58001b534b3050c6413b9d050a00d Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Tue, 9 Jun 2015 18:46:59 +0200 Subject: [PATCH 0142/2091] pinctrl: mvebu: armada-375: remove incorrect space in pin description commit d538990ee12b162f7ce6c0fcef3b643800102676 upstream. There was an incorrect space in the definition of the function of one pin in the Armada 375 pinctrl driver, which this commit fixes. Signed-off-by: Thomas Petazzoni Fixes: ce3ed59dcddd ("pinctrl: mvebu: add pin-muxing driver for the Marvell Armada 375") Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/mvebu/pinctrl-armada-375.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/mvebu/pinctrl-armada-375.c b/drivers/pinctrl/mvebu/pinctrl-armada-375.c index ca1e7571fedb5..bff602de74195 100644 --- a/drivers/pinctrl/mvebu/pinctrl-armada-375.c +++ b/drivers/pinctrl/mvebu/pinctrl-armada-375.c @@ -92,7 +92,7 @@ static struct mvebu_mpp_mode mv88f6720_mpp_modes[] = { MPP_FUNCTION(0x5, "nand", "io1")), MPP_MODE(8, MPP_FUNCTION(0x0, "gpio", NULL), - MPP_FUNCTION(0x1, "dev ", "bootcs"), + MPP_FUNCTION(0x1, "dev", "bootcs"), MPP_FUNCTION(0x2, "spi0", "cs0"), MPP_FUNCTION(0x3, "spi1", "cs0"), MPP_FUNCTION(0x5, "nand", "ce")), From 7f6d2a35acdd503e4bc35f790ccfefffba1872cc Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Tue, 9 Jun 2015 18:46:55 +0200 Subject: [PATCH 0143/2091] pinctrl: mvebu: armada-375: remove non-existing NAND re/we pins commit e5447d26092c72ef3346615ee558c9112ef8063f upstream. After updating to a more recent version of the Armada 375, we realized that some of the pins documented as having a NAND-related functionality in fact did not have such functionality. This commit updates the pinctrl driver accordingly. 
Signed-off-by: Thomas Petazzoni Fixes: ce3ed59dcddd ("pinctrl: mvebu: add pin-muxing driver for the Marvell Armada 375") Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- .../bindings/pinctrl/marvell,armada-375-pinctrl.txt | 4 ++-- drivers/pinctrl/mvebu/pinctrl-armada-375.c | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/Documentation/devicetree/bindings/pinctrl/marvell,armada-375-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/marvell,armada-375-pinctrl.txt index 7de0cda4a3791..bedbe42c8c0ac 100644 --- a/Documentation/devicetree/bindings/pinctrl/marvell,armada-375-pinctrl.txt +++ b/Documentation/devicetree/bindings/pinctrl/marvell,armada-375-pinctrl.txt @@ -22,8 +22,8 @@ mpp5 5 gpio, dev(ad7), spi0(cs2), spi1(cs2) mpp6 6 gpio, dev(ad0), led(p1), audio(rclk) mpp7 7 gpio, dev(ad1), ptp(clk), led(p2), audio(extclk) mpp8 8 gpio, dev (bootcs), spi0(cs0), spi1(cs0) -mpp9 9 gpio, nf(wen), spi0(sck), spi1(sck) -mpp10 10 gpio, nf(ren), dram(vttctrl), led(c1) +mpp9 9 gpio, spi0(sck), spi1(sck), nand(we) +mpp10 10 gpio, dram(vttctrl), led(c1), nand(re) mpp11 11 gpio, dev(a0), led(c2), audio(sdo) mpp12 12 gpio, dev(a1), audio(bclk) mpp13 13 gpio, dev(readyn), pcie0(rstoutn), pcie1(rstoutn) diff --git a/drivers/pinctrl/mvebu/pinctrl-armada-375.c b/drivers/pinctrl/mvebu/pinctrl-armada-375.c index bff602de74195..203291bde6088 100644 --- a/drivers/pinctrl/mvebu/pinctrl-armada-375.c +++ b/drivers/pinctrl/mvebu/pinctrl-armada-375.c @@ -98,13 +98,11 @@ static struct mvebu_mpp_mode mv88f6720_mpp_modes[] = { MPP_FUNCTION(0x5, "nand", "ce")), MPP_MODE(9, MPP_FUNCTION(0x0, "gpio", NULL), - MPP_FUNCTION(0x1, "nf", "wen"), MPP_FUNCTION(0x2, "spi0", "sck"), MPP_FUNCTION(0x3, "spi1", "sck"), MPP_FUNCTION(0x5, "nand", "we")), MPP_MODE(10, MPP_FUNCTION(0x0, "gpio", NULL), - MPP_FUNCTION(0x1, "nf", "ren"), MPP_FUNCTION(0x2, "dram", "vttctrl"), MPP_FUNCTION(0x3, "led", "c1"), MPP_FUNCTION(0x5, "nand", "re"), From 1d3f0ee87cec8957c39d6b08be076b0a11f9fb4c Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Tue, 9 Jun 2015 18:46:53 +0200 Subject: [PATCH 0144/2091] pinctrl: mvebu: armada-38x: fix PCIe functions commit 331642fbf24a1c16b2669ca0a6479b5fcd6dd5b2 upstream. A new revision of the Marvell Armada 38x hardware datasheet unveiled that the definition of some of the PCIe functions were not correct. This commit fixes the pinctrl driver accordingly. Some PCIe functions simply do not exist, some of the PCIe functions in fact were corresponding to other functions, and some PCIe functions have been added. Note: the seemingly unrelated removal of spi(cs2) on MPP47 is related: this function is in fact implemented on MPP43, instead of a PCIe function. 
Signed-off-by: Thomas Petazzoni Fixes: ca6d9a084b56f ("pinctrl: mvebu: add pin-muxing driver for the Marvell Armada 380/385") Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- .../pinctrl/marvell,armada-38x-pinctrl.txt | 38 +++++++------- drivers/pinctrl/mvebu/pinctrl-armada-38x.c | 49 ++++++++----------- 2 files changed, 39 insertions(+), 48 deletions(-) diff --git a/Documentation/devicetree/bindings/pinctrl/marvell,armada-38x-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/marvell,armada-38x-pinctrl.txt index b17c96849fc9e..4ac138aaaf879 100644 --- a/Documentation/devicetree/bindings/pinctrl/marvell,armada-38x-pinctrl.txt +++ b/Documentation/devicetree/bindings/pinctrl/marvell,armada-38x-pinctrl.txt @@ -27,15 +27,15 @@ mpp8 8 gpio, ge0(txd1), dev(ad10) mpp9 9 gpio, ge0(txd2), dev(ad11) mpp10 10 gpio, ge0(txd3), dev(ad12) mpp11 11 gpio, ge0(txctl), dev(ad13) -mpp12 12 gpio, ge0(rxd0), pcie0(rstout), pcie1(rstout) [1], spi0(cs1), dev(ad14) -mpp13 13 gpio, ge0(rxd1), pcie0(clkreq), pcie1(clkreq) [1], spi0(cs2), dev(ad15) -mpp14 14 gpio, ge0(rxd2), ptp(clk), m(vtt_ctrl), spi0(cs3), dev(wen1) -mpp15 15 gpio, ge0(rxd3), ge(mdc slave), pcie0(rstout), spi0(mosi), pcie1(rstout) [1] -mpp16 16 gpio, ge0(rxctl), ge(mdio slave), m(decc_err), spi0(miso), pcie0(clkreq) +mpp12 12 gpio, ge0(rxd0), pcie0(rstout), spi0(cs1), dev(ad14), pcie3(clkreq) +mpp13 13 gpio, ge0(rxd1), pcie0(clkreq), pcie1(clkreq) [1], spi0(cs2), dev(ad15), pcie2(clkreq) +mpp14 14 gpio, ge0(rxd2), ptp(clk), m(vtt_ctrl), spi0(cs3), dev(wen1), pcie3(clkreq) +mpp15 15 gpio, ge0(rxd3), ge(mdc slave), pcie0(rstout), spi0(mosi) +mpp16 16 gpio, ge0(rxctl), ge(mdio slave), m(decc_err), spi0(miso), pcie0(clkreq), pcie1(clkreq) [1] mpp17 17 gpio, ge0(rxclk), ptp(clk), ua1(rxd), spi0(sck), sata1(prsnt) -mpp18 18 gpio, ge0(rxerr), ptp(trig_gen), ua1(txd), spi0(cs0), pcie1(rstout) [1] -mpp19 19 gpio, ge0(col), ptp(event_req), pcie0(clkreq), sata1(prsnt), ua0(cts) -mpp20 20 gpio, ge0(txclk), ptp(clk), pcie1(rstout) [1], sata0(prsnt), ua0(rts) +mpp18 18 gpio, ge0(rxerr), ptp(trig_gen), ua1(txd), spi0(cs0) +mpp19 19 gpio, ge0(col), ptp(event_req), ge0(txerr), sata1(prsnt), ua0(cts) +mpp20 20 gpio, ge0(txclk), ptp(clk), sata0(prsnt), ua0(rts) mpp21 21 gpio, spi0(cs1), ge1(rxd0), sata0(prsnt), sd0(cmd), dev(bootcs) mpp22 22 gpio, spi0(mosi), dev(ad0) mpp23 23 gpio, spi0(sck), dev(ad2) @@ -58,23 +58,23 @@ mpp39 39 gpio, i2c1(sck), ge1(rxd2), ua0(cts), sd0(d1), dev(a2) mpp40 40 gpio, i2c1(sda), ge1(rxd3), ua0(rts), sd0(d2), dev(ad6) mpp41 41 gpio, ua1(rxd), ge1(rxctl), ua0(cts), spi1(cs3), dev(burst/last) mpp42 42 gpio, ua1(txd), ua0(rts), dev(ad7) -mpp43 43 gpio, pcie0(clkreq), m(vtt_ctrl), m(decc_err), pcie0(rstout), dev(clkout) -mpp44 44 gpio, sata0(prsnt), sata1(prsnt), sata2(prsnt) [2], sata3(prsnt) [3], pcie0(rstout) -mpp45 45 gpio, ref(clk_out0), pcie0(rstout), pcie1(rstout) [1], pcie2(rstout), pcie3(rstout) -mpp46 46 gpio, ref(clk_out1), pcie0(rstout), pcie1(rstout) [1], pcie2(rstout), pcie3(rstout) -mpp47 47 gpio, sata0(prsnt), sata1(prsnt), sata2(prsnt) [2], spi1(cs2), sata3(prsnt) [2] -mpp48 48 gpio, sata0(prsnt), m(vtt_ctrl), tdm2c(pclk), audio(mclk), sd0(d4) -mpp49 49 gpio, sata2(prsnt) [2], sata3(prsnt) [2], tdm2c(fsync), audio(lrclk), sd0(d5) -mpp50 50 gpio, pcie0(rstout), pcie1(rstout) [1], tdm2c(drx), audio(extclk), sd0(cmd) +mpp43 43 gpio, pcie0(clkreq), m(vtt_ctrl), m(decc_err), spi1(cs2), dev(clkout) +mpp44 44 gpio, sata0(prsnt), sata1(prsnt), sata2(prsnt) [2], sata3(prsnt) [3] +mpp45 45 gpio, ref(clk_out0), 
pcie0(rstout) +mpp46 46 gpio, ref(clk_out1), pcie0(rstout) +mpp47 47 gpio, sata0(prsnt), sata1(prsnt), sata2(prsnt) [2], sata3(prsnt) [2] +mpp48 48 gpio, sata0(prsnt), m(vtt_ctrl), tdm2c(pclk), audio(mclk), sd0(d4), pcie0(clkreq) +mpp49 49 gpio, sata2(prsnt) [2], sata3(prsnt) [2], tdm2c(fsync), audio(lrclk), sd0(d5), pcie1(clkreq) +mpp50 50 gpio, pcie0(rstout), tdm2c(drx), audio(extclk), sd0(cmd) mpp51 51 gpio, tdm2c(dtx), audio(sdo), m(decc_err) -mpp52 52 gpio, pcie0(rstout), pcie1(rstout) [1], tdm2c(intn), audio(sdi), sd0(d6) +mpp52 52 gpio, pcie0(rstout), tdm2c(intn), audio(sdi), sd0(d6) mpp53 53 gpio, sata1(prsnt), sata0(prsnt), tdm2c(rstn), audio(bclk), sd0(d7) -mpp54 54 gpio, sata0(prsnt), sata1(prsnt), pcie0(rstout), pcie1(rstout) [1], sd0(d3) +mpp54 54 gpio, sata0(prsnt), sata1(prsnt), pcie0(rstout), ge0(txerr), sd0(d3) mpp55 55 gpio, ua1(cts), ge(mdio), pcie1(clkreq) [1], spi1(cs1), sd0(d0) mpp56 56 gpio, ua1(rts), ge(mdc), m(decc_err), spi1(mosi) mpp57 57 gpio, spi1(sck), sd0(clk) mpp58 58 gpio, pcie1(clkreq) [1], i2c1(sck), pcie2(clkreq), spi1(miso), sd0(d1) -mpp59 59 gpio, pcie0(rstout), i2c1(sda), pcie1(rstout) [1], spi1(cs0), sd0(d2) +mpp59 59 gpio, pcie0(rstout), i2c1(sda), spi1(cs0), sd0(d2) [1]: only available on 88F6820 and 88F6828 [2]: only available on 88F6828 diff --git a/drivers/pinctrl/mvebu/pinctrl-armada-38x.c b/drivers/pinctrl/mvebu/pinctrl-armada-38x.c index 83bbcc72be1f8..11656c7cfab0e 100644 --- a/drivers/pinctrl/mvebu/pinctrl-armada-38x.c +++ b/drivers/pinctrl/mvebu/pinctrl-armada-38x.c @@ -94,37 +94,39 @@ static struct mvebu_mpp_mode armada_38x_mpp_modes[] = { MPP_VAR_FUNCTION(0, "gpio", NULL, V_88F6810_PLUS), MPP_VAR_FUNCTION(1, "ge0", "rxd0", V_88F6810_PLUS), MPP_VAR_FUNCTION(2, "pcie0", "rstout", V_88F6810_PLUS), - MPP_VAR_FUNCTION(3, "pcie1", "rstout", V_88F6820_PLUS), MPP_VAR_FUNCTION(4, "spi0", "cs1", V_88F6810_PLUS), - MPP_VAR_FUNCTION(5, "dev", "ad14", V_88F6810_PLUS)), + MPP_VAR_FUNCTION(5, "dev", "ad14", V_88F6810_PLUS), + MPP_VAR_FUNCTION(6, "pcie3", "clkreq", V_88F6810_PLUS)), MPP_MODE(13, MPP_VAR_FUNCTION(0, "gpio", NULL, V_88F6810_PLUS), MPP_VAR_FUNCTION(1, "ge0", "rxd1", V_88F6810_PLUS), MPP_VAR_FUNCTION(2, "pcie0", "clkreq", V_88F6810_PLUS), MPP_VAR_FUNCTION(3, "pcie1", "clkreq", V_88F6820_PLUS), MPP_VAR_FUNCTION(4, "spi0", "cs2", V_88F6810_PLUS), - MPP_VAR_FUNCTION(5, "dev", "ad15", V_88F6810_PLUS)), + MPP_VAR_FUNCTION(5, "dev", "ad15", V_88F6810_PLUS), + MPP_VAR_FUNCTION(6, "pcie2", "clkreq", V_88F6810_PLUS)), MPP_MODE(14, MPP_VAR_FUNCTION(0, "gpio", NULL, V_88F6810_PLUS), MPP_VAR_FUNCTION(1, "ge0", "rxd2", V_88F6810_PLUS), MPP_VAR_FUNCTION(2, "ptp", "clk", V_88F6810_PLUS), MPP_VAR_FUNCTION(3, "m", "vtt_ctrl", V_88F6810_PLUS), MPP_VAR_FUNCTION(4, "spi0", "cs3", V_88F6810_PLUS), - MPP_VAR_FUNCTION(5, "dev", "wen1", V_88F6810_PLUS)), + MPP_VAR_FUNCTION(5, "dev", "wen1", V_88F6810_PLUS), + MPP_VAR_FUNCTION(6, "pcie3", "clkreq", V_88F6810_PLUS)), MPP_MODE(15, MPP_VAR_FUNCTION(0, "gpio", NULL, V_88F6810_PLUS), MPP_VAR_FUNCTION(1, "ge0", "rxd3", V_88F6810_PLUS), MPP_VAR_FUNCTION(2, "ge", "mdc slave", V_88F6810_PLUS), MPP_VAR_FUNCTION(3, "pcie0", "rstout", V_88F6810_PLUS), - MPP_VAR_FUNCTION(4, "spi0", "mosi", V_88F6810_PLUS), - MPP_VAR_FUNCTION(5, "pcie1", "rstout", V_88F6820_PLUS)), + MPP_VAR_FUNCTION(4, "spi0", "mosi", V_88F6810_PLUS)), MPP_MODE(16, MPP_VAR_FUNCTION(0, "gpio", NULL, V_88F6810_PLUS), MPP_VAR_FUNCTION(1, "ge0", "rxctl", V_88F6810_PLUS), MPP_VAR_FUNCTION(2, "ge", "mdio slave", V_88F6810_PLUS), MPP_VAR_FUNCTION(3, "m", "decc_err", 
V_88F6810_PLUS), MPP_VAR_FUNCTION(4, "spi0", "miso", V_88F6810_PLUS), - MPP_VAR_FUNCTION(5, "pcie0", "clkreq", V_88F6810_PLUS)), + MPP_VAR_FUNCTION(5, "pcie0", "clkreq", V_88F6810_PLUS), + MPP_VAR_FUNCTION(6, "pcie1", "clkreq", V_88F6820_PLUS)), MPP_MODE(17, MPP_VAR_FUNCTION(0, "gpio", NULL, V_88F6810_PLUS), MPP_VAR_FUNCTION(1, "ge0", "rxclk", V_88F6810_PLUS), @@ -137,13 +139,12 @@ static struct mvebu_mpp_mode armada_38x_mpp_modes[] = { MPP_VAR_FUNCTION(1, "ge0", "rxerr", V_88F6810_PLUS), MPP_VAR_FUNCTION(2, "ptp", "trig_gen", V_88F6810_PLUS), MPP_VAR_FUNCTION(3, "ua1", "txd", V_88F6810_PLUS), - MPP_VAR_FUNCTION(4, "spi0", "cs0", V_88F6810_PLUS), - MPP_VAR_FUNCTION(5, "pcie1", "rstout", V_88F6820_PLUS)), + MPP_VAR_FUNCTION(4, "spi0", "cs0", V_88F6810_PLUS)), MPP_MODE(19, MPP_VAR_FUNCTION(0, "gpio", NULL, V_88F6810_PLUS), MPP_VAR_FUNCTION(1, "ge0", "col", V_88F6810_PLUS), MPP_VAR_FUNCTION(2, "ptp", "event_req", V_88F6810_PLUS), - MPP_VAR_FUNCTION(3, "pcie0", "clkreq", V_88F6810_PLUS), + MPP_VAR_FUNCTION(3, "ge0", "txerr", V_88F6810_PLUS), MPP_VAR_FUNCTION(4, "sata1", "prsnt", V_88F6810_PLUS), MPP_VAR_FUNCTION(5, "ua0", "cts", V_88F6810_PLUS), MPP_VAR_FUNCTION(6, "ua1", "rxd", V_88F6810_PLUS)), @@ -151,7 +152,6 @@ static struct mvebu_mpp_mode armada_38x_mpp_modes[] = { MPP_VAR_FUNCTION(0, "gpio", NULL, V_88F6810_PLUS), MPP_VAR_FUNCTION(1, "ge0", "txclk", V_88F6810_PLUS), MPP_VAR_FUNCTION(2, "ptp", "clk", V_88F6810_PLUS), - MPP_VAR_FUNCTION(3, "pcie1", "rstout", V_88F6820_PLUS), MPP_VAR_FUNCTION(4, "sata0", "prsnt", V_88F6810_PLUS), MPP_VAR_FUNCTION(5, "ua0", "rts", V_88F6810_PLUS), MPP_VAR_FUNCTION(6, "ua1", "txd", V_88F6810_PLUS)), @@ -277,35 +277,27 @@ static struct mvebu_mpp_mode armada_38x_mpp_modes[] = { MPP_VAR_FUNCTION(1, "pcie0", "clkreq", V_88F6810_PLUS), MPP_VAR_FUNCTION(2, "m", "vtt_ctrl", V_88F6810_PLUS), MPP_VAR_FUNCTION(3, "m", "decc_err", V_88F6810_PLUS), - MPP_VAR_FUNCTION(4, "pcie0", "rstout", V_88F6810_PLUS), + MPP_VAR_FUNCTION(4, "spi1", "cs2", V_88F6810_PLUS), MPP_VAR_FUNCTION(5, "dev", "clkout", V_88F6810_PLUS)), MPP_MODE(44, MPP_VAR_FUNCTION(0, "gpio", NULL, V_88F6810_PLUS), MPP_VAR_FUNCTION(1, "sata0", "prsnt", V_88F6810_PLUS), MPP_VAR_FUNCTION(2, "sata1", "prsnt", V_88F6810_PLUS), MPP_VAR_FUNCTION(3, "sata2", "prsnt", V_88F6828), - MPP_VAR_FUNCTION(4, "sata3", "prsnt", V_88F6828), - MPP_VAR_FUNCTION(5, "pcie0", "rstout", V_88F6810_PLUS)), + MPP_VAR_FUNCTION(4, "sata3", "prsnt", V_88F6828)), MPP_MODE(45, MPP_VAR_FUNCTION(0, "gpio", NULL, V_88F6810_PLUS), MPP_VAR_FUNCTION(1, "ref", "clk_out0", V_88F6810_PLUS), - MPP_VAR_FUNCTION(2, "pcie0", "rstout", V_88F6810_PLUS), - MPP_VAR_FUNCTION(3, "pcie1", "rstout", V_88F6820_PLUS), - MPP_VAR_FUNCTION(4, "pcie2", "rstout", V_88F6810_PLUS), - MPP_VAR_FUNCTION(5, "pcie3", "rstout", V_88F6810_PLUS)), + MPP_VAR_FUNCTION(2, "pcie0", "rstout", V_88F6810_PLUS)), MPP_MODE(46, MPP_VAR_FUNCTION(0, "gpio", NULL, V_88F6810_PLUS), MPP_VAR_FUNCTION(1, "ref", "clk_out1", V_88F6810_PLUS), - MPP_VAR_FUNCTION(2, "pcie0", "rstout", V_88F6810_PLUS), - MPP_VAR_FUNCTION(3, "pcie1", "rstout", V_88F6820_PLUS), - MPP_VAR_FUNCTION(4, "pcie2", "rstout", V_88F6810_PLUS), - MPP_VAR_FUNCTION(5, "pcie3", "rstout", V_88F6810_PLUS)), + MPP_VAR_FUNCTION(2, "pcie0", "rstout", V_88F6810_PLUS)), MPP_MODE(47, MPP_VAR_FUNCTION(0, "gpio", NULL, V_88F6810_PLUS), MPP_VAR_FUNCTION(1, "sata0", "prsnt", V_88F6810_PLUS), MPP_VAR_FUNCTION(2, "sata1", "prsnt", V_88F6810_PLUS), MPP_VAR_FUNCTION(3, "sata2", "prsnt", V_88F6828), - MPP_VAR_FUNCTION(4, "spi1", "cs2", 
V_88F6810_PLUS), MPP_VAR_FUNCTION(5, "sata3", "prsnt", V_88F6828)), MPP_MODE(48, MPP_VAR_FUNCTION(0, "gpio", NULL, V_88F6810_PLUS), @@ -313,18 +305,19 @@ static struct mvebu_mpp_mode armada_38x_mpp_modes[] = { MPP_VAR_FUNCTION(2, "m", "vtt_ctrl", V_88F6810_PLUS), MPP_VAR_FUNCTION(3, "tdm2c", "pclk", V_88F6810_PLUS), MPP_VAR_FUNCTION(4, "audio", "mclk", V_88F6810_PLUS), - MPP_VAR_FUNCTION(5, "sd0", "d4", V_88F6810_PLUS)), + MPP_VAR_FUNCTION(5, "sd0", "d4", V_88F6810_PLUS), + MPP_VAR_FUNCTION(6, "pcie0", "clkreq", V_88F6810_PLUS)), MPP_MODE(49, MPP_VAR_FUNCTION(0, "gpio", NULL, V_88F6810_PLUS), MPP_VAR_FUNCTION(1, "sata2", "prsnt", V_88F6828), MPP_VAR_FUNCTION(2, "sata3", "prsnt", V_88F6828), MPP_VAR_FUNCTION(3, "tdm2c", "fsync", V_88F6810_PLUS), MPP_VAR_FUNCTION(4, "audio", "lrclk", V_88F6810_PLUS), - MPP_VAR_FUNCTION(5, "sd0", "d5", V_88F6810_PLUS)), + MPP_VAR_FUNCTION(5, "sd0", "d5", V_88F6810_PLUS), + MPP_VAR_FUNCTION(6, "pcie1", "clkreq", V_88F6820_PLUS)), MPP_MODE(50, MPP_VAR_FUNCTION(0, "gpio", NULL, V_88F6810_PLUS), MPP_VAR_FUNCTION(1, "pcie0", "rstout", V_88F6810_PLUS), - MPP_VAR_FUNCTION(2, "pcie1", "rstout", V_88F6820_PLUS), MPP_VAR_FUNCTION(3, "tdm2c", "drx", V_88F6810_PLUS), MPP_VAR_FUNCTION(4, "audio", "extclk", V_88F6810_PLUS), MPP_VAR_FUNCTION(5, "sd0", "cmd", V_88F6810_PLUS)), @@ -336,7 +329,6 @@ static struct mvebu_mpp_mode armada_38x_mpp_modes[] = { MPP_MODE(52, MPP_VAR_FUNCTION(0, "gpio", NULL, V_88F6810_PLUS), MPP_VAR_FUNCTION(1, "pcie0", "rstout", V_88F6810_PLUS), - MPP_VAR_FUNCTION(2, "pcie1", "rstout", V_88F6820_PLUS), MPP_VAR_FUNCTION(3, "tdm2c", "intn", V_88F6810_PLUS), MPP_VAR_FUNCTION(4, "audio", "sdi", V_88F6810_PLUS), MPP_VAR_FUNCTION(5, "sd0", "d6", V_88F6810_PLUS)), @@ -352,7 +344,7 @@ static struct mvebu_mpp_mode armada_38x_mpp_modes[] = { MPP_VAR_FUNCTION(1, "sata0", "prsnt", V_88F6810_PLUS), MPP_VAR_FUNCTION(2, "sata1", "prsnt", V_88F6810_PLUS), MPP_VAR_FUNCTION(3, "pcie0", "rstout", V_88F6810_PLUS), - MPP_VAR_FUNCTION(4, "pcie1", "rstout", V_88F6820_PLUS), + MPP_VAR_FUNCTION(4, "ge0", "txerr", V_88F6810_PLUS), MPP_VAR_FUNCTION(5, "sd0", "d3", V_88F6810_PLUS)), MPP_MODE(55, MPP_VAR_FUNCTION(0, "gpio", NULL, V_88F6810_PLUS), @@ -382,7 +374,6 @@ static struct mvebu_mpp_mode armada_38x_mpp_modes[] = { MPP_VAR_FUNCTION(0, "gpio", NULL, V_88F6810_PLUS), MPP_VAR_FUNCTION(1, "pcie0", "rstout", V_88F6810_PLUS), MPP_VAR_FUNCTION(2, "i2c1", "sda", V_88F6810_PLUS), - MPP_VAR_FUNCTION(3, "pcie1", "rstout", V_88F6820_PLUS), MPP_VAR_FUNCTION(4, "spi1", "cs0", V_88F6810_PLUS), MPP_VAR_FUNCTION(5, "sd0", "d2", V_88F6810_PLUS)), }; From 32ebad3d19ecad29837467bf4a6d0857a50ed03d Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Tue, 9 Jun 2015 18:47:00 +0200 Subject: [PATCH 0145/2091] pinctrl: mvebu: armada-38x: fix incorrect total number of GPIOs commit 27e7cd016558bf787b128fd882cdd90409ae4036 upstream. The pinctrl_gpio_range[] array described a first bank of 32 GPIOs and a second one of 27 GPIOs. However, since there is a total of 60 MPP pins that can be muxed as GPIOs, the second bank really has 28 GPIOs. 
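The arithmetic behind the fix can be checked standalone: 60 muxable MPP pins split into one full 32-pin bank plus a 28-pin remainder, not 27.

/* Sanity-checks the bank split stated in the changelog above. */
#include <assert.h>
#include <stdio.h>

int main(void)
{
	int total = 60, bank_size = 32;
	int bank0 = total > bank_size ? bank_size : total;
	int bank1 = total - bank0;

	assert(bank0 == 32 && bank1 == 28);	/* not 27 */
	printf("bank0=%d bank1=%d\n", bank0, bank1);
	return 0;
}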
Signed-off-by: Thomas Petazzoni Fixes: ca6d9a084b56f ("pinctrl: mvebu: add pin-muxing driver for the Marvell Armada 380/385") Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/mvebu/pinctrl-armada-38x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/mvebu/pinctrl-armada-38x.c b/drivers/pinctrl/mvebu/pinctrl-armada-38x.c index 11656c7cfab0e..ff411a53b5a45 100644 --- a/drivers/pinctrl/mvebu/pinctrl-armada-38x.c +++ b/drivers/pinctrl/mvebu/pinctrl-armada-38x.c @@ -402,7 +402,7 @@ static struct mvebu_mpp_ctrl armada_38x_mpp_controls[] = { static struct pinctrl_gpio_range armada_38x_mpp_gpio_ranges[] = { MPP_GPIO_RANGE(0, 0, 0, 32), - MPP_GPIO_RANGE(1, 32, 32, 27), + MPP_GPIO_RANGE(1, 32, 32, 28), }; static int armada_38x_pinctrl_probe(struct platform_device *pdev) From 665a170e81d002fe355e03cef2ae01401de97d71 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Tue, 9 Jun 2015 18:47:01 +0200 Subject: [PATCH 0146/2091] pinctrl: mvebu: armada-39x: fix incorrect total number of GPIOs commit 7c580311a2cb3bb0d0188665c9c69227aed650ea upstream. The pinctrl_gpio_range[] array described a first bank of 32 GPIOs and a second one of 27 GPIOs. However, since there is a total of 60 MPP pins that can be muxed as GPIOs, the second bank really has 28 GPIOs. Signed-off-by: Thomas Petazzoni Fixes: ee086577abe7f ("pinctrl: mvebu: add pinctrl driver for Marvell Armada 39x") Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/mvebu/pinctrl-armada-39x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/mvebu/pinctrl-armada-39x.c b/drivers/pinctrl/mvebu/pinctrl-armada-39x.c index 42491624d660d..2dcf9b41e01e8 100644 --- a/drivers/pinctrl/mvebu/pinctrl-armada-39x.c +++ b/drivers/pinctrl/mvebu/pinctrl-armada-39x.c @@ -380,7 +380,7 @@ static struct mvebu_mpp_ctrl armada_39x_mpp_controls[] = { static struct pinctrl_gpio_range armada_39x_mpp_gpio_ranges[] = { MPP_GPIO_RANGE(0, 0, 0, 32), - MPP_GPIO_RANGE(1, 32, 32, 27), + MPP_GPIO_RANGE(1, 32, 32, 28), }; static int armada_39x_pinctrl_probe(struct platform_device *pdev) From 51ba8d4f98ba8d6f5504577ddc4db2b1a657260e Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Tue, 9 Jun 2015 18:46:56 +0200 Subject: [PATCH 0147/2091] pinctrl: mvebu: armada-xp: remove non-existing NAND pins commit bc99357f3690c11817756adfee0ece811a3db2e7 upstream. After updating to a more recent version of the Armada XP datasheet, we realized that some of the pins documented as having a NAND-related functionality in fact did not have such functionality. This commit updates the pinctrl driver accordingly. 
Signed-off-by: Thomas Petazzoni Fixes: 463e270f766a ("pinctrl: mvebu: add pinctrl driver for Armada XP") Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- .../devicetree/bindings/pinctrl/marvell,armada-xp-pinctrl.txt | 4 ++-- drivers/pinctrl/mvebu/pinctrl-armada-xp.c | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/Documentation/devicetree/bindings/pinctrl/marvell,armada-xp-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/marvell,armada-xp-pinctrl.txt index 373dbccd7ab0e..974168d854ba5 100644 --- a/Documentation/devicetree/bindings/pinctrl/marvell,armada-xp-pinctrl.txt +++ b/Documentation/devicetree/bindings/pinctrl/marvell,armada-xp-pinctrl.txt @@ -42,8 +42,8 @@ mpp20 20 gpio, ge0(rxd4), ge1(rxd2), lcd(d20), ptp(clk) mpp21 21 gpio, ge0(rxd5), ge1(rxd3), lcd(d21), mem(bat) mpp22 22 gpio, ge0(rxd6), ge1(rxctl), lcd(d22), sata0(prsnt) mpp23 23 gpio, ge0(rxd7), ge1(rxclk), lcd(d23), sata1(prsnt) -mpp24 24 gpio, lcd(hsync), sata1(prsnt), nf(bootcs-re), tdm(rst) -mpp25 25 gpio, lcd(vsync), sata0(prsnt), nf(bootcs-we), tdm(pclk) +mpp24 24 gpio, lcd(hsync), sata1(prsnt), tdm(rst) +mpp25 25 gpio, lcd(vsync), sata0(prsnt), tdm(pclk) mpp26 26 gpio, lcd(clk), tdm(fsync), vdd(cpu1-pd) mpp27 27 gpio, lcd(e), tdm(dtx), ptp(trig) mpp28 28 gpio, lcd(pwm), tdm(drx), ptp(evreq) diff --git a/drivers/pinctrl/mvebu/pinctrl-armada-xp.c b/drivers/pinctrl/mvebu/pinctrl-armada-xp.c index 578db9f033b23..7310c4022ba6d 100644 --- a/drivers/pinctrl/mvebu/pinctrl-armada-xp.c +++ b/drivers/pinctrl/mvebu/pinctrl-armada-xp.c @@ -172,13 +172,11 @@ static struct mvebu_mpp_mode armada_xp_mpp_modes[] = { MPP_MODE(24, MPP_VAR_FUNCTION(0x0, "gpio", NULL, V_MV78230_PLUS), MPP_VAR_FUNCTION(0x1, "sata1", "prsnt", V_MV78230_PLUS), - MPP_VAR_FUNCTION(0x2, "nf", "bootcs-re", V_MV78230_PLUS), MPP_VAR_FUNCTION(0x3, "tdm", "rst", V_MV78230_PLUS), MPP_VAR_FUNCTION(0x4, "lcd", "hsync", V_MV78230_PLUS)), MPP_MODE(25, MPP_VAR_FUNCTION(0x0, "gpio", NULL, V_MV78230_PLUS), MPP_VAR_FUNCTION(0x1, "sata0", "prsnt", V_MV78230_PLUS), - MPP_VAR_FUNCTION(0x2, "nf", "bootcs-we", V_MV78230_PLUS), MPP_VAR_FUNCTION(0x3, "tdm", "pclk", V_MV78230_PLUS), MPP_VAR_FUNCTION(0x4, "lcd", "vsync", V_MV78230_PLUS)), MPP_MODE(26, From 42ae173f64203d396a68d82a7962fd19e4d7a819 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Tue, 9 Jun 2015 18:46:57 +0200 Subject: [PATCH 0148/2091] pinctrl: mvebu: armada-xp: remove non-existing VDD cpu_pd functions commit 80b3d04feab5e69d51cb2375eb989a7165e43e3b upstream. The latest version of the Armada XP datasheet no longer documents the VDD cpu_pd functions, which might indicate they are not working and/or not supported. This commit ensures the pinctrl driver matches the datasheet. 
Signed-off-by: Thomas Petazzoni Fixes: 463e270f766a ("pinctrl: mvebu: add pinctrl driver for Armada XP") Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- .../pinctrl/marvell,armada-xp-pinctrl.txt | 26 ++++++--------- drivers/pinctrl/mvebu/pinctrl-armada-xp.c | 33 ++++++------------- 2 files changed, 20 insertions(+), 39 deletions(-) diff --git a/Documentation/devicetree/bindings/pinctrl/marvell,armada-xp-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/marvell,armada-xp-pinctrl.txt index 974168d854ba5..b347b85aa22e5 100644 --- a/Documentation/devicetree/bindings/pinctrl/marvell,armada-xp-pinctrl.txt +++ b/Documentation/devicetree/bindings/pinctrl/marvell,armada-xp-pinctrl.txt @@ -44,13 +44,13 @@ mpp22 22 gpio, ge0(rxd6), ge1(rxctl), lcd(d22), sata0(prsnt) mpp23 23 gpio, ge0(rxd7), ge1(rxclk), lcd(d23), sata1(prsnt) mpp24 24 gpio, lcd(hsync), sata1(prsnt), tdm(rst) mpp25 25 gpio, lcd(vsync), sata0(prsnt), tdm(pclk) -mpp26 26 gpio, lcd(clk), tdm(fsync), vdd(cpu1-pd) +mpp26 26 gpio, lcd(clk), tdm(fsync) mpp27 27 gpio, lcd(e), tdm(dtx), ptp(trig) mpp28 28 gpio, lcd(pwm), tdm(drx), ptp(evreq) -mpp29 29 gpio, lcd(ref-clk), tdm(int0), ptp(clk), vdd(cpu0-pd) +mpp29 29 gpio, lcd(ref-clk), tdm(int0), ptp(clk) mpp30 30 gpio, tdm(int1), sd0(clk) -mpp31 31 gpio, tdm(int2), sd0(cmd), vdd(cpu0-pd) -mpp32 32 gpio, tdm(int3), sd0(d0), vdd(cpu1-pd) +mpp31 31 gpio, tdm(int2), sd0(cmd) +mpp32 32 gpio, tdm(int3), sd0(d0) mpp33 33 gpio, tdm(int4), sd0(d1), mem(bat) mpp34 34 gpio, tdm(int5), sd0(d2), sata0(prsnt) mpp35 35 gpio, tdm(int6), sd0(d3), sata1(prsnt) @@ -58,14 +58,11 @@ mpp36 36 gpio, spi(mosi) mpp37 37 gpio, spi(miso) mpp38 38 gpio, spi(sck) mpp39 39 gpio, spi(cs0) -mpp40 40 gpio, spi(cs1), uart2(cts), lcd(vga-hsync), vdd(cpu1-pd), - pcie(clkreq0) +mpp40 40 gpio, spi(cs1), uart2(cts), lcd(vga-hsync), pcie(clkreq0) mpp41 41 gpio, spi(cs2), uart2(rts), lcd(vga-vsync), sata1(prsnt), pcie(clkreq1) -mpp42 42 gpio, uart2(rxd), uart0(cts), tdm(int7), tdm-1(timer), - vdd(cpu0-pd) -mpp43 43 gpio, uart2(txd), uart0(rts), spi(cs3), pcie(rstout), - vdd(cpu2-3-pd){1} +mpp42 42 gpio, uart2(rxd), uart0(cts), tdm(int7), tdm-1(timer) +mpp43 43 gpio, uart2(txd), uart0(rts), spi(cs3), pcie(rstout) mpp44 44 gpio, uart2(cts), uart3(rxd), spi(cs4), pcie(clkreq2), mem(bat) mpp45 45 gpio, uart2(rts), uart3(txd), spi(cs5), sata1(prsnt) @@ -84,9 +81,9 @@ mpp51 51 gpio, dev(ad16) mpp52 52 gpio, dev(ad17) mpp53 53 gpio, dev(ad18) mpp54 54 gpio, dev(ad19) -mpp55 55 gpio, dev(ad20), vdd(cpu0-pd) -mpp56 56 gpio, dev(ad21), vdd(cpu1-pd) -mpp57 57 gpio, dev(ad22), vdd(cpu2-3-pd){1} +mpp55 55 gpio, dev(ad20) +mpp56 56 gpio, dev(ad21) +mpp57 57 gpio, dev(ad22) mpp58 58 gpio, dev(ad23) mpp59 59 gpio, dev(ad24) mpp60 60 gpio, dev(ad25) @@ -96,6 +93,3 @@ mpp63 63 gpio, dev(ad28) mpp64 64 gpio, dev(ad29) mpp65 65 gpio, dev(ad30) mpp66 66 gpio, dev(ad31) - -Notes: -* {1} vdd(cpu2-3-pd) only available on mv78460. diff --git a/drivers/pinctrl/mvebu/pinctrl-armada-xp.c b/drivers/pinctrl/mvebu/pinctrl-armada-xp.c index 7310c4022ba6d..0301bd22c1d04 100644 --- a/drivers/pinctrl/mvebu/pinctrl-armada-xp.c +++ b/drivers/pinctrl/mvebu/pinctrl-armada-xp.c @@ -14,10 +14,7 @@ * available: mv78230, mv78260 and mv78460. From a pin muxing * perspective, the mv78230 has 49 MPP pins. The mv78260 and mv78460 * both have 67 MPP pins (more GPIOs and address lines for the memory - * bus mainly). 
The only difference between the mv78260 and the - * mv78460 in terms of pin muxing is the addition of two functions on - * pins 43 and 56 to access the VDD of the CPU2 and 3 (mv78260 has two - * cores, mv78460 has four cores). + * bus mainly). */ #include @@ -182,8 +179,7 @@ static struct mvebu_mpp_mode armada_xp_mpp_modes[] = { MPP_MODE(26, MPP_VAR_FUNCTION(0x0, "gpio", NULL, V_MV78230_PLUS), MPP_VAR_FUNCTION(0x3, "tdm", "fsync", V_MV78230_PLUS), - MPP_VAR_FUNCTION(0x4, "lcd", "clk", V_MV78230_PLUS), - MPP_VAR_FUNCTION(0x5, "vdd", "cpu1-pd", V_MV78230_PLUS)), + MPP_VAR_FUNCTION(0x4, "lcd", "clk", V_MV78230_PLUS)), MPP_MODE(27, MPP_VAR_FUNCTION(0x0, "gpio", NULL, V_MV78230_PLUS), MPP_VAR_FUNCTION(0x1, "ptp", "trig", V_MV78230_PLUS), @@ -198,8 +194,7 @@ static struct mvebu_mpp_mode armada_xp_mpp_modes[] = { MPP_VAR_FUNCTION(0x0, "gpio", NULL, V_MV78230_PLUS), MPP_VAR_FUNCTION(0x1, "ptp", "clk", V_MV78230_PLUS), MPP_VAR_FUNCTION(0x3, "tdm", "int0", V_MV78230_PLUS), - MPP_VAR_FUNCTION(0x4, "lcd", "ref-clk", V_MV78230_PLUS), - MPP_VAR_FUNCTION(0x5, "vdd", "cpu0-pd", V_MV78230_PLUS)), + MPP_VAR_FUNCTION(0x4, "lcd", "ref-clk", V_MV78230_PLUS)), MPP_MODE(30, MPP_VAR_FUNCTION(0x0, "gpio", NULL, V_MV78230_PLUS), MPP_VAR_FUNCTION(0x1, "sd0", "clk", V_MV78230_PLUS), @@ -207,13 +202,11 @@ static struct mvebu_mpp_mode armada_xp_mpp_modes[] = { MPP_MODE(31, MPP_VAR_FUNCTION(0x0, "gpio", NULL, V_MV78230_PLUS), MPP_VAR_FUNCTION(0x1, "sd0", "cmd", V_MV78230_PLUS), - MPP_VAR_FUNCTION(0x3, "tdm", "int2", V_MV78230_PLUS), - MPP_VAR_FUNCTION(0x5, "vdd", "cpu0-pd", V_MV78230_PLUS)), + MPP_VAR_FUNCTION(0x3, "tdm", "int2", V_MV78230_PLUS)), MPP_MODE(32, MPP_VAR_FUNCTION(0x0, "gpio", NULL, V_MV78230_PLUS), MPP_VAR_FUNCTION(0x1, "sd0", "d0", V_MV78230_PLUS), - MPP_VAR_FUNCTION(0x3, "tdm", "int3", V_MV78230_PLUS), - MPP_VAR_FUNCTION(0x5, "vdd", "cpu1-pd", V_MV78230_PLUS)), + MPP_VAR_FUNCTION(0x3, "tdm", "int3", V_MV78230_PLUS)), MPP_MODE(33, MPP_VAR_FUNCTION(0x0, "gpio", NULL, V_MV78230_PLUS), MPP_VAR_FUNCTION(0x1, "sd0", "d1", V_MV78230_PLUS), @@ -245,7 +238,6 @@ static struct mvebu_mpp_mode armada_xp_mpp_modes[] = { MPP_VAR_FUNCTION(0x0, "gpio", NULL, V_MV78230_PLUS), MPP_VAR_FUNCTION(0x1, "spi", "cs1", V_MV78230_PLUS), MPP_VAR_FUNCTION(0x2, "uart2", "cts", V_MV78230_PLUS), - MPP_VAR_FUNCTION(0x3, "vdd", "cpu1-pd", V_MV78230_PLUS), MPP_VAR_FUNCTION(0x4, "lcd", "vga-hsync", V_MV78230_PLUS), MPP_VAR_FUNCTION(0x5, "pcie", "clkreq0", V_MV78230_PLUS)), MPP_MODE(41, @@ -260,15 +252,13 @@ static struct mvebu_mpp_mode armada_xp_mpp_modes[] = { MPP_VAR_FUNCTION(0x1, "uart2", "rxd", V_MV78230_PLUS), MPP_VAR_FUNCTION(0x2, "uart0", "cts", V_MV78230_PLUS), MPP_VAR_FUNCTION(0x3, "tdm", "int7", V_MV78230_PLUS), - MPP_VAR_FUNCTION(0x4, "tdm-1", "timer", V_MV78230_PLUS), - MPP_VAR_FUNCTION(0x5, "vdd", "cpu0-pd", V_MV78230_PLUS)), + MPP_VAR_FUNCTION(0x4, "tdm-1", "timer", V_MV78230_PLUS)), MPP_MODE(43, MPP_VAR_FUNCTION(0x0, "gpio", NULL, V_MV78230_PLUS), MPP_VAR_FUNCTION(0x1, "uart2", "txd", V_MV78230_PLUS), MPP_VAR_FUNCTION(0x2, "uart0", "rts", V_MV78230_PLUS), MPP_VAR_FUNCTION(0x3, "spi", "cs3", V_MV78230_PLUS), - MPP_VAR_FUNCTION(0x4, "pcie", "rstout", V_MV78230_PLUS), - MPP_VAR_FUNCTION(0x5, "vdd", "cpu2-3-pd", V_MV78460)), + MPP_VAR_FUNCTION(0x4, "pcie", "rstout", V_MV78230_PLUS)), MPP_MODE(44, MPP_VAR_FUNCTION(0x0, "gpio", NULL, V_MV78230_PLUS), MPP_VAR_FUNCTION(0x1, "uart2", "cts", V_MV78230_PLUS), @@ -319,16 +309,13 @@ static struct mvebu_mpp_mode armada_xp_mpp_modes[] = { MPP_VAR_FUNCTION(0x1, "dev", "ad19", 
V_MV78260_PLUS)), MPP_MODE(55, MPP_VAR_FUNCTION(0x0, "gpio", NULL, V_MV78260_PLUS), - MPP_VAR_FUNCTION(0x1, "dev", "ad20", V_MV78260_PLUS), - MPP_VAR_FUNCTION(0x2, "vdd", "cpu0-pd", V_MV78260_PLUS)), + MPP_VAR_FUNCTION(0x1, "dev", "ad20", V_MV78260_PLUS)), MPP_MODE(56, MPP_VAR_FUNCTION(0x0, "gpio", NULL, V_MV78260_PLUS), - MPP_VAR_FUNCTION(0x1, "dev", "ad21", V_MV78260_PLUS), - MPP_VAR_FUNCTION(0x2, "vdd", "cpu1-pd", V_MV78260_PLUS)), + MPP_VAR_FUNCTION(0x1, "dev", "ad21", V_MV78260_PLUS)), MPP_MODE(57, MPP_VAR_FUNCTION(0x0, "gpio", NULL, V_MV78260_PLUS), - MPP_VAR_FUNCTION(0x1, "dev", "ad22", V_MV78260_PLUS), - MPP_VAR_FUNCTION(0x2, "vdd", "cpu2-3-pd", V_MV78460)), + MPP_VAR_FUNCTION(0x1, "dev", "ad22", V_MV78260_PLUS)), MPP_MODE(58, MPP_VAR_FUNCTION(0x0, "gpio", NULL, V_MV78260_PLUS), MPP_VAR_FUNCTION(0x1, "dev", "ad23", V_MV78260_PLUS)), From 6d033f883bc7eb4d2bfcbfaef48a8c7441942430 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Tue, 9 Jun 2015 18:46:58 +0200 Subject: [PATCH 0149/2091] pinctrl: mvebu: armada-xp: fix functions of MPP48 commit ea78b9511a54d0de026e04b5da86b30515072f31 upstream. There was a mistake in the definition of the functions for MPP48 on Marvell Armada XP. The second function is dev(clkout), and not tclk. Signed-off-by: Thomas Petazzoni Fixes: 463e270f766a ("pinctrl: mvebu: add pinctrl driver for Armada XP") Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- .../devicetree/bindings/pinctrl/marvell,armada-xp-pinctrl.txt | 2 +- drivers/pinctrl/mvebu/pinctrl-armada-xp.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/pinctrl/marvell,armada-xp-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/marvell,armada-xp-pinctrl.txt index b347b85aa22e5..96e7744cab844 100644 --- a/Documentation/devicetree/bindings/pinctrl/marvell,armada-xp-pinctrl.txt +++ b/Documentation/devicetree/bindings/pinctrl/marvell,armada-xp-pinctrl.txt @@ -69,7 +69,7 @@ mpp45 45 gpio, uart2(rts), uart3(txd), spi(cs5), sata1(prsnt) mpp46 46 gpio, uart3(rts), uart1(rts), spi(cs6), sata0(prsnt) mpp47 47 gpio, uart3(cts), uart1(cts), spi(cs7), pcie(clkreq3), ref(clkout) -mpp48 48 gpio, tclk, dev(burst/last) +mpp48 48 gpio, dev(clkout), dev(burst/last) * Marvell Armada XP (mv78260 and mv78460 only) diff --git a/drivers/pinctrl/mvebu/pinctrl-armada-xp.c b/drivers/pinctrl/mvebu/pinctrl-armada-xp.c index 0301bd22c1d04..d7cdb146f44d0 100644 --- a/drivers/pinctrl/mvebu/pinctrl-armada-xp.c +++ b/drivers/pinctrl/mvebu/pinctrl-armada-xp.c @@ -287,7 +287,7 @@ static struct mvebu_mpp_mode armada_xp_mpp_modes[] = { MPP_VAR_FUNCTION(0x5, "pcie", "clkreq3", V_MV78230_PLUS)), MPP_MODE(48, MPP_VAR_FUNCTION(0x0, "gpio", NULL, V_MV78230_PLUS), - MPP_VAR_FUNCTION(0x1, "tclk", NULL, V_MV78230_PLUS), + MPP_VAR_FUNCTION(0x1, "dev", "clkout", V_MV78230_PLUS), MPP_VAR_FUNCTION(0x2, "dev", "burst/last", V_MV78230_PLUS)), MPP_MODE(49, MPP_VAR_FUNCTION(0x0, "gpio", NULL, V_MV78260_PLUS), From 9366d297b6e2949761ae79fd275c972c1384076c Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 17 Jul 2015 16:23:28 -0700 Subject: [PATCH 0150/2091] openrisc: fix CONFIG_UID16 setting commit 04ea1e91f85615318ea91ce8ab50cb6a01ee4005 upstream. openrisc-allnoconfig: kernel/uid16.c: In function 'SYSC_setgroups16': kernel/uid16.c:184:2: error: implicit declaration of function 'groups_alloc' kernel/uid16.c:184:13: warning: assignment makes pointer from integer without a cast openrisc shouldn't be setting CONFIG_UID16 when CONFIG_MULTIUSER=n. 
Fixes: 2813893f8b197a1 ("kernel: conditionally support non-root users, groups and capabilities") Reported-by: Fengguang Wu Cc: Iulia Manda Cc: Josh Triplett Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- arch/openrisc/Kconfig | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig index e5a693b16da29..443f44de10209 100644 --- a/arch/openrisc/Kconfig +++ b/arch/openrisc/Kconfig @@ -17,6 +17,7 @@ config OPENRISC select GENERIC_IRQ_SHOW select GENERIC_IOMAP select GENERIC_CPU_DEVICES + select HAVE_UID16 select GENERIC_ATOMIC64 select GENERIC_CLOCKEVENTS select GENERIC_STRNCPY_FROM_USER @@ -31,9 +32,6 @@ config MMU config HAVE_DMA_ATTRS def_bool y -config UID16 - def_bool y - config RWSEM_GENERIC_SPINLOCK def_bool y From 250cb4138bda236ea39bc430421c84e3b1586630 Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Tue, 7 Jul 2015 15:44:02 +1000 Subject: [PATCH 0151/2091] m68knommu: make ColdFire SoC selection a choice commit fa95a1dd0819c9041a873b10a6d83b5134964154 upstream. It would be nice if we could support multiple ColdFire SoC types in a single binary - but currently the code simply does not support it. Change the SoC selection config options to be a choice instead of individual selectable entries. This fixes problems with building allnoconfig, and means that a sane linux kernel is generated for a single ColdFire SoC type. Signed-off-by: Greg Ungerer Acked-by: Geert Uytterhoeven Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- arch/m68k/Kconfig.cpu | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu index 33013dfcd3e1d..bd902ef0d95af 100644 --- a/arch/m68k/Kconfig.cpu +++ b/arch/m68k/Kconfig.cpu @@ -125,6 +125,13 @@ endif # M68KCLASSIC if COLDFIRE +choice + prompt "ColdFire SoC type" + default M520x + help + Select the type of ColdFire System-on-Chip (SoC) that you want + to build for. + config M5206 bool "MCF5206" depends on !MMU @@ -174,9 +181,6 @@ config M525x help Freescale (Motorola) Coldfire 5251/5253 processor support. -config M527x - bool - config M5271 bool "MCF5271" depends on !MMU @@ -223,9 +227,6 @@ config M5307 help Motorola ColdFire 5307 processor support. -config M53xx - bool - config M532x bool "MCF532x" depends on !MMU @@ -251,9 +252,6 @@ config M5407 help Motorola ColdFire 5407 processor support. -config M54xx - bool - config M547x bool "MCF547x" select M54xx @@ -280,6 +278,17 @@ config M5441x help Freescale Coldfire 54410/54415/54416/54417/54418 processor support. +endchoice + +config M527x + bool + +config M53xx + bool + +config M54xx + bool + endif # COLDFIRE From 4c3c2d140a873c9330dd51119aa9e13225cd65ef Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Tue, 7 Jul 2015 14:21:21 +1000 Subject: [PATCH 0152/2091] m68knommu: force setting of CONFIG_CLOCK_FREQ for ColdFire commit d9ee489619744ee5ac246b8fb3dd65bb078d2f0a upstream. It is possible to disable the clock selection at configuration time, but for ColdFire targets we always expect a clock frequency to be selected. This results in the following compile time error: CC arch/m68k/kernel/asm-offsets.s In file included from ./arch/m68k/include/asm/timex.h:14:0, from include/linux/timex.h:65, from include/linux/sched.h:19, from arch/m68k/kernel/asm-offsets.c:14: ./arch/m68k/include/asm/coldfire.h:25:2: error: #error "Don't know what your ColdFire CPU clock frequency is??" 
Remove CONFIG_CLOCK_SELECT completely and always enable CONFIG_CLOCK_FREQ for ColdFire. Signed-off-by: Greg Ungerer Acked-by: Geert Uytterhoeven Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- arch/m68k/Kconfig.cpu | 14 +------------- arch/m68k/include/asm/coldfire.h | 2 +- 2 files changed, 2 insertions(+), 14 deletions(-) diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu index bd902ef0d95af..5c68c85d5dbe5 100644 --- a/arch/m68k/Kconfig.cpu +++ b/arch/m68k/Kconfig.cpu @@ -425,22 +425,10 @@ config HAVE_MBAR config HAVE_IPSBAR bool -config CLOCK_SET - bool "Enable setting the CPU clock frequency" - depends on COLDFIRE - default n - help - On some CPU's you do not need to know what the core CPU clock - frequency is. On these you can disable clock setting. On some - traditional 68K parts, and on all ColdFire parts you need to set - the appropriate CPU clock frequency. On these devices many of the - onboard peripherals derive their timing from the master CPU clock - frequency. - config CLOCK_FREQ int "Set the core clock frequency" default "66666666" - depends on CLOCK_SET + depends on COLDFIRE help Define the CPU clock frequency in use. This is the core clock frequency, it may or may not be the same as the external clock diff --git a/arch/m68k/include/asm/coldfire.h b/arch/m68k/include/asm/coldfire.h index c94557b914482..50aa4dac9ca28 100644 --- a/arch/m68k/include/asm/coldfire.h +++ b/arch/m68k/include/asm/coldfire.h @@ -19,7 +19,7 @@ * in any case new boards come along from time to time that have yet * another different clocking frequency. */ -#ifdef CONFIG_CLOCK_SET +#ifdef CONFIG_CLOCK_FREQ #define MCF_CLK CONFIG_CLOCK_FREQ #else #error "Don't know what your ColdFire CPU clock frequency is??" From 2bd8b830dafea36e902518dd4be77e9432634c2d Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sat, 6 Jun 2015 06:06:49 +0200 Subject: [PATCH 0153/2091] Bluetooth: Fix race condition with user channel and setup stage commit 781f899f2f9d8b71e35225a087f90052059486c5 upstream. During the initial setup stage of a controller, the low-level transport is actually active. This means that HCI_UP is true. To avoid toggling the transport off and back on again for normal operation the kernel holds a grace period with HCI_AUTO_OFF that will turn the low-level transport off in case no user is present. The idea of the grace period is important to avoid having to initialize all of the controller twice. So legacy ioctl and the new management interface knows how to clear this grace period and then start normal operation. For the user channel operation this grace period has not been taken into account which results in the problem that HCI_UP and HCI_AUTO_OFF are set and the kernel will return EBUSY. However from a system point of view the controller is ready to be grabbed by either the ioctl, the management interface or the user channel. This patch brings the user channel to the same level as the other two entries for operating a controller. 
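The effect of the grace period on the bind-time check can be modelled in a few lines of user-space C. The struct and function names below are illustrative only (they are not hci_sock's); the condition simply mirrors the one added to hci_sock_bind() by this patch:

#include <stdbool.h>
#include <stdio.h>

struct dev_state {
	bool up, init, setup, config, auto_off;
};

/* Busy for the user channel unless the device is only "up" because the
 * HCI_AUTO_OFF grace period has not expired yet. */
static bool user_channel_busy(const struct dev_state *d)
{
	return d->init || d->setup || d->config ||
	       (!d->auto_off && d->up);
}

int main(void)
{
	struct dev_state grace  = { .up = true, .auto_off = true  };
	struct dev_state in_use = { .up = true, .auto_off = false };

	printf("grace period:  %s\n", user_channel_busy(&grace)  ? "EBUSY" : "ok");
	printf("really in use: %s\n", user_channel_busy(&in_use) ? "EBUSY" : "ok");
	return 0;
}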
Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/hci_sock.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 56f9edbf3d05d..e11a5cfda4b1c 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -741,10 +741,11 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, goto done; } - if (test_bit(HCI_UP, &hdev->flags) || - test_bit(HCI_INIT, &hdev->flags) || + if (test_bit(HCI_INIT, &hdev->flags) || hci_dev_test_flag(hdev, HCI_SETUP) || - hci_dev_test_flag(hdev, HCI_CONFIG)) { + hci_dev_test_flag(hdev, HCI_CONFIG) || + (!hci_dev_test_flag(hdev, HCI_AUTO_OFF) && + test_bit(HCI_UP, &hdev->flags))) { err = -EBUSY; hci_dev_put(hdev); goto done; @@ -760,10 +761,21 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, err = hci_dev_open(hdev->id); if (err) { - hci_dev_clear_flag(hdev, HCI_USER_CHANNEL); - mgmt_index_added(hdev); - hci_dev_put(hdev); - goto done; + if (err == -EALREADY) { + /* In case the transport is already up and + * running, clear the error here. + * + * This can happen when opening an user + * channel and HCI_AUTO_OFF grace period + * is still active. + */ + err = 0; + } else { + hci_dev_clear_flag(hdev, HCI_USER_CHANNEL); + mgmt_index_added(hdev); + hci_dev_put(hdev); + goto done; + } } atomic_inc(&hdev->promisc); From a5b637f8893adb8ccd17f6f92158241e8537aba8 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sun, 7 Jun 2015 09:42:19 +0200 Subject: [PATCH 0154/2091] Bluetooth: btusb: Fix memory leak in Intel setup routine commit ecffc80478cdce122f0ecb6a4e4f909132dd5c47 upstream. The SKB returned from the Intel specific version information command is missing a kfree_skb. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/btusb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 420cc9f3eb76f..1a6aee2683370 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -1993,6 +1993,8 @@ static int btusb_setup_intel(struct hci_dev *hdev) } fw_ptr = fw->data; + kfree_skb(skb); + /* This Intel specific command enables the manufacturer mode of the * controller. * From 2f5ce3a02ce2e5dbf14f4f391cb88a4682b0019d Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sun, 7 Jun 2015 09:47:08 +0200 Subject: [PATCH 0155/2091] Bluetooth: btusb: Fix secure send command length alignment on Intel 8260 commit e66890a96abbb746e1229c3067471be36dc49b34 upstream. This patch fixes the command length alignment issue for Intel Bluetooth 8260. The length of parameters in the firmware downloading command must be multiplication of 4. If not, the command must append Intel_NOP command with extra parameters, zeros, at the end, and the firmware file is already included Intel_NOP command for alignment. This patch checks the next command and if the next command is Intel_NOP command, it reads the Intel_NOP command and send them together. For example, if the data from the firmware file looks like this: 8E FC 03 11 22 33 02 FC 03 00 00 00 Previously, btusb sends two commands: 09 FC 06 8E FC 03 11 22 33 09 FC 06 02 FC 03 00 00 00 This won't work because the length of parameters are 6 which violates the 4 byte alignment. 
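The required accumulation can be sketched as a small standalone program that walks the example byte stream from this message and only emits a fragment once its length is a multiple of four. The 3-byte header layout matches an HCI command header; the names are illustrative, not btusb's, and the stream is assumed to end on an aligned boundary (which the Intel_NOP padding in the firmware file provides):

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

struct hci_cmd_hdr {			/* 2-byte opcode + 1-byte parameter length */
	uint16_t opcode;
	uint8_t  plen;
} __attribute__((packed));

int main(void)
{
	const uint8_t fw[] = { 0x8e, 0xfc, 0x03, 0x11, 0x22, 0x33,
			       0x02, 0xfc, 0x03, 0x00, 0x00, 0x00 };
	size_t off = 0, frag_len = 0;

	while (off < sizeof(fw)) {
		const struct hci_cmd_hdr *cmd =
			(const void *)(fw + off + frag_len);

		frag_len += sizeof(*cmd) + cmd->plen;

		if (!(frag_len % 4)) {	/* send only 4-byte aligned fragments */
			printf("send fragment at offset %zu, length %zu\n",
			       off, frag_len);
			off += frag_len;
			frag_len = 0;
		}
	}
	return 0;	/* prints a single 12-byte fragment at offset 0 */
}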
This patch will append them together and send as one command: 09 FC 0C 8E FC 03 11 22 33 02 FC 03 00 00 00 Based on previous work from Tedd Ho-Jeong An Reported-by: Tedd Ho-Jeong An Signed-off-by: Marcel Holtmann Tested-by: Tedd Ho-Jeong An Signed-off-by: Johan Hedberg Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/btusb.c | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 1a6aee2683370..dc99586d51bc1 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -2336,6 +2336,7 @@ static int btusb_setup_intel_new(struct hci_dev *hdev) struct intel_boot_params *params; const struct firmware *fw; const u8 *fw_ptr; + u32 frag_len; char fwname[64]; ktime_t calltime, delta, rettime; unsigned long long duration; @@ -2542,24 +2543,33 @@ static int btusb_setup_intel_new(struct hci_dev *hdev) } fw_ptr = fw->data + 644; + frag_len = 0; while (fw_ptr - fw->data < fw->size) { - struct hci_command_hdr *cmd = (void *)fw_ptr; - u8 cmd_len; + struct hci_command_hdr *cmd = (void *)(fw_ptr + frag_len); - cmd_len = sizeof(*cmd) + cmd->plen; + frag_len += sizeof(*cmd) + cmd->plen; - /* Send each command from the firmware data buffer as - * a single Data fragment. + /* The paramter length of the secure send command requires + * a 4 byte alignment. It happens so that the firmware file + * contains proper Intel_NOP commands to align the fragments + * as needed. + * + * Send set of commands with 4 byte alignment from the + * firmware data buffer as a single Data fragement. */ - err = btusb_intel_secure_send(hdev, 0x01, cmd_len, fw_ptr); - if (err < 0) { - BT_ERR("%s: Failed to send firmware data (%d)", - hdev->name, err); - goto done; - } + if (!(frag_len % 4)) { + err = btusb_intel_secure_send(hdev, 0x01, frag_len, + fw_ptr); + if (err < 0) { + BT_ERR("%s: Failed to send firmware data (%d)", + hdev->name, err); + goto done; + } - fw_ptr += cmd_len; + fw_ptr += frag_len; + frag_len = 0; + } } set_bit(BTUSB_FIRMWARE_LOADED, &data->flags); From 6665e53d7bd3e84d1f203b4d747d0c06c4bbf345 Mon Sep 17 00:00:00 2001 From: Aleksei Volkov Date: Mon, 8 Jun 2015 12:02:10 +0300 Subject: [PATCH 0156/2091] Bluetooth: btusb: Correct typo in Roper Class 1 Bluetooth Dongle commit 2eeac871697ac24a77b6d7953bd711b490e83ac7 upstream. That patch corrects the typo in usb vendor id for Roper Class 1 Bluetooth Dongle. Problem with typo is present since 4.0 kernel. Content /sys/kernel/debug/usb/devices for these dongle: T: Bus=05 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 2 Spd=12 MxCh= 0 D: Ver= 1.10 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=1310 ProdID=0001 Rev=15.00 S: Manufacturer=SiW S: Product=SiW S: SerialNumber=E7BB050D0B00 C:* #Ifs= 2 Cfg#= 1 Atr=a0 MxPwr= 50mA I:* If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=81(I) Atr=03(Int.) 
MxPS= 16 Ivl=1ms E: Ad=82(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 0 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 0 Ivl=1ms I: If#= 1 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 9 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 9 Ivl=1ms I: If#= 1 Alt= 2 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 17 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 17 Ivl=1ms I: If#= 1 Alt= 3 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 25 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 25 Ivl=1ms I: If#= 1 Alt= 4 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 33 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 33 Ivl=1ms I: If#= 1 Alt= 5 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 49 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 49 Ivl=1ms Signed-off-by: Aleksei Volkov Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/btusb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index dc99586d51bc1..c655015392248 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -268,7 +268,7 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x0e5e, 0x6622), .driver_info = BTUSB_BROKEN_ISOC }, /* Roper Class 1 Bluetooth Dongle (Silicon Wave based) */ - { USB_DEVICE(0x1300, 0x0001), .driver_info = BTUSB_SWAVE }, + { USB_DEVICE(0x1310, 0x0001), .driver_info = BTUSB_SWAVE }, /* Digianswer devices */ { USB_DEVICE(0x08fd, 0x0001), .driver_info = BTUSB_DIGIANSWER }, From b5c06428d38a0d74d90901d2c3fcf8ad29292216 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 14 Jul 2015 16:25:30 -0400 Subject: [PATCH 0157/2091] Bluetooth: btbcm: allow btbcm_read_verbose_config to fail on Apple commit 7bee8b08c428b63aa4a3765bb907602e36355378 upstream. Commit 1c8ba6d013 moved around the setup code for broadcomm chips, and also added btbcm_read_verbose_config() to read extra information about the hardware. It's returning errors on some macbooks: Bluetooth: hci0: BCM: Read verbose config info failed (-16) Which makes us error out of the setup function. Since this probe isn't critical to operate the chip, this patch just changes things to carry on when it fails. 
Signed-off-by: Chris Mason Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/btbcm.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/bluetooth/btbcm.c b/drivers/bluetooth/btbcm.c index 4bba86677adc6..3f146c9911c17 100644 --- a/drivers/bluetooth/btbcm.c +++ b/drivers/bluetooth/btbcm.c @@ -378,12 +378,11 @@ int btbcm_setup_apple(struct hci_dev *hdev) /* Read Verbose Config Version Info */ skb = btbcm_read_verbose_config(hdev); - if (IS_ERR(skb)) - return PTR_ERR(skb); - - BT_INFO("%s: BCM: chip id %u build %4.4u", hdev->name, skb->data[1], - get_unaligned_le16(skb->data + 5)); - kfree_skb(skb); + if (!IS_ERR(skb)) { + BT_INFO("%s: BCM: chip id %u build %4.4u", hdev->name, skb->data[1], + get_unaligned_le16(skb->data + 5)); + kfree_skb(skb); + } set_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks); From f3a16a0597a773deb7ee578c3453e62cc1f9e222 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 2 Jun 2015 10:38:32 +0200 Subject: [PATCH 0158/2091] ath9k: fix DMA stop sequence for AR9003+ commit 300f77c08ded96d33f492aaa02549103852f0c12 upstream. AR93xx and newer needs to stop rx before tx to avoid getting the DMA engine or MAC into a stuck state. This should reduce/fix the occurence of "Failed to stop Tx DMA" logspam. Signed-off-by: Felix Fietkau Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath9k/main.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index b0badef71ce79..d5f2fbf62d726 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -216,11 +216,13 @@ static bool ath_prepare_reset(struct ath_softc *sc) ath_stop_ani(sc); ath9k_hw_disable_interrupts(ah); - if (!ath_drain_all_txq(sc)) - ret = false; - - if (!ath_stoprecv(sc)) - ret = false; + if (AR_SREV_9300_20_OR_LATER(ah)) { + ret &= ath_stoprecv(sc); + ret &= ath_drain_all_txq(sc); + } else { + ret &= ath_drain_all_txq(sc); + ret &= ath_stoprecv(sc); + } return ret; } From 01fe812522c67852943800c724199836a66d3a52 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 14 May 2015 11:34:48 +0300 Subject: [PATCH 0159/2091] ath9k_htc: memory corruption calling set_bit() commit 191f1aeeb93bb58e56f4d1868294ae22f3f67d4e upstream. In d8a2c51cdcae ('ath9k_htc: Use atomic operations for op_flags') we changed things like this: - if (priv->op_flags & OP_TSF_RESET) { + if (test_bit(OP_TSF_RESET, &priv->op_flags)) { The problem is that test_bit() takes a bit number and not a mask. It means that when we do: set_bit(OP_TSF_RESET, &priv->op_flags); Then it sets the (1 << 6) bit instead of the 6 bit so we are setting a bit which is past the end of the unsigned long. 
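The off-by-a-power-of-two can be seen with a minimal user-space sketch. BIT() and BITS_PER_LONG are re-declared locally (assuming a 32-bit unsigned long for illustration); the point is only that a bit *mask* handed to an interface expecting a bit *number* indexes memory well past the flags word:

#include <stdio.h>

#define BIT(n)		(1UL << (n))
#define BITS_PER_LONG	32	/* assume a 32-bit build for illustration */

static void where_does_it_land(unsigned long bit_nr)
{
	printf("bit_nr %3lu -> word %lu, bit %lu within that word\n",
	       bit_nr, bit_nr / BITS_PER_LONG, bit_nr % BITS_PER_LONG);
}

int main(void)
{
	where_does_it_land(6);		/* correct: word 0, bit 6          */
	where_does_it_land(BIT(6));	/* buggy:   word 2, past the flags */
	return 0;
}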
Fixes: d8a2c51cdcae ('ath9k_htc: Use atomic operations for op_flags') Signed-off-by: Dan Carpenter Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath9k/htc.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/htc.h b/drivers/net/wireless/ath/ath9k/htc.h index e82a0d4ce23f9..5dbc617ecf8a8 100644 --- a/drivers/net/wireless/ath/ath9k/htc.h +++ b/drivers/net/wireless/ath/ath9k/htc.h @@ -440,9 +440,9 @@ static inline void ath9k_htc_stop_btcoex(struct ath9k_htc_priv *priv) } #endif /* CONFIG_ATH9K_BTCOEX_SUPPORT */ -#define OP_BT_PRIORITY_DETECTED BIT(3) -#define OP_BT_SCAN BIT(4) -#define OP_TSF_RESET BIT(6) +#define OP_BT_PRIORITY_DETECTED 3 +#define OP_BT_SCAN 4 +#define OP_TSF_RESET 6 enum htc_op_flags { HTC_FWFLAG_NO_RMW, From 33e1432c291b72339b03ea3450d7840cdba1dbe1 Mon Sep 17 00:00:00 2001 From: Vincent Fann Date: Fri, 15 May 2015 21:29:27 -0500 Subject: [PATCH 0160/2091] rtlwifi: Remove the clear interrupt routine from all drivers commit 1277fa2ab2f9a624a4b0177119ca13b5fd65edd0 upstream. Several of these drivers have there TX randomly blocked for 3~5 seconds while measuring tx throughput (iperf). The root couse happens in rtl_pci_flush(). The function uses a while-loop to wait for TX queue length to decrease to 0. The TX queue length counts the number of packets that are queued in the driver. The driver relys on the TX OK interrupt to return skb and reduce TX queue length. The interrupt subroutine disables interupts, reads the interrupt registers, and then clears the registers in the beginning of _rtl_pci_interrupt(). After all interupts process are finished, the driver invokes enable_interrupt() to enable interupts. This behavior is normal for an interrupt subroutine. But enable_interrupt() invokes clear_interrupt() again. This unexpected interrupt clearing may cleari me fresh TX OK interrupts. These missing interrupts cause TX queue length to never reduce to 0i, which causes rtl_pci_flush() to be stuck in unterminated while-loop. This patch removes clear_interrupt() in enable_interrupt() to avoid this behavior. 
Signed-off-by: Vincent Fann Signed-off-by: Shao Fu Signed-off-by: Larry Finger Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/rtlwifi/rtl8188ee/hw.c | 16 ---------------- drivers/net/wireless/rtlwifi/rtl8192ee/hw.c | 17 ----------------- drivers/net/wireless/rtlwifi/rtl8723ae/hw.c | 13 ------------- drivers/net/wireless/rtlwifi/rtl8723be/hw.c | 17 ----------------- drivers/net/wireless/rtlwifi/rtl8821ae/hw.c | 20 -------------------- 5 files changed, 83 deletions(-) diff --git a/drivers/net/wireless/rtlwifi/rtl8188ee/hw.c b/drivers/net/wireless/rtlwifi/rtl8188ee/hw.c index 86ce5b1930e6d..e5d8108f1987d 100644 --- a/drivers/net/wireless/rtlwifi/rtl8188ee/hw.c +++ b/drivers/net/wireless/rtlwifi/rtl8188ee/hw.c @@ -1354,27 +1354,11 @@ void rtl88ee_set_qos(struct ieee80211_hw *hw, int aci) } } -static void rtl88ee_clear_interrupt(struct ieee80211_hw *hw) -{ - struct rtl_priv *rtlpriv = rtl_priv(hw); - u32 tmp; - - tmp = rtl_read_dword(rtlpriv, REG_HISR); - rtl_write_dword(rtlpriv, REG_HISR, tmp); - - tmp = rtl_read_dword(rtlpriv, REG_HISRE); - rtl_write_dword(rtlpriv, REG_HISRE, tmp); - - tmp = rtl_read_dword(rtlpriv, REG_HSISR); - rtl_write_dword(rtlpriv, REG_HSISR, tmp); -} - void rtl88ee_enable_interrupt(struct ieee80211_hw *hw) { struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); - rtl88ee_clear_interrupt(hw);/*clear it here first*/ rtl_write_dword(rtlpriv, REG_HIMR, rtlpci->irq_mask[0] & 0xFFFFFFFF); rtl_write_dword(rtlpriv, REG_HIMRE, diff --git a/drivers/net/wireless/rtlwifi/rtl8192ee/hw.c b/drivers/net/wireless/rtlwifi/rtl8192ee/hw.c index da0a6125f314b..cbf2ca7c7c6de 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192ee/hw.c +++ b/drivers/net/wireless/rtlwifi/rtl8192ee/hw.c @@ -1584,28 +1584,11 @@ void rtl92ee_set_qos(struct ieee80211_hw *hw, int aci) } } -static void rtl92ee_clear_interrupt(struct ieee80211_hw *hw) -{ - struct rtl_priv *rtlpriv = rtl_priv(hw); - u32 tmp; - - tmp = rtl_read_dword(rtlpriv, REG_HISR); - rtl_write_dword(rtlpriv, REG_HISR, tmp); - - tmp = rtl_read_dword(rtlpriv, REG_HISRE); - rtl_write_dword(rtlpriv, REG_HISRE, tmp); - - tmp = rtl_read_dword(rtlpriv, REG_HSISR); - rtl_write_dword(rtlpriv, REG_HSISR, tmp); -} - void rtl92ee_enable_interrupt(struct ieee80211_hw *hw) { struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); - rtl92ee_clear_interrupt(hw);/*clear it here first*/ - rtl_write_dword(rtlpriv, REG_HIMR, rtlpci->irq_mask[0] & 0xFFFFFFFF); rtl_write_dword(rtlpriv, REG_HIMRE, rtlpci->irq_mask[1] & 0xFFFFFFFF); rtlpci->irq_enabled = true; diff --git a/drivers/net/wireless/rtlwifi/rtl8723ae/hw.c b/drivers/net/wireless/rtlwifi/rtl8723ae/hw.c index 67bb47d77b68c..a4b7eac6856f2 100644 --- a/drivers/net/wireless/rtlwifi/rtl8723ae/hw.c +++ b/drivers/net/wireless/rtlwifi/rtl8723ae/hw.c @@ -1258,18 +1258,6 @@ void rtl8723e_set_qos(struct ieee80211_hw *hw, int aci) } } -static void rtl8723e_clear_interrupt(struct ieee80211_hw *hw) -{ - struct rtl_priv *rtlpriv = rtl_priv(hw); - u32 tmp; - - tmp = rtl_read_dword(rtlpriv, REG_HISR); - rtl_write_dword(rtlpriv, REG_HISR, tmp); - - tmp = rtl_read_dword(rtlpriv, REG_HISRE); - rtl_write_dword(rtlpriv, REG_HISRE, tmp); -} - void rtl8723e_enable_interrupt(struct ieee80211_hw *hw) { struct rtl_priv *rtlpriv = rtl_priv(hw); @@ -1284,7 +1272,6 @@ void rtl8723e_disable_interrupt(struct ieee80211_hw *hw) { struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); - 
rtl8723e_clear_interrupt(hw);/*clear it here first*/ rtl_write_dword(rtlpriv, 0x3a8, IMR8190_DISABLED); rtl_write_dword(rtlpriv, 0x3ac, IMR8190_DISABLED); rtlpci->irq_enabled = false; diff --git a/drivers/net/wireless/rtlwifi/rtl8723be/hw.c b/drivers/net/wireless/rtlwifi/rtl8723be/hw.c index b681af3c7a355..b9417268427ed 100644 --- a/drivers/net/wireless/rtlwifi/rtl8723be/hw.c +++ b/drivers/net/wireless/rtlwifi/rtl8723be/hw.c @@ -1634,28 +1634,11 @@ void rtl8723be_set_qos(struct ieee80211_hw *hw, int aci) } } -static void rtl8723be_clear_interrupt(struct ieee80211_hw *hw) -{ - struct rtl_priv *rtlpriv = rtl_priv(hw); - u32 tmp; - - tmp = rtl_read_dword(rtlpriv, REG_HISR); - rtl_write_dword(rtlpriv, REG_HISR, tmp); - - tmp = rtl_read_dword(rtlpriv, REG_HISRE); - rtl_write_dword(rtlpriv, REG_HISRE, tmp); - - tmp = rtl_read_dword(rtlpriv, REG_HSISR); - rtl_write_dword(rtlpriv, REG_HSISR, tmp); -} - void rtl8723be_enable_interrupt(struct ieee80211_hw *hw) { struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); - rtl8723be_clear_interrupt(hw);/*clear it here first*/ - rtl_write_dword(rtlpriv, REG_HIMR, rtlpci->irq_mask[0] & 0xFFFFFFFF); rtl_write_dword(rtlpriv, REG_HIMRE, rtlpci->irq_mask[1] & 0xFFFFFFFF); rtlpci->irq_enabled = true; diff --git a/drivers/net/wireless/rtlwifi/rtl8821ae/hw.c b/drivers/net/wireless/rtlwifi/rtl8821ae/hw.c index 8704eee9f3a49..57966e3c8e8d4 100644 --- a/drivers/net/wireless/rtlwifi/rtl8821ae/hw.c +++ b/drivers/net/wireless/rtlwifi/rtl8821ae/hw.c @@ -2253,31 +2253,11 @@ void rtl8821ae_set_qos(struct ieee80211_hw *hw, int aci) } } -static void rtl8821ae_clear_interrupt(struct ieee80211_hw *hw) -{ - struct rtl_priv *rtlpriv = rtl_priv(hw); - u32 tmp; - tmp = rtl_read_dword(rtlpriv, REG_HISR); - /*printk("clear interrupt first:\n"); - printk("0x%x = 0x%08x\n",REG_HISR, tmp);*/ - rtl_write_dword(rtlpriv, REG_HISR, tmp); - - tmp = rtl_read_dword(rtlpriv, REG_HISRE); - /*printk("0x%x = 0x%08x\n",REG_HISRE, tmp);*/ - rtl_write_dword(rtlpriv, REG_HISRE, tmp); - - tmp = rtl_read_dword(rtlpriv, REG_HSISR); - /*printk("0x%x = 0x%08x\n",REG_HSISR, tmp);*/ - rtl_write_dword(rtlpriv, REG_HSISR, tmp); -} - void rtl8821ae_enable_interrupt(struct ieee80211_hw *hw) { struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); - rtl8821ae_clear_interrupt(hw);/*clear it here first*/ - rtl_write_dword(rtlpriv, REG_HIMR, rtlpci->irq_mask[0] & 0xFFFFFFFF); rtl_write_dword(rtlpriv, REG_HIMRE, rtlpci->irq_mask[1] & 0xFFFFFFFF); rtlpci->irq_enabled = true; From 6ab8bb14ed9e6b7979da424bdafc959fd602c42a Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Wed, 3 Jun 2015 10:50:19 +0300 Subject: [PATCH 0161/2091] ieee802154: Fix sockaddr_ieee802154 implicit padding information leak. commit 8a70cefa3037d62e7c0b6068a66675def1a330c9 upstream. The AF_IEEE802154 sockaddr looks like this: struct sockaddr_ieee802154 { sa_family_t family; /* AF_IEEE802154 */ struct ieee802154_addr_sa addr; }; struct ieee802154_addr_sa { int addr_type; u16 pan_id; union { u8 hwaddr[IEEE802154_ADDR_LEN]; u16 short_addr; }; }; On most architectures there will be implicit structure padding here, in two different places: * In struct sockaddr_ieee802154, two bytes of padding between 'family' (unsigned short) and 'addr', so that 'addr' starts on a four byte boundary. * In struct ieee802154_addr_sa, two bytes at the end of the structure, to make the structure 16 bytes. 
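Both padding locations can be made visible with a user-space sketch that re-declares the structures from above (u8/u16 and sa_family_t are stood in for with local typedefs; exact numbers are ABI dependent, but common 32-bit and 64-bit ABIs give offset 4 for 'addr' and sizes 16 and 20):

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

typedef uint8_t  u8;
typedef uint16_t u16;
typedef unsigned short sa_family_t_;	/* local stand-in for sa_family_t */

#define IEEE802154_ADDR_LEN 8

struct ieee802154_addr_sa {
	int addr_type;
	u16 pan_id;
	union {
		u8 hwaddr[IEEE802154_ADDR_LEN];
		u16 short_addr;
	};
};

struct sockaddr_ieee802154 {
	sa_family_t_ family;
	struct ieee802154_addr_sa addr;
};

int main(void)
{
	/* Two bytes of padding between 'family' (offset 0, size 2) and
	 * 'addr' (offset 4), and two more at the tail of addr_sa, whose
	 * members only occupy 14 of its 16 bytes. */
	printf("offsetof(addr) = %zu\n",
	       offsetof(struct sockaddr_ieee802154, addr));
	printf("sizeof(addr_sa) = %zu, sizeof(sockaddr) = %zu\n",
	       sizeof(struct ieee802154_addr_sa),
	       sizeof(struct sockaddr_ieee802154));
	return 0;
}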
When calling recvmsg(2) on a PF_IEEE802154 SOCK_DGRAM socket, the ieee802154 stack constructs a struct sockaddr_ieee802154 on the kernel stack without clearing these padding fields, and, depending on the addr_type, between four and ten bytes of uncleared kernel stack will be copied to userspace. We can't just insert two 'u16 __pad's in the right places and zero those before copying an address to userspace, as not all architectures insert this implicit padding -- from a quick test it seems that avr32, cris and m68k don't insert this padding, while every other architecture that I have cross compilers for does insert this padding. The easiest way to plug the leak is to just memset the whole struct sockaddr_ieee802154 before filling in the fields we want to fill in, and that's what this patch does. Signed-off-by: Lennert Buytenhek Acked-by: Alexander Aring Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- net/ieee802154/socket.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c index b60c65f70346a..627a2537634e4 100644 --- a/net/ieee802154/socket.c +++ b/net/ieee802154/socket.c @@ -739,6 +739,12 @@ static int dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, sock_recv_ts_and_drops(msg, sk, skb); if (saddr) { + /* Clear the implicit padding in struct sockaddr_ieee802154 + * (16 bits between 'family' and 'addr') and in struct + * ieee802154_addr_sa (16 bits at the end of the structure). + */ + memset(saddr, 0, sizeof(*saddr)); + saddr->family = AF_IEEE802154; ieee802154_addr_to_sa(&saddr->addr, &mac_cb(skb)->source); *addr_len = sizeof(*saddr); From c4fdcdb1689f93b8b048e09c03924e730dfad600 Mon Sep 17 00:00:00 2001 From: Malcolm Priestley Date: Thu, 9 Jul 2015 17:03:57 +0100 Subject: [PATCH 0162/2091] staging: vt6656: check ieee80211_bss_conf bssid not NULL commit d309509f84725f99326cc73d3b00aae096b374ae upstream. Sometimes bssid can go null on failed association. Signed-off-by: Malcolm Priestley Signed-off-by: Greg Kroah-Hartman --- drivers/staging/vt6656/main_usb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/vt6656/main_usb.c b/drivers/staging/vt6656/main_usb.c index ab3ab84cb0a71..766fdcece074b 100644 --- a/drivers/staging/vt6656/main_usb.c +++ b/drivers/staging/vt6656/main_usb.c @@ -701,7 +701,7 @@ static void vnt_bss_info_changed(struct ieee80211_hw *hw, priv->current_aid = conf->aid; - if (changed & BSS_CHANGED_BSSID) + if (changed & BSS_CHANGED_BSSID && conf->bssid) vnt_mac_set_bssid_addr(priv, (u8 *)conf->bssid); From a225aecb12853c4231aac1dc2927755bb2a068a1 Mon Sep 17 00:00:00 2001 From: Malcolm Priestley Date: Thu, 9 Jul 2015 17:01:24 +0100 Subject: [PATCH 0163/2091] staging: vt6655: check ieee80211_bss_conf bssid not NULL commit 8e8e9198920ddfa920191069ae02eba75d39e653 upstream. Sometimes bssid can go null on failed association. 
Signed-off-by: Malcolm Priestley Signed-off-by: Greg Kroah-Hartman --- drivers/staging/vt6655/device_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/vt6655/device_main.c b/drivers/staging/vt6655/device_main.c index 0343ae386f035..9e0278ee003ff 100644 --- a/drivers/staging/vt6655/device_main.c +++ b/drivers/staging/vt6655/device_main.c @@ -1417,7 +1417,7 @@ static void vnt_bss_info_changed(struct ieee80211_hw *hw, priv->current_aid = conf->aid; - if (changed & BSS_CHANGED_BSSID) { + if (changed & BSS_CHANGED_BSSID && conf->bssid) { unsigned long flags; spin_lock_irqsave(&priv->lock, flags); From c79d02758654aeef5448f9b42952e699dd2c5463 Mon Sep 17 00:00:00 2001 From: Malcolm Priestley Date: Sun, 31 May 2015 10:35:28 +0100 Subject: [PATCH 0164/2091] staging: vt6655: device_rx_srv check sk_buff is NULL commit b5eeed8cb6097c8ea660b6598d36fdbb94065a22 upstream. There is a small chance that pRD->pRDInfo->skb could go NULL while the interrupt is processing. Put NULL check on loop to break out. Signed-off-by: Malcolm Priestley Signed-off-by: Greg Kroah-Hartman --- drivers/staging/vt6655/device_main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/staging/vt6655/device_main.c b/drivers/staging/vt6655/device_main.c index 9e0278ee003ff..15baacb126ad1 100644 --- a/drivers/staging/vt6655/device_main.c +++ b/drivers/staging/vt6655/device_main.c @@ -807,6 +807,10 @@ static int device_rx_srv(struct vnt_private *pDevice, unsigned int uIdx) pRD = pRD->next) { if (works++ > 15) break; + + if (!pRD->pRDInfo->skb) + break; + if (vnt_receive_frame(pDevice, pRD)) { if (!device_alloc_rx_buf(pDevice, pRD)) { dev_err(&pDevice->pcid->dev, From 92c09183c74a03d85f2184992c9b0e3308c8bb8d Mon Sep 17 00:00:00 2001 From: Haggai Eran Date: Sat, 23 May 2015 23:13:51 +0300 Subject: [PATCH 0165/2091] staging: rtl8712: prevent buffer overrun in recvbuf2recvframe commit cab462140f8a183e3cca0b51c8b59ef715cb6148 upstream. With an RTL8191SU USB adaptor, sometimes the hints for a fragmented packet are set, but the packet length is too large. Allocate enough space to prevent memory corruption and a resulting kernel panic [1]. [1] http://www.spinics.net/lists/linux-wireless/msg136546.html Signed-off-by: Haggai Eran ACKed-by: Larry Finger Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8712/rtl8712_recv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/staging/rtl8712/rtl8712_recv.c b/drivers/staging/rtl8712/rtl8712_recv.c index 50227b598e0c3..fcb8c61b28844 100644 --- a/drivers/staging/rtl8712/rtl8712_recv.c +++ b/drivers/staging/rtl8712/rtl8712_recv.c @@ -1056,7 +1056,8 @@ static int recvbuf2recvframe(struct _adapter *padapter, struct sk_buff *pskb) /* for first fragment packet, driver need allocate 1536 + * drvinfo_sz + RXDESC_SIZE to defrag packet. */ if ((mf == 1) && (frag == 0)) - alloc_sz = 1658;/*1658+6=1664, 1664 is 128 alignment.*/ + /*1658+6=1664, 1664 is 128 alignment.*/ + alloc_sz = max_t(u16, tmp_len, 1658); else alloc_sz = tmp_len; /* 2 is for IP header 4 bytes alignment in QoS packet case. From 366031c6dd1c6b57a9d48a66cdc48e21c994a1bd Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Fri, 12 Jun 2015 16:37:41 +0100 Subject: [PATCH 0166/2091] staging: comedi: cb_pcimdas: fix handlers for DI and DO subdevices commit b08ad6657aacf9b5d7c4b22de2ba891b152d0528 upstream. 
Normally, low-level Comedi drivers set an `insn_bits` handler for digital input (DI), digital output (DO) and digital input/output (DIO) subdevice types to handle normal reading and writing of digital channels. The "cb_pcimdas" driver currently has an `insn_read` handler for the DI subdevice and an `insn_write` handler for the DO subdevice. However, the actual handler functions `cb_pcimdas_di_insn_read()` and `cb_pcimdas_do_insn_write()` are written to behave like `insn_bits` handlers. Something's wrong there! To fix it, set the functions as `insn_bits` handlers and rename them for consistency. Fixes: e56d03dee14a ("staging: comedi: cb_pcimdas: add main connector digital input/output") Signed-off-by: Ian Abbott Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/cb_pcimdas.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/staging/comedi/drivers/cb_pcimdas.c b/drivers/staging/comedi/drivers/cb_pcimdas.c index c458e5010a745..4ebf5aae50199 100644 --- a/drivers/staging/comedi/drivers/cb_pcimdas.c +++ b/drivers/staging/comedi/drivers/cb_pcimdas.c @@ -243,7 +243,7 @@ static int cb_pcimdas_ao_insn_write(struct comedi_device *dev, return insn->n; } -static int cb_pcimdas_di_insn_read(struct comedi_device *dev, +static int cb_pcimdas_di_insn_bits(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) @@ -258,7 +258,7 @@ static int cb_pcimdas_di_insn_read(struct comedi_device *dev, return insn->n; } -static int cb_pcimdas_do_insn_write(struct comedi_device *dev, +static int cb_pcimdas_do_insn_bits(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) @@ -424,7 +424,7 @@ static int cb_pcimdas_auto_attach(struct comedi_device *dev, s->n_chan = 4; s->maxdata = 1; s->range_table = &range_digital; - s->insn_read = cb_pcimdas_di_insn_read; + s->insn_bits = cb_pcimdas_di_insn_bits; /* Digital Output subdevice (main connector) */ s = &dev->subdevices[4]; @@ -433,7 +433,7 @@ static int cb_pcimdas_auto_attach(struct comedi_device *dev, s->n_chan = 4; s->maxdata = 1; s->range_table = &range_digital; - s->insn_write = cb_pcimdas_do_insn_write; + s->insn_bits = cb_pcimdas_do_insn_bits; /* Counter subdevice (8254) */ s = &dev->subdevices[5]; From be43d21df90d10f5f10252c114f5fb024b7ba5ae Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Mon, 1 Jun 2015 16:36:27 -0700 Subject: [PATCH 0167/2091] hid-sensor: Fix suspend/resume delay commit 1e25aa9641e8f3fa39cd5e46b4afcafd7f12a44b upstream. By default all the sensors are runtime suspended state (lowest power state). During Linux suspend process, all the run time suspended devices are resumed and then suspended. This caused all sensors to power up and introduced delay in suspend time, when we introduced runtime PM for HID sensors. The opposite process happens during resume process. To fix this, we do powerup process of the sensors only when the request is issued from user (raw or tiggerred). In this way when runtime, resume calls for powerup it will simply return as this will not match user requested state. Note this is a regression fix as the increase in suspend / resume times can be substantial (report of 8 seconds on Len's laptop!) 
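The gating can be modelled roughly in user space; atomic_int stands in for the kernel atomic_t, and the two functions below are illustrative stand-ins for the low-level power routine and the user-triggered path, not the driver's code:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int user_requested_state;

static void low_level_power(bool on)
{
	/* Resume-time power-up is skipped unless a user asked for data. */
	if (on && !atomic_load(&user_requested_state))
		return;
	printf("sensor powered %s\n", on ? "on" : "off");
}

static void user_power_request(bool on)
{
	atomic_store(&user_requested_state, on);
	low_level_power(on);	/* stands in for pm_runtime_get/put_sync */
}

int main(void)
{
	low_level_power(true);		/* system resume path: no-op  */
	user_power_request(true);	/* user-space read: powers up */
	user_power_request(false);	/* done: powers back down     */
	return 0;
}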
Signed-off-by: Srinivas Pandruvada Tested-by: Len Brown Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/common/hid-sensors/hid-sensor-trigger.c | 11 ++++++++++- include/linux/hid-sensor-hub.h | 1 + 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/iio/common/hid-sensors/hid-sensor-trigger.c b/drivers/iio/common/hid-sensors/hid-sensor-trigger.c index 610fc98f88efa..595511022795f 100644 --- a/drivers/iio/common/hid-sensors/hid-sensor-trigger.c +++ b/drivers/iio/common/hid-sensors/hid-sensor-trigger.c @@ -36,6 +36,8 @@ static int _hid_sensor_power_state(struct hid_sensor_common *st, bool state) s32 poll_value = 0; if (state) { + if (!atomic_read(&st->user_requested_state)) + return 0; if (sensor_hub_device_open(st->hsdev)) return -EIO; @@ -52,8 +54,12 @@ static int _hid_sensor_power_state(struct hid_sensor_common *st, bool state) poll_value = hid_sensor_read_poll_value(st); } else { - if (!atomic_dec_and_test(&st->data_ready)) + int val; + + val = atomic_dec_if_positive(&st->data_ready); + if (val < 0) return 0; + sensor_hub_device_close(st->hsdev); state_val = hid_sensor_get_usage_index(st->hsdev, st->power_state.report_id, @@ -92,9 +98,11 @@ EXPORT_SYMBOL(hid_sensor_power_state); int hid_sensor_power_state(struct hid_sensor_common *st, bool state) { + #ifdef CONFIG_PM int ret; + atomic_set(&st->user_requested_state, state); if (state) ret = pm_runtime_get_sync(&st->pdev->dev); else { @@ -109,6 +117,7 @@ int hid_sensor_power_state(struct hid_sensor_common *st, bool state) return 0; #else + atomic_set(&st->user_requested_state, state); return _hid_sensor_power_state(st, state); #endif } diff --git a/include/linux/hid-sensor-hub.h b/include/linux/hid-sensor-hub.h index 0042bf330b99f..c02b5ce6c5cdb 100644 --- a/include/linux/hid-sensor-hub.h +++ b/include/linux/hid-sensor-hub.h @@ -230,6 +230,7 @@ struct hid_sensor_common { struct platform_device *pdev; unsigned usage_id; atomic_t data_ready; + atomic_t user_requested_state; struct iio_trigger *trigger; struct hid_sensor_hub_attribute_info poll; struct hid_sensor_hub_attribute_info report_state; From 2171417e120ae8825ea0ea263667e47ba9d1aa36 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 12 Jun 2015 23:45:33 -0400 Subject: [PATCH 0168/2091] ext4: fix race between truncate and __ext4_journalled_writepage() commit bdf96838aea6a265f2ae6cbcfb12a778c84a0b8e upstream. The commit cf108bca465d: "ext4: Invert the locking order of page_lock and transaction start" caused __ext4_journalled_writepage() to drop the page lock before the page was written back, as part of changing the locking order to jbd2_journal_start -> page_lock. However, this introduced a potential race if there was a truncate racing with the data=journalled writeback mode. Fix this by grabbing the page lock after starting the journal handle, and then checking to see if page had gotten truncated out from under us. This fixes a number of different warnings or BUG_ON's when running xfstests generic/086 in data=journalled mode, including: jbd2_journal_dirty_metadata: vdc-8: bad jh for block 115643: transaction (ee3fe7 c0, 164), jh->b_transaction ( (null), 0), jh->b_next_transaction ( (null), 0), jlist 0 - and - kernel BUG at /usr/projects/linux/ext4/fs/jbd2/transaction.c:2200! ... Call Trace: [] ? __ext4_journalled_invalidatepage+0x117/0x117 [] __ext4_journalled_invalidatepage+0x10f/0x117 [] ? __ext4_journalled_invalidatepage+0x117/0x117 [] ? 
lock_buffer+0x36/0x36 [] ext4_journalled_invalidatepage+0xd/0x22 [] do_invalidatepage+0x22/0x26 [] truncate_inode_page+0x5b/0x85 [] truncate_inode_pages_range+0x156/0x38c [] truncate_inode_pages+0x11/0x15 [] truncate_pagecache+0x55/0x71 [] ext4_setattr+0x4a9/0x560 [] ? current_kernel_time+0x10/0x44 [] notify_change+0x1c7/0x2be [] do_truncate+0x65/0x85 [] ? file_ra_state_init+0x12/0x29 - and - WARNING: CPU: 1 PID: 1331 at /usr/projects/linux/ext4/fs/jbd2/transaction.c:1396 irty_metadata+0x14a/0x1ae() ... Call Trace: [] ? console_unlock+0x3a1/0x3ce [] dump_stack+0x48/0x60 [] warn_slowpath_common+0x89/0xa0 [] ? jbd2_journal_dirty_metadata+0x14a/0x1ae [] warn_slowpath_null+0x14/0x18 [] jbd2_journal_dirty_metadata+0x14a/0x1ae [] __ext4_handle_dirty_metadata+0xd4/0x19d [] write_end_fn+0x40/0x53 [] ext4_walk_page_buffers+0x4e/0x6a [] ext4_writepage+0x354/0x3b8 [] ? mpage_release_unused_pages+0xd4/0xd4 [] ? wait_on_buffer+0x2c/0x2c [] ? ext4_writepage+0x3b8/0x3b8 [] __writepage+0x10/0x2e [] write_cache_pages+0x22d/0x32c [] ? ext4_writepage+0x3b8/0x3b8 [] ext4_writepages+0x102/0x607 [] ? sched_clock_local+0x10/0x10e [] ? __lock_is_held+0x2e/0x44 [] ? lock_is_held+0x43/0x51 [] do_writepages+0x1c/0x29 [] __writeback_single_inode+0xc3/0x545 [] writeback_sb_inodes+0x21f/0x36d ... Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inode.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 0554b0b5957bb..263a46c488c73 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1701,19 +1701,32 @@ static int __ext4_journalled_writepage(struct page *page, ext4_walk_page_buffers(handle, page_bufs, 0, len, NULL, bget_one); } - /* As soon as we unlock the page, it can go away, but we have - * references to buffers so we are safe */ + /* + * We need to release the page lock before we start the + * journal, so grab a reference so the page won't disappear + * out from under us. + */ + get_page(page); unlock_page(page); handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, ext4_writepage_trans_blocks(inode)); if (IS_ERR(handle)) { ret = PTR_ERR(handle); - goto out; + put_page(page); + goto out_no_pagelock; } - BUG_ON(!ext4_handle_valid(handle)); + lock_page(page); + put_page(page); + if (page->mapping != mapping) { + /* The page got truncated from under us */ + ext4_journal_stop(handle); + ret = 0; + goto out; + } + if (inline_data) { BUFFER_TRACE(inode_bh, "get write access"); ret = ext4_journal_get_write_access(handle, inode_bh); @@ -1739,6 +1752,8 @@ static int __ext4_journalled_writepage(struct page *page, NULL, bput_one); ext4_set_inode_state(inode, EXT4_STATE_JDATA); out: + unlock_page(page); +out_no_pagelock: brelse(inode_bh); return ret; } From 1d54c6bb0c889f29f829dd400b5af3e5874fbfa0 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 20 Jun 2015 22:50:33 -0400 Subject: [PATCH 0169/2091] ext4: call sync_blockdev() before invalidate_bdev() in put_super() commit 89d96a6f8e6491f24fc8f99fd6ae66820e85c6c1 upstream. Normally all of the buffers will have been forced out to disk before we call invalidate_bdev(), but there will be some cases, where a file system operation was aborted due to an ext4_error(), where there may still be some dirty buffers in the buffer cache for the device. So try to force them out to memory before calling invalidate_bdev(). 
This fixes a warning triggered by generic/081: WARNING: CPU: 1 PID: 3473 at /usr/projects/linux/ext4/fs/block_dev.c:56 __blkdev_put+0xb5/0x16f() Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index ca9d4a2fed415..3ee91461aac27 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -807,6 +807,7 @@ static void ext4_put_super(struct super_block *sb) dump_orphan_list(sb, sbi); J_ASSERT(list_empty(&sbi->s_orphan)); + sync_blockdev(sb->s_bdev); invalidate_bdev(sb->s_bdev); if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) { /* From c632b53d850c401b63d5a308b2faafa98d8140af Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 21 Jun 2015 21:10:51 -0400 Subject: [PATCH 0170/2091] ext4: don't retry file block mapping on bigalloc fs with non-extent file commit 292db1bc6c105d86111e858859456bcb11f90f91 upstream. ext4 isn't willing to map clusters to a non-extent file. Don't signal this with an out of space error, since the FS will retry the allocation (which didn't fail) forever. Instead, return EUCLEAN so that the operation will fail immediately all the way back to userspace. (The fix is either to run e2fsck -E bmap2extent, or to chattr +e the file.) Signed-off-by: Darrick J. Wong Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/indirect.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index 9588240195090..94ae6874c2cb8 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -565,7 +565,7 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { EXT4_ERROR_INODE(inode, "Can't allocate blocks for " "non-extent mapped inodes with bigalloc"); - return -ENOSPC; + return -EUCLEAN; } /* Set up for the direct block allocation */ From 1c1964376ab16bc80dc24db2972a9d8f9120c9a4 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 23 Jun 2015 11:03:54 -0400 Subject: [PATCH 0171/2091] ext4: set lazytime on remount if MS_LAZYTIME is set by mount commit a2fd66d069d86d793e9d39d4079b96f46d13f237 upstream. Newer versions of mount parse the lazytime feature and pass it to the mount system call via the flags field in the mount system call, removing the lazytime string from the mount options list. So we need to check for the presence of MS_LAZYTIME and set it in sb->s_flags in order for this flag to be set on a remount. Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 3ee91461aac27..ca12affdba961 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4944,6 +4944,9 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); } + if (*flags & MS_LAZYTIME) + sb->s_flags |= MS_LAZYTIME; + if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) { if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) { err = -EROFS; From ae82e119e810848c6596031c1bed8c9a38e70322 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 1 Jul 2015 23:37:46 -0400 Subject: [PATCH 0172/2091] ext4: fix fencepost error in lazytime optimization commit 0f0ff9a9f3fa2ec6f427603fd521d5f3a0b076d1 upstream. Commit 8f4d8558391: "ext4: fix lazytime optimization" was not a complete fix. 
In the case where the inode number is a multiple of 16, and we could still end up updating an inode with dirty timestamps written to the wrong inode on disk. Oops. This can be easily reproduced by using generic/005 with a file system with metadata_csum and lazytime enabled. Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inode.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 263a46c488c73..c7238fa503f20 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4360,7 +4360,12 @@ static void ext4_update_other_inodes_time(struct super_block *sb, int inode_size = EXT4_INODE_SIZE(sb); oi.orig_ino = orig_ino; - ino = (orig_ino & ~(inodes_per_block - 1)) + 1; + /* + * Calculate the first inode in the inode table block. Inode + * numbers are one-based. That is, the first inode in a block + * (assuming 4k blocks and 256 byte inodes) is (n*16 + 1). + */ + ino = ((orig_ino - 1) & ~(inodes_per_block - 1)) + 1; for (i = 0; i < inodes_per_block; i++, ino++, buf += inode_size) { if (ino == orig_ino) continue; From 1fdc8c7f1e7d853644c0ad5661ae2e6dfab7baa2 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Thu, 2 Jul 2015 01:32:44 -0400 Subject: [PATCH 0173/2091] bufferhead: Add _gfp version for sb_getblk() commit bd7ade3cd9b0850264306f5c2b79024a417b6396 upstream. sb_getblk() is used during ext4 (and possibly other FSes) writeback paths. Sometimes such path require allocating memory and guaranteeing that such allocation won't block. Currently, however, there is no way to provide user flags for sb_getblk which could lead to deadlocks. This patch implements a sb_getblk_gfp with the only difference it can accept user-provided GFP flags. Signed-off-by: Nikolay Borisov Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- include/linux/buffer_head.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 73b45225a7ca1..e6797ded700ec 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -317,6 +317,13 @@ sb_getblk(struct super_block *sb, sector_t block) return __getblk_gfp(sb->s_bdev, block, sb->s_blocksize, __GFP_MOVABLE); } + +static inline struct buffer_head * +sb_getblk_gfp(struct super_block *sb, sector_t block, gfp_t gfp) +{ + return __getblk_gfp(sb->s_bdev, block, sb->s_blocksize, gfp); +} + static inline struct buffer_head * sb_find_get_block(struct super_block *sb, sector_t block) { From ad7f8a81e1cf1a999f633c08cee61975daceff08 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Thu, 2 Jul 2015 01:34:07 -0400 Subject: [PATCH 0174/2091] ext4: avoid deadlocks in the writeback path by using sb_getblk_gfp commit c45653c341f5c8a0ce19c8f0ad4678640849cb86 upstream. Switch ext4 to using sb_getblk_gfp with GFP_NOFS added to fix possible deadlocks in the page writeback path. 
Signed-off-by: Nikolay Borisov Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/extents.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index e003a1e81dc35..87ba10d1d3bcc 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -503,7 +503,7 @@ __read_extent_tree_block(const char *function, unsigned int line, struct buffer_head *bh; int err; - bh = sb_getblk(inode->i_sb, pblk); + bh = sb_getblk_gfp(inode->i_sb, pblk, __GFP_MOVABLE | GFP_NOFS); if (unlikely(!bh)) return ERR_PTR(-ENOMEM); @@ -1088,7 +1088,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, err = -EIO; goto cleanup; } - bh = sb_getblk(inode->i_sb, newblock); + bh = sb_getblk_gfp(inode->i_sb, newblock, __GFP_MOVABLE | GFP_NOFS); if (unlikely(!bh)) { err = -ENOMEM; goto cleanup; @@ -1282,7 +1282,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, if (newblock == 0) return err; - bh = sb_getblk(inode->i_sb, newblock); + bh = sb_getblk_gfp(inode->i_sb, newblock, __GFP_MOVABLE | GFP_NOFS); if (unlikely(!bh)) return -ENOMEM; lock_buffer(bh); From 864c38b9d91bc101c804cb604b74c4edda76c377 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Fri, 3 Jul 2015 21:13:55 -0400 Subject: [PATCH 0175/2091] ext4: fix reservation release on invalidatepage for delalloc fs commit 9705acd63b125dee8b15c705216d7186daea4625 upstream. On delalloc enabled file system on invalidatepage operation in ext4_da_page_release_reservation() we want to clear the delayed buffer and remove the extent covering the delayed buffer from the extent status tree. However currently there is a bug where on the systems with page size > block size we will always remove extents from the start of the page regardless where the actual delayed buffers are positioned in the page. This leads to the errors like this: EXT4-fs warning (device loop0): ext4_da_release_space:1225: ext4_da_release_space: ino 13, to_free 1 with only 0 reserved data blocks This however can cause data loss on writeback time if the file system is in ENOSPC condition because we're releasing reservation for someones else delayed buffer. Fix this by only removing extents that corresponds to the part of the page we want to invalidate. This problem is reproducible by the following fio receipt (however I was only able to reproduce it with fio-2.1 or older. 
[global] bs=8k iodepth=1024 iodepth_batch=60 randrepeat=1 size=1m directory=/mnt/test numjobs=20 [job1] ioengine=sync bs=1k direct=1 rw=randread filename=file1:file2 [job2] ioengine=libaio rw=randwrite direct=1 filename=file1:file2 [job3] bs=1k ioengine=posixaio rw=randwrite direct=1 filename=file1:file2 [job5] bs=1k ioengine=sync rw=randread filename=file1:file2 [job7] ioengine=libaio rw=randwrite filename=file1:file2 [job8] ioengine=posixaio rw=randwrite filename=file1:file2 [job10] ioengine=mmap rw=randwrite bs=1k filename=file1:file2 [job11] ioengine=mmap rw=randwrite direct=1 filename=file1:file2 Signed-off-by: Lukas Czerner Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inode.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index c7238fa503f20..966c614822cc1 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1342,7 +1342,7 @@ static void ext4_da_page_release_reservation(struct page *page, unsigned int offset, unsigned int length) { - int to_release = 0; + int to_release = 0, contiguous_blks = 0; struct buffer_head *head, *bh; unsigned int curr_off = 0; struct inode *inode = page->mapping->host; @@ -1363,14 +1363,23 @@ static void ext4_da_page_release_reservation(struct page *page, if ((offset <= curr_off) && (buffer_delay(bh))) { to_release++; + contiguous_blks++; clear_buffer_delay(bh); + } else if (contiguous_blks) { + lblk = page->index << + (PAGE_CACHE_SHIFT - inode->i_blkbits); + lblk += (curr_off >> inode->i_blkbits) - + contiguous_blks; + ext4_es_remove_extent(inode, lblk, contiguous_blks); + contiguous_blks = 0; } curr_off = next_off; } while ((bh = bh->b_this_page) != head); - if (to_release) { + if (contiguous_blks) { lblk = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); - ext4_es_remove_extent(inode, lblk, to_release); + lblk += (curr_off >> inode->i_blkbits) - contiguous_blks; + ext4_es_remove_extent(inode, lblk, contiguous_blks); } /* If we have released all the blocks belonging to a cluster, then we From c23d1fbb672464b34894305a70268fcd52eec683 Mon Sep 17 00:00:00 2001 From: Eryu Guan Date: Fri, 3 Jul 2015 23:56:50 -0400 Subject: [PATCH 0176/2091] ext4: be more strict when migrating to non-extent based file commit d6f123a9297496ad0b6335fe881504c4b5b2a5e5 upstream. Currently the check in ext4_ind_migrate() is not enough before doing the real conversion: a) delayed allocated extents could bypass the check on eh->eh_entries and eh->eh_depth This can be demonstrated by this script xfs_io -fc "pwrite 0 4k" -c "pwrite 8k 4k" /mnt/ext4/testfile chattr -e /mnt/ext4/testfile where testfile has two extents but still be converted to non-extent based file format. b) only extent length is checked but not the offset, which would result in data lose (delalloc) or fs corruption (nodelalloc), because non-extent based file only supports at most (12 + 2^10 + 2^20 + 2^30) blocks This can be demostrated by xfs_io -fc "pwrite 5T 4k" /mnt/ext4/testfile chattr -e /mnt/ext4/testfile sync If delalloc is enabled, dmesg prints EXT4-fs warning (device dm-4): ext4_block_to_path:105: block 1342177280 > max in inode 53 EXT4-fs (dm-4): Delayed block allocation failed for inode 53 at logical offset 1342177280 with max blocks 1 with error 5 EXT4-fs (dm-4): This should not happen!! Data will be lost If delalloc is disabled, e2fsck -nf shows corruption Inode 53, i_size is 5497558142976, should be 4096. Fix? 
no Fix the two issues by a) forcing all delayed allocation blocks to be allocated before checking eh->eh_depth and eh->eh_entries b) limiting the last logical block of the extent is within direct map Signed-off-by: Eryu Guan Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/migrate.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index b52374e421022..6d8b0c9173644 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -620,6 +620,7 @@ int ext4_ind_migrate(struct inode *inode) struct ext4_inode_info *ei = EXT4_I(inode); struct ext4_extent *ex; unsigned int i, len; + ext4_lblk_t end; ext4_fsblk_t blk; handle_t *handle; int ret; @@ -633,6 +634,14 @@ int ext4_ind_migrate(struct inode *inode) EXT4_FEATURE_RO_COMPAT_BIGALLOC)) return -EOPNOTSUPP; + /* + * In order to get correct extent info, force all delayed allocation + * blocks to be allocated, otherwise delayed allocation blocks may not + * be reflected and bypass the checks on extent header. + */ + if (test_opt(inode->i_sb, DELALLOC)) + ext4_alloc_da_blocks(inode); + handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, 1); if (IS_ERR(handle)) return PTR_ERR(handle); @@ -654,7 +663,8 @@ int ext4_ind_migrate(struct inode *inode) else { len = le16_to_cpu(ex->ee_len); blk = ext4_ext_pblock(ex); - if (len > EXT4_NDIR_BLOCKS) { + end = le32_to_cpu(ex->ee_block) + len - 1; + if (end >= EXT4_NDIR_BLOCKS) { ret = -EOPNOTSUPP; goto errout; } From 8591fac57a6cf0c98269c9525e43c17b76835068 Mon Sep 17 00:00:00 2001 From: Eryu Guan Date: Sat, 4 Jul 2015 00:03:44 -0400 Subject: [PATCH 0177/2091] ext4: correctly migrate a file with a hole at the beginning commit 8974fec7d72e3e02752fe0f27b4c3719c78d9a15 upstream. Currently ext4_ind_migrate() doesn't correctly handle a file which contains a hole at the beginning of the file. This caused the migration to be done incorrectly, and then if there is a subsequent following delayed allocation write to the "hole", this would reclaim the same data blocks again and results in fs corruption. # assmuing 4k block size ext4, with delalloc enabled # skip the first block and write to the second block xfs_io -fc "pwrite 4k 4k" -c "fsync" /mnt/ext4/testfile # converting to indirect-mapped file, which would move the data blocks # to the beginning of the file, but extent status cache still marks # that region as a hole chattr -e /mnt/ext4/testfile # delayed allocation writes to the "hole", reclaim the same data block # again, results in i_blocks corruption xfs_io -c "pwrite 0 4k" /mnt/ext4/testfile umount /mnt/ext4 e2fsck -nf /dev/sda6 ... Inode 53, i_blocks is 16, should be 8. Fix? no ... 
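[ Note, not part of the upstream patch: with a 4k block size, "pwrite 4k 4k" produces a single extent whose ee_block is 1, so after migration the data block belongs in ei->i_data[1], not ei->i_data[0]. A condensed C sketch of the copy loop the fix below uses, with the leading hole left as zeroed slots (declarations omitted for brevity; the full hunk follows):

    start = le32_to_cpu(ex->ee_block);           /* first logical block covered */
    end   = start + le16_to_cpu(ex->ee_len) - 1; /* last logical block covered  */
    blk   = ext4_ext_pblock(ex);                 /* first physical block        */

    memset(ei->i_data, 0, sizeof(ei->i_data));
    for (i = start; i <= end; i++)               /* was 0..len-1 before the fix */
        ei->i_data[i] = cpu_to_le32(blk++);
]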
Signed-off-by: Eryu Guan Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/migrate.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 6d8b0c9173644..6163ad21cb0ef 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -620,7 +620,7 @@ int ext4_ind_migrate(struct inode *inode) struct ext4_inode_info *ei = EXT4_I(inode); struct ext4_extent *ex; unsigned int i, len; - ext4_lblk_t end; + ext4_lblk_t start, end; ext4_fsblk_t blk; handle_t *handle; int ret; @@ -659,11 +659,12 @@ int ext4_ind_migrate(struct inode *inode) goto errout; } if (eh->eh_entries == 0) - blk = len = 0; + blk = len = start = end = 0; else { len = le16_to_cpu(ex->ee_len); blk = ext4_ext_pblock(ex); - end = le32_to_cpu(ex->ee_block) + len - 1; + start = le32_to_cpu(ex->ee_block); + end = start + len - 1; if (end >= EXT4_NDIR_BLOCKS) { ret = -EOPNOTSUPP; goto errout; @@ -672,7 +673,7 @@ int ext4_ind_migrate(struct inode *inode) ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS); memset(ei->i_data, 0, sizeof(ei->i_data)); - for (i=0; i < len; i++) + for (i = start; i <= end; i++) ei->i_data[i] = cpu_to_le32(blk++); ext4_mark_inode_dirty(handle, inode); errout: From a345e343e0f6cccbb8d3891044b94d1d66407a11 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Sun, 5 Jul 2015 12:33:44 -0400 Subject: [PATCH 0178/2091] ext4: replace open coded nofail allocation in ext4_free_blocks() commit 7444a072c387a93ebee7066e8aee776954ab0e41 upstream. ext4_free_blocks is looping around the allocation request and mimics __GFP_NOFAIL behavior without any allocation fallback strategy. Let's remove the open coded loop and replace it with __GFP_NOFAIL. Without the flag the allocator has no way to find out never-fail requirement and cannot help in any way. Signed-off-by: Michal Hocko Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/mballoc.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 8d1e60214ef0a..41260489d3bcd 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4800,18 +4800,12 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, /* * blocks being freed are metadata. these blocks shouldn't * be used until this transaction is committed + * + * We use __GFP_NOFAIL because ext4_free_blocks() is not allowed + * to fail. */ - retry: - new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS); - if (!new_entry) { - /* - * We use a retry loop because - * ext4_free_blocks() is not allowed to fail. - */ - cond_resched(); - congestion_wait(BLK_RW_ASYNC, HZ/50); - goto retry; - } + new_entry = kmem_cache_alloc(ext4_free_data_cachep, + GFP_NOFS|__GFP_NOFAIL); new_entry->efd_start_cluster = bit; new_entry->efd_group = block_group; new_entry->efd_count = count_clusters; From 14624906f5f4356dd69e62b2ed671275cb3a0ed3 Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Mon, 15 Jun 2015 00:18:02 -0400 Subject: [PATCH 0179/2091] jbd2: use GFP_NOFS in jbd2_cleanup_journal_tail() commit b4f1afcd068f6e533230dfed00782cd8a907f96b upstream. jbd2_cleanup_journal_tail() can be invoked by jbd2__journal_start() So allocations should be done with GFP_NOFS [Full stack trace snipped from 3.10-rh7] [] dump_stack+0x19/0x1b [] warn_slowpath_common+0x61/0x80 [] warn_slowpath_null+0x1a/0x20 [] slab_pre_alloc_hook.isra.31.part.32+0x15/0x17 [] kmem_cache_alloc+0x55/0x210 [] ? 
mempool_alloc_slab+0x15/0x20 [] mempool_alloc_slab+0x15/0x20 [] mempool_alloc+0x69/0x170 [] ? _raw_spin_unlock_irq+0xe/0x20 [] ? finish_task_switch+0x5d/0x150 [] bio_alloc_bioset+0x1be/0x2e0 [] blkdev_issue_flush+0x99/0x120 [] jbd2_cleanup_journal_tail+0x93/0xa0 [jbd2] -->GFP_KERNEL [] jbd2_log_do_checkpoint+0x221/0x4a0 [jbd2] [] __jbd2_log_wait_for_space+0xa7/0x1e0 [jbd2] [] start_this_handle+0x2d8/0x550 [jbd2] [] ? __memcg_kmem_put_cache+0x29/0x30 [] ? kmem_cache_alloc+0x130/0x210 [] jbd2__journal_start+0xba/0x190 [jbd2] [] ? lru_cache_add+0xe/0x10 [] ? ext4_da_write_begin+0xf9/0x330 [ext4] [] __ext4_journal_start_sb+0x77/0x160 [ext4] [] ext4_da_write_begin+0xf9/0x330 [ext4] [] generic_file_buffered_write_iter+0x10c/0x270 [] __generic_file_write_iter+0x178/0x390 [] __generic_file_aio_write+0x8b/0xb0 [] generic_file_aio_write+0x5d/0xc0 [] ext4_file_write+0xa9/0x450 [ext4] [] ? pipe_read+0x379/0x4f0 [] do_sync_write+0x90/0xe0 [] vfs_write+0xbd/0x1e0 [] SyS_write+0x58/0xb0 [] system_call_fastpath+0x16/0x1b Signed-off-by: Dmitry Monakhov Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/jbd2/checkpoint.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 988b32ed4c873..6b7b73afef81e 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -405,7 +405,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal) * jbd2_cleanup_journal_tail() doesn't get called all that often. */ if (journal->j_flags & JBD2_BARRIER) - blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); + blkdev_issue_flush(journal->j_fs_dev, GFP_NOFS, NULL); __jbd2_update_log_tail(journal, first_tid, blocknr); return 0; From 99d0ebdb4b9811ebe8e89ab07480b7836b4aa87a Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Mon, 15 Jun 2015 14:36:01 -0400 Subject: [PATCH 0180/2091] jbd2: fix ocfs2 corrupt when updating journal superblock fails commit 6f6a6fda294506dfe0e3e0a253bb2d2923f28f0a upstream. If updating journal superblock fails after journal data has been flushed, the error is omitted and this will mislead the caller as a normal case. In ocfs2, the checkpoint will be treated successfully and the other node can get the lock to update. Since the sb_start is still pointing to the old log block, it will rewrite the journal data during journal recovery by the other node. Thus the new updates will be overwritten and ocfs2 corrupts. So in above case we have to return the error, and ocfs2_commit_cache will take care of the error and prevent the other node to do update first. And only after recovering journal it can do the new updates. The issue discussion mail can be found at: https://oss.oracle.com/pipermail/ocfs2-devel/2015-June/010856.html http://comments.gmane.org/gmane.comp.file-systems.ext4/48841 [ Fixed bug in patch which allowed a non-negative error return from jbd2_cleanup_journal_tail() to leak out of jbd2_fjournal_flush(); this was causing xfstests ext4/306 to fail. 
-- Ted ] Reported-by: Yiwen Jiang Signed-off-by: Joseph Qi Signed-off-by: Theodore Ts'o Tested-by: Yiwen Jiang Cc: Junxiao Bi Signed-off-by: Greg Kroah-Hartman --- fs/jbd2/checkpoint.c | 5 ++--- fs/jbd2/journal.c | 38 +++++++++++++++++++++++++++++++------- include/linux/jbd2.h | 4 ++-- 3 files changed, 35 insertions(+), 12 deletions(-) diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 6b7b73afef81e..4227dc4f7437d 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -390,7 +390,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal) unsigned long blocknr; if (is_journal_aborted(journal)) - return 1; + return -EIO; if (!jbd2_journal_get_log_tail(journal, &first_tid, &blocknr)) return 1; @@ -407,8 +407,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal) if (journal->j_flags & JBD2_BARRIER) blkdev_issue_flush(journal->j_fs_dev, GFP_NOFS, NULL); - __jbd2_update_log_tail(journal, first_tid, blocknr); - return 0; + return __jbd2_update_log_tail(journal, first_tid, blocknr); } diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index b96bd8076b706..112fad9e1e20a 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -885,9 +885,10 @@ int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid, * * Requires j_checkpoint_mutex */ -void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block) +int __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block) { unsigned long freed; + int ret; BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); @@ -897,7 +898,10 @@ void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block) * space and if we lose sb update during power failure we'd replay * old transaction with possibly newly overwritten data. */ - jbd2_journal_update_sb_log_tail(journal, tid, block, WRITE_FUA); + ret = jbd2_journal_update_sb_log_tail(journal, tid, block, WRITE_FUA); + if (ret) + goto out; + write_lock(&journal->j_state_lock); freed = block - journal->j_tail; if (block < journal->j_tail) @@ -913,6 +917,9 @@ void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block) journal->j_tail_sequence = tid; journal->j_tail = block; write_unlock(&journal->j_state_lock); + +out: + return ret; } /* @@ -1331,7 +1338,7 @@ static int journal_reset(journal_t *journal) return jbd2_journal_start_thread(journal); } -static void jbd2_write_superblock(journal_t *journal, int write_op) +static int jbd2_write_superblock(journal_t *journal, int write_op) { struct buffer_head *bh = journal->j_sb_buffer; journal_superblock_t *sb = journal->j_superblock; @@ -1370,7 +1377,10 @@ static void jbd2_write_superblock(journal_t *journal, int write_op) printk(KERN_ERR "JBD2: Error %d detected when updating " "journal superblock for %s.\n", ret, journal->j_devname); + jbd2_journal_abort(journal, ret); } + + return ret; } /** @@ -1383,10 +1393,11 @@ static void jbd2_write_superblock(journal_t *journal, int write_op) * Update a journal's superblock information about log tail and write it to * disk, waiting for the IO to complete. 
*/ -void jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid, +int jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid, unsigned long tail_block, int write_op) { journal_superblock_t *sb = journal->j_superblock; + int ret; BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); jbd_debug(1, "JBD2: updating superblock (start %lu, seq %u)\n", @@ -1395,13 +1406,18 @@ void jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid, sb->s_sequence = cpu_to_be32(tail_tid); sb->s_start = cpu_to_be32(tail_block); - jbd2_write_superblock(journal, write_op); + ret = jbd2_write_superblock(journal, write_op); + if (ret) + goto out; /* Log is no longer empty */ write_lock(&journal->j_state_lock); WARN_ON(!sb->s_sequence); journal->j_flags &= ~JBD2_FLUSHED; write_unlock(&journal->j_state_lock); + +out: + return ret; } /** @@ -1950,7 +1966,14 @@ int jbd2_journal_flush(journal_t *journal) return -EIO; mutex_lock(&journal->j_checkpoint_mutex); - jbd2_cleanup_journal_tail(journal); + if (!err) { + err = jbd2_cleanup_journal_tail(journal); + if (err < 0) { + mutex_unlock(&journal->j_checkpoint_mutex); + goto out; + } + err = 0; + } /* Finally, mark the journal as really needing no recovery. * This sets s_start==0 in the underlying superblock, which is @@ -1966,7 +1989,8 @@ int jbd2_journal_flush(journal_t *journal) J_ASSERT(journal->j_head == journal->j_tail); J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence); write_unlock(&journal->j_state_lock); - return 0; +out: + return err; } /** diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 20e7f78041c81..edb640ae9a948 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1035,7 +1035,7 @@ struct buffer_head *jbd2_journal_get_descriptor_buffer(journal_t *journal); int jbd2_journal_next_log_block(journal_t *, unsigned long long *); int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid, unsigned long *block); -void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block); +int __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block); void jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block); /* Commit management */ @@ -1157,7 +1157,7 @@ extern int jbd2_journal_recover (journal_t *journal); extern int jbd2_journal_wipe (journal_t *, int); extern int jbd2_journal_skip_recovery (journal_t *); extern void jbd2_journal_update_sb_errno(journal_t *); -extern void jbd2_journal_update_sb_log_tail (journal_t *, tid_t, +extern int jbd2_journal_update_sb_log_tail (journal_t *, tid_t, unsigned long, int); extern void __jbd2_journal_abort_hard (journal_t *); extern void jbd2_journal_abort (journal_t *, int); From 25cd02f381876793dc5acd14d87f0dffccad1973 Mon Sep 17 00:00:00 2001 From: Firo Yang Date: Sat, 6 Jun 2015 13:16:42 +0200 Subject: [PATCH 0181/2091] NFC: st21nfcb: Remove inappropriate kfree on a devm_kzalloc pointer commit 38bd83f04c5e9695011dc5c294e0c4e6a9f9052d upstream. Since ndev->driver_data is allocated by devm_kzalloc(), we do not need the inappropriate kfree to free it in driver's remove function. Freeing will trigger when driver unloads. 
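[ For illustration only, not part of the upstream patch: memory obtained with devm_kzalloc() is device-managed and is released by the driver core after the remove callback runs, so an explicit kfree() frees it a second time. A minimal sketch with hypothetical names (foo_info, foo_probe, foo_remove):

    static int foo_probe(struct platform_device *pdev)
    {
        struct foo_info *info;

        info = devm_kzalloc(&pdev->dev, sizeof(*info), GFP_KERNEL);
        if (!info)
            return -ENOMEM;
        platform_set_drvdata(pdev, info);
        return 0;
    }

    static int foo_remove(struct platform_device *pdev)
    {
        /* no kfree(platform_get_drvdata(pdev)) here:
         * devres releases the allocation automatically */
        return 0;
    }
]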
Acked-by: Christophe Ricard Signed-off-by: Firo Yang Signed-off-by: Samuel Ortiz Signed-off-by: Greg Kroah-Hartman --- drivers/nfc/st21nfcb/st21nfcb.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/nfc/st21nfcb/st21nfcb.c b/drivers/nfc/st21nfcb/st21nfcb.c index ca9871ab3fb3c..c7dc282d5c3be 100644 --- a/drivers/nfc/st21nfcb/st21nfcb.c +++ b/drivers/nfc/st21nfcb/st21nfcb.c @@ -131,11 +131,8 @@ EXPORT_SYMBOL_GPL(st21nfcb_nci_probe); void st21nfcb_nci_remove(struct nci_dev *ndev) { - struct st21nfcb_nci_info *info = nci_get_drvdata(ndev); - nci_unregister_device(ndev); nci_free_device(ndev); - kfree(info); } EXPORT_SYMBOL_GPL(st21nfcb_nci_remove); From fff9252c7de221c9dc0c9b7e49815e1ebb15f0aa Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Sat, 6 Jun 2015 13:16:43 +0200 Subject: [PATCH 0182/2091] NFC: st21nfcb: Do not remove header once the payload is sent commit 09f39a950523b1bb830c30a8670b77e0067da092 upstream. Once the data is sent, we need to preserve the full frame for the ndlc state machine. If the NDLC ACK is not received in time, the ndlc layer will resend the same frame. Having the header byte pulled will corrupt the frame. Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz Signed-off-by: Greg Kroah-Hartman --- drivers/nfc/st21nfcb/i2c.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/nfc/st21nfcb/i2c.c b/drivers/nfc/st21nfcb/i2c.c index 76a4cad41cec9..4bc15ec29b03a 100644 --- a/drivers/nfc/st21nfcb/i2c.c +++ b/drivers/nfc/st21nfcb/i2c.c @@ -87,11 +87,6 @@ static void st21nfcb_nci_i2c_disable(void *phy_id) gpio_set_value(phy->gpio_reset, 1); } -static void st21nfcb_nci_remove_header(struct sk_buff *skb) -{ - skb_pull(skb, ST21NFCB_FRAME_HEADROOM); -} - /* * Writing a frame must not return the number of written bytes. * It must return either zero for success, or <0 for error. @@ -121,8 +116,6 @@ static int st21nfcb_nci_i2c_write(void *phy_id, struct sk_buff *skb) r = 0; } - st21nfcb_nci_remove_header(skb); - return r; } From 0268313777f9ff4c61adfee1b726dbde2f68ed44 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Sat, 6 Jun 2015 13:16:44 +0200 Subject: [PATCH 0183/2091] NFC: st21nfcb: remove st21nfcb_nci_i2c_disable commit 4ac82e894825126816d7b7f662743335ce2b015e upstream. ndlc_remove already calls st21nfcb_nci_i2c_disable and phy->powered is already set to 0. Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz Signed-off-by: Greg Kroah-Hartman --- drivers/nfc/st21nfcb/i2c.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/nfc/st21nfcb/i2c.c b/drivers/nfc/st21nfcb/i2c.c index 4bc15ec29b03a..c44f8cf5391a6 100644 --- a/drivers/nfc/st21nfcb/i2c.c +++ b/drivers/nfc/st21nfcb/i2c.c @@ -359,9 +359,6 @@ static int st21nfcb_nci_i2c_remove(struct i2c_client *client) ndlc_remove(phy->ndlc); - if (phy->powered) - st21nfcb_nci_i2c_disable(phy); - return 0; } From 29c1b3c87b50260525575d260efd6af66a25bf4d Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Thu, 21 May 2015 17:29:35 +0200 Subject: [PATCH 0184/2091] rtc: snvs: fix wakealarm by call enable_irq_wake earlier commit 119434f44c78df8c4b6d67f835448542a4bd7e91 upstream. When entering suspend while an wakeup alarm is set, enable_set_wake should make sure that the RTC interrupt keep being enabled and the .irq_set_wake for the RTC interrupt get called. However, since the driver uses the suspend_noirq callback, the call to enable_irq_wake has been made after disabling the interrupts. While .irq_set_wake has been called properly, the interrupt remained disabled. 
Use the suspend callback to call enable_irq_wake early enough to ensure the RTC interrupt remains enabled. Fixes: 7654e9d4fd8f ("drivers/rtc/rtc-snvs: fix suspend/resume") Signed-off-by: Stefan Agner Signed-off-by: Alexandre Belloni Signed-off-by: Greg Kroah-Hartman --- drivers/rtc/rtc-snvs.c | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/drivers/rtc/rtc-snvs.c b/drivers/rtc/rtc-snvs.c index 0479e807a776a..d87a85cefb665 100644 --- a/drivers/rtc/rtc-snvs.c +++ b/drivers/rtc/rtc-snvs.c @@ -322,6 +322,13 @@ static int snvs_rtc_suspend(struct device *dev) if (device_may_wakeup(dev)) enable_irq_wake(data->irq); + return 0; +} + +static int snvs_rtc_suspend_noirq(struct device *dev) +{ + struct snvs_rtc_data *data = dev_get_drvdata(dev); + if (data->clk) clk_disable_unprepare(data->clk); @@ -331,23 +338,28 @@ static int snvs_rtc_suspend(struct device *dev) static int snvs_rtc_resume(struct device *dev) { struct snvs_rtc_data *data = dev_get_drvdata(dev); - int ret; if (device_may_wakeup(dev)) - disable_irq_wake(data->irq); + return disable_irq_wake(data->irq); - if (data->clk) { - ret = clk_prepare_enable(data->clk); - if (ret) - return ret; - } + return 0; +} + +static int snvs_rtc_resume_noirq(struct device *dev) +{ + struct snvs_rtc_data *data = dev_get_drvdata(dev); + + if (data->clk) + return clk_prepare_enable(data->clk); return 0; } static const struct dev_pm_ops snvs_rtc_pm_ops = { - .suspend_noirq = snvs_rtc_suspend, - .resume_noirq = snvs_rtc_resume, + .suspend = snvs_rtc_suspend, + .suspend_noirq = snvs_rtc_suspend_noirq, + .resume = snvs_rtc_resume, + .resume_noirq = snvs_rtc_resume_noirq, }; #define SNVS_RTC_PM_OPS (&snvs_rtc_pm_ops) From 790d203b212fe6c73564bfc1258d7993692f9fc6 Mon Sep 17 00:00:00 2001 From: Cyrille Pitchen Date: Tue, 9 Jun 2015 18:22:14 +0200 Subject: [PATCH 0185/2091] i2c: at91: fix a race condition when using the DMA controller commit 93563a6a71bb69dd324fc7354c60fb05f84aae6b upstream. For TX transactions, the TXCOMP bit in the Status Register is cleared when the first data is written into the Transmit Holding Register. In the lines from at91_do_twi_transfer(): at91_twi_write_data_dma(dev); at91_twi_write(dev, AT91_TWI_IER, AT91_TWI_TXCOMP); the TXCOMP interrupt may be enabled before the DMA controller has actually started to write into the THR. In such a case, the TXCOMP bit is still set into the Status Register so the interrupt is triggered immediately. The driver understands that a transaction completion has occurred but this transaction hasn't started yet. Hence the TXCOMP interrupt is no longer enabled by at91_do_twi_transfer() but instead by at91_twi_write_data_dma_callback(). Also, the TXCOMP bit in the Status Register in not a clear on read flag but a snapshot of the transmission state at the time the Status Register is read. When a NACK error is dectected by the I2C controller, the TXCOMP, NACK and TXRDY bits are set together to 1 in the SR. If enabled, the TXCOMP interrupt is triggered at the same time. Also setting the TXRDY to 1 triggers the DMA controller to write the next data into the THR. Such a write resets the TXCOMP bit to 0 in the SR. So depending on when the interrupt handler reads the SR, it may fail to detect the NACK error if it relies on the TXCOMP bit. The NACK bit and its interrupt should be used instead. For RX transactions, the TXCOMP bit in the Status Register is cleared when the START bit is set into the Control Register. 
However to unify the management of the TXCOMP bit when the DMA controller is used, the TXCOMP interrupt is now enabled by the DMA callbacks for both TX and RX transfers. Signed-off-by: Cyrille Pitchen Acked-by: Ludovic Desroches Signed-off-by: Wolfram Sang Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-at91.c | 70 ++++++++++++++++++++++++++--------- 1 file changed, 53 insertions(+), 17 deletions(-) diff --git a/drivers/i2c/busses/i2c-at91.c b/drivers/i2c/busses/i2c-at91.c index ff23d1bdd2307..9bd10a9b4b50b 100644 --- a/drivers/i2c/busses/i2c-at91.c +++ b/drivers/i2c/busses/i2c-at91.c @@ -65,6 +65,9 @@ #define AT91_TWI_UNRE 0x0080 /* Underrun Error */ #define AT91_TWI_NACK 0x0100 /* Not Acknowledged */ +#define AT91_TWI_INT_MASK \ + (AT91_TWI_TXCOMP | AT91_TWI_RXRDY | AT91_TWI_TXRDY | AT91_TWI_NACK) + #define AT91_TWI_IER 0x0024 /* Interrupt Enable Register */ #define AT91_TWI_IDR 0x0028 /* Interrupt Disable Register */ #define AT91_TWI_IMR 0x002c /* Interrupt Mask Register */ @@ -119,13 +122,12 @@ static void at91_twi_write(struct at91_twi_dev *dev, unsigned reg, unsigned val) static void at91_disable_twi_interrupts(struct at91_twi_dev *dev) { - at91_twi_write(dev, AT91_TWI_IDR, - AT91_TWI_TXCOMP | AT91_TWI_RXRDY | AT91_TWI_TXRDY); + at91_twi_write(dev, AT91_TWI_IDR, AT91_TWI_INT_MASK); } static void at91_twi_irq_save(struct at91_twi_dev *dev) { - dev->imr = at91_twi_read(dev, AT91_TWI_IMR) & 0x7; + dev->imr = at91_twi_read(dev, AT91_TWI_IMR) & AT91_TWI_INT_MASK; at91_disable_twi_interrupts(dev); } @@ -215,6 +217,14 @@ static void at91_twi_write_data_dma_callback(void *data) dma_unmap_single(dev->dev, sg_dma_address(&dev->dma.sg), dev->buf_len, DMA_TO_DEVICE); + /* + * When this callback is called, THR/TX FIFO is likely not to be empty + * yet. So we have to wait for TXCOMP or NACK bits to be set into the + * Status Register to be sure that the STOP bit has been sent and the + * transfer is completed. The NACK interrupt has already been enabled, + * we just have to enable TXCOMP one. + */ + at91_twi_write(dev, AT91_TWI_IER, AT91_TWI_TXCOMP); at91_twi_write(dev, AT91_TWI_CR, AT91_TWI_STOP); } @@ -309,7 +319,7 @@ static void at91_twi_read_data_dma_callback(void *data) /* The last two bytes have to be read without using dma */ dev->buf += dev->buf_len - 2; dev->buf_len = 2; - at91_twi_write(dev, AT91_TWI_IER, AT91_TWI_RXRDY); + at91_twi_write(dev, AT91_TWI_IER, AT91_TWI_RXRDY | AT91_TWI_TXCOMP); } static void at91_twi_read_data_dma(struct at91_twi_dev *dev) @@ -370,7 +380,7 @@ static irqreturn_t atmel_twi_interrupt(int irq, void *dev_id) /* catch error flags */ dev->transfer_status |= status; - if (irqstatus & AT91_TWI_TXCOMP) { + if (irqstatus & (AT91_TWI_TXCOMP | AT91_TWI_NACK)) { at91_disable_twi_interrupts(dev); complete(&dev->cmd_complete); } @@ -384,6 +394,34 @@ static int at91_do_twi_transfer(struct at91_twi_dev *dev) unsigned long time_left; bool has_unre_flag = dev->pdata->has_unre_flag; + /* + * WARNING: the TXCOMP bit in the Status Register is NOT a clear on + * read flag but shows the state of the transmission at the time the + * Status Register is read. According to the programmer datasheet, + * TXCOMP is set when both holding register and internal shifter are + * empty and STOP condition has been sent. + * Consequently, we should enable NACK interrupt rather than TXCOMP to + * detect transmission failure. + * + * Besides, the TXCOMP bit is already set before the i2c transaction + * has been started. 
For read transactions, this bit is cleared when + * writing the START bit into the Control Register. So the + * corresponding interrupt can safely be enabled just after. + * However for write transactions managed by the CPU, we first write + * into THR, so TXCOMP is cleared. Then we can safely enable TXCOMP + * interrupt. If TXCOMP interrupt were enabled before writing into THR, + * the interrupt handler would be called immediately and the i2c command + * would be reported as completed. + * Also when a write transaction is managed by the DMA controller, + * enabling the TXCOMP interrupt in this function may lead to a race + * condition since we don't know whether the TXCOMP interrupt is enabled + * before or after the DMA has started to write into THR. So the TXCOMP + * interrupt is enabled later by at91_twi_write_data_dma_callback(). + * Immediately after in that DMA callback, we still need to send the + * STOP condition manually writing the corresponding bit into the + * Control Register. + */ + dev_dbg(dev->dev, "transfer: %s %d bytes.\n", (dev->msg->flags & I2C_M_RD) ? "read" : "write", dev->buf_len); @@ -414,26 +452,24 @@ static int at91_do_twi_transfer(struct at91_twi_dev *dev) * seems to be the best solution. */ if (dev->use_dma && (dev->buf_len > AT91_I2C_DMA_THRESHOLD)) { + at91_twi_write(dev, AT91_TWI_IER, AT91_TWI_NACK); at91_twi_read_data_dma(dev); - /* - * It is important to enable TXCOMP irq here because - * doing it only when transferring the last two bytes - * will mask NACK errors since TXCOMP is set when a - * NACK occurs. - */ - at91_twi_write(dev, AT91_TWI_IER, - AT91_TWI_TXCOMP); - } else + } else { at91_twi_write(dev, AT91_TWI_IER, - AT91_TWI_TXCOMP | AT91_TWI_RXRDY); + AT91_TWI_TXCOMP | + AT91_TWI_NACK | + AT91_TWI_RXRDY); + } } else { if (dev->use_dma && (dev->buf_len > AT91_I2C_DMA_THRESHOLD)) { + at91_twi_write(dev, AT91_TWI_IER, AT91_TWI_NACK); at91_twi_write_data_dma(dev); - at91_twi_write(dev, AT91_TWI_IER, AT91_TWI_TXCOMP); } else { at91_twi_write_next_byte(dev); at91_twi_write(dev, AT91_TWI_IER, - AT91_TWI_TXCOMP | AT91_TWI_TXRDY); + AT91_TWI_TXCOMP | + AT91_TWI_NACK | + AT91_TWI_TXRDY); } } From f7c70cbf6bfff389b8d4448c801997a6181327dc Mon Sep 17 00:00:00 2001 From: Alexander Sverdlin Date: Fri, 12 Jun 2015 14:40:37 +0200 Subject: [PATCH 0186/2091] i2c: mux: Use __i2c_transfer() instead of calling parent's master_xfer() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit e766f338a74200b8104b1165776b19f56e252834 upstream. Newly introduced quirks infrastructure doesn't work for the devices behind MUXes because MUX's master_xfer() calls parent's master_xfer() directly without checking the quirks. Instead of duplicating check code in MUX just call __i2c_transfer() instead. This has a side effect on tracing (messages will appear on both MUX bus and parent bus), but maybe that's not bad at the end. 
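[ Note, not part of the upstream patch: __i2c_transfer() is the core transfer path that applies the quirk checks added by b7f625840267b1 before handing the messages to the adapter's master_xfer(), while calling parent->algo->master_xfer() directly skips those checks for every client behind the mux. Condensed from the hunk below:

    ret = priv->select(parent, priv->mux_priv, priv->chan_id);
    if (ret >= 0)
        ret = __i2c_transfer(parent, msgs, num);  /* quirks validated by the core */
    if (priv->deselect)
        priv->deselect(parent, priv->mux_priv, priv->chan_id);
]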
Signed-off-by: Alexander Sverdlin Tested-by: Łukasz Gemborowski Signed-off-by: Wolfram Sang Fixes: b7f625840267b1 ("i2c: add quirk checks to core") Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/i2c-mux.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/i2c-mux.c b/drivers/i2c/i2c-mux.c index 06cc1ff088f12..98dd5d4cf39eb 100644 --- a/drivers/i2c/i2c-mux.c +++ b/drivers/i2c/i2c-mux.c @@ -51,7 +51,7 @@ static int i2c_mux_master_xfer(struct i2c_adapter *adap, ret = priv->select(parent, priv->mux_priv, priv->chan_id); if (ret >= 0) - ret = parent->algo->master_xfer(parent, msgs, num); + ret = __i2c_transfer(parent, msgs, num); if (priv->deselect) priv->deselect(parent, priv->mux_priv, priv->chan_id); From ab33fd4b630206d0d7b55c6df9acdb4623c1ec5b Mon Sep 17 00:00:00 2001 From: Alexander Sverdlin Date: Fri, 12 Jun 2015 14:41:00 +0200 Subject: [PATCH 0187/2091] i2c: mux: pca954x: Use __i2c_transfer because of quirks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 0a8237ae319ab5988d40a7a9b33d68846aae34b4 upstream. pca9541 and pca954x are calling master_xfer() of the parent adapter directly thus bypassing the quirks checks of the adapter. Use __i2c_transfer() instead. Signed-off-by: Alexander Sverdlin Tested-by: Łukasz Gemborowski Acked-by: Laurent Pinchart Reviewed-by: Jisheng Zhang Signed-off-by: Wolfram Sang Fixes: b7f625840267b1 ("i2c: add quirk checks to core") Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/muxes/i2c-mux-pca9541.c | 4 ++-- drivers/i2c/muxes/i2c-mux-pca954x.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/muxes/i2c-mux-pca9541.c b/drivers/i2c/muxes/i2c-mux-pca9541.c index cb772775da431..0c8d4d2cbdaf4 100644 --- a/drivers/i2c/muxes/i2c-mux-pca9541.c +++ b/drivers/i2c/muxes/i2c-mux-pca9541.c @@ -104,7 +104,7 @@ static int pca9541_reg_write(struct i2c_client *client, u8 command, u8 val) buf[0] = command; buf[1] = val; msg.buf = buf; - ret = adap->algo->master_xfer(adap, &msg, 1); + ret = __i2c_transfer(adap, &msg, 1); } else { union i2c_smbus_data data; @@ -144,7 +144,7 @@ static int pca9541_reg_read(struct i2c_client *client, u8 command) .buf = &val } }; - ret = adap->algo->master_xfer(adap, msg, 2); + ret = __i2c_transfer(adap, msg, 2); if (ret == 2) ret = val; else if (ret >= 0) diff --git a/drivers/i2c/muxes/i2c-mux-pca954x.c b/drivers/i2c/muxes/i2c-mux-pca954x.c index bea0d2de29938..ea4aa9dfcea96 100644 --- a/drivers/i2c/muxes/i2c-mux-pca954x.c +++ b/drivers/i2c/muxes/i2c-mux-pca954x.c @@ -134,7 +134,7 @@ static int pca954x_reg_write(struct i2c_adapter *adap, msg.len = 1; buf[0] = val; msg.buf = buf; - ret = adap->algo->master_xfer(adap, &msg, 1); + ret = __i2c_transfer(adap, &msg, 1); } else { union i2c_smbus_data data; ret = adap->algo->smbus_xfer(adap, client->addr, From a6edb0fca0fa693468a340a8200efdfa7e9a4f9b Mon Sep 17 00:00:00 2001 From: Alexander Sverdlin Date: Fri, 12 Jun 2015 14:41:16 +0200 Subject: [PATCH 0188/2091] i2c: use parent adapter quirks in mux MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit dc362d50ba94eaf2b1f11eecd81eb1d040d2d6e6 upstream. Inherit parent adapter quirks in MUX in case the devices on the multiplexed buses are interested in the adapter limitations. 
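[ Illustration only, values hypothetical, not part of the upstream patch: struct i2c_adapter_quirks describes limits such as the maximum message length an adapter can handle. Copying the parent's pointer lets both the core checks and interested client drivers see the same limits on the muxed bus:

    static const struct i2c_adapter_quirks parent_quirks = {
        .max_write_len = 32,    /* controller writes at most 32 bytes per message */
    };

    /* i2c_add_mux_adapter(), as changed below, propagates the pointer: */
    priv->adap.quirks = parent->quirks;

    /* so an oversized write to a device behind the mux is now rejected by
     * the core instead of overrunning the parent controller. */
]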
Signed-off-by: Łukasz Gemborowski Signed-off-by: Alexander Sverdlin Signed-off-by: Wolfram Sang Fixes: b7f625840267b1 ("i2c: add quirk checks to core") Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/i2c-mux.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i2c/i2c-mux.c b/drivers/i2c/i2c-mux.c index 98dd5d4cf39eb..2ba7c0fbc6150 100644 --- a/drivers/i2c/i2c-mux.c +++ b/drivers/i2c/i2c-mux.c @@ -144,6 +144,7 @@ struct i2c_adapter *i2c_add_mux_adapter(struct i2c_adapter *parent, priv->adap.dev.parent = &parent->dev; priv->adap.retries = parent->retries; priv->adap.timeout = parent->timeout; + priv->adap.quirks = parent->quirks; /* Sanity check on class */ if (i2c_mux_parent_classes(parent) & class) From 19d75d2ca962e8a7d3e2351f9062bd7093e0941f Mon Sep 17 00:00:00 2001 From: Hartmut Knaack Date: Thu, 18 Jun 2015 00:31:59 +0200 Subject: [PATCH 0189/2091] iio:light:cm3323: clear bitmask before set commit c288503b32e8c5534062a05ec565d28bffa06db3 upstream. When setting the bits for integration time, the appropriate bitmask needs to be cleared first. Signed-off-by: Hartmut Knaack Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/light/cm3323.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/light/cm3323.c b/drivers/iio/light/cm3323.c index 869033e48a1fa..a1d4905cc9d2d 100644 --- a/drivers/iio/light/cm3323.c +++ b/drivers/iio/light/cm3323.c @@ -123,7 +123,7 @@ static int cm3323_set_it_bits(struct cm3323_data *data, int val, int val2) for (i = 0; i < ARRAY_SIZE(cm3323_int_time); i++) { if (val == cm3323_int_time[i].val && val2 == cm3323_int_time[i].val2) { - reg_conf = data->reg_conf; + reg_conf = data->reg_conf & ~CM3323_CONF_IT_MASK; reg_conf |= i << CM3323_CONF_IT_SHIFT; ret = i2c_smbus_write_word_data(data->client, From 62d59f98d13440855e140a68ba6996fd3ff89b50 Mon Sep 17 00:00:00 2001 From: Hartmut Knaack Date: Sun, 21 Jun 2015 12:15:50 +0200 Subject: [PATCH 0190/2091] iio:adc:cc10001_adc: fix Kconfig dependency commit b2b3c3dc6a7bef886850920f5f5dca041b443aa0 upstream. The Cosmic Circuits 10001 ADC driver depends on HAS_IOMEM, HAVE_CLK and REGULATOR together, not just any of these. Signed-off-by: Hartmut Knaack Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/Kconfig | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig index e36a73e7c3a85..1bcb65b8d4a1a 100644 --- a/drivers/iio/adc/Kconfig +++ b/drivers/iio/adc/Kconfig @@ -146,8 +146,7 @@ config DA9150_GPADC config CC10001_ADC tristate "Cosmic Circuits 10001 ADC driver" - depends on HAVE_CLK || REGULATOR - depends on HAS_IOMEM + depends on HAS_IOMEM && HAVE_CLK && REGULATOR select IIO_BUFFER select IIO_TRIGGERED_BUFFER help From 5354bc06756b59dc89d6ce24658e747e6753a926 Mon Sep 17 00:00:00 2001 From: Hartmut Knaack Date: Mon, 15 Jun 2015 23:48:24 +0200 Subject: [PATCH 0191/2091] iio:accel:bmc150-accel: fix counting direction commit 7a1d0d91c94305fa5802a53df3a54c0ea1963c48 upstream. In bmc150_accel_unregister_triggers() triggers should be unregistered in reverse order of registration. Trigger registration starts with number 0, counting up. In consequence, trigger number needs to be count down here. 
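[ Note, not part of the upstream patch: with "i++" the loop condition "i >= 0" never becomes false, so the code walks past the end of the triggers array. Counting down both terminates the loop and tears the triggers down in reverse order of registration. Minimal sketch with hypothetical names (foo_data, foo_unregister_one):

    static void foo_unregister_items(struct foo_data *data, int from)
    {
        int i;

        for (i = from; i >= 0; i--)    /* "i++" here would never terminate */
            foo_unregister_one(&data->items[i]);
    }
]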
Signed-off-by: Hartmut Knaack Reviewed-by: Octavian Purdila Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/accel/bmc150-accel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/accel/bmc150-accel.c b/drivers/iio/accel/bmc150-accel.c index 73e87739d2191..bf827d012a714 100644 --- a/drivers/iio/accel/bmc150-accel.c +++ b/drivers/iio/accel/bmc150-accel.c @@ -1465,7 +1465,7 @@ static void bmc150_accel_unregister_triggers(struct bmc150_accel_data *data, { int i; - for (i = from; i >= 0; i++) { + for (i = from; i >= 0; i--) { if (data->triggers[i].indio_trig) { iio_trigger_unregister(data->triggers[i].indio_trig); data->triggers[i].indio_trig = NULL; From cfdbaedd8da7b45b0043b709f688663edfab49f2 Mon Sep 17 00:00:00 2001 From: Peter Meerwald Date: Sun, 14 Jun 2015 23:09:35 +0200 Subject: [PATCH 0192/2091] iio: light: tcs3414: Fix bug preventing to set integration time commit 33361e5678a541f82f29f85467d589e7bf8da76b upstream. the millisecond values in tcs3414_times should be checked against val2, not val, which is always zero. Signed-off-by: Peter Meerwald Reported-by: Stephan Kleisinger Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/light/tcs3414.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/light/tcs3414.c b/drivers/iio/light/tcs3414.c index 71c2bde275aac..f8b1df018abee 100644 --- a/drivers/iio/light/tcs3414.c +++ b/drivers/iio/light/tcs3414.c @@ -185,7 +185,7 @@ static int tcs3414_write_raw(struct iio_dev *indio_dev, if (val != 0) return -EINVAL; for (i = 0; i < ARRAY_SIZE(tcs3414_times); i++) { - if (val == tcs3414_times[i] * 1000) { + if (val2 == tcs3414_times[i] * 1000) { data->timing &= ~TCS3414_INTEG_MASK; data->timing |= i; return i2c_smbus_write_byte_data( From 734f561a7a7d2ccf792f53f35187136f61d38835 Mon Sep 17 00:00:00 2001 From: JM Friedt Date: Fri, 19 Jun 2015 14:48:06 +0200 Subject: [PATCH 0193/2091] iio: DAC: ad5624r_spi: fix bit shift of output data value commit adfa969850ae93beca57f7527f0e4dc10cbe1309 upstream. The value sent on the SPI bus is shifted by an erroneous number of bits. The shift value was already computed in the iio_chan_spec structure and hence subtracting this argument to 16 yields an erroneous data position in the SPI stream. Signed-off-by: JM Friedt Acked-by: Lars-Peter Clausen Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/dac/ad5624r_spi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/dac/ad5624r_spi.c b/drivers/iio/dac/ad5624r_spi.c index 61bb9d4239eaf..e98428df0d447 100644 --- a/drivers/iio/dac/ad5624r_spi.c +++ b/drivers/iio/dac/ad5624r_spi.c @@ -22,7 +22,7 @@ #include "ad5624r.h" static int ad5624r_spi_write(struct spi_device *spi, - u8 cmd, u8 addr, u16 val, u8 len) + u8 cmd, u8 addr, u16 val, u8 shift) { u32 data; u8 msg[3]; @@ -35,7 +35,7 @@ static int ad5624r_spi_write(struct spi_device *spi, * 14-, 12-bit input code followed by 0, 2, or 4 don't care bits, * for the AD5664R, AD5644R, and AD5624R, respectively. 
*/ - data = (0 << 22) | (cmd << 19) | (addr << 16) | (val << (16 - len)); + data = (0 << 22) | (cmd << 19) | (addr << 16) | (val << shift); msg[0] = data >> 16; msg[1] = data >> 8; msg[2] = data; From 85250adce4a27debef10bd510ddfc4de928b4586 Mon Sep 17 00:00:00 2001 From: Adriana Reus Date: Fri, 12 Jun 2015 18:10:23 +0300 Subject: [PATCH 0194/2091] iio: inv-mpu: Specify the expected format/precision for write channels commit 6a3c45bb5a385be7049a7725a4fe93eaa76915f4 upstream. The gyroscope needs IIO_VAL_INT_PLUS_NANO for the scale channel and unless specified write returns MICRO by default. This needs to be properly specified so that write operations into scale have the expected behaviour. Signed-off-by: Adriana Reus Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/imu/inv_mpu6050/inv_mpu_core.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c index 17d4bb15be4d2..65ce868371771 100644 --- a/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c +++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c @@ -431,6 +431,23 @@ static int inv_mpu6050_write_gyro_scale(struct inv_mpu6050_state *st, int val) return -EINVAL; } +static int inv_write_raw_get_fmt(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, long mask) +{ + switch (mask) { + case IIO_CHAN_INFO_SCALE: + switch (chan->type) { + case IIO_ANGL_VEL: + return IIO_VAL_INT_PLUS_NANO; + default: + return IIO_VAL_INT_PLUS_MICRO; + } + default: + return IIO_VAL_INT_PLUS_MICRO; + } + + return -EINVAL; +} static int inv_mpu6050_write_accel_scale(struct inv_mpu6050_state *st, int val) { int result, i; @@ -696,6 +713,7 @@ static const struct iio_info mpu_info = { .driver_module = THIS_MODULE, .read_raw = &inv_mpu6050_read_raw, .write_raw = &inv_mpu6050_write_raw, + .write_raw_get_fmt = &inv_write_raw_get_fmt, .attrs = &inv_attribute_group, .validate_trigger = inv_mpu6050_validate_trigger, }; From 21b9c4eca727901b770d387aead27b344dc7f9f6 Mon Sep 17 00:00:00 2001 From: Peter Meerwald Date: Sun, 21 Jun 2015 23:50:21 +0200 Subject: [PATCH 0195/2091] iio: tmp006: Check channel info on write commit 8d05abfaeff52bdf66aba3a3a337dcdbdb4911bf upstream. only SAMP_FREQ is writable Will lead to SAMP_FREQ being written by any attempt to write to the other exported attributes and hence a rather unexpected result! Signed-off-by: Peter Meerwald Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/temperature/tmp006.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iio/temperature/tmp006.c b/drivers/iio/temperature/tmp006.c index 84a0789c3d968..7a8050996b4ec 100644 --- a/drivers/iio/temperature/tmp006.c +++ b/drivers/iio/temperature/tmp006.c @@ -132,6 +132,9 @@ static int tmp006_write_raw(struct iio_dev *indio_dev, struct tmp006_data *data = iio_priv(indio_dev); int i; + if (mask != IIO_CHAN_INFO_SAMP_FREQ) + return -EINVAL; + for (i = 0; i < ARRAY_SIZE(tmp006_freqs); i++) if ((val == tmp006_freqs[i][0]) && (val2 == tmp006_freqs[i][1])) { From b2789eb0e024b09fde4d3a3b0ddae12d3a650902 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Sun, 24 May 2015 17:39:04 -0300 Subject: [PATCH 0196/2091] iio: twl4030-madc: Pass the IRQF_ONESHOT flag commit 6c0d48cb29c29b306ba3548afb45154d22eb4d78 upstream. Since commit 1c6c69525b40 ("genirq: Reject bogus threaded irq requests") threaded IRQs without a primary handler need to be requested with IRQF_ONESHOT, otherwise the request will fail. 
So pass the IRQF_ONESHOT flag in this case. The semantic patch that makes this change is available in scripts/coccinelle/misc/irqf_oneshot.cocci. Signed-off-by: Fabio Estevam Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/twl4030-madc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iio/adc/twl4030-madc.c b/drivers/iio/adc/twl4030-madc.c index 94c5f05b4bc1b..4caecbea4c97d 100644 --- a/drivers/iio/adc/twl4030-madc.c +++ b/drivers/iio/adc/twl4030-madc.c @@ -835,7 +835,8 @@ static int twl4030_madc_probe(struct platform_device *pdev) irq = platform_get_irq(pdev, 0); ret = devm_request_threaded_irq(&pdev->dev, irq, NULL, twl4030_madc_threaded_irq_handler, - IRQF_TRIGGER_RISING, "twl4030_madc", madc); + IRQF_TRIGGER_RISING | IRQF_ONESHOT, + "twl4030_madc", madc); if (ret) { dev_err(&pdev->dev, "could not request irq\n"); goto err_i2c; From f3a389420f672f820de69fb583fcab4502aa494e Mon Sep 17 00:00:00 2001 From: Daniel Baluta Date: Thu, 11 Jun 2015 18:49:33 +0300 Subject: [PATCH 0197/2091] iio: ABI: Clarify proximity output value commit bdc10d57f236b534fb675a4bbefd10017aeb2b26 upstream. Current description for proximity measurement is ambiguous. While the first part says that proximity is measured by observing reflectivity, the second part incorrectly infers that reported values should behave like a distance. This is because of AS3935 lightning sensor which uses the proximity API, while not being a true proximity sensor. Note this is marked for stable as it accompanies a fix in ABI usage to the sx9500 driver which would otherwise appear to be correct. Fixes: 614e8842ddf ("iio: ABI: add clarification for proximity") Signed-off-by: Daniel Baluta Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- Documentation/ABI/testing/sysfs-bus-iio | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-bus-iio b/Documentation/ABI/testing/sysfs-bus-iio index 3befcb19f4141..1fbdd79d16240 100644 --- a/Documentation/ABI/testing/sysfs-bus-iio +++ b/Documentation/ABI/testing/sysfs-bus-iio @@ -1165,10 +1165,8 @@ Description: object is near the sensor, usually be observing reflectivity of infrared or ultrasound emitted. Often these sensors are unit less and as such conversion - to SI units is not possible. Where it is, the units should - be meters. If such a conversion is not possible, the reported - values should behave in the same way as a distance, i.e. lower - values indicate something is closer to the sensor. + to SI units is not possible. Higher proximity measurements + indicate closer objects, and vice versa. What: /sys/.../iio:deviceX/in_illuminance_input What: /sys/.../iio:deviceX/in_illuminance_raw From 2f69632bdd525e69893f029c528b4d6dc8e123a1 Mon Sep 17 00:00:00 2001 From: Daniel Baluta Date: Thu, 11 Jun 2015 18:49:34 +0300 Subject: [PATCH 0198/2091] iio: proximity: sx9500: Fix proximity value commit fd1883f07cb434707e50c4c9a16e3ed4b3a5e74f upstream. Because of the ABI confusion proximity value exposed by SX9500 was inverted. 
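[ Illustration only, not part of the upstream patch: with the ABI clarified above, a larger in_proximity_raw reading simply means a closer object, so the driver reports the register value unmodified (see the hunk below) and userspace no longer needs to undo an inversion. A small userspace sketch; the sysfs path and the threshold are hypothetical:

    #include <stdio.h>

    int main(void)
    {
        FILE *f = fopen("/sys/bus/iio/devices/iio:device0/in_proximity_raw", "r");
        int val = 0;

        if (f) {
            if (fscanf(f, "%d", &val) != 1)
                val = 0;
            fclose(f);
        }
        printf("object is %s\n", val > 1000 ? "near" : "far"); /* higher = closer */
        return 0;
    }
]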
Signed-off-by: Daniel Baluta Reviewed-by: Vlad Dogaru Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/proximity/sx9500.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/proximity/sx9500.c b/drivers/iio/proximity/sx9500.c index fa40f6d0ca394..bd26a484abcc3 100644 --- a/drivers/iio/proximity/sx9500.c +++ b/drivers/iio/proximity/sx9500.c @@ -206,7 +206,7 @@ static int sx9500_read_proximity(struct sx9500_data *data, if (ret < 0) return ret; - *val = 32767 - (s16)be16_to_cpu(regval); + *val = be16_to_cpu(regval); return IIO_VAL_INT; } From 4cb9ad71fc2b17e1b8540de8943b5d7f5019e922 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Mon, 25 May 2015 22:36:58 +0200 Subject: [PATCH 0199/2091] iio: adc: rockchip_saradc: add missing MODULE_* data commit dc7b8d98ac003c9f1e83a5f927c372dac6f114a1 upstream. The module-data is currently missing. This includes the license-information which makes the driver taint the kernel and miss symbols when compiled as module. Fixes: 44d6f2ef94f9 ("iio: adc: add driver for Rockchip saradc") Signed-off-by: Heiko Stuebner Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/rockchip_saradc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/iio/adc/rockchip_saradc.c b/drivers/iio/adc/rockchip_saradc.c index 8d4e019ea4ca5..9c311c1e1ac7f 100644 --- a/drivers/iio/adc/rockchip_saradc.c +++ b/drivers/iio/adc/rockchip_saradc.c @@ -349,3 +349,7 @@ static struct platform_driver rockchip_saradc_driver = { }; module_platform_driver(rockchip_saradc_driver); + +MODULE_AUTHOR("Heiko Stuebner "); +MODULE_DESCRIPTION("Rockchip SARADC driver"); +MODULE_LICENSE("GPL v2"); From b7bc8d0ac57d3728d0a2c14db9dec2cf5cf02b15 Mon Sep 17 00:00:00 2001 From: Jan Leupold Date: Wed, 17 Jun 2015 18:21:36 +0200 Subject: [PATCH 0200/2091] iio: adc: at91_adc: allow to use full range of startup time commit 2ab5f39bc7825808e0fa1e7e5f0b23e174563467 upstream. The DT-Property "atmel,adc-startup-time" is stored in an u8 for a microsecond value. When trying to increase the value of STARTUP in Register AT91_ADC_MR some higher values can't be reached. Change the type in function parameter and private structure field from u8 to u32. 
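[ Worked example, not part of the upstream patch; clock and startup time are hypothetical. The problem is plain integer truncation of the "atmel,adc-startup-time" value before the tick calculation from the driver even runs:

    u32 requested   = 400;        /* microseconds from the device tree   */
    u8  stored      = requested;  /* old u8 field: 400 & 0xff = 144      */
    u32 adc_clk_khz = 1000;       /* 1 MHz ADC clock                     */

    /* at91sam9260 formula: ticks = round_up(time_us * adc_clk_khz / 1000 - 1, 8) / 8 */
    /* requested: round_up(399, 8) / 8 = 50 ticks                                     */
    /* stored:    round_up(143, 8) / 8 = 18 ticks, so the ADC starts too early        */

Widening the parameter and the startup_time field to u32 keeps the full range. ]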
Signed-off-by: Jan Leupold [nicolas.ferre@atmel.com: change commit message, increase u16 to u32 for startup time] Signed-off-by: Nicolas Ferre Acked-by: Alexandre Belloni Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/at91_adc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/iio/adc/at91_adc.c b/drivers/iio/adc/at91_adc.c index 8a0eb4a04fb55..7b40925dd4ff2 100644 --- a/drivers/iio/adc/at91_adc.c +++ b/drivers/iio/adc/at91_adc.c @@ -182,7 +182,7 @@ struct at91_adc_caps { u8 ts_pen_detect_sensitivity; /* startup time calculate function */ - u32 (*calc_startup_ticks)(u8 startup_time, u32 adc_clk_khz); + u32 (*calc_startup_ticks)(u32 startup_time, u32 adc_clk_khz); u8 num_channels; struct at91_adc_reg_desc registers; @@ -201,7 +201,7 @@ struct at91_adc_state { u8 num_channels; void __iomem *reg_base; struct at91_adc_reg_desc *registers; - u8 startup_time; + u32 startup_time; u8 sample_hold_time; bool sleep_mode; struct iio_trigger **trig; @@ -779,7 +779,7 @@ static int at91_adc_of_get_resolution(struct at91_adc_state *st, return ret; } -static u32 calc_startup_ticks_9260(u8 startup_time, u32 adc_clk_khz) +static u32 calc_startup_ticks_9260(u32 startup_time, u32 adc_clk_khz) { /* * Number of ticks needed to cover the startup time of the ADC @@ -790,7 +790,7 @@ static u32 calc_startup_ticks_9260(u8 startup_time, u32 adc_clk_khz) return round_up((startup_time * adc_clk_khz / 1000) - 1, 8) / 8; } -static u32 calc_startup_ticks_9x5(u8 startup_time, u32 adc_clk_khz) +static u32 calc_startup_ticks_9x5(u32 startup_time, u32 adc_clk_khz) { /* * For sama5d3x and at91sam9x5, the formula changes to: From 24dade33c059e297602a100d5e3b34de3106e259 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Fri, 19 Jun 2015 08:50:07 -0300 Subject: [PATCH 0201/2091] vb2: Don't WARN when v4l2_buffer.bytesused is 0 for multiplanar buffers commit 77a3c6fd90c94f635edb00d4a65f485687538791 upstream. Commit f61bf13b6a07 ("[media] vb2: add allow_zero_bytesused flag to the vb2_queue struct") added a WARN_ONCE to catch usage of a deprecated API using a zero value for v4l2_buffer.bytesused. However, the condition is checked incorrectly, as the v4L2_buffer bytesused field is supposed to be ignored for multiplanar buffers. This results in spurious warnings when using the multiplanar API. Fix it by checking v4l2_buffer.bytesused for uniplanar buffers and v4l2_plane.bytesused for multiplanar buffers. 
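[ Condensed sketch, not part of the upstream patch: the shape of the corrected check. The real placement is inside the plane-copy paths in the hunks below, and vb2_warn_zero_bytesused() is the helper the patch introduces:

    if (V4L2_TYPE_IS_OUTPUT(b->type)) {
        if (V4L2_TYPE_IS_MULTIPLANAR(b->type)) {
            for (plane = 0; plane < vb->num_planes; ++plane)
                if (b->m.planes[plane].bytesused == 0)
                    vb2_warn_zero_bytesused(vb);
        } else if (b->bytesused == 0) {
            vb2_warn_zero_bytesused(vb);
        }
    }
]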
Fixes: f61bf13b6a07 ("[media] vb2: add allow_zero_bytesused flag to the vb2_queue struct") Signed-off-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/v4l2-core/videobuf2-core.c | 33 +++++++++++++++++------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/drivers/media/v4l2-core/videobuf2-core.c b/drivers/media/v4l2-core/videobuf2-core.c index 66ada01c796ca..cf9d644a8aff4 100644 --- a/drivers/media/v4l2-core/videobuf2-core.c +++ b/drivers/media/v4l2-core/videobuf2-core.c @@ -1237,6 +1237,23 @@ void vb2_discard_done(struct vb2_queue *q) } EXPORT_SYMBOL_GPL(vb2_discard_done); +static void vb2_warn_zero_bytesused(struct vb2_buffer *vb) +{ + static bool __check_once __read_mostly; + + if (__check_once) + return; + + __check_once = true; + __WARN(); + + pr_warn_once("use of bytesused == 0 is deprecated and will be removed in the future,\n"); + if (vb->vb2_queue->allow_zero_bytesused) + pr_warn_once("use VIDIOC_DECODER_CMD(V4L2_DEC_CMD_STOP) instead.\n"); + else + pr_warn_once("use the actual size instead.\n"); +} + /** * __fill_vb2_buffer() - fill a vb2_buffer with information provided in a * v4l2_buffer by the userspace. The caller has already verified that struct @@ -1247,16 +1264,6 @@ static void __fill_vb2_buffer(struct vb2_buffer *vb, const struct v4l2_buffer *b { unsigned int plane; - if (V4L2_TYPE_IS_OUTPUT(b->type)) { - if (WARN_ON_ONCE(b->bytesused == 0)) { - pr_warn_once("use of bytesused == 0 is deprecated and will be removed in the future,\n"); - if (vb->vb2_queue->allow_zero_bytesused) - pr_warn_once("use VIDIOC_DECODER_CMD(V4L2_DEC_CMD_STOP) instead.\n"); - else - pr_warn_once("use the actual size instead.\n"); - } - } - if (V4L2_TYPE_IS_MULTIPLANAR(b->type)) { if (b->memory == V4L2_MEMORY_USERPTR) { for (plane = 0; plane < vb->num_planes; ++plane) { @@ -1297,6 +1304,9 @@ static void __fill_vb2_buffer(struct vb2_buffer *vb, const struct v4l2_buffer *b struct v4l2_plane *pdst = &v4l2_planes[plane]; struct v4l2_plane *psrc = &b->m.planes[plane]; + if (psrc->bytesused == 0) + vb2_warn_zero_bytesused(vb); + if (vb->vb2_queue->allow_zero_bytesused) pdst->bytesused = psrc->bytesused; else @@ -1331,6 +1341,9 @@ static void __fill_vb2_buffer(struct vb2_buffer *vb, const struct v4l2_buffer *b } if (V4L2_TYPE_IS_OUTPUT(b->type)) { + if (b->bytesused == 0) + vb2_warn_zero_bytesused(vb); + if (vb->vb2_queue->allow_zero_bytesused) v4l2_planes[0].bytesused = b->bytesused; else From 1202e777b67d669aef34326da1f248c933735c78 Mon Sep 17 00:00:00 2001 From: Thomas Reitmayr Date: Fri, 1 May 2015 20:18:04 -0300 Subject: [PATCH 0202/2091] media: Fix regression in some more dib0700 based devices commit e989a73ebd09d22c22ead51fa363a2f56f70f28a upstream. Fix an oops during device initialization by correctly setting size_of_priv instead of leaving it 0. The regression was introduced by 8abe4a0a3f6d4217b16a ("[media] dib7000: export just one symbol") and only fixed for one type of dib0700 based devices in 9e334c75642b6e5bfb95 ("[media] Fix regression in some dib0700 based devices"). 
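[ Note, not part of the upstream patch: the dvb-usb core allocates each adapter's private state from size_of_priv, so leaving it at 0 leaves the adapter state pointer unusable and leads to the oops described above. Abridged shape of an affected adapter entry after the fix (all fields other than the added one are elided):

    .adapter = {
        {
            .num_frontends = 1,
            .fe = { { /* frontend/tuner attach hooks */ } },
            .size_of_priv = sizeof(struct dib0700_adapter_state),
        },
    },
]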
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=92301 Fixes: 8abe4a0a3f6d4217b16a ("[media] dib7000: export just one symbol") Signed-off-by: Thomas Reitmayr Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/dvb-usb/dib0700_devices.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/media/usb/dvb-usb/dib0700_devices.c b/drivers/media/usb/dvb-usb/dib0700_devices.c index d7d55a20e9590..c170523226aad 100644 --- a/drivers/media/usb/dvb-usb/dib0700_devices.c +++ b/drivers/media/usb/dvb-usb/dib0700_devices.c @@ -3944,6 +3944,8 @@ struct dvb_usb_device_properties dib0700_devices[] = { DIB0700_DEFAULT_STREAMING_CONFIG(0x02), }}, + .size_of_priv = sizeof(struct + dib0700_adapter_state), }, { .num_frontends = 1, .fe = {{ @@ -3956,6 +3958,8 @@ struct dvb_usb_device_properties dib0700_devices[] = { DIB0700_DEFAULT_STREAMING_CONFIG(0x03), }}, + .size_of_priv = sizeof(struct + dib0700_adapter_state), } }, @@ -4009,6 +4013,8 @@ struct dvb_usb_device_properties dib0700_devices[] = { DIB0700_DEFAULT_STREAMING_CONFIG(0x02), }}, + .size_of_priv = sizeof(struct + dib0700_adapter_state), }, }, From 60f69783daf3967b828eb71eafd76d2ce3fabc6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20H=C3=A4rdeman?= Date: Mon, 30 Mar 2015 17:51:01 -0300 Subject: [PATCH 0203/2091] rc-core: fix dib0700 scancode generation for RC5 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 4d298b8539ed59f1d69d3aa6e41a2c4908137612 upstream. commit af3a4a9bbeb0 ("[media] dib0700: NEC scancode cleanup") cleaned up the NEC scancode logic but overlooked the RC5 case. This patch brings the RC5 case in line with the NEC code and makes the struct self-documenting. Signed-off-by: David Härdeman Reported-by: David Cimbůrek Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/dvb-usb/dib0700_core.c | 70 ++++++++++++++---------- 1 file changed, 40 insertions(+), 30 deletions(-) diff --git a/drivers/media/usb/dvb-usb/dib0700_core.c b/drivers/media/usb/dvb-usb/dib0700_core.c index 2b40393836ffa..0d248ce02a9b2 100644 --- a/drivers/media/usb/dvb-usb/dib0700_core.c +++ b/drivers/media/usb/dvb-usb/dib0700_core.c @@ -655,10 +655,20 @@ int dib0700_change_protocol(struct rc_dev *rc, u64 *rc_type) struct dib0700_rc_response { u8 report_id; u8 data_state; - u8 system; - u8 not_system; - u8 data; - u8 not_data; + union { + struct { + u8 system; + u8 not_system; + u8 data; + u8 not_data; + } nec; + struct { + u8 not_used; + u8 system; + u8 data; + u8 not_data; + } rc5; + }; }; #define RC_MSG_SIZE_V1_20 6 @@ -694,8 +704,8 @@ static void dib0700_rc_urb_completion(struct urb *purb) deb_data("IR ID = %02X state = %02X System = %02X %02X Cmd = %02X %02X (len %d)\n", poll_reply->report_id, poll_reply->data_state, - poll_reply->system, poll_reply->not_system, - poll_reply->data, poll_reply->not_data, + poll_reply->nec.system, poll_reply->nec.not_system, + poll_reply->nec.data, poll_reply->nec.not_data, purb->actual_length); switch (d->props.rc.core.protocol) { @@ -704,30 +714,30 @@ static void dib0700_rc_urb_completion(struct urb *purb) toggle = 0; /* NEC protocol sends repeat code as 0 0 0 FF */ - if (poll_reply->system == 0x00 && - poll_reply->not_system == 0x00 && - poll_reply->data == 0x00 && - poll_reply->not_data == 0xff) { + if (poll_reply->nec.system == 0x00 && + poll_reply->nec.not_system == 0x00 && + poll_reply->nec.data == 0x00 && + poll_reply->nec.not_data == 0xff) { poll_reply->data_state = 2; break; } - if 
((poll_reply->data ^ poll_reply->not_data) != 0xff) { + if ((poll_reply->nec.data ^ poll_reply->nec.not_data) != 0xff) { deb_data("NEC32 protocol\n"); - keycode = RC_SCANCODE_NEC32(poll_reply->system << 24 | - poll_reply->not_system << 16 | - poll_reply->data << 8 | - poll_reply->not_data); - } else if ((poll_reply->system ^ poll_reply->not_system) != 0xff) { + keycode = RC_SCANCODE_NEC32(poll_reply->nec.system << 24 | + poll_reply->nec.not_system << 16 | + poll_reply->nec.data << 8 | + poll_reply->nec.not_data); + } else if ((poll_reply->nec.system ^ poll_reply->nec.not_system) != 0xff) { deb_data("NEC extended protocol\n"); - keycode = RC_SCANCODE_NECX(poll_reply->system << 8 | - poll_reply->not_system, - poll_reply->data); + keycode = RC_SCANCODE_NECX(poll_reply->nec.system << 8 | + poll_reply->nec.not_system, + poll_reply->nec.data); } else { deb_data("NEC normal protocol\n"); - keycode = RC_SCANCODE_NEC(poll_reply->system, - poll_reply->data); + keycode = RC_SCANCODE_NEC(poll_reply->nec.system, + poll_reply->nec.data); } break; @@ -735,19 +745,19 @@ static void dib0700_rc_urb_completion(struct urb *purb) deb_data("RC5 protocol\n"); protocol = RC_TYPE_RC5; toggle = poll_reply->report_id; - keycode = RC_SCANCODE_RC5(poll_reply->system, poll_reply->data); + keycode = RC_SCANCODE_RC5(poll_reply->rc5.system, poll_reply->rc5.data); + + if ((poll_reply->rc5.data ^ poll_reply->rc5.not_data) != 0xff) { + /* Key failed integrity check */ + err("key failed integrity check: %02x %02x %02x %02x", + poll_reply->rc5.not_used, poll_reply->rc5.system, + poll_reply->rc5.data, poll_reply->rc5.not_data); + goto resubmit; + } break; } - if ((poll_reply->data + poll_reply->not_data) != 0xff) { - /* Key failed integrity check */ - err("key failed integrity check: %02x %02x %02x %02x", - poll_reply->system, poll_reply->not_system, - poll_reply->data, poll_reply->not_data); - goto resubmit; - } - rc_keydown(d->rc_dev, protocol, keycode, toggle); resubmit: From a7faceb949f9511c17172200738a621b7f0ef0ff Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 24 Apr 2015 03:55:07 -0300 Subject: [PATCH 0204/2091] cx18: add missing caps for the PCM video device commit 2b4fd3ede3bab65ef5b97387b90899d11e4d3202 upstream. The cx18 PCM video device didn't have any capabilities set, which caused a warnings in the v4l2 core: [ 6.229393] ------------[ cut here ]------------ [ 6.229414] WARNING: CPU: 1 PID: 593 at drivers/media/v4l2-core/v4l2-ioctl.c:1025 v4l_querycap+0x41/0x70 [videodev]() [ 6.229415] Modules linked in: cx18_alsa mxl5005s s5h1409 tuner_simple tuner_types cs5345 tuner intel_rapl iosf_mbi x86_pkg_temp_thermal coretemp raid1 snd_hda_codec_realtek kvm_intel snd_hda_codec_generic snd_hda_codec_hdmi kvm snd_oxygen(+) snd_hda_intel snd_oxygen_lib snd_hda_controller snd_hda_codec snd_mpu401_uart iTCO_wdt snd_rawmidi iTCO_vendor_support snd_hwdep crct10dif_pclmul crc32_pclmul crc32c_intel snd_seq cx18 snd_seq_device ghash_clmulni_intel videobuf_vmalloc tveeprom cx2341x snd_pcm serio_raw videobuf_core vfat dvb_core fat v4l2_common snd_timer videodev snd lpc_ich i2c_i801 joydev mfd_core mei_me media soundcore tpm_infineon soc_button_array tpm_tis mei shpchp tpm nfsd auth_rpcgss nfs_acl lockd grace sunrpc binfmt_misc i915 nouveau mxm_wmi wmi e1000e ttm i2c_algo_bit drm_kms_helper [ 6.229444] drm ptp pps_core video [ 6.229446] CPU: 1 PID: 593 Comm: v4l_id Not tainted 3.19.3-200.fc21.x86_64 #1 [ 6.229447] Hardware name: Gigabyte Technology Co., Ltd. 
Z87-D3HP/Z87-D3HP-CF, BIOS F6 01/20/2014 [ 6.229448] 0000000000000000 00000000d12b1131 ffff88042dacfc28 ffffffff8176e215 [ 6.229449] 0000000000000000 0000000000000000 ffff88042dacfc68 ffffffff8109bc1a [ 6.229451] ffffffffa0594000 ffff88042dacfd90 0000000000000000 ffffffffa04e2140 [ 6.229452] Call Trace: [ 6.229466] [] dump_stack+0x45/0x57 [ 6.229469] [] warn_slowpath_common+0x8a/0xc0 [ 6.229472] [] warn_slowpath_null+0x1a/0x20 [ 6.229474] [] v4l_querycap+0x41/0x70 [videodev] [ 6.229477] [] __video_do_ioctl+0x29c/0x320 [videodev] [ 6.229479] [] ? do_last+0x2f1/0x1210 [ 6.229491] [] video_usercopy+0x366/0x5d0 [videodev] [ 6.229494] [] ? v4l_querycap+0x70/0x70 [videodev] [ 6.229497] [] video_ioctl2+0x15/0x20 [videodev] [ 6.229499] [] v4l2_ioctl+0x164/0x180 [videodev] [ 6.229501] [] do_vfs_ioctl+0x2f8/0x500 [ 6.229502] [] SyS_ioctl+0x81/0xa0 [ 6.229505] [] system_call_fastpath+0x12/0x17 [ 6.229506] ---[ end trace dacd80d4b19277ea ]--- Added the necessary capabilities to stop this warning. Signed-off-by: Hans Verkuil Reported-by: Laura Abbott Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/pci/cx18/cx18-streams.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/media/pci/cx18/cx18-streams.c b/drivers/media/pci/cx18/cx18-streams.c index c82d25d533416..c9860845264fa 100644 --- a/drivers/media/pci/cx18/cx18-streams.c +++ b/drivers/media/pci/cx18/cx18-streams.c @@ -90,6 +90,7 @@ static struct { "encoder PCM audio", VFL_TYPE_GRABBER, CX18_V4L2_ENC_PCM_OFFSET, PCI_DMA_FROMDEVICE, + V4L2_CAP_TUNER | V4L2_CAP_AUDIO | V4L2_CAP_READWRITE, }, { /* CX18_ENC_STREAM_TYPE_IDX */ "encoder IDX", From ce99975dce28f20447fb72ddc8f8bf482d4f2559 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 28 Apr 2015 19:03:59 -0300 Subject: [PATCH 0205/2091] cx24117: fix a buffer overflow when checking userspace params commit 82e3b88b679049f043fe9b03991d6d66fc0a43c8 upstream. The maximum size for a DiSEqC command is 6, according to the userspace API. However, the code allows to write up much more values: drivers/media/dvb-frontends/cx24116.c:983 cx24116_send_diseqc_msg() error: buffer overflow 'd->msg' 6 <= 23 Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/dvb-frontends/cx24117.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/dvb-frontends/cx24117.c b/drivers/media/dvb-frontends/cx24117.c index acb965ce0358b..af6363573efd6 100644 --- a/drivers/media/dvb-frontends/cx24117.c +++ b/drivers/media/dvb-frontends/cx24117.c @@ -1043,7 +1043,7 @@ static int cx24117_send_diseqc_msg(struct dvb_frontend *fe, dev_dbg(&state->priv->i2c->dev, ")\n"); /* Validate length */ - if (d->msg_len > 15) + if (d->msg_len > sizeof(d->msg)) return -EINVAL; /* DiSEqC message */ From 16eeb2182fb8f31a91e831f6f1ead91f373df4f6 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 28 Apr 2015 19:02:19 -0300 Subject: [PATCH 0206/2091] af9013: Don't accept invalid bandwidth commit d7b76c91f471413de9ded837bddeca2164786571 upstream. If userspace sends an invalid bandwidth, it should either return EINVAL or switch to auto mode. This driver will go past an array and program the hardware on a wrong way if this happens. 
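[editor's note: illustrative sketch only, not part of the upstream commit; the struct and function names below are generic placeholders, not af9013 symbols] The fix applies the usual kernel pattern for a table lookup driven by untrusted input: if the search loop runs off the end of the table, bail out with -EINVAL instead of indexing one element past it, e.g.:

    struct coeff_entry { u32 bandwidth_hz; u8 val[24]; };

    /* bounds-checked lookup: reject unknown bandwidths instead of
     * reading past the end of the coefficient table */
    static int pick_coeff(const struct coeff_entry *lut, size_t n, u32 bw_hz)
    {
            size_t i;

            for (i = 0; i < n; i++)
                    if (lut[i].bandwidth_hz == bw_hz)
                            break;

            if (i == n)             /* nothing matched */
                    return -EINVAL;

            return i;               /* index of the matching entry */
    }
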
Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/dvb-frontends/af9013.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/media/dvb-frontends/af9013.c b/drivers/media/dvb-frontends/af9013.c index 8001690d7576c..ba6c8f6c42a1c 100644 --- a/drivers/media/dvb-frontends/af9013.c +++ b/drivers/media/dvb-frontends/af9013.c @@ -605,6 +605,10 @@ static int af9013_set_frontend(struct dvb_frontend *fe) } } + /* Return an error if can't find bandwidth or the right clock */ + if (i == ARRAY_SIZE(coeff_lut)) + return -EINVAL; + ret = af9013_wr_regs(state, 0xae00, coeff_lut[i].val, sizeof(coeff_lut[i].val)); } From 755d7f5c163af374219462e707cdd9b83ee2f830 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 27 Mar 2015 15:17:56 -0300 Subject: [PATCH 0207/2091] saa7164: fix querycap warning commit 534bc3e2ee93835badca753bedce8073c67caa92 upstream. Fix the VIDIOC_QUERYCAP warning due to the missing device_caps. Don't fill in the version field, the V4L2 core will do that for you. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/pci/saa7164/saa7164-encoder.c | 11 ++++++----- drivers/media/pci/saa7164/saa7164-vbi.c | 11 ++++++----- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/media/pci/saa7164/saa7164-encoder.c b/drivers/media/pci/saa7164/saa7164-encoder.c index 9266965412c34..7a0a65146723c 100644 --- a/drivers/media/pci/saa7164/saa7164-encoder.c +++ b/drivers/media/pci/saa7164/saa7164-encoder.c @@ -721,13 +721,14 @@ static int vidioc_querycap(struct file *file, void *priv, sizeof(cap->card)); sprintf(cap->bus_info, "PCI:%s", pci_name(dev->pci)); - cap->capabilities = + cap->device_caps = V4L2_CAP_VIDEO_CAPTURE | - V4L2_CAP_READWRITE | - 0; + V4L2_CAP_READWRITE | + V4L2_CAP_TUNER; - cap->capabilities |= V4L2_CAP_TUNER; - cap->version = 0; + cap->capabilities = cap->device_caps | + V4L2_CAP_VBI_CAPTURE | + V4L2_CAP_DEVICE_CAPS; return 0; } diff --git a/drivers/media/pci/saa7164/saa7164-vbi.c b/drivers/media/pci/saa7164/saa7164-vbi.c index 6e025fea25422..06117e6c0596b 100644 --- a/drivers/media/pci/saa7164/saa7164-vbi.c +++ b/drivers/media/pci/saa7164/saa7164-vbi.c @@ -660,13 +660,14 @@ static int vidioc_querycap(struct file *file, void *priv, sizeof(cap->card)); sprintf(cap->bus_info, "PCI:%s", pci_name(dev->pci)); - cap->capabilities = + cap->device_caps = V4L2_CAP_VBI_CAPTURE | - V4L2_CAP_READWRITE | - 0; + V4L2_CAP_READWRITE | + V4L2_CAP_TUNER; - cap->capabilities |= V4L2_CAP_TUNER; - cap->version = 0; + cap->capabilities = cap->device_caps | + V4L2_CAP_VIDEO_CAPTURE | + V4L2_CAP_DEVICE_CAPS; return 0; } From 22ab213ceae3260d181316f7361419fbe4a5682c Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 28 Apr 2015 18:34:40 -0300 Subject: [PATCH 0208/2091] s5h1420: fix a buffer overflow when checking userspace params commit 12f4543f5d6811f864e6c4952eb27253c7466c02 upstream. The maximum size for a DiSEqC command is 6, according to the userspace API. 
However, the code allows to write up to 7 values: drivers/media/dvb-frontends/s5h1420.c:193 s5h1420_send_master_cmd() error: buffer overflow 'cmd->msg' 6 <= 7 Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/dvb-frontends/s5h1420.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/dvb-frontends/s5h1420.c b/drivers/media/dvb-frontends/s5h1420.c index 93eeaf7118fd0..0b4f8fe6bf990 100644 --- a/drivers/media/dvb-frontends/s5h1420.c +++ b/drivers/media/dvb-frontends/s5h1420.c @@ -180,7 +180,7 @@ static int s5h1420_send_master_cmd (struct dvb_frontend* fe, int result = 0; dprintk("enter %s\n", __func__); - if (cmd->msg_len > 8) + if (cmd->msg_len > sizeof(cmd->msg)) return -EINVAL; /* setup for DISEQC */ From 27b76178c496bdb4fc24278ff9e659dc0a5db10f Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 28 Apr 2015 18:51:17 -0300 Subject: [PATCH 0209/2091] cx24116: fix a buffer overflow when checking userspace params commit 1fa2337a315a2448c5434f41e00d56b01a22283c upstream. The maximum size for a DiSEqC command is 6, according to the userspace API. However, the code allows to write up much more values: drivers/media/dvb-frontends/cx24116.c:983 cx24116_send_diseqc_msg() error: buffer overflow 'd->msg' 6 <= 23 Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/dvb-frontends/cx24116.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/media/dvb-frontends/cx24116.c b/drivers/media/dvb-frontends/cx24116.c index 2916d7c74a1da..7bc68b355c0b9 100644 --- a/drivers/media/dvb-frontends/cx24116.c +++ b/drivers/media/dvb-frontends/cx24116.c @@ -963,6 +963,10 @@ static int cx24116_send_diseqc_msg(struct dvb_frontend *fe, struct cx24116_state *state = fe->demodulator_priv; int i, ret; + /* Validate length */ + if (d->msg_len > sizeof(d->msg)) + return -EINVAL; + /* Dump DiSEqC message */ if (debug) { printk(KERN_INFO "cx24116: %s(", __func__); @@ -974,10 +978,6 @@ static int cx24116_send_diseqc_msg(struct dvb_frontend *fe, printk(") toneburst=%d\n", toneburst); } - /* Validate length */ - if (d->msg_len > (CX24116_ARGLEN - CX24116_DISEQC_MSGOFS)) - return -EINVAL; - /* DiSEqC message */ for (i = 0; i < d->msg_len; i++) state->dsec_cmd.args[CX24116_DISEQC_MSGOFS + i] = d->msg[i]; From b955267c9fd99df9a4fcf1b2b570c3c1fa89d8ba Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Thu, 28 May 2015 14:28:12 +0100 Subject: [PATCH 0210/2091] ASoC: arizona: Fix noise generator gain TLV commit 15575ed544910464715df5c45a44b9732e415b93 upstream. The Arizona codec drivers had an incorrect dB scaling for the noise generator gain that started at 0dB and went upwards. Actually the highest setting is 0dB. 
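[editor's note: worked example, not part of the upstream commit] DECLARE_TLV_DB_SCALE() takes its minimum and step in 0.01 dB units, with register value 0 mapping to the minimum and each subsequent value adding one step. The old scale (0, 600, 0) therefore told user space the gain started at 0 dB and climbed in 6 dB steps, while the corrected (-13200, 600, 0) maps a raw register value v to

    gain_dB(v) = -132 + 6 * v

so, assuming the field spans v = 0..22 (which is what -13200/600 implies), the topmost setting comes out at -132 + 6 * 22 = 0 dB, matching the statement above that 0 dB is the highest setting.
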
Signed-off-by: Richard Fitzgerald Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/wm5102.c | 2 +- sound/soc/codecs/wm5110.c | 2 +- sound/soc/codecs/wm8997.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/wm5102.c b/sound/soc/codecs/wm5102.c index 0c6d1bc0526ef..d476221dba51d 100644 --- a/sound/soc/codecs/wm5102.c +++ b/sound/soc/codecs/wm5102.c @@ -42,7 +42,7 @@ struct wm5102_priv { static DECLARE_TLV_DB_SCALE(ana_tlv, 0, 100, 0); static DECLARE_TLV_DB_SCALE(eq_tlv, -1200, 100, 0); static DECLARE_TLV_DB_SCALE(digital_tlv, -6400, 50, 0); -static DECLARE_TLV_DB_SCALE(noise_tlv, 0, 600, 0); +static DECLARE_TLV_DB_SCALE(noise_tlv, -13200, 600, 0); static DECLARE_TLV_DB_SCALE(ng_tlv, -10200, 600, 0); static const struct wm_adsp_region wm5102_dsp1_regions[] = { diff --git a/sound/soc/codecs/wm5110.c b/sound/soc/codecs/wm5110.c index fbaeddb3e9033..3ee6cfd0578be 100644 --- a/sound/soc/codecs/wm5110.c +++ b/sound/soc/codecs/wm5110.c @@ -167,7 +167,7 @@ static int wm5110_sysclk_ev(struct snd_soc_dapm_widget *w, static DECLARE_TLV_DB_SCALE(ana_tlv, 0, 100, 0); static DECLARE_TLV_DB_SCALE(eq_tlv, -1200, 100, 0); static DECLARE_TLV_DB_SCALE(digital_tlv, -6400, 50, 0); -static DECLARE_TLV_DB_SCALE(noise_tlv, 0, 600, 0); +static DECLARE_TLV_DB_SCALE(noise_tlv, -13200, 600, 0); static DECLARE_TLV_DB_SCALE(ng_tlv, -10200, 600, 0); #define WM5110_NG_SRC(name, base) \ diff --git a/sound/soc/codecs/wm8997.c b/sound/soc/codecs/wm8997.c index a4d11770630cd..e7c81baefe662 100644 --- a/sound/soc/codecs/wm8997.c +++ b/sound/soc/codecs/wm8997.c @@ -40,7 +40,7 @@ struct wm8997_priv { static DECLARE_TLV_DB_SCALE(ana_tlv, 0, 100, 0); static DECLARE_TLV_DB_SCALE(eq_tlv, -1200, 100, 0); static DECLARE_TLV_DB_SCALE(digital_tlv, -6400, 50, 0); -static DECLARE_TLV_DB_SCALE(noise_tlv, 0, 600, 0); +static DECLARE_TLV_DB_SCALE(noise_tlv, -13200, 600, 0); static DECLARE_TLV_DB_SCALE(ng_tlv, -10200, 600, 0); static const struct reg_default wm8997_sysclk_reva_patch[] = { From d83057a32525626d96217f0112c7197b223180fe Mon Sep 17 00:00:00 2001 From: Nicolas Boichat Date: Fri, 5 Jun 2015 18:42:12 +0800 Subject: [PATCH 0211/2091] ASoC: rt5645: Init jack_detect_work before registering irq commit 7ea3470a7277380248135a592a849e1c27960b2f upstream. Prevents frequent panic on boot, if the irq handler rt5645_irq gets called before the workqueue rt5645_jack_detect_work is initialized. 
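[editor's note: generic sketch, not the driver's literal code; priv, jack_detect_work_fn and my_codec_irq are placeholders] The ordering rule the patch enforces is the usual one for threaded IRQs: any work item the handler may queue must be initialized before request_threaded_irq(), because the interrupt can fire as soon as the handler is registered:

    /* 1: make the work item valid before the IRQ can possibly run */
    INIT_DELAYED_WORK(&priv->jack_detect_work, jack_detect_work_fn);

    /* 2: only now register the handler that queues that work */
    ret = request_threaded_irq(i2c->irq, NULL, my_codec_irq,
                               IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING |
                               IRQF_ONESHOT, "my-codec", priv);
    if (ret)
            return ret;
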
Signed-off-by: Nicolas Boichat Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/rt5645.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index be4d741c45baa..2ee44abd56a6b 100644 --- a/sound/soc/codecs/rt5645.c +++ b/sound/soc/codecs/rt5645.c @@ -2837,6 +2837,8 @@ static int rt5645_i2c_probe(struct i2c_client *i2c, } } + INIT_DELAYED_WORK(&rt5645->jack_detect_work, rt5645_jack_detect_work); + if (rt5645->i2c->irq) { ret = request_threaded_irq(rt5645->i2c->irq, NULL, rt5645_irq, IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING @@ -2855,8 +2857,6 @@ static int rt5645_i2c_probe(struct i2c_client *i2c, dev_err(&i2c->dev, "Fail gpio_direction hp_det_gpio\n"); } - INIT_DELAYED_WORK(&rt5645->jack_detect_work, rt5645_jack_detect_work); - return snd_soc_register_codec(&i2c->dev, &soc_codec_dev_rt5645, rt5645_dai, ARRAY_SIZE(rt5645_dai)); } From b5bb3aa87c167fcf227996e8ea6fc28332f316b4 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Tue, 26 May 2015 20:35:08 +0800 Subject: [PATCH 0212/2091] ASoC: max98925: Fix mask for setting DAI invert mode commit 0b51601d4504f46f585eed823485101390f0b588 upstream. The M98925_DAI_WCI_MASK bit is not updated with current code. To properly set the DAI invert mode, the mask should be M98925_DAI_BCI_MASK | M98925_DAI_WCI_MASK. Signed-off-by: Axel Lin Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/max98925.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/max98925.c b/sound/soc/codecs/max98925.c index 9b5a17de46909..aad664225dc3a 100644 --- a/sound/soc/codecs/max98925.c +++ b/sound/soc/codecs/max98925.c @@ -346,7 +346,7 @@ static int max98925_dai_set_fmt(struct snd_soc_dai *codec_dai, } regmap_update_bits(max98925->regmap, MAX98925_FORMAT, - M98925_DAI_BCI_MASK, invert); + M98925_DAI_BCI_MASK | M98925_DAI_WCI_MASK, invert); return 0; } From d23555e426b6531eccb1018714adfc1e6f762c6d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 21 May 2015 11:07:08 +0200 Subject: [PATCH 0213/2091] ASoC: qcom: remove incorrect dependencies commit a7310c496f376b945e7e61f64d69c9c0a93ee1ee upstream. Compile-tests show a warning for the newly added SND_SOC_STORM symbol: warning: (SND_SOC_STORM) selects SND_SOC_LPASS_CPU which has unmet direct dependencies (SOUND && !M68K && !UML && SND && SND_SOC && SND_SOC_QCOM) The problem is that it can be selected for COMPILE_TEST on non-QCOM builds, but the symbols it selects have a dependency. Dropping the dependencies makes it work without warnings and no other side-effects, because these are not user-visible. Signed-off-by: Arnd Bergmann Fixes: f380dd3f3cd ("ASoC: qcom: Add ability to build QCOM drivers") Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/qcom/Kconfig | 2 -- 1 file changed, 2 deletions(-) diff --git a/sound/soc/qcom/Kconfig b/sound/soc/qcom/Kconfig index 5f58e4f1bca98..b07f183fc47f0 100644 --- a/sound/soc/qcom/Kconfig +++ b/sound/soc/qcom/Kconfig @@ -6,12 +6,10 @@ config SND_SOC_QCOM config SND_SOC_LPASS_CPU tristate - depends on SND_SOC_QCOM select REGMAP_MMIO config SND_SOC_LPASS_PLATFORM tristate - depends on SND_SOC_QCOM select REGMAP_MMIO config SND_SOC_STORM From ebfcb43f3fab1a01fa04bd5b167826c0b1d97742 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 10 Jun 2015 18:37:23 +0300 Subject: [PATCH 0214/2091] ASoC: imx-wm8962: Add a missing error check commit 474ff0ae23b834e9fc18374d14bb5f3e7b3828b4 upstream. 
My static checker complains that: sound/soc/fsl/imx-wm8962.c:196 imx_wm8962_probe() warn: we tested 'ret' before and it was 'false' The intent was that we use "ret" to check imx_audmux_v2_configure_port(). Fixes: 8de2ae2a7f1f ('ASoC: fsl: add imx-wm8962 machine driver') Signed-off-by: Dan Carpenter Otherwise, Acked-by: Nicolin Chen Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/fsl/imx-wm8962.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/fsl/imx-wm8962.c b/sound/soc/fsl/imx-wm8962.c index cd146d4fa8054..b38b98cae855d 100644 --- a/sound/soc/fsl/imx-wm8962.c +++ b/sound/soc/fsl/imx-wm8962.c @@ -190,7 +190,7 @@ static int imx_wm8962_probe(struct platform_device *pdev) dev_err(&pdev->dev, "audmux internal port setup failed\n"); return ret; } - imx_audmux_v2_configure_port(ext_port, + ret = imx_audmux_v2_configure_port(ext_port, IMX_AUDMUX_V2_PTCR_SYN, IMX_AUDMUX_V2_PDCR_RXDSEL(int_port)); if (ret) { From 91ff38153a7ba011a9bd56a0ffa52f6bed9d1059 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 19 May 2015 14:47:32 +0200 Subject: [PATCH 0215/2091] ASoC: omap: fix up SND_OMAP_SOC_OMAP_ABE_TWL6040 dependency, again commit 0574eab363ace70ef275d4caad6eadc458d33728 upstream. I tried to fix this before and submitted a working patch, but after some discussion we came up with what seemed to be a nicer solution, resulting in commit 3d4cf65e2d ("ASoC: omap: fix up SND_OMAP_SOC_OMAP_ABE_TWL6040 dependency"). Unfortunately, that version was incomplete, and we still get this build error: drivers/clk/clk-palmas.c:46:16: error: field 'hw' has incomplete type drivers/clk/clk-palmas.c: In function 'to_palmas_clks_info': drivers/clk/clk-palmas.c:54:74: warning: initialization from incompatible pointer type [-Winc This happens only in randconfig builds that turn on MFD_PALMAS on a platform other than OMAP2+ when COMPILE_TEST is set but COMMON_CLK is not. The new approach is only 'select COMMON_CLK_PALMAS' if we know that we are on an OMAP5 platform and MFD_PALMAS is already set. This patch has survived thousands of randconfig builds and I don't see a remaining hole in the logic. Fixes: 3d4cf65e2d ("ASoC: omap: fix up SND_OMAP_SOC_OMAP_ABE_TWL6040 dependency") Signed-off-by: Arnd Bergmann Acked-by: Peter Ujfalusi Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/omap/Kconfig | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sound/soc/omap/Kconfig b/sound/soc/omap/Kconfig index 6768e4f7d7d0e..30d0109703a93 100644 --- a/sound/soc/omap/Kconfig +++ b/sound/soc/omap/Kconfig @@ -100,12 +100,13 @@ config SND_OMAP_SOC_OMAP_TWL4030 config SND_OMAP_SOC_OMAP_ABE_TWL6040 tristate "SoC Audio support for OMAP boards using ABE and twl6040 codec" - depends on TWL6040_CORE && SND_OMAP_SOC && (ARCH_OMAP4 || SOC_OMAP5 || COMPILE_TEST) + depends on TWL6040_CORE && SND_OMAP_SOC + depends on ARCH_OMAP4 || (SOC_OMAP5 && MFD_PALMAS) || COMPILE_TEST select SND_OMAP_SOC_DMIC select SND_OMAP_SOC_MCPDM select SND_SOC_TWL6040 select SND_SOC_DMIC - select COMMON_CLK_PALMAS if MFD_PALMAS + select COMMON_CLK_PALMAS if (SOC_OMAP5 && MFD_PALMAS) help Say Y if you want to add support for SoC audio on OMAP boards using ABE and twl6040 codec. 
This driver currently supports: From 5d799a8697f8d2bfa228be19f1ac72d8e98ce895 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Sun, 10 May 2015 11:35:06 +0800 Subject: [PATCH 0216/2091] ASoC: wm8737: Fixup setting VMID Impedance control register MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 14ba3ec1de043260cecd9e828ea2e3a0ad302893 upstream. According to the datasheet: R10 (0Ah) VMID Impedance Control BIT 3:2 VMIDSEL DEFAULT 00 DESCRIPTION: VMID impedance selection control 00: 75kΩ output 01: 300kΩ output 10: 2.5kΩ output WM8737_VMIDSEL_MASK is 0xC (VMIDSEL - [3:2]), so it needs to left shift WM8737_VMIDSEL_SHIFT bits for setting these bits. Signed-off-by: Axel Lin Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/wm8737.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/wm8737.c b/sound/soc/codecs/wm8737.c index ada9ac1ba2c64..51171e457fa48 100644 --- a/sound/soc/codecs/wm8737.c +++ b/sound/soc/codecs/wm8737.c @@ -483,7 +483,8 @@ static int wm8737_set_bias_level(struct snd_soc_codec *codec, /* Fast VMID ramp at 2*2.5k */ snd_soc_update_bits(codec, WM8737_MISC_BIAS_CONTROL, - WM8737_VMIDSEL_MASK, 0x4); + WM8737_VMIDSEL_MASK, + 2 << WM8737_VMIDSEL_SHIFT); /* Bring VMID up */ snd_soc_update_bits(codec, WM8737_POWER_MANAGEMENT, @@ -497,7 +498,8 @@ static int wm8737_set_bias_level(struct snd_soc_codec *codec, /* VMID at 2*300k */ snd_soc_update_bits(codec, WM8737_MISC_BIAS_CONTROL, - WM8737_VMIDSEL_MASK, 2); + WM8737_VMIDSEL_MASK, + 1 << WM8737_VMIDSEL_SHIFT); break; From 98384697170b16d11a47d3e192ee4a0f5f8d7970 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Fri, 15 May 2015 09:15:16 +0800 Subject: [PATCH 0217/2091] ASoC: wm8955: Fix setting wrong register for WM8955_K_8_0_MASK bits commit 12c350050538c7dc779c083b7342bfd20f74949c upstream. WM8955_K_8_0_MASK bits is controlled by WM8955_PLL_CONTROL_3 rather than WM8955_PLL_CONTROL_2. Signed-off-by: Axel Lin Acked-by: Charles Keepax Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/wm8955.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/wm8955.c b/sound/soc/codecs/wm8955.c index 00bec915d6522..03e04bf6c5ba2 100644 --- a/sound/soc/codecs/wm8955.c +++ b/sound/soc/codecs/wm8955.c @@ -298,7 +298,7 @@ static int wm8955_configure_clocking(struct snd_soc_codec *codec) snd_soc_update_bits(codec, WM8955_PLL_CONTROL_2, WM8955_K_17_9_MASK, (pll.k >> 9) & WM8955_K_17_9_MASK); - snd_soc_update_bits(codec, WM8955_PLL_CONTROL_2, + snd_soc_update_bits(codec, WM8955_PLL_CONTROL_3, WM8955_K_8_0_MASK, pll.k & WM8955_K_8_0_MASK); if (pll.k) From 142796fdb1a12dfc52a5dfa152849b19b62ef3bf Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Mon, 11 May 2015 09:04:06 +0800 Subject: [PATCH 0218/2091] ASoC: wm8903: Fix define for WM8903_VMID_RES_250K MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit ebb6ad73e645b8f2d098dd3c41d2ff0da4146a02 upstream. VMID Control 0 BIT[2:1] is VMID Divider Enable and Select 00 = VMID disabled (for OFF mode) 01 = 2 x 50kΩ divider (for normal operation) 10 = 2 x 250kΩ divider (for low power standby) 11 = 2 x 5kΩ divider (for fast start-up) So WM8903_VMID_RES_250K should be 2 << 1, which is 4. 
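[editor's note: arithmetic cross-check, not part of the upstream commit] With the divider select occupying bits [2:1], each datasheet setting is encoded as (sel << 1): 00 (disabled) -> 0, 01 (2 x 50kΩ) -> 2, 10 (2 x 250kΩ) -> 4, 11 (2 x 5kΩ) -> 6. The neighbouring defines in the header, WM8903_VMID_RES_50K (2) and WM8903_VMID_RES_5K (6), already follow that encoding, so the old value 3 was the odd one out and 4 is the consistent choice.
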
Signed-off-by: Axel Lin Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/wm8903.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/wm8903.h b/sound/soc/codecs/wm8903.h index db949311c0f20..0bb4a647755d8 100644 --- a/sound/soc/codecs/wm8903.h +++ b/sound/soc/codecs/wm8903.h @@ -172,7 +172,7 @@ extern int wm8903_mic_detect(struct snd_soc_codec *codec, #define WM8903_VMID_BUF_ENA_WIDTH 1 /* VMID_BUF_ENA */ #define WM8903_VMID_RES_50K 2 -#define WM8903_VMID_RES_250K 3 +#define WM8903_VMID_RES_250K 4 #define WM8903_VMID_RES_5K 6 /* From 01567c41cfbca8fdc28565f0c33e89bbcb87ae2d Mon Sep 17 00:00:00 2001 From: Zidan Wang Date: Thu, 11 Jun 2015 19:14:36 +0800 Subject: [PATCH 0219/2091] ASoC: wm8960: the enum of "DAC Polarity" should be wm8960_enum[1] commit a077e81ec61e07a7f86997d045109f06719fbffe upstream. the enum of "DAC Polarity" should be wm8960_enum[1]. Signed-off-by: Zidan Wang Acked-by: Charles Keepax Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/wm8960.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/wm8960.c b/sound/soc/codecs/wm8960.c index e97a7615df850..8d7f632534406 100644 --- a/sound/soc/codecs/wm8960.c +++ b/sound/soc/codecs/wm8960.c @@ -245,7 +245,7 @@ SOC_SINGLE("PCM Playback -6dB Switch", WM8960_DACCTL1, 7, 1, 0), SOC_ENUM("ADC Polarity", wm8960_enum[0]), SOC_SINGLE("ADC High Pass Filter Switch", WM8960_DACCTL1, 0, 1, 0), -SOC_ENUM("DAC Polarity", wm8960_enum[2]), +SOC_ENUM("DAC Polarity", wm8960_enum[1]), SOC_SINGLE_BOOL_EXT("DAC Deemphasis Switch", 0, wm8960_get_deemph, wm8960_put_deemph), From 849349aa2dabe93aee517f337e2394ab62921393 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Thu, 4 Jun 2015 16:04:14 +0300 Subject: [PATCH 0220/2091] ASoC: tas2552: Fix kernel crash when the codec is loaded but not part of a card commit 80ba2669ec8c3e6517aa935001f6cb8809bf3df4 upstream. If the card is not part of any card the tas_data->codec is NULL since it is set only during snd_soc_codec_driver.probe, which is not yet called. Signed-off-by: Peter Ujfalusi Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/tas2552.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/codecs/tas2552.c b/sound/soc/codecs/tas2552.c index dfb4ff5cc9ea1..1630c602ec863 100644 --- a/sound/soc/codecs/tas2552.c +++ b/sound/soc/codecs/tas2552.c @@ -120,6 +120,9 @@ static void tas2552_sw_shutdown(struct tas2552_data *tas_data, int sw_shutdown) { u8 cfg1_reg; + if (!tas_data->codec) + return; + if (sw_shutdown) cfg1_reg = 0; else From 3041c3fb3bfc12bc2a5c9aa69d76cdb4c6f0476d Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Thu, 4 Jun 2015 16:04:15 +0300 Subject: [PATCH 0221/2091] ASoC: tas2552: Fix kernel crash caused by wrong kcontrol entry commit 1cf0f44811b754b64283b11ef0e60cb0de07b29c upstream. SOC_DAPM_SINGLE("Playback AMP", ..) should not be under kcontrols. It causes kernel crash (NULL pointer) when the mixers are listed. 
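[editor's note: illustrative sketch, not tas2552's actual tables; the widget and control names are made up] A SOC_DAPM_SINGLE() control's get/put handlers expect to be attached to a DAPM widget, so it belongs in a widget definition rather than in the plain snd_kcontrol_new array the codec registers directly; listing it there is what made enumerating the mixers dereference a NULL pointer. The usual placement looks like:

    static const struct snd_kcontrol_new amp_switch =
            SOC_DAPM_SINGLE("Switch", SND_SOC_NOPM, 0, 1, 0);

    static const struct snd_soc_dapm_widget my_widgets[] = {
            /* the switch lives on the widget, not in the codec's control list */
            SND_SOC_DAPM_SWITCH("Playback AMP", SND_SOC_NOPM, 0, 0, &amp_switch),
    };
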
Signed-off-by: Peter Ujfalusi Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/tas2552.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/soc/codecs/tas2552.c b/sound/soc/codecs/tas2552.c index 1630c602ec863..18558595ba723 100644 --- a/sound/soc/codecs/tas2552.c +++ b/sound/soc/codecs/tas2552.c @@ -338,7 +338,6 @@ static DECLARE_TLV_DB_SCALE(dac_tlv, -7, 100, 24); static const struct snd_kcontrol_new tas2552_snd_controls[] = { SOC_SINGLE_TLV("Speaker Driver Playback Volume", TAS2552_PGA_GAIN, 0, 0x1f, 1, dac_tlv), - SOC_DAPM_SINGLE("Playback AMP", SND_SOC_NOPM, 0, 1, 0), }; static const struct reg_default tas2552_init_regs[] = { From eda7047688d78e9450a73d82fef825fc3c384c70 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Thu, 18 Jun 2015 14:50:18 -0400 Subject: [PATCH 0222/2091] libata: Do not blacklist Micron M500DC commit 243918be6393f643e513a26e7882e6ae06ff7717 upstream. Queued TRIM got disabled on Micron M500DC drives thanks to the "Micron_M500*" pattern we had in place to accommodate the previous generation of this drive family. Tweak the blacklist entry slightly so we only disable queued TRIM for the non-DC variants of M500 drives. Signed-off-by: Martin K. Petersen Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libata-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 577849c6611ac..48007023eab1c 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4225,7 +4225,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { { "PIONEER DVD-RW DVR-216D", NULL, ATA_HORKAGE_NOSETXFER }, /* devices that don't properly handle queued TRIM commands */ - { "Micron_M500*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | + { "Micron_M500_*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | ATA_HORKAGE_ZERO_AFTER_TRIM, }, { "Crucial_CT*M500*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | ATA_HORKAGE_ZERO_AFTER_TRIM, }, From 5c040d16a3786184356bb0baaa6bc74294bfd88f Mon Sep 17 00:00:00 2001 From: Aleksei Mamlin Date: Wed, 1 Jul 2015 13:48:30 +0300 Subject: [PATCH 0223/2091] libata: add ATA_HORKAGE_BROKEN_FPDMA_AA quirk for HP 250GB SATA disk VB0250EAVER commit 08c85d2a599d967ede38a847f5594447b6100642 upstream. Enabling AA on HP 250GB SATA disk VB0250EAVER causes errors: [ 3.788362] ata3.00: failed to enable AA (error_mask=0x1) [ 3.789243] ata3.00: failed to enable AA (error_mask=0x1) Add the ATA_HORKAGE_BROKEN_FPDMA_AA for this specific harddisk. tj: Collected FPDMA_AA entries and updated comment. 
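[editor's note: illustration, not additional code] For reference, the entry this patch adds has the usual ata_device_blacklist[] shape -- a glob pattern matched against the model string reported by IDENTIFY DEVICE, an optional firmware-revision pattern (NULL matches any revision), and the quirk flags to apply:

    { "VB0250EAVER",  "HPG7",  ATA_HORKAGE_BROKEN_FPDMA_AA },
    /*  model glob    fw rev   horkage flag(s) */
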
Signed-off-by: Aleksei Mamlin Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libata-core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 48007023eab1c..375ff9dc95477 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4174,9 +4174,10 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { { "ST3320[68]13AS", "SD1[5-9]", ATA_HORKAGE_NONCQ | ATA_HORKAGE_FIRMWARE_WARN }, - /* Seagate Momentus SpinPoint M8 seem to have FPMDA_AA issues */ + /* drives which fail FPDMA_AA activation (some may freeze afterwards) */ { "ST1000LM024 HN-M101MBB", "2AR10001", ATA_HORKAGE_BROKEN_FPDMA_AA }, { "ST1000LM024 HN-M101MBB", "2BA30001", ATA_HORKAGE_BROKEN_FPDMA_AA }, + { "VB0250EAVER", "HPG7", ATA_HORKAGE_BROKEN_FPDMA_AA }, /* Blacklist entries taken from Silicon Image 3124/3132 Windows driver .inf file - also several Linux problem reports */ From 822d2eac314e6d7ffc3e98d71676d3d9f6ef7894 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 8 Jul 2015 13:06:12 -0400 Subject: [PATCH 0224/2091] libata: increase the timeout when setting transfer mode commit d531be2ca2f27cca5f041b6a140504999144a617 upstream. I have a ST4000DM000 disk. If Linux is booted while the disk is spun down, the command that sets transfer mode causes the disk to spin up. The spin-up takes longer than the default 5s timeout, so the command fails and timeout is reported. Fix this by increasing the timeout to 15s, which is enough for the disk to spin up. Signed-off-by: Mikulas Patocka Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libata-core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 375ff9dc95477..cb38f2b716b8d 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4502,7 +4502,8 @@ static unsigned int ata_dev_set_xfermode(struct ata_device *dev) else /* In the ancient relic department - skip all of this */ return 0; - err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0); + /* On some disks, this command causes spin-up, so we need longer timeout */ + err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 15000); DPRINTK("EXIT, err_mask=%x\n", err_mask); return err_mask; From a1fcca3abfc219a53e397d131d253603d9f04015 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Mon, 4 May 2015 21:54:21 -0400 Subject: [PATCH 0225/2091] libata: Fall back to unqueued READ LOG EXT if the DMA variant fails commit 5d3abf8ff67f49271a42c0f7fa4f20f9e046bf0e upstream. Some devices advertise support for the READ/WRITE LOG DMA EXT commands but fail when we try to issue them. This can lead to queued TRIM being unintentionally disabled since the relevant feature flag is located in a general purpose log page. Fall back to unqueued READ LOG EXT if the DMA variant fails while reading a log page. Signed-off-by: Martin K. 
Petersen Reviewed-by: Hannes Reinecke Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libata-eh.c | 12 +++++++++++- include/linux/libata.h | 1 + 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index cf0022ec07f24..f0931742a4392 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -1507,13 +1507,17 @@ unsigned int ata_read_log_page(struct ata_device *dev, u8 log, { struct ata_taskfile tf; unsigned int err_mask; + bool dma = false; DPRINTK("read log page - log 0x%x, page 0x%x\n", log, page); +retry: ata_tf_init(dev, &tf); - if (dev->dma_mode && ata_id_has_read_log_dma_ext(dev->id)) { + if (dev->dma_mode && ata_id_has_read_log_dma_ext(dev->id) && + !(dev->horkage & ATA_HORKAGE_NO_NCQ_LOG)) { tf.command = ATA_CMD_READ_LOG_DMA_EXT; tf.protocol = ATA_PROT_DMA; + dma = true; } else { tf.command = ATA_CMD_READ_LOG_EXT; tf.protocol = ATA_PROT_PIO; @@ -1527,6 +1531,12 @@ unsigned int ata_read_log_page(struct ata_device *dev, u8 log, err_mask = ata_exec_internal(dev, &tf, NULL, DMA_FROM_DEVICE, buf, sectors * ATA_SECT_SIZE, 0); + if (err_mask && dma) { + dev->horkage |= ATA_HORKAGE_NO_NCQ_LOG; + ata_dev_warn(dev, "READ LOG DMA EXT failed, trying unqueued\n"); + goto retry; + } + DPRINTK("EXIT, err_mask=%x\n", err_mask); return err_mask; } diff --git a/include/linux/libata.h b/include/linux/libata.h index 28aeae46f355f..3831a3eaaa05e 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -431,6 +431,7 @@ enum { ATA_HORKAGE_NOLPM = (1 << 20), /* don't use LPM */ ATA_HORKAGE_WD_BROKEN_LPM = (1 << 21), /* some WDs have broken LPM */ ATA_HORKAGE_ZERO_AFTER_TRIM = (1 << 22),/* guarantees zero after trim */ + ATA_HORKAGE_NO_NCQ_LOG = (1 << 23), /* don't use NCQ for log read */ /* DMA mask for user DMA control: User visible values; DO NOT renumber */ From e77d2137a071122135da5fa2418d5a7720bf7b07 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Mon, 4 May 2015 21:54:19 -0400 Subject: [PATCH 0226/2091] libata: Expose TRIM capability in sysfs commit f303074160d3401970ccae082014e1ee5a9a52c5 upstream. Create a sysfs "trim" attribute for each ata_device that displays whether DSM TRIM is "unsupported", "unqueued", "forced_unqueued" (blacklisted) or "queued". Signed-off-by: Martin K. Petersen Reviewed-by: Hannes Reinecke Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- Documentation/ABI/testing/sysfs-ata | 11 +++++++++++ drivers/ata/libata-transport.c | 22 ++++++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-ata b/Documentation/ABI/testing/sysfs-ata index 0a932155cbbaf..9231daef3813e 100644 --- a/Documentation/ABI/testing/sysfs-ata +++ b/Documentation/ABI/testing/sysfs-ata @@ -90,6 +90,17 @@ gscr 130: SATA_PMP_GSCR_SII_GPIO Only valid if the device is a PM. +trim + + Shows the DSM TRIM mode currently used by the device. Valid + values are: + unsupported: Drive does not support DSM TRIM + unqueued: Drive supports unqueued DSM TRIM only + queued: Drive supports queued DSM TRIM + forced_unqueued: Drive's unqueued DSM support is known to be + buggy and only unqueued TRIM commands + are sent + spdn_cnt Number of time libata decided to lower the speed of link due to errors. 
diff --git a/drivers/ata/libata-transport.c b/drivers/ata/libata-transport.c index 3227b7c8a05f8..d6c37bcd416d1 100644 --- a/drivers/ata/libata-transport.c +++ b/drivers/ata/libata-transport.c @@ -560,6 +560,27 @@ show_ata_dev_gscr(struct device *dev, static DEVICE_ATTR(gscr, S_IRUGO, show_ata_dev_gscr, NULL); +static ssize_t +show_ata_dev_trim(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct ata_device *ata_dev = transport_class_to_dev(dev); + unsigned char *mode; + + if (!ata_id_has_trim(ata_dev->id)) + mode = "unsupported"; + else if (ata_dev->horkage & ATA_HORKAGE_NO_NCQ_TRIM) + mode = "forced_unqueued"; + else if (ata_fpdma_dsm_supported(ata_dev)) + mode = "queued"; + else + mode = "unqueued"; + + return snprintf(buf, 20, "%s\n", mode); +} + +static DEVICE_ATTR(trim, S_IRUGO, show_ata_dev_trim, NULL); + static DECLARE_TRANSPORT_CLASS(ata_dev_class, "ata_device", NULL, NULL, NULL); @@ -733,6 +754,7 @@ struct scsi_transport_template *ata_attach_transport(void) SETUP_DEV_ATTRIBUTE(ering); SETUP_DEV_ATTRIBUTE(id); SETUP_DEV_ATTRIBUTE(gscr); + SETUP_DEV_ATTRIBUTE(trim); BUG_ON(count > ATA_DEV_ATTRS); i->dev_attrs[count] = NULL; From 54150eb4cde1b758d788045d3ddc7ee336ce3d6d Mon Sep 17 00:00:00 2001 From: Arne Fitzenreiter Date: Wed, 15 Jul 2015 13:54:36 +0200 Subject: [PATCH 0227/2091] libata: add ATA_HORKAGE_NOTRIM commit 71d126fd28de2d4d9b7b2088dbccd7ca62fad6e0 upstream. Some devices lose data on TRIM whether queued or not. This patch adds a horkage to disable TRIM. tj: Collapsed unnecessary if() nesting. Signed-off-by: Arne Fitzenreiter Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libata-scsi.c | 3 ++- drivers/ata/libata-transport.c | 2 ++ include/linux/libata.h | 2 ++ 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 3131adcc1f87e..641a61a59e89c 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -2568,7 +2568,8 @@ static unsigned int ata_scsiop_read_cap(struct ata_scsi_args *args, u8 *rbuf) rbuf[14] = (lowest_aligned >> 8) & 0x3f; rbuf[15] = lowest_aligned; - if (ata_id_has_trim(args->id)) { + if (ata_id_has_trim(args->id) && + !(dev->horkage & ATA_HORKAGE_NOTRIM)) { rbuf[14] |= 0x80; /* LBPME */ if (ata_id_has_zero_after_trim(args->id) && diff --git a/drivers/ata/libata-transport.c b/drivers/ata/libata-transport.c index d6c37bcd416d1..e2d94972962d6 100644 --- a/drivers/ata/libata-transport.c +++ b/drivers/ata/libata-transport.c @@ -569,6 +569,8 @@ show_ata_dev_trim(struct device *dev, if (!ata_id_has_trim(ata_dev->id)) mode = "unsupported"; + else if (ata_dev->horkage & ATA_HORKAGE_NOTRIM) + mode = "forced_unsupported"; else if (ata_dev->horkage & ATA_HORKAGE_NO_NCQ_TRIM) mode = "forced_unqueued"; else if (ata_fpdma_dsm_supported(ata_dev)) diff --git a/include/linux/libata.h b/include/linux/libata.h index 3831a3eaaa05e..f24e20ea9299c 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -432,6 +432,8 @@ enum { ATA_HORKAGE_WD_BROKEN_LPM = (1 << 21), /* some WDs have broken LPM */ ATA_HORKAGE_ZERO_AFTER_TRIM = (1 << 22),/* guarantees zero after trim */ ATA_HORKAGE_NO_NCQ_LOG = (1 << 23), /* don't use NCQ for log read */ + ATA_HORKAGE_NOTRIM = (1 << 24), /* don't use TRIM */ + /* DMA mask for user DMA control: User visible values; DO NOT renumber */ From 3b1c86a973f2e665c1697d965e459bf1b6713825 Mon Sep 17 00:00:00 2001 From: David Milburn Date: Mon, 13 Jul 2015 11:48:23 -0500 Subject: [PATCH 0228/2091] libata: add 
ATA_HORKAGE_MAX_SEC_1024 to revert back to previous max_sectors limit commit af34d637637eabaf49406eb35c948cd51ba262a6 upstream. Since no longer limiting max_sectors to BLK_DEF_MAX_SECTORS (commit 34b48db66e08), data corruption may occur on ST380013AS drive configured on 82801JI (ICH10 Family) SATA controller. This patch will allow the driver to limit max_sectors as before # cat /sys/block/sdb/queue/max_sectors_kb 512 I was able to double the max_sectors_kb value up to 16384 on linux-4.2.0-rc2 before seeing corruption, but seems safer to use previous limit. Without this patch max_sectors_kb will be 32767. tj: Minor comment update. Reported-by: Jeff Moyer Signed-off-by: David Milburn Signed-off-by: Tejun Heo Fixes: 34b48db66e08 ("block: remove artifical max_hw_sectors cap") Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libata-core.c | 10 ++++++++++ include/linux/ata.h | 1 + include/linux/libata.h | 2 +- 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index cb38f2b716b8d..89527834fa07f 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -2478,6 +2478,10 @@ int ata_dev_configure(struct ata_device *dev) dev->max_sectors = min_t(unsigned int, ATA_MAX_SECTORS_128, dev->max_sectors); + if (dev->horkage & ATA_HORKAGE_MAX_SEC_1024) + dev->max_sectors = min_t(unsigned int, ATA_MAX_SECTORS_1024, + dev->max_sectors); + if (dev->horkage & ATA_HORKAGE_MAX_SEC_LBA48) dev->max_sectors = ATA_MAX_SECTORS_LBA48; @@ -4146,6 +4150,12 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { { "Slimtype DVD A DS8A8SH", NULL, ATA_HORKAGE_MAX_SEC_LBA48 }, { "Slimtype DVD A DS8A9SH", NULL, ATA_HORKAGE_MAX_SEC_LBA48 }, + /* + * Causes silent data corruption with higher max sects. + * http://lkml.kernel.org/g/x49wpy40ysk.fsf@segfault.boston.devel.redhat.com + */ + { "ST380013AS", "3.20", ATA_HORKAGE_MAX_SEC_1024 }, + /* Devices we expect to fail diagnostics */ /* Devices where NCQ should be avoided */ diff --git a/include/linux/ata.h b/include/linux/ata.h index b666b773e1118..533dbb6428f5a 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -45,6 +45,7 @@ enum { ATA_SECT_SIZE = 512, ATA_MAX_SECTORS_128 = 128, ATA_MAX_SECTORS = 256, + ATA_MAX_SECTORS_1024 = 1024, ATA_MAX_SECTORS_LBA48 = 65535,/* TODO: 65536? */ ATA_MAX_SECTORS_TAPE = 65535, diff --git a/include/linux/libata.h b/include/linux/libata.h index f24e20ea9299c..e0e33787c485e 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -433,7 +433,7 @@ enum { ATA_HORKAGE_ZERO_AFTER_TRIM = (1 << 22),/* guarantees zero after trim */ ATA_HORKAGE_NO_NCQ_LOG = (1 << 23), /* don't use NCQ for log read */ ATA_HORKAGE_NOTRIM = (1 << 24), /* don't use TRIM */ - + ATA_HORKAGE_MAX_SEC_1024 = (1 << 25), /* Limit max sects to 1024 */ /* DMA mask for user DMA control: User visible values; DO NOT renumber */ From 5f5cc9ddc13efb5f1d2797e9b8df1d4c6e97296b Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Wed, 15 Jul 2015 21:03:23 -0400 Subject: [PATCH 0229/2091] libata: Do not blacklist M510DC commit 9051bd393cf25e76dfb45409792719a854661500 upstream. A new Micron drive was just announced, once again recycling the first part of the model string. Add an underscore to the M510/M550 pattern to avoid picking up the new DC drive. Signed-off-by: Martin K. 
Petersen Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libata-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 89527834fa07f..549191bf78991 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4240,7 +4240,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { ATA_HORKAGE_ZERO_AFTER_TRIM, }, { "Crucial_CT*M500*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | ATA_HORKAGE_ZERO_AFTER_TRIM, }, - { "Micron_M5[15]0*", "MU01", ATA_HORKAGE_NO_NCQ_TRIM | + { "Micron_M5[15]0_*", "MU01", ATA_HORKAGE_NO_NCQ_TRIM | ATA_HORKAGE_ZERO_AFTER_TRIM, }, { "Crucial_CT*M550*", "MU01", ATA_HORKAGE_NO_NCQ_TRIM | ATA_HORKAGE_ZERO_AFTER_TRIM, }, From 5a01a6e9d6f4a24039ceec3f9a38b196395d096d Mon Sep 17 00:00:00 2001 From: Arne Fitzenreiter Date: Wed, 15 Jul 2015 13:54:37 +0200 Subject: [PATCH 0230/2091] libata: force disable trim for SuperSSpeed S238 commit cda57b1b05cf7b8b99ab4b732bea0b05b6c015cc upstream. This device loses blocks, often the partition table area, on trim. Disable TRIM. http://pcengines.ch/msata16a.htm Signed-off-by: Arne Fitzenreiter Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libata-core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 549191bf78991..41c99be9bd412 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4249,6 +4249,9 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { { "Samsung SSD 8*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | ATA_HORKAGE_ZERO_AFTER_TRIM, }, + /* devices that don't properly handle TRIM commands */ + { "SuperSSpeed S238*", NULL, ATA_HORKAGE_NOTRIM, }, + /* * As defined, the DRAT (Deterministic Read After Trim) and RZAT * (Return Zero After Trim) flags in the ATA Command Set are From 2dc7e2d5d7dca18c9f8a8cf5c0d2d3c5591af9a8 Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Bhatta Date: Thu, 21 May 2015 15:46:47 +0530 Subject: [PATCH 0231/2091] usb: dwc3: gadget: return error if command sent to DGCMD register fails commit 891b1dc022955d36cf4c0f42d383226a930db7ed upstream. We need to return error to caller if command is not sent to controller succesfully. Signed-off-by: Subbaraya Sundeep Bhatta Fixes: b09bb64239c8 (usb: dwc3: gadget: implement Global Command support) Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 8946c32047e99..fcbe120ba8ce9 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -291,6 +291,8 @@ int dwc3_send_gadget_generic_command(struct dwc3 *dwc, unsigned cmd, u32 param) dwc3_trace(trace_dwc3_gadget, "Command Complete --> %d", DWC3_DGCMD_STATUS(reg)); + if (DWC3_DGCMD_STATUS(reg)) + return -EINVAL; return 0; } From f46bc2957ec103ae9dccf4eb55ad2c40173c5144 Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Bhatta Date: Thu, 21 May 2015 15:46:48 +0530 Subject: [PATCH 0232/2091] usb: dwc3: gadget: return error if command sent to DEPCMD register fails commit 76e838c9f7765f9a6205b4d558d75a66104bc60d upstream. We need to return error to caller if command is not sent to controller succesfully. 
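[editor's note: sketch mirroring the hunk below, not new logic] The change has the same shape as the DGCMD fix above: once the command-active bit clears, look at the status field the controller reports and propagate a failure instead of unconditionally returning success:

    if (DWC3_DEPCMD_STATUS(reg))    /* non-zero status: the endpoint command failed */
            return -EINVAL;         /* callers can now bail out instead of continuing */

    return 0;
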
Signed-off-by: Subbaraya Sundeep Bhatta Fixes: 72246da40f37 (usb: Introduce DesignWare USB3 DRD Driver) Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index fcbe120ba8ce9..55b5edc191198 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -330,6 +330,8 @@ int dwc3_send_gadget_ep_cmd(struct dwc3 *dwc, unsigned ep, dwc3_trace(trace_dwc3_gadget, "Command Complete --> %d", DWC3_DEPCMD_STATUS(reg)); + if (DWC3_DEPCMD_STATUS(reg)) + return -EINVAL; return 0; } From 0343379cabd1736cce291f4555d239a66813f987 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Fri, 29 May 2015 10:06:38 -0500 Subject: [PATCH 0233/2091] usb: dwc3: gadget: don't clear EP_BUSY too early commit e18b7975c885bc3a938b9a76daf32957ea0235fa upstream. In case of non-Isochronous transfers, we don't want to clear DWC3_EP_BUSY flag until XferComplete event. That's because XferInProgress was only enabled so we can recycle TRBs and usb_requests quicker, but there are still other pending requests being transferred. In order to make sure we don't allow for another StartTransfer command while the HW is still processing other transfers, we must keep DWC3_EP_BUSY flag set and this what this patch does. Fixes: f3af36511e60 (usb: dwc3: gadget: always enable IOC on bulk/interrupt transfers) Reported-by: sundeep subbaraya Tested-by: sundeep subbaraya Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 55b5edc191198..333a7c0078fcf 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1906,12 +1906,16 @@ static void dwc3_endpoint_transfer_complete(struct dwc3 *dwc, { unsigned status = 0; int clean_busy; + u32 is_xfer_complete; + + is_xfer_complete = (event->endpoint_event == DWC3_DEPEVT_XFERCOMPLETE); if (event->status & DEPEVT_STATUS_BUSERR) status = -ECONNRESET; clean_busy = dwc3_cleanup_done_reqs(dwc, dep, event, status); - if (clean_busy) + if (clean_busy && (is_xfer_complete || + usb_endpoint_xfer_isoc(dep->endpoint.desc))) dep->flags &= ~DWC3_EP_BUSY; /* From b7473317530efd0b80cfdeba8a196a4d8fe49d69 Mon Sep 17 00:00:00 2001 From: John Youn Date: Mon, 17 Sep 2001 00:00:00 -0700 Subject: [PATCH 0234/2091] usb: dwc3: Reset the transfer resource index on SET_INTERFACE commit aebda618718157a69c0dc0adb978d69bc2b8723c upstream. This fixes an issue introduced in commit b23c843992b6 (usb: dwc3: gadget: fix DEPSTARTCFG for non-EP0 EPs) that made sure we would only use DEPSTARTCFG once per SetConfig. The trick is that we should use one DEPSTARTCFG per SetConfig *OR* SetInterface. SetInterface was completely missed from the original patch. This problem became aparent after commit 76e838c9f776 (usb: dwc3: gadget: return error if command sent to DEPCMD register fails) added checking of the return status of device endpoint commands. 'Set Endpoint Transfer Resource' command was caught failing occasionally. This is because the Transfer Resource Index was not getting reset during a SET_INTERFACE request. Finally, to fix the issue, was we have to do is make sure that our start_config_issued flag gets reset whenever we receive a SetInterface request. To verify the problem (and its fix), all we have to do is run test 9 from testusb with 'testusb -t 9 -s 2048 -a -c 5000'. 
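[editor's note: annotated sketch of the hunk below, not additional logic] The fix treats SetInterface like SetConfig as far as endpoint transfer resource assignment is concerned: clear the flag that gates the next DEPSTARTCFG, then fall through so the request still reaches the gadget driver:

    case USB_REQ_SET_INTERFACE:
            dwc->start_config_issued = false;       /* a fresh DEPSTARTCFG is allowed again */
            /* Fall through */
    default:
            ret = dwc3_ep0_delegate_req(dwc, ctrl); /* gadget driver handles the rest */
            break;
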
Tested-by: Huang Rui Tested-by: Subbaraya Sundeep Bhatta Fixes: b23c843992b6 (usb: dwc3: gadget: fix DEPSTARTCFG for non-EP0 EPs) Signed-off-by: John Youn Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/ep0.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c index 2ef3c8d6a9dbd..69e769c35cf5d 100644 --- a/drivers/usb/dwc3/ep0.c +++ b/drivers/usb/dwc3/ep0.c @@ -727,6 +727,10 @@ static int dwc3_ep0_std_request(struct dwc3 *dwc, struct usb_ctrlrequest *ctrl) dwc3_trace(trace_dwc3_ep0, "USB_REQ_SET_ISOCH_DELAY"); ret = dwc3_ep0_set_isoch_delay(dwc, ctrl); break; + case USB_REQ_SET_INTERFACE: + dwc3_trace(trace_dwc3_ep0, "USB_REQ_SET_INTERFACE"); + dwc->start_config_issued = false; + /* Fall through */ default: dwc3_trace(trace_dwc3_ep0, "Forwarding to gadget driver"); ret = dwc3_ep0_delegate_req(dwc, ctrl); From 2c2e438cbb85386f4622626bfcb20f799980c9e1 Mon Sep 17 00:00:00 2001 From: Robert Schlabbach Date: Tue, 26 May 2015 00:27:30 +0200 Subject: [PATCH 0235/2091] usb: core: Fix USB 3.0 devices lost in NOTATTACHED state after a hub port reset commit fb6d1f7df5d25299fd7b3e84b72b8851d3634764 upstream. Fix USB 3.0 devices lost in NOTATTACHED state after a hub port reset. Dissolve the function hub_port_finish_reset() completely and divide the actions to be taken into those which need to be done after each reset attempt and those which need to be done after the full procedure is complete, and place them in the appropriate places in hub_port_reset(). Also, remove an unneeded forward declaration of hub_port_reset(). Verbose Problem Description: USB 3.0 devices may be "lost for good" during a hub port reset. This makes Linux unable to boot from USB 3.0 devices in certain constellations of host controllers and devices, because the USB device is lost during initialization, preventing the rootfs from being mounted. The underlying problem is that in the affected constellations, during the processing inside hub_port_reset(), the hub link state goes from 0 to SS.inactive after the initial reset, and back to 0 again only after the following "warm" reset. However, hub_port_finish_reset() is called after each reset attempt and sets the state the connected USB device based on the "preliminary" status of the hot reset to USB_STATE_NOTATTACHED due to SS.inactive, yet when the following warm reset is complete and hub_port_finish_reset() is called again, its call to set the device to USB_STATE_DEFAULT is blocked by usb_set_device_state() which does not allow taking USB devices out of USB_STATE_NOTATTACHED state. Thanks to Alan Stern for guiding me to the proper solution and how to submit it. Link: http://lkml.kernel.org/r/trinity-25981484-72a9-4d46-bf17-9c1cf9301a31-1432073240136%20()%203capp-gmx-bs27 Signed-off-by: Robert Schlabbach Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 82 +++++++++++++++++------------------------- 1 file changed, 33 insertions(+), 49 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 3b71516877768..fd787debb4c87 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -2616,9 +2616,6 @@ static bool use_new_scheme(struct usb_device *udev, int retry) return USE_NEW_SCHEME(retry); } -static int hub_port_reset(struct usb_hub *hub, int port1, - struct usb_device *udev, unsigned int delay, bool warm); - /* Is a USB 3.0 port in the Inactive or Compliance Mode state? 
* Port worm reset is required to recover */ @@ -2706,44 +2703,6 @@ static int hub_port_wait_reset(struct usb_hub *hub, int port1, return 0; } -static void hub_port_finish_reset(struct usb_hub *hub, int port1, - struct usb_device *udev, int *status) -{ - switch (*status) { - case 0: - /* TRSTRCY = 10 ms; plus some extra */ - msleep(10 + 40); - if (udev) { - struct usb_hcd *hcd = bus_to_hcd(udev->bus); - - update_devnum(udev, 0); - /* The xHC may think the device is already reset, - * so ignore the status. - */ - if (hcd->driver->reset_device) - hcd->driver->reset_device(hcd, udev); - } - /* FALL THROUGH */ - case -ENOTCONN: - case -ENODEV: - usb_clear_port_feature(hub->hdev, - port1, USB_PORT_FEAT_C_RESET); - if (hub_is_superspeed(hub->hdev)) { - usb_clear_port_feature(hub->hdev, port1, - USB_PORT_FEAT_C_BH_PORT_RESET); - usb_clear_port_feature(hub->hdev, port1, - USB_PORT_FEAT_C_PORT_LINK_STATE); - usb_clear_port_feature(hub->hdev, port1, - USB_PORT_FEAT_C_CONNECTION); - } - if (udev) - usb_set_device_state(udev, *status - ? USB_STATE_NOTATTACHED - : USB_STATE_DEFAULT); - break; - } -} - /* Handle port reset and port warm(BH) reset (for USB3 protocol ports) */ static int hub_port_reset(struct usb_hub *hub, int port1, struct usb_device *udev, unsigned int delay, bool warm) @@ -2767,13 +2726,10 @@ static int hub_port_reset(struct usb_hub *hub, int port1, * If the caller hasn't explicitly requested a warm reset, * double check and see if one is needed. */ - status = hub_port_status(hub, port1, - &portstatus, &portchange); - if (status < 0) - goto done; - - if (hub_port_warm_reset_required(hub, port1, portstatus)) - warm = true; + if (hub_port_status(hub, port1, &portstatus, &portchange) == 0) + if (hub_port_warm_reset_required(hub, port1, + portstatus)) + warm = true; } clear_bit(port1, hub->warm_reset_bits); @@ -2799,11 +2755,19 @@ static int hub_port_reset(struct usb_hub *hub, int port1, /* Check for disconnect or reset */ if (status == 0 || status == -ENOTCONN || status == -ENODEV) { - hub_port_finish_reset(hub, port1, udev, &status); + usb_clear_port_feature(hub->hdev, port1, + USB_PORT_FEAT_C_RESET); if (!hub_is_superspeed(hub->hdev)) goto done; + usb_clear_port_feature(hub->hdev, port1, + USB_PORT_FEAT_C_BH_PORT_RESET); + usb_clear_port_feature(hub->hdev, port1, + USB_PORT_FEAT_C_PORT_LINK_STATE); + usb_clear_port_feature(hub->hdev, port1, + USB_PORT_FEAT_C_CONNECTION); + /* * If a USB 3.0 device migrates from reset to an error * state, re-issue the warm reset. @@ -2836,6 +2800,26 @@ static int hub_port_reset(struct usb_hub *hub, int port1, dev_err(&port_dev->dev, "Cannot enable. Maybe the USB cable is bad?\n"); done: + if (status == 0) { + /* TRSTRCY = 10 ms; plus some extra */ + msleep(10 + 40); + if (udev) { + struct usb_hcd *hcd = bus_to_hcd(udev->bus); + + update_devnum(udev, 0); + /* The xHC may think the device is already reset, + * so ignore the status. + */ + if (hcd->driver->reset_device) + hcd->driver->reset_device(hcd, udev); + + usb_set_device_state(udev, USB_STATE_DEFAULT); + } + } else { + if (udev) + usb_set_device_state(udev, USB_STATE_NOTATTACHED); + } + if (!hub_is_superspeed(hub->hdev)) up_read(&ehci_cf_port_reset_rwsem); From f32f8e6652d70c8453129fc4b9048a1eef6bc48c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 18 May 2015 15:29:51 +0300 Subject: [PATCH 0236/2091] USB: devio: fix a condition in async_completed() commit 83ed07c5db71bc02bd646d6eb60b48908235cdf9 upstream. Static checkers complain that the current condition is never true. 
It seems pretty likely that it's a typo and "URB" was intended instead of "USB". Fixes: 3d97ff63f899 ('usbdevfs: Use scatter-gather lists for large bulk transfers') Signed-off-by: Dan Carpenter Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/devio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index 4b0448c26810c..986abde076831 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -513,7 +513,7 @@ static void async_completed(struct urb *urb) snoop(&urb->dev->dev, "urb complete\n"); snoop_urb(urb->dev, as->userurb, urb->pipe, urb->actual_length, as->status, COMPLETE, NULL, 0); - if ((urb->transfer_flags & URB_DIR_MASK) == USB_DIR_IN) + if ((urb->transfer_flags & URB_DIR_MASK) == URB_DIR_IN) snoop_urb_data(urb, urb->actual_length); if (as->status < 0 && as->bulk_addr && as->status != -ECONNRESET && From d1221a608bd19cc6b539cffacfad3e104fcff0ef Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 16 Apr 2015 18:03:04 +1000 Subject: [PATCH 0237/2091] phy: twl4030-usb: remove incorrect pm_runtime_get_sync() in probe function. commit 4724e27114c4a7eceeee07db227a17fcab6f165c upstream. The USB phy should initialize with power-off, and will be powered on by the USB system when a cable connection is detected. Having this pm_runtime_get_sync() during probe causes the phy to *always* be powered on. Removing it returns to sensible power management. Fixes: 96be39ab34b77c6f6f5cd6ae03aac6c6449ee5c4 Signed-off-by: NeilBrown Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Greg Kroah-Hartman --- drivers/phy/phy-twl4030-usb.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/phy/phy-twl4030-usb.c b/drivers/phy/phy-twl4030-usb.c index bc42d6a8939f4..8882afbef6885 100644 --- a/drivers/phy/phy-twl4030-usb.c +++ b/drivers/phy/phy-twl4030-usb.c @@ -711,7 +711,6 @@ static int twl4030_usb_probe(struct platform_device *pdev) pm_runtime_use_autosuspend(&pdev->dev); pm_runtime_set_autosuspend_delay(&pdev->dev, 2000); pm_runtime_enable(&pdev->dev); - pm_runtime_get_sync(&pdev->dev); /* Our job is to use irqs and status from the power module * to keep the transceiver disabled when nothing's connected. From cc4c7d238d166b4e92878a113f00f7e6d4044041 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Mon, 15 Jun 2015 04:37:00 +0000 Subject: [PATCH 0238/2091] usb: phy: mxs: suspend to RAM causes NULL pointer dereference commit 543aa4867d4a2dff5fc11e1b688197ee3bad7f89 upstream. Triggering suspend to RAM via sysfs on a i.MX28 causes a NULL pointer dereference. This patch avoids the oops in mxs_phy_get_vbus_status() by aborting since there is no syscon available. 
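[editor's note: annotated sketch of the hunk below] The guard simply reports "no VBUS detected" when no anatop syscon is available to read from (the i.MX28 case reported here), instead of dereferencing the missing regmap:

    if (!mxs_phy->regmap_anatop)    /* no syscon: nothing to read VBUS status from */
            return false;
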
Signed-off-by: Stefan Wahren Fixes: efdbd3a5d6e ("usb: phy: mxs: do not set PWD.RXPWD1PT1 for low speed connection") Acked-by: Peter Chen Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/phy/phy-mxs-usb.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/phy/phy-mxs-usb.c b/drivers/usb/phy/phy-mxs-usb.c index 8f7cb068d29ba..3fcc0483a0811 100644 --- a/drivers/usb/phy/phy-mxs-usb.c +++ b/drivers/usb/phy/phy-mxs-usb.c @@ -217,6 +217,9 @@ static bool mxs_phy_get_vbus_status(struct mxs_phy *mxs_phy) { unsigned int vbus_value; + if (!mxs_phy->regmap_anatop) + return false; + if (mxs_phy->port_id == 0) regmap_read(mxs_phy->regmap_anatop, ANADIG_USB1_VBUS_DET_STAT, From a808fa062ff7ec85b7b0a344df52329083f044b0 Mon Sep 17 00:00:00 2001 From: Thomas Hebb Date: Thu, 2 Jul 2015 01:04:18 -0400 Subject: [PATCH 0239/2091] phy: berlin-usb: fix divider for BG2CD commit 96696a9df935d15fd2e89603454c20a692ec232a upstream. The marvell,berlin2cd-usb-phy compatible incorrectly sets the PLL divider to BG2's value instead of BG2CD/BG2Q's. Change it to the right value. Signed-off-by: Thomas Hebb Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Greg Kroah-Hartman --- drivers/phy/phy-berlin-usb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/phy/phy-berlin-usb.c b/drivers/phy/phy-berlin-usb.c index c6fc95b530835..ab54f28644510 100644 --- a/drivers/phy/phy-berlin-usb.c +++ b/drivers/phy/phy-berlin-usb.c @@ -106,8 +106,8 @@ static const u32 phy_berlin_pll_dividers[] = { /* Berlin 2 */ CLK_REF_DIV(0xc) | FEEDBACK_CLK_DIV(0x54), - /* Berlin 2CD */ - CLK_REF_DIV(0x6) | FEEDBACK_CLK_DIV(0x55), + /* Berlin 2CD/Q */ + CLK_REF_DIV(0xc) | FEEDBACK_CLK_DIV(0x54), }; struct phy_berlin_usb_priv { From c856349b0e83bf406242a1a3e44f8391d71fd5a8 Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Thu, 11 Jun 2015 22:12:11 +0530 Subject: [PATCH 0240/2091] usb: gadget: composite: Fix NULL pointer dereference commit b4c21f0bdd2c0cd5d5be1bb56f0a28dae5041eed upstream. commit f563d230903210acc ("usb: gadget: composite: add req_match method to usb_function") accesses cdev->config even before set config is invoked causing a NULL pointer dereferencing error while running Lecroy Mass Storage Compliance test. Fix it here by accessing cdev->config only if it is non NULL. Fixes: commit f563d230903210acc ("usb: gadget: composite: add req_match method to usb_function"). Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/composite.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index 4e3447bbd0976..58b4657fc721d 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -1758,10 +1758,13 @@ composite_setup(struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl) * take such requests too, if that's ever needed: to work * in config 0, etc. 
*/ - list_for_each_entry(f, &cdev->config->functions, list) - if (f->req_match && f->req_match(f, ctrl)) - goto try_fun_setup; - f = NULL; + if (cdev->config) { + list_for_each_entry(f, &cdev->config->functions, list) + if (f->req_match && f->req_match(f, ctrl)) + goto try_fun_setup; + f = NULL; + } + switch (ctrl->bRequestType & USB_RECIP_MASK) { case USB_RECIP_INTERFACE: if (!cdev->config || intf >= MAX_CONFIG_INTERFACES) From 4db43b5199062e0c6f0424462ce8c4978bcecf15 Mon Sep 17 00:00:00 2001 From: Rui Miguel Silva Date: Mon, 18 May 2015 16:02:07 +0100 Subject: [PATCH 0241/2091] usb: gadget: f_fs: do not set cancel function on synchronous {read,write} commit 4088acf1e845aba35f30fb91dee10649edbd0e84 upstream. do not try to set cancel function in synchronous operations in ffs_epfile_{read,write}_iter. Acked-by: Al Viro Signed-off-by: Rui Miguel Silva Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_fs.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 45b8c8b338df7..6e7be91e6097c 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -924,7 +924,8 @@ static ssize_t ffs_epfile_write_iter(struct kiocb *kiocb, struct iov_iter *from) kiocb->private = p; - kiocb_set_cancel_fn(kiocb, ffs_aio_cancel); + if (p->aio) + kiocb_set_cancel_fn(kiocb, ffs_aio_cancel); res = ffs_epfile_io(kiocb->ki_filp, p); if (res == -EIOCBQUEUED) @@ -968,7 +969,8 @@ static ssize_t ffs_epfile_read_iter(struct kiocb *kiocb, struct iov_iter *to) kiocb->private = p; - kiocb_set_cancel_fn(kiocb, ffs_aio_cancel); + if (p->aio) + kiocb_set_cancel_fn(kiocb, ffs_aio_cancel); res = ffs_epfile_io(kiocb->ki_filp, p); if (res == -EIOCBQUEUED) From 35dff3a0bd57b32df5d7990fddbf4e54d96a4621 Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Sun, 19 Jul 2015 23:13:28 +0700 Subject: [PATCH 0242/2091] usb: gadget: mv_udc_core: fix phy_regs I/O memory leak commit 53e20f2eb161fbe9eea28b54dccc870cec94eca2 upstream. There was an omission in transition to devm_xxx resource handling. iounmap(udc->phy_regs) were removed, but ioremap() was left without devm_. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Fixes: 3517c31a8ece6 ("usb: gadget: mv_udc: use devm_xxx for probe") Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/mv_udc_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/udc/mv_udc_core.c b/drivers/usb/gadget/udc/mv_udc_core.c index d32160d6463f5..5da37c957b53c 100644 --- a/drivers/usb/gadget/udc/mv_udc_core.c +++ b/drivers/usb/gadget/udc/mv_udc_core.c @@ -2167,7 +2167,7 @@ static int mv_udc_probe(struct platform_device *pdev) return -ENODEV; } - udc->phy_regs = ioremap(r->start, resource_size(r)); + udc->phy_regs = devm_ioremap(&pdev->dev, r->start, resource_size(r)); if (udc->phy_regs == NULL) { dev_err(&pdev->dev, "failed to map phy I/O memory\n"); return -EBUSY; From 1fd1fe35f15436869e182c1fb075d9f837dcf0c2 Mon Sep 17 00:00:00 2001 From: Michal Nazarewicz Date: Fri, 19 Jun 2015 23:56:34 +0200 Subject: [PATCH 0243/2091] usb: f_mass_storage: limit number of reported LUNs commit 8515bac01a983d277148e4fcc5f235bf603de577 upstream. Mass storage function created via configfs always reports eight LUNs to the hosts even if only one LUN has been configured. 
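The counting rule the fix applies at allocation time can be condensed into a few lines (an illustrative helper, not code from the driver; only FSG_MAX_LUNS and the luns[] array come from the diff):

static unsigned fsg_count_configured_luns(struct fsg_common *common)
{
	unsigned i, nluns = 0;

	/* Report one LUN past the highest slot the user populated. */
	for (i = 0; i < FSG_MAX_LUNS; ++i)
		if (common->luns[i])
			nluns = i + 1;

	return nluns;	/* 0 means nothing was configured */
}

The diff that follows performs exactly this scan in fsg_alloc() and, when no LUN has been created, only warns and keeps the previous value.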
Adjust the number when the USB function is allocated based on LUNs that user has created. Tested-by: Gregory CLEMENT Signed-off-by: Michal Nazarewicz Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_mass_storage.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/usb/gadget/function/f_mass_storage.c b/drivers/usb/gadget/function/f_mass_storage.c index 3cc109f3c9c80..15c3071550370 100644 --- a/drivers/usb/gadget/function/f_mass_storage.c +++ b/drivers/usb/gadget/function/f_mass_storage.c @@ -2786,7 +2786,7 @@ int fsg_common_set_nluns(struct fsg_common *common, int nluns) return -EINVAL; } - curlun = kcalloc(nluns, sizeof(*curlun), GFP_KERNEL); + curlun = kcalloc(FSG_MAX_LUNS, sizeof(*curlun), GFP_KERNEL); if (unlikely(!curlun)) return -ENOMEM; @@ -2796,8 +2796,6 @@ int fsg_common_set_nluns(struct fsg_common *common, int nluns) common->luns = curlun; common->nluns = nluns; - pr_info("Number of LUNs=%d\n", common->nluns); - return 0; } EXPORT_SYMBOL_GPL(fsg_common_set_nluns); @@ -3563,14 +3561,26 @@ static struct usb_function *fsg_alloc(struct usb_function_instance *fi) struct fsg_opts *opts = fsg_opts_from_func_inst(fi); struct fsg_common *common = opts->common; struct fsg_dev *fsg; + unsigned nluns, i; fsg = kzalloc(sizeof(*fsg), GFP_KERNEL); if (unlikely(!fsg)) return ERR_PTR(-ENOMEM); mutex_lock(&opts->lock); + if (!opts->refcnt) { + for (nluns = i = 0; i < FSG_MAX_LUNS; ++i) + if (common->luns[i]) + nluns = i + 1; + if (!nluns) + pr_warn("No LUNS defined, continuing anyway\n"); + else + common->nluns = nluns; + pr_info("Number of LUNs=%u\n", common->nluns); + } opts->refcnt++; mutex_unlock(&opts->lock); + fsg->function.name = FSG_DRIVER_DESC; fsg->function.bind = fsg_bind; fsg->function.unbind = fsg_unbind; From 13d5f386c4978fd3ab9bbe92013ede0c21bbaed1 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Tue, 2 Jun 2015 13:03:36 -0500 Subject: [PATCH 0244/2091] usb: musb: host: rely on port_mode to call musb_start() commit be9d39881fc4fa39a64b6eed6bab5d9ee5125344 upstream. Currently, we're calling musb_start() twice for DRD ports in some situations. This has been observed to cause enumeration issues after suspend/resume cycles with AM335x. In order to fix the problem, we just have to fix the check on musb_has_gadget() so that it only returns true if current mode is Host and ignore the fact that we have or not a gadget driver loaded. Fixes: ae44df2e21b5 (usb: musb: call musb_start() only once in OTG mode) Cc: Sebastian Andrzej Siewior Tested-by: Sekhar Nori Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_virthub.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/usb/musb/musb_virthub.c b/drivers/usb/musb/musb_virthub.c index 86c4b533e90b3..4731baca377f9 100644 --- a/drivers/usb/musb/musb_virthub.c +++ b/drivers/usb/musb/musb_virthub.c @@ -273,9 +273,7 @@ static int musb_has_gadget(struct musb *musb) #ifdef CONFIG_USB_MUSB_HOST return 1; #else - if (musb->port_mode == MUSB_PORT_MODE_HOST) - return 1; - return musb->g.dev.driver != NULL; + return musb->port_mode == MUSB_PORT_MODE_HOST; #endif } From 7e5423dc7b2589e2f781274da8f112fb13c07a50 Mon Sep 17 00:00:00 2001 From: Peter Sanford Date: Thu, 25 Jun 2015 17:40:05 -0700 Subject: [PATCH 0245/2091] USB: cp210x: add ID for Aruba Networks controllers commit f98a7aa81eeeadcad25665c3501c236d531d4382 upstream. 
Add the USB serial console device ID for Aruba Networks 7xxx series controllers which have a USB port for their serial console. Signed-off-by: Peter Sanford Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/cp210x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index ffd739e31bfc1..eac7ccaa3c859 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -187,6 +187,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x1FB9, 0x0602) }, /* Lake Shore Model 648 Magnet Power Supply */ { USB_DEVICE(0x1FB9, 0x0700) }, /* Lake Shore Model 737 VSM Controller */ { USB_DEVICE(0x1FB9, 0x0701) }, /* Lake Shore Model 776 Hall Matrix */ + { USB_DEVICE(0x2626, 0xEA60) }, /* Aruba Networks 7xxx USB Serial Console */ { USB_DEVICE(0x3195, 0xF190) }, /* Link Instruments MSO-19 */ { USB_DEVICE(0x3195, 0xF280) }, /* Link Instruments MSO-28 */ { USB_DEVICE(0x3195, 0xF281) }, /* Link Instruments MSO-28 */ From 8701f782b511f75be55315c92426f7197908ea86 Mon Sep 17 00:00:00 2001 From: Claudio Cappelli Date: Wed, 10 Jun 2015 20:38:30 +0200 Subject: [PATCH 0246/2091] USB: option: add 2020:4000 ID commit f6d7fb37f92622479ef6da604f27561f5045ba1e upstream. Add device Olivetti Olicard 300 (Network Connect: MT6225) - IDs 2020:4000. T: Bus=01 Lev=02 Prnt=04 Port=00 Cnt=01 Dev#= 10 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=2020 ProdID=4000 Rev=03.00 S: Manufacturer=Network Connect S: Product=MT6225 C: #Ifs= 7 Cfg#= 1 Atr=a0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 1 Cls=02(commc) Sub=0e Prot=00 Driver=cdc_mbim I: If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=02 Prot=01 Driver=option I: If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 6 Alt= 0 #EPs= 2 Cls=08(stor.) Sub=06 Prot=50 Driver=usb-storage Signed-off-by: Claudio Cappelli Suggested-by: Lars Melin [johan: amend commit message with devices info ] Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index f0c0c53359ad6..19b85ee98a724 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1765,6 +1765,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x7d03, 0xff, 0x00, 0x00) }, { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e01, 0xff, 0xff, 0xff) }, /* D-Link DWM-152/C1 */ { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e02, 0xff, 0xff, 0xff) }, /* D-Link DWM-156/C1 */ + { USB_DEVICE_INTERFACE_CLASS(0x2020, 0x4000, 0xff) }, /* OLICARD300 - MT6225 */ { USB_DEVICE(INOVIA_VENDOR_ID, INOVIA_SEW858) }, { USB_DEVICE(VIATELECOM_VENDOR_ID, VIATELECOM_PRODUCT_CDS7) }, { } /* Terminating entry */ From ba4ba9b026589f920909f6a5f2e3bfa861b162fb Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 8 Jul 2015 17:26:37 +0200 Subject: [PATCH 0247/2091] USB: serial: Destroy serial_minors IDR on module exit commit d23f47d4927fd2f61b3a754d83c7bcec215b5cfe upstream. Destroy serial_minors IDR on module exit, reclaiming the allocated memory. 
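The underlying rule is generic: any module-scope IDR created with DEFINE_IDR() should be released with idr_destroy() from the module's exit path. A minimal stand-alone sketch of that pattern (not the usb-serial code itself):

#include <linux/idr.h>
#include <linux/module.h>

static DEFINE_IDR(example_idr);

static int __init example_init(void)
{
	return 0;	/* entries are added later with idr_alloc() */
}

static void __exit example_exit(void)
{
	/* Frees the IDR's internal nodes; without this, every
	 * module unload leaks whatever the IDR had allocated. */
	idr_destroy(&example_idr);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");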
This was detected by the following semantic patch (written by Luis Rodriguez ) @ defines_module_init @ declarer name module_init, module_exit; declarer name DEFINE_IDR; identifier init; @@ module_init(init); @ defines_module_exit @ identifier exit; @@ module_exit(exit); @ declares_idr depends on defines_module_init && defines_module_exit @ identifier idr; @@ DEFINE_IDR(idr); @ on_exit_calls_destroy depends on declares_idr && defines_module_exit @ identifier declares_idr.idr, defines_module_exit.exit; @@ exit(void) { ... idr_destroy(&idr); ... } @ missing_module_idr_destroy depends on declares_idr && defines_module_exit && !on_exit_calls_destroy @ identifier declares_idr.idr, defines_module_exit.exit; @@ exit(void) { ... +idr_destroy(&idr); } Signed-off-by: Johannes Thumshirn Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/usb-serial.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c index 529066bbc7e81..46f1f13b41f14 100644 --- a/drivers/usb/serial/usb-serial.c +++ b/drivers/usb/serial/usb-serial.c @@ -1306,6 +1306,7 @@ static void __exit usb_serial_exit(void) tty_unregister_driver(usb_serial_tty_driver); put_tty_driver(usb_serial_tty_driver); bus_unregister(&usb_serial_bus_type); + idr_destroy(&serial_minors); } From 2c231aeff7e41ddd011417973262b78914b45899 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Tue, 30 Jun 2015 11:25:54 -0400 Subject: [PATCH 0248/2091] USB: OHCI: Fix race between ED unlink and URB submission commit 7d8021c967648accd1b78e5e1ddaad655cd2c61f upstream. This patch fixes a bug introduced by commit 977dcfdc6031 ("USB: OHCI: don't lose track of EDs when a controller dies"). The commit changed ed_state from ED_UNLINK to ED_IDLE too early, before finish_urb() had been called. The user-visible consequence is that the driver occasionally crashes or locks up when an URB is submitted while another URB for the same endpoint is being unlinked. This patch moves the ED state change later, to the right place. The drawback is that now we may unnecessarily execute some instructions multiple times when a controller dies. Since controllers dying is an exceptional occurrence, a little wasted time won't matter. Signed-off-by: Alan Stern Reported-by: Heiko Przybyl Tested-by: Heiko Przybyl Fixes: 977dcfdc60311e7aa571cabf6f39c36dde13339e Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ohci-q.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/usb/host/ohci-q.c b/drivers/usb/host/ohci-q.c index 1463c398d3220..fe1d5fc7da2d9 100644 --- a/drivers/usb/host/ohci-q.c +++ b/drivers/usb/host/ohci-q.c @@ -980,10 +980,6 @@ static void finish_unlinks(struct ohci_hcd *ohci) int completed, modified; __hc32 *prev; - /* Is this ED already invisible to the hardware? */ - if (ed->state == ED_IDLE) - goto ed_idle; - /* only take off EDs that the HC isn't using, accounting for * frame counter wraps and EDs with partially retired TDs */ @@ -1011,12 +1007,10 @@ static void finish_unlinks(struct ohci_hcd *ohci) } /* ED's now officially unlinked, hc doesn't see */ - ed->state = ED_IDLE; ed->hwHeadP &= ~cpu_to_hc32(ohci, ED_H); ed->hwNextED = 0; wmb(); ed->hwINFO &= ~cpu_to_hc32(ohci, ED_SKIP | ED_DEQUEUE); -ed_idle: /* reentrancy: if we drop the schedule lock, someone might * have modified this list. 
normally it's just prepending @@ -1087,6 +1081,7 @@ static void finish_unlinks(struct ohci_hcd *ohci) if (list_empty(&ed->td_list)) { *last = ed->ed_next; ed->ed_next = NULL; + ed->state = ED_IDLE; list_del(&ed->in_use_list); } else if (ohci->rh_state == OHCI_RH_RUNNING) { *last = ed->ed_next; From e951f84074b84a3f5aecbffd01da74576e0068d5 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 16 Jun 2015 09:08:26 +0800 Subject: [PATCH 0249/2091] usb: core: lpm: set lpm_capable for root hub device commit 2d2a316765d956bc5cb6bb367b2ec52ca59ab8e9 upstream. Commit 25cd2882e2fc ("usb/xhci: Change how we indicate a host supports Link PM.") removed the code to set lpm_capable for USB 3.0 super-speed root hub. The intention of that change was to avoid touching usb core internal field, a.k.a. lpm_capable, and let usb core to set it by checking U1 and U2 exit latency values in the descriptor. Usb core checks and sets lpm_capable in hub_port_init(). Unfortunately, root hub is a special usb device as it has no parent. Hub_port_init() will never be called for a root hub device. That means lpm_capable will by no means be set for the root hub. As the result, lpm isn't functional at all in Linux kernel. This patch add the code to check and set lpm_capable when registering a root hub device. It could be back-ported to kernels as old as v3.15, that contains the Commit 25cd2882e2fc ("usb/xhci: Change how we indicate a host supports Link PM."). Reported-by: Kevin Strasser Signed-off-by: Lu Baolu Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hcd.c | 7 +++++-- drivers/usb/core/hub.c | 2 +- drivers/usb/core/usb.h | 1 + 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 45a915ccd71c0..1c1385e3a8248 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -1022,9 +1022,12 @@ static int register_root_hub(struct usb_hcd *hcd) dev_name(&usb_dev->dev), retval); return (retval < 0) ? retval : -EMSGSIZE; } - if (usb_dev->speed == USB_SPEED_SUPER) { + + if (le16_to_cpu(usb_dev->descriptor.bcdUSB) >= 0x0201) { retval = usb_get_bos_descriptor(usb_dev); - if (retval < 0) { + if (!retval) { + usb_dev->lpm_capable = usb_device_supports_lpm(usb_dev); + } else if (usb_dev->speed == USB_SPEED_SUPER) { mutex_unlock(&usb_bus_list_lock); dev_dbg(parent_dev, "can't read %s bos descriptor %d\n", dev_name(&usb_dev->dev), retval); diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index fd787debb4c87..1e9a8c9aa531f 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -122,7 +122,7 @@ struct usb_hub *usb_hub_to_struct_hub(struct usb_device *hdev) return usb_get_intfdata(hdev->actconfig->interface[0]); } -static int usb_device_supports_lpm(struct usb_device *udev) +int usb_device_supports_lpm(struct usb_device *udev) { /* USB 2.1 (and greater) devices indicate LPM support through * their USB 2.0 Extended Capabilities BOS descriptor. 
diff --git a/drivers/usb/core/usb.h b/drivers/usb/core/usb.h index 7eb1e26798e5f..457255a3306a3 100644 --- a/drivers/usb/core/usb.h +++ b/drivers/usb/core/usb.h @@ -65,6 +65,7 @@ extern int usb_hub_init(void); extern void usb_hub_cleanup(void); extern int usb_major_init(void); extern void usb_major_cleanup(void); +extern int usb_device_supports_lpm(struct usb_device *udev); #ifdef CONFIG_PM From 0906f9b902c18323bd82945a272432ba3330191a Mon Sep 17 00:00:00 2001 From: AMAN DEEP Date: Tue, 21 Jul 2015 17:20:27 +0300 Subject: [PATCH 0250/2091] usb: xhci: Bugfix for NULL pointer deference in xhci_endpoint_init() function commit 3496810663922617d4b706ef2780c279252ddd6a upstream. virt_dev->num_cached_rings counts on freed ring and is not updated correctly. In xhci_free_or_cache_endpoint_ring() function, the free ring is added into cache and then num_rings_cache is incremented as below: virt_dev->ring_cache[rings_cached] = virt_dev->eps[ep_index].ring; virt_dev->num_rings_cached++; here, free ring pointer is added to a current index and then index is incremented. So current index always points to empty location in the ring cache. For getting available free ring, current index should be decremented first and then corresponding ring buffer value should be taken from ring cache. But In function xhci_endpoint_init(), the num_rings_cached index is accessed before decrement. virt_dev->eps[ep_index].new_ring = virt_dev->ring_cache[virt_dev->num_rings_cached]; virt_dev->ring_cache[virt_dev->num_rings_cached] = NULL; virt_dev->num_rings_cached--; This is bug in manipulating the index of ring cache. And it should be as below: virt_dev->num_rings_cached--; virt_dev->eps[ep_index].new_ring = virt_dev->ring_cache[virt_dev->num_rings_cached]; virt_dev->ring_cache[virt_dev->num_rings_cached] = NULL; Signed-off-by: Aman Deep Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index f8336408ef07c..3e442f77a2b93 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -1427,10 +1427,10 @@ int xhci_endpoint_init(struct xhci_hcd *xhci, /* Attempt to use the ring cache */ if (virt_dev->num_rings_cached == 0) return -ENOMEM; + virt_dev->num_rings_cached--; virt_dev->eps[ep_index].new_ring = virt_dev->ring_cache[virt_dev->num_rings_cached]; virt_dev->ring_cache[virt_dev->num_rings_cached] = NULL; - virt_dev->num_rings_cached--; xhci_reinit_cached_ring(xhci, virt_dev->eps[ep_index].new_ring, 1, type); } From 3bf9ad4c48534dd0d7f201e315c340069a244664 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Wed, 20 May 2015 10:30:32 +0100 Subject: [PATCH 0251/2091] dm cache: fix race when issuing a POLICY_REPLACE operation commit fb4100ae7f312c3d614b37621c2b17b3b7cf65f8 upstream. There is a race between a policy deciding to replace a cache entry, the core target writing back any dirty data from this block, and other IO threads doing IO to the same block. This sort of problem is avoided most of the time by the core target grabbing a bio prison cell before making the request to the policy. But for a demotion the core target doesn't know which block will be demoted, so can't do this in advance. Fix this demotion race by introducing a callback to the policy interface that allows the policy to grab the cell on behalf of the core target. 
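The callback plumbing added below follows a standard kernel idiom: the caller embeds a tiny ops structure inside its own context and the callee recovers that context with container_of(). A condensed sketch of the idiom (the locker types match the diff; the context struct and the try-lock helper are illustrative only):

struct policy_locker;
typedef int (*policy_lock_fn)(struct policy_locker *l, dm_oblock_t oblock);

struct policy_locker {
	policy_lock_fn fn;
};

/* Caller-owned context wrapping the locker. */
struct demotion_ctx {
	struct policy_locker locker;
	struct cache *cache;		/* whatever the callback needs */
};

static int demotion_lock(struct policy_locker *l, dm_oblock_t b)
{
	struct demotion_ctx *ctx = container_of(l, struct demotion_ctx, locker);

	/* Detain the victim block on behalf of the policy; a non-zero
	 * return tells the policy to choose a different block. */
	return example_try_lock_block(ctx->cache, b);	/* hypothetical helper */
}

This lets the policy pick a demotion victim and have the core target lock it in one step, instead of the old pattern of picking first and backing off on a cell clash.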
Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-cache-policy-cleaner.c | 3 +- drivers/md/dm-cache-policy-internal.h | 5 ++- drivers/md/dm-cache-policy-mq.c | 41 +++++++++++++------ drivers/md/dm-cache-policy.h | 15 ++++++- drivers/md/dm-cache-target.c | 58 ++++++++++++++++++--------- 5 files changed, 85 insertions(+), 37 deletions(-) diff --git a/drivers/md/dm-cache-policy-cleaner.c b/drivers/md/dm-cache-policy-cleaner.c index b04d1f904d076..004e463c9423c 100644 --- a/drivers/md/dm-cache-policy-cleaner.c +++ b/drivers/md/dm-cache-policy-cleaner.c @@ -171,7 +171,8 @@ static void remove_cache_hash_entry(struct wb_cache_entry *e) /* Public interface (see dm-cache-policy.h */ static int wb_map(struct dm_cache_policy *pe, dm_oblock_t oblock, bool can_block, bool can_migrate, bool discarded_oblock, - struct bio *bio, struct policy_result *result) + struct bio *bio, struct policy_locker *locker, + struct policy_result *result) { struct policy *p = to_policy(pe); struct wb_cache_entry *e; diff --git a/drivers/md/dm-cache-policy-internal.h b/drivers/md/dm-cache-policy-internal.h index 2256a1f24f73e..c198e6defb9cd 100644 --- a/drivers/md/dm-cache-policy-internal.h +++ b/drivers/md/dm-cache-policy-internal.h @@ -16,9 +16,10 @@ */ static inline int policy_map(struct dm_cache_policy *p, dm_oblock_t oblock, bool can_block, bool can_migrate, bool discarded_oblock, - struct bio *bio, struct policy_result *result) + struct bio *bio, struct policy_locker *locker, + struct policy_result *result) { - return p->map(p, oblock, can_block, can_migrate, discarded_oblock, bio, result); + return p->map(p, oblock, can_block, can_migrate, discarded_oblock, bio, locker, result); } static inline int policy_lookup(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t *cblock) diff --git a/drivers/md/dm-cache-policy-mq.c b/drivers/md/dm-cache-policy-mq.c index 3ddd1162334df..515d44bf24d3d 100644 --- a/drivers/md/dm-cache-policy-mq.c +++ b/drivers/md/dm-cache-policy-mq.c @@ -693,9 +693,10 @@ static void requeue(struct mq_policy *mq, struct entry *e) * - set the hit count to a hard coded value other than 1, eg, is it better * if it goes in at level 2? */ -static int demote_cblock(struct mq_policy *mq, dm_oblock_t *oblock) +static int demote_cblock(struct mq_policy *mq, + struct policy_locker *locker, dm_oblock_t *oblock) { - struct entry *demoted = pop(mq, &mq->cache_clean); + struct entry *demoted = peek(&mq->cache_clean); if (!demoted) /* @@ -707,6 +708,13 @@ static int demote_cblock(struct mq_policy *mq, dm_oblock_t *oblock) */ return -ENOSPC; + if (locker->fn(locker, demoted->oblock)) + /* + * We couldn't lock the demoted block. + */ + return -EBUSY; + + del(mq, demoted); *oblock = demoted->oblock; free_entry(&mq->cache_pool, demoted); @@ -795,6 +803,7 @@ static int cache_entry_found(struct mq_policy *mq, * finding which cache block to use. 
*/ static int pre_cache_to_cache(struct mq_policy *mq, struct entry *e, + struct policy_locker *locker, struct policy_result *result) { int r; @@ -803,11 +812,12 @@ static int pre_cache_to_cache(struct mq_policy *mq, struct entry *e, /* Ensure there's a free cblock in the cache */ if (epool_empty(&mq->cache_pool)) { result->op = POLICY_REPLACE; - r = demote_cblock(mq, &result->old_oblock); + r = demote_cblock(mq, locker, &result->old_oblock); if (r) { result->op = POLICY_MISS; return 0; } + } else result->op = POLICY_NEW; @@ -829,7 +839,8 @@ static int pre_cache_to_cache(struct mq_policy *mq, struct entry *e, static int pre_cache_entry_found(struct mq_policy *mq, struct entry *e, bool can_migrate, bool discarded_oblock, - int data_dir, struct policy_result *result) + int data_dir, struct policy_locker *locker, + struct policy_result *result) { int r = 0; @@ -842,7 +853,7 @@ static int pre_cache_entry_found(struct mq_policy *mq, struct entry *e, else { requeue(mq, e); - r = pre_cache_to_cache(mq, e, result); + r = pre_cache_to_cache(mq, e, locker, result); } return r; @@ -872,6 +883,7 @@ static void insert_in_pre_cache(struct mq_policy *mq, } static void insert_in_cache(struct mq_policy *mq, dm_oblock_t oblock, + struct policy_locker *locker, struct policy_result *result) { int r; @@ -879,7 +891,7 @@ static void insert_in_cache(struct mq_policy *mq, dm_oblock_t oblock, if (epool_empty(&mq->cache_pool)) { result->op = POLICY_REPLACE; - r = demote_cblock(mq, &result->old_oblock); + r = demote_cblock(mq, locker, &result->old_oblock); if (unlikely(r)) { result->op = POLICY_MISS; insert_in_pre_cache(mq, oblock); @@ -907,11 +919,12 @@ static void insert_in_cache(struct mq_policy *mq, dm_oblock_t oblock, static int no_entry_found(struct mq_policy *mq, dm_oblock_t oblock, bool can_migrate, bool discarded_oblock, - int data_dir, struct policy_result *result) + int data_dir, struct policy_locker *locker, + struct policy_result *result) { if (adjusted_promote_threshold(mq, discarded_oblock, data_dir) <= 1) { if (can_migrate) - insert_in_cache(mq, oblock, result); + insert_in_cache(mq, oblock, locker, result); else return -EWOULDBLOCK; } else { @@ -928,7 +941,8 @@ static int no_entry_found(struct mq_policy *mq, dm_oblock_t oblock, */ static int map(struct mq_policy *mq, dm_oblock_t oblock, bool can_migrate, bool discarded_oblock, - int data_dir, struct policy_result *result) + int data_dir, struct policy_locker *locker, + struct policy_result *result) { int r = 0; struct entry *e = hash_lookup(mq, oblock); @@ -942,11 +956,11 @@ static int map(struct mq_policy *mq, dm_oblock_t oblock, else if (e) r = pre_cache_entry_found(mq, e, can_migrate, discarded_oblock, - data_dir, result); + data_dir, locker, result); else r = no_entry_found(mq, oblock, can_migrate, discarded_oblock, - data_dir, result); + data_dir, locker, result); if (r == -EWOULDBLOCK) result->op = POLICY_MISS; @@ -1012,7 +1026,8 @@ static void copy_tick(struct mq_policy *mq) static int mq_map(struct dm_cache_policy *p, dm_oblock_t oblock, bool can_block, bool can_migrate, bool discarded_oblock, - struct bio *bio, struct policy_result *result) + struct bio *bio, struct policy_locker *locker, + struct policy_result *result) { int r; struct mq_policy *mq = to_mq_policy(p); @@ -1028,7 +1043,7 @@ static int mq_map(struct dm_cache_policy *p, dm_oblock_t oblock, iot_examine_bio(&mq->tracker, bio); r = map(mq, oblock, can_migrate, discarded_oblock, - bio_data_dir(bio), result); + bio_data_dir(bio), locker, result); mutex_unlock(&mq->lock); diff --git 
a/drivers/md/dm-cache-policy.h b/drivers/md/dm-cache-policy.h index f50fe360c5462..5524e21e48365 100644 --- a/drivers/md/dm-cache-policy.h +++ b/drivers/md/dm-cache-policy.h @@ -69,6 +69,18 @@ enum policy_operation { POLICY_REPLACE }; +/* + * When issuing a POLICY_REPLACE the policy needs to make a callback to + * lock the block being demoted. This doesn't need to occur during a + * writeback operation since the block remains in the cache. + */ +struct policy_locker; +typedef int (*policy_lock_fn)(struct policy_locker *l, dm_oblock_t oblock); + +struct policy_locker { + policy_lock_fn fn; +}; + /* * This is the instruction passed back to the core target. */ @@ -122,7 +134,8 @@ struct dm_cache_policy { */ int (*map)(struct dm_cache_policy *p, dm_oblock_t oblock, bool can_block, bool can_migrate, bool discarded_oblock, - struct bio *bio, struct policy_result *result); + struct bio *bio, struct policy_locker *locker, + struct policy_result *result); /* * Sometimes we want to see if a block is in the cache, without diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 7755af3518676..e049becaaf2d2 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -1445,16 +1445,43 @@ static void inc_miss_counter(struct cache *cache, struct bio *bio) &cache->stats.read_miss : &cache->stats.write_miss); } +/*----------------------------------------------------------------*/ + +struct old_oblock_lock { + struct policy_locker locker; + struct cache *cache; + struct prealloc *structs; + struct dm_bio_prison_cell *cell; +}; + +static int null_locker(struct policy_locker *locker, dm_oblock_t b) +{ + /* This should never be called */ + BUG(); + return 0; +} + +static int cell_locker(struct policy_locker *locker, dm_oblock_t b) +{ + struct old_oblock_lock *l = container_of(locker, struct old_oblock_lock, locker); + struct dm_bio_prison_cell *cell_prealloc = prealloc_get_cell(l->structs); + + return bio_detain(l->cache, b, NULL, cell_prealloc, + (cell_free_fn) prealloc_put_cell, + l->structs, &l->cell); +} + static void process_bio(struct cache *cache, struct prealloc *structs, struct bio *bio) { int r; bool release_cell = true; dm_oblock_t block = get_bio_block(cache, bio); - struct dm_bio_prison_cell *cell_prealloc, *old_ocell, *new_ocell; + struct dm_bio_prison_cell *cell_prealloc, *new_ocell; struct policy_result lookup_result; bool passthrough = passthrough_mode(&cache->features); bool discarded_block, can_migrate; + struct old_oblock_lock ool; /* * Check to see if that block is currently migrating. @@ -1469,8 +1496,12 @@ static void process_bio(struct cache *cache, struct prealloc *structs, discarded_block = is_discarded_oblock(cache, block); can_migrate = !passthrough && (discarded_block || spare_migration_bandwidth(cache)); + ool.locker.fn = cell_locker; + ool.cache = cache; + ool.structs = structs; + ool.cell = NULL; r = policy_map(cache->policy, block, true, can_migrate, discarded_block, - bio, &lookup_result); + bio, &ool.locker, &lookup_result); if (r == -EWOULDBLOCK) /* migration has been denied */ @@ -1527,27 +1558,11 @@ static void process_bio(struct cache *cache, struct prealloc *structs, break; case POLICY_REPLACE: - cell_prealloc = prealloc_get_cell(structs); - r = bio_detain(cache, lookup_result.old_oblock, bio, cell_prealloc, - (cell_free_fn) prealloc_put_cell, - structs, &old_ocell); - if (r > 0) { - /* - * We have to be careful to avoid lock inversion of - * the cells. So we back off, and wait for the - * old_ocell to become free. 
- */ - policy_force_mapping(cache->policy, block, - lookup_result.old_oblock); - atomic_inc(&cache->stats.cache_cell_clash); - break; - } atomic_inc(&cache->stats.demotion); atomic_inc(&cache->stats.promotion); - demote_then_promote(cache, structs, lookup_result.old_oblock, block, lookup_result.cblock, - old_ocell, new_ocell); + ool.cell, new_ocell); release_cell = false; break; @@ -2595,6 +2610,9 @@ static int __cache_map(struct cache *cache, struct bio *bio, struct dm_bio_priso bool discarded_block; struct policy_result lookup_result; struct per_bio_data *pb = init_per_bio_data(bio, pb_data_size); + struct old_oblock_lock ool; + + ool.locker.fn = null_locker; if (unlikely(from_oblock(block) >= from_oblock(cache->origin_blocks))) { /* @@ -2633,7 +2651,7 @@ static int __cache_map(struct cache *cache, struct bio *bio, struct dm_bio_priso discarded_block = is_discarded_oblock(cache, block); r = policy_map(cache->policy, block, false, can_migrate, discarded_block, - bio, &lookup_result); + bio, &ool.locker, &lookup_result); if (r == -EWOULDBLOCK) { cell_defer(cache, *cell, true); return DM_MAPIO_SUBMITTED; From 3198d36127dc9015017926b4d905cc62de962f90 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 5 Jun 2015 09:50:42 -0400 Subject: [PATCH 0252/2091] dm stats: fix divide by zero if 'number_of_areas' arg is zero commit dd4c1b7d0c95be1c9245118a3accc41a16f1db67 upstream. If the number_of_areas argument was zero the kernel would crash on div-by-zero. Add better input validation. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-stats.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/md/dm-stats.c b/drivers/md/dm-stats.c index f478a4c96d2f5..419bdd4fc8b8e 100644 --- a/drivers/md/dm-stats.c +++ b/drivers/md/dm-stats.c @@ -795,6 +795,8 @@ static int message_stats_create(struct mapped_device *md, return -EINVAL; if (sscanf(argv[2], "/%u%c", &divisor, &dummy) == 1) { + if (!divisor) + return -EINVAL; step = end - start; if (do_div(step, divisor)) step++; From 0cb6b5abbc5f4a4e237c9c42c4a5014217038891 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Wed, 17 Jun 2015 13:35:19 +0100 Subject: [PATCH 0253/2091] dm space map metadata: fix occasional leak of a metadata block on resize commit 6096d91af0b65a3967139b32d5adbb3647858a26 upstream. The metadata space map has a simplified 'bootstrap' mode that is operational when extending the space maps. Whilst in this mode it's possible for some refcount decrement operations to become queued (eg, as a result of shadowing one of the bitmap indexes). These decrements were not being applied when switching out of bootstrap mode. The effect of this bug was the leaking of a 4k metadata block. This is detected by the latest version of thin_check as a non fatal error. 
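Put differently, the fix makes sure the queue of deferred ref-count operations is drained whenever bootstrap mode ends, not only from the recursion-tracking out() path. The drain loop itself is small (this sketch mirrors the apply_bops() helper the patch introduces below):

static int drain_uncommitted_bops(struct sm_metadata *smm)
{
	int r = 0;

	while (!brb_empty(&smm->uncommitted)) {
		struct block_op bop;

		r = brb_pop(&smm->uncommitted, &bop);
		if (r)
			break;			/* ring-buffer corruption */

		r = commit_bop(smm, &bop);	/* apply the queued inc/dec */
		if (r)
			break;
	}

	return r;
}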
Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- .../persistent-data/dm-space-map-metadata.c | 50 +++++++++++++------ 1 file changed, 35 insertions(+), 15 deletions(-) diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c index e8a9042988871..53091295fce9b 100644 --- a/drivers/md/persistent-data/dm-space-map-metadata.c +++ b/drivers/md/persistent-data/dm-space-map-metadata.c @@ -204,6 +204,27 @@ static void in(struct sm_metadata *smm) smm->recursion_count++; } +static int apply_bops(struct sm_metadata *smm) +{ + int r = 0; + + while (!brb_empty(&smm->uncommitted)) { + struct block_op bop; + + r = brb_pop(&smm->uncommitted, &bop); + if (r) { + DMERR("bug in bop ring buffer"); + break; + } + + r = commit_bop(smm, &bop); + if (r) + break; + } + + return r; +} + static int out(struct sm_metadata *smm) { int r = 0; @@ -216,21 +237,8 @@ static int out(struct sm_metadata *smm) return -ENOMEM; } - if (smm->recursion_count == 1) { - while (!brb_empty(&smm->uncommitted)) { - struct block_op bop; - - r = brb_pop(&smm->uncommitted, &bop); - if (r) { - DMERR("bug in bop ring buffer"); - break; - } - - r = commit_bop(smm, &bop); - if (r) - break; - } - } + if (smm->recursion_count == 1) + apply_bops(smm); smm->recursion_count--; @@ -704,6 +712,12 @@ static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks) } old_len = smm->begin; + r = apply_bops(smm); + if (r) { + DMERR("%s: apply_bops failed", __func__); + goto out; + } + r = sm_ll_commit(&smm->ll); if (r) goto out; @@ -773,6 +787,12 @@ int dm_sm_metadata_create(struct dm_space_map *sm, if (r) return r; + r = apply_bops(smm); + if (r) { + DMERR("%s: apply_bops failed", __func__); + return r; + } + return sm_metadata_commit(sm); } From 3fa6173cb7abee0d4fe493bc71665aa12188bace Mon Sep 17 00:00:00 2001 From: Dennis Yang Date: Fri, 26 Jun 2015 15:25:48 +0100 Subject: [PATCH 0254/2091] dm btree remove: fix bug in redistribute3 commit 4c7e309340ff85072e96f529582d159002c36734 upstream. redistribute3() shares entries out across 3 nodes. Some entries were being moved the wrong way, breaking the ordering. This manifested as a BUG() in dm-btree-remove.c:shift() when entries were removed from the btree. 
For additional context see: https://www.redhat.com/archives/dm-devel/2015-May/msg00113.html Signed-off-by: Dennis Yang Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/persistent-data/dm-btree-remove.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/md/persistent-data/dm-btree-remove.c b/drivers/md/persistent-data/dm-btree-remove.c index b88757cd0d1d9..a03178e91a79a 100644 --- a/drivers/md/persistent-data/dm-btree-remove.c +++ b/drivers/md/persistent-data/dm-btree-remove.c @@ -309,8 +309,8 @@ static void redistribute3(struct dm_btree_info *info, struct btree_node *parent, if (s < 0 && nr_center < -s) { /* not enough in central node */ - shift(left, center, nr_center); - s = nr_center - target; + shift(left, center, -nr_center); + s += nr_center; shift(left, right, s); nr_right += s; } else @@ -323,7 +323,7 @@ static void redistribute3(struct dm_btree_info *info, struct btree_node *parent, if (s > 0 && nr_center < s) { /* not enough in central node */ shift(center, right, nr_center); - s = target - nr_center; + s -= nr_center; shift(left, right, s); nr_left -= s; } else From b2ce140833c6cc81068f2c9d2cbefd78be7550bf Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 3 Jul 2015 10:22:42 +0100 Subject: [PATCH 0255/2091] dm thin: allocate the cell_sort_array dynamically commit a822c83e47d97cdef38c4352e1ef62d9f46cfe98 upstream. Given the pool's cell_sort_array holds 8192 pointers it triggers an order 5 allocation via kmalloc. This order 5 allocation is prone to failure as system memory gets more fragmented over time. Fix this by allocating the cell_sort_array using vmalloc. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-thin.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 921aafd12aee6..e22e6c892b8a0 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -260,7 +261,7 @@ struct pool { process_mapping_fn process_prepared_mapping; process_mapping_fn process_prepared_discard; - struct dm_bio_prison_cell *cell_sort_array[CELL_SORT_ARRAY_SIZE]; + struct dm_bio_prison_cell **cell_sort_array; }; static enum pool_mode get_pool_mode(struct pool *pool); @@ -2499,6 +2500,7 @@ static void __pool_destroy(struct pool *pool) { __pool_table_remove(pool); + vfree(pool->cell_sort_array); if (dm_pool_metadata_close(pool->pmd) < 0) DMWARN("%s: dm_pool_metadata_close() failed.", __func__); @@ -2611,6 +2613,13 @@ static struct pool *pool_create(struct mapped_device *pool_md, goto bad_mapping_pool; } + pool->cell_sort_array = vmalloc(sizeof(*pool->cell_sort_array) * CELL_SORT_ARRAY_SIZE); + if (!pool->cell_sort_array) { + *error = "Error allocating cell sort array"; + err_p = ERR_PTR(-ENOMEM); + goto bad_sort_array; + } + pool->ref_count = 1; pool->last_commit_jiffies = jiffies; pool->pool_md = pool_md; @@ -2619,6 +2628,8 @@ static struct pool *pool_create(struct mapped_device *pool_md, return pool; +bad_sort_array: + mempool_destroy(pool->mapping_pool); bad_mapping_pool: dm_deferred_set_destroy(pool->all_io_ds); bad_all_io_ds: From be22af9f28db4b669969b0ea231d5ecb4dc7b01d Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 3 Jul 2015 14:51:32 +0100 Subject: [PATCH 0256/2091] dm btree: silence lockdep lock inversion in dm_btree_del() commit 1c7518794a3647eb345d59ee52844e8a40405198 upstream. 
Allocate memory using GFP_NOIO when deleting a btree. dm_btree_del() can be called via an ioctl and we don't want to recurse into the FS or block layer. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/persistent-data/dm-btree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c index 200ac12a1d407..fdd3793e22f95 100644 --- a/drivers/md/persistent-data/dm-btree.c +++ b/drivers/md/persistent-data/dm-btree.c @@ -255,7 +255,7 @@ int dm_btree_del(struct dm_btree_info *info, dm_block_t root) int r; struct del_stack *s; - s = kmalloc(sizeof(*s), GFP_KERNEL); + s = kmalloc(sizeof(*s), GFP_NOIO); if (!s) return -ENOMEM; s->info = info; From 4f27844f84d6dd76fe84d25c5bece18295d870f6 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Thu, 16 Jul 2015 15:50:45 +0200 Subject: [PATCH 0257/2091] mmc: block: Add missing mmc_blk_put() in power_ro_lock_show() commit 9098f84cced870f54d8c410dd2444cfa61467fa0 upstream. Enclosing mmc_blk_put() is missing in power_ro_lock_show() sysfs handler, let's add it. Fixes: add710eaa886 ("mmc: boot partition ro lock support") Signed-off-by: Tomas Winkler Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/card/block.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index 60f7141a6b02e..e4a52ce9242fe 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -208,6 +208,8 @@ static ssize_t power_ro_lock_show(struct device *dev, ret = snprintf(buf, PAGE_SIZE, "%d\n", locked); + mmc_blk_put(md); + return ret; } From 7c9da37f27a40fabe91dae40efb6117e97469326 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 5 May 2015 19:49:54 +0800 Subject: [PATCH 0258/2091] block: loop: convert to per-device workqueue commit f4aa4c7bbac6c4afdd4adccf90898c1a3685396d upstream. Documentation/workqueue.txt: If there is dependency among multiple work items used during memory reclaim, they should be queued to separate wq each with WQ_MEM_RECLAIM. Loop devices can be stacked, so we have to convert to per-device workqueue. One example is Fedora live CD. Fixes: b5dd2f6047ca108001328aac0e8588edd15f1778 Cc: Justin M. 
Forbes Signed-off-by: Ming Lei Acked-by: Tejun Heo Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/block/loop.c | 30 ++++++++++++++---------------- drivers/block/loop.h | 1 + 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index d7173cb1ea76c..9786aa3f9cf27 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -86,8 +86,6 @@ static DEFINE_MUTEX(loop_index_mutex); static int max_part; static int part_shift; -static struct workqueue_struct *loop_wq; - static int transfer_xor(struct loop_device *lo, int cmd, struct page *raw_page, unsigned raw_off, struct page *loop_page, unsigned loop_off, @@ -725,6 +723,12 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, size = get_loop_size(lo, file); if ((loff_t)(sector_t)size != size) goto out_putf; + error = -ENOMEM; + lo->wq = alloc_workqueue("kloopd%d", + WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_UNBOUND, 0, + lo->lo_number); + if (!lo->wq) + goto out_putf; error = 0; @@ -872,6 +876,8 @@ static int loop_clr_fd(struct loop_device *lo) lo->lo_flags = 0; if (!part_shift) lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN; + destroy_workqueue(lo->wq); + lo->wq = NULL; mutex_unlock(&lo->lo_ctl_mutex); /* * Need not hold lo_ctl_mutex to fput backing file. @@ -1425,9 +1431,13 @@ static int loop_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { struct loop_cmd *cmd = blk_mq_rq_to_pdu(bd->rq); + struct loop_device *lo = cmd->rq->q->queuedata; blk_mq_start_request(bd->rq); + if (lo->lo_state != Lo_bound) + return -EIO; + if (cmd->rq->cmd_flags & REQ_WRITE) { struct loop_device *lo = cmd->rq->q->queuedata; bool need_sched = true; @@ -1441,9 +1451,9 @@ static int loop_queue_rq(struct blk_mq_hw_ctx *hctx, spin_unlock_irq(&lo->lo_lock); if (need_sched) - queue_work(loop_wq, &lo->write_work); + queue_work(lo->wq, &lo->write_work); } else { - queue_work(loop_wq, &cmd->read_work); + queue_work(lo->wq, &cmd->read_work); } return BLK_MQ_RQ_QUEUE_OK; @@ -1455,9 +1465,6 @@ static void loop_handle_cmd(struct loop_cmd *cmd) struct loop_device *lo = cmd->rq->q->queuedata; int ret = -EIO; - if (lo->lo_state != Lo_bound) - goto failed; - if (write && (lo->lo_flags & LO_FLAGS_READ_ONLY)) goto failed; @@ -1806,13 +1813,6 @@ static int __init loop_init(void) goto misc_out; } - loop_wq = alloc_workqueue("kloopd", - WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_UNBOUND, 0); - if (!loop_wq) { - err = -ENOMEM; - goto misc_out; - } - blk_register_region(MKDEV(LOOP_MAJOR, 0), range, THIS_MODULE, loop_probe, NULL, NULL); @@ -1850,8 +1850,6 @@ static void __exit loop_exit(void) blk_unregister_region(MKDEV(LOOP_MAJOR, 0), range); unregister_blkdev(LOOP_MAJOR, "loop"); - destroy_workqueue(loop_wq); - misc_deregister(&loop_misc); } diff --git a/drivers/block/loop.h b/drivers/block/loop.h index 301c27f8323ff..49564edf55816 100644 --- a/drivers/block/loop.h +++ b/drivers/block/loop.h @@ -54,6 +54,7 @@ struct loop_device { gfp_t old_gfp_mask; spinlock_t lo_lock; + struct workqueue_struct *wq; struct list_head write_cmd_head; struct work_struct write_work; bool write_started; From adc7a64b2882aba125431dd22e4e364b71ff37c6 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 5 May 2015 19:49:55 +0800 Subject: [PATCH 0259/2091] block: loop: avoiding too many pending per work I/O commit 4d4e41aef9429872ea3b105e83426941f7185ab6 upstream. If there are too many pending per work I/O, too many high priority work thread can be generated so that system performance can be effected. 
This patch limits the max_active parameter of workqueue as 16. This patch fixes Fedora 22 live booting performance regression when it is booted from squashfs over dm based on loop, and looks the following reasons are related with the problem: - not like other filesyststems(such as ext4), squashfs is a bit special, and I observed that increasing I/O jobs to access file in squashfs only improve I/O performance a little, but it can make big difference for ext4 - nested loop: both squashfs.img and ext3fs.img are mounted as loop block, and ext3fs.img is inside the squashfs - during booting, lots of tasks may run concurrently Fixes: b5dd2f6047ca108001328aac0e8588edd15f1778 Cc: Justin M. Forbes Signed-off-by: Ming Lei Acked-by: Tejun Heo Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/block/loop.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 9786aa3f9cf27..cef6fa83a2740 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -725,7 +725,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, goto out_putf; error = -ENOMEM; lo->wq = alloc_workqueue("kloopd%d", - WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_UNBOUND, 0, + WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_UNBOUND, 16, lo->lo_number); if (!lo->wq) goto out_putf; From f7639d364c02eff91f433aca4e2d9ddaf37fb099 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Wed, 22 Jul 2015 07:57:12 -0400 Subject: [PATCH 0260/2091] block: Do a full clone when splitting discard bios commit f3f5da624e0a891c34d8cd513c57f1d9b0c7dadc upstream. This fixes a data corruption bug when using discard on top of MD linear, raid0 and raid10 personalities. Commit 20d0189b1012 "block: Introduce new bio_split()" permits sharing the bio_vec between the two resulting bios. That is fine for read/write requests where the bio_vec is immutable. For discards, however, we need to be able to attach a payload and update the bio_vec so the page can get mapped to a scatterlist entry. Therefore the bio_vec can not be shared when splitting discards and we must do a full clone. Signed-off-by: Martin K. Petersen Reported-by: Seunguk Shin Tested-by: Seunguk Shin Cc: Seunguk Shin Cc: Jens Axboe Cc: Kent Overstreet Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- block/bio.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/block/bio.c b/block/bio.c index f66a4eae16ee4..4441522ca339e 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1814,8 +1814,9 @@ EXPORT_SYMBOL(bio_endio_nodec); * Allocates and returns a new bio which represents @sectors from the start of * @bio, and updates @bio to represent the remaining sectors. * - * The newly allocated bio will point to @bio's bi_io_vec; it is the caller's - * responsibility to ensure that @bio is not freed before the split. + * Unless this is a discard request the newly allocated bio will point + * to @bio's bi_io_vec; it is the caller's responsibility to ensure that + * @bio is not freed before the split. */ struct bio *bio_split(struct bio *bio, int sectors, gfp_t gfp, struct bio_set *bs) @@ -1825,7 +1826,15 @@ struct bio *bio_split(struct bio *bio, int sectors, BUG_ON(sectors <= 0); BUG_ON(sectors >= bio_sectors(bio)); - split = bio_clone_fast(bio, gfp, bs); + /* + * Discards need a mutable bio_vec to accommodate the payload + * required by the DSM TRIM and UNMAP commands. 
+ */ + if (bio->bi_rw & REQ_DISCARD) + split = bio_clone_bioset(bio, gfp, bs); + else + split = bio_clone_fast(bio, gfp, bs); + if (!split) return NULL; From 85d5e517fe566cc3581d2220c321e9ee11d2022f Mon Sep 17 00:00:00 2001 From: Ludovic Desroches Date: Thu, 28 May 2015 11:55:16 +0200 Subject: [PATCH 0261/2091] ARM: at91/dt: sama5d4ek: mci0 uses slot 0 commit ea25525ce0d195724fead07fe6562fe478a3bf6f upstream. Mci0 uses slot 0 not 1. Signed-off-by: Ludovic Desroches Fixes: 7a4752677c44 ("ARM: at91: dt: add device tree file for SAMA5D4ek board") Signed-off-by: Nicolas Ferre Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/at91-sama5d4ek.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/at91-sama5d4ek.dts b/arch/arm/boot/dts/at91-sama5d4ek.dts index 89ef4a540db58..45e7761b7a295 100644 --- a/arch/arm/boot/dts/at91-sama5d4ek.dts +++ b/arch/arm/boot/dts/at91-sama5d4ek.dts @@ -108,8 +108,8 @@ mmc0: mmc@f8000000 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_mmc0_clk_cmd_dat0 &pinctrl_mmc0_dat1_3 &pinctrl_mmc0_cd>; - slot@1 { - reg = <1>; + slot@0 { + reg = <0>; bus-width = <4>; cd-gpios = <&pioE 5 0>; }; From e4b5f20b9d80d0d87af5d1393f6fdab98245dc41 Mon Sep 17 00:00:00 2001 From: "ludovic.desroches@atmel.com" Date: Mon, 8 Jun 2015 15:55:48 +0200 Subject: [PATCH 0262/2091] ARM: at91/dt: sama5d4: fix dma conf for aes, sha and tdes nodes commit aabbe8f1a561dd8318e693830d9ae377c9a04d2b upstream. The xdmac channel configuration is done in one cell not two. This error prevents from probing devices correctly. Signed-off-by: Ludovic Desroches Fixes: 83906783b766 ("ARM: at91/dt: sama5d4: add aes, sha and tdes nodes") Acked-by: Nicolas Ferre Signed-off-by: Kevin Hilman Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/sama5d4.dtsi | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/arm/boot/dts/sama5d4.dtsi b/arch/arm/boot/dts/sama5d4.dtsi index 6b1bb58f9c0b6..51ddec4569b64 100644 --- a/arch/arm/boot/dts/sama5d4.dtsi +++ b/arch/arm/boot/dts/sama5d4.dtsi @@ -1125,10 +1125,10 @@ compatible = "atmel,at91sam9g46-aes"; reg = <0xfc044000 0x100>; interrupts = <12 IRQ_TYPE_LEVEL_HIGH 0>; - dmas = <&dma0 (AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1)) - AT91_XDMAC_DT_PERID(41)>, - <&dma0 (AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1)) - AT91_XDMAC_DT_PERID(40)>; + dmas = <&dma0 (AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1) + | AT91_XDMAC_DT_PERID(41))>, + <&dma0 (AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1) + | AT91_XDMAC_DT_PERID(40))>; dma-names = "tx", "rx"; clocks = <&aes_clk>; clock-names = "aes_clk"; @@ -1139,10 +1139,10 @@ compatible = "atmel,at91sam9g46-tdes"; reg = <0xfc04c000 0x100>; interrupts = <14 IRQ_TYPE_LEVEL_HIGH 0>; - dmas = <&dma0 (AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1)) - AT91_XDMAC_DT_PERID(42)>, - <&dma0 (AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1)) - AT91_XDMAC_DT_PERID(43)>; + dmas = <&dma0 (AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1) + | AT91_XDMAC_DT_PERID(42))>, + <&dma0 (AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1) + | AT91_XDMAC_DT_PERID(43))>; dma-names = "tx", "rx"; clocks = <&tdes_clk>; clock-names = "tdes_clk"; @@ -1153,8 +1153,8 @@ compatible = "atmel,at91sam9g46-sha"; reg = <0xfc050000 0x100>; interrupts = <15 IRQ_TYPE_LEVEL_HIGH 0>; - dmas = <&dma0 (AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1)) - AT91_XDMAC_DT_PERID(44)>; + dmas = <&dma0 (AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1) + | AT91_XDMAC_DT_PERID(44))>; dma-names = 
"tx"; clocks = <&sha_clk>; clock-names = "sha_clk"; From 6e853d1593afa65bcddaa991b1b4fef508262317 Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Mon, 11 May 2015 13:00:31 +0200 Subject: [PATCH 0263/2091] tty/serial: at91: RS485 mode: 0 is valid for delay_rts_after_send MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 8687634b7908c42eb700e0469e110e02833611d1 upstream. In RS485 mode, we may want to set the delay_rts_after_send value to 0. In the datasheet, the 0 value is said to "disable" the Transmitter Timeguard but this is exactly the expected behavior if we want no delay... Moreover, if the value was set to non-zero value by device-tree or earlier ioctl command, it was impossible to change it back to zero. Reported-by: Sami Pietikäinen Signed-off-by: Nicolas Ferre Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/atmel_serial.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c index 27dade29646b7..5ca1dfb0561cd 100644 --- a/drivers/tty/serial/atmel_serial.c +++ b/drivers/tty/serial/atmel_serial.c @@ -315,8 +315,7 @@ static int atmel_config_rs485(struct uart_port *port, if (rs485conf->flags & SER_RS485_ENABLED) { dev_dbg(port->dev, "Setting UART to RS485\n"); atmel_port->tx_done_mask = ATMEL_US_TXEMPTY; - if ((rs485conf->delay_rts_after_send) > 0) - UART_PUT_TTGR(port, rs485conf->delay_rts_after_send); + UART_PUT_TTGR(port, rs485conf->delay_rts_after_send); mode |= ATMEL_US_USMODE_RS485; } else { dev_dbg(port->dev, "Setting UART to RS232\n"); @@ -354,8 +353,7 @@ static void atmel_set_mctrl(struct uart_port *port, u_int mctrl) /* override mode to RS485 if needed, otherwise keep the current mode */ if (port->rs485.flags & SER_RS485_ENABLED) { - if ((port->rs485.delay_rts_after_send) > 0) - UART_PUT_TTGR(port, port->rs485.delay_rts_after_send); + UART_PUT_TTGR(port, port->rs485.delay_rts_after_send); mode &= ~ATMEL_US_USMODE; mode |= ATMEL_US_USMODE_RS485; } @@ -2061,8 +2059,7 @@ static void atmel_set_termios(struct uart_port *port, struct ktermios *termios, /* mode */ if (port->rs485.flags & SER_RS485_ENABLED) { - if ((port->rs485.delay_rts_after_send) > 0) - UART_PUT_TTGR(port, port->rs485.delay_rts_after_send); + UART_PUT_TTGR(port, port->rs485.delay_rts_after_send); mode |= ATMEL_US_USMODE_RS485; } else if (termios->c_cflag & CRTSCTS) { /* RS232 with hardware handshake (RTS/CTS) */ From 70e421867746a693dd01b05531c4787e0747666e Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Wed, 17 Jun 2015 10:59:04 +0200 Subject: [PATCH 0264/2091] ARM: at91/dt: trivial: fix USB udc compatible string commit 50f0a44991516b5b9744ecb2c080c2ec6ad21b25 upstream. To please checkpatch and the tiresome reader, add the "atmel," prefix to the USB udc compatible string. 
Signed-off-by: Nicolas Ferre Signed-off-by: Kevin Hilman Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/usb/atmel-usb.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/usb/atmel-usb.txt b/Documentation/devicetree/bindings/usb/atmel-usb.txt index e180d56c75dbe..de773a00e2d47 100644 --- a/Documentation/devicetree/bindings/usb/atmel-usb.txt +++ b/Documentation/devicetree/bindings/usb/atmel-usb.txt @@ -60,9 +60,9 @@ Atmel High-Speed USB device controller Required properties: - compatible: Should be one of the following - "at91sam9rl-udc" - "at91sam9g45-udc" - "sama5d3-udc" + "atmel,at91sam9rl-udc" + "atmel,at91sam9g45-udc" + "atmel,sama5d3-udc" - reg: Address and length of the register set for the device - interrupts: Should contain usba interrupt - ep childnode: To specify the number of endpoints and their properties. From 8aef146bbff57b508e8adfb3541dbc464abe3676 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Wed, 17 Jun 2015 10:59:05 +0200 Subject: [PATCH 0265/2091] ARM: at91/dt: update udc compatible strings commit 6540165cf41655810ee67b78f01537af022a636a upstream. at91sam9g45, at91sam9x5 and sama5 SoCs should not use "atmel,at91sam9rl-udc" for their USB device compatible property since this compatible is attached to a specific hardware bug fix. Signed-off-by: Boris Brezillon Acked-by: Alexandre Belloni Tested-by: Bo Shen Acked-by: Nicolas Ferre Signed-off-by: Kevin Hilman Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/at91sam9g45.dtsi | 2 +- arch/arm/boot/dts/at91sam9x5.dtsi | 2 +- arch/arm/boot/dts/sama5d3.dtsi | 2 +- arch/arm/boot/dts/sama5d4.dtsi | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/at91sam9g45.dtsi b/arch/arm/boot/dts/at91sam9g45.dtsi index 70e59c5ceb2f7..e54421176af8a 100644 --- a/arch/arm/boot/dts/at91sam9g45.dtsi +++ b/arch/arm/boot/dts/at91sam9g45.dtsi @@ -1148,7 +1148,7 @@ usb2: gadget@fff78000 { #address-cells = <1>; #size-cells = <0>; - compatible = "atmel,at91sam9rl-udc"; + compatible = "atmel,at91sam9g45-udc"; reg = <0x00600000 0x80000 0xfff78000 0x400>; interrupts = <27 IRQ_TYPE_LEVEL_HIGH 0>; diff --git a/arch/arm/boot/dts/at91sam9x5.dtsi b/arch/arm/boot/dts/at91sam9x5.dtsi index 3aa56ae3410a5..3314a73037546 100644 --- a/arch/arm/boot/dts/at91sam9x5.dtsi +++ b/arch/arm/boot/dts/at91sam9x5.dtsi @@ -1062,7 +1062,7 @@ usb2: gadget@f803c000 { #address-cells = <1>; #size-cells = <0>; - compatible = "atmel,at91sam9rl-udc"; + compatible = "atmel,at91sam9g45-udc"; reg = <0x00500000 0x80000 0xf803c000 0x400>; interrupts = <23 IRQ_TYPE_LEVEL_HIGH 0>; diff --git a/arch/arm/boot/dts/sama5d3.dtsi b/arch/arm/boot/dts/sama5d3.dtsi index 57ab8587f7b97..37e6182f14700 100644 --- a/arch/arm/boot/dts/sama5d3.dtsi +++ b/arch/arm/boot/dts/sama5d3.dtsi @@ -1321,7 +1321,7 @@ usb0: gadget@00500000 { #address-cells = <1>; #size-cells = <0>; - compatible = "atmel,at91sam9rl-udc"; + compatible = "atmel,sama5d3-udc"; reg = <0x00500000 0x100000 0xf8030000 0x4000>; interrupts = <33 IRQ_TYPE_LEVEL_HIGH 2>; diff --git a/arch/arm/boot/dts/sama5d4.dtsi b/arch/arm/boot/dts/sama5d4.dtsi index 51ddec4569b64..a5f5f4090af6d 100644 --- a/arch/arm/boot/dts/sama5d4.dtsi +++ b/arch/arm/boot/dts/sama5d4.dtsi @@ -123,7 +123,7 @@ usb0: gadget@00400000 { #address-cells = <1>; #size-cells = <0>; - compatible = "atmel,at91sam9rl-udc"; + compatible = "atmel,sama5d3-udc"; reg = <0x00400000 0x100000 0xfc02c000 0x4000>; interrupts = <47 IRQ_TYPE_LEVEL_HIGH 2>; From 
95f9b41004efdfe910aaf91b5d79f32d532845b1 Mon Sep 17 00:00:00 2001 From: Pawel Moll Date: Thu, 2 Apr 2015 18:50:32 +0100 Subject: [PATCH 0266/2091] bus: arm-ccn: Fix node->XP config conversion commit a18f8e97fe69195823d7fb5c68a8d6565f39db4b upstream. Events defined as watchpoints on nodes must have their config values converted so that they apply to the respective node's XP. The function setting new values was using wrong mask for the "port" field, resulting in corrupted value. Fixed now. Signed-off-by: Pawel Moll Signed-off-by: Greg Kroah-Hartman --- drivers/bus/arm-ccn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bus/arm-ccn.c b/drivers/bus/arm-ccn.c index aaa0f2a871185..60397ec77ff7b 100644 --- a/drivers/bus/arm-ccn.c +++ b/drivers/bus/arm-ccn.c @@ -212,7 +212,7 @@ static int arm_ccn_node_to_xp_port(int node) static void arm_ccn_pmu_config_set(u64 *config, u32 node_xp, u32 type, u32 port) { - *config &= ~((0xff << 0) | (0xff << 8) | (0xff << 24)); + *config &= ~((0xff << 0) | (0xff << 8) | (0x3 << 24)); *config |= (node_xp << 0) | (type << 8) | (port << 24); } From 0f0b7b10c3545704cf0d1705e33f1cc5a9fe8097 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 23 Jun 2015 16:35:06 +0200 Subject: [PATCH 0267/2091] drm/vgem: Set unique to "vgem" commit fa2f97dd33c2c32a06a5ea7f6e87af06a2e26baa upstream. Since there's only one global instance ever we don't need to have anything fancy. Stops a WARNING in the get_unique ioctl that the unique name isn't set. Reportedy-and-tested-by: Fabio Coatti Cc: Fabio Coatti Signed-off-by: Daniel Vetter Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/vgem/vgem_drv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c index 7a207ca547be2..6394547cf67a0 100644 --- a/drivers/gpu/drm/vgem/vgem_drv.c +++ b/drivers/gpu/drm/vgem/vgem_drv.c @@ -328,6 +328,8 @@ static int __init vgem_init(void) goto out; } + drm_dev_set_unique(vgem_device, "vgem"); + ret = drm_dev_register(vgem_device, 0); if (ret) From 5c507167d2ef467ecd2ec4ad40a0bd70b4fad814 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 15 Jun 2015 10:34:28 +1000 Subject: [PATCH 0268/2091] drm/dp/mst: close deadlock in connector destruction. commit 6b8eeca65b18ae77e175cc2b6571731f0ee413bf upstream. I've only seen this once, and I failed to capture the lockdep backtrace, but I did some investigations. If we are calling into the MST layer from EDID probing, we have the mode_config mutex held, if during that EDID probing, the MST hub goes away, then we can get a deadlock where the connector destruction function in the driver tries to retake the mode config mutex. This offloads connector destruction to a workqueue, and avoid the subsequenct lock ordering issue. 
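The shape of that fix (queue the object on a destroy list under a small dedicated lock, then let a worker tear it down with that lock dropped) can be sketched in standalone userspace code; the pthread-based list and the type names below are invented for illustration, not the DRM helpers:

    /*
     * Sketch of deferred destruction: items queued under a small list lock
     * are torn down later by a worker, which drops the list lock before each
     * destructor runs, so destruction can never nest inside a caller that
     * already holds the bigger "config" lock.
     */
    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct conn {
        int id;
        struct conn *next;
    };

    static pthread_mutex_t destroy_lock = PTHREAD_MUTEX_INITIALIZER;
    static struct conn *destroy_list;

    /* May be called with other locks held: only queues, never destroys. */
    static void conn_schedule_destroy(struct conn *c)
    {
        pthread_mutex_lock(&destroy_lock);
        c->next = destroy_list;
        destroy_list = c;
        pthread_mutex_unlock(&destroy_lock);
    }

    /* Worker: pop one entry at a time, destroy it with the list lock dropped. */
    static void *destroy_worker(void *arg)
    {
        (void)arg;
        for (;;) {
            struct conn *c;

            pthread_mutex_lock(&destroy_lock);
            c = destroy_list;
            if (!c) {
                pthread_mutex_unlock(&destroy_lock);
                break;
            }
            destroy_list = c->next;
            pthread_mutex_unlock(&destroy_lock);

            printf("destroying connector %d outside the list lock\n", c->id);
            free(c);
        }
        return NULL;
    }

    int main(void)
    {
        pthread_t t;

        for (int i = 0; i < 3; i++) {
            struct conn *c = malloc(sizeof(*c));

            c->id = i;
            conn_schedule_destroy(c);
        }
        pthread_create(&t, NULL, destroy_worker, NULL);
        pthread_join(t, NULL);
        return 0;
    }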
Acked-by: Daniel Vetter Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_dp_mst_topology.c | 40 +++++++++++++++++++++++++-- include/drm/drm_crtc.h | 2 ++ include/drm/drm_dp_mst_helper.h | 4 +++ 3 files changed, 44 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index 132581ca4ad84..c0069e8bb4bba 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -867,8 +867,16 @@ static void drm_dp_destroy_port(struct kref *kref) port->vcpi.num_slots = 0; kfree(port->cached_edid); - if (port->connector) - (*port->mgr->cbs->destroy_connector)(mgr, port->connector); + + /* we can't destroy the connector here, as + we might be holding the mode_config.mutex + from an EDID retrieval */ + if (port->connector) { + mutex_lock(&mgr->destroy_connector_lock); + list_add(&port->connector->destroy_list, &mgr->destroy_connector_list); + mutex_unlock(&mgr->destroy_connector_lock); + schedule_work(&mgr->destroy_connector_work); + } drm_dp_port_teardown_pdt(port, port->pdt); if (!port->input && port->vcpi.vcpi > 0) @@ -2632,6 +2640,30 @@ static void drm_dp_tx_work(struct work_struct *work) mutex_unlock(&mgr->qlock); } +static void drm_dp_destroy_connector_work(struct work_struct *work) +{ + struct drm_dp_mst_topology_mgr *mgr = container_of(work, struct drm_dp_mst_topology_mgr, destroy_connector_work); + struct drm_connector *connector; + + /* + * Not a regular list traverse as we have to drop the destroy + * connector lock before destroying the connector, to avoid AB->BA + * ordering between this lock and the config mutex. + */ + for (;;) { + mutex_lock(&mgr->destroy_connector_lock); + connector = list_first_entry_or_null(&mgr->destroy_connector_list, struct drm_connector, destroy_list); + if (!connector) { + mutex_unlock(&mgr->destroy_connector_lock); + break; + } + list_del(&connector->destroy_list); + mutex_unlock(&mgr->destroy_connector_lock); + + mgr->cbs->destroy_connector(mgr, connector); + } +} + /** * drm_dp_mst_topology_mgr_init - initialise a topology manager * @mgr: manager struct to initialise @@ -2651,10 +2683,13 @@ int drm_dp_mst_topology_mgr_init(struct drm_dp_mst_topology_mgr *mgr, mutex_init(&mgr->lock); mutex_init(&mgr->qlock); mutex_init(&mgr->payload_lock); + mutex_init(&mgr->destroy_connector_lock); INIT_LIST_HEAD(&mgr->tx_msg_upq); INIT_LIST_HEAD(&mgr->tx_msg_downq); + INIT_LIST_HEAD(&mgr->destroy_connector_list); INIT_WORK(&mgr->work, drm_dp_mst_link_probe_work); INIT_WORK(&mgr->tx_work, drm_dp_tx_work); + INIT_WORK(&mgr->destroy_connector_work, drm_dp_destroy_connector_work); init_waitqueue_head(&mgr->tx_waitq); mgr->dev = dev; mgr->aux = aux; @@ -2679,6 +2714,7 @@ EXPORT_SYMBOL(drm_dp_mst_topology_mgr_init); */ void drm_dp_mst_topology_mgr_destroy(struct drm_dp_mst_topology_mgr *mgr) { + flush_work(&mgr->destroy_connector_work); mutex_lock(&mgr->payload_lock); kfree(mgr->payloads); mgr->payloads = NULL; diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index ca71c03143d1b..54233583c6cb9 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -731,6 +731,8 @@ struct drm_connector { uint8_t num_h_tile, num_v_tile; uint8_t tile_h_loc, tile_v_loc; uint16_t tile_h_size, tile_v_size; + + struct list_head destroy_list; }; /** diff --git a/include/drm/drm_dp_mst_helper.h b/include/drm/drm_dp_mst_helper.h index a2507817be419..86d0b25ed0547 100644 --- a/include/drm/drm_dp_mst_helper.h +++ b/include/drm/drm_dp_mst_helper.h @@ 
-463,6 +463,10 @@ struct drm_dp_mst_topology_mgr { struct work_struct work; struct work_struct tx_work; + + struct list_head destroy_connector_list; + struct mutex destroy_connector_lock; + struct work_struct destroy_connector_work; }; int drm_dp_mst_topology_mgr_init(struct drm_dp_mst_topology_mgr *mgr, struct device *dev, struct drm_dp_aux *aux, int max_dpcd_transaction_bytes, int max_payloads, int conn_base_id); From e60bd2d701a57d0cd9fe53de5e1426af03874c0d Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 22 Jun 2015 14:40:44 +1000 Subject: [PATCH 0269/2091] drm/dp/mst: take lock around looking up the branch device on hpd irq commit 9eb1e57f564d4e6e10991402726cc83fe0b9172f upstream. If we are doing an MST transaction and we've gotten HPD and we lookup the device from the incoming msg, we should take the mgr lock around it, so that mst_primary and mstb->ports are valid. Reviewed-by: Daniel Vetter Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_dp_mst_topology.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index c0069e8bb4bba..9dce953316244 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -1171,6 +1171,8 @@ static struct drm_dp_mst_branch *drm_dp_get_mst_branch_device(struct drm_dp_mst_ struct drm_dp_mst_port *port; int i; /* find the port by iterating down */ + + mutex_lock(&mgr->lock); mstb = mgr->mst_primary; for (i = 0; i < lct - 1; i++) { @@ -1190,6 +1192,7 @@ static struct drm_dp_mst_branch *drm_dp_get_mst_branch_device(struct drm_dp_mst_ } } kref_get(&mstb->kref); + mutex_unlock(&mgr->lock); return mstb; } From c8f84f037a61845643f4f86bf9ba49494a65e2c0 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 22 Jun 2015 17:31:59 +1000 Subject: [PATCH 0270/2091] drm/dp/mst: make sure mst_primary mstb is valid in work function commit 9254ec496a1dbdddeab50021a8138dc627a8166a upstream. This validates the mst_primary under the lock, and then calls into the check and send function. This makes the code a lot easier to understand the locking rules in. 
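A minimal sketch of that pattern, pinning an object with a reference while the lock is held and then using it with the lock dropped; the plain integer refcount and the names below are illustrative only, not the kref-based DRM code:

    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct branch {
        int refs;
        int addr;
    };

    static pthread_mutex_t mgr_lock = PTHREAD_MUTEX_INITIALIZER;
    static struct branch *primary;   /* only valid to dereference under mgr_lock */

    static void branch_put(struct branch *b)
    {
        if (--b->refs == 0)
            free(b);
    }

    static void probe_work(void)
    {
        struct branch *b;

        pthread_mutex_lock(&mgr_lock);
        b = primary;
        if (b)
            b->refs++;              /* pin while the pointer is known valid */
        pthread_mutex_unlock(&mgr_lock);

        if (b) {
            printf("probing branch at %d\n", b->addr); /* lock not held here */
            branch_put(b);
        }
    }

    int main(void)
    {
        primary = calloc(1, sizeof(*primary));
        primary->refs = 1;
        primary->addr = 0x10;
        probe_work();
        branch_put(primary);
        return 0;
    }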
Signed-off-by: Daniel Vetter Reviewed-by: Daniel Vetter Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_dp_mst_topology.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index 9dce953316244..778bbb6425b80 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -1200,7 +1200,7 @@ static void drm_dp_check_and_send_link_address(struct drm_dp_mst_topology_mgr *m struct drm_dp_mst_branch *mstb) { struct drm_dp_mst_port *port; - + struct drm_dp_mst_branch *mstb_child; if (!mstb->link_address_sent) { drm_dp_send_link_address(mgr, mstb); mstb->link_address_sent = true; @@ -1215,17 +1215,31 @@ static void drm_dp_check_and_send_link_address(struct drm_dp_mst_topology_mgr *m if (!port->available_pbn) drm_dp_send_enum_path_resources(mgr, mstb, port); - if (port->mstb) - drm_dp_check_and_send_link_address(mgr, port->mstb); + if (port->mstb) { + mstb_child = drm_dp_get_validated_mstb_ref(mgr, port->mstb); + if (mstb_child) { + drm_dp_check_and_send_link_address(mgr, mstb_child); + drm_dp_put_mst_branch_device(mstb_child); + } + } } } static void drm_dp_mst_link_probe_work(struct work_struct *work) { struct drm_dp_mst_topology_mgr *mgr = container_of(work, struct drm_dp_mst_topology_mgr, work); + struct drm_dp_mst_branch *mstb; - drm_dp_check_and_send_link_address(mgr, mgr->mst_primary); - + mutex_lock(&mgr->lock); + mstb = mgr->mst_primary; + if (mstb) { + kref_get(&mstb->kref); + } + mutex_unlock(&mgr->lock); + if (mstb) { + drm_dp_check_and_send_link_address(mgr, mstb); + drm_dp_put_mst_branch_device(mstb); + } } static bool drm_dp_validate_guid(struct drm_dp_mst_topology_mgr *mgr, From 3950be0f2d0feef1a8e647af82af861cd020c81e Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Thu, 11 Jun 2015 18:33:48 +0200 Subject: [PATCH 0271/2091] drm/tegra: dpaux: Fix transfers larger than 4 bytes commit 3c1dae0a07c651526f8e878d223a88f82caa5a50 upstream. The DPAUX read/write FIFO registers aren't sequential in the register space, causing transfers larger than 4 bytes to cause accesses to non- existing FIFO registers. 
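The arithmetic of the fix, indexing the FIFO register by 32-bit word number instead of post-incrementing a register offset, can be shown in a standalone sketch; the register macro and its base and stride below are made up purely for illustration:

    #include <inttypes.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    #define AUXDATA_WRITE(x)  (0x09 + (x) * 4)   /* hypothetical layout */

    static void write_fifo(const uint8_t *buf, size_t size)
    {
        for (size_t i = 0; i < (size + 3) / 4; i++) {   /* DIV_ROUND_UP(size, 4) */
            size_t num = size - i * 4 < 4 ? size - i * 4 : 4;
            uint32_t value = 0;

            for (size_t j = 0; j < num; j++)
                value |= (uint32_t)buf[i * 4 + j] << (j * 8);

            /* each packed word goes to the register for word i, wherever it is */
            printf("writel(0x%08" PRIx32 ", AUXDATA_WRITE(%zu) = 0x%02zx)\n",
                   value, i, (size_t)AUXDATA_WRITE(i));
        }
    }

    int main(void)
    {
        uint8_t msg[6] = { 0x11, 0x22, 0x33, 0x44, 0x55, 0x66 };

        write_fifo(msg, sizeof(msg));
        return 0;
    }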
Fixes: 6b6b604215c6 ("drm/tegra: Add eDP support") Signed-off-by: Thierry Reding Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/tegra/dpaux.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/tegra/dpaux.c b/drivers/gpu/drm/tegra/dpaux.c index d6b55e3e3716c..a43a836e6f882 100644 --- a/drivers/gpu/drm/tegra/dpaux.c +++ b/drivers/gpu/drm/tegra/dpaux.c @@ -72,34 +72,32 @@ static inline void tegra_dpaux_writel(struct tegra_dpaux *dpaux, static void tegra_dpaux_write_fifo(struct tegra_dpaux *dpaux, const u8 *buffer, size_t size) { - unsigned long offset = DPAUX_DP_AUXDATA_WRITE(0); size_t i, j; - for (i = 0; i < size; i += 4) { - size_t num = min_t(size_t, size - i, 4); + for (i = 0; i < DIV_ROUND_UP(size, 4); i++) { + size_t num = min_t(size_t, size - i * 4, 4); unsigned long value = 0; for (j = 0; j < num; j++) - value |= buffer[i + j] << (j * 8); + value |= buffer[i * 4 + j] << (j * 8); - tegra_dpaux_writel(dpaux, value, offset++); + tegra_dpaux_writel(dpaux, value, DPAUX_DP_AUXDATA_WRITE(i)); } } static void tegra_dpaux_read_fifo(struct tegra_dpaux *dpaux, u8 *buffer, size_t size) { - unsigned long offset = DPAUX_DP_AUXDATA_READ(0); size_t i, j; - for (i = 0; i < size; i += 4) { - size_t num = min_t(size_t, size - i, 4); + for (i = 0; i < DIV_ROUND_UP(size, 4); i++) { + size_t num = min_t(size_t, size - i * 4, 4); unsigned long value; - value = tegra_dpaux_readl(dpaux, offset++); + value = tegra_dpaux_readl(dpaux, DPAUX_DP_AUXDATA_READ(i)); for (j = 0; j < num; j++) - buffer[i + j] = value >> (j * 8); + buffer[i * 4 + j] = value >> (j * 8); } } From 3d47c41f985273eaee48e8b1b72a41f5b126cd70 Mon Sep 17 00:00:00 2001 From: Frediano Ziglio Date: Wed, 3 Jun 2015 12:09:09 +0100 Subject: [PATCH 0272/2091] drm/qxl: Do not cause spice-server to clean our objects commit 2fa19535ca6abcbfd1ccc9ef694db52f49f77747 upstream. If objects are moved back from system memory to VRAM (and spice id created again) memory is already initialized so we need to set flag to not clear memory. If you don't do it after a while using desktop many images turns to black or transparents. Signed-off-by: Frediano Ziglio Reviewed-by: Dave Airlie Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/qxl/qxl_cmd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/qxl/qxl_cmd.c b/drivers/gpu/drm/qxl/qxl_cmd.c index 97823644d3474..f33251d67914c 100644 --- a/drivers/gpu/drm/qxl/qxl_cmd.c +++ b/drivers/gpu/drm/qxl/qxl_cmd.c @@ -505,6 +505,7 @@ int qxl_hw_surface_alloc(struct qxl_device *qdev, cmd = (struct qxl_surface_cmd *)qxl_release_map(qdev, release); cmd->type = QXL_SURFACE_CMD_CREATE; + cmd->flags = QXL_SURF_FLAG_KEEP_DATA; cmd->u.surface_create.format = surf->surf.format; cmd->u.surface_create.width = surf->surf.width; cmd->u.surface_create.height = surf->surf.height; From e0cf83cc3de0341d8cabbb23097ad85f5ce97a11 Mon Sep 17 00:00:00 2001 From: Frediano Ziglio Date: Wed, 3 Jun 2015 12:09:10 +0100 Subject: [PATCH 0273/2091] drm/qxl: Do not leak memory if qxl_release_list_add fails commit 8451cc964c1d193b989c41a44e5e77109cc696f8 upstream. If the function fails reference counter to the object is not decremented causing leaks. This is hard to spot as it happens only on very low memory situations. 
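The rule the fix restores can be sketched on its own: whichever path takes the extra reference must drop it again before bailing out on error. The object type and helpers below are invented for illustration:

    #include <stdio.h>
    #include <stdlib.h>

    struct obj {
        int refs;
    };

    static void obj_get(struct obj *o) { o->refs++; }

    static void obj_put(struct obj *o)
    {
        if (--o->refs == 0)
            free(o);
    }

    /* Returns the object with an extra reference, or NULL on failure. */
    static struct obj *lookup_and_track(struct obj *o, int fail)
    {
        obj_get(o);
        if (fail) {          /* e.g. a later list insertion ran out of memory */
            obj_put(o);      /* without this, the reference leaks */
            return NULL;
        }
        return o;
    }

    int main(void)
    {
        struct obj *o = calloc(1, sizeof(*o));

        o->refs = 1;
        if (!lookup_and_track(o, 1))
            printf("lookup failed, refs back to %d\n", o->refs);
        obj_put(o);
        return 0;
    }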
Signed-off-by: Frediano Ziglio Reviewed-by: Dave Airlie Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/qxl/qxl_ioctl.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/qxl/qxl_ioctl.c b/drivers/gpu/drm/qxl/qxl_ioctl.c index b110883f8253d..7354a4cda59d9 100644 --- a/drivers/gpu/drm/qxl/qxl_ioctl.c +++ b/drivers/gpu/drm/qxl/qxl_ioctl.c @@ -122,8 +122,10 @@ static struct qxl_bo *qxlhw_handle_to_bo(struct qxl_device *qdev, qobj = gem_to_qxl_bo(gobj); ret = qxl_release_list_add(release, qobj); - if (ret) + if (ret) { + drm_gem_object_unreference_unlocked(gobj); return NULL; + } return qobj; } From db4d9159da3f595b964d986cb53d33fd24e5774c Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 5 May 2015 18:32:17 +0200 Subject: [PATCH 0274/2091] drm/bridge: ptn3460: Include linux/gpio/consumer.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit dad3c3503462f59c6bec7edfa19dbde1857962c0 upstream. If GPIOLIB=n and asm-generic/gpio.h is not used: drivers/gpu/drm/bridge/ptn3460.c: In function ‘ptn3460_pre_enable’: drivers/gpu/drm/bridge/ptn3460.c:135: error: implicit declaration of function ‘gpiod_set_value’ drivers/gpu/drm/bridge/ptn3460.c: In function ‘ptn3460_probe’: drivers/gpu/drm/bridge/ptn3460.c:333: error: implicit declaration of function ‘devm_gpiod_get’ drivers/gpu/drm/bridge/ptn3460.c:333: warning: assignment makes pointer from integer without a cast drivers/gpu/drm/bridge/ptn3460.c:340: error: implicit declaration of function ‘gpiod_direction_output’ drivers/gpu/drm/bridge/ptn3460.c:346: warning: assignment makes pointer from integer without a cast Add the missing #include to fix this. Fixes: af478d8823 ("drm/bridge: ptn3460: use gpiod interface") Signed-off-by: Geert Uytterhoeven Cc: David Airlie Cc: dri-devel@lists.freedesktop.org Signed-off-by: Thierry Reding Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/bridge/ptn3460.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/bridge/ptn3460.c b/drivers/gpu/drm/bridge/ptn3460.c index 9d2f053382e18..63a09e4079f35 100644 --- a/drivers/gpu/drm/bridge/ptn3460.c +++ b/drivers/gpu/drm/bridge/ptn3460.c @@ -15,6 +15,7 @@ #include #include +#include #include #include #include From c074d39c847fa25c63bff1361ac0518ccd1f2fef Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Mon, 25 May 2015 13:29:44 +0300 Subject: [PATCH 0275/2091] drm/atomic: fix out of bounds read in for_each_*_in_state helpers commit 60f207a5b6d8f23c2e8388b415e8d5c7311cc79d upstream. for_each_*_in_state validate array index after access to array elements, thus perform out of bounds read. Fix this by validating index in the first place and read array element iff validation was successful. 
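The macro shape before and after can be reproduced in a small standalone program; the state structure below is invented, but the comma-expression trick of checking the index first and only then loading the element as "(load, 1)" is the same one used in the fix:

    #include <stdio.h>

    struct state {
        int num;
        const char *planes[3];
    };

    /* Broken shape: planes[i] is read before i < num is checked. */
    #define for_each_plane_bad(s, p, i) \
        for ((i) = 0; (p) = (s)->planes[i], (i) < (s)->num; (i)++)

    /* Fixed shape: bounds check first, then the load, which yields 1 (true). */
    #define for_each_plane(s, p, i) \
        for ((i) = 0; (i) < (s)->num && ((p) = (s)->planes[i], 1); (i)++)

    int main(void)
    {
        struct state s = { 3, { "primary", "sprite", "cursor" } };
        const char *p;
        int i;

        for_each_plane(&s, p, i)
            printf("plane %d: %s\n", i, p);
        return 0;
    }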
Fixes: df63b9994eaf ("drm/atomic: Add for_each_{connector,crtc,plane}_in_state helper macros") Signed-off-by: Andrey Ryabinin Signed-off-by: Daniel Vetter Signed-off-by: Greg Kroah-Hartman --- include/drm/drm_atomic.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/include/drm/drm_atomic.h b/include/drm/drm_atomic.h index c157103492b0d..3f13b910f8d2e 100644 --- a/include/drm/drm_atomic.h +++ b/include/drm/drm_atomic.h @@ -77,26 +77,26 @@ int __must_check drm_atomic_async_commit(struct drm_atomic_state *state); #define for_each_connector_in_state(state, connector, connector_state, __i) \ for ((__i) = 0; \ - (connector) = (state)->connectors[__i], \ - (connector_state) = (state)->connector_states[__i], \ - (__i) < (state)->num_connector; \ + (__i) < (state)->num_connector && \ + ((connector) = (state)->connectors[__i], \ + (connector_state) = (state)->connector_states[__i], 1); \ (__i)++) \ if (connector) #define for_each_crtc_in_state(state, crtc, crtc_state, __i) \ for ((__i) = 0; \ - (crtc) = (state)->crtcs[__i], \ - (crtc_state) = (state)->crtc_states[__i], \ - (__i) < (state)->dev->mode_config.num_crtc; \ + (__i) < (state)->dev->mode_config.num_crtc && \ + ((crtc) = (state)->crtcs[__i], \ + (crtc_state) = (state)->crtc_states[__i], 1); \ (__i)++) \ if (crtc_state) -#define for_each_plane_in_state(state, plane, plane_state, __i) \ - for ((__i) = 0; \ - (plane) = (state)->planes[__i], \ - (plane_state) = (state)->plane_states[__i], \ - (__i) < (state)->dev->mode_config.num_total_plane; \ - (__i)++) \ +#define for_each_plane_in_state(state, plane, plane_state, __i) \ + for ((__i) = 0; \ + (__i) < (state)->dev->mode_config.num_total_plane && \ + ((plane) = (state)->planes[__i], \ + (plane_state) = (state)->plane_states[__i], 1); \ + (__i)++) \ if (plane_state) #endif /* DRM_ATOMIC_H_ */ From 32d12fc20e3c726ca858d0e5055fb596fce2f8bc Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 15 May 2015 11:48:52 -0400 Subject: [PATCH 0276/2091] drm/radeon: take the mode_config mutex when dealing with hpds (v2) commit 39fa10f7e21574a70cecf1fed0f9b36535aa68a0 upstream. Since we are messing with state in the worker. v2: drop the changes in the mst worker Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/radeon_irq_kms.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon_irq_kms.c b/drivers/gpu/drm/radeon/radeon_irq_kms.c index 7162c935371c6..f682e5351252e 100644 --- a/drivers/gpu/drm/radeon/radeon_irq_kms.c +++ b/drivers/gpu/drm/radeon/radeon_irq_kms.c @@ -79,10 +79,12 @@ static void radeon_hotplug_work_func(struct work_struct *work) struct drm_mode_config *mode_config = &dev->mode_config; struct drm_connector *connector; + mutex_lock(&mode_config->mutex); if (mode_config->num_connector) { list_for_each_entry(connector, &mode_config->connector_list, head) radeon_connector_hotplug(connector); } + mutex_unlock(&mode_config->mutex); /* Just fire off a uevent and let userspace tell us what to do */ drm_helper_hpd_irq_event(dev); } From 598f69d4fb04f188a819bf649c0b35dd4225dc8d Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 18 May 2015 11:11:48 -0400 Subject: [PATCH 0277/2091] drm/radeon: clean up radeon_audio_enable commit 010621936103fcfc15375ccdc92c0f583923d489 upstream. 
- make it static - fix mask/bool handling for last param Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/radeon_audio.c | 18 +++++++++--------- drivers/gpu/drm/radeon/radeon_audio.h | 2 -- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_audio.c b/drivers/gpu/drm/radeon/radeon_audio.c index 25191f126f3bb..c89215275053d 100644 --- a/drivers/gpu/drm/radeon/radeon_audio.c +++ b/drivers/gpu/drm/radeon/radeon_audio.c @@ -242,6 +242,13 @@ static struct radeon_audio_funcs dce6_dp_funcs = { .dpms = evergreen_dp_enable, }; +static void radeon_audio_enable(struct radeon_device *rdev, + struct r600_audio_pin *pin, u8 enable_mask) +{ + if (rdev->audio.funcs->enable) + rdev->audio.funcs->enable(rdev, pin, enable_mask); +} + static void radeon_audio_interface_init(struct radeon_device *rdev) { if (ASIC_IS_DCE6(rdev)) { @@ -307,7 +314,7 @@ int radeon_audio_init(struct radeon_device *rdev) /* disable audio. it will be set up later */ for (i = 0; i < rdev->audio.num_pins; i++) - radeon_audio_enable(rdev, &rdev->audio.pin[i], false); + radeon_audio_enable(rdev, &rdev->audio.pin[i], 0); return 0; } @@ -443,13 +450,6 @@ static void radeon_audio_select_pin(struct drm_encoder *encoder) radeon_encoder->audio->select_pin(encoder); } -void radeon_audio_enable(struct radeon_device *rdev, - struct r600_audio_pin *pin, u8 enable_mask) -{ - if (rdev->audio.funcs->enable) - rdev->audio.funcs->enable(rdev, pin, enable_mask); -} - void radeon_audio_detect(struct drm_connector *connector, enum drm_connector_status status) { @@ -502,7 +502,7 @@ void radeon_audio_fini(struct radeon_device *rdev) return; for (i = 0; i < rdev->audio.num_pins; i++) - radeon_audio_enable(rdev, &rdev->audio.pin[i], false); + radeon_audio_enable(rdev, &rdev->audio.pin[i], 0); rdev->audio.enabled = false; } diff --git a/drivers/gpu/drm/radeon/radeon_audio.h b/drivers/gpu/drm/radeon/radeon_audio.h index c92d059ab204d..8438304f71395 100644 --- a/drivers/gpu/drm/radeon/radeon_audio.h +++ b/drivers/gpu/drm/radeon/radeon_audio.h @@ -74,8 +74,6 @@ u32 radeon_audio_endpoint_rreg(struct radeon_device *rdev, void radeon_audio_endpoint_wreg(struct radeon_device *rdev, u32 offset, u32 reg, u32 v); struct r600_audio_pin *radeon_audio_get_pin(struct drm_encoder *encoder); -void radeon_audio_enable(struct radeon_device *rdev, - struct r600_audio_pin *pin, u8 enable_mask); void radeon_audio_fini(struct radeon_device *rdev); void radeon_audio_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode); From 28362ad0c52531e85a296ba2c3623c220101c628 Mon Sep 17 00:00:00 2001 From: Michel Thierry Date: Thu, 25 Jun 2015 12:59:38 +0100 Subject: [PATCH 0278/2091] drm/i915/ppgtt: Break loop in gen8_ppgtt_clear_range failure path commit 00245266b4be4fbe989ee073663f56716da6c1f3 upstream. If for some reason [1], the page directory/table does not exist, clear_range would end up in an infinite while loop. Introduced by commit 06fda602dbca ("drm/i915: Create page table allocators"). 
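The loop behaviour can be illustrated in isolation: when the cursor only advances at the bottom of the body, a continue taken for a missing entry never makes progress, while a break exits cleanly. The table below is invented:

    #include <stdio.h>

    int main(void)
    {
        int present[4] = { 1, 1, 0, 1 };    /* entry 2 was never allocated */
        unsigned int i = 0;

        while (i < 4) {
            if (!present[i]) {
                fprintf(stderr, "WARN: missing entry %u\n", i);
                break;                      /* "continue" would loop forever */
            }
            printf("clearing entry %u\n", i);
            i++;                            /* only reached for valid entries */
        }
        return 0;
    }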
[1] This is already being addressed in one of Mika's patches: http://mid.gmane.org/1432314314-23530-17-git-send-email-mika.kuoppala@intel.com Cc: Mika Kuoppala Reported-by: John Harrison Signed-off-by: Michel Thierry Reviewed-by: Mika Kuoppala Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_gem_gtt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 0239fbff7bf72..ad90fa3045e51 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -502,17 +502,17 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm, struct page *page_table; if (WARN_ON(!ppgtt->pdp.page_directory[pdpe])) - continue; + break; pd = ppgtt->pdp.page_directory[pdpe]; if (WARN_ON(!pd->page_table[pde])) - continue; + break; pt = pd->page_table[pde]; if (WARN_ON(!pt->page)) - continue; + break; page_table = pt->page; From 6947522ca7e23f92dc5e29fa7d2a3e68df3d7d04 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 28 May 2015 11:07:11 -0700 Subject: [PATCH 0279/2091] drm/i915: Fix IPS related flicker MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit ac88cd738425e04dbed3706621cf613a00708834 upstream. We cannot let IPS enabled with no plane on the pipe: BSpec: "IPS cannot be enabled until after at least one plane has been enabled for at least one vertical blank." and "IPS must be disabled while there is still at least one plane enabled on the same pipe as IPS." This restriction apply to HSW and BDW. However a shortcut path on update primary plane function to make primary plane invisible by setting DSPCTRL to 0 was leting IPS enabled while there was no other plane enabled on the pipe causing flickerings that we were believing that it was caused by that other restriction where ips cannot be used when pixel rate is greater than 95% of cdclok. v2: Don't mess with Atomic path as pointed out by Ville. Reference: https://bugs.freedesktop.org/show_bug.cgi?id=85583 Cc: Ville Syrjälä Cc: Paulo Zanoni Signed-off-by: Rodrigo Vivi Reviewed-by: Ville Syrjälä Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_display.c | 13 +++++++++++++ drivers/gpu/drm/i915/intel_drv.h | 1 + 2 files changed, 14 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index d0f3cbc87474c..57c887843dc3f 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -12499,6 +12499,16 @@ intel_check_primary_plane(struct drm_plane *plane, intel_crtc->atomic.wait_vblank = true; } + /* + * FIXME: Actually if we will still have any other plane enabled + * on the pipe we could let IPS enabled still, but for + * now lets consider that when we make primary invisible + * by setting DSPCNTR to 0 on update_primary_plane function + * IPS needs to be disable. 
+ */ + if (!state->visible || !fb) + intel_crtc->atomic.disable_ips = true; + intel_crtc->atomic.fb_bits |= INTEL_FRONTBUFFER_PRIMARY(intel_crtc->pipe); @@ -12590,6 +12600,9 @@ static void intel_begin_crtc_commit(struct drm_crtc *crtc) if (intel_crtc->atomic.disable_fbc) intel_fbc_disable(dev); + if (intel_crtc->atomic.disable_ips) + hsw_disable_ips(intel_crtc); + if (intel_crtc->atomic.pre_disable_primary) intel_pre_disable_primary(crtc); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 897f17db08af4..68d1f74a74038 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -424,6 +424,7 @@ struct intel_crtc_atomic_commit { /* Sleepable operations to perform before commit */ bool wait_for_flips; bool disable_fbc; + bool disable_ips; bool pre_disable_primary; bool update_wm; unsigned disabled_planes; From fb07c9c85095d9520c4d4db69bef70442ec62a72 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 26 Jun 2015 14:18:56 +0300 Subject: [PATCH 0280/2091] drm/i915: fix backlight after resume on 855gm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 2059ac3b1304cb6a82f9d90762dea9f556831627 upstream. Some 855gm models (at least ThinkPad X40) regressed because of commit b0cd324faed23d10d66ba6ade66579c681feef6f Author: Jani Nikula Date: Wed Nov 12 16:25:43 2014 +0200 drm/i915: don't save/restore backlight hist ctl registers which tried to make our driver more robust by not blindly saving and restoring registers, but it failed to take into account commit 0eb96d6ed38430b72897adde58f5477a6b71757a Author: Jesse Barnes Date: Wed Oct 14 12:33:41 2009 -0700 drm/i915: save/restore BLC histogram control reg across suspend/resume Fix the regression by enabling hist ctl on gen2. v2: Improved the comment. v3: Improved the comment, again. Reported-and-tested-by: Philipp Gesang References: http://mid.gmane.org/20150623222648.GD12335@acheron Fixes: b0cd324faed2 ("drm/i915: don't save/restore backlight hist ctl registers") Cc: Ville Syrjälä Acked-by: Chris Wilson Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_panel.c | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 773d1d24e604c..a30db4b4050e4 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3209,6 +3209,7 @@ enum skl_disp_power_wells { #define BLM_POLARITY_PNV (1 << 0) /* pnv only */ #define BLC_HIST_CTL (dev_priv->info.display_mmio_offset + 0x61260) +#define BLM_HISTOGRAM_ENABLE (1 << 31) /* New registers for PCH-split platforms. Safe where new bits show up, the * register layout machtes with gen4 BLC_PWM_CTL[12]. */ diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c index 08532d4ffe0ac..2bf92cba4a554 100644 --- a/drivers/gpu/drm/i915/intel_panel.c +++ b/drivers/gpu/drm/i915/intel_panel.c @@ -879,6 +879,14 @@ static void i9xx_enable_backlight(struct intel_connector *connector) /* XXX: combine this into above write? */ intel_panel_actually_set_backlight(connector, panel->backlight.level); + + /* + * Needed to enable backlight on some 855gm models. BLC_HIST_CTL is + * 855gm only, but checking for gen2 is safe, as 855gm is the only gen2 + * that has backlight. 
+ */ + if (IS_GEN2(dev)) + I915_WRITE(BLC_HIST_CTL, BLM_HISTOGRAM_ENABLE); } static void i965_enable_backlight(struct intel_connector *connector) From 0f2bb042f21bdb28f20efcf1ff1c507e2f8b3caa Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 29 Jun 2015 14:01:19 +0100 Subject: [PATCH 0281/2091] drm/i915: Declare the swizzling unknown for L-shaped configurations commit 19ee835cdb0b5a8eb11a68f25a51b8039d564488 upstream. The old style of memory interleaving swizzled upto the end of the first even bank of memory, and then used the remainder as unswizzled on the unpaired bank - i.e. swizzling is not constant for all memory. This causes problems when we try to migrate memory and so the kernel prevents migration at all when we detect L-shaped inconsistent swizzling. However, this issue also extends to userspace who try to manually detile into memory as the swizzling for an individual page is unknown (it depends on its physical address only known to the kernel), userspace cannot correctly swizzle objects. v2: Mark the global swizzling as unknown rather than adjust the value reported to userspace. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91105 Signed-off-by: Chris Wilson Cc: Daniel Vetter Signed-off-by: Daniel Vetter Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_gem_tiling.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index 6377b22269ad1..215e047afc6e7 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -183,8 +183,18 @@ i915_gem_detect_bit_6_swizzle(struct drm_device *dev) if (IS_GEN4(dev)) { uint32_t ddc2 = I915_READ(DCC2); - if (!(ddc2 & DCC2_MODIFIED_ENHANCED_DISABLE)) + if (!(ddc2 & DCC2_MODIFIED_ENHANCED_DISABLE)) { + /* Since the swizzling may vary within an + * object, we have no idea what the swizzling + * is for any page in particular. Thus we + * cannot migrate tiled pages using the GPU, + * nor can we tell userspace what the exact + * swizzling is for any object. + */ dev_priv->quirks |= QUIRK_PIN_SWIZZLED_PAGES; + swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN; + swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN; + } } if (dcc == 0xffffffff) { From e639564b6050d71ca2b4e575685b6f7d3642d18a Mon Sep 17 00:00:00 2001 From: Tomas Elf Date: Thu, 9 Jul 2015 15:30:57 +0100 Subject: [PATCH 0282/2091] drm/i915: Snapshot seqno of most recently submitted request. commit 94f7bbe1509731bdef651d7fb235b2c31fb23be8 upstream. The hang checker needs to inspect whether or not the ring request list is empty as well as if the given engine has reached or passed the most recently submitted request. The problem with this is that the hang checker cannot grab the struct_mutex, which is required in order to safely inspect requests since requests might be deallocated during inspection. In the past we've had kernel panics due to this very unsynchronized access in the hang checker. One solution to this problem is to not inspect the requests directly since we're only interested in the seqno of the most recently submitted request - not the request itself. Instead the seqno of the most recently submitted request is stored separately, which the hang checker then inspects, circumventing the issue of synchronization from the hang checker entirely. 
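A rough sketch of that idle test, assuming the usual wraparound-safe signed comparison for sequence numbers; the values and helper names below are illustrative, not the driver's own:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* True if "completed" has reached or passed "target", even across wrap. */
    static bool seqno_passed(uint32_t completed, uint32_t target)
    {
        return (int32_t)(completed - target) >= 0;
    }

    static bool ring_idle(uint32_t completed, uint32_t last_submitted)
    {
        return seqno_passed(completed, last_submitted);
    }

    int main(void)
    {
        uint32_t last_submitted = 0xfffffffe;   /* just before wraparound */

        printf("completed=0xfffffffd idle=%d\n",
               ring_idle(0xfffffffd, last_submitted));
        printf("completed=0x00000001 idle=%d\n",
               ring_idle(0x00000001, last_submitted));
        return 0;
    }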
This fixes a regression introduced in commit 44cdd6d219bc64f6810b8ed0023a4d4db9e0fe68 Author: John Harrison Date: Mon Nov 24 18:49:40 2014 +0000 drm/i915: Convert 'ring_idle()' to use requests not seqnos v2 (Chris Wilson): - Pass current engine seqno to ring_idle() from i915_hangcheck_elapsed() rather than compute it over again. - Remove extra whitespace. Issue: VIZ-5998 Signed-off-by: Tomas Elf Reviewed-by: Chris Wilson [danvet: Add regressing commit citation provided by Chris.] Signed-off-by: Daniel Vetter Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_gem.c | 1 + drivers/gpu/drm/i915/i915_irq.c | 13 +++---------- drivers/gpu/drm/i915/intel_ringbuffer.h | 7 +++++++ 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 2d0995e7afc37..596bce56e3792 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2401,6 +2401,7 @@ int __i915_add_request(struct intel_engine_cs *ring, } request->emitted_jiffies = jiffies; + ring->last_submitted_seqno = request->seqno; list_add_tail(&request->list, &ring->request_list); request->file_priv = NULL; diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 6d494432b19f6..b0df8d10482ae 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -2650,18 +2650,11 @@ static void gen8_disable_vblank(struct drm_device *dev, int pipe) spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); } -static struct drm_i915_gem_request * -ring_last_request(struct intel_engine_cs *ring) -{ - return list_entry(ring->request_list.prev, - struct drm_i915_gem_request, list); -} - static bool -ring_idle(struct intel_engine_cs *ring) +ring_idle(struct intel_engine_cs *ring, u32 seqno) { return (list_empty(&ring->request_list) || - i915_gem_request_completed(ring_last_request(ring), false)); + i915_seqno_passed(seqno, ring->last_submitted_seqno)); } static bool @@ -2883,7 +2876,7 @@ static void i915_hangcheck_elapsed(struct work_struct *work) acthd = intel_ring_get_active_head(ring); if (ring->hangcheck.seqno == seqno) { - if (ring_idle(ring)) { + if (ring_idle(ring, seqno)) { ring->hangcheck.action = HANGCHECK_IDLE; if (waitqueue_active(&ring->irq_queue)) { diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index c761fe05ad6fd..94514d364d25f 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -266,6 +266,13 @@ struct intel_engine_cs { * Do we have some not yet emitted requests outstanding? */ struct drm_i915_gem_request *outstanding_lazy_request; + /** + * Seqno of request most recently submitted to request_list. + * Used exclusively by hang checker to avoid grabbing lock while + * inspecting request list. + */ + u32 last_submitted_seqno; + bool gpu_caches_dirty; wait_queue_head_t irq_queue; From 7bd3a9a9f4c231c8e4c7233f2e0cd83db12726dc Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Mon, 13 Jul 2015 16:51:39 +0100 Subject: [PATCH 0283/2091] drm/i915: Forward all core DRM ioctls to core compat handling commit ac7e7ab1c3243b10b41653cc8d8536088d83b152 upstream. Previously only core DRM ioctls under the DRM_COMMAND_BASE were being forwarded, but the drm.h header suggests (and reality confirms) ones after (and including) DRM_COMMAND_END should be forwarded as well. We need this to correctly forward the compat ioctl for the botched-up addfb2.1 extension. 
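The routing rule can be sketched with stand-in constants (the numbers below are not the real DRM values): only the driver's private range is looked up in its own table, and everything below the base or at and above the end of that range falls back to the core handler:

    #include <stdio.h>

    #define CMD_BASE            0x40   /* stand-in for DRM_COMMAND_BASE */
    #define CMD_END             0xa0   /* stand-in for DRM_COMMAND_END */
    #define NUM_DRIVER_IOCTLS   8

    static const char *route(unsigned int nr)
    {
        if (nr < CMD_BASE || nr >= CMD_END)
            return "core compat handler";
        if (nr < CMD_BASE + NUM_DRIVER_IOCTLS)
            return "driver compat table";
        return "driver default handler";
    }

    int main(void)
    {
        unsigned int nrs[] = { 0x10, 0x41, 0x90, 0xa5 };

        for (unsigned int i = 0; i < sizeof(nrs) / sizeof(nrs[0]); i++)
            printf("nr 0x%02x -> %s\n", nrs[i], route(nrs[i]));
        return 0;
    }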
Signed-off-by: Tvrtko Ursulin Cc: Daniel Vetter [danvet: Explain why this is suddenly needed and add cc: stable.] Signed-off-by: Daniel Vetter Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_ioc32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_ioc32.c b/drivers/gpu/drm/i915/i915_ioc32.c index 176de6322e4d0..23aa04cded6b0 100644 --- a/drivers/gpu/drm/i915/i915_ioc32.c +++ b/drivers/gpu/drm/i915/i915_ioc32.c @@ -204,7 +204,7 @@ long i915_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) drm_ioctl_compat_t *fn = NULL; int ret; - if (nr < DRM_COMMAND_BASE) + if (nr < DRM_COMMAND_BASE || nr >= DRM_COMMAND_END) return drm_compat_ioctl(filp, cmd, arg); if (nr < DRM_COMMAND_BASE + ARRAY_SIZE(i915_compat_ioctls)) From 6fff1aa7fc51334cb71a358f3f536fe9d5d6c994 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 14 Jul 2015 12:29:27 +0200 Subject: [PATCH 0284/2091] Revert "drm/i915: Declare the swizzling unknown for L-shaped configurations" commit d82c0ba6e306f079407f07003e53c262d683397b upstream. This reverts commit 19ee835cdb0b5a8eb11a68f25a51b8039d564488. It breaks existing old userspace which doesn't handle UNKNOWN swizzling correct. Yes UNKNOWN was a thing back in 2009 and probably still is on some other platforms, but it still pretty clearly broke the testers machine. If we want this we need to extend the ioctl with new paramters that only new userspace looks at. Cc: Harald Arnesen Cc: Chris Wilson Reported-by: Harald Arnesen Signed-off-by: Daniel Vetter Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_gem_tiling.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index 215e047afc6e7..6377b22269ad1 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -183,18 +183,8 @@ i915_gem_detect_bit_6_swizzle(struct drm_device *dev) if (IS_GEN4(dev)) { uint32_t ddc2 = I915_READ(DCC2); - if (!(ddc2 & DCC2_MODIFIED_ENHANCED_DISABLE)) { - /* Since the swizzling may vary within an - * object, we have no idea what the swizzling - * is for any page in particular. Thus we - * cannot migrate tiled pages using the GPU, - * nor can we tell userspace what the exact - * swizzling is for any object. - */ + if (!(ddc2 & DCC2_MODIFIED_ENHANCED_DISABLE)) dev_priv->quirks |= QUIRK_PIN_SWIZZLED_PAGES; - swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN; - swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN; - } } if (dcc == 0xffffffff) { From 1f977d7e942519127aea0a08e9d08437b363cf19 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 16 Jul 2015 12:37:56 +0100 Subject: [PATCH 0285/2091] drm/i915: Use two 32bit reads for select 64bit REG_READ ioctls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 648a9bc5308d952f2c80772301b339f73026f013 upstream. Since the hardware sometimes mysteriously totally flummoxes the 64bit read of a 64bit register when read using a single instruction, split the read into two instructions. Since the read here is of automatically incrementing timestamp counters, we also have to be very careful in order to make sure that it does not increment between the two instructions. 
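A sketch of that two-halves read with a retry whenever the upper word moves in between, here against a simulated counter rather than a real register:

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t fake_counter = 0x00000001fffffff0ull;

    /* Simulated 32-bit MMIO reads; the counter ticks on every access. */
    static uint32_t read_lower(void) { return (uint32_t)(fake_counter += 7); }
    static uint32_t read_upper(void) { return (uint32_t)((fake_counter += 7) >> 32); }

    static uint64_t read64_2x32(void)
    {
        uint32_t upper, lower, tmp;

        upper = read_upper();
        do {
            tmp = upper;
            lower = read_lower();
            upper = read_upper();
        } while (upper != tmp);    /* upper half moved: the pair was torn, retry */

        return ((uint64_t)upper << 32) | lower;
    }

    int main(void)
    {
        printf("counter = 0x%016llx\n", (unsigned long long)read64_2x32());
        return 0;
    }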
However, since userspace tried to workaround this issue and so enshrined this ABI for a broken hardware read and in the process neglected that the read only fails in some environments, we have to introduce a new uABI flag for userspace to request the 2x32 bit accurate read of the timestamp. v2: Fix alignment check and include details of the workaround for userspace. Reported-by: Karol Herbst Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91317 Testcase: igt/gem_reg_read Signed-off-by: Chris Wilson Cc: Michał Winiarski Tested-by: Michał Winiarski Signed-off-by: Daniel Vetter Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_uncore.c | 26 +++++++++++++++++++------- include/uapi/drm/i915_drm.h | 8 ++++++++ 2 files changed, 27 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index ff2a74651dd48..a18807ec8371b 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1220,10 +1220,12 @@ int i915_reg_read_ioctl(struct drm_device *dev, struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_reg_read *reg = data; struct register_whitelist const *entry = whitelist; + unsigned size; + u64 offset; int i, ret = 0; for (i = 0; i < ARRAY_SIZE(whitelist); i++, entry++) { - if (entry->offset == reg->offset && + if (entry->offset == (reg->offset & -entry->size) && (1 << INTEL_INFO(dev)->gen & entry->gen_bitmask)) break; } @@ -1231,23 +1233,33 @@ int i915_reg_read_ioctl(struct drm_device *dev, if (i == ARRAY_SIZE(whitelist)) return -EINVAL; + /* We use the low bits to encode extra flags as the register should + * be naturally aligned (and those that are not so aligned merely + * limit the available flags for that register). + */ + offset = entry->offset; + size = entry->size; + size |= reg->offset ^ offset; + intel_runtime_pm_get(dev_priv); - switch (entry->size) { + switch (size) { + case 8 | 1: + reg->val = I915_READ64_2x32(offset, offset+4); + break; case 8: - reg->val = I915_READ64(reg->offset); + reg->val = I915_READ64(offset); break; case 4: - reg->val = I915_READ(reg->offset); + reg->val = I915_READ(offset); break; case 2: - reg->val = I915_READ16(reg->offset); + reg->val = I915_READ16(offset); break; case 1: - reg->val = I915_READ8(reg->offset); + reg->val = I915_READ8(offset); break; default: - MISSING_CASE(entry->size); ret = -EINVAL; goto out; } diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 551b6737f5df6..a7e41fb6ed548 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -1065,6 +1065,14 @@ struct drm_i915_reg_read { __u64 offset; __u64 val; /* Return value */ }; +/* Known registers: + * + * Render engine timestamp - 0x2358 + 64bit - gen7+ + * - Note this register returns an invalid value if using the default + * single instruction 8byte read, in order to workaround that use + * offset (0x2538 | 1) instead. + * + */ struct drm_i915_reset_stats { __u32 ctx_id; From e25083389dcf06d65d56a64c1eca4c2a79e53556 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Glisse?= Date: Fri, 19 Jun 2015 10:32:15 -0400 Subject: [PATCH 0286/2091] drm/radeon: compute ring fix hibernation (CI GPU family) v2. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 161569deaa03cf3c00ed63352006193f250b0648 upstream. In order for hibernation to reliably work we need to cleanup more thoroughly the compute ring. 
Hibernation is different from suspend resume as when we resume from hibernation the hardware is first fully initialize by regular kernel then freeze callback happens (which correspond to a suspend inside the radeon kernel driver) and turn off each of the block. It turns out we were not cleanly shutting down the compute ring. This patch fix that. Hibernation and suspend to ram were tested (several times) on : Bonaire Hawaii Mullins Kaveri Kabini Changed since v1: - Factor the ring stop logic into a function taking ring as arg. Signed-off-by: Jérôme Glisse Reviewed-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/cik.c | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index ba50f3c1c2e03..8de5081f077c8 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -4579,6 +4579,31 @@ void cik_compute_set_wptr(struct radeon_device *rdev, WDOORBELL32(ring->doorbell_index, ring->wptr); } +static void cik_compute_stop(struct radeon_device *rdev, + struct radeon_ring *ring) +{ + u32 j, tmp; + + cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0); + /* Disable wptr polling. */ + tmp = RREG32(CP_PQ_WPTR_POLL_CNTL); + tmp &= ~WPTR_POLL_EN; + WREG32(CP_PQ_WPTR_POLL_CNTL, tmp); + /* Disable HQD. */ + if (RREG32(CP_HQD_ACTIVE) & 1) { + WREG32(CP_HQD_DEQUEUE_REQUEST, 1); + for (j = 0; j < rdev->usec_timeout; j++) { + if (!(RREG32(CP_HQD_ACTIVE) & 1)) + break; + udelay(1); + } + WREG32(CP_HQD_DEQUEUE_REQUEST, 0); + WREG32(CP_HQD_PQ_RPTR, 0); + WREG32(CP_HQD_PQ_WPTR, 0); + } + cik_srbm_select(rdev, 0, 0, 0, 0); +} + /** * cik_cp_compute_enable - enable/disable the compute CP MEs * @@ -4592,6 +4617,15 @@ static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable) if (enable) WREG32(CP_MEC_CNTL, 0); else { + /* + * To make hibernation reliable we need to clear compute ring + * configuration before halting the compute ring. + */ + mutex_lock(&rdev->srbm_mutex); + cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]); + cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]); + mutex_unlock(&rdev->srbm_mutex); + WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT)); rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false; rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false; From 7b49262b642511a16699cc63cf2a716739f0c43f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Glisse?= Date: Fri, 19 Jun 2015 10:32:16 -0400 Subject: [PATCH 0287/2091] drm/radeon: SDMA fix hibernation (CI GPU family). MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 2ba8d1bb8f6b589037f7db1f01144fc80750e8f7 upstream. In order for hibernation to reliably work we need to properly turn off the SDMA block, sadly after numerous attemps i haven't not found proper sequence for clean and full shutdown. 
So simply reset both SDMA block, this makes hibernation works reliably on sea island GPU family (CI) Hibernation and suspend to ram were tested (several times) on : Bonaire Hawaii Mullins Kaveri Kabini Signed-off-by: Jérôme Glisse Reviewed-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/cik_sdma.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c index f86eb54e7763d..d16f2eebd95e6 100644 --- a/drivers/gpu/drm/radeon/cik_sdma.c +++ b/drivers/gpu/drm/radeon/cik_sdma.c @@ -268,6 +268,17 @@ static void cik_sdma_gfx_stop(struct radeon_device *rdev) } rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false; rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false; + + /* FIXME use something else than big hammer but after few days can not + * seem to find good combination so reset SDMA blocks as it seems we + * do not shut them down properly. This fix hibernation and does not + * affect suspend to ram. + */ + WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1); + (void)RREG32(SRBM_SOFT_RESET); + udelay(50); + WREG32(SRBM_SOFT_RESET, 0); + (void)RREG32(SRBM_SOFT_RESET); } /** From d1a4362d41e4feb52df6464f70fb64f21b894623 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 29 Jun 2015 11:09:11 -0400 Subject: [PATCH 0288/2091] Revert "drm/radeon: dont switch vt on suspend" commit ac9134906b3f5c2b45dc80dab0fee792bd516d52 upstream. This reverts commit b9729b17a414f99c61f4db9ac9f9ed987fa0cbfe. This seems to break the cursor on resume for lots of systems. Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/radeon_fb.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_fb.c b/drivers/gpu/drm/radeon/radeon_fb.c index aeb676708e60c..634793ea84188 100644 --- a/drivers/gpu/drm/radeon/radeon_fb.c +++ b/drivers/gpu/drm/radeon/radeon_fb.c @@ -257,7 +257,6 @@ static int radeonfb_create(struct drm_fb_helper *helper, } info->par = rfbdev; - info->skip_vt_switch = true; ret = radeon_framebuffer_init(rdev->ddev, &rfbdev->rfb, &mode_cmd, gobj); if (ret) { From 8b941a43ea7709111d3cbea2bdfcc678975255da Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 30 Jun 2015 09:30:01 -0400 Subject: [PATCH 0289/2091] drm/radeon: only check the sink type on DP connectors commit 479e9a95120aaae0bf0d3e0b5b26b36ac4a347b6 upstream. Avoids a crash on pre-DP asics that support HDMI. 
Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/radeon_audio.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_audio.c b/drivers/gpu/drm/radeon/radeon_audio.c index c89215275053d..fa719c53449bc 100644 --- a/drivers/gpu/drm/radeon/radeon_audio.c +++ b/drivers/gpu/drm/radeon/radeon_audio.c @@ -469,22 +469,22 @@ void radeon_audio_detect(struct drm_connector *connector, dig = radeon_encoder->enc_priv; if (status == connector_status_connected) { - struct radeon_connector *radeon_connector; - int sink_type; - if (!drm_detect_monitor_audio(radeon_connector_edid(connector))) { radeon_encoder->audio = NULL; return; } - radeon_connector = to_radeon_connector(connector); - sink_type = radeon_dp_getsinktype(radeon_connector); + if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort) { + struct radeon_connector *radeon_connector = to_radeon_connector(connector); - if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort && - sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT) - radeon_encoder->audio = rdev->audio.dp_funcs; - else + if (radeon_dp_getsinktype(radeon_connector) == + CONNECTOR_OBJECT_ID_DISPLAYPORT) + radeon_encoder->audio = rdev->audio.dp_funcs; + else + radeon_encoder->audio = rdev->audio.hdmi_funcs; + } else { radeon_encoder->audio = rdev->audio.hdmi_funcs; + } dig->afmt->pin = radeon_audio_get_pin(connector->encoder); radeon_audio_enable(rdev, dig->afmt->pin, 0xf); From 56308551a9569ea10791092a99c9f6b5b2d4ddf8 Mon Sep 17 00:00:00 2001 From: Grigori Goronzy Date: Fri, 3 Jul 2015 01:54:11 +0200 Subject: [PATCH 0290/2091] drm/radeon: fix HDP flushing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 54e03986133468e02cb01b76215e4d53a9cf6380 upstream. This was regressed by commit 39e7f6f8, although I don't know of any actual issues caused by it. The storage domain is read without TTM locking now, but the lock never helped to prevent any races. Reviewed-by: Christian König Signed-off-by: Grigori Goronzy Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/radeon_gem.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index ac3c1310b9531..e370cb4fd4676 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -471,6 +471,7 @@ int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data, r = ret; /* Flush HDP cache via MMIO if necessary */ + cur_placement = ACCESS_ONCE(robj->tbo.mem.mem_type); if (rdev->asic->mmio_hdp_flush && radeon_mem_type_to_domain(cur_placement) == RADEON_GEM_DOMAIN_VRAM) robj->rdev->asic->mmio_hdp_flush(rdev); From 7c4cbc81127b31fa179578e8b17b422efc16d966 Mon Sep 17 00:00:00 2001 From: Mario Kleiner Date: Fri, 3 Jul 2015 06:03:06 +0200 Subject: [PATCH 0291/2091] drm/radeon: Handle irqs only based on irq ring, not irq status regs. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 07f18f0bb8d8d65badd8b4988b40d329fc0cc6dc upstream. Trying to resolve issues with missed vblanks and impossible values inside delivered kms pageflip completion events showed that radeon's irq handling sometimes doesn't handle valid irqs, but silently skips them. This was observed for vblank interrupts. 
Although those irqs have corresponding events queued in the gpu's irq ring at time of interrupt, and therefore the corresponding handling code gets triggered by these events, the handling code sometimes silently skipped processing the irq. The reason for those skips is that the handling code double-checks for each irq event if the corresponding irq status bits in the irq status registers are set. Sometimes those bits are not set at time of check for valid irqs, maybe due to some hardware race on some setups? The problem only seems to happen on some machine + card combos sometimes, e.g., never happened during my testing of different PC cards of the DCE-2/3/4 generation a year ago, but happens consistently now on two different Apple Mac cards (RV730, DCE-3, Apple iMac and Evergreen JUNIPER, DCE-4 in a Apple MacPro). It also doesn't happen at each interrupt but only occassionally every couple of hundred or thousand vblank interrupts. This results in XOrg warning messages like "[ 7084.472] (WW) RADEON(0): radeon_dri2_flip_event_handler: Pageflip completion event has impossible msc 420120 < target_msc 420121" as well as skipped frames and problems for applications that use kms pageflip events or vblank events, e.g., users of DRI2 and DRI3/Present, Waylands Weston compositor, etc. See also https://bugs.freedesktop.org/show_bug.cgi?id=85203 After some talking to Alex and Michel, we decided to fix this by turning the double-check for asserted irq status bits into a warning. Whenever a irq event is queued in the IH ring, always execute the corresponding interrupt handler. Still check the irq status bits, but only to log a DRM_DEBUG message on a mismatch. This fixed the problems reliably on both previously failing cards, RV-730 dual-head tested on both crtcs (pipes D1 and D2) and a triple-output Juniper HD-5770 card tested on all three available crtcs (D1/D2/D3). The r600 and evergreen irq handling is therefore tested, but the cik an si handling is only compile tested due to lack of hw. 
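The change in control flow can be sketched in isolation: the status bit stops being a gate on handling and becomes a consistency check that only logs. The event and bit names below are invented:

    #include <stdio.h>

    #define VBLANK_BIT  (1u << 3)

    static void handle_vblank(void) { printf("vblank handled\n"); }

    static void process_event_old(unsigned int status)
    {
        if (status & VBLANK_BIT)    /* silently drops the event if clear */
            handle_vblank();
    }

    static void process_event_new(unsigned int status)
    {
        if (!(status & VBLANK_BIT))
            fprintf(stderr, "debug: IH event w/o asserted irq bit?\n");
        handle_vblank();            /* trust the ring entry, handle anyway */
    }

    int main(void)
    {
        process_event_old(0);   /* old behaviour: vblank silently skipped */
        process_event_new(0);   /* new behaviour: logged, but still handled */
        return 0;
    }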
Reviewed-by: Christian König Signed-off-by: Mario Kleiner CC: Michel Dänzer CC: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/cik.c | 336 ++++++++++++++----------- drivers/gpu/drm/radeon/evergreen.c | 392 ++++++++++++++++------------- drivers/gpu/drm/radeon/r600.c | 155 +++++++----- drivers/gpu/drm/radeon/si.c | 336 ++++++++++++++----------- 4 files changed, 688 insertions(+), 531 deletions(-) diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 8de5081f077c8..8456653624750 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -7939,23 +7939,27 @@ int cik_irq_process(struct radeon_device *rdev) case 1: /* D1 vblank/vline */ switch (src_data) { case 0: /* D1 vblank */ - if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[0]) { - drm_handle_vblank(rdev->ddev, 0); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[0])) - radeon_crtc_handle_vblank(rdev, 0); - rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D1 vblank\n"); + if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[0]) { + drm_handle_vblank(rdev->ddev, 0); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[0])) + radeon_crtc_handle_vblank(rdev, 0); + rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D1 vblank\n"); + break; case 1: /* D1 vline */ - if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT; - DRM_DEBUG("IH: D1 vline\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT; + DRM_DEBUG("IH: D1 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -7965,23 +7969,27 @@ int cik_irq_process(struct radeon_device *rdev) case 2: /* D2 vblank/vline */ switch (src_data) { case 0: /* D2 vblank */ - if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[1]) { - drm_handle_vblank(rdev->ddev, 1); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[1])) - radeon_crtc_handle_vblank(rdev, 1); - rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D2 vblank\n"); + if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[1]) { + drm_handle_vblank(rdev->ddev, 1); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[1])) + radeon_crtc_handle_vblank(rdev, 1); + rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D2 vblank\n"); + break; case 1: /* D2 vline */ - if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT; - DRM_DEBUG("IH: D2 vline\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT; + DRM_DEBUG("IH: D2 vline\n"); + break; default: 
DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -7991,23 +7999,27 @@ int cik_irq_process(struct radeon_device *rdev) case 3: /* D3 vblank/vline */ switch (src_data) { case 0: /* D3 vblank */ - if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[2]) { - drm_handle_vblank(rdev->ddev, 2); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[2])) - radeon_crtc_handle_vblank(rdev, 2); - rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D3 vblank\n"); + if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[2]) { + drm_handle_vblank(rdev->ddev, 2); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[2])) + radeon_crtc_handle_vblank(rdev, 2); + rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D3 vblank\n"); + break; case 1: /* D3 vline */ - if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT; - DRM_DEBUG("IH: D3 vline\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT; + DRM_DEBUG("IH: D3 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -8017,23 +8029,27 @@ int cik_irq_process(struct radeon_device *rdev) case 4: /* D4 vblank/vline */ switch (src_data) { case 0: /* D4 vblank */ - if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[3]) { - drm_handle_vblank(rdev->ddev, 3); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[3])) - radeon_crtc_handle_vblank(rdev, 3); - rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D4 vblank\n"); + if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[3]) { + drm_handle_vblank(rdev->ddev, 3); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[3])) + radeon_crtc_handle_vblank(rdev, 3); + rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D4 vblank\n"); + break; case 1: /* D4 vline */ - if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT; - DRM_DEBUG("IH: D4 vline\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT; + DRM_DEBUG("IH: D4 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -8043,23 +8059,27 @@ int cik_irq_process(struct radeon_device *rdev) case 5: /* D5 vblank/vline */ switch (src_data) { case 0: /* D5 vblank */ - if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[4]) { - drm_handle_vblank(rdev->ddev, 4); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[4])) - radeon_crtc_handle_vblank(rdev, 4); - 
rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D5 vblank\n"); + if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[4]) { + drm_handle_vblank(rdev->ddev, 4); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[4])) + radeon_crtc_handle_vblank(rdev, 4); + rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D5 vblank\n"); + break; case 1: /* D5 vline */ - if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT; - DRM_DEBUG("IH: D5 vline\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT; + DRM_DEBUG("IH: D5 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -8069,23 +8089,27 @@ int cik_irq_process(struct radeon_device *rdev) case 6: /* D6 vblank/vline */ switch (src_data) { case 0: /* D6 vblank */ - if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[5]) { - drm_handle_vblank(rdev->ddev, 5); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[5])) - radeon_crtc_handle_vblank(rdev, 5); - rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D6 vblank\n"); + if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[5]) { + drm_handle_vblank(rdev->ddev, 5); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[5])) + radeon_crtc_handle_vblank(rdev, 5); + rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D6 vblank\n"); + break; case 1: /* D6 vline */ - if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT; - DRM_DEBUG("IH: D6 vline\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT; + DRM_DEBUG("IH: D6 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -8105,88 +8129,112 @@ int cik_irq_process(struct radeon_device *rdev) case 42: /* HPD hotplug */ switch (src_data) { case 0: - if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD1\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD1\n"); + break; case 1: - if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD2\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD2\n"); + break; case 
2: - if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD3\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD3\n"); + break; case 3: - if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD4\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD4\n"); + break; case 4: - if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD5\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD5\n"); + break; case 5: - if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD6\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD6\n"); + break; case 6: - if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 1\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 1\n"); + break; case 7: - if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 2\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 2\n"); + break; case 8: - if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 3\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 3\n"); + break; case 9: - if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 4\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 4\n"); + break; 
case 10: - if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 5\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 5\n"); + break; case 11: - if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 6\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 6\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index f848acfd3fc8a..feef136cdb555 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -4855,7 +4855,7 @@ int evergreen_irq_process(struct radeon_device *rdev) return IRQ_NONE; rptr = rdev->ih.rptr; - DRM_DEBUG("r600_irq_process start: rptr %d, wptr %d\n", rptr, wptr); + DRM_DEBUG("evergreen_irq_process start: rptr %d, wptr %d\n", rptr, wptr); /* Order reading of wptr vs. reading of IH ring data */ rmb(); @@ -4873,23 +4873,27 @@ int evergreen_irq_process(struct radeon_device *rdev) case 1: /* D1 vblank/vline */ switch (src_data) { case 0: /* D1 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[0]) { - drm_handle_vblank(rdev->ddev, 0); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[0])) - radeon_crtc_handle_vblank(rdev, 0); - rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D1 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: D1 vblank - IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[0]) { + drm_handle_vblank(rdev->ddev, 0); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[0])) + radeon_crtc_handle_vblank(rdev, 0); + rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D1 vblank\n"); + break; case 1: /* D1 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT; - DRM_DEBUG("IH: D1 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)) + DRM_DEBUG("IH: D1 vline - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT; + DRM_DEBUG("IH: D1 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -4899,23 +4903,27 @@ int evergreen_irq_process(struct radeon_device *rdev) case 2: /* D2 vblank/vline */ switch (src_data) { case 0: /* D2 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[1]) { - drm_handle_vblank(rdev->ddev, 1); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[1])) - radeon_crtc_handle_vblank(rdev, 1); - rdev->irq.stat_regs.evergreen.disp_int_cont &= 
~LB_D2_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D2 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: D2 vblank - IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[1]) { + drm_handle_vblank(rdev->ddev, 1); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[1])) + radeon_crtc_handle_vblank(rdev, 1); + rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D2 vblank\n"); + break; case 1: /* D2 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT; - DRM_DEBUG("IH: D2 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)) + DRM_DEBUG("IH: D2 vline - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT; + DRM_DEBUG("IH: D2 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -4925,23 +4933,27 @@ int evergreen_irq_process(struct radeon_device *rdev) case 3: /* D3 vblank/vline */ switch (src_data) { case 0: /* D3 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[2]) { - drm_handle_vblank(rdev->ddev, 2); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[2])) - radeon_crtc_handle_vblank(rdev, 2); - rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D3 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: D3 vblank - IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[2]) { + drm_handle_vblank(rdev->ddev, 2); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[2])) + radeon_crtc_handle_vblank(rdev, 2); + rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D3 vblank\n"); + break; case 1: /* D3 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT; - DRM_DEBUG("IH: D3 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)) + DRM_DEBUG("IH: D3 vline - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT; + DRM_DEBUG("IH: D3 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -4951,23 +4963,27 @@ int evergreen_irq_process(struct radeon_device *rdev) case 4: /* D4 vblank/vline */ switch (src_data) { case 0: /* D4 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[3]) { - drm_handle_vblank(rdev->ddev, 3); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[3])) - radeon_crtc_handle_vblank(rdev, 3); - rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D4 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: D4 vblank - IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[3]) { + drm_handle_vblank(rdev->ddev, 3); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[3])) + 
radeon_crtc_handle_vblank(rdev, 3); + rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D4 vblank\n"); + break; case 1: /* D4 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT; - DRM_DEBUG("IH: D4 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)) + DRM_DEBUG("IH: D4 vline - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT; + DRM_DEBUG("IH: D4 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -4977,23 +4993,27 @@ int evergreen_irq_process(struct radeon_device *rdev) case 5: /* D5 vblank/vline */ switch (src_data) { case 0: /* D5 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[4]) { - drm_handle_vblank(rdev->ddev, 4); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[4])) - radeon_crtc_handle_vblank(rdev, 4); - rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D5 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: D5 vblank - IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[4]) { + drm_handle_vblank(rdev->ddev, 4); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[4])) + radeon_crtc_handle_vblank(rdev, 4); + rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D5 vblank\n"); + break; case 1: /* D5 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT; - DRM_DEBUG("IH: D5 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)) + DRM_DEBUG("IH: D5 vline - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT; + DRM_DEBUG("IH: D5 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -5003,23 +5023,27 @@ int evergreen_irq_process(struct radeon_device *rdev) case 6: /* D6 vblank/vline */ switch (src_data) { case 0: /* D6 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[5]) { - drm_handle_vblank(rdev->ddev, 5); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[5])) - radeon_crtc_handle_vblank(rdev, 5); - rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D6 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: D6 vblank - IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[5]) { + drm_handle_vblank(rdev->ddev, 5); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[5])) + radeon_crtc_handle_vblank(rdev, 5); + rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D6 vblank\n"); + break; case 1: /* D6 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT; - DRM_DEBUG("IH: D6 vline\n"); - } + if 
(!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)) + DRM_DEBUG("IH: D6 vline - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT; + DRM_DEBUG("IH: D6 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -5039,88 +5063,100 @@ int evergreen_irq_process(struct radeon_device *rdev) case 42: /* HPD hotplug */ switch (src_data) { case 0: - if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD1\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD1\n"); break; case 1: - if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD2\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD2\n"); break; case 2: - if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD3\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD3\n"); break; case 3: - if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD4\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD4\n"); break; case 4: - if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD5\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD5\n"); break; case 5: - if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD6\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD6\n"); break; case 6: - if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 1\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + 
rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 1\n"); break; case 7: - if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 2\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 2\n"); break; case 8: - if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 3\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 3\n"); break; case 9: - if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 4\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 4\n"); break; case 10: - if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 5\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 5\n"); break; case 11: - if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 6\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 6\n"); break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -5130,46 +5166,52 @@ int evergreen_irq_process(struct radeon_device *rdev) case 44: /* hdmi */ switch (src_data) { case 0: - if (rdev->irq.stat_regs.evergreen.afmt_status1 & AFMT_AZ_FORMAT_WTRIG) { - rdev->irq.stat_regs.evergreen.afmt_status1 &= ~AFMT_AZ_FORMAT_WTRIG; - queue_hdmi = true; - DRM_DEBUG("IH: HDMI0\n"); - } + if (!(rdev->irq.stat_regs.evergreen.afmt_status1 & AFMT_AZ_FORMAT_WTRIG)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.afmt_status1 &= ~AFMT_AZ_FORMAT_WTRIG; + queue_hdmi = true; + DRM_DEBUG("IH: HDMI0\n"); break; case 1: - if (rdev->irq.stat_regs.evergreen.afmt_status2 & AFMT_AZ_FORMAT_WTRIG) { - rdev->irq.stat_regs.evergreen.afmt_status2 &= ~AFMT_AZ_FORMAT_WTRIG; - queue_hdmi = true; - DRM_DEBUG("IH: HDMI1\n"); - } + if (!(rdev->irq.stat_regs.evergreen.afmt_status2 & AFMT_AZ_FORMAT_WTRIG)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.afmt_status2 &= 
~AFMT_AZ_FORMAT_WTRIG; + queue_hdmi = true; + DRM_DEBUG("IH: HDMI1\n"); break; case 2: - if (rdev->irq.stat_regs.evergreen.afmt_status3 & AFMT_AZ_FORMAT_WTRIG) { - rdev->irq.stat_regs.evergreen.afmt_status3 &= ~AFMT_AZ_FORMAT_WTRIG; - queue_hdmi = true; - DRM_DEBUG("IH: HDMI2\n"); - } + if (!(rdev->irq.stat_regs.evergreen.afmt_status3 & AFMT_AZ_FORMAT_WTRIG)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.afmt_status3 &= ~AFMT_AZ_FORMAT_WTRIG; + queue_hdmi = true; + DRM_DEBUG("IH: HDMI2\n"); break; case 3: - if (rdev->irq.stat_regs.evergreen.afmt_status4 & AFMT_AZ_FORMAT_WTRIG) { - rdev->irq.stat_regs.evergreen.afmt_status4 &= ~AFMT_AZ_FORMAT_WTRIG; - queue_hdmi = true; - DRM_DEBUG("IH: HDMI3\n"); - } + if (!(rdev->irq.stat_regs.evergreen.afmt_status4 & AFMT_AZ_FORMAT_WTRIG)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.afmt_status4 &= ~AFMT_AZ_FORMAT_WTRIG; + queue_hdmi = true; + DRM_DEBUG("IH: HDMI3\n"); break; case 4: - if (rdev->irq.stat_regs.evergreen.afmt_status5 & AFMT_AZ_FORMAT_WTRIG) { - rdev->irq.stat_regs.evergreen.afmt_status5 &= ~AFMT_AZ_FORMAT_WTRIG; - queue_hdmi = true; - DRM_DEBUG("IH: HDMI4\n"); - } + if (!(rdev->irq.stat_regs.evergreen.afmt_status5 & AFMT_AZ_FORMAT_WTRIG)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.afmt_status5 &= ~AFMT_AZ_FORMAT_WTRIG; + queue_hdmi = true; + DRM_DEBUG("IH: HDMI4\n"); break; case 5: - if (rdev->irq.stat_regs.evergreen.afmt_status6 & AFMT_AZ_FORMAT_WTRIG) { - rdev->irq.stat_regs.evergreen.afmt_status6 &= ~AFMT_AZ_FORMAT_WTRIG; - queue_hdmi = true; - DRM_DEBUG("IH: HDMI5\n"); - } + if (!(rdev->irq.stat_regs.evergreen.afmt_status6 & AFMT_AZ_FORMAT_WTRIG)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.afmt_status6 &= ~AFMT_AZ_FORMAT_WTRIG; + queue_hdmi = true; + DRM_DEBUG("IH: HDMI5\n"); break; default: DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data); diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 8f6d862a18822..21e479fefcab0 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -4039,23 +4039,27 @@ int r600_irq_process(struct radeon_device *rdev) case 1: /* D1 vblank/vline */ switch (src_data) { case 0: /* D1 vblank */ - if (rdev->irq.stat_regs.r600.disp_int & LB_D1_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[0]) { - drm_handle_vblank(rdev->ddev, 0); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[0])) - radeon_crtc_handle_vblank(rdev, 0); - rdev->irq.stat_regs.r600.disp_int &= ~LB_D1_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D1 vblank\n"); + if (!(rdev->irq.stat_regs.r600.disp_int & LB_D1_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: D1 vblank - IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[0]) { + drm_handle_vblank(rdev->ddev, 0); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[0])) + radeon_crtc_handle_vblank(rdev, 0); + rdev->irq.stat_regs.r600.disp_int &= ~LB_D1_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D1 vblank\n"); + break; case 1: /* D1 vline */ - if (rdev->irq.stat_regs.r600.disp_int & LB_D1_VLINE_INTERRUPT) { - rdev->irq.stat_regs.r600.disp_int &= ~LB_D1_VLINE_INTERRUPT; - DRM_DEBUG("IH: D1 vline\n"); - } + if (!(rdev->irq.stat_regs.r600.disp_int & LB_D1_VLINE_INTERRUPT)) + DRM_DEBUG("IH: D1 vline - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.r600.disp_int &= 
~LB_D1_VLINE_INTERRUPT; + DRM_DEBUG("IH: D1 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -4065,23 +4069,27 @@ int r600_irq_process(struct radeon_device *rdev) case 5: /* D2 vblank/vline */ switch (src_data) { case 0: /* D2 vblank */ - if (rdev->irq.stat_regs.r600.disp_int & LB_D2_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[1]) { - drm_handle_vblank(rdev->ddev, 1); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[1])) - radeon_crtc_handle_vblank(rdev, 1); - rdev->irq.stat_regs.r600.disp_int &= ~LB_D2_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D2 vblank\n"); + if (!(rdev->irq.stat_regs.r600.disp_int & LB_D2_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: D2 vblank - IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[1]) { + drm_handle_vblank(rdev->ddev, 1); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[1])) + radeon_crtc_handle_vblank(rdev, 1); + rdev->irq.stat_regs.r600.disp_int &= ~LB_D2_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D2 vblank\n"); + break; case 1: /* D1 vline */ - if (rdev->irq.stat_regs.r600.disp_int & LB_D2_VLINE_INTERRUPT) { - rdev->irq.stat_regs.r600.disp_int &= ~LB_D2_VLINE_INTERRUPT; - DRM_DEBUG("IH: D2 vline\n"); - } + if (!(rdev->irq.stat_regs.r600.disp_int & LB_D2_VLINE_INTERRUPT)) + DRM_DEBUG("IH: D2 vline - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.r600.disp_int &= ~LB_D2_VLINE_INTERRUPT; + DRM_DEBUG("IH: D2 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -4101,46 +4109,53 @@ int r600_irq_process(struct radeon_device *rdev) case 19: /* HPD/DAC hotplug */ switch (src_data) { case 0: - if (rdev->irq.stat_regs.r600.disp_int & DC_HPD1_INTERRUPT) { - rdev->irq.stat_regs.r600.disp_int &= ~DC_HPD1_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD1\n"); - } + if (!(rdev->irq.stat_regs.r600.disp_int & DC_HPD1_INTERRUPT)) + DRM_DEBUG("IH: HPD1 - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.r600.disp_int &= ~DC_HPD1_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD1\n"); break; case 1: - if (rdev->irq.stat_regs.r600.disp_int & DC_HPD2_INTERRUPT) { - rdev->irq.stat_regs.r600.disp_int &= ~DC_HPD2_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD2\n"); - } + if (!(rdev->irq.stat_regs.r600.disp_int & DC_HPD2_INTERRUPT)) + DRM_DEBUG("IH: HPD2 - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.r600.disp_int &= ~DC_HPD2_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD2\n"); break; case 4: - if (rdev->irq.stat_regs.r600.disp_int_cont & DC_HPD3_INTERRUPT) { - rdev->irq.stat_regs.r600.disp_int_cont &= ~DC_HPD3_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD3\n"); - } + if (!(rdev->irq.stat_regs.r600.disp_int_cont & DC_HPD3_INTERRUPT)) + DRM_DEBUG("IH: HPD3 - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.r600.disp_int_cont &= ~DC_HPD3_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD3\n"); break; case 5: - if (rdev->irq.stat_regs.r600.disp_int_cont & DC_HPD4_INTERRUPT) { - rdev->irq.stat_regs.r600.disp_int_cont &= ~DC_HPD4_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD4\n"); - } + if (!(rdev->irq.stat_regs.r600.disp_int_cont & DC_HPD4_INTERRUPT)) + DRM_DEBUG("IH: HPD4 - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.r600.disp_int_cont &= ~DC_HPD4_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD4\n"); break; case 10: - if 
(rdev->irq.stat_regs.r600.disp_int_cont2 & DC_HPD5_INTERRUPT) { - rdev->irq.stat_regs.r600.disp_int_cont2 &= ~DC_HPD5_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD5\n"); - } + if (!(rdev->irq.stat_regs.r600.disp_int_cont2 & DC_HPD5_INTERRUPT)) + DRM_DEBUG("IH: HPD5 - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.r600.disp_int_cont2 &= ~DC_HPD5_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD5\n"); break; case 12: - if (rdev->irq.stat_regs.r600.disp_int_cont2 & DC_HPD6_INTERRUPT) { - rdev->irq.stat_regs.r600.disp_int_cont2 &= ~DC_HPD6_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD6\n"); - } + if (!(rdev->irq.stat_regs.r600.disp_int_cont2 & DC_HPD6_INTERRUPT)) + DRM_DEBUG("IH: HPD6 - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.r600.disp_int_cont2 &= ~DC_HPD6_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD6\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -4150,18 +4165,22 @@ int r600_irq_process(struct radeon_device *rdev) case 21: /* hdmi */ switch (src_data) { case 4: - if (rdev->irq.stat_regs.r600.hdmi0_status & HDMI0_AZ_FORMAT_WTRIG) { - rdev->irq.stat_regs.r600.hdmi0_status &= ~HDMI0_AZ_FORMAT_WTRIG; - queue_hdmi = true; - DRM_DEBUG("IH: HDMI0\n"); - } + if (!(rdev->irq.stat_regs.r600.hdmi0_status & HDMI0_AZ_FORMAT_WTRIG)) + DRM_DEBUG("IH: HDMI0 - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.r600.hdmi0_status &= ~HDMI0_AZ_FORMAT_WTRIG; + queue_hdmi = true; + DRM_DEBUG("IH: HDMI0\n"); + break; case 5: - if (rdev->irq.stat_regs.r600.hdmi1_status & HDMI0_AZ_FORMAT_WTRIG) { - rdev->irq.stat_regs.r600.hdmi1_status &= ~HDMI0_AZ_FORMAT_WTRIG; - queue_hdmi = true; - DRM_DEBUG("IH: HDMI1\n"); - } + if (!(rdev->irq.stat_regs.r600.hdmi1_status & HDMI0_AZ_FORMAT_WTRIG)) + DRM_DEBUG("IH: HDMI1 - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.r600.hdmi1_status &= ~HDMI0_AZ_FORMAT_WTRIG; + queue_hdmi = true; + DRM_DEBUG("IH: HDMI1\n"); + break; default: DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data); diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 4c679b802bc85..e15185b165049 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -6466,23 +6466,27 @@ int si_irq_process(struct radeon_device *rdev) case 1: /* D1 vblank/vline */ switch (src_data) { case 0: /* D1 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[0]) { - drm_handle_vblank(rdev->ddev, 0); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[0])) - radeon_crtc_handle_vblank(rdev, 0); - rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D1 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[0]) { + drm_handle_vblank(rdev->ddev, 0); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[0])) + radeon_crtc_handle_vblank(rdev, 0); + rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D1 vblank\n"); + break; case 1: /* D1 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT; - DRM_DEBUG("IH: D1 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o 
asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT; + DRM_DEBUG("IH: D1 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -6492,23 +6496,27 @@ int si_irq_process(struct radeon_device *rdev) case 2: /* D2 vblank/vline */ switch (src_data) { case 0: /* D2 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[1]) { - drm_handle_vblank(rdev->ddev, 1); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[1])) - radeon_crtc_handle_vblank(rdev, 1); - rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D2 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[1]) { + drm_handle_vblank(rdev->ddev, 1); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[1])) + radeon_crtc_handle_vblank(rdev, 1); + rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D2 vblank\n"); + break; case 1: /* D2 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT; - DRM_DEBUG("IH: D2 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT; + DRM_DEBUG("IH: D2 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -6518,23 +6526,27 @@ int si_irq_process(struct radeon_device *rdev) case 3: /* D3 vblank/vline */ switch (src_data) { case 0: /* D3 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[2]) { - drm_handle_vblank(rdev->ddev, 2); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[2])) - radeon_crtc_handle_vblank(rdev, 2); - rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D3 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[2]) { + drm_handle_vblank(rdev->ddev, 2); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[2])) + radeon_crtc_handle_vblank(rdev, 2); + rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D3 vblank\n"); + break; case 1: /* D3 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT; - DRM_DEBUG("IH: D3 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT; + DRM_DEBUG("IH: D3 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -6544,23 +6556,27 @@ int si_irq_process(struct radeon_device *rdev) case 4: /* D4 vblank/vline */ switch (src_data) { case 0: /* D4 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) { - if 
(rdev->irq.crtc_vblank_int[3]) { - drm_handle_vblank(rdev->ddev, 3); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[3])) - radeon_crtc_handle_vblank(rdev, 3); - rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D4 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[3]) { + drm_handle_vblank(rdev->ddev, 3); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[3])) + radeon_crtc_handle_vblank(rdev, 3); + rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D4 vblank\n"); + break; case 1: /* D4 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT; - DRM_DEBUG("IH: D4 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT; + DRM_DEBUG("IH: D4 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -6570,23 +6586,27 @@ int si_irq_process(struct radeon_device *rdev) case 5: /* D5 vblank/vline */ switch (src_data) { case 0: /* D5 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[4]) { - drm_handle_vblank(rdev->ddev, 4); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[4])) - radeon_crtc_handle_vblank(rdev, 4); - rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D5 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[4]) { + drm_handle_vblank(rdev->ddev, 4); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[4])) + radeon_crtc_handle_vblank(rdev, 4); + rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D5 vblank\n"); + break; case 1: /* D5 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT; - DRM_DEBUG("IH: D5 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT; + DRM_DEBUG("IH: D5 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -6596,23 +6616,27 @@ int si_irq_process(struct radeon_device *rdev) case 6: /* D6 vblank/vline */ switch (src_data) { case 0: /* D6 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[5]) { - drm_handle_vblank(rdev->ddev, 5); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[5])) - radeon_crtc_handle_vblank(rdev, 5); - rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D6 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted 
irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[5]) { + drm_handle_vblank(rdev->ddev, 5); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[5])) + radeon_crtc_handle_vblank(rdev, 5); + rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D6 vblank\n"); + break; case 1: /* D6 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT; - DRM_DEBUG("IH: D6 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT; + DRM_DEBUG("IH: D6 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -6632,88 +6656,112 @@ int si_irq_process(struct radeon_device *rdev) case 42: /* HPD hotplug */ switch (src_data) { case 0: - if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD1\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD1\n"); + break; case 1: - if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD2\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD2\n"); + break; case 2: - if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD3\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD3\n"); + break; case 3: - if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD4\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD4\n"); + break; case 4: - if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD5\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD5\n"); + break; case 5: - if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD6\n"); - } + if 
(!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD6\n"); + break; case 6: - if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 1\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 1\n"); + break; case 7: - if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 2\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 2\n"); + break; case 8: - if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 3\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 3\n"); + break; case 9: - if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 4\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 4\n"); + break; case 10: - if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 5\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 5\n"); + break; case 11: - if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 6\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 6\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); From 5161879c7e43ba40edbe2991ec31bfa6f7e2fe30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Tue, 7 Jul 2015 16:27:28 +0900 Subject: [PATCH 0292/2091] drm/radeon: Clean up reference counting and pinning of the cursor BOs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit cd404af0c930104462aa91344f07d002cf8248ed upstream. 
Take a GEM reference for and pin the new cursor BO, unpin and drop the GEM reference for the old cursor BO in radeon_crtc_cursor_set2, and use radeon_crtc->cursor_addr in radeon_set_cursor. This fixes radeon_cursor_reset accidentally incrementing the cursor BO pin count, and cleans up the code a little. Reviewed-by: Grigori Goronzy Reviewed-by: Christian König Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/radeon_cursor.c | 84 ++++++++++++-------------- drivers/gpu/drm/radeon/radeon_mode.h | 1 - 2 files changed, 37 insertions(+), 48 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_cursor.c b/drivers/gpu/drm/radeon/radeon_cursor.c index 45e54060ee97e..fa661744a1f57 100644 --- a/drivers/gpu/drm/radeon/radeon_cursor.c +++ b/drivers/gpu/drm/radeon/radeon_cursor.c @@ -205,8 +205,9 @@ static int radeon_cursor_move_locked(struct drm_crtc *crtc, int x, int y) | (x << 16) | y)); /* offset is from DISP(2)_BASE_ADDRESS */ - WREG32(RADEON_CUR_OFFSET + radeon_crtc->crtc_offset, (radeon_crtc->legacy_cursor_offset + - (yorigin * 256))); + WREG32(RADEON_CUR_OFFSET + radeon_crtc->crtc_offset, + radeon_crtc->cursor_addr - radeon_crtc->legacy_display_base_addr + + yorigin * 256); } radeon_crtc->cursor_x = x; @@ -227,51 +228,32 @@ int radeon_crtc_cursor_move(struct drm_crtc *crtc, return ret; } -static int radeon_set_cursor(struct drm_crtc *crtc, struct drm_gem_object *obj) +static void radeon_set_cursor(struct drm_crtc *crtc) { struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); struct radeon_device *rdev = crtc->dev->dev_private; - struct radeon_bo *robj = gem_to_radeon_bo(obj); - uint64_t gpu_addr; - int ret; - - ret = radeon_bo_reserve(robj, false); - if (unlikely(ret != 0)) - goto fail; - /* Only 27 bit offset for legacy cursor */ - ret = radeon_bo_pin_restricted(robj, RADEON_GEM_DOMAIN_VRAM, - ASIC_IS_AVIVO(rdev) ? 
0 : 1 << 27, - &gpu_addr); - radeon_bo_unreserve(robj); - if (ret) - goto fail; if (ASIC_IS_DCE4(rdev)) { WREG32(EVERGREEN_CUR_SURFACE_ADDRESS_HIGH + radeon_crtc->crtc_offset, - upper_32_bits(gpu_addr)); + upper_32_bits(radeon_crtc->cursor_addr)); WREG32(EVERGREEN_CUR_SURFACE_ADDRESS + radeon_crtc->crtc_offset, - gpu_addr & 0xffffffff); + lower_32_bits(radeon_crtc->cursor_addr)); } else if (ASIC_IS_AVIVO(rdev)) { if (rdev->family >= CHIP_RV770) { if (radeon_crtc->crtc_id) - WREG32(R700_D2CUR_SURFACE_ADDRESS_HIGH, upper_32_bits(gpu_addr)); + WREG32(R700_D2CUR_SURFACE_ADDRESS_HIGH, + upper_32_bits(radeon_crtc->cursor_addr)); else - WREG32(R700_D1CUR_SURFACE_ADDRESS_HIGH, upper_32_bits(gpu_addr)); + WREG32(R700_D1CUR_SURFACE_ADDRESS_HIGH, + upper_32_bits(radeon_crtc->cursor_addr)); } WREG32(AVIVO_D1CUR_SURFACE_ADDRESS + radeon_crtc->crtc_offset, - gpu_addr & 0xffffffff); + lower_32_bits(radeon_crtc->cursor_addr)); } else { - radeon_crtc->legacy_cursor_offset = gpu_addr - radeon_crtc->legacy_display_base_addr; /* offset is from DISP(2)_BASE_ADDRESS */ - WREG32(RADEON_CUR_OFFSET + radeon_crtc->crtc_offset, radeon_crtc->legacy_cursor_offset); + WREG32(RADEON_CUR_OFFSET + radeon_crtc->crtc_offset, + radeon_crtc->cursor_addr - radeon_crtc->legacy_display_base_addr); } - - return 0; - -fail: - drm_gem_object_unreference_unlocked(obj); - - return ret; } int radeon_crtc_cursor_set2(struct drm_crtc *crtc, @@ -283,7 +265,9 @@ int radeon_crtc_cursor_set2(struct drm_crtc *crtc, int32_t hot_y) { struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); + struct radeon_device *rdev = crtc->dev->dev_private; struct drm_gem_object *obj; + struct radeon_bo *robj; int ret; if (!handle) { @@ -305,6 +289,23 @@ int radeon_crtc_cursor_set2(struct drm_crtc *crtc, return -ENOENT; } + robj = gem_to_radeon_bo(obj); + ret = radeon_bo_reserve(robj, false); + if (ret != 0) { + drm_gem_object_unreference_unlocked(obj); + return ret; + } + /* Only 27 bit offset for legacy cursor */ + ret = radeon_bo_pin_restricted(robj, RADEON_GEM_DOMAIN_VRAM, + ASIC_IS_AVIVO(rdev) ? 
0 : 1 << 27, + &radeon_crtc->cursor_addr); + radeon_bo_unreserve(robj); + if (ret) { + DRM_ERROR("Failed to pin new cursor BO (%d)\n", ret); + drm_gem_object_unreference_unlocked(obj); + return ret; + } + radeon_crtc->cursor_width = width; radeon_crtc->cursor_height = height; @@ -323,13 +324,8 @@ int radeon_crtc_cursor_set2(struct drm_crtc *crtc, radeon_crtc->cursor_hot_y = hot_y; } - ret = radeon_set_cursor(crtc, obj); - - if (ret) - DRM_ERROR("radeon_set_cursor returned %d, not changing cursor\n", - ret); - else - radeon_show_cursor(crtc); + radeon_set_cursor(crtc); + radeon_show_cursor(crtc); radeon_lock_cursor(crtc, false); @@ -341,8 +337,7 @@ int radeon_crtc_cursor_set2(struct drm_crtc *crtc, radeon_bo_unpin(robj); radeon_bo_unreserve(robj); } - if (radeon_crtc->cursor_bo != obj) - drm_gem_object_unreference_unlocked(radeon_crtc->cursor_bo); + drm_gem_object_unreference_unlocked(radeon_crtc->cursor_bo); } radeon_crtc->cursor_bo = obj; @@ -360,7 +355,6 @@ int radeon_crtc_cursor_set2(struct drm_crtc *crtc, void radeon_cursor_reset(struct drm_crtc *crtc) { struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); - int ret; if (radeon_crtc->cursor_bo) { radeon_lock_cursor(crtc, true); @@ -368,12 +362,8 @@ void radeon_cursor_reset(struct drm_crtc *crtc) radeon_cursor_move_locked(crtc, radeon_crtc->cursor_x, radeon_crtc->cursor_y); - ret = radeon_set_cursor(crtc, radeon_crtc->cursor_bo); - if (ret) - DRM_ERROR("radeon_set_cursor returned %d, not showing " - "cursor\n", ret); - else - radeon_show_cursor(crtc); + radeon_set_cursor(crtc); + radeon_show_cursor(crtc); radeon_lock_cursor(crtc, false); } diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h index fa91a17b81b69..f01c797b78cf0 100644 --- a/drivers/gpu/drm/radeon/radeon_mode.h +++ b/drivers/gpu/drm/radeon/radeon_mode.h @@ -343,7 +343,6 @@ struct radeon_crtc { int max_cursor_width; int max_cursor_height; uint32_t legacy_display_base_addr; - uint32_t legacy_cursor_offset; enum radeon_rmx_type rmx_type; u8 h_border; u8 v_border; From 46b334754f07e698c52ffbbbe33d58f303cab8e9 Mon Sep 17 00:00:00 2001 From: Grigori Goronzy Date: Tue, 7 Jul 2015 16:27:29 +0900 Subject: [PATCH 0293/2091] drm/radeon: unpin cursor BOs on suspend and pin them again on resume (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f3cbb17bcf676a2fc6aedebe9fbebd59e550c51a upstream. Everything is evicted from VRAM before suspend, so we need to make sure all BOs are unpinned and re-pinned after resume. Fixes broken mouse cursor after resume introduced by commit b9729b17. 
[Michel Dänzer: Add pinning BOs on resume] v2: [Alex Deucher: merge cursor unpin into fb unpin loop] Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=100541 Reviewed-by: Christian König (v1) Signed-off-by: Grigori Goronzy Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/radeon_device.c | 34 +++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index a7fdfa4f0857b..604c44d88e7a4 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1572,11 +1572,21 @@ int radeon_suspend_kms(struct drm_device *dev, bool suspend, bool fbcon) drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); } - /* unpin the front buffers */ + /* unpin the front buffers and cursors */ list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { + struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); struct radeon_framebuffer *rfb = to_radeon_framebuffer(crtc->primary->fb); struct radeon_bo *robj; + if (radeon_crtc->cursor_bo) { + struct radeon_bo *robj = gem_to_radeon_bo(radeon_crtc->cursor_bo); + r = radeon_bo_reserve(robj, false); + if (r == 0) { + radeon_bo_unpin(robj); + radeon_bo_unreserve(robj); + } + } + if (rfb == NULL || rfb->obj == NULL) { continue; } @@ -1639,6 +1649,7 @@ int radeon_resume_kms(struct drm_device *dev, bool resume, bool fbcon) { struct drm_connector *connector; struct radeon_device *rdev = dev->dev_private; + struct drm_crtc *crtc; int r; if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) @@ -1678,6 +1689,27 @@ int radeon_resume_kms(struct drm_device *dev, bool resume, bool fbcon) radeon_restore_bios_scratch_regs(rdev); + /* pin cursors */ + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { + struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); + + if (radeon_crtc->cursor_bo) { + struct radeon_bo *robj = gem_to_radeon_bo(radeon_crtc->cursor_bo); + r = radeon_bo_reserve(robj, false); + if (r == 0) { + /* Only 27 bit offset for legacy cursor */ + r = radeon_bo_pin_restricted(robj, + RADEON_GEM_DOMAIN_VRAM, + ASIC_IS_AVIVO(rdev) ? + 0 : 1 << 27, + &radeon_crtc->cursor_addr); + if (r != 0) + DRM_ERROR("Failed to pin cursor BO (%d)\n", r); + radeon_bo_unreserve(robj); + } + } + } + /* init dig PHYs, disp eng pll */ if (rdev->is_atom_bios) { radeon_atom_encoder_init(rdev); From f72642c67038dc91715dc96b29a05c100a4f26ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Fri, 3 Jul 2015 10:02:27 +0900 Subject: [PATCH 0294/2091] drm/radeon: Don't flush the GART TLB if rdev->gart.ptr == NULL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 233709d2cd6bbaaeda0aeb8d11f6ca7f98563b39 upstream. This can be the case when the GPU is powered off, e.g. via vgaswitcheroo or runpm. When the GPU is powered up again, radeon_gart_table_vram_pin flushes the TLB after setting rdev->gart.ptr to non-NULL. Fixes panic on powering off R7xx GPUs. 
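Condensed, the guarded flush this patch adds to radeon_gart_bind()/radeon_gart_unbind() looks like the sketch below (explanatory comments added; the actual hunks follow in the diff):

    if (rdev->gart.ptr) {
            /* order the PTE updates above before the hardware flush */
            mb();
            radeon_gart_tlb_flush(rdev);
    }
    /* rdev->gart.ptr is NULL while the GPU is powered off; skipping the
     * flush is safe because radeon_gart_table_vram_pin() flushes the TLB
     * again after it sets rdev->gart.ptr on power-up */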
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=61529 Reviewed-by: Christian König Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/radeon_gart.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c index 5450fa95a47ef..c4777c8d0312a 100644 --- a/drivers/gpu/drm/radeon/radeon_gart.c +++ b/drivers/gpu/drm/radeon/radeon_gart.c @@ -260,8 +260,10 @@ void radeon_gart_unbind(struct radeon_device *rdev, unsigned offset, } } } - mb(); - radeon_gart_tlb_flush(rdev); + if (rdev->gart.ptr) { + mb(); + radeon_gart_tlb_flush(rdev); + } } /** @@ -306,8 +308,10 @@ int radeon_gart_bind(struct radeon_device *rdev, unsigned offset, page_base += RADEON_GPU_PAGE_SIZE; } } - mb(); - radeon_gart_tlb_flush(rdev); + if (rdev->gart.ptr) { + mb(); + radeon_gart_tlb_flush(rdev); + } return 0; } From ea039f927524e36c15b5905b4c9469d788591932 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 9 Jul 2015 21:08:17 -0400 Subject: [PATCH 0295/2091] drm/radeon: add a dpm quirk for Sapphire Radeon R9 270X 2GB GDDR5 commit 5dfc71bc44d91d1620505c064fa22b0b3db58a9d upstream. bug: https://bugs.freedesktop.org/show_bug.cgi?id=76490 Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/si_dpm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index ff8b83f5e929a..9dfcedec05a69 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -2925,6 +2925,7 @@ static struct si_dpm_quirk si_dpm_quirk_list[] = { /* PITCAIRN - https://bugs.freedesktop.org/show_bug.cgi?id=76490 */ { PCI_VENDOR_ID_ATI, 0x6810, 0x1462, 0x3036, 0, 120000 }, { PCI_VENDOR_ID_ATI, 0x6811, 0x174b, 0xe271, 0, 120000 }, + { PCI_VENDOR_ID_ATI, 0x6810, 0x174b, 0xe271, 85000, 90000 }, { 0, 0, 0, 0 }, }; From d019429c10adf02d4b107ce815ff3c4aa6c3459a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 14 Jul 2015 15:58:30 +0200 Subject: [PATCH 0296/2091] drm/radeon: fix user ptr race condition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 12f1384da650bdb835fff63e66fe815ea882fc0e upstream. Port of amdgpu patch 9298e52f8b51d1e4acd68f502832f3a97f8cf892. 
Signed-off-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/radeon_gem.c | 1 + drivers/gpu/drm/radeon/radeon_object.c | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index e370cb4fd4676..186d0b792a02c 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -36,6 +36,7 @@ void radeon_gem_object_free(struct drm_gem_object *gobj) if (robj) { if (robj->gem_base.import_attach) drm_prime_gem_destroy(&robj->gem_base, robj->tbo.sg); + radeon_mn_unregister(robj); radeon_bo_unref(&robj); } } diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index 318165d4855c4..676362769b8db 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -75,7 +75,6 @@ static void radeon_ttm_bo_destroy(struct ttm_buffer_object *tbo) bo = container_of(tbo, struct radeon_bo, tbo); radeon_update_memory_usage(bo, bo->tbo.mem.mem_type, -1); - radeon_mn_unregister(bo); mutex_lock(&bo->rdev->gem.mutex); list_del_init(&bo->list); From 8a235410e43ae851e6657d00d3605efc7ff77f76 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 16 Jul 2015 10:17:09 -0400 Subject: [PATCH 0297/2091] drm/radeon/ci: silence a harmless PCC warning commit bda5e3e97ffe80c5a793383df5681d3581d46ac8 upstream. This has been a source of confusion. Make it debug only. Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/ci_dpm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c index 8730562323a8b..4a09947be2445 100644 --- a/drivers/gpu/drm/radeon/ci_dpm.c +++ b/drivers/gpu/drm/radeon/ci_dpm.c @@ -5818,7 +5818,7 @@ int ci_dpm_init(struct radeon_device *rdev) tmp |= DPM_ENABLED; break; default: - DRM_ERROR("Invalid PCC GPIO: %u!\n", gpio.shift); + DRM_DEBUG("Invalid PCC GPIO: %u!\n", gpio.shift); break; } WREG32_SMC(CNB_PWRMGT_CNTL, tmp); From c22488876adbfca5511afed4b828407cb2556a11 Mon Sep 17 00:00:00 2001 From: Daniel Kurtz Date: Tue, 7 Jul 2015 17:03:36 +0800 Subject: [PATCH 0298/2091] drm/rockchip: use drm_gem_mmap helpers commit 41315b793e13f884cda79389f0d5d44d027e57d1 upstream. Rather than (incompletely [0]) re-implementing drm_gem_mmap() and drm_gem_mmap_obj() helpers, call them directly from the rockchip mmap routines. Once the core functions return successfully, the rockchip mmap routines can still use dma_mmap_attrs() to simply mmap the entire buffer. [0] Previously, we were performing the mmap() without first taking a reference on the underlying gem buffer. This could leak ptes if the gem object is destroyed while userspace is still holding the mapping. 
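For context, a rough sketch of the shape the mmap path takes after this change (hypothetical driver callback, not the rockchip code verbatim; the real conversion is in the diff below):

    static int my_gem_mmap(struct file *filp, struct vm_area_struct *vma)
    {
            int ret;

            /*
             * drm_gem_mmap() looks up the object behind vma->vm_pgoff,
             * checks that this file is allowed to map it, takes a GEM
             * reference and installs the driver's vm_ops.  The reference
             * is dropped again only when the mapping itself goes away,
             * so the object can no longer be destroyed underneath a live
             * userspace mapping.
             */
            ret = drm_gem_mmap(filp, vma);
            if (ret)
                    return ret;

            /* driver-specific remapping of the buffer (e.g. a
             * dma_mmap_attrs() call) would follow here */
            return 0;
    }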
Signed-off-by: Daniel Kurtz Reviewed-by: Daniel Vetter Signed-off-by: Daniel Vetter Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/rockchip/rockchip_drm_gem.c | 67 +++++++++++---------- 1 file changed, 34 insertions(+), 33 deletions(-) diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_gem.c b/drivers/gpu/drm/rockchip/rockchip_drm_gem.c index eb2282cc4a565..eba5f8a52fbd9 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_gem.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_gem.c @@ -54,55 +54,56 @@ static void rockchip_gem_free_buf(struct rockchip_gem_object *rk_obj) &rk_obj->dma_attrs); } -int rockchip_gem_mmap_buf(struct drm_gem_object *obj, - struct vm_area_struct *vma) +static int rockchip_drm_gem_object_mmap(struct drm_gem_object *obj, + struct vm_area_struct *vma) + { + int ret; struct rockchip_gem_object *rk_obj = to_rockchip_obj(obj); struct drm_device *drm = obj->dev; - unsigned long vm_size; - vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP; - vm_size = vma->vm_end - vma->vm_start; - - if (vm_size > obj->size) - return -EINVAL; + /* + * dma_alloc_attrs() allocated a struct page table for rk_obj, so clear + * VM_PFNMAP flag that was set by drm_gem_mmap_obj()/drm_gem_mmap(). + */ + vma->vm_flags &= ~VM_PFNMAP; - return dma_mmap_attrs(drm->dev, vma, rk_obj->kvaddr, rk_obj->dma_addr, + ret = dma_mmap_attrs(drm->dev, vma, rk_obj->kvaddr, rk_obj->dma_addr, obj->size, &rk_obj->dma_attrs); + if (ret) + drm_gem_vm_close(vma); + + return ret; } -/* drm driver mmap file operations */ -int rockchip_gem_mmap(struct file *filp, struct vm_area_struct *vma) +int rockchip_gem_mmap_buf(struct drm_gem_object *obj, + struct vm_area_struct *vma) { - struct drm_file *priv = filp->private_data; - struct drm_device *dev = priv->minor->dev; - struct drm_gem_object *obj; - struct drm_vma_offset_node *node; + struct drm_device *drm = obj->dev; int ret; - if (drm_device_is_unplugged(dev)) - return -ENODEV; + mutex_lock(&drm->struct_mutex); + ret = drm_gem_mmap_obj(obj, obj->size, vma); + mutex_unlock(&drm->struct_mutex); + if (ret) + return ret; - mutex_lock(&dev->struct_mutex); + return rockchip_drm_gem_object_mmap(obj, vma); +} - node = drm_vma_offset_exact_lookup(dev->vma_offset_manager, - vma->vm_pgoff, - vma_pages(vma)); - if (!node) { - mutex_unlock(&dev->struct_mutex); - DRM_ERROR("failed to find vma node.\n"); - return -EINVAL; - } else if (!drm_vma_node_is_allowed(node, filp)) { - mutex_unlock(&dev->struct_mutex); - return -EACCES; - } +/* drm driver mmap file operations */ +int rockchip_gem_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct drm_gem_object *obj; + int ret; - obj = container_of(node, struct drm_gem_object, vma_node); - ret = rockchip_gem_mmap_buf(obj, vma); + ret = drm_gem_mmap(filp, vma); + if (ret) + return ret; - mutex_unlock(&dev->struct_mutex); + obj = vma->vm_private_data; - return ret; + return rockchip_drm_gem_object_mmap(obj, vma); } struct rockchip_gem_object * From 7e230794ec1a9a439c527893b7e7efb69ad33e78 Mon Sep 17 00:00:00 2001 From: Zhao Junwang Date: Tue, 7 Jul 2015 17:08:35 +0800 Subject: [PATCH 0299/2091] drm: add a check for x/y in drm_mode_setcrtc commit 01447e9f04ba1c49a9534ae6a5a6f26c2bb05226 upstream. 
legacy setcrtc ioctl does take a 32 bit value which might indeed overflow the checks of crtc_req->x > INT_MAX and crtc_req->y > INT_MAX aren't needed any more with this v2: -polish the annotation according to Daniel's comment Cc: Daniel Vetter Signed-off-by: Zhao Junwang Signed-off-by: Daniel Vetter Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_crtc.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index 3007b44e6bf44..cafcf9dab8bd9 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -2749,8 +2749,11 @@ int drm_mode_setcrtc(struct drm_device *dev, void *data, if (!drm_core_check_feature(dev, DRIVER_MODESET)) return -EINVAL; - /* For some reason crtc x/y offsets are signed internally. */ - if (crtc_req->x > INT_MAX || crtc_req->y > INT_MAX) + /* + * Universal plane src offsets are only 16.16, prevent havoc for + * drivers using universal plane code internally. + */ + if (crtc_req->x & 0xffff0000 || crtc_req->y & 0xffff0000) return -ERANGE; drm_modeset_lock_all(dev); From 463059b56b8468f31e6112213ac6667d507e6538 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 14 Jul 2015 11:13:08 +0100 Subject: [PATCH 0300/2091] drm: Provide compat ioctl for addfb2.1 commit c631d5f90e7ee246536c72f80ade86e9ef4d2f13 upstream. Frame buffer modifiers extensions provided in; commit e3eb3250d84ef97b766312345774367b6a310db8 Author: Rob Clark Date: Thu Feb 5 14:41:52 2015 +0000 drm: add support for tiled/compressed/etc modifier in addfb2 Missed the structure packing/alignment problem where 64-bit members were added after the odd number of 32-bit ones. This makes the compiler produce structures of different sizes under 32- and 64-bit x86 targets and makes the ioctl need explicit compat handling. v2: Removed the typedef. (Daniel Vetter) Signed-off-by: Tvrtko Ursulin Reviewed-by: Daniel Vetter Cc: dri-devel@lists.freedesktop.org Cc: Rob Clark Cc: Daniel Stone Cc: Daniel Vetter [danvet: Squash in compile fix from Mika.] 
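The layout difference is easy to demonstrate outside the kernel. A minimal userspace sketch (a hypothetical struct with the same shape as drm_mode_fb_cmd2, seventeen 32-bit members followed by a 64-bit array; not the UAPI header itself):

    #include <stdint.h>
    #include <stdio.h>

    struct fb_cmd_like {
            uint32_t words[17];     /* an odd number of 32-bit members: 68 bytes */
            uint64_t modifier[4];   /* 4-byte aligned on i386, 8-byte on x86-64 */
    };

    int main(void)
    {
            /* prints 100 when built with -m32 and 104 with -m64, which is
             * why the ioctl needs the explicit compat handler added below */
            printf("%zu\n", sizeof(struct fb_cmd_like));
            return 0;
    }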
Signed-off-by: Daniel Vetter Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_ioc32.c | 60 +++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/drivers/gpu/drm/drm_ioc32.c b/drivers/gpu/drm/drm_ioc32.c index aa8bbb460c571..9cfcd0aef0dfa 100644 --- a/drivers/gpu/drm/drm_ioc32.c +++ b/drivers/gpu/drm/drm_ioc32.c @@ -70,6 +70,8 @@ #define DRM_IOCTL_WAIT_VBLANK32 DRM_IOWR(0x3a, drm_wait_vblank32_t) +#define DRM_IOCTL_MODE_ADDFB232 DRM_IOWR(0xb8, drm_mode_fb_cmd232_t) + typedef struct drm_version_32 { int version_major; /**< Major version */ int version_minor; /**< Minor version */ @@ -1016,6 +1018,63 @@ static int compat_drm_wait_vblank(struct file *file, unsigned int cmd, return 0; } +typedef struct drm_mode_fb_cmd232 { + u32 fb_id; + u32 width; + u32 height; + u32 pixel_format; + u32 flags; + u32 handles[4]; + u32 pitches[4]; + u32 offsets[4]; + u64 modifier[4]; +} __attribute__((packed)) drm_mode_fb_cmd232_t; + +static int compat_drm_mode_addfb2(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct drm_mode_fb_cmd232 __user *argp = (void __user *)arg; + struct drm_mode_fb_cmd232 req32; + struct drm_mode_fb_cmd2 __user *req64; + int i; + int err; + + if (copy_from_user(&req32, argp, sizeof(req32))) + return -EFAULT; + + req64 = compat_alloc_user_space(sizeof(*req64)); + + if (!access_ok(VERIFY_WRITE, req64, sizeof(*req64)) + || __put_user(req32.width, &req64->width) + || __put_user(req32.height, &req64->height) + || __put_user(req32.pixel_format, &req64->pixel_format) + || __put_user(req32.flags, &req64->flags)) + return -EFAULT; + + for (i = 0; i < 4; i++) { + if (__put_user(req32.handles[i], &req64->handles[i])) + return -EFAULT; + if (__put_user(req32.pitches[i], &req64->pitches[i])) + return -EFAULT; + if (__put_user(req32.offsets[i], &req64->offsets[i])) + return -EFAULT; + if (__put_user(req32.modifier[i], &req64->modifier[i])) + return -EFAULT; + } + + err = drm_ioctl(file, DRM_IOCTL_MODE_ADDFB2, (unsigned long)req64); + if (err) + return err; + + if (__get_user(req32.fb_id, &req64->fb_id)) + return -EFAULT; + + if (copy_to_user(argp, &req32, sizeof(req32))) + return -EFAULT; + + return 0; +} + static drm_ioctl_compat_t *drm_compat_ioctls[] = { [DRM_IOCTL_NR(DRM_IOCTL_VERSION32)] = compat_drm_version, [DRM_IOCTL_NR(DRM_IOCTL_GET_UNIQUE32)] = compat_drm_getunique, @@ -1048,6 +1107,7 @@ static drm_ioctl_compat_t *drm_compat_ioctls[] = { [DRM_IOCTL_NR(DRM_IOCTL_UPDATE_DRAW32)] = compat_drm_update_draw, #endif [DRM_IOCTL_NR(DRM_IOCTL_WAIT_VBLANK32)] = compat_drm_wait_vblank, + [DRM_IOCTL_NR(DRM_IOCTL_MODE_ADDFB232)] = compat_drm_mode_addfb2, }; /** From 0f974562e301457fb3f314df5ae3bca7d9e0aa6c Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 16 Jul 2015 16:47:50 +0200 Subject: [PATCH 0301/2091] drm: Stop resetting connector state to unknown MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 5677d67ae3949f09f57357241b88222d49b8c782 upstream. It's causing piles of issues since we've stopped forcing full detect cycles in the sysfs interfaces with commit c484f02d0f02fbbfc6decc945a69aae011041a27 Author: Chris Wilson Date: Fri Mar 6 12:36:42 2015 +0000 drm: Lighten sysfs connector 'status' The original justification for this was that the hpd handlers could use the unknown state as a hint to force a full detection. But current i915 code isn't doing that any more, and no one else really uses reset on resume. So instead just keep the old state around. 
References: http://article.gmane.org/gmane.comp.freedesktop.xorg.drivers.intel/62584 Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=100641 Cc: Rui Matos Cc: Julien Wajsberg Cc: kuddel.mail@gmx.de Cc: Lennart Poettering Acked-by: Rob Clark Tested-by: Rui Tiago Cação Matos Signed-off-by: Daniel Vetter Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_crtc.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index cafcf9dab8bd9..800a025dd0629 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -5051,12 +5051,9 @@ void drm_mode_config_reset(struct drm_device *dev) if (encoder->funcs->reset) encoder->funcs->reset(encoder); - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { - connector->status = connector_status_unknown; - + list_for_each_entry(connector, &dev->mode_config.connector_list, head) if (connector->funcs->reset) connector->funcs->reset(connector); - } } EXPORT_SYMBOL(drm_mode_config_reset); From f8a895a6704bba5752c136f7338814c40c891e89 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Tue, 19 May 2015 19:44:17 -0400 Subject: [PATCH 0302/2091] libata: Fix regression when the NCQ Send and Receive log page is absent commit eab6ee1ce3c4678224d70338134f7a02005768cb upstream. Commit 5d3abf8ff67f ("libata: Fall back to unqueued READ LOG EXT if the DMA variant fails") allowed us to fall back to the unqueued READ LOG variant if the queued version failed. However, if the device did not support the page at all we would end up looping due to a merge snafu. Ensure we only take the fallback path once. Signed-off-by: Martin K. Petersen Reported-by: Sergey Senozhatsky Tested-by: Sergey Senozhatsky Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libata-eh.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index f0931742a4392..7465031a893c6 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -1521,6 +1521,7 @@ unsigned int ata_read_log_page(struct ata_device *dev, u8 log, } else { tf.command = ATA_CMD_READ_LOG_EXT; tf.protocol = ATA_PROT_PIO; + dma = false; } tf.lbal = log; tf.lbam = page; From 1d94f9fc91f94dacfce82638a485dcf9474dc305 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Mon, 22 Jun 2015 09:42:48 +1000 Subject: [PATCH 0303/2091] xfs: fix remote symlinks on V5/CRC filesystems commit 2ac56d3d4bd625450a54d4c3f9292d58f6b88232 upstream. If we create a CRC filesystem, mount it, and create a symlink with a path long enough that it can't live in the inode, we get a very strange result upon remount: # ls -l mnt total 4 lrwxrwxrwx. 1 root root 929 Jun 15 16:58 link -> XSLM XSLM is the V5 symlink block header magic (which happens to be followed by a NUL, so the string looks terminated). xfs_readlink_bmap() advanced cur_chunk by the size of the header for CRC filesystems, but never actually used that pointer; it kept reading from bp->b_addr, which is the start of the block, rather than the start of the symlink data after the header. Looks like this problem goes back to v3.10. Fixing this gets us reading the proper link target, again. 
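Condensed, the buggy pattern looked like this (a simplified sketch, not the verbatim xfs_readlink_bmap() body; the one-line fix is in the diff below):

    char *cur_chunk = bp->b_addr;

    if (xfs_sb_version_hascrc(&mp->m_sb))
            /* step over the struct xfs_dsymlink_hdr at the head of the block */
            cur_chunk += sizeof(struct xfs_dsymlink_hdr);

    /* buggy: reads from the start of the block, i.e. the header magic */
    memcpy(link + offset, bp->b_addr, byte_cnt);

    /* fixed: reads from just past the header, i.e. the stored link target */
    memcpy(link + offset, cur_chunk, byte_cnt);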
Signed-off-by: Eric Sandeen Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_symlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c index 3df411eadb867..40c076523cfa7 100644 --- a/fs/xfs/xfs_symlink.c +++ b/fs/xfs/xfs_symlink.c @@ -104,7 +104,7 @@ xfs_readlink_bmap( cur_chunk += sizeof(struct xfs_dsymlink_hdr); } - memcpy(link + offset, bp->b_addr, byte_cnt); + memcpy(link + offset, cur_chunk, byte_cnt); pathlen -= byte_cnt; offset += byte_cnt; From f2cb7e30813040794190995f54f42a8addeec8da Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Tue, 23 Jun 2015 08:47:20 +1000 Subject: [PATCH 0304/2091] xfs: don't truncate attribute extents if no extents exist commit f66bf042693b620133d39af8d2f13615f03eadfc upstream. The xfs_attr3_root_inactive() call from xfs_attr_inactive() assumes that attribute blocks exist to invalidate. It is possible to have an attribute fork without extents, however. Consider the case where the attribute fork is created towards the beginning of xfs_attr_set() but some part of the subsequent attribute set fails. If an inode in such a state hits xfs_attr_inactive(), it eventually calls xfs_dabuf_map() and possibly xfs_bmapi_read(). The former emits a filesystem corruption warning, returns an error that bubbles back up to xfs_attr_inactive(), and leads to destruction of the in-core attribute fork without an on-disk reset. If the inode happens to make it back through xfs_inactive() in this state (e.g., via a concurrent bulkstat that cycles the inode from the reclaim state and releases it), i_afp might not exist when xfs_bmapi_read() is called and causes a NULL dereference panic. A '-p 2' fsstress run to ENOSPC on a relatively small fs (1GB) reproduces these problems. The behavior is a regression caused by: 6dfe5a0 xfs: xfs_attr_inactive leaves inconsistent attr fork state behind ... which removed logic that avoided the attribute extent truncate when no extents exist. Restore this logic to ensure the attribute fork is destroyed and reset correctly if it exists without any allocated extents. Signed-off-by: Brian Foster Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_attr_inactive.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c index 3fbf167cfb4cd..73e75a87af502 100644 --- a/fs/xfs/xfs_attr_inactive.c +++ b/fs/xfs/xfs_attr_inactive.c @@ -435,8 +435,14 @@ xfs_attr_inactive( */ xfs_trans_ijoin(trans, dp, 0); - /* invalidate and truncate the attribute fork extents */ - if (dp->i_d.di_aformat != XFS_DINODE_FMT_LOCAL) { + /* + * Invalidate and truncate the attribute fork extents. Make sure the + * fork actually has attributes as otherwise the invalidation has no + * blocks to read and returns an error. In this case, just do the fork + * removal below. + */ + if (xfs_inode_hasattr(dp) && + dp->i_d.di_aformat != XFS_DINODE_FMT_LOCAL) { error = xfs_attr3_root_inactive(&trans, dp); if (error) goto out_cancel; From 7a8599a5a88f7996db19cf8ecc466fd95e9dcc4e Mon Sep 17 00:00:00 2001 From: David Fries Date: Fri, 8 May 2015 19:51:50 -0500 Subject: [PATCH 0305/2091] w1_therm reference count family data commit f7134eea05b2fb4a2c0935f8a540539fff01f3eb upstream. 
A temperature conversion can take 750 ms and when possible the w1_therm slave driver drops the bus_mutex to allow other bus operations, but that includes operations such as a periodic slave search, which can remove this slave when it is no longer detected. If that happens the sl->family_data will be freed and set to NULL causing w1_slave_show to crash when it wakes up. Signed-off-by: David Fries Reported-By: Thorsten Bschorr Tested-by: Thorsten Bschorr Acked-by: Evgeniy Polyakov Signed-off-by: Greg Kroah-Hartman --- drivers/w1/slaves/w1_therm.c | 62 +++++++++++++++++++++++++++--------- 1 file changed, 47 insertions(+), 15 deletions(-) diff --git a/drivers/w1/slaves/w1_therm.c b/drivers/w1/slaves/w1_therm.c index 1f11a20a8ab9d..55eb86c9e2141 100644 --- a/drivers/w1/slaves/w1_therm.c +++ b/drivers/w1/slaves/w1_therm.c @@ -59,16 +59,32 @@ MODULE_ALIAS("w1-family-" __stringify(W1_THERM_DS28EA00)); static int w1_strong_pullup = 1; module_param_named(strong_pullup, w1_strong_pullup, int, 0); +struct w1_therm_family_data { + uint8_t rom[9]; + atomic_t refcnt; +}; + +/* return the address of the refcnt in the family data */ +#define THERM_REFCNT(family_data) \ + (&((struct w1_therm_family_data*)family_data)->refcnt) + static int w1_therm_add_slave(struct w1_slave *sl) { - sl->family_data = kzalloc(9, GFP_KERNEL); + sl->family_data = kzalloc(sizeof(struct w1_therm_family_data), + GFP_KERNEL); if (!sl->family_data) return -ENOMEM; + atomic_set(THERM_REFCNT(sl->family_data), 1); return 0; } static void w1_therm_remove_slave(struct w1_slave *sl) { + int refcnt = atomic_sub_return(1, THERM_REFCNT(sl->family_data)); + while(refcnt) { + msleep(1000); + refcnt = atomic_read(THERM_REFCNT(sl->family_data)); + } kfree(sl->family_data); sl->family_data = NULL; } @@ -194,13 +210,22 @@ static ssize_t w1_slave_show(struct device *device, struct w1_slave *sl = dev_to_w1_slave(device); struct w1_master *dev = sl->master; u8 rom[9], crc, verdict, external_power; - int i, max_trying = 10; + int i, ret, max_trying = 10; ssize_t c = PAGE_SIZE; + u8 *family_data = sl->family_data; + + ret = mutex_lock_interruptible(&dev->bus_mutex); + if (ret != 0) + goto post_unlock; - i = mutex_lock_interruptible(&dev->bus_mutex); - if (i != 0) - return i; + if(!sl->family_data) + { + ret = -ENODEV; + goto pre_unlock; + } + /* prevent the slave from going away in sleep */ + atomic_inc(THERM_REFCNT(family_data)); memset(rom, 0, sizeof(rom)); while (max_trying--) { @@ -230,17 +255,19 @@ static ssize_t w1_slave_show(struct device *device, mutex_unlock(&dev->bus_mutex); sleep_rem = msleep_interruptible(tm); - if (sleep_rem != 0) - return -EINTR; + if (sleep_rem != 0) { + ret = -EINTR; + goto post_unlock; + } - i = mutex_lock_interruptible(&dev->bus_mutex); - if (i != 0) - return i; + ret = mutex_lock_interruptible(&dev->bus_mutex); + if (ret != 0) + goto post_unlock; } else if (!w1_strong_pullup) { sleep_rem = msleep_interruptible(tm); if (sleep_rem != 0) { - mutex_unlock(&dev->bus_mutex); - return -EINTR; + ret = -EINTR; + goto pre_unlock; } } @@ -269,19 +296,24 @@ static ssize_t w1_slave_show(struct device *device, c -= snprintf(buf + PAGE_SIZE - c, c, ": crc=%02x %s\n", crc, (verdict) ? 
"YES" : "NO"); if (verdict) - memcpy(sl->family_data, rom, sizeof(rom)); + memcpy(family_data, rom, sizeof(rom)); else dev_warn(device, "Read failed CRC check\n"); for (i = 0; i < 9; ++i) c -= snprintf(buf + PAGE_SIZE - c, c, "%02x ", - ((u8 *)sl->family_data)[i]); + ((u8 *)family_data)[i]); c -= snprintf(buf + PAGE_SIZE - c, c, "t=%d\n", w1_convert_temp(rom, sl->family->fid)); + ret = PAGE_SIZE - c; + +pre_unlock: mutex_unlock(&dev->bus_mutex); - return PAGE_SIZE - c; +post_unlock: + atomic_dec(THERM_REFCNT(family_data)); + return ret; } static int __init w1_therm_init(void) From 510c99974fdbc18f143c41cbd461c522f5ad7164 Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Tue, 9 Jun 2015 15:07:59 +0300 Subject: [PATCH 0306/2091] tpm, tpm_crb: fix le64_to_cpu conversions in crb_acpi_add() commit 49afd7289bd937401c5f7faa193054bc3c41dad6 upstream. le64_to_cpu() was applied twice to the physical addresses read from the control area. This hasn't shown any visible regressions because CRB driver has been tested only on the little endian platofrms so far. Reported-by: Matt Fleming Signed-off-by: Jarkko Sakkinen Reviewed-By: Jason Gunthorpe Fixes: 30fc8d138e91 ("tpm: TPM 2.0 CRB Interface") Signed-off-by: Peter Huewe Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/tpm_crb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/char/tpm/tpm_crb.c b/drivers/char/tpm/tpm_crb.c index b26ceee3585e1..44f9d20c19ac6 100644 --- a/drivers/char/tpm/tpm_crb.c +++ b/drivers/char/tpm/tpm_crb.c @@ -267,7 +267,7 @@ static int crb_acpi_add(struct acpi_device *device) memcpy_fromio(&pa, &priv->cca->cmd_pa, 8); pa = le64_to_cpu(pa); - priv->cmd = devm_ioremap_nocache(dev, le64_to_cpu(pa), + priv->cmd = devm_ioremap_nocache(dev, pa, ioread32(&priv->cca->cmd_size)); if (!priv->cmd) { dev_err(dev, "ioremap of the command buffer failed\n"); @@ -276,7 +276,7 @@ static int crb_acpi_add(struct acpi_device *device) memcpy_fromio(&pa, &priv->cca->rsp_pa, 8); pa = le64_to_cpu(pa); - priv->rsp = devm_ioremap_nocache(dev, le64_to_cpu(pa), + priv->rsp = devm_ioremap_nocache(dev, pa, ioread32(&priv->cca->rsp_size)); if (!priv->rsp) { dev_err(dev, "ioremap of the response buffer failed\n"); From 82bebdad67b9d968cf564166a09aab6e7f4e2301 Mon Sep 17 00:00:00 2001 From: "Hon Ching \\\\(Vicky\\\\) Lo" Date: Fri, 22 May 2015 13:23:02 -0400 Subject: [PATCH 0307/2091] vTPM: set virtual device before passing to ibmvtpm_reset_crq commit 9d75f08946e8485109458ccf16f714697c207f41 upstream. tpm_ibmvtpm_probe() calls ibmvtpm_reset_crq(ibmvtpm) without having yet set the virtual device in the ibmvtpm structure. So in ibmvtpm_reset_crq, the phype call contains empty unit addresses, ibmvtpm->vdev->unit_address. 
Signed-off-by: Hon Ching(Vicky) Lo Signed-off-by: Joy Latten Reviewed-by: Ashley Lai Fixes: 132f76294744 ("drivers/char/tpm: Add new device driver to support IBM vTPM") Signed-off-by: Peter Huewe Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/tpm_ibmvtpm.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/char/tpm/tpm_ibmvtpm.c b/drivers/char/tpm/tpm_ibmvtpm.c index 42ffa5e7a1e0f..27ebf9511cb41 100644 --- a/drivers/char/tpm/tpm_ibmvtpm.c +++ b/drivers/char/tpm/tpm_ibmvtpm.c @@ -578,6 +578,9 @@ static int tpm_ibmvtpm_probe(struct vio_dev *vio_dev, goto cleanup; } + ibmvtpm->dev = dev; + ibmvtpm->vdev = vio_dev; + crq_q = &ibmvtpm->crq_queue; crq_q->crq_addr = (struct ibmvtpm_crq *)get_zeroed_page(GFP_KERNEL); if (!crq_q->crq_addr) { @@ -622,8 +625,6 @@ static int tpm_ibmvtpm_probe(struct vio_dev *vio_dev, crq_q->index = 0; - ibmvtpm->dev = dev; - ibmvtpm->vdev = vio_dev; TPM_VPRIV(chip) = (void *)ibmvtpm; spin_lock_init(&ibmvtpm->rtce_lock); From f7ea8a3bd52da9670d32fd9869657a8501edff23 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 30 Jun 2015 13:15:31 -0600 Subject: [PATCH 0308/2091] tpm: Fix initialization of the cdev commit ba0ef85479c46a2ab354c2220bdb6152f7f4baf3 upstream. When a cdev is contained in a dynamic structure the cdev parent kobj should be set to the kobj that controls the lifetime of the enclosing structure. In TPM's case this is the embedded struct device. Also, cdev_init 0's the whole structure, so all sets must be after, not before. This fixes module ref counting and cdev. Fixes: 313d21eeab92 ("tpm: device class for tpm") Signed-off-by: Jason Gunthorpe Reviewed-by: Dmitry Torokhov Reviewed-by: Jarkko Sakkinen Tested-by: Jarkko Sakkinen Signed-off-by: Peter Huewe Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/tpm-chip.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c index 283f00a7f0362..1082d4bb016a9 100644 --- a/drivers/char/tpm/tpm-chip.c +++ b/drivers/char/tpm/tpm-chip.c @@ -129,8 +129,9 @@ struct tpm_chip *tpmm_chip_alloc(struct device *dev, device_initialize(&chip->dev); - chip->cdev.owner = chip->pdev->driver->owner; cdev_init(&chip->cdev, &tpm_fops); + chip->cdev.owner = chip->pdev->driver->owner; + chip->cdev.kobj.parent = &chip->dev.kobj; return chip; } From c2c40e1af2a2a78c04120567709344b1bf9f8111 Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Wed, 24 Jun 2015 17:14:55 +0300 Subject: [PATCH 0309/2091] tpm, tpm_crb: fail when TPM2 ACPI table contents look corrupted commit b371616b8537d6450ebca0819defbf53452bebf3 upstream. At least some versions of AMI BIOS have corrupted contents in the TPM2 ACPI table and namely the physical address of the control area is set to zero. This patch changes the driver to fail gracefully when we observe a zero address instead of continuing to ioremap. Signed-off-by: Jarkko Sakkinen Reviewed-by: Peter Huewe Signed-off-by: Peter Huewe Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/tpm_crb.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/char/tpm/tpm_crb.c b/drivers/char/tpm/tpm_crb.c index 44f9d20c19ac6..1267322595da0 100644 --- a/drivers/char/tpm/tpm_crb.c +++ b/drivers/char/tpm/tpm_crb.c @@ -233,6 +233,14 @@ static int crb_acpi_add(struct acpi_device *device) return -ENODEV; } + /* At least some versions of AMI BIOS have a bug that TPM2 table has + * zero address for the control area and therefore we must fail. 
+ */ + if (!buf->control_area_pa) { + dev_err(dev, "TPM2 ACPI table has a zero address for the control area\n"); + return -EINVAL; + } + if (buf->hdr.length < sizeof(struct acpi_tpm2)) { dev_err(dev, "TPM2 ACPI table has wrong size"); return -EINVAL; From e47f1631376e53715e14a0d1e25421f3868f90b1 Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Wed, 11 Feb 2015 07:33:34 -0500 Subject: [PATCH 0310/2091] KEYS: fix "ca_keys=" partial key matching commit f2b3dee484f9cee967a54ef05a66866282337519 upstream. The call to asymmetric_key_hex_to_key_id() from ca_keys_setup() silently fails with -ENOMEM. Instead of dynamically allocating memory from a __setup function, this patch defines a variable and calls __asymmetric_key_hex_to_key_id(), a new helper function, directly. This bug was introduced by 'commit 46963b774d44 ("KEYS: Overhaul key identification when searching for asymmetric keys")'. Changelog: - for clarification, rename hexlen to asciihexlen in asymmetric_key_hex_to_key_id() - add size argument to __asymmetric_key_hex_to_key_id() - David Howells - inline __asymmetric_key_hex_to_key_id() - David Howells - remove duplicate strlen() calls Acked-by: David Howells Signed-off-by: Mimi Zohar Signed-off-by: Greg Kroah-Hartman --- crypto/asymmetric_keys/asymmetric_keys.h | 3 +++ crypto/asymmetric_keys/asymmetric_type.c | 20 ++++++++++++++------ crypto/asymmetric_keys/x509_public_key.c | 23 ++++++++++++++++++----- 3 files changed, 35 insertions(+), 11 deletions(-) diff --git a/crypto/asymmetric_keys/asymmetric_keys.h b/crypto/asymmetric_keys/asymmetric_keys.h index f97330886d587..3f5b537ab33ec 100644 --- a/crypto/asymmetric_keys/asymmetric_keys.h +++ b/crypto/asymmetric_keys/asymmetric_keys.h @@ -11,6 +11,9 @@ extern struct asymmetric_key_id *asymmetric_key_hex_to_key_id(const char *id); +extern int __asymmetric_key_hex_to_key_id(const char *id, + struct asymmetric_key_id *match_id, + size_t hexlen); static inline const struct asymmetric_key_ids *asymmetric_key_ids(const struct key *key) { diff --git a/crypto/asymmetric_keys/asymmetric_type.c b/crypto/asymmetric_keys/asymmetric_type.c index bcbbbd794e1da..b0e4ed23d6683 100644 --- a/crypto/asymmetric_keys/asymmetric_type.c +++ b/crypto/asymmetric_keys/asymmetric_type.c @@ -104,6 +104,15 @@ static bool asymmetric_match_key_ids( return false; } +/* helper function can be called directly with pre-allocated memory */ +inline int __asymmetric_key_hex_to_key_id(const char *id, + struct asymmetric_key_id *match_id, + size_t hexlen) +{ + match_id->len = hexlen; + return hex2bin(match_id->data, id, hexlen); +} + /** * asymmetric_key_hex_to_key_id - Convert a hex string into a key ID. * @id: The ID as a hex string. 
@@ -111,21 +120,20 @@ static bool asymmetric_match_key_ids( struct asymmetric_key_id *asymmetric_key_hex_to_key_id(const char *id) { struct asymmetric_key_id *match_id; - size_t hexlen; + size_t asciihexlen; int ret; if (!*id) return ERR_PTR(-EINVAL); - hexlen = strlen(id); - if (hexlen & 1) + asciihexlen = strlen(id); + if (asciihexlen & 1) return ERR_PTR(-EINVAL); - match_id = kmalloc(sizeof(struct asymmetric_key_id) + hexlen / 2, + match_id = kmalloc(sizeof(struct asymmetric_key_id) + asciihexlen / 2, GFP_KERNEL); if (!match_id) return ERR_PTR(-ENOMEM); - match_id->len = hexlen / 2; - ret = hex2bin(match_id->data, id, hexlen / 2); + ret = __asymmetric_key_hex_to_key_id(id, match_id, asciihexlen / 2); if (ret < 0) { kfree(match_id); return ERR_PTR(-EINVAL); diff --git a/crypto/asymmetric_keys/x509_public_key.c b/crypto/asymmetric_keys/x509_public_key.c index a6c42031628e9..24f17e6c59048 100644 --- a/crypto/asymmetric_keys/x509_public_key.c +++ b/crypto/asymmetric_keys/x509_public_key.c @@ -28,17 +28,30 @@ static bool use_builtin_keys; static struct asymmetric_key_id *ca_keyid; #ifndef MODULE +static struct { + struct asymmetric_key_id id; + unsigned char data[10]; +} cakey; + static int __init ca_keys_setup(char *str) { if (!str) /* default system keyring */ return 1; if (strncmp(str, "id:", 3) == 0) { - struct asymmetric_key_id *p; - p = asymmetric_key_hex_to_key_id(str + 3); - if (p == ERR_PTR(-EINVAL)) - pr_err("Unparsable hex string in ca_keys\n"); - else if (!IS_ERR(p)) + struct asymmetric_key_id *p = &cakey.id; + size_t hexlen = (strlen(str) - 3) / 2; + int ret; + + if (hexlen == 0 || hexlen > sizeof(cakey.data)) { + pr_err("Missing or invalid ca_keys id\n"); + return 1; + } + + ret = __asymmetric_key_hex_to_key_id(str + 3, p, hexlen); + if (ret < 0) + pr_err("Unparsable ca_keys id hex string\n"); + else ca_keyid = p; /* owner key 'id:xxxxxx' */ } else if (strcmp(str, "builtin") == 0) { use_builtin_keys = true; From 4fd5dc9eece297f49f16f82422ead3a28b11ea70 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 27 Jul 2015 15:23:43 +0100 Subject: [PATCH 0311/2091] KEYS: ensure we free the assoc array edit if edit is valid commit ca4da5dd1f99fe9c59f1709fb43e818b18ad20e0 upstream. __key_link_end is not freeing the associated array edit structure and this leads to a 512 byte memory leak each time an identical existing key is added with add_key(). The reason the add_key() system call returns okay is that key_create_or_update() calls __key_link_begin() before checking to see whether it can update a key directly rather than adding/replacing - which it turns out it can. Thus __key_link() is not called through __key_instantiate_and_link() and __key_link_end() must cancel the edit. 
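The resulting call pattern, roughly (a simplified flow with comments, not the literal keyring code; the fix below makes the cancellation unconditional):

    key_create_or_update()
        __key_link_begin()      /* allocates an assoc_array edit (~512 bytes) */
        ...                     /* the key already exists, so it is updated in
                                 * place and __key_link() is never applied */
        __key_link_end()        /* must call assoc_array_cancel_edit() even
                                 * though edit->dead_leaf is not set, otherwise
                                 * the unused edit leaks on every such add_key() */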
CVE-2015-1333 Signed-off-by: Colin Ian King Signed-off-by: David Howells Signed-off-by: James Morris Signed-off-by: Greg Kroah-Hartman --- security/keys/keyring.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/security/keys/keyring.c b/security/keys/keyring.c index e72548b5897ec..d33437007ad22 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -1181,9 +1181,11 @@ void __key_link_end(struct key *keyring, if (index_key->type == &key_type_keyring) up_write(&keyring_serialise_link_sem); - if (edit && !edit->dead_leaf) { - key_payload_reserve(keyring, - keyring->datalen - KEYQUOTA_LINK_BYTES); + if (edit) { + if (!edit->dead_leaf) { + key_payload_reserve(keyring, + keyring->datalen - KEYQUOTA_LINK_BYTES); + } assoc_array_cancel_edit(edit); } up_write(&keyring->sem); From 73cc530acfebd856d803d3f153350a2892715da8 Mon Sep 17 00:00:00 2001 From: Roberto Sassu Date: Sat, 11 Apr 2015 17:13:06 +0200 Subject: [PATCH 0312/2091] ima: skip measurement of cgroupfs files and update documentation commit 6438de9f3fb5180d78a0422695d0b88c687757d3 upstream. This patch adds a rule in the default measurement policy to skip inodes in the cgroupfs filesystem. Measurements for this filesystem can be avoided, as all the digests collected have the same value of the digest of an empty file. Furthermore, this patch updates the documentation of IMA policies in Documentation/ABI/testing/ima_policy to make it consistent with the policies set in security/integrity/ima/ima_policy.c. Signed-off-by: Roberto Sassu Signed-off-by: Mimi Zohar Signed-off-by: Greg Kroah-Hartman --- Documentation/ABI/testing/ima_policy | 17 ++++++++++++----- security/integrity/ima/ima_policy.c | 2 ++ 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/Documentation/ABI/testing/ima_policy b/Documentation/ABI/testing/ima_policy index d0d0c578324c7..232e174aa5de5 100644 --- a/Documentation/ABI/testing/ima_policy +++ b/Documentation/ABI/testing/ima_policy @@ -49,11 +49,22 @@ Description: dont_measure fsmagic=0x01021994 dont_appraise fsmagic=0x01021994 # RAMFS_MAGIC - dont_measure fsmagic=0x858458f6 dont_appraise fsmagic=0x858458f6 + # DEVPTS_SUPER_MAGIC + dont_measure fsmagic=0x1cd1 + dont_appraise fsmagic=0x1cd1 + # BINFMTFS_MAGIC + dont_measure fsmagic=0x42494e4d + dont_appraise fsmagic=0x42494e4d # SECURITYFS_MAGIC dont_measure fsmagic=0x73636673 dont_appraise fsmagic=0x73636673 + # SELINUX_MAGIC + dont_measure fsmagic=0xf97cff8c + dont_appraise fsmagic=0xf97cff8c + # CGROUP_SUPER_MAGIC + dont_measure fsmagic=0x27e0eb + dont_appraise fsmagic=0x27e0eb measure func=BPRM_CHECK measure func=FILE_MMAP mask=MAY_EXEC @@ -70,10 +81,6 @@ Description: Examples of LSM specific definitions: SELinux: - # SELINUX_MAGIC - dont_measure fsmagic=0xf97cff8c - dont_appraise fsmagic=0xf97cff8c - dont_measure obj_type=var_log_t dont_appraise obj_type=var_log_t dont_measure obj_type=auditd_log_t diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c index d1eefb9d65fb2..b17f26fa727a4 100644 --- a/security/integrity/ima/ima_policy.c +++ b/security/integrity/ima/ima_policy.c @@ -79,6 +79,8 @@ static struct ima_rule_entry default_rules[] = { {.action = DONT_MEASURE, .fsmagic = BINFMTFS_MAGIC, .flags = IMA_FSMAGIC}, {.action = DONT_MEASURE, .fsmagic = SECURITYFS_MAGIC, .flags = IMA_FSMAGIC}, {.action = DONT_MEASURE, .fsmagic = SELINUX_MAGIC, .flags = IMA_FSMAGIC}, + {.action = DONT_MEASURE, .fsmagic = CGROUP_SUPER_MAGIC, + .flags = IMA_FSMAGIC}, {.action = MEASURE, .func = MMAP_CHECK, .mask 
= MAY_EXEC, .flags = IMA_FUNC | IMA_MASK}, {.action = MEASURE, .func = BPRM_CHECK, .mask = MAY_EXEC, From 7869fa6047fe7c5076e70b57a83fb351b42c34c6 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 7 Apr 2015 12:22:11 +0300 Subject: [PATCH 0313/2091] ima: cleanup ima_init_policy() a little commit 5577857f8e26e9027271f10daf96361640907300 upstream. It's a bit easier to read this if we split it up into two for loops. Signed-off-by: Dan Carpenter Signed-off-by: Mimi Zohar Signed-off-by: Greg Kroah-Hartman --- security/integrity/ima/ima_policy.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c index b17f26fa727a4..e4244fc43cf10 100644 --- a/security/integrity/ima/ima_policy.c +++ b/security/integrity/ima/ima_policy.c @@ -340,16 +340,12 @@ void __init ima_init_policy(void) appraise_entries = ima_use_appraise_tcb ? ARRAY_SIZE(default_appraise_rules) : 0; - for (i = 0; i < measure_entries + appraise_entries; i++) { - if (i < measure_entries) - list_add_tail(&default_rules[i].list, - &ima_default_rules); - else { - int j = i - measure_entries; - - list_add_tail(&default_appraise_rules[j].list, - &ima_default_rules); - } + for (i = 0; i < measure_entries; i++) + list_add_tail(&default_rules[i].list, &ima_default_rules); + + for (i = 0; i < appraise_entries; i++) { + list_add_tail(&default_appraise_rules[i].list, + &ima_default_rules); } ima_rules = &ima_default_rules; From 0ecc8ea6de19f41bf1332ef9dd472412b9ce4fa3 Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Tue, 21 Apr 2015 16:54:24 -0400 Subject: [PATCH 0314/2091] ima: do not measure or appraise the NSFS filesystem commit cd025f7f94108995383edddfb61fc8afea6c66a9 upstream. Include don't appraise or measure rules for the NSFS filesystem in the builtin ima_tcb and ima_appraise_tcb policies. 
Changelog: - Update documentation Signed-off-by: Mimi Zohar Signed-off-by: Greg Kroah-Hartman --- Documentation/ABI/testing/ima_policy | 3 +++ security/integrity/ima/ima_policy.c | 2 ++ 2 files changed, 5 insertions(+) diff --git a/Documentation/ABI/testing/ima_policy b/Documentation/ABI/testing/ima_policy index 232e174aa5de5..8ae3f57090d48 100644 --- a/Documentation/ABI/testing/ima_policy +++ b/Documentation/ABI/testing/ima_policy @@ -65,6 +65,9 @@ Description: # CGROUP_SUPER_MAGIC dont_measure fsmagic=0x27e0eb dont_appraise fsmagic=0x27e0eb + # NSFS_MAGIC + dont_measure fsmagic=0x6e736673 + dont_appraise fsmagic=0x6e736673 measure func=BPRM_CHECK measure func=FILE_MMAP mask=MAY_EXEC diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c index e4244fc43cf10..f2421f7fa3c8d 100644 --- a/security/integrity/ima/ima_policy.c +++ b/security/integrity/ima/ima_policy.c @@ -81,6 +81,7 @@ static struct ima_rule_entry default_rules[] = { {.action = DONT_MEASURE, .fsmagic = SELINUX_MAGIC, .flags = IMA_FSMAGIC}, {.action = DONT_MEASURE, .fsmagic = CGROUP_SUPER_MAGIC, .flags = IMA_FSMAGIC}, + {.action = DONT_MEASURE, .fsmagic = NSFS_MAGIC, .flags = IMA_FSMAGIC}, {.action = MEASURE, .func = MMAP_CHECK, .mask = MAY_EXEC, .flags = IMA_FUNC | IMA_MASK}, {.action = MEASURE, .func = BPRM_CHECK, .mask = MAY_EXEC, @@ -101,6 +102,7 @@ static struct ima_rule_entry default_appraise_rules[] = { {.action = DONT_APPRAISE, .fsmagic = BINFMTFS_MAGIC, .flags = IMA_FSMAGIC}, {.action = DONT_APPRAISE, .fsmagic = SECURITYFS_MAGIC, .flags = IMA_FSMAGIC}, {.action = DONT_APPRAISE, .fsmagic = SELINUX_MAGIC, .flags = IMA_FSMAGIC}, + {.action = DONT_APPRAISE, .fsmagic = NSFS_MAGIC, .flags = IMA_FSMAGIC}, {.action = DONT_APPRAISE, .fsmagic = CGROUP_SUPER_MAGIC, .flags = IMA_FSMAGIC}, #ifndef CONFIG_IMA_APPRAISE_SIGNED_INIT {.action = APPRAISE, .fowner = GLOBAL_ROOT_UID, .flags = IMA_FOWNER}, From 60e2874ce4570fa546d9c173a5a5cf7b9e966070 Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Tue, 21 Apr 2015 13:59:31 -0400 Subject: [PATCH 0315/2091] evm: labeling pseudo filesystems exception commit 5101a1850bb7ccbf107929dee9af0cd2f400940f upstream. To prevent offline stripping of existing file xattrs and relabeling of them at runtime, EVM allows only newly created files to be labeled. As pseudo filesystems are not persistent, stripping of xattrs is not a concern. Some LSMs defer file labeling on pseudo filesystems. This patch permits the labeling of existing files on pseudo files systems. 
Signed-off-by: Mimi Zohar Signed-off-by: Greg Kroah-Hartman --- security/integrity/evm/evm_main.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c index 10f994307a04e..5820914988191 100644 --- a/security/integrity/evm/evm_main.c +++ b/security/integrity/evm/evm_main.c @@ -296,6 +296,17 @@ static int evm_protect_xattr(struct dentry *dentry, const char *xattr_name, iint = integrity_iint_find(d_backing_inode(dentry)); if (iint && (iint->flags & IMA_NEW_FILE)) return 0; + + /* exception for pseudo filesystems */ + if (dentry->d_inode->i_sb->s_magic == TMPFS_MAGIC + || dentry->d_inode->i_sb->s_magic == SYSFS_MAGIC) + return 0; + + integrity_audit_msg(AUDIT_INTEGRITY_METADATA, + dentry->d_inode, dentry->d_name.name, + "update_metadata", + integrity_status_msg[evm_status], + -EPERM, 0); } out: if (evm_status != INTEGRITY_PASS) From 2b92ad967d2884fed41469c0c37f3cd14937fad6 Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Thu, 11 Jun 2015 11:54:42 -0400 Subject: [PATCH 0316/2091] ima: fix ima_show_template_data_ascii() commit 45b26133b97871896b8c5241d59f4ff7839db7b2 upstream. This patch fixes a bug introduced in "4d7aeee ima: define new template ima-ng and template fields d-ng and n-ng". Changelog: - change int to uint32 (Roberto Sassu's suggestion) Signed-off-by: Mimi Zohar Signed-off-by: Roberto Sassu Signed-off-by: Greg Kroah-Hartman --- security/integrity/ima/ima.h | 2 +- security/integrity/ima/ima_fs.c | 4 ++-- security/integrity/ima/ima_template_lib.c | 3 ++- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h index 8ee997dff1393..fc56d4dfa9547 100644 --- a/security/integrity/ima/ima.h +++ b/security/integrity/ima/ima.h @@ -106,7 +106,7 @@ void ima_add_violation(struct file *file, const unsigned char *filename, const char *op, const char *cause); int ima_init_crypto(void); void ima_putc(struct seq_file *m, void *data, int datalen); -void ima_print_digest(struct seq_file *m, u8 *digest, int size); +void ima_print_digest(struct seq_file *m, u8 *digest, u32 size); struct ima_template_desc *ima_template_desc_current(void); int ima_init_template(void); diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c index 461215e5fd31d..816d175da79aa 100644 --- a/security/integrity/ima/ima_fs.c +++ b/security/integrity/ima/ima_fs.c @@ -190,9 +190,9 @@ static const struct file_operations ima_measurements_ops = { .release = seq_release, }; -void ima_print_digest(struct seq_file *m, u8 *digest, int size) +void ima_print_digest(struct seq_file *m, u8 *digest, u32 size) { - int i; + u32 i; for (i = 0; i < size; i++) seq_printf(m, "%02x", *(digest + i)); diff --git a/security/integrity/ima/ima_template_lib.c b/security/integrity/ima/ima_template_lib.c index bcfc36cbde6ae..61fbd0c0d95c0 100644 --- a/security/integrity/ima/ima_template_lib.c +++ b/security/integrity/ima/ima_template_lib.c @@ -70,7 +70,8 @@ static void ima_show_template_data_ascii(struct seq_file *m, enum data_formats datafmt, struct ima_field_data *field_data) { - u8 *buf_ptr = field_data->data, buflen = field_data->len; + u8 *buf_ptr = field_data->data; + u32 buflen = field_data->len; switch (datafmt) { case DATA_FMT_DIGEST_WITH_ALGO: From 9428e8a37303a363bc4a8ac08501045f2195c02d Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Wed, 5 Nov 2014 07:48:36 -0500 Subject: [PATCH 0317/2091] ima: add support for new "euid" policy condition commit 
139069eff7388407f19794384c42a534d618ccd7 upstream. The new "euid" policy condition measures files with the specified effective uid (euid). In addition, for CAP_SETUID files it measures files with the specified uid or suid. Changelog: - fixed checkpatch.pl warnings - fixed avc denied {setuid} messages - based on Roberto's feedback Signed-off-by: Mimi Zohar Signed-off-by: Dr. Greg Wettstein Signed-off-by: Greg Kroah-Hartman --- Documentation/ABI/testing/ima_policy | 3 ++- security/integrity/ima/ima_policy.c | 27 +++++++++++++++++++++++---- 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/Documentation/ABI/testing/ima_policy b/Documentation/ABI/testing/ima_policy index 8ae3f57090d48..4a571fa10f969 100644 --- a/Documentation/ABI/testing/ima_policy +++ b/Documentation/ABI/testing/ima_policy @@ -20,7 +20,7 @@ Description: action: measure | dont_measure | appraise | dont_appraise | audit condition:= base | lsm [option] base: [[func=] [mask=] [fsmagic=] [fsuuid=] [uid=] - [fowner]] + [euid=] [fowner=]] lsm: [[subj_user=] [subj_role=] [subj_type=] [obj_user=] [obj_role=] [obj_type=]] option: [[appraise_type=]] [permit_directio] @@ -31,6 +31,7 @@ Description: fsmagic:= hex value fsuuid:= file system UUID (e.g 8bcbe394-4f13-4144-be8e-5aa9ea2ce2f6) uid:= decimal value + euid:= decimal value fowner:=decimal value lsm: are LSM specific option: appraise_type:= [imasig] diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c index f2421f7fa3c8d..525301cf7d90f 100644 --- a/security/integrity/ima/ima_policy.c +++ b/security/integrity/ima/ima_policy.c @@ -27,6 +27,7 @@ #define IMA_UID 0x0008 #define IMA_FOWNER 0x0010 #define IMA_FSUUID 0x0020 +#define IMA_EUID 0x0080 #define UNKNOWN 0 #define MEASURE 0x0001 /* same as IMA_MEASURE */ @@ -194,6 +195,16 @@ static bool ima_match_rules(struct ima_rule_entry *rule, return false; if ((rule->flags & IMA_UID) && !uid_eq(rule->uid, cred->uid)) return false; + if (rule->flags & IMA_EUID) { + if (has_capability_noaudit(current, CAP_SETUID)) { + if (!uid_eq(rule->uid, cred->euid) + && !uid_eq(rule->uid, cred->suid) + && !uid_eq(rule->uid, cred->uid)) + return false; + } else if (!uid_eq(rule->uid, cred->euid)) + return false; + } + if ((rule->flags & IMA_FOWNER) && !uid_eq(rule->fowner, inode->i_uid)) return false; for (i = 0; i < MAX_LSM_RULES; i++) { @@ -373,7 +384,8 @@ enum { Opt_audit, Opt_obj_user, Opt_obj_role, Opt_obj_type, Opt_subj_user, Opt_subj_role, Opt_subj_type, - Opt_func, Opt_mask, Opt_fsmagic, Opt_uid, Opt_fowner, + Opt_func, Opt_mask, Opt_fsmagic, + Opt_uid, Opt_euid, Opt_fowner, Opt_appraise_type, Opt_fsuuid, Opt_permit_directio }; @@ -394,6 +406,7 @@ static match_table_t policy_tokens = { {Opt_fsmagic, "fsmagic=%s"}, {Opt_fsuuid, "fsuuid=%s"}, {Opt_uid, "uid=%s"}, + {Opt_euid, "euid=%s"}, {Opt_fowner, "fowner=%s"}, {Opt_appraise_type, "appraise_type=%s"}, {Opt_permit_directio, "permit_directio"}, @@ -566,6 +579,9 @@ static int ima_parse_rule(char *rule, struct ima_rule_entry *entry) break; case Opt_uid: ima_log_string(ab, "uid", args[0].from); + case Opt_euid: + if (token == Opt_euid) + ima_log_string(ab, "euid", args[0].from); if (uid_valid(entry->uid)) { result = -EINVAL; @@ -574,11 +590,14 @@ static int ima_parse_rule(char *rule, struct ima_rule_entry *entry) result = kstrtoul(args[0].from, 10, &lnum); if (!result) { - entry->uid = make_kuid(current_user_ns(), (uid_t)lnum); - if (!uid_valid(entry->uid) || (((uid_t)lnum) != lnum)) + entry->uid = make_kuid(current_user_ns(), + (uid_t) lnum); + if 
(!uid_valid(entry->uid) || + (uid_t)lnum != lnum) result = -EINVAL; else - entry->flags |= IMA_UID; + entry->flags |= (token == Opt_uid) + ? IMA_UID : IMA_EUID; } break; case Opt_fowner: From bf609547cdb2f274fb5bd978affc8649022b5d3a Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Wed, 5 Nov 2014 07:53:55 -0500 Subject: [PATCH 0318/2091] ima: extend "mask" policy matching support commit 4351c294b8c1028077280f761e158d167b592974 upstream. The current "mask" policy option matches files opened as MAY_READ, MAY_WRITE, MAY_APPEND or MAY_EXEC. This patch extends the "mask" option to match files opened containing one of these modes. For example, "mask=^MAY_READ" would match files opened read-write. Signed-off-by: Mimi Zohar Signed-off-by: Dr. Greg Wettstein Signed-off-by: Greg Kroah-Hartman --- Documentation/ABI/testing/ima_policy | 3 ++- security/integrity/ima/ima_policy.c | 20 +++++++++++++++----- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/Documentation/ABI/testing/ima_policy b/Documentation/ABI/testing/ima_policy index 4a571fa10f969..0a378a88217a4 100644 --- a/Documentation/ABI/testing/ima_policy +++ b/Documentation/ABI/testing/ima_policy @@ -27,7 +27,8 @@ Description: base: func:= [BPRM_CHECK][MMAP_CHECK][FILE_CHECK][MODULE_CHECK] [FIRMWARE_CHECK] - mask:= [MAY_READ] [MAY_WRITE] [MAY_APPEND] [MAY_EXEC] + mask:= [[^]MAY_READ] [[^]MAY_WRITE] [[^]MAY_APPEND] + [[^]MAY_EXEC] fsmagic:= hex value fsuuid:= file system UUID (e.g 8bcbe394-4f13-4144-be8e-5aa9ea2ce2f6) uid:= decimal value diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c index 525301cf7d90f..b3a2038ed4243 100644 --- a/security/integrity/ima/ima_policy.c +++ b/security/integrity/ima/ima_policy.c @@ -27,6 +27,7 @@ #define IMA_UID 0x0008 #define IMA_FOWNER 0x0010 #define IMA_FSUUID 0x0020 +#define IMA_INMASK 0x0040 #define IMA_EUID 0x0080 #define UNKNOWN 0 @@ -187,6 +188,9 @@ static bool ima_match_rules(struct ima_rule_entry *rule, if ((rule->flags & IMA_MASK) && (rule->mask != mask && func != POST_SETATTR)) return false; + if ((rule->flags & IMA_INMASK) && + (!(rule->mask & mask) && func != POST_SETATTR)) + return false; if ((rule->flags & IMA_FSMAGIC) && rule->fsmagic != inode->i_sb->s_magic) return false; @@ -448,6 +452,7 @@ static void ima_log_string(struct audit_buffer *ab, char *key, char *value) static int ima_parse_rule(char *rule, struct ima_rule_entry *entry) { struct audit_buffer *ab; + char *from; char *p; int result = 0; @@ -538,18 +543,23 @@ static int ima_parse_rule(char *rule, struct ima_rule_entry *entry) if (entry->mask) result = -EINVAL; - if ((strcmp(args[0].from, "MAY_EXEC")) == 0) + from = args[0].from; + if (*from == '^') + from++; + + if ((strcmp(from, "MAY_EXEC")) == 0) entry->mask = MAY_EXEC; - else if (strcmp(args[0].from, "MAY_WRITE") == 0) + else if (strcmp(from, "MAY_WRITE") == 0) entry->mask = MAY_WRITE; - else if (strcmp(args[0].from, "MAY_READ") == 0) + else if (strcmp(from, "MAY_READ") == 0) entry->mask = MAY_READ; - else if (strcmp(args[0].from, "MAY_APPEND") == 0) + else if (strcmp(from, "MAY_APPEND") == 0) entry->mask = MAY_APPEND; else result = -EINVAL; if (!result) - entry->flags |= IMA_MASK; + entry->flags |= (*args[0].from == '^') + ? 
IMA_INMASK : IMA_MASK; break; case Opt_fsmagic: ima_log_string(ab, "fsmagic", args[0].from); From 66963999a2b2f589e108c786bfcb2e2b560d920e Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Thu, 11 Jun 2015 20:48:33 -0400 Subject: [PATCH 0319/2091] ima: update builtin policies commit 24fd03c87695a76f0517df42a37e51b1597d2c8a upstream. This patch defines a builtin measurement policy "tcb", similar to the existing "ima_tcb", but with additional rules to also measure files based on the effective uid and to measure files opened with the "read" mode bit set (eg. read, read-write). Changing the builtin "ima_tcb" policy could potentially break existing users. Instead of defining a new separate boot command line option each time the builtin measurement policy is modified, this patch defines a single generic boot command line option "ima_policy=" to specify the builtin policy and deprecates the use of the builtin ima_tcb policy. [The "ima_policy=" boot command line option is based on Roberto Sassu's "ima: added new policy type exec" patch.] Signed-off-by: Mimi Zohar Signed-off-by: Dr. Greg Wettstein Signed-off-by: Greg Kroah-Hartman --- Documentation/kernel-parameters.txt | 10 ++++- security/integrity/ima/ima_policy.c | 65 +++++++++++++++++++++++++---- 2 files changed, 65 insertions(+), 10 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 6726139bd2899..cd03a0faca8f9 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1398,7 +1398,15 @@ bytes respectively. Such letter suffixes can also be entirely omitted. The list of supported hash algorithms is defined in crypto/hash_info.h. - ima_tcb [IMA] + ima_policy= [IMA] + The builtin measurement policy to load during IMA + setup. Specyfing "tcb" as the value, measures all + programs exec'd, files mmap'd for exec, and all files + opened with the read mode bit set by either the + effective uid (euid=0) or uid=0. + Format: "tcb" + + ima_tcb [IMA] Deprecated. Use ima_policy= instead. Load a policy which meets the needs of the Trusted Computing Base. This means IMA will measure all programs exec'd, files mmap'd for exec, and all files diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c index b3a2038ed4243..3997e206f82da 100644 --- a/security/integrity/ima/ima_policy.c +++ b/security/integrity/ima/ima_policy.c @@ -44,6 +44,8 @@ enum lsm_rule_types { LSM_OBJ_USER, LSM_OBJ_ROLE, LSM_OBJ_TYPE, LSM_SUBJ_USER, LSM_SUBJ_ROLE, LSM_SUBJ_TYPE }; +enum policy_types { ORIGINAL_TCB = 1, DEFAULT_TCB }; + struct ima_rule_entry { struct list_head list; int action; @@ -72,7 +74,7 @@ struct ima_rule_entry { * normal users can easily run the machine out of memory simply building * and running executables. 
*/ -static struct ima_rule_entry default_rules[] = { +static struct ima_rule_entry dont_measure_rules[] = { {.action = DONT_MEASURE, .fsmagic = PROC_SUPER_MAGIC, .flags = IMA_FSMAGIC}, {.action = DONT_MEASURE, .fsmagic = SYSFS_MAGIC, .flags = IMA_FSMAGIC}, {.action = DONT_MEASURE, .fsmagic = DEBUGFS_MAGIC, .flags = IMA_FSMAGIC}, @@ -83,13 +85,29 @@ static struct ima_rule_entry default_rules[] = { {.action = DONT_MEASURE, .fsmagic = SELINUX_MAGIC, .flags = IMA_FSMAGIC}, {.action = DONT_MEASURE, .fsmagic = CGROUP_SUPER_MAGIC, .flags = IMA_FSMAGIC}, - {.action = DONT_MEASURE, .fsmagic = NSFS_MAGIC, .flags = IMA_FSMAGIC}, + {.action = DONT_MEASURE, .fsmagic = NSFS_MAGIC, .flags = IMA_FSMAGIC} +}; + +static struct ima_rule_entry original_measurement_rules[] = { + {.action = MEASURE, .func = MMAP_CHECK, .mask = MAY_EXEC, + .flags = IMA_FUNC | IMA_MASK}, + {.action = MEASURE, .func = BPRM_CHECK, .mask = MAY_EXEC, + .flags = IMA_FUNC | IMA_MASK}, + {.action = MEASURE, .func = FILE_CHECK, .mask = MAY_READ, + .uid = GLOBAL_ROOT_UID, .flags = IMA_FUNC | IMA_MASK | IMA_UID}, + {.action = MEASURE, .func = MODULE_CHECK, .flags = IMA_FUNC}, + {.action = MEASURE, .func = FIRMWARE_CHECK, .flags = IMA_FUNC}, +}; + +static struct ima_rule_entry default_measurement_rules[] = { {.action = MEASURE, .func = MMAP_CHECK, .mask = MAY_EXEC, .flags = IMA_FUNC | IMA_MASK}, {.action = MEASURE, .func = BPRM_CHECK, .mask = MAY_EXEC, .flags = IMA_FUNC | IMA_MASK}, - {.action = MEASURE, .func = FILE_CHECK, .mask = MAY_READ, .uid = GLOBAL_ROOT_UID, - .flags = IMA_FUNC | IMA_MASK | IMA_UID}, + {.action = MEASURE, .func = FILE_CHECK, .mask = MAY_READ, + .uid = GLOBAL_ROOT_UID, .flags = IMA_FUNC | IMA_INMASK | IMA_EUID}, + {.action = MEASURE, .func = FILE_CHECK, .mask = MAY_READ, + .uid = GLOBAL_ROOT_UID, .flags = IMA_FUNC | IMA_INMASK | IMA_UID}, {.action = MEASURE, .func = MODULE_CHECK, .flags = IMA_FUNC}, {.action = MEASURE, .func = FIRMWARE_CHECK, .flags = IMA_FUNC}, }; @@ -121,14 +139,29 @@ static struct list_head *ima_rules; static DEFINE_MUTEX(ima_rules_mutex); -static bool ima_use_tcb __initdata; +static int ima_policy __initdata; static int __init default_measure_policy_setup(char *str) { - ima_use_tcb = 1; + if (ima_policy) + return 1; + + ima_policy = ORIGINAL_TCB; return 1; } __setup("ima_tcb", default_measure_policy_setup); +static int __init policy_setup(char *str) +{ + if (ima_policy) + return 1; + + if (strcmp(str, "tcb") == 0) + ima_policy = DEFAULT_TCB; + + return 1; +} +__setup("ima_policy=", policy_setup); + static bool ima_use_appraise_tcb __initdata; static int __init default_appraise_policy_setup(char *str) { @@ -352,13 +385,27 @@ void __init ima_init_policy(void) { int i, measure_entries, appraise_entries; - /* if !ima_use_tcb set entries = 0 so we load NO default rules */ - measure_entries = ima_use_tcb ? ARRAY_SIZE(default_rules) : 0; + /* if !ima_policy set entries = 0 so we load NO default rules */ + measure_entries = ima_policy ? ARRAY_SIZE(dont_measure_rules) : 0; appraise_entries = ima_use_appraise_tcb ? 
ARRAY_SIZE(default_appraise_rules) : 0; for (i = 0; i < measure_entries; i++) - list_add_tail(&default_rules[i].list, &ima_default_rules); + list_add_tail(&dont_measure_rules[i].list, &ima_default_rules); + + switch (ima_policy) { + case ORIGINAL_TCB: + for (i = 0; i < ARRAY_SIZE(original_measurement_rules); i++) + list_add_tail(&original_measurement_rules[i].list, + &ima_default_rules); + break; + case DEFAULT_TCB: + for (i = 0; i < ARRAY_SIZE(default_measurement_rules); i++) + list_add_tail(&default_measurement_rules[i].list, + &ima_default_rules); + default: + break; + } for (i = 0; i < appraise_entries; i++) { list_add_tail(&default_appraise_rules[i].list, From baa7b46259b0dabb66bb4cd0dc28379bc5942c17 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Thu, 25 Jun 2015 18:02:29 -0400 Subject: [PATCH 0320/2091] tracing/filter: Do not WARN on operand count going below zero commit b4875bbe7e68f139bd3383828ae8e994a0df6d28 upstream. When testing the fix for the trace filter, I could not come up with a scenario where the operand count goes below zero, so I added a WARN_ON_ONCE(cnt < 0) to the logic. But there is legitimate case that it can happen (although the filter would be wrong). # echo '>' > /sys/kernel/debug/events/ext4/ext4_truncate_exit/filter That is, a single operation without any operands will hit the path where the WARN_ON_ONCE() can trigger. Although this is harmless, and the filter is reported as a error. But instead of spitting out a warning to the kernel dmesg, just fail nicely and report it via the proper channels. Link: http://lkml.kernel.org/r/558C6082.90608@oracle.com Reported-by: Vince Weaver Reported-by: Sasha Levin Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_events_filter.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 7f2e97ce71a7d..2900d7723d974 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -1385,7 +1385,9 @@ static int check_preds(struct filter_parse_state *ps) if (elt->op != OP_NOT) cnt--; n_normal_preds++; - WARN_ON_ONCE(cnt < 0); + /* all ops should have operands */ + if (cnt < 0) + break; } if (cnt != 1 || !n_normal_preds || n_logical_preds >= n_normal_preds) { From a27274be0112c7d458caba759e1d550fd9985e5f Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Thu, 25 Jun 2015 18:10:09 -0400 Subject: [PATCH 0321/2091] tracing/filter: Do not allow infix to exceed end of string commit 6b88f44e161b9ee2a803e5b2b1fbcf4e20e8b980 upstream. While debugging a WARN_ON() for filtering, I found that it is possible for the filter string to be referenced after its end. With the filter: # echo '>' > /sys/kernel/debug/events/ext4/ext4_truncate_exit/filter The filter_parse() function can call infix_get_op() which calls infix_advance() that updates the infix filter pointers for the cnt and tail without checking if the filter is already at the end, which will put the cnt to zero and the tail beyond the end. The loop then calls infix_next() that has ps->infix.cnt--; return ps->infix.string[ps->infix.tail++]; The cnt will now be below zero, and the tail that is returned is already passed the end of the filter string. So far the allocation of the filter string usually has some buffer that is zeroed out, but if the filter string is of the exact size of the allocated buffer there's no guarantee that the charater after the nul terminating character will be zero. 
Luckily, only root can write to the filter. Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_events_filter.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 2900d7723d974..52adf02d76191 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -1056,6 +1056,9 @@ static void parse_init(struct filter_parse_state *ps, static char infix_next(struct filter_parse_state *ps) { + if (!ps->infix.cnt) + return 0; + ps->infix.cnt--; return ps->infix.string[ps->infix.tail++]; @@ -1071,6 +1074,9 @@ static char infix_peek(struct filter_parse_state *ps) static void infix_advance(struct filter_parse_state *ps) { + if (!ps->infix.cnt) + return; + ps->infix.cnt--; ps->infix.tail++; } From 2161c8679389c9008ec7a1c5c279ced37cc07f5e Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Thu, 25 Jun 2015 18:19:37 -0400 Subject: [PATCH 0322/2091] tracing: Fix typo from "static inlin" to "static inline" commit cc9e4bde03f2b4cfba52406c021364cbd2a4a0f3 upstream. The trace.h header when called without CONFIG_EVENT_TRACING enabled (seldom done), will not compile because of a typo in the protocol of trace_event_enum_update(). Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index d2612016de94f..3d2ad5f83e94a 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -1312,7 +1312,7 @@ void trace_event_init(void); void trace_event_enum_update(struct trace_enum_map **map, int len); #else static inline void __init trace_event_init(void) { } -static inlin void trace_event_enum_update(struct trace_enum_map **map, int len) { } +static inline void trace_event_enum_update(struct trace_enum_map **map, int len) { } #endif extern struct trace_iterator *tracepoint_print_iter; From 624dda42c3e7a00608ca4532b2f7bc127bc0f740 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 7 Jul 2015 15:05:03 -0400 Subject: [PATCH 0323/2091] tracing: Have branch tracer use recursive field of task struct commit 6224beb12e190ff11f3c7d4bf50cb2922878f600 upstream. Fengguang Wu's tests triggered a bug in the branch tracer's start up test when CONFIG_DEBUG_PREEMPT set. This was because that config adds some debug logic in the per cpu field, which calls back into the branch tracer. The branch tracer has its own recursive checks, but uses a per cpu variable to implement it. If retrieving the per cpu variable calls back into the branch tracer, you can see how things will break. Instead of using a per cpu variable, use the trace_recursion field of the current task struct. Simply set a bit when entering the branch tracing and clear it when leaving. If the bit is set on entry, just don't do the tracing. There's also the case with lockdep, as the local_irq_save() called before the recursion can also trigger code that can call back into the function. Changing that to a raw_local_irq_save() will protect that as well. This prevents the recursion and the inevitable crash that follows. 
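The shape of the fix can be sketched in stand-alone C (illustrative only; the kernel uses a bit in current->trace_recursion and raw_local_irq_save()): a flag owned by the current task is tested and set on entry, so a nested hit becomes a no-op instead of recursing.

  #include <stdio.h>

  /* Per-"task" recursion guard, modelled with a thread-local flag. */
  static __thread unsigned long trace_recursion;
  #define BRANCH_BIT (1UL << 0)

  static void record_event(const char *what)
  {
      printf("event: %s\n", what);
  }

  static void trace_branch(const char *site)
  {
      if (trace_recursion & BRANCH_BIT)
          return;                     /* already tracing: bail out */

      trace_recursion |= BRANCH_BIT;
      /*
       * Anything called from here may itself hit an annotated branch;
       * the bit set above turns those nested hits into no-ops.
       */
      record_event(site);
      trace_recursion &= ~BRANCH_BIT;
  }

  int main(void)
  {
      trace_branch("likely() at foo.c:42");
      return 0;
  }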
Link: http://lkml.kernel.org/r/20150630141803.GA28071@wfg-t540p.sh.intel.com Reported-by: Fengguang Wu Tested-by: Fengguang Wu Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.h | 1 + kernel/trace/trace_branch.c | 17 ++++++++++------- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 3d2ad5f83e94a..921691c5cb045 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -444,6 +444,7 @@ enum { TRACE_CONTROL_BIT, + TRACE_BRANCH_BIT, /* * Abuse of the trace_recursion. * As we need a way to maintain state if we are tracing the function diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c index 57cbf1efdd440..1879980f06c22 100644 --- a/kernel/trace/trace_branch.c +++ b/kernel/trace/trace_branch.c @@ -36,9 +36,12 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect) struct trace_branch *entry; struct ring_buffer *buffer; unsigned long flags; - int cpu, pc; + int pc; const char *p; + if (current->trace_recursion & TRACE_BRANCH_BIT) + return; + /* * I would love to save just the ftrace_likely_data pointer, but * this code can also be used by modules. Ugly things can happen @@ -49,10 +52,10 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect) if (unlikely(!tr)) return; - local_irq_save(flags); - cpu = raw_smp_processor_id(); - data = per_cpu_ptr(tr->trace_buffer.data, cpu); - if (atomic_inc_return(&data->disabled) != 1) + raw_local_irq_save(flags); + current->trace_recursion |= TRACE_BRANCH_BIT; + data = this_cpu_ptr(tr->trace_buffer.data); + if (atomic_read(&data->disabled)) goto out; pc = preempt_count(); @@ -81,8 +84,8 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect) __buffer_unlock_commit(buffer, event); out: - atomic_dec(&data->disabled); - local_irq_restore(flags); + current->trace_recursion &= ~TRACE_BRANCH_BIT; + raw_local_irq_restore(flags); } static inline From 63544f7d8ab278c0c63d92d093308c0d0c912f07 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Fri, 17 Jul 2015 14:03:26 -0400 Subject: [PATCH 0324/2091] tracing: Fix sample output of dynamic arrays commit d6726c8145290bef950ae2538ea6ae1d96a1944b upstream. He Kuang noticed that the trace event samples for arrays was broken: "The output result of trace_foo_bar event in traceevent samples is wrong. This problem can be reproduced as following: (Build kernel with SAMPLE_TRACE_EVENTS=m) $ insmod trace-events-sample.ko $ echo 1 > /sys/kernel/debug/tracing/events/sample-trace/foo_bar/enable $ cat /sys/kernel/debug/tracing/trace event-sample-980 [000] .... 43.649559: foo_bar: foo hello 21 0x15 BIT1|BIT3|0x10 {0x1,0x6f6f6e53,0xff007970,0xffffffff} Snoopy ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ The array length is not right, should be {0x1}. (ffffffff,ffffffff) event-sample-980 [000] .... 44.653827: foo_bar: foo hello 22 0x16 BIT2|BIT3|0x10 {0x1,0x2,0x646e6147,0x666c61,0xffffffff,0xffffffff,0x750aeffe,0x7} ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ The array length is not right, should be {0x1,0x2}. Gandalf (ffffffff,ffffffff)" This was caused by an update to have __print_array()'s second parameter be the count of items in the array and not the size of the array. As there is already users of __print_array(), it can not change. But the sample code can and we can also improve on the documentation about __print_array() and __get_dynamic_array_len(). 
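A stand-alone C sketch of the distinction (names are illustrative): a __get_dynamic_array_len()-style helper hands back a length in bytes, while a __print_array()-style routine wants a number of elements, so the caller divides by the element size.

  #include <stdio.h>

  int main(void)
  {
      int list[] = { 0x1, 0x2 };

      unsigned int len_bytes = sizeof(list);          /* total allocated length */
      unsigned int count = len_bytes / sizeof(int);   /* number of elements     */

      printf("len_bytes=%u count=%u elements:", len_bytes, count);
      for (unsigned int i = 0; i < count; i++)
          printf(" 0x%x", list[i]);
      printf("\n");
      return 0;
  }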
Link: http://lkml.kernel.org/r/1436839171-31527-2-git-send-email-hekuang@huawei.com Fixes: ac01ce1410fc2 ("tracing: Make ftrace_print_array_seq compute buf_len") Reported-by: He Kuang Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- samples/trace_events/trace-events-sample.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/samples/trace_events/trace-events-sample.h b/samples/trace_events/trace-events-sample.h index 8965d1bb88119..125d6402f64f8 100644 --- a/samples/trace_events/trace-events-sample.h +++ b/samples/trace_events/trace-events-sample.h @@ -168,7 +168,10 @@ * * For __dynamic_array(int, foo, bar) use __get_dynamic_array(foo) * Use __get_dynamic_array_len(foo) to get the length of the array - * saved. + * saved. Note, __get_dynamic_array_len() returns the total allocated + * length of the dynamic array; __print_array() expects the second + * parameter to be the number of elements. To get that, the array length + * needs to be divided by the element size. * * For __string(foo, bar) use __get_str(foo) * @@ -288,7 +291,7 @@ TRACE_EVENT(foo_bar, * This prints out the array that is defined by __array in a nice format. */ __print_array(__get_dynamic_array(list), - __get_dynamic_array_len(list), + __get_dynamic_array_len(list) / sizeof(int), sizeof(int)), __get_str(str), __get_bitmask(cpus)) ); From 499b1532e1ee4e3c6ebc1ced5a1d1e75f696262e Mon Sep 17 00:00:00 2001 From: Lior Amsalem Date: Tue, 26 May 2015 15:07:32 +0200 Subject: [PATCH 0325/2091] dmaengine: mv_xor: bug fix for racing condition in descriptors cleanup commit 9136291f1dbc1d4d1cacd2840fb35f4f3ce16c46 upstream. This patch fixes a bug in the XOR driver where the cleanup function can be called and free descriptors that never been processed by the engine (which result in data errors). The cleanup function will free descriptors based on the ownership bit in the descriptors. 
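The idea behind the fix can be sketched in stand-alone C (constants and names are illustrative, not the driver's): walk the chain and reclaim only descriptors whose completion bit the engine has set, stopping at the first one it still owns.

  #include <stdio.h>
  #include <stdint.h>

  #define DESC_SUCCESS (1u << 30)   /* set by the "engine" when a descriptor is done */

  struct desc {
      uint32_t status;
      int id;
  };

  /* Reclaim completed descriptors only; never touch ones the engine still owns. */
  static void cleanup(const struct desc *chain, int n)
  {
      for (int i = 0; i < n; i++) {
          if (!(chain[i].status & DESC_SUCCESS)) {
              printf("desc %d not processed yet, stop\n", chain[i].id);
              break;
          }
          printf("desc %d complete, reclaiming\n", chain[i].id);
      }
  }

  int main(void)
  {
      const struct desc chain[] = {
          { .status = DESC_SUCCESS, .id = 0 },
          { .status = DESC_SUCCESS, .id = 1 },
          { .status = 0,            .id = 2 },   /* still owned by the engine */
      };

      cleanup(chain, 3);
      return 0;
  }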
Fixes: ff7b04796d98 ("dmaengine: DMA engine driver for Marvell XOR engine") Signed-off-by: Lior Amsalem Signed-off-by: Maxime Ripard Reviewed-by: Ofer Heifetz Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/mv_xor.c | 72 ++++++++++++++++++++++++++++---------------- drivers/dma/mv_xor.h | 1 + 2 files changed, 47 insertions(+), 26 deletions(-) diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c index 1c56001df676c..50f1b422dee3b 100644 --- a/drivers/dma/mv_xor.c +++ b/drivers/dma/mv_xor.c @@ -273,7 +273,8 @@ static void mv_xor_slot_cleanup(struct mv_xor_chan *mv_chan) dma_cookie_t cookie = 0; int busy = mv_chan_is_busy(mv_chan); u32 current_desc = mv_chan_get_current_desc(mv_chan); - int seen_current = 0; + int current_cleaned = 0; + struct mv_xor_desc *hw_desc; dev_dbg(mv_chan_to_devp(mv_chan), "%s %d\n", __func__, __LINE__); dev_dbg(mv_chan_to_devp(mv_chan), "current_desc %x\n", current_desc); @@ -285,38 +286,57 @@ static void mv_xor_slot_cleanup(struct mv_xor_chan *mv_chan) list_for_each_entry_safe(iter, _iter, &mv_chan->chain, chain_node) { - prefetch(_iter); - prefetch(&_iter->async_tx); - /* do not advance past the current descriptor loaded into the - * hardware channel, subsequent descriptors are either in - * process or have not been submitted - */ - if (seen_current) - break; + /* clean finished descriptors */ + hw_desc = iter->hw_desc; + if (hw_desc->status & XOR_DESC_SUCCESS) { + cookie = mv_xor_run_tx_complete_actions(iter, mv_chan, + cookie); - /* stop the search if we reach the current descriptor and the - * channel is busy - */ - if (iter->async_tx.phys == current_desc) { - seen_current = 1; - if (busy) + /* done processing desc, clean slot */ + mv_xor_clean_slot(iter, mv_chan); + + /* break if we did cleaned the current */ + if (iter->async_tx.phys == current_desc) { + current_cleaned = 1; + break; + } + } else { + if (iter->async_tx.phys == current_desc) { + current_cleaned = 0; break; + } } - - cookie = mv_xor_run_tx_complete_actions(iter, mv_chan, cookie); - - if (mv_xor_clean_slot(iter, mv_chan)) - break; } if ((busy == 0) && !list_empty(&mv_chan->chain)) { - struct mv_xor_desc_slot *chain_head; - chain_head = list_entry(mv_chan->chain.next, - struct mv_xor_desc_slot, - chain_node); - - mv_xor_start_new_chain(mv_chan, chain_head); + if (current_cleaned) { + /* + * current descriptor cleaned and removed, run + * from list head + */ + iter = list_entry(mv_chan->chain.next, + struct mv_xor_desc_slot, + chain_node); + mv_xor_start_new_chain(mv_chan, iter); + } else { + if (!list_is_last(&iter->chain_node, &mv_chan->chain)) { + /* + * descriptors are still waiting after + * current, trigger them + */ + iter = list_entry(iter->chain_node.next, + struct mv_xor_desc_slot, + chain_node); + mv_xor_start_new_chain(mv_chan, iter); + } else { + /* + * some descriptors are still waiting + * to be cleaned + */ + tasklet_schedule(&mv_chan->irq_tasklet); + } + } } if (cookie > 0) diff --git a/drivers/dma/mv_xor.h b/drivers/dma/mv_xor.h index 91958dba39a21..0e302b3a33ade 100644 --- a/drivers/dma/mv_xor.h +++ b/drivers/dma/mv_xor.h @@ -31,6 +31,7 @@ #define XOR_OPERATION_MODE_XOR 0 #define XOR_OPERATION_MODE_MEMCPY 2 #define XOR_DESCRIPTOR_SWAP BIT(14) +#define XOR_DESC_SUCCESS 0x40000000 #define XOR_DESC_DMA_OWNED BIT(31) #define XOR_DESC_EOD_INT_EN BIT(31) From adeb846a6d8fdc8b9dc19a65393382838cad727f Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 25 Jun 2015 17:01:40 +1000 Subject: [PATCH 0326/2091] md: clear mddev->private when it has been freed. 
commit bd6919228d7e1867ae9e24ab27e3e4a366c87d21 upstream. If ->private is set when ->run is called, it is assumed to be a 'config' prepared as part of 'reshape'. So it is important when we free that config, that we also clear ->private. This is not often a problem as the mddev will normally be discarded shortly after the config us freed. However if an 'assemble' races with a final close, the assemble can use the old mddev which has a stale ->private. This leads to any of various sorts of crashes. So clear ->private after calling ->free(). Reported-by: Nate Clark Fixes: afa0f557cb15 ("md: rename ->stop to ->free") Signed-off-by: NeilBrown Signed-off-by: Greg Kroah-Hartman --- drivers/md/md.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/md/md.c b/drivers/md/md.c index 4dbed4a67aaf4..5b7c914908cdc 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -5159,6 +5159,7 @@ int md_run(struct mddev *mddev) mddev_detach(mddev); if (mddev->private) pers->free(mddev, mddev->private); + mddev->private = NULL; module_put(pers->owner); bitmap_destroy(mddev); return err; @@ -5294,6 +5295,7 @@ static void md_clean(struct mddev *mddev) mddev->changed = 0; mddev->degraded = 0; mddev->safemode = 0; + mddev->private = NULL; mddev->merge_check_needed = 0; mddev->bitmap_info.offset = 0; mddev->bitmap_info.default_offset = 0; @@ -5366,6 +5368,7 @@ static void __md_stop(struct mddev *mddev) mddev->pers = NULL; spin_unlock(&mddev->lock); pers->free(mddev, mddev->private); + mddev->private = NULL; if (pers->sync_request && mddev->to_remove == NULL) mddev->to_remove = &md_redundancy_group; module_put(pers->owner); From 0f9457afe159aee56f80b20fff27a6af60f2b7a1 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 25 Jun 2015 17:06:40 +1000 Subject: [PATCH 0327/2091] md: unlock mddev_lock on an error path. commit 9a8c0fa861e4db60409b4dda254cef5e17e4d43c upstream. This error path retuns while still holding the lock - bad. Fixes: 6791875e2e53 ("md: make reconfig_mutex optional for writes to md sysfs files.") Signed-off-by: NeilBrown Signed-off-by: Greg Kroah-Hartman --- drivers/md/md.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 5b7c914908cdc..fbe6ac1eea5a3 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -4005,8 +4005,10 @@ new_dev_store(struct mddev *mddev, const char *buf, size_t len) else rdev = md_import_device(dev, -1, -1); - if (IS_ERR(rdev)) + if (IS_ERR(rdev)) { + mddev_unlock(mddev); return PTR_ERR(rdev); + } err = bind_rdev_to_array(rdev, mddev); out: if (err) From 7640ca524e936f7e47dc8ecbc463ea1876346f4d Mon Sep 17 00:00:00 2001 From: Goldwyn Rodrigues Date: Wed, 22 Jul 2015 12:09:17 -0500 Subject: [PATCH 0328/2091] md: Skip cluster setup for dm-raid commit d3b178adb3a3adf54ecf77758138b654c3ee7f09 upstream. There is a bug that the bitmap superblock isn't initialised properly for dm-raid, so a new field can have garbage in new fields. (dm-raid does initialisation in the kernel - md initialised the superblock in mdadm). This means that for dm-raid we cannot currently trust the new ->nodes field. So: - use __GFP_ZERO to initialise the superblock properly for all new arrays - initialise all fields in bitmap_info in bitmap_new_disk_sb - ignore ->nodes for dm arrays (yes, this is a hack) This bug exposes dm-raid to bug in the (still experimental) md-cluster code, so it is suitable for -stable. It does cause crashes. 
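A userspace analogy for the __GFP_ZERO part of the fix (the structure and its fields are illustrative): when an older writer predates a field, only a zeroed allocation guarantees that readers of the newer field see a harmless default instead of whatever happened to be in memory.

  #include <stdio.h>
  #include <stdlib.h>

  struct bitmap_sb {
      unsigned int magic;
      unsigned int nodes;   /* newer field: old writers never touch it */
  };

  int main(void)
  {
      /* Zeroed allocation (calloc here, __GFP_ZERO in the kernel). */
      struct bitmap_sb *sb = calloc(1, sizeof(*sb));
      if (!sb)
          return 1;

      sb->magic = 0x6d746962;        /* the writer fills only what it knows */
      printf("nodes=%u (safe default, not garbage)\n", sb->nodes);

      free(sb);
      return 0;
  }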
References: https://bugzilla.kernel.org/show_bug.cgi?id=100491 Signed-off-By: Goldwyn Rodrigues Signed-off-by: NeilBrown Signed-off-by: Greg Kroah-Hartman --- drivers/md/bitmap.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 135a0907e9de4..c90118e907081 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -494,7 +494,7 @@ static int bitmap_new_disk_sb(struct bitmap *bitmap) bitmap_super_t *sb; unsigned long chunksize, daemon_sleep, write_behind; - bitmap->storage.sb_page = alloc_page(GFP_KERNEL); + bitmap->storage.sb_page = alloc_page(GFP_KERNEL | __GFP_ZERO); if (bitmap->storage.sb_page == NULL) return -ENOMEM; bitmap->storage.sb_page->index = 0; @@ -541,6 +541,7 @@ static int bitmap_new_disk_sb(struct bitmap *bitmap) sb->state = cpu_to_le32(bitmap->flags); bitmap->events_cleared = bitmap->mddev->events; sb->events_cleared = cpu_to_le64(bitmap->mddev->events); + bitmap->mddev->bitmap_info.nodes = 0; kunmap_atomic(sb); @@ -611,8 +612,16 @@ static int bitmap_read_sb(struct bitmap *bitmap) daemon_sleep = le32_to_cpu(sb->daemon_sleep) * HZ; write_behind = le32_to_cpu(sb->write_behind); sectors_reserved = le32_to_cpu(sb->sectors_reserved); - nodes = le32_to_cpu(sb->nodes); - strlcpy(bitmap->mddev->bitmap_info.cluster_name, sb->cluster_name, 64); + /* XXX: This is a hack to ensure that we don't use clustering + * in case: + * - dm-raid is in use and + * - the nodes written in bitmap_sb is erroneous. + */ + if (!bitmap->mddev->sync_super) { + nodes = le32_to_cpu(sb->nodes); + strlcpy(bitmap->mddev->bitmap_info.cluster_name, + sb->cluster_name, 64); + } /* verify that the bitmap-specific fields are valid */ if (sb->magic != cpu_to_le32(BITMAP_MAGIC)) From f94402351ca06e1fd5699e3e01601d517e01d1a8 Mon Sep 17 00:00:00 2001 From: "Stevens, Nick" Date: Wed, 1 Jul 2015 16:07:41 +0000 Subject: [PATCH 0329/2091] hwmon: (mcp3021) Fix broken output scaling commit 347d7e45bd09ce09cbc30d5cea9de377eb22f55c upstream. The mcp3021 scaling code is dividing the VDD (full-scale) value in millivolts by the A2D resolution to obtain the scaling factor. When VDD is 3300mV (the standard value) and the resolution is 12-bit (4096 divisions), the result is a scale factor of 3300/4096, which is always one. Effectively, the raw A2D reading is always being returned because no scaling is applied. This patch fixes the issue and simplifies the register-to-volts calculation, removing the unneeded "output_scale" struct member. 
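A small stand-alone C illustration of the corrected scaling (the raw reading is made up for the example): with VDD = 3300 mV and a 12-bit converter, the register value is scaled in a single rounded division, instead of multiplying by the old 3300/4096 scale factor that always came out as one.

  #include <stdio.h>

  /* Round-to-nearest integer division, as the kernel's DIV_ROUND_CLOSEST() does. */
  #define DIV_ROUND_CLOSEST(x, d) (((x) + (d) / 2) / (d))

  int main(void)
  {
      unsigned int vdd = 3300;        /* supply voltage in millivolts     */
      unsigned int res_bits = 12;     /* 12-bit converter: 4096 divisions */
      unsigned int raw = 2048;        /* example mid-scale register value */

      unsigned int mv = DIV_ROUND_CLOSEST(vdd * raw, 1u << res_bits);
      printf("%u mV\n", mv);          /* prints 1650, i.e. half of VDD    */
      return 0;
  }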
Signed-off-by: Nick Stevens [Guenter Roeck: Dropped unnecessary value check] Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/mcp3021.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/drivers/hwmon/mcp3021.c b/drivers/hwmon/mcp3021.c index d219c06a857bb..972444a14cca5 100644 --- a/drivers/hwmon/mcp3021.c +++ b/drivers/hwmon/mcp3021.c @@ -31,14 +31,11 @@ /* output format */ #define MCP3021_SAR_SHIFT 2 #define MCP3021_SAR_MASK 0x3ff - #define MCP3021_OUTPUT_RES 10 /* 10-bit resolution */ -#define MCP3021_OUTPUT_SCALE 4 #define MCP3221_SAR_SHIFT 0 #define MCP3221_SAR_MASK 0xfff #define MCP3221_OUTPUT_RES 12 /* 12-bit resolution */ -#define MCP3221_OUTPUT_SCALE 1 enum chips { mcp3021, @@ -54,7 +51,6 @@ struct mcp3021_data { u16 sar_shift; u16 sar_mask; u8 output_res; - u8 output_scale; }; static int mcp3021_read16(struct i2c_client *client) @@ -84,13 +80,7 @@ static int mcp3021_read16(struct i2c_client *client) static inline u16 volts_from_reg(struct mcp3021_data *data, u16 val) { - if (val == 0) - return 0; - - val = val * data->output_scale - data->output_scale / 2; - - return val * DIV_ROUND_CLOSEST(data->vdd, - (1 << data->output_res) * data->output_scale); + return DIV_ROUND_CLOSEST(data->vdd * val, 1 << data->output_res); } static ssize_t show_in_input(struct device *dev, struct device_attribute *attr, @@ -132,14 +122,12 @@ static int mcp3021_probe(struct i2c_client *client, data->sar_shift = MCP3021_SAR_SHIFT; data->sar_mask = MCP3021_SAR_MASK; data->output_res = MCP3021_OUTPUT_RES; - data->output_scale = MCP3021_OUTPUT_SCALE; break; case mcp3221: data->sar_shift = MCP3221_SAR_SHIFT; data->sar_mask = MCP3221_SAR_MASK; data->output_res = MCP3221_OUTPUT_RES; - data->output_scale = MCP3221_OUTPUT_SCALE; break; } From 2618fae8d09a715fd79a9f8673bd307ab2aa6986 Mon Sep 17 00:00:00 2001 From: Constantine Shulyupin Date: Fri, 26 Jun 2015 17:47:44 +0300 Subject: [PATCH 0330/2091] hwmon: (nct7802) fix visibility of temp3 commit 56172d81a9bc37a69b95dd627b8d48135c9c7b31 upstream. Excerpt from datasheet: 7.2.32 Mode Selection Register RTD3_MD : 00=Closed , 01=Reserved , 10=Thermistor mode , 11=Voltage sense Show temp3 only in Thermistor mode Signed-off-by: Constantine Shulyupin Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/nct7802.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/nct7802.c b/drivers/hwmon/nct7802.c index 55765790907b3..28fcb2e246d55 100644 --- a/drivers/hwmon/nct7802.c +++ b/drivers/hwmon/nct7802.c @@ -547,7 +547,7 @@ static umode_t nct7802_temp_is_visible(struct kobject *kobj, if (index >= 9 && index < 18 && (reg & 0x0c) != 0x04 && (reg & 0x0c) != 0x08) /* RD2 */ return 0; - if (index >= 18 && index < 27 && (reg & 0x30) != 0x10) /* RD3 */ + if (index >= 18 && index < 27 && (reg & 0x30) != 0x20) /* RD3 */ return 0; if (index >= 27 && index < 35) /* local */ return attr->mode; From 83719f40f525caffa63e2599416241156d4f6a3b Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Tue, 2 Jun 2015 22:03:28 +0000 Subject: [PATCH 0331/2091] ARM: dts: mx23: fix iio-hwmon support commit e8e94ed6285428ab780cd7b0df4622f71eceb39e upstream. In order to get iio-hwmon support, the lradc must be declared as an iio provider. So fix this issue by adding the #io-channel-cells property. 
Signed-off-by: Stefan Wahren Fixes: bd798f9c7b30 ("ARM: dts: mxs: Add iio-hwmon to mx23 soc") Reviewed-by: Marek Vasut Reviewed-by: Alexandre Belloni Signed-off-by: Shawn Guo Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/imx23.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/imx23.dtsi b/arch/arm/boot/dts/imx23.dtsi index bbcfb5a19c770..0cb8b0b11c3ff 100644 --- a/arch/arm/boot/dts/imx23.dtsi +++ b/arch/arm/boot/dts/imx23.dtsi @@ -435,6 +435,7 @@ interrupts = <36 37 38 39 40 41 42 43 44>; status = "disabled"; clocks = <&clks 26>; + #io-channel-cells = <1>; }; spdif@80054000 { From 54b1fb57886f3b2a7be247937efa26d6e305aaa4 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 2 Jun 2015 17:31:00 -0700 Subject: [PATCH 0332/2091] Btrfs: don't invalidate root dentry when subvolume deletion fails commit 64ad6c488975d7516230cf7849190a991fd615ae upstream. Since commit bafc9b754f75 ("vfs: More precise tests in d_invalidate"), mounted subvolumes can be deleted because d_invalidate() won't fail. However, we run into problems when we attempt to delete the default subvolume while it is mounted as the root filesystem: # btrfs subvol list / ID 257 gen 306 top level 5 path rootvol ID 267 gen 334 top level 5 path snap1 # btrfs subvol get-default / ID 267 gen 334 top level 5 path snap1 # btrfs inspect-internal rootid / 267 # mount -o subvol=/ /dev/vda1 /mnt # btrfs subvol del /mnt/snap1 Delete subvolume (no-commit): '/mnt/snap1' ERROR: cannot delete '/mnt/snap1' - Operation not permitted # findmnt / findmnt: can't read /proc/mounts: No such file or directory # ls /proc # Markus reported that this same scenario simply led to a kernel oops. This happens because in btrfs_ioctl_snap_destroy(), we call d_invalidate() before we check may_destroy_subvol(), which means that we detach the submounts and drop the dentry before erroring out. Instead, we should only invalidate the dentry once the deletion has succeeded. Additionally, the shrink_dcache_sb() isn't necessary; d_invalidate() will prune the dcache for the deleted subvolume. Fixes: bafc9b754f75 ("vfs: More precise tests in d_invalidate") Reported-by: Markus Schauler Signed-off-by: Omar Sandoval Signed-off-by: Chris Mason Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/ioctl.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 1c22c65185045..6f790bcddfc1c 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2413,8 +2413,6 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, goto out_unlock_inode; } - d_invalidate(dentry); - down_write(&root->fs_info->subvol_sem); err = may_destroy_subvol(dest); @@ -2508,7 +2506,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, out_unlock_inode: mutex_unlock(&inode->i_mutex); if (!err) { - shrink_dcache_sb(root->fs_info->sb); + d_invalidate(dentry); btrfs_invalidate_inodes(dest); d_delete(dentry); ASSERT(dest->send_in_progress == 0); From 528feaeaf153863372517c6e713fcc4fe848deeb Mon Sep 17 00:00:00 2001 From: Firo Yang Date: Thu, 11 Jun 2015 09:41:10 +0800 Subject: [PATCH 0333/2091] md: fix a build warning commit 4e023612325a9034a542bfab79f78b1fe5ebb841 upstream. 
Warning like this: drivers/md/md.c: In function "update_array_info": drivers/md/md.c:6394:26: warning: logical not is only applied to the left hand side of comparison [-Wlogical-not-parentheses] !mddev->persistent != info->not_persistent|| Fix it as Neil Brown said: mddev->persistent != !info->not_persistent || Signed-off-by: Firo Yang Signed-off-by: NeilBrown Signed-off-by: Greg Kroah-Hartman --- drivers/md/md.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index fbe6ac1eea5a3..b9200282fd779 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -6380,7 +6380,7 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info) mddev->ctime != info->ctime || mddev->level != info->level || /* mddev->layout != info->layout || */ - !mddev->persistent != info->not_persistent|| + mddev->persistent != !info->not_persistent || mddev->chunk_sectors != info->chunk_size >> 9 || /* ignore bottom 8 bits of state, and allow SB_BITMAP_PRESENT to change */ ((state^info->state) & 0xfffffe00) From 6f953ad80fba78eed807fe61d96c4dde50708698 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Sat, 13 Jun 2015 06:52:56 +0100 Subject: [PATCH 0334/2091] Btrfs: use kmem_cache_free when freeing entry in inode cache commit c3f4a1685bb87e59c886ee68f7967eae07d4dffa upstream. The free space entries are allocated using kmem_cache_zalloc(), through __btrfs_add_free_space(), therefore we should use kmem_cache_free() and not kfree() to avoid any confusion and any potential problem. Looking at the kfree() definition at mm/slab.c it has the following comment: /* * (...) * * Don't free memory not originally allocated by kmalloc() * or you will run into trouble. */ So better be safe and use kmem_cache_free(). Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: Chris Mason Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode-map.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index f6a596d5a6374..218df701e6071 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -271,7 +271,7 @@ void btrfs_unpin_free_ino(struct btrfs_root *root) __btrfs_add_free_space(ctl, info->offset, count); free: rb_erase(&info->offset_index, rbroot); - kfree(info); + kmem_cache_free(btrfs_free_space_cachep, info); } } From 9547e86be4af61fb535d1487d6a0a607bb67f392 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Sat, 13 Jun 2015 06:52:57 +0100 Subject: [PATCH 0335/2091] Btrfs: fix race between caching kthread and returning inode to inode cache commit ae9d8f17118551bedd797406a6768b87c2146234 upstream. While the inode cache caching kthread is calling btrfs_unpin_free_ino(), we could have a concurrent call to btrfs_return_ino() that adds a new entry to the root's free space cache of pinned inodes. This concurrent call does not acquire the fs_info->commit_root_sem before adding a new entry if the caching state is BTRFS_CACHE_FINISHED, which is a problem because the caching kthread calls btrfs_unpin_free_ino() after setting the caching state to BTRFS_CACHE_FINISHED and therefore races with the task calling btrfs_return_ino(), which is adding a new entry, while the former (caching kthread) is navigating the cache's rbtree, removing and freeing nodes from the cache's rbtree without acquiring the spinlock that protects the rbtree. This race resulted in memory corruption due to double free of struct btrfs_free_space objects because both tasks can end up doing freeing the same objects. 
Note that adding a new entry can result in merging it with other entries in the cache, in which case those entries are freed. This is particularly important as btrfs_free_space structures are also used for the block group free space caches. This memory corruption can be detected by a debugging kernel, which reports it with the following trace: [132408.501148] slab error in verify_redzone_free(): cache `btrfs_free_space': double free detected [132408.505075] CPU: 15 PID: 12248 Comm: btrfs-ino-cache Tainted: G W 4.1.0-rc5-btrfs-next-10+ #1 [132408.505075] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.8.1-0-g4adadbd-20150316_085822-nilsson.home.kraxel.org 04/01/2014 [132408.505075] ffff880023e7d320 ffff880163d73cd8 ffffffff8145eec7 ffffffff81095dce [132408.505075] ffff880009735d40 ffff880163d73ce8 ffffffff81154e1e ffff880163d73d68 [132408.505075] ffffffff81155733 ffffffffa054a95a ffff8801b6099f00 ffffffffa0505b5f [132408.505075] Call Trace: [132408.505075] [] dump_stack+0x4f/0x7b [132408.505075] [] ? console_unlock+0x356/0x3a2 [132408.505075] [] __slab_error.isra.28+0x25/0x36 [132408.505075] [] __cache_free+0xe2/0x4b6 [132408.505075] [] ? __btrfs_add_free_space+0x2f0/0x343 [btrfs] [132408.505075] [] ? btrfs_unpin_free_ino+0x8e/0x99 [btrfs] [132408.505075] [] ? time_hardirqs_off+0x15/0x28 [132408.505075] [] ? trace_hardirqs_off+0xd/0xf [132408.505075] [] ? kfree+0xb6/0x14e [132408.505075] [] kfree+0xe5/0x14e [132408.505075] [] btrfs_unpin_free_ino+0x8e/0x99 [btrfs] [132408.505075] [] caching_kthread+0x29e/0x2d9 [btrfs] [132408.505075] [] ? btrfs_unpin_free_ino+0x99/0x99 [btrfs] [132408.505075] [] kthread+0xef/0xf7 [132408.505075] [] ? time_hardirqs_on+0x15/0x28 [132408.505075] [] ? __kthread_parkme+0xad/0xad [132408.505075] [] ret_from_fork+0x42/0x70 [132408.505075] [] ? __kthread_parkme+0xad/0xad [132408.505075] ffff880023e7d320: redzone 1:0x9f911029d74e35b, redzone 2:0x9f911029d74e35b. [132409.501654] slab: double free detected in cache 'btrfs_free_space', objp ffff880023e7d320 [132409.503355] ------------[ cut here ]------------ [132409.504241] kernel BUG at mm/slab.c:2571! Therefore fix this by having btrfs_unpin_free_ino() acquire the lock that protects the rbtree while doing the searches and removing entries. 
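The locking pattern of the fix, reduced to a stand-alone C model with a plain mutex and a singly linked list (the kernel code uses a spinlock and an rbtree): each entry is unlinked while the lock is held, and the slower work on it happens only after the lock is dropped.

  #include <pthread.h>
  #include <stdio.h>
  #include <stdlib.h>

  struct entry {
      struct entry *next;
      unsigned long offset;
  };

  static struct entry *head;
  static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

  /* Drain the pinned list: unlink under the lock, process outside it. */
  static void drain(void)
  {
      for (;;) {
          pthread_mutex_lock(&lock);
          struct entry *e = head;
          if (!e) {
              pthread_mutex_unlock(&lock);
              break;
          }
          head = e->next;                  /* removal is lock-protected */
          pthread_mutex_unlock(&lock);

          printf("returning offset %lu to the main cache\n", e->offset);
          free(e);                         /* slow work done unlocked   */
      }
  }

  int main(void)
  {
      for (unsigned long i = 0; i < 3; i++) {
          struct entry *e = malloc(sizeof(*e));
          if (!e)
              return 1;
          e->next = head;
          e->offset = i;
          head = e;
      }

      drain();
      return 0;
  }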
Fixes: 1c70d8fb4dfa ("Btrfs: fix inode caching vs tree log") Signed-off-by: Filipe Manana Signed-off-by: Chris Mason Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode-map.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 218df701e6071..d4a582ac3f730 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -246,6 +246,7 @@ void btrfs_unpin_free_ino(struct btrfs_root *root) { struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; struct rb_root *rbroot = &root->free_ino_pinned->free_space_offset; + spinlock_t *rbroot_lock = &root->free_ino_pinned->tree_lock; struct btrfs_free_space *info; struct rb_node *n; u64 count; @@ -254,23 +255,29 @@ void btrfs_unpin_free_ino(struct btrfs_root *root) return; while (1) { + bool add_to_ctl = true; + + spin_lock(rbroot_lock); n = rb_first(rbroot); - if (!n) + if (!n) { + spin_unlock(rbroot_lock); break; + } info = rb_entry(n, struct btrfs_free_space, offset_index); BUG_ON(info->bitmap); /* Logic error */ if (info->offset > root->ino_cache_progress) - goto free; + add_to_ctl = false; else if (info->offset + info->bytes > root->ino_cache_progress) count = root->ino_cache_progress - info->offset + 1; else count = info->bytes; - __btrfs_add_free_space(ctl, info->offset, count); -free: rb_erase(&info->offset_index, rbroot); + spin_unlock(rbroot_lock); + if (add_to_ctl) + __btrfs_add_free_space(ctl, info->offset, count); kmem_cache_free(btrfs_free_space_cachep, info); } } From 544f8fbe0af1ab753531cfc4efedf39d64a2a656 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 17 Jun 2015 12:49:23 +0100 Subject: [PATCH 0336/2091] Btrfs: fix fsync data loss after append write commit e4545de5b035c7debb73d260c78377dbb69cbfb5 upstream. If we do an append write to a file (which increases its inode's i_size) that does not have the flag BTRFS_INODE_NEEDS_FULL_SYNC set in its inode, and the previous transaction added a new hard link to the file, which sets the flag BTRFS_INODE_COPY_EVERYTHING in the file's inode, and then fsync the file, the inode's new i_size isn't logged. This has the consequence that after the fsync log is replayed, the file size remains what it was before the append write operation, which means users/applications will not be able to read the data that was successsfully fsync'ed before. This happens because neither the inode item nor the delayed inode get their i_size updated when the append write is made - doing so would require starting a transaction in the buffered write path, something that we do not do intentionally for performance reasons. Fix this by making sure that when the flag BTRFS_INODE_COPY_EVERYTHING is set the inode is logged with its current i_size (log the in-memory inode into the log tree). This issue is not a recent regression and is easy to reproduce with the following test case for fstests: seq=`basename $0` seqres=$RESULT_DIR/$seq echo "QA output created by $seq" here=`pwd` tmp=/tmp/$$ status=1 # failure is the default! _cleanup() { _cleanup_flakey rm -f $tmp.* } trap "_cleanup; exit \$status" 0 1 2 3 15 # get standard environment, filters and checks . ./common/rc . ./common/filter . ./common/dmflakey # real QA test starts here _supported_fs generic _supported_os Linux _need_to_be_root _require_scratch _require_dm_flakey _require_metadata_journaling $SCRATCH_DEV _crash_and_mount() { # Simulate a crash/power loss. _load_flakey_table $FLAKEY_DROP_WRITES _unmount_flakey # Allow writes again and mount. This makes the fs replay its fsync log. 
_load_flakey_table $FLAKEY_ALLOW_WRITES _mount_flakey } rm -f $seqres.full _scratch_mkfs >> $seqres.full 2>&1 _init_flakey _mount_flakey # Create the test file with some initial data and then fsync it. # The fsync here is only needed to trigger the issue in btrfs, as it causes the # the flag BTRFS_INODE_NEEDS_FULL_SYNC to be removed from the btrfs inode. $XFS_IO_PROG -f -c "pwrite -S 0xaa 0 32k" \ -c "fsync" \ $SCRATCH_MNT/foo | _filter_xfs_io sync # Add a hard link to our file. # On btrfs this sets the flag BTRFS_INODE_COPY_EVERYTHING on the btrfs inode, # which is a necessary condition to trigger the issue. ln $SCRATCH_MNT/foo $SCRATCH_MNT/bar # Sync the filesystem to force a commit of the current btrfs transaction, this # is a necessary condition to trigger the bug on btrfs. sync # Now append more data to our file, increasing its size, and fsync the file. # In btrfs because the inode flag BTRFS_INODE_COPY_EVERYTHING was set and the # write path did not update the inode item in the btree nor the delayed inode # item (in memory struture) in the current transaction (created by the fsync # handler), the fsync did not record the inode's new i_size in the fsync # log/journal. This made the data unavailable after the fsync log/journal is # replayed. $XFS_IO_PROG -c "pwrite -S 0xbb 32K 32K" \ -c "fsync" \ $SCRATCH_MNT/foo | _filter_xfs_io echo "File content after fsync and before crash:" od -t x1 $SCRATCH_MNT/foo _crash_and_mount echo "File content after crash and log replay:" od -t x1 $SCRATCH_MNT/foo status=0 exit The expected file output before and after the crash/power failure expects the appended data to be available, which is: 0000000 aa aa aa aa aa aa aa aa aa aa aa aa aa aa aa aa * 0100000 bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb * 0200000 Signed-off-by: Filipe Manana Reviewed-by: Liu Bo Signed-off-by: Chris Mason Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/tree-log.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index d04968374e9d8..4920fceffacb2 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4161,6 +4161,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, u64 ino = btrfs_ino(inode); struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; u64 logged_isize = 0; + bool need_log_inode_item = true; path = btrfs_alloc_path(); if (!path) @@ -4269,11 +4270,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, } else { if (inode_only == LOG_INODE_ALL) fast_search = true; - ret = log_inode_item(trans, log, dst_path, inode); - if (ret) { - err = ret; - goto out_unlock; - } goto log_extents; } @@ -4296,6 +4292,9 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, if (min_key.type > max_key.type) break; + if (min_key.type == BTRFS_INODE_ITEM_KEY) + need_log_inode_item = false; + src = path->nodes[0]; if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) { ins_nr++; @@ -4366,6 +4365,11 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, log_extents: btrfs_release_path(path); btrfs_release_path(dst_path); + if (need_log_inode_item) { + err = log_inode_item(trans, log, dst_path, inode); + if (err) + goto out_unlock; + } if (fast_search) { /* * Some ordered extents started by fsync might have completed From 992a3fbb5f5d77b23e4d7a785e4ad22b4acf82cd Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 3 Jul 2015 08:36:11 +0100 Subject: [PATCH 0337/2091] Btrfs: fix memory leak in the extent_same ioctl commit 
497b4050e0eacd4c746dd396d14916b1e669849d upstream. We were allocating memory with memdup_user() but we were never releasing that memory. This affected pretty much every call to the ioctl, whether it deduplicated extents or not. This issue was reported on IRC by Julian Taylor and on the mailing list by Marcel Ritter, credit goes to them for finding the issue. Reported-by: Julian Taylor Reported-by: Marcel Ritter Signed-off-by: Filipe Manana Reviewed-by: Mark Fasheh Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/ioctl.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 6f790bcddfc1c..e82ea1aa932d2 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2938,7 +2938,7 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 len, static long btrfs_ioctl_file_extent_same(struct file *file, struct btrfs_ioctl_same_args __user *argp) { - struct btrfs_ioctl_same_args *same; + struct btrfs_ioctl_same_args *same = NULL; struct btrfs_ioctl_same_extent_info *info; struct inode *src = file_inode(file); u64 off; @@ -2968,6 +2968,7 @@ static long btrfs_ioctl_file_extent_same(struct file *file, if (IS_ERR(same)) { ret = PTR_ERR(same); + same = NULL; goto out; } @@ -3038,6 +3039,7 @@ static long btrfs_ioctl_file_extent_same(struct file *file, out: mnt_drop_write_file(file); + kfree(same); return ret; } From 98f7bfe6a25a7b822efc7fcc72b7b494fefd438f Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 3 Jul 2015 20:30:34 +0100 Subject: [PATCH 0338/2091] Btrfs: fix list transaction->pending_ordered corruption commit d3efe08400317888f559bbedf0e42cd31575d0ef upstream. When we call btrfs_commit_transaction(), we splice the list "ordered" of our transaction handle into the transaction's "pending_ordered" list, but we don't re-initialize the "ordered" list of our transaction handle, this means it still points to the same elements it used to before the splice. Then we check if the current transaction's state is >= TRANS_STATE_COMMIT_START and if it is we end up calling btrfs_end_transaction() which simply splices again the "ordered" list of our handle into the transaction's "pending_ordered" list, leaving multiple pointers to the same ordered extents which results in list corruption when we are iterating, removing and freeing ordered extents at btrfs_wait_pending_ordered(), resulting in access to dangling pointers / use-after-free issues. Similarly, btrfs_end_transaction() can end up in some cases calling btrfs_commit_transaction(), and both did a list splice of the transaction handle's "ordered" list into the transaction's "pending_ordered" without re-initializing the handle's "ordered" list, resulting in exactly the same problem. This produces the following warning on a kernel with linked list debugging enabled: [109749.265416] ------------[ cut here ]------------ [109749.266410] WARNING: CPU: 7 PID: 324 at lib/list_debug.c:59 __list_del_entry+0x5a/0x98() [109749.267969] list_del corruption. prev->next should be ffff8800ba087e20, but was fffffff8c1f7c35d (...) [109749.287505] Call Trace: [109749.288135] [] dump_stack+0x4f/0x7b [109749.298080] [] ? console_unlock+0x356/0x3a2 [109749.331605] [] warn_slowpath_common+0xa1/0xbb [109749.334849] [] ? __list_del_entry+0x5a/0x98 [109749.337093] [] warn_slowpath_fmt+0x46/0x48 [109749.337847] [] __list_del_entry+0x5a/0x98 [109749.338678] [] btrfs_wait_pending_ordered+0x46/0xdb [btrfs] [109749.340145] [] ? 
__btrfs_run_delayed_items+0x149/0x163 [btrfs] [109749.348313] [] btrfs_commit_transaction+0x36b/0xa10 [btrfs] [109749.349745] [] ? trace_hardirqs_on+0xd/0xf [109749.350819] [] btrfs_sync_file+0x36f/0x3fc [btrfs] [109749.351976] [] vfs_fsync_range+0x8f/0x9e [109749.360341] [] vfs_fsync+0x1c/0x1e [109749.368828] [] do_fsync+0x34/0x4e [109749.369790] [] SyS_fsync+0x10/0x14 [109749.370925] [] system_call_fastpath+0x12/0x6f [109749.382274] ---[ end trace 48e0d07f7c03d95a ]--- On a non-debug kernel this leads to invalid memory accesses, causing a crash. Fix this by using list_splice_init() instead of list_splice() in btrfs_commit_transaction() and btrfs_end_transaction(). Fixes: 50d9aa99bd35 ("Btrfs: make sure logged extents complete in the current transaction V3" Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/transaction.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 5628e25250c0d..94e909c5a5034 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -758,7 +758,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, if (!list_empty(&trans->ordered)) { spin_lock(&info->trans_lock); - list_splice(&trans->ordered, &cur_trans->pending_ordered); + list_splice_init(&trans->ordered, &cur_trans->pending_ordered); spin_unlock(&info->trans_lock); } @@ -1848,7 +1848,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, } spin_lock(&root->fs_info->trans_lock); - list_splice(&trans->ordered, &cur_trans->pending_ordered); + list_splice_init(&trans->ordered, &cur_trans->pending_ordered); if (cur_trans->state >= TRANS_STATE_COMMIT_START) { spin_unlock(&root->fs_info->trans_lock); atomic_inc(&cur_trans->use_count); From df7c9ca8f5925d9b839864437b7090a865d560e4 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 14 Jul 2015 16:09:39 +0100 Subject: [PATCH 0339/2091] Btrfs: fix file corruption after cloning inline extents commit ed958762644b404654a6f5d23e869f496fe127c6 upstream. Using the clone ioctl (or extent_same ioctl, which calls the same extent cloning function as well) we end up allowing copy an inline extent from the source file into a non-zero offset of the destination file. This is something not expected and that the btrfs code is not prepared to deal with - all inline extents must be at a file offset equals to 0. For example, the following excerpt of a test case for fstests triggers a crash/BUG_ON() on a write operation after an inline extent is cloned into a non-zero offset: _scratch_mkfs >>$seqres.full 2>&1 _scratch_mount # Create our test files. File foo has the same 2K of data at offset 4K # as file bar has at its offset 0. $XFS_IO_PROG -f -s -c "pwrite -S 0xaa 0 4K" \ -c "pwrite -S 0xbb 4k 2K" \ -c "pwrite -S 0xcc 8K 4K" \ $SCRATCH_MNT/foo | _filter_xfs_io # File bar consists of a single inline extent (2K size). $XFS_IO_PROG -f -s -c "pwrite -S 0xbb 0 2K" \ $SCRATCH_MNT/bar | _filter_xfs_io # Now call the clone ioctl to clone the extent of file bar into file # foo at its offset 4K. This made file foo have an inline extent at # offset 4K, something which the btrfs code can not deal with in future # IO operations because all inline extents are supposed to start at an # offset of 0, resulting in all sorts of chaos. # So here we validate that clone ioctl returns an EOPNOTSUPP, which is # what it returns for other cases dealing with inlined extents. 
$CLONER_PROG -s 0 -d $((4 * 1024)) -l $((2 * 1024)) \ $SCRATCH_MNT/bar $SCRATCH_MNT/foo # Because of the inline extent at offset 4K, the following write made # the kernel crash with a BUG_ON(). $XFS_IO_PROG -c "pwrite -S 0xdd 6K 2K" $SCRATCH_MNT/foo | _filter_xfs_io status=0 exit The stack trace of the BUG_ON() triggered by the last write is: [152154.035903] ------------[ cut here ]------------ [152154.036424] kernel BUG at mm/page-writeback.c:2286! [152154.036424] invalid opcode: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC [152154.036424] Modules linked in: btrfs dm_flakey dm_mod crc32c_generic xor raid6_pq nfsd auth_rpcgss oid_registry nfs_acl nfs lockd grace fscache sunrpc loop fuse parport_pc acpi_cpu$ [152154.036424] CPU: 2 PID: 17873 Comm: xfs_io Tainted: G W 4.1.0-rc6-btrfs-next-11+ #2 [152154.036424] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.8.1-0-g4adadbd-20150316_085822-nilsson.home.kraxel.org 04/01/2014 [152154.036424] task: ffff880429f70990 ti: ffff880429efc000 task.ti: ffff880429efc000 [152154.036424] RIP: 0010:[] [] clear_page_dirty_for_io+0x1e/0x90 [152154.036424] RSP: 0018:ffff880429effc68 EFLAGS: 00010246 [152154.036424] RAX: 0200000000000806 RBX: ffffea0006a6d8f0 RCX: 0000000000000001 [152154.036424] RDX: 0000000000000000 RSI: ffffffff81155d1b RDI: ffffea0006a6d8f0 [152154.036424] RBP: ffff880429effc78 R08: ffff8801ce389fe0 R09: 0000000000000001 [152154.036424] R10: 0000000000002000 R11: ffffffffffffffff R12: ffff8800200dce68 [152154.036424] R13: 0000000000000000 R14: ffff8800200dcc88 R15: ffff8803d5736d80 [152154.036424] FS: 00007fbf119f6700(0000) GS:ffff88043d280000(0000) knlGS:0000000000000000 [152154.036424] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [152154.036424] CR2: 0000000001bdc000 CR3: 00000003aa555000 CR4: 00000000000006e0 [152154.036424] Stack: [152154.036424] ffff8803d5736d80 0000000000000001 ffff880429effcd8 ffffffffa04e97c1 [152154.036424] ffff880429effd68 ffff880429effd60 0000000000000001 ffff8800200dc9c8 [152154.036424] 0000000000000001 ffff8800200dcc88 0000000000000000 0000000000001000 [152154.036424] Call Trace: [152154.036424] [] lock_and_cleanup_extent_if_need+0x147/0x18d [btrfs] [152154.036424] [] __btrfs_buffered_write+0x245/0x4c8 [btrfs] [152154.036424] [] ? btrfs_file_write_iter+0x150/0x3e0 [btrfs] [152154.036424] [] ? 
btrfs_file_write_iter+0x15f/0x3e0 [btrfs] [152154.036424] [] btrfs_file_write_iter+0x2cc/0x3e0 [btrfs] [152154.036424] [] __vfs_write+0x7c/0xa5 [152154.036424] [] vfs_write+0xa0/0xe4 [152154.036424] [] SyS_pwrite64+0x64/0x82 [152154.036424] [] system_call_fastpath+0x12/0x6f [152154.036424] Code: 48 89 c7 e8 0f ff ff ff 5b 41 5c 5d c3 0f 1f 44 00 00 55 48 89 e5 41 54 53 48 89 fb e8 ae ef 00 00 49 89 c4 48 8b 03 a8 01 75 02 <0f> 0b 4d 85 e4 74 59 49 8b 3c 2$ [152154.036424] RIP [] clear_page_dirty_for_io+0x1e/0x90 [152154.036424] RSP [152154.242621] ---[ end trace e3d3376b23a57041 ]--- Fix this by returning the error EOPNOTSUPP if an attempt to copy an inline extent into a non-zero offset happens, just like what is done for other scenarios that would require copying/splitting inline extents, which were introduced by the following commits: 00fdf13a2e9f ("Btrfs: fix a crash of clone with inline extents's split") 3f9e3df8da3c ("btrfs: replace error code from btrfs_drop_extents") Signed-off-by: Filipe Manana Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/ioctl.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index e82ea1aa932d2..37d456a9a3b8c 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3434,6 +3434,20 @@ static int btrfs_clone(struct inode *src, struct inode *inode, u64 trim = 0; u64 aligned_end = 0; + /* + * Don't copy an inline extent into an offset + * greater than zero. Having an inline extent + * at such an offset results in chaos as btrfs + * isn't prepared for such cases. Just skip + * this case for the same reasons as commented + * at btrfs_ioctl_clone(). + */ + if (last_dest_end > 0) { + ret = -EOPNOTSUPP; + btrfs_end_transaction(trans, root); + goto out; + } + if (off > key.offset) { skip = off - key.offset; new_key.offset += skip; From 9d680e03989324f000596be4862728a7c30f22c1 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Thu, 9 Jul 2015 14:20:36 -0400 Subject: [PATCH 0340/2091] selinux: don't waste ebitmap space when importing NetLabel categories commit 3324603524925c7727207027d1c15e597412d15e upstream. At present we don't create efficient ebitmaps when importing NetLabel category bitmaps. This can present a problem when comparing ebitmaps since ebitmap_cmp() is very strict about these things and considers these wasteful ebitmaps not equal when compared to their more efficient counterparts, even if their values are the same. This isn't likely to cause problems on 64-bit systems due to a bit of luck on how NetLabel/CIPSO works and the default ebitmap size, but it can be a problem on 32-bit systems. This patch fixes this problem by being a bit more intelligent when importing NetLabel category bitmaps by skipping over empty sections which should result in a nice, efficient ebitmap. 
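For illustration only, here is a small self-contained userspace sketch of the same idea: skip all-zero 32-bit sections while importing a flat bitmap into a sparse representation, so that no empty nodes are allocated and equal category sets compare equal. The structure and function names below are invented for the example and are not the kernel's ebitmap API.

#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>

struct node {				/* one non-empty 32-bit section */
	uint32_t startbit;
	uint32_t map;
	struct node *next;
};

static struct node *import_sparse(const uint32_t *words, unsigned int nwords)
{
	struct node *head = NULL, **tail = &head;
	unsigned int i;

	for (i = 0; i < nwords; i++) {
		struct node *n;

		if (words[i] == 0)
			continue;	/* don't waste space on empty sections */

		n = calloc(1, sizeof(*n));
		if (!n)
			break;
		n->startbit = i * 32;
		n->map = words[i];
		*tail = n;
		tail = &n->next;
	}
	return head;
}

int main(void)
{
	uint32_t cats[] = { 0, 0, 0x5, 0, 0x80000000 };
	struct node *n;

	for (n = import_sparse(cats, 5); n; n = n->next)
		printf("node at bit %" PRIu32 ": 0x%" PRIX32 "\n",
		       n->startbit, n->map);
	return 0;	/* nodes are leaked here; fine for a throwaway sketch */
}
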
Signed-off-by: Paul Moore Signed-off-by: Greg Kroah-Hartman --- security/selinux/ss/ebitmap.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/security/selinux/ss/ebitmap.c b/security/selinux/ss/ebitmap.c index afe6a269ec177..57644b1dc42e1 100644 --- a/security/selinux/ss/ebitmap.c +++ b/security/selinux/ss/ebitmap.c @@ -153,6 +153,12 @@ int ebitmap_netlbl_import(struct ebitmap *ebmap, if (offset == (u32)-1) return 0; + /* don't waste ebitmap space if the netlabel bitmap is empty */ + if (bitmap == 0) { + offset += EBITMAP_UNIT_SIZE; + continue; + } + if (e_iter == NULL || offset >= e_iter->startbit + EBITMAP_SIZE) { e_prev = e_iter; From 872d2790a36047dd0f032666550951b4fb865fb4 Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Fri, 10 Jul 2015 09:40:59 -0400 Subject: [PATCH 0341/2091] selinux: fix mprotect PROT_EXEC regression caused by mm change commit 892e8cac99a71f6254f84fc662068d912e1943bf upstream. commit 66fc13039422ba7df2d01a8ee0873e4ef965b50b ("mm: shmem_zero_setup skip security check and lockdep conflict with XFS") caused a regression for SELinux by disabling any SELinux checking of mprotect PROT_EXEC on shared anonymous mappings. However, even before that regression, the checking on such mprotect PROT_EXEC calls was inconsistent with the checking on a mmap PROT_EXEC call for a shared anonymous mapping. On a mmap, the security hook is passed a NULL file and knows it is dealing with an anonymous mapping and therefore applies an execmem check and no file checks. On a mprotect, the security hook is passed a vma with a non-NULL vm_file (as this was set from the internally-created shmem file during mmap) and therefore applies the file-based execute check and no execmem check. Since the aforementioned commit now marks the shmem zero inode with the S_PRIVATE flag, the file checks are disabled and we have no checking at all on mprotect PROT_EXEC. Add a test to the mprotect hook logic for such private inodes, and apply an execmem check in that case. This makes the mmap and mprotect checking consistent for shared anonymous mappings, as well as for /dev/zero and ashmem. Signed-off-by: Stephen Smalley Signed-off-by: Paul Moore Signed-off-by: Greg Kroah-Hartman --- security/selinux/hooks.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 212070e1de1ac..7f8d7f19e0446 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3288,7 +3288,8 @@ static int file_map_prot_check(struct file *file, unsigned long prot, int shared int rc = 0; if (default_noexec && - (prot & PROT_EXEC) && (!file || (!shared && (prot & PROT_WRITE)))) { + (prot & PROT_EXEC) && (!file || IS_PRIVATE(file_inode(file)) || + (!shared && (prot & PROT_WRITE)))) { /* * We are making executable an anonymous mapping or a * private file mapping that will also be writable. From 650b07ba3c2382113a52f23581e861629476cca5 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 1 Jul 2015 16:25:55 +0200 Subject: [PATCH 0342/2091] fuse: initialize fc->release before calling it commit 0ad0b3255a08020eaf50e34ef0d6df5bdf5e09ed upstream. fc->release is called from fuse_conn_put() which was used in the error cleanup before fc->release was initialized. [Jeremiah Mahler : assign fc->release after calling fuse_conn_init(fc) instead of before.] 
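The ordering hazard described here is generic: any error path that ends up invoking an object's release callback must only run after that callback has been assigned. A minimal standalone sketch of the pattern follows; the names are invented for the example and this is not the fuse code.

#include <stdio.h>
#include <stdlib.h>

struct conn {
	void (*release)(struct conn *c);
};

static void free_conn(struct conn *c)
{
	printf("releasing connection\n");
	free(c);
}

static void put_conn(struct conn *c)
{
	c->release(c);		/* would crash if ->release were still NULL */
}

static int fill_super(int fail_early)
{
	struct conn *c = calloc(1, sizeof(*c));

	if (!c)
		return -1;
	/* assign the callback right after basic initialization ... */
	c->release = free_conn;

	if (fail_early)
		goto err;	/* ... so the error path is always safe */

	put_conn(c);
	return 0;
err:
	put_conn(c);
	return -1;
}

int main(void)
{
	fill_super(1);
	fill_super(0);
	return 0;
}
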
Signed-off-by: Miklos Szeredi Fixes: a325f9b92273 ("fuse: update fuse_conn_init() and separate out fuse_conn_kill()") Signed-off-by: Greg Kroah-Hartman --- fs/fuse/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 18dacf9ed8ff3..708d697113fc9 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1026,6 +1026,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) goto err_fput; fuse_conn_init(fc); + fc->release = fuse_free_conn; fc->dev = sb->s_dev; fc->sb = sb; @@ -1040,7 +1041,6 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) fc->dont_mask = 1; sb->s_flags |= MS_POSIXACL; - fc->release = fuse_free_conn; fc->flags = d.flags; fc->user_id = d.user_id; fc->group_id = d.group_id; From 94fc30841d7d5f59957db6231100c5c07e7c0eab Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 29 Jun 2015 19:30:23 +0300 Subject: [PATCH 0343/2091] crush: fix a bug in tree bucket decode commit 82cd003a77173c91b9acad8033fb7931dac8d751 upstream. struct crush_bucket_tree::num_nodes is u8, so ceph_decode_8_safe() should be used. -Wconversion catches this, but I guess it went unnoticed in all the noise it spews. The actual problem (at least for common crushmaps) isn't the u32 -> u8 truncation though - it's the advancement by 4 bytes instead of 1 in the crushmap buffer. Fixes: http://tracker.ceph.com/issues/2759 Signed-off-by: Ilya Dryomov Reviewed-by: Josh Durgin Signed-off-by: Greg Kroah-Hartman --- net/ceph/osdmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 15796696d64ed..4a3125836b64a 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -89,7 +89,7 @@ static int crush_decode_tree_bucket(void **p, void *end, { int j; dout("crush_decode_tree_bucket %p to %p\n", *p, end); - ceph_decode_32_safe(p, end, b->num_nodes, bad); + ceph_decode_8_safe(p, end, b->num_nodes, bad); b->node_weights = kcalloc(b->num_nodes, sizeof(u32), GFP_NOFS); if (b->node_weights == NULL) return -ENOMEM; From 2dfdaa269d01df3861f6967fd3d08d1ad732794b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 24 Jun 2015 17:30:15 +0300 Subject: [PATCH 0344/2091] ACPI / resources: free memory on error in add_region_before() commit 7bc10388ccdd79b3d20463151a1f8e7a590a775b upstream. There is a small memory leak on error. Fixes: 0f1b414d1907 (ACPI / PNP: Avoid conflicting resource reservations) Signed-off-by: Dan Carpenter Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/resource.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index fcb7807ea8b73..10561ce16ed13 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -660,8 +660,10 @@ static int add_region_before(u64 start, u64 end, u8 space_id, return -ENOMEM; error = request_range(start, end, space_id, flags, desc); - if (error) + if (error) { + kfree(reg); return error; + } reg->start = start; reg->end = end; From 3dfbf8770ae059d73ee34e7c49c0f628863be932 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 4 Jul 2015 03:09:03 +0200 Subject: [PATCH 0345/2091] ACPI / PNP: Reserve ACPI resources at the fs_initcall_sync stage commit 0294112ee3135fbd15eaa70015af8283642dd970 upstream. 
This effectively reverts the following three commits: 7bc10388ccdd ACPI / resources: free memory on error in add_region_before() 0f1b414d1907 ACPI / PNP: Avoid conflicting resource reservations b9a5e5e18fbf ACPI / init: Fix the ordering of acpi_reserve_resources() (commit b9a5e5e18fbf introduced regressions some of which, but not all, were addressed by commit 0f1b414d1907 and commit 7bc10388ccdd was a fixup on top of the latter) and causes ACPI fixed hardware resources to be reserved at the fs_initcall_sync stage of system initialization. The story is as follows. First, a boot regression was reported due to an apparent resource reservation ordering change after a commit that shouldn't lead to such changes. Investigation led to the conclusion that the problem happened because acpi_reserve_resources() was executed at the device_initcall() stage of system initialization which wasn't strictly ordered with respect to driver initialization (and with respect to the initialization of the pcieport driver in particular), so a random change causing the device initcalls to be run in a different order might break things. The response to that was to attempt to run acpi_reserve_resources() as soon as we knew that ACPI would be in use (commit b9a5e5e18fbf). However, that turned out to be too early, because it caused resource reservations made by the PNP system driver to fail on at least one system and that failure was addressed by commit 0f1b414d1907. That fix still turned out to be insufficient, though, because calling acpi_reserve_resources() before the fs_initcall stage of system initialization caused a boot regression to happen on the eCAFE EC-800-H20G/S netbook. That meant that we could only call acpi_reserve_resources() at the fs_initcall initialization stage or later, but then we might just as well call it after the PNP initialization, in which case commit 0f1b414d1907 wouldn't be necessary any more. For this reason, the changes made by commit 0f1b414d1907 are reverted (along with a memory leak fixup on top of that commit), the changes made by commit b9a5e5e18fbf that went too far are reverted too, and acpi_reserve_resources() is changed into fs_initcall_sync, which will cause it to be executed after the PNP subsystem initialization (which is an fs_initcall) and before device initcalls (including the pcieport driver initialization), which should avoid the initial issue. Link: https://bugzilla.kernel.org/show_bug.cgi?id=100581 Link: http://marc.info/?t=143092384600002&r=1&w=2 Link: https://bugzilla.kernel.org/show_bug.cgi?id=99831 Link: http://marc.info/?t=143389402600001&r=1&w=2 Fixes: b9a5e5e18fbf "ACPI / init: Fix the ordering of acpi_reserve_resources()" Reported-by: Roland Dreier Signed-off-by: Rafael J.
Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/osl.c | 12 ++- drivers/acpi/resource.c | 162 ---------------------------------------- drivers/pnp/system.c | 35 +++------ include/linux/acpi.h | 10 --- 4 files changed, 18 insertions(+), 201 deletions(-) diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index 5226a8b921ae1..98f5316aad727 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -175,10 +175,14 @@ static void __init acpi_request_region (struct acpi_generic_address *gas, if (!addr || !length) return; - acpi_reserve_region(addr, length, gas->space_id, 0, desc); + /* Resources are never freed */ + if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_IO) + request_region(addr, length, desc); + else if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) + request_mem_region(addr, length, desc); } -static void __init acpi_reserve_resources(void) +static int __init acpi_reserve_resources(void) { acpi_request_region(&acpi_gbl_FADT.xpm1a_event_block, acpi_gbl_FADT.pm1_event_length, "ACPI PM1a_EVT_BLK"); @@ -207,7 +211,10 @@ static void __init acpi_reserve_resources(void) if (!(acpi_gbl_FADT.gpe1_block_length & 0x1)) acpi_request_region(&acpi_gbl_FADT.xgpe1_block, acpi_gbl_FADT.gpe1_block_length, "ACPI GPE1_BLK"); + + return 0; } +fs_initcall_sync(acpi_reserve_resources); void acpi_os_printf(const char *fmt, ...) { @@ -1838,7 +1845,6 @@ acpi_status __init acpi_os_initialize(void) acpi_status __init acpi_os_initialize1(void) { - acpi_reserve_resources(); kacpid_wq = alloc_workqueue("kacpid", 0, 1); kacpi_notify_wq = alloc_workqueue("kacpi_notify", 0, 1); kacpi_hotplug_wq = alloc_ordered_workqueue("kacpi_hotplug", 0); diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index 10561ce16ed13..8244f013f2109 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #ifdef CONFIG_X86 @@ -622,164 +621,3 @@ int acpi_dev_filter_resource_type(struct acpi_resource *ares, return (type & types) ? 0 : 1; } EXPORT_SYMBOL_GPL(acpi_dev_filter_resource_type); - -struct reserved_region { - struct list_head node; - u64 start; - u64 end; -}; - -static LIST_HEAD(reserved_io_regions); -static LIST_HEAD(reserved_mem_regions); - -static int request_range(u64 start, u64 end, u8 space_id, unsigned long flags, - char *desc) -{ - unsigned int length = end - start + 1; - struct resource *res; - - res = space_id == ACPI_ADR_SPACE_SYSTEM_IO ? - request_region(start, length, desc) : - request_mem_region(start, length, desc); - if (!res) - return -EIO; - - res->flags &= ~flags; - return 0; -} - -static int add_region_before(u64 start, u64 end, u8 space_id, - unsigned long flags, char *desc, - struct list_head *head) -{ - struct reserved_region *reg; - int error; - - reg = kmalloc(sizeof(*reg), GFP_KERNEL); - if (!reg) - return -ENOMEM; - - error = request_range(start, end, space_id, flags, desc); - if (error) { - kfree(reg); - return error; - } - - reg->start = start; - reg->end = end; - list_add_tail(®->node, head); - return 0; -} - -/** - * acpi_reserve_region - Reserve an I/O or memory region as a system resource. - * @start: Starting address of the region. - * @length: Length of the region. - * @space_id: Identifier of address space to reserve the region from. - * @flags: Resource flags to clear for the region after requesting it. - * @desc: Region description (for messages). - * - * Reserve an I/O or memory region as a system resource to prevent others from - * using it. 
If the new region overlaps with one of the regions (in the given - * address space) already reserved by this routine, only the non-overlapping - * parts of it will be reserved. - * - * Returned is either 0 (success) or a negative error code indicating a resource - * reservation problem. It is the code of the first encountered error, but the - * routine doesn't abort until it has attempted to request all of the parts of - * the new region that don't overlap with other regions reserved previously. - * - * The resources requested by this routine are never released. - */ -int acpi_reserve_region(u64 start, unsigned int length, u8 space_id, - unsigned long flags, char *desc) -{ - struct list_head *regions; - struct reserved_region *reg; - u64 end = start + length - 1; - int ret = 0, error = 0; - - if (space_id == ACPI_ADR_SPACE_SYSTEM_IO) - regions = &reserved_io_regions; - else if (space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) - regions = &reserved_mem_regions; - else - return -EINVAL; - - if (list_empty(regions)) - return add_region_before(start, end, space_id, flags, desc, regions); - - list_for_each_entry(reg, regions, node) - if (reg->start == end + 1) { - /* The new region can be prepended to this one. */ - ret = request_range(start, end, space_id, flags, desc); - if (!ret) - reg->start = start; - - return ret; - } else if (reg->start > end) { - /* No overlap. Add the new region here and get out. */ - return add_region_before(start, end, space_id, flags, - desc, ®->node); - } else if (reg->end == start - 1) { - goto combine; - } else if (reg->end >= start) { - goto overlap; - } - - /* The new region goes after the last existing one. */ - return add_region_before(start, end, space_id, flags, desc, regions); - - overlap: - /* - * The new region overlaps an existing one. - * - * The head part of the new region immediately preceding the existing - * overlapping one can be combined with it right away. - */ - if (reg->start > start) { - error = request_range(start, reg->start - 1, space_id, flags, desc); - if (error) - ret = error; - else - reg->start = start; - } - - combine: - /* - * The new region is adjacent to an existing one. If it extends beyond - * that region all the way to the next one, it is possible to combine - * all three of them. - */ - while (reg->end < end) { - struct reserved_region *next = NULL; - u64 a = reg->end + 1, b = end; - - if (!list_is_last(®->node, regions)) { - next = list_next_entry(reg, node); - if (next->start <= end) - b = next->start - 1; - } - error = request_range(a, b, space_id, flags, desc); - if (!error) { - if (next && next->start == b + 1) { - reg->end = next->end; - list_del(&next->node); - kfree(next); - } else { - reg->end = end; - break; - } - } else if (next) { - if (!ret) - ret = error; - - reg = next; - } else { - break; - } - } - - return ret ? ret : error; -} -EXPORT_SYMBOL_GPL(acpi_reserve_region); diff --git a/drivers/pnp/system.c b/drivers/pnp/system.c index 515f33882ab89..49c1720df59a8 100644 --- a/drivers/pnp/system.c +++ b/drivers/pnp/system.c @@ -7,7 +7,6 @@ * Bjorn Helgaas */ -#include #include #include #include @@ -23,41 +22,25 @@ static const struct pnp_device_id pnp_dev_table[] = { {"", 0} }; -#ifdef CONFIG_ACPI -static bool __reserve_range(u64 start, unsigned int length, bool io, char *desc) -{ - u8 space_id = io ? 
ACPI_ADR_SPACE_SYSTEM_IO : ACPI_ADR_SPACE_SYSTEM_MEMORY; - return !acpi_reserve_region(start, length, space_id, IORESOURCE_BUSY, desc); -} -#else -static bool __reserve_range(u64 start, unsigned int length, bool io, char *desc) -{ - struct resource *res; - - res = io ? request_region(start, length, desc) : - request_mem_region(start, length, desc); - if (res) { - res->flags &= ~IORESOURCE_BUSY; - return true; - } - return false; -} -#endif - static void reserve_range(struct pnp_dev *dev, struct resource *r, int port) { char *regionid; const char *pnpid = dev_name(&dev->dev); resource_size_t start = r->start, end = r->end; - bool reserved; + struct resource *res; regionid = kmalloc(16, GFP_KERNEL); if (!regionid) return; snprintf(regionid, 16, "pnp %s", pnpid); - reserved = __reserve_range(start, end - start + 1, !!port, regionid); - if (!reserved) + if (port) + res = request_region(start, end - start + 1, regionid); + else + res = request_mem_region(start, end - start + 1, regionid); + if (res) + res->flags &= ~IORESOURCE_BUSY; + else kfree(regionid); /* @@ -66,7 +49,7 @@ static void reserve_range(struct pnp_dev *dev, struct resource *r, int port) * have double reservations. */ dev_info(&dev->dev, "%pR %s reserved\n", r, - reserved ? "has been" : "could not be"); + res ? "has been" : "could not be"); } static void reserve_resources_of_dev(struct pnp_dev *dev) diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 5da2d2e9d38e3..4550be3bb63b5 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -332,9 +332,6 @@ int acpi_check_region(resource_size_t start, resource_size_t n, int acpi_resources_are_enforced(void); -int acpi_reserve_region(u64 start, unsigned int length, u8 space_id, - unsigned long flags, char *desc); - #ifdef CONFIG_HIBERNATION void __init acpi_no_s4_hw_signature(void); #endif @@ -530,13 +527,6 @@ static inline int acpi_check_region(resource_size_t start, resource_size_t n, return 0; } -static inline int acpi_reserve_region(u64 start, unsigned int length, - u8 space_id, unsigned long flags, - char *desc) -{ - return -ENXIO; -} - struct acpi_table_header; static inline int acpi_table_parse(char *id, int (*handler)(struct acpi_table_header *)) From af3cc7722632f36dd8e05c63d1d261d8fc543487 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 7 Jul 2015 00:31:47 +0200 Subject: [PATCH 0346/2091] ACPI / LPSS: Fix up acpi_lpss_create_device() commit d3e13ff3c1aa2403d9a5f371baac088daeb8f56d upstream. Fix a return value (which should be a negative error code) and a memory leak (the list allocated by acpi_dev_get_resources() needs to be freed on ioremap() errors too) in acpi_lpss_create_device() introduced by commit 4483d59e29fe 'ACPI / LPSS: check the result of ioremap()'. Fixes: 4483d59e29fe 'ACPI / LPSS: check the result of ioremap()' Reported-by: Dan Carpenter Signed-off-by: Rafael J. 
Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/acpi_lpss.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c index 37fb190476039..73f056a597a9d 100644 --- a/drivers/acpi/acpi_lpss.c +++ b/drivers/acpi/acpi_lpss.c @@ -352,13 +352,16 @@ static int acpi_lpss_create_device(struct acpi_device *adev, pdata->mmio_size = resource_size(rentry->res); pdata->mmio_base = ioremap(rentry->res->start, pdata->mmio_size); - if (!pdata->mmio_base) - goto err_out; break; } acpi_dev_free_resource_list(&resource_list); + if (!pdata->mmio_base) { + ret = -ENOMEM; + goto err_out; + } + pdata->dev_desc = dev_desc; if (dev_desc->setup) From b1bce17e823a3f0c5abb8c6becb2ba314fba8588 Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Wed, 1 Jul 2015 14:43:11 +0800 Subject: [PATCH 0347/2091] ACPICA: Tables: Enable both 32-bit and 64-bit FACS commit c04e1fb4396d27f18296db0f914760fa7fe8223a upstream. ACPICA commit f7b86f35416e3d1f71c3d816ff5075ddd33ed486 The following commit is reported to have broken s2ram on some platforms: Commit: 0249ed2444d65d65fc3f3f64f398f1ad0b7e54cd ACPICA: Add option to favor 32-bit FADT addresses. The platform reports 2 FACS tables (which is not allowed by ACPI specification) and the new 32-bit address favor rule forces OSPMs to use the FACS table reported via FADT's X_FIRMWARE_CTRL field. The root cause of the reported bug might be one of the followings: 1. BIOS may favor the 64-bit firmware waking vector address when the version of the FACS is greater than 0 and Linux currently only supports resuming from the real mode, so the 64-bit firmware waking vector has never been set and might be invalid to BIOS while the commit enables higher version FACS. 2. BIOS may favor the FACS reported via the "FIRMWARE_CTRL" field in the FADT while the commit doesn't set the firmware waking vector address of the FACS reported by "FIRMWARE_CTRL", it only sets the firware waking vector address of the FACS reported by "X_FIRMWARE_CTRL". This patch excludes the cases that can trigger the bugs caused by the root cause 2. There is no handshaking mechanism can be used by OSPM to tell BIOS which FACS is currently used. Thus the FACS reported by "FIRMWARE_CTRL" may still be used by BIOS and the 0 value of the 32-bit firmware waking vector might trigger such failure. This patch tries to favor 32bit FACS address in another way where both the FACS reported by "FIRMWARE_CTRL" and the FACS reported by "X_FIRMWARE_CTRL" are loaded so that further commit can set firmware waking vector in the both tables to ensure we can exclude the cases that trigger the bugs caused by the root cause 2. The exclusion is split into 2 commits as this commit is also useful for dumping more ACPI tables, it won't get reverted when such exclusion is no longer necessary. Lv Zheng. Link: https://bugzilla.kernel.org/show_bug.cgi?id=74021 Link: https://github.com/acpica/acpica/commit/f7b86f35 Reported-and-tested-by: Oswald Buddenhagen Signed-off-by: Lv Zheng Signed-off-by: Bob Moore Signed-off-by: Rafael J. 
Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/acpica/aclocal.h | 1 + drivers/acpi/acpica/tbfadt.c | 21 +++++++++++++-------- drivers/acpi/acpica/tbutils.c | 34 +++++++++++++++++++++++----------- drivers/acpi/acpica/tbxfload.c | 3 ++- include/acpi/acpixf.h | 9 +++++++++ 5 files changed, 48 insertions(+), 20 deletions(-) diff --git a/drivers/acpi/acpica/aclocal.h b/drivers/acpi/acpica/aclocal.h index 87b27521fcacb..7f50dd9eb1d0e 100644 --- a/drivers/acpi/acpica/aclocal.h +++ b/drivers/acpi/acpica/aclocal.h @@ -213,6 +213,7 @@ struct acpi_table_list { #define ACPI_TABLE_INDEX_DSDT (0) #define ACPI_TABLE_INDEX_FACS (1) +#define ACPI_TABLE_INDEX_X_FACS (2) struct acpi_find_context { char *search_for; diff --git a/drivers/acpi/acpica/tbfadt.c b/drivers/acpi/acpica/tbfadt.c index 7d2486005e3f2..05be59c772c75 100644 --- a/drivers/acpi/acpica/tbfadt.c +++ b/drivers/acpi/acpica/tbfadt.c @@ -350,9 +350,18 @@ void acpi_tb_parse_fadt(u32 table_index) /* If Hardware Reduced flag is set, there is no FACS */ if (!acpi_gbl_reduced_hardware) { - acpi_tb_install_fixed_table((acpi_physical_address) - acpi_gbl_FADT.Xfacs, ACPI_SIG_FACS, - ACPI_TABLE_INDEX_FACS); + if (acpi_gbl_FADT.facs) { + acpi_tb_install_fixed_table((acpi_physical_address) + acpi_gbl_FADT.facs, + ACPI_SIG_FACS, + ACPI_TABLE_INDEX_FACS); + } + if (acpi_gbl_FADT.Xfacs) { + acpi_tb_install_fixed_table((acpi_physical_address) + acpi_gbl_FADT.Xfacs, + ACPI_SIG_FACS, + ACPI_TABLE_INDEX_X_FACS); + } } } @@ -491,13 +500,9 @@ static void acpi_tb_convert_fadt(void) acpi_gbl_FADT.header.length = sizeof(struct acpi_table_fadt); /* - * Expand the 32-bit FACS and DSDT addresses to 64-bit as necessary. + * Expand the 32-bit DSDT addresses to 64-bit as necessary. * Later ACPICA code will always use the X 64-bit field. */ - acpi_gbl_FADT.Xfacs = acpi_tb_select_address("FACS", - acpi_gbl_FADT.facs, - acpi_gbl_FADT.Xfacs); - acpi_gbl_FADT.Xdsdt = acpi_tb_select_address("DSDT", acpi_gbl_FADT.dsdt, acpi_gbl_FADT.Xdsdt); diff --git a/drivers/acpi/acpica/tbutils.c b/drivers/acpi/acpica/tbutils.c index 6559a58439c5d..2fb1afaacc6d6 100644 --- a/drivers/acpi/acpica/tbutils.c +++ b/drivers/acpi/acpica/tbutils.c @@ -68,7 +68,8 @@ acpi_tb_get_root_table_entry(u8 *table_entry, u32 table_entry_size); acpi_status acpi_tb_initialize_facs(void) { - acpi_status status; + struct acpi_table_facs *facs32; + struct acpi_table_facs *facs64; /* If Hardware Reduced flag is set, there is no FACS */ @@ -77,11 +78,22 @@ acpi_status acpi_tb_initialize_facs(void) return (AE_OK); } - status = acpi_get_table_by_index(ACPI_TABLE_INDEX_FACS, - ACPI_CAST_INDIRECT_PTR(struct - acpi_table_header, - &acpi_gbl_FACS)); - return (status); + (void)acpi_get_table_by_index(ACPI_TABLE_INDEX_FACS, + ACPI_CAST_INDIRECT_PTR(struct + acpi_table_header, + &facs32)); + (void)acpi_get_table_by_index(ACPI_TABLE_INDEX_X_FACS, + ACPI_CAST_INDIRECT_PTR(struct + acpi_table_header, + &facs64)); + + if (acpi_gbl_use32_bit_facs_addresses) { + acpi_gbl_FACS = facs32 ? facs32 : facs64; + } else { + acpi_gbl_FACS = facs64 ? 
facs64 : facs32; + } + + return (AE_OK); } #endif /* !ACPI_REDUCED_HARDWARE */ @@ -101,7 +113,7 @@ acpi_status acpi_tb_initialize_facs(void) u8 acpi_tb_tables_loaded(void) { - if (acpi_gbl_root_table_list.current_table_count >= 3) { + if (acpi_gbl_root_table_list.current_table_count >= 4) { return (TRUE); } @@ -357,11 +369,11 @@ acpi_status __init acpi_tb_parse_root_table(acpi_physical_address rsdp_address) table_entry = ACPI_ADD_PTR(u8, table, sizeof(struct acpi_table_header)); /* - * First two entries in the table array are reserved for the DSDT - * and FACS, which are not actually present in the RSDT/XSDT - they - * come from the FADT + * First three entries in the table array are reserved for the DSDT + * and 32bit/64bit FACS, which are not actually present in the + * RSDT/XSDT - they come from the FADT */ - acpi_gbl_root_table_list.current_table_count = 2; + acpi_gbl_root_table_list.current_table_count = 3; /* Initialize the root table array from the RSDT/XSDT */ diff --git a/drivers/acpi/acpica/tbxfload.c b/drivers/acpi/acpica/tbxfload.c index aadb3002a2ddd..b63e35d6d1bf8 100644 --- a/drivers/acpi/acpica/tbxfload.c +++ b/drivers/acpi/acpica/tbxfload.c @@ -166,7 +166,8 @@ static acpi_status acpi_tb_load_namespace(void) (void)acpi_ut_acquire_mutex(ACPI_MTX_TABLES); for (i = 0; i < acpi_gbl_root_table_list.current_table_count; ++i) { - if ((!ACPI_COMPARE_NAME + if (!acpi_gbl_root_table_list.tables[i].address || + (!ACPI_COMPARE_NAME (&(acpi_gbl_root_table_list.tables[i].signature), ACPI_SIG_SSDT) && diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 08ef57bc8d63f..4831691bc4772 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -199,6 +199,15 @@ ACPI_INIT_GLOBAL(u8, acpi_gbl_do_not_use_xsdt, FALSE); */ ACPI_INIT_GLOBAL(u8, acpi_gbl_use32_bit_fadt_addresses, TRUE); +/* + * Optionally use 32-bit FACS table addresses. + * It is reported that some platforms fail to resume from system suspending + * if 64-bit FACS table address is selected: + * https://bugzilla.kernel.org/show_bug.cgi?id=74021 + * Default is TRUE, favor the 32-bit addresses. + */ +ACPI_INIT_GLOBAL(u8, acpi_gbl_use32_bit_facs_addresses, TRUE); + /* * Optionally truncate I/O addresses to 16 bits. Provides compatibility * with other ACPI implementations. NOTE: During ACPICA initialization, From c0f2312598c9adaf4ab819f4d61a1ceaa8705d36 Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Wed, 1 Jul 2015 14:43:26 +0800 Subject: [PATCH 0348/2091] ACPICA: Tables: Fix an issue that FACS initialization is performed twice commit c04be18448355441a0c424362df65b6422e27bda upstream. ACPICA commit 90f5332a15e9d9ba83831ca700b2b9f708274658 This patch adds a new FACS initialization flag for acpi_tb_initialize(). acpi_enable_subsystem() might be invoked several times in OS bootup process, and we don't want FACS initialization to be invoked twice. Lv Zheng. Link: https://github.com/acpica/acpica/commit/90f5332a Signed-off-by: Lv Zheng Signed-off-by: Bob Moore Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/acpica/utxfinit.c | 10 ++++++---- include/acpi/actypes.h | 1 + 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/acpi/acpica/utxfinit.c b/drivers/acpi/acpica/utxfinit.c index 083a768918892..42a32a66ef22a 100644 --- a/drivers/acpi/acpica/utxfinit.c +++ b/drivers/acpi/acpica/utxfinit.c @@ -179,10 +179,12 @@ acpi_status __init acpi_enable_subsystem(u32 flags) * Obtain a permanent mapping for the FACS. 
This is required for the * Global Lock and the Firmware Waking Vector */ - status = acpi_tb_initialize_facs(); - if (ACPI_FAILURE(status)) { - ACPI_WARNING((AE_INFO, "Could not map the FACS table")); - return_ACPI_STATUS(status); + if (!(flags & ACPI_NO_FACS_INIT)) { + status = acpi_tb_initialize_facs(); + if (ACPI_FAILURE(status)) { + ACPI_WARNING((AE_INFO, "Could not map the FACS table")); + return_ACPI_STATUS(status); + } } #endif /* !ACPI_REDUCED_HARDWARE */ diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index 1c3002e1db20c..181427ef3549c 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -572,6 +572,7 @@ typedef u64 acpi_integer; #define ACPI_NO_ACPI_ENABLE 0x10 #define ACPI_NO_DEVICE_INIT 0x20 #define ACPI_NO_OBJECT_INIT 0x40 +#define ACPI_NO_FACS_INIT 0x80 /* * Initialization state From 24b2b68ee25b25737af7c3b37a63f516d57e8d23 Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Wed, 1 Jul 2015 14:43:34 +0800 Subject: [PATCH 0349/2091] ACPICA: Tables: Enable default 64-bit FADT addresses favor commit 0ea61381788a37d864f9841b0fe97d40f7058f3b upstream. ACPICA commit 4da56eeae0749dfe8491285c1e1fad48f6efafd8 The following commit temporarily disables correct 64-bit FADT addresses favor during the period the root cause of the bug is not fixed: Commit: 85dbd5801f62b66e2aa7826aaefcaebead44c8a6 ACPICA: Tables: Restore old behavor to favor 32-bit FADT addresses. With enough protections, this patch re-enables 64-bit FADT addresses by default. If regressions are reported against such change, this patch should be bisected and reverted. Note that 64-bit FACS favor and 64-bit firmware waking vector favor are excluded by this commit in order not to break OSPMs. Lv Zheng. Link: https://bugzilla.kernel.org/show_bug.cgi?id=74021 Link: https://github.com/acpica/acpica/commit/4da56eea Reported-and-tested-by: Oswald Buddenhagen Signed-off-by: Lv Zheng Signed-off-by: Bob Moore Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- include/acpi/acpixf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 4831691bc4772..f5ed1f17f0616 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -195,9 +195,9 @@ ACPI_INIT_GLOBAL(u8, acpi_gbl_do_not_use_xsdt, FALSE); * address. Although ACPICA adheres to the ACPI specification which * requires the use of the corresponding 64-bit address if it is non-zero, * some machines have been found to have a corrupted non-zero 64-bit - * address. Default is TRUE, favor the 32-bit addresses. + * address. Default is FALSE, do not favor the 32-bit addresses. */ -ACPI_INIT_GLOBAL(u8, acpi_gbl_use32_bit_fadt_addresses, TRUE); +ACPI_INIT_GLOBAL(u8, acpi_gbl_use32_bit_fadt_addresses, FALSE); /* * Optionally use 32-bit FACS table addresses. From 1d7a398b461183e920bc591af399580f9623c849 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 8 Jul 2015 15:26:39 +0800 Subject: [PATCH 0350/2091] ACPI / PCI: Fix regressions caused by resource_size_t overflow with 32-bit kernel commit 1fb01ca93a1348a1469b8777326cd7632483de77 upstream. Zoltan Boszormenyi reported this regression: "There's a Realtek RTL8111/8168/8411 (PCI ID 10ec:8168, Subsystem ID 1565:230e) network chip on the mainboard. After the r8169 driver loaded the IRQs in the machine went berserk. Keyboard keypressed arrived with considerable latency and duplicated, so no real work was possible. The machine responded to the power button but didn't actually power down. 
It just stuck at the powering down message. I had to press the power button for 4 seconds to power it down. The computer is a POS machine with a big battery inside. Because of this, either ACPI or the Realtek chip kept the bad state and after rebooting, the network chip didn't even show up in lspci. Not even the PXE ROM announced itself during boot. I had to disconnect the battery to beat some sense back to the computer. The regression happens with 4.0.5, 4.1.0-rc8 and 4.1.0-final. 3.18.16 was good." The regression is caused by commit 593669c2ac0f (x86/PCI/ACPI: Use common ACPI resource interfaces to simplify implementation). Since commit 593669c2ac0f, x86 PCI ACPI host bridge driver validates ACPI resources by first converting an ACPI resource to a 'struct resource' structure and then applying checks against the converted resource structure. The 'start' and 'end' fields in 'struct resource' are defined to be type of resource_size_t, which may be 32 bits or 64 bits depending on CONFIG_PHYS_ADDR_T_64BIT. This may cause incorrect resource validation results with 32-bit kernels because 64-bit ACPI resource descriptors may get truncated when converting to 32-bit 'start' and 'end' fields in 'struct resource'. It eventually affects PCI resource allocation subsystem and makes some PCI devices and the system behave abnormally due to incorrect resource assignment. So enhance the ACPI resource parsing interfaces to ignore ACPI resource descriptors with address/offset above 4G when running in 32-bit mode. With the fix applied, the behavior of the machine was restored to how 3.18.16 worked, i.e. the memory range that is over 4GB is ignored again, and lspci -vvxxx shows that everything is at the same memory window as they were with 3.18.16. Reported-and-tested-by: Boszormenyi Zoltan Fixes: 593669c2ac0f (x86/PCI/ACPI: Use common ACPI resource interfaces to simplify implementation) Signed-off-by: Jiang Liu Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/resource.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index 8244f013f2109..f1c966e050784 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -193,6 +193,7 @@ static bool acpi_decode_space(struct resource_win *win, u8 iodec = attr->granularity == 0xfff ? ACPI_DECODE_10 : ACPI_DECODE_16; bool wp = addr->info.mem.write_protect; u64 len = attr->address_length; + u64 start, end, offset = 0; struct resource *res = &win->res; /* @@ -204,9 +205,6 @@ static bool acpi_decode_space(struct resource_win *win, pr_debug("ACPI: Invalid address space min_addr_fix %d, max_addr_fix %d, len %llx\n", addr->min_address_fixed, addr->max_address_fixed, len); - res->start = attr->minimum; - res->end = attr->maximum; - /* * For bridges that translate addresses across the bridge, * translation_offset is the offset that must be added to the @@ -214,12 +212,22 @@ static bool acpi_decode_space(struct resource_win *win, * primary side. Non-bridge devices must list 0 for all Address * Translation offset bits. 
*/ - if (addr->producer_consumer == ACPI_PRODUCER) { - res->start += attr->translation_offset; - res->end += attr->translation_offset; - } else if (attr->translation_offset) { + if (addr->producer_consumer == ACPI_PRODUCER) + offset = attr->translation_offset; + else if (attr->translation_offset) pr_debug("ACPI: translation_offset(%lld) is invalid for non-bridge device.\n", attr->translation_offset); + start = attr->minimum + offset; + end = attr->maximum + offset; + + win->offset = offset; + res->start = start; + res->end = end; + if (sizeof(resource_size_t) < sizeof(u64) && + (offset != win->offset || start != res->start || end != res->end)) { + pr_warn("acpi resource window ([%#llx-%#llx] ignored, not CPU addressable)\n", + attr->minimum, attr->maximum); + return false; } switch (addr->resource_type) { @@ -236,8 +244,6 @@ static bool acpi_decode_space(struct resource_win *win, return false; } - win->offset = attr->translation_offset; - if (addr->producer_consumer == ACPI_PRODUCER) res->flags |= IORESOURCE_WINDOW; From 06ab12e64f674aedd402505e2ad89b1765cc86e0 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 19 May 2015 22:26:04 +0200 Subject: [PATCH 0351/2091] serial: samsung: only use earlycon for console commit 357d56151976a78d90dc3dfac01777de0ef05212 upstream. A configuration that enables earlycon but not the core console code causes a link error: drivers/built-in.o: In function `setup_earlycon': drivers/tty/serial/earlycon.c:70: undefined reference to `uart_parse_earlycon' That error can be triggered by the newly added samsung earlycon support, which is missing a 'select' statement. As suggested by Peter Hurley, solves the problem by moving the 'select SERIAL_EARLYCON' statement to the samsung console driver option, as it is done by all other console drivers. Signed-off-by: Arnd Bergmann Fixes: b94ba0328d3b3 ("serial: samsung: Add support for early console") Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig index f8120c1bde147..8cd35348fc193 100644 --- a/drivers/tty/serial/Kconfig +++ b/drivers/tty/serial/Kconfig @@ -241,7 +241,6 @@ config SERIAL_SAMSUNG tristate "Samsung SoC serial support" depends on PLAT_SAMSUNG || ARCH_EXYNOS select SERIAL_CORE - select SERIAL_EARLYCON help Support for the on-chip UARTs on the Samsung S3C24XX series CPUs, providing /dev/ttySAC0, 1 and 2 (note, some machines may not @@ -277,6 +276,7 @@ config SERIAL_SAMSUNG_CONSOLE bool "Support for console on Samsung SoC serial port" depends on SERIAL_SAMSUNG=y select SERIAL_CORE_CONSOLE + select SERIAL_EARLYCON help Allow selection of the S3C24XX on-board serial ports for use as an virtual console. From f213f0f73da0b349931fa3efaf2af79b03efde99 Mon Sep 17 00:00:00 2001 From: Ding Wang Date: Mon, 18 May 2015 20:14:15 +0800 Subject: [PATCH 0352/2091] mmc: card: Fixup request missing in mmc_blk_issue_rw_rq commit 29535f7b797df35cc9b6b3bca635591cdd3dd2a8 upstream. The current handler of MMC_BLK_CMD_ERR in mmc_blk_issue_rw_rq function may cause new coming request permanent missing when the ongoing request (previoulsy started) complete end. The problem scenario is as follows: (1) Request A is ongoing; (2) Request B arrived, and finally mmc_blk_issue_rw_rq() is called; (3) Request A encounters the MMC_BLK_CMD_ERR error; (4) In the error handling of MMC_BLK_CMD_ERR, suppose mmc_blk_cmd_err() end request A completed and return zero. 
Continue the error handling, suppose mmc_blk_reset() reset device success; (5) Continue the execution, while loop completed because variable ret is zero now; (6) Finally, mmc_blk_issue_rw_rq() return without processing request B. The process related to the missing request may wait that IO request complete forever, possibly crashing the application or hanging the system. Fix this issue by starting new request when reset success. Signed-off-by: Ding Wang Fixes: 67716327eec7 ("mmc: block: add eMMC hardware reset support") Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/card/block.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index e4a52ce9242fe..31d2627d9d4d6 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -1912,9 +1912,11 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *rqc) break; case MMC_BLK_CMD_ERR: ret = mmc_blk_cmd_err(md, card, brq, req, ret); - if (!mmc_blk_reset(md, card->host, type)) - break; - goto cmd_abort; + if (mmc_blk_reset(md, card->host, type)) + goto cmd_abort; + if (!ret) + goto start_new_req; + break; case MMC_BLK_RETRY: if (retry++ < 5) break; From 55df32920758f51d584f5a60a8b82919019c9c4d Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 5 Jun 2015 11:40:08 +0200 Subject: [PATCH 0353/2091] mmc: sdhci: Restore behavior while creating OCR mask commit 5fd26c7ecb32082745b0bd33c8e35badd1cb5a91 upstream. Commit 3a48edc4bd68 ("mmc: sdhci: Use mmc core regulator infrastucture") changed the behavior for how to assign the ocr_avail mask for the mmc host. More precisely it started to mask the bits instead of assigning them. Restore the behavior, but also make it clear that an OCR mask created from an external regulator overrides the other ones. The OCR mask is determined by one of the following with this priority: 1. Supported ranges of external regulator if one supplies VDD 2. Host OCR mask if set by the driver (based on DT properties) 3. The capabilities reported by the controller itself Fixes: 3a48edc4bd68 ("mmc: sdhci: Use mmc core regulator infrastucture") Cc: Tim Kryger Reported-by: Yangbo Lu Signed-off-by: Ulf Hansson Reviewed-by: Tim Kryger Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 9231cdfe27575..d3dbb28057e9b 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -3315,13 +3315,14 @@ int sdhci_add_host(struct sdhci_host *host) SDHCI_MAX_CURRENT_MULTIPLIER; } - /* If OCR set by external regulators, use it instead */ + /* If OCR set by host, use it instead. */ + if (host->ocr_mask) + ocr_avail = host->ocr_mask; + + /* If OCR set by external regulators, give it highest prio. */ if (mmc->ocr_avail) ocr_avail = mmc->ocr_avail; - if (host->ocr_mask) - ocr_avail &= host->ocr_mask; - mmc->ocr_avail = ocr_avail; mmc->ocr_avail_sdio = ocr_avail; if (host->ocr_avail_sdio) From 32419b851e1fbad53a3908cc3bea923dfb6a1416 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 8 May 2015 10:47:43 +0200 Subject: [PATCH 0354/2091] PM / clk: Fix clock error check in __pm_clk_add() commit 3fc3a0be0dab352e065d1dad7d3f81953ed0d4bc upstream. In the final iteration of commit 245bd6f6af8a62a2 ("PM / clock_ops: Add pm_clk_add_clk()"), a refcount increment was added by Grygorii Strashko. 
However, the accompanying IS_ERR() check operates on the wrong clock pointer, which is always zero at this point, i.e. not an error. This may lead to a NULL pointer dereference later, when __clk_get() tries to dereference an error pointer. Check the passed clock pointer instead to fix this. Signed-off-by: Geert Uytterhoeven Fixes: 245bd6f6af8a62a2 ("PM / clock_ops: Add pm_clk_add_clk()") Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/base/power/clock_ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/power/clock_ops.c b/drivers/base/power/clock_ops.c index 7fdd0172605af..c7b0fcebf168c 100644 --- a/drivers/base/power/clock_ops.c +++ b/drivers/base/power/clock_ops.c @@ -93,7 +93,7 @@ static int __pm_clk_add(struct device *dev, const char *con_id, return -ENOMEM; } } else { - if (IS_ERR(ce->clk) || !__clk_get(clk)) { + if (IS_ERR(clk) || !__clk_get(clk)) { kfree(ce); return -ENOENT; } From f354666d1be19af7ae671105c693bdb4633eef6b Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 5 Jun 2015 15:47:27 +0100 Subject: [PATCH 0355/2091] RDMA/ocrdma: fix double free on pd commit 4dc544427991e3cef38ce3ae124b7e6557063bd3 upstream. A reorganisation of the PD allocation and deallocation in commit 9ba1377daa ("RDMA/ocrdma: Move PD resource management to driver.") introduced a double free on pd, as detected by static analysis by smatch: drivers/infiniband/hw/ocrdma/ocrdma_verbs.c:682 ocrdma_alloc_pd() error: double free of 'pd'^ The original call to ocrdma_mbx_dealloc_pd() (which does not kfree pd) was replaced with a call to _ocrdma_dealloc_pd() (which does kfree pd). The kfree following this call causes the double free, so just remove it to fix the problem. Fixes: 9ba1377daa ("RDMA/ocrdma: Move PD resource management to driver.") Signed-off-by: Colin Ian King Acked-By: Devesh Sharma Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 9dcb66077d6cb..219f2122f9b96 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -679,7 +679,6 @@ struct ib_pd *ocrdma_alloc_pd(struct ib_device *ibdev, ocrdma_release_ucontext_pd(uctx); } else { status = _ocrdma_dealloc_pd(dev, pd); - kfree(pd); } exit: return ERR_PTR(status); From 0bcd77743dd60a8a64a9a39e25007968f8cc1c37 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 26 May 2015 14:45:29 -0700 Subject: [PATCH 0356/2091] tty: remove platform_sysrq_reset_seq commit ffb6e0c9a0572f8e5f8e9337a1b40ac2ec1493a1 upstream. The platform_sysrq_reset_seq code was intended as a way for an embedded platform to provide its own sysrq sequence at compile time. After over two years, nobody has started using it in an upstream kernel, and the platforms that were interested in it have moved on to devicetree, which can be used to configure the sequence without requiring kernel changes. The method is also incompatible with the way that most architectures build support for multiple platforms into a single kernel. Now the code is producing warnings when built with gcc-5.1: drivers/tty/sysrq.c: In function 'sysrq_init': drivers/tty/sysrq.c:959:33: warning: array subscript is above array bounds [-Warray-bounds] key = platform_sysrq_reset_seq[i]; We could fix this, but it seems unlikely that it will ever be used, so let's just remove the code instead. 
We still have the option to pass the sequence either in DT, using the kernel command line, or using the /sys/module/sysrq/parameters/reset_seq file. Fixes: 154b7a489a ("Input: sysrq - allow specifying alternate reset sequence") Signed-off-by: Arnd Bergmann Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/tty/sysrq.c | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c index 843f2cdc280b9..9ffdfcf2ec6ed 100644 --- a/drivers/tty/sysrq.c +++ b/drivers/tty/sysrq.c @@ -55,9 +55,6 @@ static int __read_mostly sysrq_enabled = CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE; static bool __read_mostly sysrq_always_enabled; -unsigned short platform_sysrq_reset_seq[] __weak = { KEY_RESERVED }; -int sysrq_reset_downtime_ms __weak; - static bool sysrq_on(void) { return sysrq_enabled || sysrq_always_enabled; @@ -569,6 +566,7 @@ void handle_sysrq(int key) EXPORT_SYMBOL(handle_sysrq); #ifdef CONFIG_INPUT +static int sysrq_reset_downtime_ms; /* Simple translation table for the SysRq keys */ static const unsigned char sysrq_xlate[KEY_CNT] = @@ -949,23 +947,8 @@ static bool sysrq_handler_registered; static inline void sysrq_register_handler(void) { - unsigned short key; int error; - int i; - - /* First check if a __weak interface was instantiated. */ - for (i = 0; i < ARRAY_SIZE(sysrq_reset_seq); i++) { - key = platform_sysrq_reset_seq[i]; - if (key == KEY_RESERVED || key > KEY_MAX) - break; - - sysrq_reset_seq[sysrq_reset_seq_len++] = key; - } - /* - * DT configuration takes precedence over anything that would - * have been defined via the __weak interface. - */ sysrq_of_get_keyreset_config(); error = input_register_handler(&sysrq_handler); From c791ad1e41a2b1c8df71590d08dbe4b1a1c3b1d5 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Wed, 24 Jun 2015 16:56:59 -0700 Subject: [PATCH 0357/2091] mm/hugetlb: introduce minimum hugepage order commit 641844f5616d7c6597309f560838f996466d7aac upstream. Currently the initial value of order in dissolve_free_huge_page is 64 or 32, which leads to the following warning in static checker: mm/hugetlb.c:1203 dissolve_free_huge_pages() warn: potential right shift more than type allows '9,18,64' This is a potential risk of infinite loop, because 1 << order (== 0) is used in for-loop like this: for (pfn =3D start_pfn; pfn < end_pfn; pfn +=3D 1 << order) ... So this patch fixes it by using global minimum_order calculated at boot time. text data bss dec hex filename 28313 469 84236 113018 1b97a mm/hugetlb.o 28256 473 84236 112965 1b945 mm/hugetlb.o (patched) Fixes: c8721bbbdd36 ("mm: memory-hotplug: enable memory hotplug to handle hugepage") Reported-by: Dan Carpenter Signed-off-by: Naoya Horiguchi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/hugetlb.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 271e4432734c3..8c4c1f9f9a9a9 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -40,6 +40,11 @@ int hugepages_treat_as_movable; int hugetlb_max_hstate __read_mostly; unsigned int default_hstate_idx; struct hstate hstates[HUGE_MAX_HSTATE]; +/* + * Minimum page order among possible hugepage sizes, set to a proper value + * at boot time. 
+ */ +static unsigned int minimum_order __read_mostly = UINT_MAX; __initdata LIST_HEAD(huge_boot_pages); @@ -1188,19 +1193,13 @@ static void dissolve_free_huge_page(struct page *page) */ void dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn) { - unsigned int order = 8 * sizeof(void *); unsigned long pfn; - struct hstate *h; if (!hugepages_supported()) return; - /* Set scan step to minimum hugepage size */ - for_each_hstate(h) - if (order > huge_page_order(h)) - order = huge_page_order(h); - VM_BUG_ON(!IS_ALIGNED(start_pfn, 1 << order)); - for (pfn = start_pfn; pfn < end_pfn; pfn += 1 << order) + VM_BUG_ON(!IS_ALIGNED(start_pfn, 1 << minimum_order)); + for (pfn = start_pfn; pfn < end_pfn; pfn += 1 << minimum_order) dissolve_free_huge_page(pfn_to_page(pfn)); } @@ -1627,10 +1626,14 @@ static void __init hugetlb_init_hstates(void) struct hstate *h; for_each_hstate(h) { + if (minimum_order > huge_page_order(h)) + minimum_order = huge_page_order(h); + /* oversize hugepages were init'ed in early boot */ if (!hstate_is_gigantic(h)) hugetlb_hstate_alloc_pages(h); } + VM_BUG_ON(minimum_order == UINT_MAX); } static char * __init memfmt(char *buf, unsigned long n) From 01fed2338abfbfd9719bc9ae0f2673c145f24955 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 25 Jun 2015 00:35:16 +0200 Subject: [PATCH 0358/2091] PM / sleep: Increase default DPM watchdog timeout to 60 commit fff3b16d2754a061a3549c4307a186423a0128fd upstream. Many harddisks (mostly WD ones) have firmware problems and take too long, more than 10 seconds, to resume from suspend. And this often exceeds the default DPM watchdog timeout (12 seconds), resulting in a kernel panic out of sudden. Since most distros just take the default as is, we should give a bit more safer value. This patch increases the default value from 12 seconds to one minute, which has been confirmed to be long enough for such problematic disks. Link: https://bugzilla.kernel.org/show_bug.cgi?id=91921 Fixes: 70fea60d888d (PM / Sleep: Detect device suspend/resume lockup and log event) Signed-off-by: Takashi Iwai Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- kernel/power/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 7e01f78f04177..9e302315e33db 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -187,7 +187,7 @@ config DPM_WATCHDOG config DPM_WATCHDOG_TIMEOUT int "Watchdog timeout in seconds" range 1 120 - default 12 + default 60 depends on DPM_WATCHDOG config PM_TRACE From 534cc62858a02a6fd4664b5f2802f75732c5ba16 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Thu, 25 Jun 2015 09:06:55 +0200 Subject: [PATCH 0359/2091] firmware: dmi_scan: Only honor end-of-table for 64-bit tables commit 17cd5bd5391e6e7b363d66335e1bc6760ae969b9 upstream. A 32-bit entry point to a DMI table says how many structures the table contains. The SMBIOS specification explicitly says that end-of-table markers should be ignored if they are not actually at the end of the DMI table. So only honor the end-of-table marker for tables accessed through 64-bit entry points, as they do not specify a structure count. 
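To make the two walking modes concrete, here is a self-contained sketch of the loop shape described above: stop on the structure count when a 32-bit style entry point provides one, and only trust the end-of-table marker when it does not. The record layout and names are simplified stand-ins, not the actual dmi_scan code (in particular, real SMBIOS structures are followed by a string-set that a real parser must also skip).

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

#define END_OF_TABLE 127

struct header {
	uint8_t type;
	uint8_t length;
};

static size_t walk_table(const uint8_t *buf, size_t len, unsigned int num)
{
	const uint8_t *p = buf;
	size_t count = 0;

	while ((!num || count < num) &&
	       (size_t)(p - buf) + sizeof(struct header) <= len) {
		const struct header *h = (const struct header *)p;

		/* only trust end-of-table when no structure count is known */
		if (!num && h->type == END_OF_TABLE)
			break;
		count++;
		p += h->length;		/* skip the formatted area */
	}
	return count;
}

int main(void)
{
	uint8_t table[] = {
		0, 4, 0, 0,		/* type 0 */
		1, 4, 0, 0,		/* type 1 */
		END_OF_TABLE, 4, 0, 0,	/* end-of-table marker */
	};

	/* 64-bit style entry point: no count, honor the marker */
	printf("no count: %zu structures\n", walk_table(table, sizeof(table), 0));
	/* 32-bit style entry point: count known, marker is not trusted */
	printf("count=3:  %zu structures\n", walk_table(table, sizeof(table), 3));
	return 0;
}
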
Fixes: fc43026278 ("dmi: add support for SMBIOS 3.0 64-bit entry point") Signed-off-by: Jean Delvare Acked-by: Ard Biesheuvel Cc: Leif Lindholm Cc: Matt Fleming Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/dmi_scan.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c index 97b1616aa3918..bba843c2b0ace 100644 --- a/drivers/firmware/dmi_scan.c +++ b/drivers/firmware/dmi_scan.c @@ -89,9 +89,9 @@ static void dmi_table(u8 *buf, /* * Stop when we have seen all the items the table claimed to have - * (SMBIOS < 3.0 only) OR we reach an end-of-table marker OR we run - * off the end of the table (should never happen but sometimes does - * on bogus implementations.) + * (SMBIOS < 3.0 only) OR we reach an end-of-table marker (SMBIOS + * >= 3.0 only) OR we run off the end of the table (should never + * happen but sometimes does on bogus implementations.) */ while ((!dmi_num || i < dmi_num) && (data - buf + sizeof(struct dmi_header)) <= dmi_len) { @@ -110,8 +110,13 @@ static void dmi_table(u8 *buf, /* * 7.45 End-of-Table (Type 127) [SMBIOS reference spec v3.0.0] + * For tables behind a 64-bit entry point, we have no item + * count and no exact table length, so stop on end-of-table + * marker. For tables behind a 32-bit entry point, we have + * seen OEM structures behind the end-of-table marker on + * some systems, so don't trust it. */ - if (dm->type == DMI_ENTRY_END_OF_TABLE) + if (!dmi_num && dm->type == DMI_ENTRY_END_OF_TABLE) break; data += 2; From 9116f601d95504745db41d077ba7ee8606f73996 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 25 Jun 2015 15:01:05 -0700 Subject: [PATCH 0360/2091] compiler-intel: fix wrong compiler barrier() macro commit b86a50c3b5414eafdbee7f34af4a201a4a7817c2 upstream. Cleanup commit 73679e508201 ("compiler-intel.h: Remove duplicate definition") removed the double definition of __memory_barrier() intrinsics. However, in doing so, it also removed the preceding #undef barrier by accident, meaning, the actual barrier() macro from compiler-gcc.h with inline asm is still in place as __GNUC__ is provided. Subsequently, barrier() can never be defined as __memory_barrier() from compiler.h since it already has a definition in place and if we trust the comment in compiler-intel.h, ecc doesn't support gcc specific asm statements. I don't have an ecc at hand (unsure if that's still used in the field?) and only found this by accident during code review, a revert of that cleanup would be simplest option. Fixes: 73679e508201 ("compiler-intel.h: Remove duplicate definition") Signed-off-by: Daniel Borkmann Reviewed-by: Pranith Kumar Cc: Pranith Kumar Cc: H. Peter Anvin Cc: mancha security Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/compiler-intel.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/compiler-intel.h b/include/linux/compiler-intel.h index 0c9a2f2c2802f..d4c71132d07f0 100644 --- a/include/linux/compiler-intel.h +++ b/include/linux/compiler-intel.h @@ -13,10 +13,12 @@ /* Intel ECC compiler doesn't support gcc specific asm stmts. * It uses intrinsics to do the equivalent things. 
*/ +#undef barrier #undef barrier_data #undef RELOC_HIDE #undef OPTIMIZER_HIDE_VAR +#define barrier() __memory_barrier() #define barrier_data(ptr) barrier() #define RELOC_HIDE(ptr, off) \ From f843b096c026fba1c2c584a5b9644bb2974bbff7 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Thu, 25 Jun 2015 15:02:08 -0700 Subject: [PATCH 0361/2091] __bitmap_parselist: fix bug in empty string handling commit 2528a8b8f457d7432552d0e2b6f0f4046bb702f4 upstream. bitmap_parselist("", &mask, nmaskbits) will erroneously set bit zero in the mask. The same bug is visible in cpumask_parselist() since it is layered on top of the bitmask code, e.g. if you boot with "isolcpus=", you will actually end up with cpu zero isolated. The bug was introduced in commit 4b060420a596 ("bitmap, irq: add smp_affinity_list interface to /proc/irq") when bitmap_parselist() was generalized to support userspace as well as kernelspace. Fixes: 4b060420a596 ("bitmap, irq: add smp_affinity_list interface to /proc/irq") Signed-off-by: Chris Metcalf Cc: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- lib/bitmap.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/lib/bitmap.c b/lib/bitmap.c index 64c0926f5dd8e..40162f87ea2d7 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -506,12 +506,12 @@ static int __bitmap_parselist(const char *buf, unsigned int buflen, unsigned a, b; int c, old_c, totaldigits; const char __user __force *ubuf = (const char __user __force *)buf; - int exp_digit, in_range; + int at_start, in_range; totaldigits = c = 0; bitmap_zero(maskp, nmaskbits); do { - exp_digit = 1; + at_start = 1; in_range = 0; a = b = 0; @@ -540,11 +540,10 @@ static int __bitmap_parselist(const char *buf, unsigned int buflen, break; if (c == '-') { - if (exp_digit || in_range) + if (at_start || in_range) return -EINVAL; b = 0; in_range = 1; - exp_digit = 1; continue; } @@ -554,16 +553,18 @@ static int __bitmap_parselist(const char *buf, unsigned int buflen, b = b * 10 + (c - '0'); if (!in_range) a = b; - exp_digit = 0; + at_start = 0; totaldigits++; } if (!(a <= b)) return -EINVAL; if (b >= nmaskbits) return -ERANGE; - while (a <= b) { - set_bit(a, maskp); - a++; + if (!at_start) { + while (a <= b) { + set_bit(a, maskp); + a++; + } } } while (buflen && c == ','); return 0; From ae41bfc68161ea5d280091ccb9593e9f68d5db44 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Thu, 25 Jun 2015 15:01:44 -0700 Subject: [PATCH 0362/2091] security_syslog() should be called once only commit d194e5d666225b04c7754471df0948f645b6ab3a upstream. The final version of commit 637241a900cb ("kmsg: honor dmesg_restrict sysctl on /dev/kmsg") lost few hooks, as result security_syslog() are processed incorrectly: - open of /dev/kmsg checks syslog access permissions by using check_syslog_permissions() where security_syslog() is not called if dmesg_restrict is set. - syslog syscall and /proc/kmsg calls do_syslog() where security_syslog can be executed twice (inside check_syslog_permissions() and then directly in do_syslog()) With this patch security_syslog() is called once only in all syslog-related operations regardless of dmesg_restrict value. 
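The intended control flow is easy to see in a reduced standalone sketch (the function and hook names mirror the kernel ones, but the code is a model, not the kernel implementation; the boolean parameters stand in for the real capability and restriction checks):

#include <stdio.h>

static int hook_calls;

static int security_syslog(int type)
{
    hook_calls++;
    return 0;
}

static int check_syslog_permissions(int type, int from_file, int opening,
                                    int restricted, int has_cap_syslog)
{
    if (from_file && !opening)
        goto ok;    /* capabilities already checked at open time */
    if (restricted) {
        if (has_cap_syslog)
            goto ok;
        return -1;  /* -EPERM */
    }
ok:
    return security_syslog(type);
}

int main(void)
{
    check_syslog_permissions(/*type=*/2, /*from_file=*/1, /*opening=*/0,
                             /*restricted=*/1, /*has_cap_syslog=*/0);
    printf("security_syslog() called %d time(s)\n", hook_calls);
    return 0;
}

Every permitted path funnels through the single ok: label, so the LSM hook runs exactly once per operation and the duplicate call removed from do_syslog() is no longer needed.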
Fixes: 637241a900cb ("kmsg: honor dmesg_restrict sysctl on /dev/kmsg") Signed-off-by: Vasily Averin Cc: Kees Cook Cc: Josh Boyer Cc: Eric Paris Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/printk/printk.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index c099b082cd027..bff0169e1ad88 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -484,11 +484,11 @@ int check_syslog_permissions(int type, bool from_file) * already done the capabilities checks at open time. */ if (from_file && type != SYSLOG_ACTION_OPEN) - return 0; + goto ok; if (syslog_action_restricted(type)) { if (capable(CAP_SYSLOG)) - return 0; + goto ok; /* * For historical reasons, accept CAP_SYS_ADMIN too, with * a warning. @@ -498,10 +498,11 @@ int check_syslog_permissions(int type, bool from_file) "CAP_SYS_ADMIN but no CAP_SYSLOG " "(deprecated).\n", current->comm, task_pid_nr(current)); - return 0; + goto ok; } return -EPERM; } +ok: return security_syslog(type); } @@ -1263,10 +1264,6 @@ int do_syslog(int type, char __user *buf, int len, bool from_file) if (error) goto out; - error = security_syslog(type); - if (error) - return error; - switch (type) { case SYSLOG_ACTION_CLOSE: /* Close log */ break; From 80879086fee9a4bb608689d3e0dc2ddb184aa48b Mon Sep 17 00:00:00 2001 From: Chun-Yeow Yeoh Date: Tue, 9 Jun 2015 13:35:33 +0800 Subject: [PATCH 0363/2091] mac80211: fix the beacon csa counter for mesh and ibss commit 8df734e865b74d9f273216482a45a38269dc767a upstream. The csa counter has moved from sdata to beacon/presp but it is not updated accordingly for mesh and ibss. Fix this. Fixes: af296bdb8da4 ("mac80211: move csa counters from sdata to beacon/presp") Signed-off-by: Chun-Yeow Yeoh Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/cfg.c | 1 + net/mac80211/ibss.c | 1 + net/mac80211/mesh.c | 1 + 3 files changed, 3 insertions(+) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index ff347a0eebd4f..f06d42267306e 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -3356,6 +3356,7 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, /* Update CSA counters */ if (sdata->vif.csa_active && (sdata->vif.type == NL80211_IFTYPE_AP || + sdata->vif.type == NL80211_IFTYPE_MESH_POINT || sdata->vif.type == NL80211_IFTYPE_ADHOC) && params->n_csa_offsets) { int i; diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index bfef1b2150504..a9c9d961f039c 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -146,6 +146,7 @@ ieee80211_ibss_build_presp(struct ieee80211_sub_if_data *sdata, csa_settings->chandef.chan->center_freq); presp->csa_counter_offsets[0] = (pos - presp->head); *pos++ = csa_settings->count; + presp->csa_current_counter = csa_settings->count; } /* put the remaining rates in WLAN_EID_EXT_SUPP_RATES */ diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index d4684242e78bf..817098add1d67 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -680,6 +680,7 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh) *pos++ = 0x0; *pos++ = ieee80211_frequency_to_channel( csa->settings.chandef.chan->center_freq); + bcn->csa_current_counter = csa->settings.count; bcn->csa_counter_offsets[0] = hdr_len + 6; *pos++ = csa->settings.count; *pos++ = WLAN_EID_CHAN_SWITCH_PARAM; From a35fe5b6b3657f510fb48016c6454603ffb93f7b Mon Sep 17 00:00:00 2001 From: Eliad Peller Date: Sun, 19 Apr 2015 11:41:04 
+0300 Subject: [PATCH 0364/2091] iwlwifi: mvm: fix ROC reference accounting commit c779273b37bec14c33feeab11c4d457a24bc64e0 upstream. commit b112889c5af8124 ("iwlwifi: mvm: add Aux ROC request/response flow") added aux ROC flow in addition to the existing ROC flow. While doing it, it moved the ROC reference release to a common work item, which is being called for both the ROC and aux ROC flows. This resulted in invalid reference accounting, as no reference was taken in case of aux ROC, while a reference was released on completion. Fix it by adding a reference for the aux ROC as well, and release only the relevant references on completion (according to the set bits). While at it, convert cancel_work_sync() to flush_work(), in order to make sure the references are being cleaned properly. Fixes: b112889c5af8 ("iwlwifi: mvm: add Aux ROC request/response flow") Signed-off-by: Eliad Peller Reviewed-by: Johannes Berg Signed-off-by: Emmanuel Grumbach Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/iwlwifi/mvm/debugfs.c | 5 +++-- drivers/net/wireless/iwlwifi/mvm/mac80211.c | 2 +- drivers/net/wireless/iwlwifi/mvm/mvm.h | 1 + drivers/net/wireless/iwlwifi/mvm/time-event.c | 15 +++++++++------ 4 files changed, 14 insertions(+), 9 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/mvm/debugfs.c b/drivers/net/wireless/iwlwifi/mvm/debugfs.c index 9ac04c1ea7063..8c17b943cc6fa 100644 --- a/drivers/net/wireless/iwlwifi/mvm/debugfs.c +++ b/drivers/net/wireless/iwlwifi/mvm/debugfs.c @@ -6,7 +6,7 @@ * GPL LICENSE SUMMARY * * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. - * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH + * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -32,7 +32,7 @@ * BSD LICENSE * * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. - * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH + * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -1356,6 +1356,7 @@ static ssize_t iwl_dbgfs_d0i3_refs_read(struct file *file, PRINT_MVM_REF(IWL_MVM_REF_UCODE_DOWN); PRINT_MVM_REF(IWL_MVM_REF_SCAN); PRINT_MVM_REF(IWL_MVM_REF_ROC); + PRINT_MVM_REF(IWL_MVM_REF_ROC_AUX); PRINT_MVM_REF(IWL_MVM_REF_P2P_CLIENT); PRINT_MVM_REF(IWL_MVM_REF_AP_IBSS); PRINT_MVM_REF(IWL_MVM_REF_USER); diff --git a/drivers/net/wireless/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/iwlwifi/mvm/mac80211.c index dda9f7b5f3423..60c138a9bf4ff 100644 --- a/drivers/net/wireless/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/iwlwifi/mvm/mac80211.c @@ -1404,7 +1404,7 @@ void __iwl_mvm_mac_stop(struct iwl_mvm *mvm) * The work item could be running or queued if the * ROC time event stops just as we get here. 
*/ - cancel_work_sync(&mvm->roc_done_wk); + flush_work(&mvm->roc_done_wk); iwl_trans_stop_device(mvm->trans); diff --git a/drivers/net/wireless/iwlwifi/mvm/mvm.h b/drivers/net/wireless/iwlwifi/mvm/mvm.h index cf70f681d1acb..6af21daaaaef9 100644 --- a/drivers/net/wireless/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/iwlwifi/mvm/mvm.h @@ -275,6 +275,7 @@ enum iwl_mvm_ref_type { IWL_MVM_REF_UCODE_DOWN, IWL_MVM_REF_SCAN, IWL_MVM_REF_ROC, + IWL_MVM_REF_ROC_AUX, IWL_MVM_REF_P2P_CLIENT, IWL_MVM_REF_AP_IBSS, IWL_MVM_REF_USER, diff --git a/drivers/net/wireless/iwlwifi/mvm/time-event.c b/drivers/net/wireless/iwlwifi/mvm/time-event.c index fd7b0d36f9a62..a7448cf016887 100644 --- a/drivers/net/wireless/iwlwifi/mvm/time-event.c +++ b/drivers/net/wireless/iwlwifi/mvm/time-event.c @@ -6,7 +6,7 @@ * GPL LICENSE SUMMARY * * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. - * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH + * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -32,7 +32,7 @@ * BSD LICENSE * * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. - * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH + * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -108,12 +108,14 @@ void iwl_mvm_roc_done_wk(struct work_struct *wk) * in the case that the time event actually completed in the firmware * (which is handled in iwl_mvm_te_handle_notif). */ - if (test_and_clear_bit(IWL_MVM_STATUS_ROC_RUNNING, &mvm->status)) + if (test_and_clear_bit(IWL_MVM_STATUS_ROC_RUNNING, &mvm->status)) { queues |= BIT(IWL_MVM_OFFCHANNEL_QUEUE); - if (test_and_clear_bit(IWL_MVM_STATUS_ROC_AUX_RUNNING, &mvm->status)) + iwl_mvm_unref(mvm, IWL_MVM_REF_ROC); + } + if (test_and_clear_bit(IWL_MVM_STATUS_ROC_AUX_RUNNING, &mvm->status)) { queues |= BIT(mvm->aux_queue); - - iwl_mvm_unref(mvm, IWL_MVM_REF_ROC); + iwl_mvm_unref(mvm, IWL_MVM_REF_ROC_AUX); + } synchronize_net(); @@ -393,6 +395,7 @@ static int iwl_mvm_aux_roc_te_handle_notif(struct iwl_mvm *mvm, } else if (le32_to_cpu(notif->action) == TE_V2_NOTIF_HOST_EVENT_START) { set_bit(IWL_MVM_STATUS_ROC_AUX_RUNNING, &mvm->status); te_data->running = true; + iwl_mvm_ref(mvm, IWL_MVM_REF_ROC_AUX); ieee80211_ready_on_channel(mvm->hw); /* Start TE */ } else { IWL_DEBUG_TE(mvm, From 7b53ca5c69fd9054c0fc3af97717176a8dacdb4f Mon Sep 17 00:00:00 2001 From: Michal Kazior Date: Fri, 22 May 2015 10:57:22 +0200 Subject: [PATCH 0365/2091] cfg80211: ignore netif running state when changing iftype commit 6cbfb1bb66e4e85da5db78e8ff429a85bd84ce64 upstream. It was possible for mac80211 to be coerced into an unexpected flow causing sdata union to become corrupted. Station pointer was put into sdata->u.vlan.sta memory location while it was really master AP's sdata->u.ap.next_beacon. This led to station entry being later freed as next_beacon before __sta_info_flush() in ieee80211_stop_ap() and a subsequent invalid pointer dereference crash. The problem was that ieee80211_ptr->use_4addr wasn't cleared on interface type changes. 
This could be reproduced with the following steps: # host A and host B have just booted; no # wpa_s/hostapd running; all vifs are down host A> iw wlan0 set type station host A> iw wlan0 set 4addr on host A> printf 'interface=wlan0\nssid=4addrcrash\nchannel=1\nwds_sta=1' > /tmp/hconf host A> hostapd -B /tmp/conf host B> iw wlan0 set 4addr on host B> ifconfig wlan0 up host B> iw wlan0 connect -w hostAssid host A> pkill hostapd # host A crashed: [ 127.928192] BUG: unable to handle kernel NULL pointer dereference at 00000000000006c8 [ 127.929014] IP: [] __sta_info_flush+0xac/0x158 ... [ 127.934578] [] ieee80211_stop_ap+0x139/0x26c [ 127.934578] [] ? dump_trace+0x279/0x28a [ 127.934578] [] __cfg80211_stop_ap+0x84/0x191 [ 127.934578] [] cfg80211_stop_ap+0x3f/0x58 [ 127.934578] [] nl80211_stop_ap+0x1b/0x1d [ 127.934578] [] genl_family_rcv_msg+0x259/0x2b5 Note: This isn't a revert of f8cdddb8d61d ("cfg80211: check iface combinations only when iface is running") as far as functionality is considered because b6a550156bc ("cfg80211/mac80211: move more combination checks to mac80211") moved the logic somewhere else already. Fixes: f8cdddb8d61d ("cfg80211: check iface combinations only when iface is running") Signed-off-by: Michal Kazior Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/wireless/util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/util.c b/net/wireless/util.c index 70051ab52f4f3..7e4e3fffe7ce4 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -944,7 +944,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, ntype == NL80211_IFTYPE_P2P_CLIENT)) return -EBUSY; - if (ntype != otype && netif_running(dev)) { + if (ntype != otype) { dev->ieee80211_ptr->use_4addr = false; dev->ieee80211_ptr->mesh_id_up_len = 0; wdev_lock(dev->ieee80211_ptr); From 063c47a030bf17eac2d9010f0a899be5f7f4db87 Mon Sep 17 00:00:00 2001 From: Michal Kazior Date: Fri, 22 May 2015 10:22:40 +0200 Subject: [PATCH 0366/2091] mac80211: prevent possible crypto tx tailroom corruption commit ab499db80fcf07c18e4053f91a619500f663e90e upstream. There was a possible race between ieee80211_reconfig() and ieee80211_delayed_tailroom_dec(). This could result in inability to transmit data if driver crashed during roaming or rekeying and subsequent skbs with insufficient tailroom appeared. This race was probably never seen in the wild because a device driver would have to crash AND recover within 0.5s which is very unlikely. I was able to prove this race exists after changing the delay to 10s locally and crashing ath10k via debugfs immediately after GTK rekeying. In case of ath10k the counter went below 0. This was harmless but other drivers which actually require tailroom (e.g. for WEP ICV or MMIC) could end up with the counter at 0 instead of >0 and introduce insufficient skb tailroom failures because mac80211 would not resize skbs appropriately anymore. 
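A minimal pthread model of the race (not mac80211 code; the grace period is shortened, the key bookkeeping is reduced to one counter, and the timing makes the ordering deterministic for the demo; build with -pthread) shows why flushing the deferred decrement before rebuilding state matters:

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static int tailroom_needed_cnt;
static pthread_t worker;

/* Deferred decrement, standing in for the delayed tailroom work item. */
static void *deferred_dec(void *arg)
{
    (void)arg;
    usleep(100 * 1000);         /* the 0.5 s grace period, shortened */
    tailroom_needed_cnt--;
    return NULL;
}

static void key_stops_needing_tailroom(void)
{
    pthread_create(&worker, NULL, deferred_dec, NULL);
}

static void restart_hw(int flush_first)
{
    if (flush_first)
        pthread_join(worker, NULL);  /* flush_delayed_work() analogue */
    tailroom_needed_cnt = 1;         /* restart re-adds a key needing tailroom */
    if (!flush_first)
        pthread_join(worker, NULL);  /* stale decrement lands on new state */
}

int main(void)
{
    tailroom_needed_cnt = 1;
    key_stops_needing_tailroom();
    restart_hw(0);
    printf("without flush: cnt=%d (a key still needs tailroom, expected 1)\n",
           tailroom_needed_cnt);

    tailroom_needed_cnt = 1;
    key_stops_needing_tailroom();
    restart_hw(1);
    printf("with flush:    cnt=%d\n", tailroom_needed_cnt);
    return 0;
}

Flushing first lets the pending decrement apply to the pre-restart accounting, which is what the added flush_delayed_work() loop below achieves per interface.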
Fixes: 8d1f7ecd2af5 ("mac80211: defer tailroom counter manipulation when roaming") Signed-off-by: Michal Kazior Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/mac80211/main.c b/net/mac80211/main.c index df3051d96afff..e86daed83c6fb 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -249,6 +249,7 @@ static void ieee80211_restart_work(struct work_struct *work) { struct ieee80211_local *local = container_of(work, struct ieee80211_local, restart_work); + struct ieee80211_sub_if_data *sdata; /* wait for scan work complete */ flush_workqueue(local->workqueue); @@ -257,6 +258,8 @@ static void ieee80211_restart_work(struct work_struct *work) "%s called with hardware scan in progress\n", __func__); rtnl_lock(); + list_for_each_entry(sdata, &local->interfaces, list) + flush_delayed_work(&sdata->dec_tailroom_needed_wk); ieee80211_scan_cancel(local); ieee80211_reconfig(local); rtnl_unlock(); From 8b7c99ee18a5a872406ffcffdc60fd4a7744770a Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Sat, 2 May 2015 00:52:00 -0700 Subject: [PATCH 0367/2091] e1000e: Cleanup handling of VLAN_HLEN as a part of max frame size commit 8084b86dcfbc4b4822868c1dbdb429b5c08154e2 upstream. When the VLAN_HLEN was added to the calculation for the maximum frame size there seems to have been a number of issues added to the driver. The first issue is that in some cases the maximum frame size for a device never really reached the actual maximum frame size as the VLAN header length was not included the calculation for that value. As a result some parts only supported a maximum frame size of either 1496 in the case of parts that didn't support jumbo frames, and 8996 in the case of the parts that do. The second issue is the fact that there were several checks that weren't updated so as a result setting an MTU of 1500 was treated as enabling jumbo frames as the calculated value was 1522 instead of 1518. I have addressed those by replacing ETH_FRAME_LEN with VLAN_ETH_FRAME_LEN where appropriate. The final issue was the fact that lowering the MTU below 1500 would cause the driver to allocate 2K buffers for the rings. This is an old issue that was fixed several years ago in igb/ixgbe and I am addressing now by just replacing == with a <= so that we always just round up to 1522 for anything that isn't a jumbo frame. 
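The cutoff arithmetic being corrected can be rechecked with a standalone program (constants as defined in include/uapi/linux/if_ether.h and if_vlan.h; this is only the size math, not driver code):

#include <stdio.h>

#define ETH_HLEN            14
#define ETH_DATA_LEN        1500
#define ETH_FRAME_LEN       1514
#define ETH_FCS_LEN         4
#define VLAN_HLEN           4
#define VLAN_ETH_HLEN       (ETH_HLEN + VLAN_HLEN)
#define VLAN_ETH_FRAME_LEN  (ETH_FRAME_LEN + VLAN_HLEN)

int main(void)
{
    int mtu = 1500;
    int old_cutoff = ETH_FRAME_LEN + ETH_FCS_LEN;       /* 1518 */
    int new_cutoff = VLAN_ETH_FRAME_LEN + ETH_FCS_LEN;  /* 1522 */
    int max_frame  = mtu + VLAN_ETH_HLEN + ETH_FCS_LEN; /* 1522 */

    printf("max_frame for MTU %d = %d\n", mtu, max_frame);
    printf("jumbo with old cutoff (%d)? %s\n",
           old_cutoff, max_frame > old_cutoff ? "yes" : "no");
    printf("jumbo with new cutoff (%d)? %s\n",
           new_cutoff, max_frame > new_cutoff ? "yes" : "no");
    return 0;
}

A standard 1500-byte MTU is 1522 bytes on the wire once the VLAN tag and FCS are counted, so the old ETH_FRAME_LEN-based cutoff misclassified it as jumbo; comparing against VLAN_ETH_FRAME_LEN + ETH_FCS_LEN restores the 1522-byte boundary.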
Fixes: c751a3d58cf2d ("e1000e: Correctly include VLAN_HLEN when changing interface MTU") Signed-off-by: Alexander Duyck Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/intel/e1000e/82571.c | 2 +- drivers/net/ethernet/intel/e1000e/ich8lan.c | 10 +++++----- drivers/net/ethernet/intel/e1000e/netdev.c | 18 ++++++++---------- 3 files changed, 14 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/82571.c b/drivers/net/ethernet/intel/e1000e/82571.c index dc79ed85030b7..32e77755a9c61 100644 --- a/drivers/net/ethernet/intel/e1000e/82571.c +++ b/drivers/net/ethernet/intel/e1000e/82571.c @@ -2010,7 +2010,7 @@ const struct e1000_info e1000_82573_info = { .flags2 = FLAG2_DISABLE_ASPM_L1 | FLAG2_DISABLE_ASPM_L0S, .pba = 20, - .max_hw_frame_size = ETH_FRAME_LEN + ETH_FCS_LEN, + .max_hw_frame_size = VLAN_ETH_FRAME_LEN + ETH_FCS_LEN, .get_variants = e1000_get_variants_82571, .mac_ops = &e82571_mac_ops, .phy_ops = &e82_phy_ops_m88, diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index 9d81c03174334..e2498dbf3c3b6 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -1563,7 +1563,7 @@ static s32 e1000_get_variants_ich8lan(struct e1000_adapter *adapter) ((adapter->hw.mac.type >= e1000_pch2lan) && (!(er32(CTRL_EXT) & E1000_CTRL_EXT_LSECCK)))) { adapter->flags &= ~FLAG_HAS_JUMBO_FRAMES; - adapter->max_hw_frame_size = ETH_FRAME_LEN + ETH_FCS_LEN; + adapter->max_hw_frame_size = VLAN_ETH_FRAME_LEN + ETH_FCS_LEN; hw->mac.ops.blink_led = NULL; } @@ -5681,7 +5681,7 @@ const struct e1000_info e1000_ich8_info = { | FLAG_HAS_FLASH | FLAG_APME_IN_WUC, .pba = 8, - .max_hw_frame_size = ETH_FRAME_LEN + ETH_FCS_LEN, + .max_hw_frame_size = VLAN_ETH_FRAME_LEN + ETH_FCS_LEN, .get_variants = e1000_get_variants_ich8lan, .mac_ops = &ich8_mac_ops, .phy_ops = &ich8_phy_ops, @@ -5754,7 +5754,7 @@ const struct e1000_info e1000_pch2_info = { .flags2 = FLAG2_HAS_PHY_STATS | FLAG2_HAS_EEE, .pba = 26, - .max_hw_frame_size = 9018, + .max_hw_frame_size = 9022, .get_variants = e1000_get_variants_ich8lan, .mac_ops = &ich8_mac_ops, .phy_ops = &ich8_phy_ops, @@ -5774,7 +5774,7 @@ const struct e1000_info e1000_pch_lpt_info = { .flags2 = FLAG2_HAS_PHY_STATS | FLAG2_HAS_EEE, .pba = 26, - .max_hw_frame_size = 9018, + .max_hw_frame_size = 9022, .get_variants = e1000_get_variants_ich8lan, .mac_ops = &ich8_mac_ops, .phy_ops = &ich8_phy_ops, @@ -5794,7 +5794,7 @@ const struct e1000_info e1000_pch_spt_info = { .flags2 = FLAG2_HAS_PHY_STATS | FLAG2_HAS_EEE, .pba = 26, - .max_hw_frame_size = 9018, + .max_hw_frame_size = 9022, .get_variants = e1000_get_variants_ich8lan, .mac_ops = &ich8_mac_ops, .phy_ops = &ich8_phy_ops, diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index c509a5c900f52..68913d1035421 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -3807,7 +3807,7 @@ void e1000e_reset(struct e1000_adapter *adapter) /* reset Packet Buffer Allocation to default */ ew32(PBA, pba); - if (adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) { + if (adapter->max_frame_size > (VLAN_ETH_FRAME_LEN + ETH_FCS_LEN)) { /* To maintain wire speed transmits, the Tx FIFO should be * large enough to accommodate two full transmit packets, * rounded up to the next 1KB and expressed in KB. 
Likewise, @@ -4196,9 +4196,9 @@ static int e1000_sw_init(struct e1000_adapter *adapter) { struct net_device *netdev = adapter->netdev; - adapter->rx_buffer_len = ETH_FRAME_LEN + VLAN_HLEN + ETH_FCS_LEN; + adapter->rx_buffer_len = VLAN_ETH_FRAME_LEN + ETH_FCS_LEN; adapter->rx_ps_bsize0 = 128; - adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN; + adapter->max_frame_size = netdev->mtu + VLAN_ETH_HLEN + ETH_FCS_LEN; adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN; adapter->tx_ring_count = E1000_DEFAULT_TXD; adapter->rx_ring_count = E1000_DEFAULT_RXD; @@ -5781,17 +5781,17 @@ struct rtnl_link_stats64 *e1000e_get_stats64(struct net_device *netdev, static int e1000_change_mtu(struct net_device *netdev, int new_mtu) { struct e1000_adapter *adapter = netdev_priv(netdev); - int max_frame = new_mtu + VLAN_HLEN + ETH_HLEN + ETH_FCS_LEN; + int max_frame = new_mtu + VLAN_ETH_HLEN + ETH_FCS_LEN; /* Jumbo frame support */ - if ((max_frame > ETH_FRAME_LEN + ETH_FCS_LEN) && + if ((max_frame > (VLAN_ETH_FRAME_LEN + ETH_FCS_LEN)) && !(adapter->flags & FLAG_HAS_JUMBO_FRAMES)) { e_err("Jumbo Frames not supported.\n"); return -EINVAL; } /* Supported frame sizes */ - if ((new_mtu < ETH_ZLEN + ETH_FCS_LEN + VLAN_HLEN) || + if ((new_mtu < (VLAN_ETH_ZLEN + ETH_FCS_LEN)) || (max_frame > adapter->max_hw_frame_size)) { e_err("Unsupported MTU setting\n"); return -EINVAL; @@ -5831,10 +5831,8 @@ static int e1000_change_mtu(struct net_device *netdev, int new_mtu) adapter->rx_buffer_len = 4096; /* adjust allocation if LPE protects us, and we aren't using SBP */ - if ((max_frame == ETH_FRAME_LEN + ETH_FCS_LEN) || - (max_frame == ETH_FRAME_LEN + VLAN_HLEN + ETH_FCS_LEN)) - adapter->rx_buffer_len = ETH_FRAME_LEN + VLAN_HLEN - + ETH_FCS_LEN; + if (max_frame <= (VLAN_ETH_FRAME_LEN + ETH_FCS_LEN)) + adapter->rx_buffer_len = VLAN_ETH_FRAME_LEN + ETH_FCS_LEN; if (netif_running(netdev)) e1000e_up(adapter); From 4f0b316fff5b5b90ab8f5f9292b464f0c26d19e3 Mon Sep 17 00:00:00 2001 From: Damian Eppel Date: Fri, 26 Jun 2015 15:23:04 +0200 Subject: [PATCH 0368/2091] clocksource: exynos_mct: Avoid blocking calls in the cpu hotplug notifier commit 56a94f13919c0db5958611b388e1581b4852f3c9 upstream. Whilst testing cpu hotplug events on kernel configured with DEBUG_PREEMPT and DEBUG_ATOMIC_SLEEP we get following BUG message, caused by calling request_irq() and free_irq() in the context of hotplug notification (which is in this case atomic context). 
[ 40.785859] CPU1: Software reset [ 40.786660] BUG: sleeping function called from invalid context at mm/slub.c:1241 [ 40.786668] in_atomic(): 1, irqs_disabled(): 128, pid: 0, name: swapper/1 [ 40.786678] Preemption disabled at:[< (null)>] (null) [ 40.786681] [ 40.786692] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 3.19.0-rc4-00024-g7dca860 #36 [ 40.786698] Hardware name: SAMSUNG EXYNOS (Flattened Device Tree) [ 40.786728] [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [ 40.786747] [] (show_stack) from [] (dump_stack+0x70/0xbc) [ 40.786767] [] (dump_stack) from [] (kmem_cache_alloc+0xd8/0x170) [ 40.786785] [] (kmem_cache_alloc) from [] (request_threaded_irq+0x64/0x128) [ 40.786804] [] (request_threaded_irq) from [] (exynos4_local_timer_setup+0xc0/0x13c) [ 40.786820] [] (exynos4_local_timer_setup) from [] (exynos4_mct_cpu_notify+0x30/0xa8) [ 40.786838] [] (exynos4_mct_cpu_notify) from [] (notifier_call_chain+0x44/0x84) [ 40.786857] [] (notifier_call_chain) from [] (__cpu_notify+0x28/0x44) [ 40.786873] [] (__cpu_notify) from [] (secondary_start_kernel+0xec/0x150) [ 40.786886] [] (secondary_start_kernel) from [<40008764>] (0x40008764) Interrupts cannot be requested/freed in the CPU_STARTING/CPU_DYING notifications which run on the hotplugged cpu with interrupts and preemption disabled. To avoid the issue, request the interrupts for all possible cpus in the boot code. The interrupts are marked NO_AUTOENABLE to avoid a racy request_irq/disable_irq() sequence. The flag prevents the request_irq() code from enabling the interrupt immediately. The interrupt is then enabled in the CPU_STARTING notifier of the hotplugged cpu and again disabled with disable_irq_nosync() in the CPU_DYING notifier. [ tglx: Massaged changelog to match the patch ] Fixes: 7114cd749a12 ("clocksource: exynos_mct: use (request/free)_irq calls for local timer registration") Reported-by: Krzysztof Kozlowski Reviewed-by: Krzysztof Kozlowski Tested-by: Krzysztof Kozlowski Tested-by: Marcin Jabrzyk Signed-off-by: Damian Eppel Cc: m.szyprowski@samsung.com Cc: kyungmin.park@samsung.com Cc: daniel.lezcano@linaro.org Cc: kgene@kernel.org Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1435324984-7328-1-git-send-email-d.eppel@samsung.com Signed-off-by: Thomas Gleixner Cc: Signed-off-by: Greg Kroah-Hartman --- drivers/clocksource/exynos_mct.c | 43 ++++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 13 deletions(-) diff --git a/drivers/clocksource/exynos_mct.c b/drivers/clocksource/exynos_mct.c index 83564c9cfdbe3..c844616028d20 100644 --- a/drivers/clocksource/exynos_mct.c +++ b/drivers/clocksource/exynos_mct.c @@ -466,15 +466,12 @@ static int exynos4_local_timer_setup(struct clock_event_device *evt) exynos4_mct_write(TICK_BASE_CNT, mevt->base + MCT_L_TCNTB_OFFSET); if (mct_int_type == MCT_INT_SPI) { - evt->irq = mct_irqs[MCT_L0_IRQ + cpu]; - if (request_irq(evt->irq, exynos4_mct_tick_isr, - IRQF_TIMER | IRQF_NOBALANCING, - evt->name, mevt)) { - pr_err("exynos-mct: cannot register IRQ %d\n", - evt->irq); + + if (evt->irq == -1) return -EIO; - } - irq_force_affinity(mct_irqs[MCT_L0_IRQ + cpu], cpumask_of(cpu)); + + irq_force_affinity(evt->irq, cpumask_of(cpu)); + enable_irq(evt->irq); } else { enable_percpu_irq(mct_irqs[MCT_L0_IRQ], 0); } @@ -487,10 +484,12 @@ static int exynos4_local_timer_setup(struct clock_event_device *evt) static void exynos4_local_timer_stop(struct clock_event_device *evt) { evt->set_mode(CLOCK_EVT_MODE_UNUSED, evt); - if (mct_int_type == MCT_INT_SPI) - free_irq(evt->irq, 
this_cpu_ptr(&percpu_mct_tick)); - else + if (mct_int_type == MCT_INT_SPI) { + if (evt->irq != -1) + disable_irq_nosync(evt->irq); + } else { disable_percpu_irq(mct_irqs[MCT_L0_IRQ]); + } } static int exynos4_mct_cpu_notify(struct notifier_block *self, @@ -522,7 +521,7 @@ static struct notifier_block exynos4_mct_cpu_nb = { static void __init exynos4_timer_resources(struct device_node *np, void __iomem *base) { - int err; + int err, cpu; struct mct_clock_event_device *mevt = this_cpu_ptr(&percpu_mct_tick); struct clk *mct_clk, *tick_clk; @@ -549,7 +548,25 @@ static void __init exynos4_timer_resources(struct device_node *np, void __iomem WARN(err, "MCT: can't request IRQ %d (%d)\n", mct_irqs[MCT_L0_IRQ], err); } else { - irq_set_affinity(mct_irqs[MCT_L0_IRQ], cpumask_of(0)); + for_each_possible_cpu(cpu) { + int mct_irq = mct_irqs[MCT_L0_IRQ + cpu]; + struct mct_clock_event_device *pcpu_mevt = + per_cpu_ptr(&percpu_mct_tick, cpu); + + pcpu_mevt->evt.irq = -1; + + irq_set_status_flags(mct_irq, IRQ_NOAUTOEN); + if (request_irq(mct_irq, + exynos4_mct_tick_isr, + IRQF_TIMER | IRQF_NOBALANCING, + pcpu_mevt->name, pcpu_mevt)) { + pr_err("exynos-mct: cannot register IRQ (cpu%d)\n", + cpu); + + continue; + } + pcpu_mevt->evt.irq = mct_irq; + } } err = register_cpu_notifier(&exynos4_mct_cpu_nb); From 65f5cac612390fdf05f21df8633f7ed11f778f22 Mon Sep 17 00:00:00 2001 From: Dmitry Tunin Date: Sun, 18 Jan 2015 15:44:40 +0300 Subject: [PATCH 0369/2091] ideapad_laptop: Lenovo G50-30 fix rfkill reports wireless blocked commit 4fa9dabcffc8e16601307d3d56b58c68d9716ba4 upstream. Lenovo G30-50 does not have a hardware wireless switch and wireless is always blocked. BugLink: https://bugs.launchpad.net/bugs/1397021 Signed-off-by: Dmitry Tunin Signed-off-by: Philippe Coval [dvhart@linux.intel.com: Reordered dmi id per Phillippe's later version] Signed-off-by: Darren Hart Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/ideapad-laptop.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index b496db87bc050..6e825201f09a8 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -836,6 +836,13 @@ static const struct dmi_system_id no_hw_rfkill_list[] = { DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo G40-30"), }, }, + { + .ident = "Lenovo G50-30", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo G50-30"), + }, + }, { .ident = "Lenovo Yoga 2 11 / 13 / Pro", .matches = { From 0b99ffc6ea42e16fa4c5b01a414ac0f26766a5f4 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 13 Jun 2015 15:23:33 +0200 Subject: [PATCH 0370/2091] ideapad: fix software rfkill setting commit 4b200b4604bec3388426159f1656109d19fadf6e upstream. This fixes a several year old regression that I found while trying to get the Yoga 3 11 to work. The ideapad_rfk_set function is meant to send a command to the embedded controller through ACPI, but as of c1f73658ed, it sends the index of the rfkill device instead of the command, and ignores the opcode field. This changes it back to the original behavior, which indeed flips the rfkill state as seen in the debugfs interface. 
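The bug class here is an index passed where a looked-up value was expected; a small standalone sketch illustrates it (the opcode numbers are made up for the example, not the real ideapad EC opcodes):

#include <stdio.h>

struct rfk_data {
    const char *name;
    int opcode;
};

static const struct rfk_data rfk_table[] = {
    { "wifi",      0x15 },  /* hypothetical opcode values */
    { "bluetooth", 0x17 },
    { "3g",        0x19 },
};

static void write_ec_cmd(int opcode, int value)
{
    printf("EC write: opcode 0x%02x <- %d\n", opcode, value);
}

int main(void)
{
    int dev = 1;    /* bluetooth entry in the table */

    write_ec_cmd(dev, 1);                    /* broken: sends the index */
    write_ec_cmd(rfk_table[dev].opcode, 1);  /* fixed: sends the opcode */
    return 0;
}

The one-line change below does exactly the second form: look up the opcode for the device first, then hand that to write_ec_cmd().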
Signed-off-by: Arnd Bergmann Fixes: c1f73658ed ("ideapad: pass ideapad_priv as argument (part 2)") Signed-off-by: Darren Hart Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/ideapad-laptop.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index 6e825201f09a8..cb7cd8d79329a 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -464,8 +464,9 @@ static const struct ideapad_rfk_data ideapad_rfk_data[] = { static int ideapad_rfk_set(void *data, bool blocked) { struct ideapad_rfk_priv *priv = data; + int opcode = ideapad_rfk_data[priv->dev].opcode; - return write_ec_cmd(priv->priv->adev->handle, priv->dev, !blocked); + return write_ec_cmd(priv->priv->adev->handle, opcode, !blocked); } static struct rfkill_ops ideapad_rfk_ops = { From a35b0d6cc0ec26fb607f6cf7bc7be5756c5de63e Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Thu, 18 Jun 2015 00:12:27 +0900 Subject: [PATCH 0371/2091] of/address: use atomic allocation in pci_register_io_range() commit 294240ffe784e951dc2ef070da04fa31ef6db3a0 upstream. When kzalloc() is called under spin_lock(), GFP_ATOMIC should be used to avoid sleeping allocation. The call tree is: of_pci_range_to_resource() --> pci_register_io_range() <-- takes spin_lock(&io_range_lock); --> kzalloc() Signed-off-by: Jingoo Han Signed-off-by: Rob Herring Signed-off-by: Greg Kroah-Hartman --- drivers/of/address.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/of/address.c b/drivers/of/address.c index 6906a3f61bd86..8bfda6ade2c02 100644 --- a/drivers/of/address.c +++ b/drivers/of/address.c @@ -712,7 +712,7 @@ int __weak pci_register_io_range(phys_addr_t addr, resource_size_t size) } /* add the range to the list */ - range = kzalloc(sizeof(*range), GFP_KERNEL); + range = kzalloc(sizeof(*range), GFP_ATOMIC); if (!range) { err = -ENOMEM; goto end_register; From 4e5c0806a19975c7d9ab9171af53d4e8125342aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Tue, 23 Jun 2015 10:11:19 +0200 Subject: [PATCH 0372/2091] dell-laptop: Fix allocating & freeing SMI buffer page MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit b8830a4e71b15d0364ac8e6c55301eea73f211da upstream. This commit fix kernel crash when probing for rfkill devices in dell-laptop driver failed. Function free_page() was incorrectly used on struct page * instead of virtual address of SMI buffer. This commit also simplify allocating page for SMI buffer by using __get_free_page() function instead of sequential call of functions alloc_page() and page_address(). Signed-off-by: Pali Rohár Acked-by: Michal Hocko Signed-off-by: Darren Hart Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/dell-laptop.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/platform/x86/dell-laptop.c b/drivers/platform/x86/dell-laptop.c index d688d806a8a51..2c1d5f5432a9a 100644 --- a/drivers/platform/x86/dell-laptop.c +++ b/drivers/platform/x86/dell-laptop.c @@ -305,7 +305,6 @@ static const struct dmi_system_id dell_quirks[] __initconst = { }; static struct calling_interface_buffer *buffer; -static struct page *bufferpage; static DEFINE_MUTEX(buffer_mutex); static int hwswitch_state; @@ -1896,12 +1895,11 @@ static int __init dell_init(void) * Allocate buffer below 4GB for SMI data--only 32-bit physical addr * is passed to SMI handler. 
*/ - bufferpage = alloc_page(GFP_KERNEL | GFP_DMA32); - if (!bufferpage) { + buffer = (void *)__get_free_page(GFP_KERNEL | GFP_DMA32); + if (!buffer) { ret = -ENOMEM; goto fail_buffer; } - buffer = page_address(bufferpage); ret = dell_setup_rfkill(); @@ -1965,7 +1963,7 @@ static int __init dell_init(void) cancel_delayed_work_sync(&dell_rfkill_work); dell_cleanup_rfkill(); fail_rfkill: - free_page((unsigned long)bufferpage); + free_page((unsigned long)buffer); fail_buffer: platform_device_del(platform_device); fail_platform_device2: From 4473d8ccadce054ba37c02f1cf131ae007a5e32e Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 22 Jun 2015 13:53:48 +0200 Subject: [PATCH 0373/2091] ovl: lookup whiteouts outside iterate_dir() commit cdb672795876d7bc1870aed9a2d7cb59f43d1d96 upstream. If jffs2 can deadlock on overlayfs readdir because it takes the same lock on ->iterate() as in ->lookup(). Fix by moving whiteout checking outside iterate_dir(). Optimized by collecting potential whiteouts (DT_CHR) in a temporary list and if non-empty iterating throug these and checking for a 0/0 chardev. Signed-off-by: Miklos Szeredi Fixes: 49c21e1cacd7 ("ovl: check whiteout while reading directory") Reported-by: Roman Yeryomin Signed-off-by: Greg Kroah-Hartman --- fs/overlayfs/readdir.c | 77 +++++++++++++++++++++++++++--------------- 1 file changed, 49 insertions(+), 28 deletions(-) diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index 907870e81a72e..70e9af5516004 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -23,6 +23,7 @@ struct ovl_cache_entry { u64 ino; struct list_head l_node; struct rb_node node; + struct ovl_cache_entry *next_maybe_whiteout; bool is_whiteout; char name[]; }; @@ -39,7 +40,7 @@ struct ovl_readdir_data { struct rb_root root; struct list_head *list; struct list_head middle; - struct dentry *dir; + struct ovl_cache_entry *first_maybe_whiteout; int count; int err; }; @@ -79,7 +80,7 @@ static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root, return NULL; } -static struct ovl_cache_entry *ovl_cache_entry_new(struct dentry *dir, +static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd, const char *name, int len, u64 ino, unsigned int d_type) { @@ -98,29 +99,8 @@ static struct ovl_cache_entry *ovl_cache_entry_new(struct dentry *dir, p->is_whiteout = false; if (d_type == DT_CHR) { - struct dentry *dentry; - const struct cred *old_cred; - struct cred *override_cred; - - override_cred = prepare_creds(); - if (!override_cred) { - kfree(p); - return NULL; - } - - /* - * CAP_DAC_OVERRIDE for lookup - */ - cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); - old_cred = override_creds(override_cred); - - dentry = lookup_one_len(name, dir, len); - if (!IS_ERR(dentry)) { - p->is_whiteout = ovl_is_whiteout(dentry); - dput(dentry); - } - revert_creds(old_cred); - put_cred(override_cred); + p->next_maybe_whiteout = rdd->first_maybe_whiteout; + rdd->first_maybe_whiteout = p; } return p; } @@ -148,7 +128,7 @@ static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd, return 0; } - p = ovl_cache_entry_new(rdd->dir, name, len, ino, d_type); + p = ovl_cache_entry_new(rdd, name, len, ino, d_type); if (p == NULL) return -ENOMEM; @@ -169,7 +149,7 @@ static int ovl_fill_lower(struct ovl_readdir_data *rdd, if (p) { list_move_tail(&p->l_node, &rdd->middle); } else { - p = ovl_cache_entry_new(rdd->dir, name, namelen, ino, d_type); + p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type); if (p == NULL) rdd->err = -ENOMEM; else 
@@ -219,6 +199,43 @@ static int ovl_fill_merge(struct dir_context *ctx, const char *name, return ovl_fill_lower(rdd, name, namelen, offset, ino, d_type); } +static int ovl_check_whiteouts(struct dentry *dir, struct ovl_readdir_data *rdd) +{ + int err; + struct ovl_cache_entry *p; + struct dentry *dentry; + const struct cred *old_cred; + struct cred *override_cred; + + override_cred = prepare_creds(); + if (!override_cred) + return -ENOMEM; + + /* + * CAP_DAC_OVERRIDE for lookup + */ + cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); + old_cred = override_creds(override_cred); + + err = mutex_lock_killable(&dir->d_inode->i_mutex); + if (!err) { + while (rdd->first_maybe_whiteout) { + p = rdd->first_maybe_whiteout; + rdd->first_maybe_whiteout = p->next_maybe_whiteout; + dentry = lookup_one_len(p->name, dir, p->len); + if (!IS_ERR(dentry)) { + p->is_whiteout = ovl_is_whiteout(dentry); + dput(dentry); + } + } + mutex_unlock(&dir->d_inode->i_mutex); + } + revert_creds(old_cred); + put_cred(override_cred); + + return err; +} + static inline int ovl_dir_read(struct path *realpath, struct ovl_readdir_data *rdd) { @@ -229,7 +246,7 @@ static inline int ovl_dir_read(struct path *realpath, if (IS_ERR(realfile)) return PTR_ERR(realfile); - rdd->dir = realpath->dentry; + rdd->first_maybe_whiteout = NULL; rdd->ctx.pos = 0; do { rdd->count = 0; @@ -238,6 +255,10 @@ static inline int ovl_dir_read(struct path *realpath, if (err >= 0) err = rdd->err; } while (!err && rdd->count); + + if (!err && rdd->first_maybe_whiteout) + err = ovl_check_whiteouts(realpath->dentry, rdd); + fput(realfile); return err; From 34f94b18c76f5912815cb1612f40deb7fcb4d0f6 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Wed, 8 Apr 2015 19:59:20 +0300 Subject: [PATCH 0374/2091] of: return NUMA_NO_NODE from fallback of_node_to_nid() commit c8fff7bc5bba6bd59cad40441c189c4efe7190f6 upstream. Node 0 might be offline as well as any other numa node, in this case kernel cannot handle memory allocation and crashes. 
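A standalone model of why NUMA_NO_NODE is the safer fallback (not kernel code; the four-node online map is invented for the example): an explicit node hint is trusted as-is by the allocator, whereas NUMA_NO_NODE means "no preference, use any online node".

#include <stdio.h>

#define NUMA_NO_NODE (-1)
#define NR_NODES 4

static const int node_online[NR_NODES] = { 0, 1, 1, 0 }; /* node 0 offline */

/* Toy allocator node selection: an explicit hint is used unchecked, which
 * is what makes a hard-coded node 0 dangerous when node 0 is offline. */
static int pick_node(int hint)
{
    int n;

    if (hint != NUMA_NO_NODE)
        return hint;
    for (n = 0; n < NR_NODES; n++)
        if (node_online[n])
            return n;
    return NUMA_NO_NODE;
}

int main(void)
{
    int n;

    n = pick_node(0);
    printf("hint 0:            node %d (online=%d)\n", n, node_online[n]);
    n = pick_node(NUMA_NO_NODE);
    printf("hint NUMA_NO_NODE: node %d (online=%d)\n", n, node_online[n]);
    return 0;
}

Returning numa_node_id() from the weak fallback has the same flavour of problem as the hard-coded hint: it can name a node that is not usable for allocation.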
Signed-off-by: Konstantin Khlebnikov Fixes: 0c3f061c195c ("of: implement of_node_to_nid as a weak function") Signed-off-by: Grant Likely Signed-off-by: Greg Kroah-Hartman --- drivers/of/base.c | 2 +- include/linux/of.h | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/of/base.c b/drivers/of/base.c index f0650265febf9..5ed97246c2e7f 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -89,7 +89,7 @@ EXPORT_SYMBOL(of_n_size_cells); #ifdef CONFIG_NUMA int __weak of_node_to_nid(struct device_node *np) { - return numa_node_id(); + return NUMA_NO_NODE; } #endif diff --git a/include/linux/of.h b/include/linux/of.h index b871ff9d81d72..8135d507d0895 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -673,7 +673,10 @@ static inline void of_property_clear_flag(struct property *p, unsigned long flag #if defined(CONFIG_OF) && defined(CONFIG_NUMA) extern int of_node_to_nid(struct device_node *np); #else -static inline int of_node_to_nid(struct device_node *device) { return 0; } +static inline int of_node_to_nid(struct device_node *device) +{ + return NUMA_NO_NODE; +} #endif static inline struct device_node *of_find_matching_node( From 54472f76f659f91ddad03a79723a8a229bb12fff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 29 Apr 2015 20:38:46 +0200 Subject: [PATCH 0375/2091] watchdog: omap: assert the counter being stopped before reprogramming MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 530c11d432727c697629ad5f9d00ee8e2864d453 upstream. The omap watchdog has the annoying behaviour that writes to most registers don't have any effect when the watchdog is already running. Quoting the AM335x reference manual: To modify the timer counter value (the WDT_WCRR register), prescaler ratio (the WDT_WCLR[4:2] PTV bit field), delay configuration value (the WDT_WDLY[31:0] DLY_VALUE bit field), or the load value (the WDT_WLDR[31:0] TIMER_LOAD bit field), the watchdog timer must be disabled by using the start/stop sequence (the WDT_WSPR register). Currently the timer is stopped in the .probe callback but still there are possibilities that yield to a situation where omap_wdt_start is entered with the timer running (e.g. when /dev/watchdog is closed without stopping and then reopened). In such a case programming the timeout silently fails! To circumvent this stop the timer before reprogramming. Assuming one of the first things the watchdog user does is setting the timeout explicitly nothing too bad should happen because this explicit setting works fine. Fixes: 7768a13c252a ("[PATCH] OMAP: Add Watchdog driver support") Signed-off-by: Uwe Kleine-König Reviewed-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck Signed-off-by: Greg Kroah-Hartman --- drivers/watchdog/omap_wdt.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/watchdog/omap_wdt.c b/drivers/watchdog/omap_wdt.c index 1e6be9e405779..c9c97dacf4526 100644 --- a/drivers/watchdog/omap_wdt.c +++ b/drivers/watchdog/omap_wdt.c @@ -132,6 +132,13 @@ static int omap_wdt_start(struct watchdog_device *wdog) pm_runtime_get_sync(wdev->dev); + /* + * Make sure the watchdog is disabled. This is unfortunately required + * because writing to various registers with the watchdog running has no + * effect. 
+ */ + omap_wdt_disable(wdev); + /* initialize prescaler */ while (readl_relaxed(base + OMAP_WATCHDOG_WPS) & 0x01) cpu_relax(); From 8409afae50e61b3f8297483cf8b8abf52f951909 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 7 May 2015 01:08:08 -0700 Subject: [PATCH 0376/2091] gpiolib: Add missing dummies for the unified device properties interface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 496e7ce2a46562938edcb74f65b26068ee8895f6 upstream. If GPIOLIB=n: drivers/leds/leds-gpio.c: In function ‘gpio_leds_create’: drivers/leds/leds-gpio.c:187: error: implicit declaration of function ‘devm_get_gpiod_from_child’ drivers/leds/leds-gpio.c:187: warning: assignment makes pointer from integer without a cast Add dummies for fwnode_get_named_gpiod() and devm_get_gpiod_from_child() for the !GPIOLIB case to fix this. Signed-off-by: Geert Uytterhoeven Fixes: 40b7318319281b1b ("gpio: Support for unified device properties interface") Acked-by: Alexandre Courbot Acked-by: Linus Walleij Signed-off-by: Bryan Wu Signed-off-by: Greg Kroah-Hartman --- include/linux/gpio/consumer.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index 3a7c9ffd5ab93..da042657dc31d 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -406,6 +406,21 @@ static inline int desc_to_gpio(const struct gpio_desc *desc) return -EINVAL; } +/* Child properties interface */ +struct fwnode_handle; + +static inline struct gpio_desc *fwnode_get_named_gpiod( + struct fwnode_handle *fwnode, const char *propname) +{ + return ERR_PTR(-ENOSYS); +} + +static inline struct gpio_desc *devm_get_gpiod_from_child( + struct device *dev, const char *con_id, struct fwnode_handle *child) +{ + return ERR_PTR(-ENOSYS); +} + #endif /* CONFIG_GPIOLIB */ /* From c4087d1c9897eade8ea4122956d4357f5d6a2043 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Wed, 29 Apr 2015 16:36:43 +0000 Subject: [PATCH 0377/2091] clk: Fix JSON output in debugfs commit 7cb81136d2efe0f5ed9d965857f4756a15e6c338 upstream. key/value pairs in a JSON object must be separated by a comma. After adding the properties "accuracy" and "phase" the JSON output of /sys/kernel/debug/clk/clk_dump is invalid. So add the missing commas to fix it. Fixes: 5279fc402ae5 ("clk: add clk accuracy retrieval support") Signed-off-by: Stefan Wahren [sboyd@codeaurora.org: Added comment in function] Signed-off-by: Stephen Boyd Signed-off-by: Greg Kroah-Hartman --- drivers/clk/clk.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 5b0f41868b425..9f9cadd00bc83 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -230,11 +230,12 @@ static void clk_dump_one(struct seq_file *s, struct clk_core *c, int level) if (!c) return; + /* This should be JSON format, i.e. 
elements separated with a comma */ seq_printf(s, "\"%s\": { ", c->name); seq_printf(s, "\"enable_count\": %d,", c->enable_count); seq_printf(s, "\"prepare_count\": %d,", c->prepare_count); - seq_printf(s, "\"rate\": %lu", clk_core_get_rate(c)); - seq_printf(s, "\"accuracy\": %lu", clk_core_get_accuracy(c)); + seq_printf(s, "\"rate\": %lu,", clk_core_get_rate(c)); + seq_printf(s, "\"accuracy\": %lu,", clk_core_get_accuracy(c)); seq_printf(s, "\"phase\": %d", clk_core_get_phase(c)); } From 9baf2fc882dfe349c227f4d82dd475973dfc8ccd Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 13 May 2015 15:54:40 +0900 Subject: [PATCH 0378/2091] clk: ti: dra7-atl-clock: Fix possible ERR_PTR dereference commit e0cdcda508f110b7ec190dc7c5eb2869ba73a535 upstream. of_clk_get_from_provider() returns ERR_PTR on failure. The dra7-atl-clock driver was not checking its return value and immediately used it in __clk_get_hw(). __clk_get_hw() dereferences supplied clock, if it is not NULL, so in that case it would dereference an ERR_PTR. Fixes: 9ac33b0ce81f ("CLK: TI: Driver for DRA7 ATL (Audio Tracking Logic)") Signed-off-by: Krzysztof Kozlowski Signed-off-by: Stephen Boyd Signed-off-by: Greg Kroah-Hartman --- drivers/clk/ti/clk-dra7-atl.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/clk/ti/clk-dra7-atl.c b/drivers/clk/ti/clk-dra7-atl.c index d86bc46b93bdf..0a1df821860fd 100644 --- a/drivers/clk/ti/clk-dra7-atl.c +++ b/drivers/clk/ti/clk-dra7-atl.c @@ -252,6 +252,11 @@ static int of_dra7_atl_clk_probe(struct platform_device *pdev) } clk = of_clk_get_from_provider(&clkspec); + if (IS_ERR(clk)) { + pr_err("%s: failed to get atl clock %d from provider\n", + __func__, i); + return PTR_ERR(clk); + } cdesc = to_atl_desc(__clk_get_hw(clk)); cdesc->cinfo = cinfo; From e420c99545b699d262cf507244eef0959380e46b Mon Sep 17 00:00:00 2001 From: Hai Li Date: Thu, 25 Jun 2015 18:35:33 -0400 Subject: [PATCH 0379/2091] clk: qcom: Use parent rate when set rate to pixel RCG clock commit 6d451367bfa16fc103604bacd258f534c65d1540 upstream. Since the parent rate has been recalculated, pixel RCG clock should rely on it to find the correct M/N values during set_rate, instead of calling __clk_round_rate() to its parent again. 
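The selection logic after the change can be modelled standalone (the fraction table below is a placeholder, not the real frac_table_pixel; the ±100 kHz window mirrors the delta used in the driver): the M/N pair is chosen against the parent rate the framework has already recalculated, with no second round of parent rounding.

#include <stdio.h>

struct frac_entry {
    int num;
    int den;
};

static const struct frac_entry frac_table[] = {
    { 1, 1 }, { 2, 3 }, { 1, 2 }, { 0, 0 },  /* hypothetical entries */
};

int main(void)
{
    unsigned long rate = 148500000;          /* requested pixel rate */
    unsigned long parent_rate = 297000000;   /* rate the parent settled on */
    unsigned long delta = 100000;
    const struct frac_entry *frac;

    for (frac = frac_table; frac->num; frac++) {
        unsigned long request = rate * (unsigned long)frac->den / frac->num;

        if (parent_rate < request - delta || parent_rate > request + delta)
            continue;
        printf("using M/N = %d/%d (parent %lu, implied %lu)\n",
               frac->num, frac->den, parent_rate, request);
        return 0;
    }
    printf("no M/N ratio matches the parent rate\n");
    return 1;
}

Rounding the parent again, as the removed __clk_round_rate() call did, could disagree with the rate the parent was actually set to and make the search miss or pick the wrong ratio.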
Signed-off-by: Hai Li Tested-by: Archit Taneja Fixes: 99cbd064b059 ("clk: qcom: Support display RCG clocks") [sboyd@codeaurora.org: Silenced unused parent variable warning] Signed-off-by: Stephen Boyd Signed-off-by: Greg Kroah-Hartman --- drivers/clk/qcom/clk-rcg2.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/clk/qcom/clk-rcg2.c b/drivers/clk/qcom/clk-rcg2.c index b95d17fbb8d7e..92936f0912d2f 100644 --- a/drivers/clk/qcom/clk-rcg2.c +++ b/drivers/clk/qcom/clk-rcg2.c @@ -530,19 +530,16 @@ static int clk_pixel_set_rate(struct clk_hw *hw, unsigned long rate, struct clk_rcg2 *rcg = to_clk_rcg2(hw); struct freq_tbl f = *rcg->freq_tbl; const struct frac_entry *frac = frac_table_pixel; - unsigned long request, src_rate; + unsigned long request; int delta = 100000; u32 mask = BIT(rcg->hid_width) - 1; u32 hid_div; - int index = qcom_find_src_index(hw, rcg->parent_map, f.src); - struct clk *parent = clk_get_parent_by_index(hw->clk, index); for (; frac->num; frac++) { request = (rate * frac->den) / frac->num; - src_rate = __clk_round_rate(parent, request); - if ((src_rate < (request - delta)) || - (src_rate > (request + delta))) + if ((parent_rate < (request - delta)) || + (parent_rate > (request + delta))) continue; regmap_read(rcg->clkr.regmap, rcg->cmd_rcgr + CFG_REG, From 759367b8eaa40930166671679a00f5ec97e2c5fa Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 17 Jun 2015 19:41:51 -0400 Subject: [PATCH 0380/2091] pNFS: Fix a memory leak when attempted pnfs fails commit 1ca018d28d96d07788474abf66a5f3e9594841f5 upstream. pnfs_do_write() expects the call to pnfs_write_through_mds() to free the pgio header and to release the layout segment before exiting. The problem is that nfs_pgio_data_destroy() doesn't actually do this; it only frees the memory allocated by nfs_generic_pgio(). Ditto for pnfs_do_read()... Fix in both cases is to add a call to hdr->release(hdr). Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/pnfs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 230606243be6a..219ee6a3f1b3d 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1865,6 +1865,7 @@ pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, mirror->pg_recoalesce = 1; } nfs_pgio_data_destroy(hdr); + hdr->release(hdr); } static enum pnfs_try_status @@ -1979,6 +1980,7 @@ pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, mirror->pg_recoalesce = 1; } nfs_pgio_data_destroy(hdr); + hdr->release(hdr); } /* From 48ac6c92fabea8eaf75d1f25255a502b1c354f48 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 26 Jun 2015 15:37:58 -0400 Subject: [PATCH 0381/2091] pNFS/flexfiles: Fix the reset of struct pgio_header when resending commit d620876990f02788d5a663075df007ffb91bdfad upstream. hdr->good_bytes needs to be set to the length of the request, not zero. 
Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/flexfilelayout/flexfilelayout.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 7d05089e52d6c..6f5f0f425e86b 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -631,7 +631,7 @@ static void ff_layout_reset_write(struct nfs_pgio_header *hdr, bool retry_pnfs) nfs_direct_set_resched_writes(hdr->dreq); /* fake unstable write to let common nfs resend pages */ hdr->verf.committed = NFS_UNSTABLE; - hdr->good_bytes = 0; + hdr->good_bytes = hdr->args.count; } return; } From 4e4c360e0ec888eb2373185e0c204b1f4896046d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 26 May 2015 11:53:52 -0400 Subject: [PATCH 0382/2091] NFS: Fix size of NFSACL SETACL operations commit d683cc49daf7c5afca8cd9654aaa1bf63cdf2ad9 upstream. When encoding the NFSACL SETACL operation, reserve just the estimated size of the ACL rather than a fixed maximum. This eliminates needless zero padding on the wire that the server ignores. Fixes: ee5dc7732bd5 ('NFS: Fix "kernel BUG at fs/nfs/nfs3xdr.c:1338!"') Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/nfs3xdr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 53852a4bd88be..9b04c2e6fffc3 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -1342,7 +1342,7 @@ static void nfs3_xdr_enc_setacl3args(struct rpc_rqst *req, if (args->npages != 0) xdr_write_pages(xdr, args->pages, 0, args->len); else - xdr_reserve_space(xdr, NFS_ACL_INLINE_BUFSIZE); + xdr_reserve_space(xdr, args->len); error = nfsacl_encode(xdr->buf, base, args->inode, (args->mask & NFS_ACL) ? From 9b0702a4f1925ad80e04b1f78012116446e53222 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Fri, 15 May 2015 11:45:31 -0400 Subject: [PATCH 0383/2091] fixing infinite OPEN loop in 4.0 stateid recovery commit e8d975e73e5fa05f983fbf2723120edcf68e0b38 upstream. Problem: When an operation like WRITE receives a BAD_STATEID, even though recovery code clears the RECLAIM_NOGRACE recovery flag before recovering the open state, because of clearing delegation state for the associated inode, nfs_inode_find_state_and_recover() gets called and it makes the same state with RECLAIM_NOGRACE flag again. As a results, when we restart looking over the open states, we end up in the infinite loop instead of breaking out in the next test of state flags. Solution: unset the RECLAIM_NOGRACE set because of calling of nfs_inode_find_state_and_recover() after returning from calling recover_open() function. 
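The shape of the fix can be seen in a standalone model of the restart loop (not the NFS code; the flag array stands in for the per-state RECLAIM_NOGRACE bits and recover() for the recovery side effect that re-marks the state):

#include <stdio.h>

#define NENTRIES 3

static int flagged[NENTRIES] = { 0, 1, 0 };

/* Stand-in for the recovery side effect: clearing delegation state ends up
 * marking the same state for reclaim again. */
static void recover(int i)
{
    flagged[i] = 1;
}

int main(void)
{
    int clear_after_recovery = 1;   /* set to 0 to reproduce the endless loop */
    int i, passes = 0;

restart:
    if (++passes > 10) {
        printf("giving up after %d passes: loop does not terminate\n", passes);
        return 1;
    }
    for (i = 0; i < NENTRIES; i++) {
        if (!flagged[i])
            continue;
        flagged[i] = 0;     /* test_and_clear before recovery */
        recover(i);
        if (clear_after_recovery)
            flagged[i] = 0; /* the added clear_bit() */
        goto restart;
    }
    printf("scan finished after %d pass(es)\n", passes);
    return 0;
}

With the post-recovery clear the scan converges in two passes; without it the re-set flag is found again on every restart and the loop never terminates, which is the reported hang.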
Signed-off-by: Olga Kornievskaia Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/nfs4state.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 2782cfca22650..ddef1dc80cf7d 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1482,6 +1482,8 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs spin_unlock(&state->state_lock); } nfs4_put_open_state(state); + clear_bit(NFS4CLNT_RECLAIM_NOGRACE, + &state->flags); spin_lock(&sp->so_lock); goto restart; } From 380db129455056b9edb8b8643023d5edbcb91595 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 9 Jun 2015 19:43:56 -0400 Subject: [PATCH 0384/2091] nfs: increase size of EXCHANGE_ID name string buffer commit 764ad8ba8cd4c6f836fca9378f8c5121aece0842 upstream. The current buffer is much too small if you have a relatively long hostname. Bring it up to the size of the one that SETCLIENTID has. Reported-by: Michael Skralivetsky Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- include/linux/nfs_xdr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 93ab6071bbe96..e9e9a8dcfb477 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1142,7 +1142,7 @@ struct nfs41_state_protection { struct nfs4_op_map allow; }; -#define NFS4_EXCHANGE_ID_LEN (48) +#define NFS4_EXCHANGE_ID_LEN (127) struct nfs41_exchange_id_args { struct nfs_client *client; nfs4_verifier *verifier; From a400ef50e78554c779742d45c6f7bfa4fa39d990 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 17 Jun 2015 19:56:22 -0400 Subject: [PATCH 0385/2091] NFS: Ensure we set NFS_CONTEXT_RESEND_WRITES when requeuing writes commit c70701131f7a8edea91fc49d11796d342cff7c62 upstream. If a write attempt fails, and the write is queued up for resending to the server, as opposed to being dropped, then we need to set the appropriate flag so that nfs_file_fsync() does the right thing. Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/pnfs.c | 1 + fs/nfs/write.c | 1 + 2 files changed, 2 insertions(+) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 219ee6a3f1b3d..d47c188682b16 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1821,6 +1821,7 @@ int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *hdr) /* Resend all requests through the MDS */ nfs_pageio_init_write(&pgio, hdr->inode, FLUSH_STABLE, true, hdr->completion_ops); + set_bit(NFS_CONTEXT_RESEND_WRITES, &hdr->args.context->flags); return nfs_pageio_resend(&pgio, hdr); } EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index dfc19f1575a19..daf3556428458 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1289,6 +1289,7 @@ static void nfs_initiate_write(struct nfs_pgio_header *hdr, static void nfs_redirty_request(struct nfs_page *req) { nfs_mark_request_dirty(req); + set_bit(NFS_CONTEXT_RESEND_WRITES, &req->wb_context->flags); nfs_unlock_request(req); nfs_end_page_writeback(req); nfs_release_request(req); From 1488271989894c89ddac1fa6764edb89421c2160 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 24 Jun 2015 12:10:23 -0400 Subject: [PATCH 0386/2091] nfs: fix potential credential leak in ff_layout_update_mirror_cred commit a24221dca1868101c9b4b5adde4a6a5b1a3a64a7 upstream. If we have two tasks racing to update a mirror's credentials, then they can end up leaking one (or more) sets of credentials. 
The first task will set mirror->cred and then the second task will just overwrite it. Use a cmpxchg to ensure that the creds are only set once. If we get to the point where we would set mirror->cred and find that they're already set, then we just release the creds that were just found. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/flexfilelayout/flexfilelayoutdev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index 77a2d026aa12b..c19b9a88f7484 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -324,7 +324,8 @@ static int ff_layout_update_mirror_cred(struct nfs4_ff_layout_mirror *mirror, __func__, PTR_ERR(cred)); return PTR_ERR(cred); } else { - mirror->cred = cred; + if (cmpxchg(&mirror->cred, NULL, cred)) + put_rpccred(cred); } } return 0; From 746af0098bf877cac19f821997cfb21a530e343d Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 24 Jun 2015 12:10:24 -0400 Subject: [PATCH 0387/2091] nfs: always update creds in mirror, even when we have an already connected ds commit 0c8315dd56577445dd1afe6b9cfa06b7efdf2f82 upstream. A ds can be associated with more than one mirror, but we currently skip setting a mirror's credentials if we find that it's already set up with a connected client. The upshot is that we can end up sending DS writes with MDS credentials instead of properly setting them up. Fix nfs4_ff_layout_prepare_ds to always verify that the mirror's credentials are set up, even when we have a DS that's already connected. Reported-by: Tom Haynes Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/flexfilelayout/flexfilelayoutdev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index c19b9a88f7484..f13e1969eedd9 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -387,7 +387,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx, /* matching smp_wmb() in _nfs4_pnfs_v3/4_ds_connect */ smp_rmb(); if (ds->ds_clp) - goto out; + goto out_update_creds; flavor = nfs4_ff_layout_choose_authflavor(mirror); @@ -431,7 +431,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx, } } } - +out_update_creds: if (ff_layout_update_mirror_cred(mirror, ds)) ds = NULL; out: From 213f7d2bbfe91396a5c18ccc4057bcee6674f455 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 1 Jun 2015 15:10:25 -0400 Subject: [PATCH 0388/2091] SUNRPC: Fix a memory leak in the backchannel code commit 88de6af24f2b48b06c514d3c3d0a8f22fafe30bd upstream. req->rq_private_buf isn't initialised when xprt_setup_backchannel calls xprt_free_allocation. 
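The underlying rule is that teardown must mirror setup: free the buffer whose page was actually allocated. A simplified user-space analogue (names invented, not the SUNRPC code):

#include <stdlib.h>

struct bc_req_demo {
        void *rcv_buf_page;     /* page allocated when the request is set up */
        void *private_buf;      /* only initialised later, when the request is used */
};

static void free_allocation_demo(struct bc_req_demo *req)
{
        /* free what setup allocated; private_buf may never have been set up */
        free(req->rcv_buf_page);
        req->rcv_buf_page = NULL;
}
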
Fixes: fb7a0b9addbdb ("nfs41: New backchannel helper routines") Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/backchannel_rqst.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c index 9dd0ea8db463a..28504dfd3dad5 100644 --- a/net/sunrpc/backchannel_rqst.c +++ b/net/sunrpc/backchannel_rqst.c @@ -60,7 +60,7 @@ static void xprt_free_allocation(struct rpc_rqst *req) dprintk("RPC: free allocations for req= %p\n", req); WARN_ON_ONCE(test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state)); - xbufp = &req->rq_private_buf; + xbufp = &req->rq_rcv_buf; free_page((unsigned long)xbufp->head[0].iov_base); xbufp = &req->rq_snd_buf; free_page((unsigned long)xbufp->head[0].iov_base); From 9974db06b353ada77b1c11c13a34ffd00227235a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 4 Jul 2015 16:04:19 -0400 Subject: [PATCH 0389/2091] 9p: forgetting to cancel request on interrupted zero-copy RPC commit a84b69cb6e0a41e86bc593904faa6def3b957343 upstream. If we'd already sent a request and decide to abort it, we *must* issue TFLUSH properly and not just blindly reuse the tag, or we'll get seriously screwed when response eventually arrives and we confuse it for response to later request that had reused the same tag. Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- net/9p/client.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/9p/client.c b/net/9p/client.c index 6f4c4c88db84e..28f36e4556f96 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -843,7 +843,8 @@ static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type, if (err < 0) { if (err == -EIO) c->status = Disconnected; - goto reterr; + if (err != -ERESTARTSYS) + goto reterr; } if (req->status == REQ_STATUS_ERROR) { p9_debug(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err); From 4932ba1eb094b3e7ef56ca9d11751543dce6c1e1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 12 Jul 2015 10:34:29 -0400 Subject: [PATCH 0390/2091] 9p: don't leave a half-initialized inode sitting around commit 0a73d0a204a4a04a1e110539c5a524ae51f91d6d upstream. Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/9p/vfs_inode.c | 3 +-- fs/9p/vfs_inode_dotl.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 703342e309f57..53f1e8a217071 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -540,8 +540,7 @@ static struct inode *v9fs_qid_iget(struct super_block *sb, unlock_new_inode(inode); return inode; error: - unlock_new_inode(inode); - iput(inode); + iget_failed(inode); return ERR_PTR(retval); } diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index 9861c7c951a6d..4d3ecfb55fcf8 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -149,8 +149,7 @@ static struct inode *v9fs_qid_iget_dotl(struct super_block *sb, unlock_new_inode(inode); return inode; error: - unlock_new_inode(inode); - iput(inode); + iget_failed(inode); return ERR_PTR(retval); } From a6a8032fb8622cf5ba914fbb5e8555f411d327d7 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 24 Jun 2015 17:24:33 +0300 Subject: [PATCH 0391/2091] rbd: use GFP_NOIO in rbd_obj_request_create() commit 5a60e87603c4c533492c515b7f62578189b03c9c upstream. rbd_obj_request_create() is called on the main I/O path, so we need to use GFP_NOIO to make sure allocation doesn't blow back on us. 
Not all callers need this, but I'm still hardcoding the flag inside rather than making it a parameter because a) this is going to stable, and b) those callers shouldn't really use rbd_obj_request_create() and will be fixed in the future. More memory allocation fixes will follow. Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder Signed-off-by: Greg Kroah-Hartman --- drivers/block/rbd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index ec6c5c6e1ac94..53f253574abe7 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -2001,11 +2001,11 @@ static struct rbd_obj_request *rbd_obj_request_create(const char *object_name, rbd_assert(obj_request_type_valid(type)); size = strlen(object_name) + 1; - name = kmalloc(size, GFP_KERNEL); + name = kmalloc(size, GFP_NOIO); if (!name) return NULL; - obj_request = kmem_cache_zalloc(rbd_obj_request_cache, GFP_KERNEL); + obj_request = kmem_cache_zalloc(rbd_obj_request_cache, GFP_NOIO); if (!obj_request) { kfree(name); return NULL; From fb74882f63287d2a19edbc3a7a425ff21de771b7 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 28 Jun 2015 14:18:16 +0100 Subject: [PATCH 0392/2091] agp/intel: Fix typo in needs_ilk_vtd_wa() commit 8b572a4200828b4e75cc22ed2f494b58d5372d65 upstream. In needs_ilk_vtd_wa(), we pass in the GPU device but compared it against the ids for the mobile GPU and the mobile host bridge. That latter is impossible and so likely was just a typo for the desktop GPU device id (which is also buggy). Fixes commit da88a5f7f7d434e2cde1b3e19d952e6d84533662 Author: Chris Wilson Date: Wed Feb 13 09:31:53 2013 +0000 drm/i915: Disable WC PTE updates to w/a buggy IOMMU on ILK Reported-by: Ting-Wei Lan Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91127 References: https://bugzilla.freedesktop.org/show_bug.cgi?id=60391 Signed-off-by: Chris Wilson Cc: Daniel Vetter Reviewed-by: Daniel Vetter Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/char/agp/intel-gtt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c index 0b4188b9af7cd..c6dea3f6917bd 100644 --- a/drivers/char/agp/intel-gtt.c +++ b/drivers/char/agp/intel-gtt.c @@ -581,7 +581,7 @@ static inline int needs_ilk_vtd_wa(void) /* Query intel_iommu to see if we need the workaround. Presumably that * was loaded first. */ - if ((gpu_devid == PCI_DEVICE_ID_INTEL_IRONLAKE_M_HB || + if ((gpu_devid == PCI_DEVICE_ID_INTEL_IRONLAKE_D_IG || gpu_devid == PCI_DEVICE_ID_INTEL_IRONLAKE_M_IG) && intel_iommu_gfx_mapped) return 1; From 634c7dc4993bfa57e66d82ca1394c472cc16e57a Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 22 Jun 2015 18:39:43 +0100 Subject: [PATCH 0393/2091] ARM: dove: fix legacy dove IRQ numbers commit 5d6bed2a9c8bc161bff4cc7cede00f2e0e27a7e7 upstream. v3.18 changed handle_IRQ() to call __handle_domain_irq(), which now rejects attempts to deliver IRQ0. Since IRQ 0 is used as the timer interrupt (just like the PIT on x86), this causes boot to fail as the bogomips calibration never completes. Fix this by shuffling all interrupts up by one. 
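The scheme is simply a fixed +1 offset between the hardware interrupt number and the Linux IRQ number; roughly (illustrative sketch, the offset macro name is invented):

#define DOVE_IRQ_OFFSET 1       /* keep Linux IRQ 0 unused */

static inline unsigned int dove_hwirq_to_irq(unsigned int hwirq)
{
        /* hardware interrupts 0..63 become Linux IRQs 1..64, so the
         * timer (hardware interrupt 0) no longer maps to the rejected IRQ 0 */
        return hwirq + DOVE_IRQ_OFFSET;
}
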
Fixes: a71b092a9c68 ("ARM: Convert handle_IRQ to use __handle_domain_irq") Signed-off-by: Russell King Signed-off-by: Gregory CLEMENT Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-dove/include/mach/irqs.h | 118 ++++++++++++------------- arch/arm/mach-dove/irq.c | 8 +- 2 files changed, 63 insertions(+), 63 deletions(-) diff --git a/arch/arm/mach-dove/include/mach/irqs.h b/arch/arm/mach-dove/include/mach/irqs.h index 03d401d20453e..3f29e6bca0586 100644 --- a/arch/arm/mach-dove/include/mach/irqs.h +++ b/arch/arm/mach-dove/include/mach/irqs.h @@ -14,73 +14,73 @@ /* * Dove Low Interrupt Controller */ -#define IRQ_DOVE_BRIDGE 0 -#define IRQ_DOVE_H2C 1 -#define IRQ_DOVE_C2H 2 -#define IRQ_DOVE_NAND 3 -#define IRQ_DOVE_PDMA 4 -#define IRQ_DOVE_SPI1 5 -#define IRQ_DOVE_SPI0 6 -#define IRQ_DOVE_UART_0 7 -#define IRQ_DOVE_UART_1 8 -#define IRQ_DOVE_UART_2 9 -#define IRQ_DOVE_UART_3 10 -#define IRQ_DOVE_I2C 11 -#define IRQ_DOVE_GPIO_0_7 12 -#define IRQ_DOVE_GPIO_8_15 13 -#define IRQ_DOVE_GPIO_16_23 14 -#define IRQ_DOVE_PCIE0_ERR 15 -#define IRQ_DOVE_PCIE0 16 -#define IRQ_DOVE_PCIE1_ERR 17 -#define IRQ_DOVE_PCIE1 18 -#define IRQ_DOVE_I2S0 19 -#define IRQ_DOVE_I2S0_ERR 20 -#define IRQ_DOVE_I2S1 21 -#define IRQ_DOVE_I2S1_ERR 22 -#define IRQ_DOVE_USB_ERR 23 -#define IRQ_DOVE_USB0 24 -#define IRQ_DOVE_USB1 25 -#define IRQ_DOVE_GE00_RX 26 -#define IRQ_DOVE_GE00_TX 27 -#define IRQ_DOVE_GE00_MISC 28 -#define IRQ_DOVE_GE00_SUM 29 -#define IRQ_DOVE_GE00_ERR 30 -#define IRQ_DOVE_CRYPTO 31 +#define IRQ_DOVE_BRIDGE (1 + 0) +#define IRQ_DOVE_H2C (1 + 1) +#define IRQ_DOVE_C2H (1 + 2) +#define IRQ_DOVE_NAND (1 + 3) +#define IRQ_DOVE_PDMA (1 + 4) +#define IRQ_DOVE_SPI1 (1 + 5) +#define IRQ_DOVE_SPI0 (1 + 6) +#define IRQ_DOVE_UART_0 (1 + 7) +#define IRQ_DOVE_UART_1 (1 + 8) +#define IRQ_DOVE_UART_2 (1 + 9) +#define IRQ_DOVE_UART_3 (1 + 10) +#define IRQ_DOVE_I2C (1 + 11) +#define IRQ_DOVE_GPIO_0_7 (1 + 12) +#define IRQ_DOVE_GPIO_8_15 (1 + 13) +#define IRQ_DOVE_GPIO_16_23 (1 + 14) +#define IRQ_DOVE_PCIE0_ERR (1 + 15) +#define IRQ_DOVE_PCIE0 (1 + 16) +#define IRQ_DOVE_PCIE1_ERR (1 + 17) +#define IRQ_DOVE_PCIE1 (1 + 18) +#define IRQ_DOVE_I2S0 (1 + 19) +#define IRQ_DOVE_I2S0_ERR (1 + 20) +#define IRQ_DOVE_I2S1 (1 + 21) +#define IRQ_DOVE_I2S1_ERR (1 + 22) +#define IRQ_DOVE_USB_ERR (1 + 23) +#define IRQ_DOVE_USB0 (1 + 24) +#define IRQ_DOVE_USB1 (1 + 25) +#define IRQ_DOVE_GE00_RX (1 + 26) +#define IRQ_DOVE_GE00_TX (1 + 27) +#define IRQ_DOVE_GE00_MISC (1 + 28) +#define IRQ_DOVE_GE00_SUM (1 + 29) +#define IRQ_DOVE_GE00_ERR (1 + 30) +#define IRQ_DOVE_CRYPTO (1 + 31) /* * Dove High Interrupt Controller */ -#define IRQ_DOVE_AC97 32 -#define IRQ_DOVE_PMU 33 -#define IRQ_DOVE_CAM 34 -#define IRQ_DOVE_SDIO0 35 -#define IRQ_DOVE_SDIO1 36 -#define IRQ_DOVE_SDIO0_WAKEUP 37 -#define IRQ_DOVE_SDIO1_WAKEUP 38 -#define IRQ_DOVE_XOR_00 39 -#define IRQ_DOVE_XOR_01 40 -#define IRQ_DOVE_XOR0_ERR 41 -#define IRQ_DOVE_XOR_10 42 -#define IRQ_DOVE_XOR_11 43 -#define IRQ_DOVE_XOR1_ERR 44 -#define IRQ_DOVE_LCD_DCON 45 -#define IRQ_DOVE_LCD1 46 -#define IRQ_DOVE_LCD0 47 -#define IRQ_DOVE_GPU 48 -#define IRQ_DOVE_PERFORM_MNTR 49 -#define IRQ_DOVE_VPRO_DMA1 51 -#define IRQ_DOVE_SSP_TIMER 54 -#define IRQ_DOVE_SSP 55 -#define IRQ_DOVE_MC_L2_ERR 56 -#define IRQ_DOVE_CRYPTO_ERR 59 -#define IRQ_DOVE_GPIO_24_31 60 -#define IRQ_DOVE_HIGH_GPIO 61 -#define IRQ_DOVE_SATA 62 +#define IRQ_DOVE_AC97 (1 + 32) +#define IRQ_DOVE_PMU (1 + 33) +#define IRQ_DOVE_CAM (1 + 34) +#define IRQ_DOVE_SDIO0 (1 + 35) +#define IRQ_DOVE_SDIO1 (1 + 36) +#define 
IRQ_DOVE_SDIO0_WAKEUP (1 + 37) +#define IRQ_DOVE_SDIO1_WAKEUP (1 + 38) +#define IRQ_DOVE_XOR_00 (1 + 39) +#define IRQ_DOVE_XOR_01 (1 + 40) +#define IRQ_DOVE_XOR0_ERR (1 + 41) +#define IRQ_DOVE_XOR_10 (1 + 42) +#define IRQ_DOVE_XOR_11 (1 + 43) +#define IRQ_DOVE_XOR1_ERR (1 + 44) +#define IRQ_DOVE_LCD_DCON (1 + 45) +#define IRQ_DOVE_LCD1 (1 + 46) +#define IRQ_DOVE_LCD0 (1 + 47) +#define IRQ_DOVE_GPU (1 + 48) +#define IRQ_DOVE_PERFORM_MNTR (1 + 49) +#define IRQ_DOVE_VPRO_DMA1 (1 + 51) +#define IRQ_DOVE_SSP_TIMER (1 + 54) +#define IRQ_DOVE_SSP (1 + 55) +#define IRQ_DOVE_MC_L2_ERR (1 + 56) +#define IRQ_DOVE_CRYPTO_ERR (1 + 59) +#define IRQ_DOVE_GPIO_24_31 (1 + 60) +#define IRQ_DOVE_HIGH_GPIO (1 + 61) +#define IRQ_DOVE_SATA (1 + 62) /* * DOVE General Purpose Pins */ -#define IRQ_DOVE_GPIO_START 64 +#define IRQ_DOVE_GPIO_START 65 #define NR_GPIO_IRQS 64 /* diff --git a/arch/arm/mach-dove/irq.c b/arch/arm/mach-dove/irq.c index 4a5a7aedcb763..df0223f76fa92 100644 --- a/arch/arm/mach-dove/irq.c +++ b/arch/arm/mach-dove/irq.c @@ -126,14 +126,14 @@ __exception_irq_entry dove_legacy_handle_irq(struct pt_regs *regs) stat = readl_relaxed(dove_irq_base + IRQ_CAUSE_LOW_OFF); stat &= readl_relaxed(dove_irq_base + IRQ_MASK_LOW_OFF); if (stat) { - unsigned int hwirq = __fls(stat); + unsigned int hwirq = 1 + __fls(stat); handle_IRQ(hwirq, regs); return; } stat = readl_relaxed(dove_irq_base + IRQ_CAUSE_HIGH_OFF); stat &= readl_relaxed(dove_irq_base + IRQ_MASK_HIGH_OFF); if (stat) { - unsigned int hwirq = 32 + __fls(stat); + unsigned int hwirq = 33 + __fls(stat); handle_IRQ(hwirq, regs); return; } @@ -144,8 +144,8 @@ void __init dove_init_irq(void) { int i; - orion_irq_init(0, IRQ_VIRT_BASE + IRQ_MASK_LOW_OFF); - orion_irq_init(32, IRQ_VIRT_BASE + IRQ_MASK_HIGH_OFF); + orion_irq_init(1, IRQ_VIRT_BASE + IRQ_MASK_LOW_OFF); + orion_irq_init(33, IRQ_VIRT_BASE + IRQ_MASK_HIGH_OFF); #ifdef CONFIG_MULTI_IRQ_HANDLER set_handle_irq(dove_legacy_handle_irq); From 7e7ef0282a07dc7fc267c6e753f1a2e0919f7e01 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Wed, 1 Jul 2015 13:38:52 +0300 Subject: [PATCH 0394/2091] EDAC, octeon: Fix broken build due to model helper renames commit 75a15a7864c9e281c74a1670b10b69d1d7ff1c82 upstream. Commit debe6a623d3c ("MIPS: OCTEON: Update octeon-model.h code for new SoCs.") renamed some SoC model helper functions, but forgot to update the EDAC drivers resulting in build failures. Fix that. 
Signed-off-by: Aaro Koskinen Acked-by: David Daney Cc: Mauro Carvalho Chehab Cc: Ralf Baechle Cc: linux-edac Cc: linux-mips@linux-mips.org Link: http://lkml.kernel.org/r/1435747132-10954-1-git-send-email-aaro.koskinen@nokia.com Signed-off-by: Borislav Petkov Signed-off-by: Greg Kroah-Hartman --- drivers/edac/octeon_edac-l2c.c | 2 +- drivers/edac/octeon_edac-lmc.c | 2 +- drivers/edac/octeon_edac-pc.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/edac/octeon_edac-l2c.c b/drivers/edac/octeon_edac-l2c.c index 7e98084d36451..afea7fc625ccb 100644 --- a/drivers/edac/octeon_edac-l2c.c +++ b/drivers/edac/octeon_edac-l2c.c @@ -151,7 +151,7 @@ static int octeon_l2c_probe(struct platform_device *pdev) l2c->ctl_name = "octeon_l2c_err"; - if (OCTEON_IS_MODEL(OCTEON_FAM_1_PLUS)) { + if (OCTEON_IS_OCTEON1PLUS()) { union cvmx_l2t_err l2t_err; union cvmx_l2d_err l2d_err; diff --git a/drivers/edac/octeon_edac-lmc.c b/drivers/edac/octeon_edac-lmc.c index bb19e0732681c..cda6dab5067a5 100644 --- a/drivers/edac/octeon_edac-lmc.c +++ b/drivers/edac/octeon_edac-lmc.c @@ -234,7 +234,7 @@ static int octeon_lmc_edac_probe(struct platform_device *pdev) layers[0].size = 1; layers[0].is_virt_csrow = false; - if (OCTEON_IS_MODEL(OCTEON_FAM_1_PLUS)) { + if (OCTEON_IS_OCTEON1PLUS()) { union cvmx_lmcx_mem_cfg0 cfg0; cfg0.u64 = cvmx_read_csr(CVMX_LMCX_MEM_CFG0(0)); diff --git a/drivers/edac/octeon_edac-pc.c b/drivers/edac/octeon_edac-pc.c index 0f83c33a7d1fc..2ab6cf24c9598 100644 --- a/drivers/edac/octeon_edac-pc.c +++ b/drivers/edac/octeon_edac-pc.c @@ -73,7 +73,7 @@ static int co_cache_error_event(struct notifier_block *this, edac_device_handle_ce(p->ed, cpu, 0, "dcache"); /* Clear the error indication */ - if (OCTEON_IS_MODEL(OCTEON_FAM_2)) + if (OCTEON_IS_OCTEON2()) write_octeon_c0_dcacheerr(1); else write_octeon_c0_dcacheerr(0); From 409e901e4d22f51cbe19a9ec1f943fbbdcc35853 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 4 Jul 2015 16:11:05 -0400 Subject: [PATCH 0395/2091] p9_client_write(): avoid double p9_free_req() commit 67e808fbb0404a12d9b9830a44bbb48d447d8bc9 upstream. Braino in "9p: switch p9_client_write() to passing it struct iov_iter *"; if response is impossible to parse and we discard the request, get the out of the loop right there. Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- net/9p/client.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/9p/client.c b/net/9p/client.c index 28f36e4556f96..81925b9233185 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1648,6 +1648,7 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err) if (*err) { trace_9p_protocol_dump(clnt, req->rc); p9_free_req(clnt, req); + break; } p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", count); From 2dd7172e36d015013859e5afd3abaeffa92e9397 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 24 Jun 2015 13:14:18 -0700 Subject: [PATCH 0396/2091] ARM64: smp: Fix suspicious RCU usage with ipi tracepoints commit be081d9bf3e163a9ed1ca2f0f14f08424c7f9016 upstream. John Stultz reported an RCU splat on ARM with ipi trace events enabled. It looks like the same problem exists on ARM64. At this point in the IPI handling path we haven't called irq_enter() yet, so RCU doesn't know that we're about to exit idle and properly warns that we're using RCU from an idle CPU. Use trace_ipi_entry_rcuidle() instead of trace_ipi_entry() so that RCU is informed about our exit from idle. Cc: John Stultz Cc: Nicolas Pitre Acked-by: Steven Rostedt Reviewed-by: Paul E. 
McKenney Fixes: 45ed695ac10a ("ARM64: add IPI tracepoints") Signed-off-by: Stephen Boyd Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/smp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 2cb008177252f..d3a202b85ba64 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -569,7 +569,7 @@ void handle_IPI(int ipinr, struct pt_regs *regs) struct pt_regs *old_regs = set_irq_regs(regs); if ((unsigned)ipinr < NR_IPI) { - trace_ipi_entry(ipi_types[ipinr]); + trace_ipi_entry_rcuidle(ipi_types[ipinr]); __inc_irq_stat(cpu, ipi_irqs[ipinr]); } @@ -612,7 +612,7 @@ void handle_IPI(int ipinr, struct pt_regs *regs) } if ((unsigned)ipinr < NR_IPI) - trace_ipi_exit(ipi_types[ipinr]); + trace_ipi_exit_rcuidle(ipi_types[ipinr]); set_irq_regs(old_regs); } From 83893dd22e36d0da4b27ed7d8930669d84044d23 Mon Sep 17 00:00:00 2001 From: Xi Wang Date: Thu, 25 Jun 2015 05:47:39 -0700 Subject: [PATCH 0397/2091] arm64: bpf: fix out-of-bounds read in bpf2a64_offset() commit 8eee539ddea09bccae2426f09b0ba6a18b72b691 upstream. Problems occur when bpf_to or bpf_from has value prog->len - 1 (e.g., "Very long jump backwards" in test_bpf where the last instruction is a jump): since ctx->offset has length prog->len, ctx->offset[bpf_to + 1] or ctx->offset[bpf_from + 1] will cause an out-of-bounds read, leading to a bogus jump offset and kernel panic. This patch moves updating ctx->offset to after calling build_insn(), and changes indexing to use bpf_to and bpf_from without + 1. Fixes: e54bcde3d69d ("arm64: eBPF JIT compiler") Cc: Zi Shen Lim Cc: Will Deacon Acked-by: Alexei Starovoitov Signed-off-by: Xi Wang Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/net/bpf_jit_comp.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index dc6a4842683aa..c81ddd4ff7f78 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -113,9 +113,9 @@ static inline void emit_a64_mov_i(const int is64, const int reg, static inline int bpf2a64_offset(int bpf_to, int bpf_from, const struct jit_ctx *ctx) { - int to = ctx->offset[bpf_to + 1]; + int to = ctx->offset[bpf_to]; /* -1 to account for the Branch instruction */ - int from = ctx->offset[bpf_from + 1] - 1; + int from = ctx->offset[bpf_from] - 1; return to - from; } @@ -640,10 +640,11 @@ static int build_body(struct jit_ctx *ctx) const struct bpf_insn *insn = &prog->insnsi[i]; int ret; + ret = build_insn(insn, ctx); + if (ctx->image == NULL) ctx->offset[i] = ctx->idx; - ret = build_insn(insn, ctx); if (ret > 0) { i++; continue; From 1aca08fe1fba6dd5e895a379519c57a5918d5281 Mon Sep 17 00:00:00 2001 From: Xi Wang Date: Thu, 25 Jun 2015 18:39:15 -0700 Subject: [PATCH 0398/2091] arm64: bpf: fix endianness conversion bugs commit d63903bbc30c7ccad040851dfdb4da12d9a17bcf upstream. Upper bits should be zeroed in endianness conversion: - even when there's no need to change endianness (i.e., BPF_FROM_BE on big endian or BPF_FROM_LE on little endian); - after rev16. This patch fixes such bugs by emitting extra instructions to clear upper bits. 
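The required semantics can be shown with a plain C model (user-space illustration, not the JIT itself): converting the low 16 or 32 bits of a 64-bit BPF register must zero the upper bits even when the byte order already matches and no swap instruction is emitted.

#include <stdint.h>

static uint64_t demo_to_be16(uint64_t reg)
{
        uint16_t lo = (uint16_t)reg;            /* keep only the low 16 bits */
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
        lo = (uint16_t)((lo << 8) | (lo >> 8)); /* swap only needed on LE hosts */
#endif
        return lo;      /* implicit zero-extension to 64 bits: the UXTH step */
}
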
Cc: Zi Shen Lim Acked-by: Alexei Starovoitov Fixes: e54bcde3d69d ("arm64: eBPF JIT compiler") Signed-off-by: Xi Wang Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/net/bpf_jit.h | 4 ++++ arch/arm64/net/bpf_jit_comp.c | 22 ++++++++++++++++++++-- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h index de0a81a539a01..98a26ce82d266 100644 --- a/arch/arm64/net/bpf_jit.h +++ b/arch/arm64/net/bpf_jit.h @@ -110,6 +110,10 @@ /* Rd = Rn >> shift; signed */ #define A64_ASR(sf, Rd, Rn, shift) A64_SBFM(sf, Rd, Rn, shift, (sf) ? 63 : 31) +/* Zero extend */ +#define A64_UXTH(sf, Rd, Rn) A64_UBFM(sf, Rd, Rn, 0, 15) +#define A64_UXTW(sf, Rd, Rn) A64_UBFM(sf, Rd, Rn, 0, 31) + /* Move wide (immediate) */ #define A64_MOVEW(sf, Rd, imm16, shift, type) \ aarch64_insn_gen_movewide(Rd, imm16, shift, \ diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index c81ddd4ff7f78..c047598b09e05 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -289,23 +289,41 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) case BPF_ALU | BPF_END | BPF_FROM_BE: #ifdef CONFIG_CPU_BIG_ENDIAN if (BPF_SRC(code) == BPF_FROM_BE) - break; + goto emit_bswap_uxt; #else /* !CONFIG_CPU_BIG_ENDIAN */ if (BPF_SRC(code) == BPF_FROM_LE) - break; + goto emit_bswap_uxt; #endif switch (imm) { case 16: emit(A64_REV16(is64, dst, dst), ctx); + /* zero-extend 16 bits into 64 bits */ + emit(A64_UXTH(is64, dst, dst), ctx); break; case 32: emit(A64_REV32(is64, dst, dst), ctx); + /* upper 32 bits already cleared */ break; case 64: emit(A64_REV64(dst, dst), ctx); break; } break; +emit_bswap_uxt: + switch (imm) { + case 16: + /* zero-extend 16 bits into 64 bits */ + emit(A64_UXTH(is64, dst, dst), ctx); + break; + case 32: + /* zero-extend 32 bits into 64 bits */ + emit(A64_UXTW(is64, dst, dst), ctx); + break; + case 64: + /* nop */ + break; + } + break; /* dst = imm */ case BPF_ALU | BPF_MOV | BPF_K: case BPF_ALU64 | BPF_MOV | BPF_K: From 6b344aadf96db338f644f5d205596b82e6248d07 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Wed, 1 Jul 2015 14:08:31 +0200 Subject: [PATCH 0399/2091] arm64: Don't report clear pmds and puds as huge commit fd28f5d439fca77348c129d5b73043a56f8a0296 upstream. The current pmd_huge() and pud_huge() functions simply check if the table bit is not set and reports the entries as huge in that case. This is counter-intuitive as a clear pmd/pud cannot also be a huge pmd/pud, and it is inconsistent with at least arm and x86. To prevent others from making the same mistake as me in looking at code that calls these functions and to fix an issue with KVM on arm64 that causes memory corruption due to incorrect page reference counting resulting from this mistake, let's change the behavior. 
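In other words, the predicate becomes "non-empty and not a table entry"; a simplified sketch (the bit value here is a placeholder, not the real arm64 definition):

#include <stdbool.h>
#include <stdint.h>

#define DEMO_PMD_TABLE_BIT      (1UL << 1)      /* placeholder for PMD_TABLE_BIT */

static bool demo_pmd_huge(uint64_t pmd)
{
        /* a cleared entry (0) is neither a table nor a huge mapping */
        return pmd != 0 && !(pmd & DEMO_PMD_TABLE_BIT);
}
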
Signed-off-by: Christoffer Dall Reviewed-by: Steve Capper Acked-by: Marc Zyngier Fixes: 084bd29810a5 ("ARM64: mm: HugeTLB support.") Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/mm/hugetlbpage.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 2de9d2e59d968..0eeb4f0930a08 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -40,13 +40,13 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) int pmd_huge(pmd_t pmd) { - return !(pmd_val(pmd) & PMD_TABLE_BIT); + return pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT); } int pud_huge(pud_t pud) { #ifndef __PAGETABLE_PMD_FOLDED - return !(pud_val(pud) & PUD_TABLE_BIT); + return pud_val(pud) && !(pud_val(pud) & PUD_TABLE_BIT); #else return 0; #endif From a0238976f43770541b4f899415f0e75800df2d4a Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Wed, 24 Jun 2015 16:40:04 +0530 Subject: [PATCH 0400/2091] perf bench numa: Fix to show proper convergence stats commit 2b42b09b88c831ba4da2d669581dde371c38c2af upstream. With commit: e1e455f4f4d3 (perf tools: Work around lack of sched_getcpu in glibc < 2.6), perf_bench numa mem with -c or -m option is not able to correctly calculate convergence. With the above commit, sched_getcpu always seems to return -1. The intention of commit e1e455f was to add a sched_getcpu in glibc < 2.6. Hence keep the sched_getcpu definition under an ifdef. This regression happened occurred between v4.0 and v4.1 Signed-off-by: Srikar Dronamraju Acked-by: Ingo Molnar Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Vinson Lee Fixes: e1e455f4f4d3 ("perf tools: Work around lack of sched_getcpu in glibc < 2.6") Link: http://lkml.kernel.org/r/20150624111004.GA5220@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/cloexec.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/perf/util/cloexec.c b/tools/perf/util/cloexec.c index 85b523885f9d7..2babddaa24813 100644 --- a/tools/perf/util/cloexec.c +++ b/tools/perf/util/cloexec.c @@ -7,11 +7,15 @@ static unsigned long flag = PERF_FLAG_FD_CLOEXEC; +#ifdef __GLIBC_PREREQ +#if !__GLIBC_PREREQ(2, 6) int __weak sched_getcpu(void) { errno = ENOSYS; return -1; } +#endif +#endif static int perf_flag_probe(void) { From ec5ea3004ddc46c09c6805119cedb7b8f516b997 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Fri, 19 Jun 2015 21:37:56 +0100 Subject: [PATCH 0401/2091] ARM: 8393/1: smp: Fix suspicious RCU usage with ipi tracepoints commit 398f74569cebbf06bc6b069442bcd0e9616ca465 upstream. John Stultz reports an RCU splat on boot with ARM ipi trace events enabled. =============================== [ INFO: suspicious RCU usage. ] 4.1.0-rc7-00033-gb5bed2f #153 Not tainted ------------------------------- include/trace/events/ipi.h:68 suspicious rcu_dereference_check() usage! other info that might help us debug this: RCU used illegally from idle CPU! rcu_scheduler_active = 1, debug_locks = 0 RCU used illegally from extended quiescent state! no locks held by swapper/0/0. 
stack backtrace: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.1.0-rc7-00033-gb5bed2f #153 Hardware name: Qualcomm (Flattened Device Tree) [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (dump_stack+0x70/0xbc) [] (dump_stack) from [] (handle_IPI+0x428/0x604) [] (handle_IPI) from [] (gic_handle_irq+0x54/0x5c) [] (gic_handle_irq) from [] (__irq_svc+0x44/0x7c) Exception stack(0xc09f3f48 to 0xc09f3f90) 3f40: 00000001 00000001 00000000 c09f73b8 c09f4528 c0a5de9c 3f60: c076b4f0 00000000 00000000 c09ef108 c0a5cec1 00000001 00000000 c09f3f90 3f80: c026bf60 c0210ab8 20000113 ffffffff [] (__irq_svc) from [] (arch_cpu_idle+0x20/0x3c) [] (arch_cpu_idle) from [] (cpu_startup_entry+0x2c0/0x5dc) [] (cpu_startup_entry) from [] (start_kernel+0x358/0x3c4) [] (start_kernel) from [<8020807c>] (0x8020807c) At this point in the IPI handling path we haven't called irq_enter() yet, so RCU doesn't know that we're about to exit idle and properly warns that we're using RCU from an idle CPU. Use trace_ipi_entry_rcuidle() instead of trace_ipi_entry() so that RCU is informed about our exit from idle. Fixes: 365ec7b17327 ("ARM: add IPI tracepoints") Reported-by: John Stultz Tested-by: John Stultz Acked-by: Steven Rostedt Reviewed-by: Paul E. McKenney Signed-off-by: Stephen Boyd Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/kernel/smp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index cca5b87581855..f11d825270761 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -576,7 +576,7 @@ void handle_IPI(int ipinr, struct pt_regs *regs) struct pt_regs *old_regs = set_irq_regs(regs); if ((unsigned)ipinr < NR_IPI) { - trace_ipi_entry(ipi_types[ipinr]); + trace_ipi_entry_rcuidle(ipi_types[ipinr]); __inc_irq_stat(cpu, ipi_irqs[ipinr]); } @@ -635,7 +635,7 @@ void handle_IPI(int ipinr, struct pt_regs *regs) } if ((unsigned)ipinr < NR_IPI) - trace_ipi_exit(ipi_types[ipinr]); + trace_ipi_exit_rcuidle(ipi_types[ipinr]); set_irq_regs(old_regs); } From abf09a2bc0b62809acf20486190071696f5f1d7b Mon Sep 17 00:00:00 2001 From: Szabolcs Nagy Date: Wed, 1 Jul 2015 23:08:10 +0100 Subject: [PATCH 0402/2091] ARM: 8397/1: fix vdsomunge not to depend on glibc specific error.h commit 13ee9fdba96577eb1583dcd7b15767ef623fae12 upstream. If the host toolchain is not glibc based then the arm kernel build fails with arch/arm/vdso/vdsomunge.c:53:19: fatal error: error.h: No such file or directory error.h is a glibc only header (ie not available in musl, newlib and bsd libcs). Changed the error reporting to standard conforming code to avoid depending on specific C implementations. Signed-off-by: Szabolcs Nagy Acked-by: Will Deacon Fixes: 8512287a8165 ("ARM: 8330/1: add VDSO user-space code") Signed-off-by: Nathan Lynch Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/vdso/vdsomunge.c | 56 +++++++++++++++++++++++---------------- 1 file changed, 33 insertions(+), 23 deletions(-) diff --git a/arch/arm/vdso/vdsomunge.c b/arch/arm/vdso/vdsomunge.c index 9005b07296c8b..aedec81d11988 100644 --- a/arch/arm/vdso/vdsomunge.c +++ b/arch/arm/vdso/vdsomunge.c @@ -45,13 +45,11 @@ * it does. */ -#define _GNU_SOURCE - #include #include #include -#include #include +#include #include #include #include @@ -82,11 +80,25 @@ #define EF_ARM_ABI_FLOAT_HARD 0x400 #endif +static int failed; +static const char *argv0; static const char *outfile; +static void fail(const char *fmt, ...) 
+{ + va_list ap; + + failed = 1; + fprintf(stderr, "%s: ", argv0); + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + exit(EXIT_FAILURE); +} + static void cleanup(void) { - if (error_message_count > 0 && outfile != NULL) + if (failed && outfile != NULL) unlink(outfile); } @@ -119,68 +131,66 @@ int main(int argc, char **argv) int infd; atexit(cleanup); + argv0 = argv[0]; if (argc != 3) - error(EXIT_FAILURE, 0, "Usage: %s [infile] [outfile]", argv[0]); + fail("Usage: %s [infile] [outfile]\n", argv[0]); infile = argv[1]; outfile = argv[2]; infd = open(infile, O_RDONLY); if (infd < 0) - error(EXIT_FAILURE, errno, "Cannot open %s", infile); + fail("Cannot open %s: %s\n", infile, strerror(errno)); if (fstat(infd, &stat) != 0) - error(EXIT_FAILURE, errno, "Failed stat for %s", infile); + fail("Failed stat for %s: %s\n", infile, strerror(errno)); inbuf = mmap(NULL, stat.st_size, PROT_READ, MAP_PRIVATE, infd, 0); if (inbuf == MAP_FAILED) - error(EXIT_FAILURE, errno, "Failed to map %s", infile); + fail("Failed to map %s: %s\n", infile, strerror(errno)); close(infd); inhdr = inbuf; if (memcmp(&inhdr->e_ident, ELFMAG, SELFMAG) != 0) - error(EXIT_FAILURE, 0, "Not an ELF file"); + fail("Not an ELF file\n"); if (inhdr->e_ident[EI_CLASS] != ELFCLASS32) - error(EXIT_FAILURE, 0, "Unsupported ELF class"); + fail("Unsupported ELF class\n"); swap = inhdr->e_ident[EI_DATA] != HOST_ORDER; if (read_elf_half(inhdr->e_type, swap) != ET_DYN) - error(EXIT_FAILURE, 0, "Not a shared object"); + fail("Not a shared object\n"); - if (read_elf_half(inhdr->e_machine, swap) != EM_ARM) { - error(EXIT_FAILURE, 0, "Unsupported architecture %#x", - inhdr->e_machine); - } + if (read_elf_half(inhdr->e_machine, swap) != EM_ARM) + fail("Unsupported architecture %#x\n", inhdr->e_machine); e_flags = read_elf_word(inhdr->e_flags, swap); if (EF_ARM_EABI_VERSION(e_flags) != EF_ARM_EABI_VER5) { - error(EXIT_FAILURE, 0, "Unsupported EABI version %#x", - EF_ARM_EABI_VERSION(e_flags)); + fail("Unsupported EABI version %#x\n", + EF_ARM_EABI_VERSION(e_flags)); } if (e_flags & EF_ARM_ABI_FLOAT_HARD) - error(EXIT_FAILURE, 0, - "Unexpected hard-float flag set in e_flags"); + fail("Unexpected hard-float flag set in e_flags\n"); clear_soft_float = !!(e_flags & EF_ARM_ABI_FLOAT_SOFT); outfd = open(outfile, O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR); if (outfd < 0) - error(EXIT_FAILURE, errno, "Cannot open %s", outfile); + fail("Cannot open %s: %s\n", outfile, strerror(errno)); if (ftruncate(outfd, stat.st_size) != 0) - error(EXIT_FAILURE, errno, "Cannot truncate %s", outfile); + fail("Cannot truncate %s: %s\n", outfile, strerror(errno)); outbuf = mmap(NULL, stat.st_size, PROT_READ | PROT_WRITE, MAP_SHARED, outfd, 0); if (outbuf == MAP_FAILED) - error(EXIT_FAILURE, errno, "Failed to map %s", outfile); + fail("Failed to map %s: %s\n", outfile, strerror(errno)); close(outfd); @@ -195,7 +205,7 @@ int main(int argc, char **argv) } if (msync(outbuf, stat.st_size, MS_SYNC) != 0) - error(EXIT_FAILURE, errno, "Failed to sync %s", outfile); + fail("Failed to sync %s: %s\n", outfile, strerror(errno)); return EXIT_SUCCESS; } From 53c34bd0491cadbe4cbc211327f608ebc8ba4eaa Mon Sep 17 00:00:00 2001 From: Sanidhya Kashyap Date: Sat, 21 Mar 2015 12:57:50 -0400 Subject: [PATCH 0403/2091] hpfs: kstrdup() out of memory handling commit ce657611baf902f14ae559ce4e0787ead6712067 upstream. There is a possibility of nothing being allocated to the new_opts in case of memory pressure, therefore return ENOMEM for such case. 
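The defensive pattern is the usual one for any duplicating allocation; a user-space analogue for illustration (not the hpfs code itself):

#include <errno.h>
#include <stdlib.h>
#include <string.h>

static int copy_mount_options(const char *data, char **out)
{
        char *new_opts = strdup(data);  /* kstrdup(data, GFP_KERNEL) in the kernel */

        if (!new_opts)
                return -ENOMEM;         /* fail the remount instead of using NULL */
        *out = new_opts;
        return 0;
}
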
Signed-off-by: Sanidhya Kashyap Signed-off-by: Mikulas Patocka Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/hpfs/super.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index 7cd00d3a7c9b7..e12a06b300e0a 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -424,11 +424,14 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data) int o; struct hpfs_sb_info *sbi = hpfs_sb(s); char *new_opts = kstrdup(data, GFP_KERNEL); - + + if (!new_opts) + return -ENOMEM; + sync_filesystem(s); *flags |= MS_NOATIME; - + hpfs_lock(s); uid = sbi->sb_uid; gid = sbi->sb_gid; umask = 0777 & ~sbi->sb_mode; From 206f4fba0c512a45a7cc547ca7388a4e2d7c9c03 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 26 Mar 2015 20:47:10 -0700 Subject: [PATCH 0404/2091] hpfs: hpfs_error: Remove static buffer, use vsprintf extension %pV instead commit a28e4b2b18ccb90df402da3f21e1a83c9d4f8ec1 upstream. Removing unnecessary static buffers is good. Use the vsprintf %pV extension instead. Signed-off-by: Joe Perches Signed-off-by: Mikulas Patocka Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/hpfs/super.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index e12a06b300e0a..8685c655737f9 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -52,17 +52,20 @@ static void unmark_dirty(struct super_block *s) } /* Filesystem error... */ -static char err_buf[1024]; - void hpfs_error(struct super_block *s, const char *fmt, ...) { + struct va_format vaf; va_list args; va_start(args, fmt); - vsnprintf(err_buf, sizeof(err_buf), fmt, args); + + vaf.fmt = fmt; + vaf.va = &args; + + pr_err("filesystem error: %pV", &vaf); + va_end(args); - pr_err("filesystem error: %s", err_buf); if (!hpfs_sb(s)->sb_was_error) { if (hpfs_sb(s)->sb_err == 2) { pr_cont("; crashing the system because you wanted it\n"); From bf94e2202c008de5ebb87e6f5887aba5b9aa8152 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 9 Jul 2015 11:20:01 -0700 Subject: [PATCH 0405/2091] Fix firmware loader uevent buffer NULL pointer dereference commit 6f957724b94cb19f5c1c97efd01dd4df8ced323c upstream. The firmware class uevent function accessed the "fw_priv->buf" buffer without the proper locking and testing for NULL. This is an old bug (looks like it goes back to 2012 and commit 1244691c73b2: "firmware loader: introduce firmware_buf"), but for some reason it's triggering only now in 4.2-rc1. Shuah Khan is trying to bisect what it is that causes this to trigger more easily, but in the meantime let's just fix the bug since others are hitting it too (at least Ingo reports having seen it as well). 
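The shape of the fix is the standard "test the shared pointer under the lock that protects it" pattern; a condensed user-space analogue (names invented, not the driver code):

#include <pthread.h>
#include <stddef.h>

struct fw_demo {
        pthread_mutex_t lock;
        const char *fw_id;      /* stands in for fw_priv->buf; may be NULL */
};

static int emit_uevent_demo(struct fw_demo *fw, int (*emit)(const char *id))
{
        int err = 0;

        pthread_mutex_lock(&fw->lock);
        if (fw->fw_id)                  /* only dereference while the lock is held */
                err = emit(fw->fw_id);
        pthread_mutex_unlock(&fw->lock);
        return err;
}
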
Reported-and-tested-by: Shuah Khan Acked-by: Ming Lei Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/base/firmware_class.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c index 171841ad10089..4d1d9de4f9bfc 100644 --- a/drivers/base/firmware_class.c +++ b/drivers/base/firmware_class.c @@ -544,10 +544,8 @@ static void fw_dev_release(struct device *dev) kfree(fw_priv); } -static int firmware_uevent(struct device *dev, struct kobj_uevent_env *env) +static int do_firmware_uevent(struct firmware_priv *fw_priv, struct kobj_uevent_env *env) { - struct firmware_priv *fw_priv = to_firmware_priv(dev); - if (add_uevent_var(env, "FIRMWARE=%s", fw_priv->buf->fw_id)) return -ENOMEM; if (add_uevent_var(env, "TIMEOUT=%i", loading_timeout)) @@ -558,6 +556,18 @@ static int firmware_uevent(struct device *dev, struct kobj_uevent_env *env) return 0; } +static int firmware_uevent(struct device *dev, struct kobj_uevent_env *env) +{ + struct firmware_priv *fw_priv = to_firmware_priv(dev); + int err = 0; + + mutex_lock(&fw_lock); + if (fw_priv->buf) + err = do_firmware_uevent(fw_priv, env); + mutex_unlock(&fw_lock); + return err; +} + static struct class firmware_class = { .name = "firmware", .class_attrs = firmware_class_attrs, From c86df9fa76f914d84e02caf2aaf11a22ca1820c2 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 6 Jul 2015 23:18:37 +0300 Subject: [PATCH 0406/2091] mm: avoid setting up anonymous pages into file mapping commit 6b7339f4c31ad69c8e9c0b2859276e22cf72176d upstream. Reading page fault handler code I've noticed that under right circumstances kernel would map anonymous pages into file mappings: if the VMA doesn't have vm_ops->fault() and the VMA wasn't fully populated on ->mmap(), kernel would handle page fault to not populated pte with do_anonymous_page(). Let's change page fault handler to use do_anonymous_page() only on anonymous VMA (->vm_ops == NULL) and make sure that the VMA is not shared. For file mappings without vm_ops->fault() or shred VMA without vm_ops, page fault on pte_none() entry would lead to SIGBUS. Signed-off-by: Kirill A. Shutemov Acked-by: Oleg Nesterov Cc: Andrew Morton Cc: Willy Tarreau Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/memory.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 22e037e3364e0..2a9e09870c201 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2669,6 +2669,10 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, pte_unmap(page_table); + /* File mapping without ->vm_ops ? 
*/ + if (vma->vm_flags & VM_SHARED) + return VM_FAULT_SIGBUS; + /* Check if we need to add a guard page to the stack */ if (check_stack_guard_page(vma, address) < 0) return VM_FAULT_SIGSEGV; @@ -3097,6 +3101,9 @@ static int do_fault(struct mm_struct *mm, struct vm_area_struct *vma, - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; pte_unmap(page_table); + /* The VMA was not fully populated on mmap() or missing VM_DONTEXPAND */ + if (!vma->vm_ops->fault) + return VM_FAULT_SIGBUS; if (!(flags & FAULT_FLAG_WRITE)) return do_read_fault(mm, vma, address, pmd, pgoff, flags, orig_pte); @@ -3242,13 +3249,12 @@ static int handle_pte_fault(struct mm_struct *mm, barrier(); if (!pte_present(entry)) { if (pte_none(entry)) { - if (vma->vm_ops) { - if (likely(vma->vm_ops->fault)) - return do_fault(mm, vma, address, pte, - pmd, flags, entry); - } - return do_anonymous_page(mm, vma, address, - pte, pmd, flags); + if (vma->vm_ops) + return do_fault(mm, vma, address, pte, pmd, + flags, entry); + + return do_anonymous_page(mm, vma, address, pte, pmd, + flags); } return do_swap_page(mm, vma, address, pte, pmd, flags, entry); From eee18465135ca687330666ce1886603c3c265ae5 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 20 Jul 2015 14:29:58 -0700 Subject: [PATCH 0407/2091] x86/mpx: Do not set ->vm_ops on MPX VMAs commit a89652769470d12cd484ee3d3f7bde0742be8d96 upstream. MPX setups private anonymous mapping, but uses vma->vm_ops too. This can confuse core VM, as it relies on vm->vm_ops to distinguish file VMAs from anonymous. As result we will get SIGBUS, because handle_pte_fault() thinks it's file VMA without vm_ops->fault and it doesn't know how to handle the situation properly. Let's fix that by not setting ->vm_ops. We don't really need ->vm_ops here: MPX VMA can be detected with VM_MPX flag. And vma_merge() will not merge MPX VMA with non-MPX VMA, because ->vm_flags won't match. The only thing left is name of VMA. I'm not sure if it's part of ABI, or we can just drop it. The patch keep it by providing arch_vma_name() on x86. Signed-off-by: Kirill A. Shutemov Signed-off-by: Dave Hansen Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dave@sr71.net Link: http://lkml.kernel.org/r/20150720212958.305CC3E9@viggo.jf.intel.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/mmap.c | 7 +++++++ arch/x86/mm/mpx.c | 20 +------------------- 2 files changed, 8 insertions(+), 19 deletions(-) diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index 9d518d693b4b7..844b06d67df4d 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -126,3 +126,10 @@ void arch_pick_mmap_layout(struct mm_struct *mm) mm->get_unmapped_area = arch_get_unmapped_area_topdown; } } + +const char *arch_vma_name(struct vm_area_struct *vma) +{ + if (vma->vm_flags & VM_MPX) + return "[mpx]"; + return NULL; +} diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index c439ec4782160..4d1c11c07fe1f 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -18,26 +18,9 @@ #include #include -static const char *mpx_mapping_name(struct vm_area_struct *vma) -{ - return "[mpx]"; -} - -static struct vm_operations_struct mpx_vma_ops = { - .name = mpx_mapping_name, -}; - -static int is_mpx_vma(struct vm_area_struct *vma) -{ - return (vma->vm_ops == &mpx_vma_ops); -} - /* * This is really a simplified "vm_mmap". it only handles MPX * bounds tables (the bounds directory is user-allocated). - * - * Later on, we use the vma->vm_ops to uniquely identify these - * VMAs. 
*/ static unsigned long mpx_mmap(unsigned long len) { @@ -83,7 +66,6 @@ static unsigned long mpx_mmap(unsigned long len) ret = -ENOMEM; goto out; } - vma->vm_ops = &mpx_vma_ops; if (vm_flags & VM_LOCKED) { up_write(&mm->mmap_sem); @@ -661,7 +643,7 @@ static int zap_bt_entries(struct mm_struct *mm, * so stop immediately and return an error. This * probably results in a SIGSEGV. */ - if (!is_mpx_vma(vma)) + if (!(vma->vm_flags & VM_MPX)) return -EINVAL; len = min(vma->vm_end, end) - addr; From 89e419960fb6a260f6a112821507d516117d5aa1 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 3 Aug 2015 09:30:08 -0700 Subject: [PATCH 0408/2091] Linux 4.1.4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index e3cdec4898be2..36f3225cdf1fb 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 1 -SUBLEVEL = 3 +SUBLEVEL = 4 EXTRAVERSION = NAME = Series 4800 From bbf9f2c9cf06ccb1543c23501f77f6d34868d009 Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Tue, 7 Jul 2015 15:45:45 +1000 Subject: [PATCH 0409/2091] cxl: Fix off by one error allowing subsequent mmap page to be accessed commit 10a5894f2dedd8a26b3132497445b314c0d952c4 upstream. It was discovered that if a process mmaped their problem state area they were able to access one page more than expected, potentially allowing them to access the problem state area of an unrelated process. This was due to a simple off by one error in the mmap fault handler introduced in 0712dc7e73e59d79bcead5d5520acf4e9e917e87 ("cxl: Fix issues when unmapping contexts"), which is fixed in this patch. Fixes: 0712dc7e73e5 ("cxl: Fix issues when unmapping contexts") Signed-off-by: Ian Munsie Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- drivers/misc/cxl/context.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c index d1b55fe62817d..e4dc8cdf67a34 100644 --- a/drivers/misc/cxl/context.c +++ b/drivers/misc/cxl/context.c @@ -113,11 +113,11 @@ static int cxl_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) if (ctx->afu->current_mode == CXL_MODE_DEDICATED) { area = ctx->afu->psn_phys; - if (offset > ctx->afu->adapter->ps_size) + if (offset >= ctx->afu->adapter->ps_size) return VM_FAULT_SIGBUS; } else { area = ctx->psn_phys; - if (offset > ctx->psn_size) + if (offset >= ctx->psn_size) return VM_FAULT_SIGBUS; } From 4a0c377cda3533ff80c60c4fd46f787aff4715c3 Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Fri, 10 Jul 2015 09:04:25 +1000 Subject: [PATCH 0410/2091] cxl: Check if afu is not null in cxl_slbia commit 2c069a118fe1d80c47dca84e1561045fc7f3cc9e upstream. The pointer to an AFU in the adapter's list of AFUs can be null if we're in the process of removing AFUs. The afu_list_lock doesn't guard against this. Say we have 2 slices, and we're in the process of removing cxl. - We remove the AFUs in order (see cxl_remove). In cxl_remove_afu for AFU 0, we take the lock, set adapter->afu[0] = NULL, and release the lock. - Then we get an slbia. In cxl_slbia we take the lock, and set afu = adapter->afu[0], which is NULL. - Therefore our attempt to check afu->enabled will blow up. Therefore, check if afu is a null pointer before dereferencing it. 
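The arithmetic is the classic boundary case: for an area of S bytes, valid offsets are 0 through S - 1, so the check must also reject offset == S (sketch only, not the cxl code):

#include <stdbool.h>
#include <stddef.h>

static bool offset_ok(size_t offset, size_t area_size)
{
        /* 'offset > area_size' would wrongly accept offset == area_size,
         * exposing one page beyond the mapped area */
        return offset < area_size;
}
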
Signed-off-by: Daniel Axtens Acked-by: Michael Neuling Acked-by: Ian Munsie Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- drivers/misc/cxl/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/cxl/main.c b/drivers/misc/cxl/main.c index 8ccddceead667..de350dd46218d 100644 --- a/drivers/misc/cxl/main.c +++ b/drivers/misc/cxl/main.c @@ -73,7 +73,7 @@ static inline void cxl_slbia_core(struct mm_struct *mm) spin_lock(&adapter->afu_list_lock); for (slice = 0; slice < adapter->slices; slice++) { afu = adapter->afu[slice]; - if (!afu->enabled) + if (!afu || !afu->enabled) continue; rcu_read_lock(); idr_for_each_entry(&afu->contexts_idr, ctx, id) From 5f0f854a0a8185240c934c77e8c9092313ba932e Mon Sep 17 00:00:00 2001 From: "Shreyas B. Prabhu" Date: Tue, 7 Jul 2015 01:39:23 +0530 Subject: [PATCH 0411/2091] powerpc/powernv: Fix race in updating core_idle_state commit b32aadc1a8ed84afbe924cd2ced31cd6a2e67074 upstream. core_idle_state is maintained for each core. It uses 0-7 bits to track whether a thread in the core has entered fastsleep or winkle. 8th bit is used as a lock bit. The lock bit is set in these 2 scenarios- - The thread is first in subcore to wakeup from sleep/winkle. - If its the last thread in the core about to enter sleep/winkle While the lock bit is set, if any other thread in the core wakes up, it loops until the lock bit is cleared before proceeding in the wakeup path. This helps prevent race conditions w.r.t fastsleep workaround and prevents threads from switching to process context before core/subcore resources are restored. But, in the path to sleep/winkle entry, we currently don't check for lock-bit. This exposes us to following race when running with subcore on- First thread in the subcorea Another thread in the same waking up core entering sleep/winkle lwarx r15,0,r14 ori r15,r15,PNV_CORE_IDLE_LOCK_BIT stwcx. r15,0,r14 [Code to restore subcore state] lwarx r15,0,r14 [clear thread bit] stwcx. r15,0,r14 andi. r15,r15,PNV_CORE_IDLE_THREAD_BITS stw r15,0(r14) Here, after the thread entering sleep clears its thread bit in core_idle_state, the value is overwritten by the thread waking up. In such cases when the core enters fastsleep, code mistakes an idle thread as running. Because of this, the first thread waking up from fastsleep which is supposed to resync timebase skips it. So we can end up having a core with stale timebase value. This patch fixes the above race by looping on the lock bit even while entering the idle states. Signed-off-by: Shreyas B. Prabhu Fixes: 7b54e9f213f76 'powernv/powerpc: Add winkle support for offline cpus' Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/idle_power7.S | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S index ccde8f084ce42..112ccf4975620 100644 --- a/arch/powerpc/kernel/idle_power7.S +++ b/arch/powerpc/kernel/idle_power7.S @@ -51,6 +51,22 @@ .text +/* + * Used by threads when the lock bit of core_idle_state is set. + * Threads will spin in HMT_LOW until the lock bit is cleared. + * r14 - pointer to core_idle_state + * r15 - used to load contents of core_idle_state + */ + +core_idle_lock_held: + HMT_LOW +3: lwz r15,0(r14) + andi. 
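The resulting loop simply treats a NULL slice the same as a disabled one; the shape, in illustrative form (names invented):

struct afu_demo { int enabled; };

static void visit_enabled_slices(struct afu_demo **afu, int nr_slices,
                                 void (*visit)(struct afu_demo *))
{
        for (int slice = 0; slice < nr_slices; slice++) {
                /* the NULL test must come before any field access */
                if (!afu[slice] || !afu[slice]->enabled)
                        continue;
                visit(afu[slice]);
        }
}
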
r15,r15,PNV_CORE_IDLE_LOCK_BIT + bne 3b + HMT_MEDIUM + lwarx r15,0,r14 + blr + /* * Pass requested state in r3: * r3 - PNV_THREAD_NAP/SLEEP/WINKLE @@ -150,6 +166,10 @@ power7_enter_nap_mode: ld r14,PACA_CORE_IDLE_STATE_PTR(r13) lwarx_loop1: lwarx r15,0,r14 + + andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT + bnel core_idle_lock_held + andc r15,r15,r7 /* Clear thread bit */ andi. r15,r15,PNV_CORE_IDLE_THREAD_BITS @@ -294,7 +314,7 @@ lwarx_loop2: * workaround undo code or resyncing timebase or restoring context * In either case loop until the lock bit is cleared. */ - bne core_idle_lock_held + bnel core_idle_lock_held cmpwi cr2,r15,0 lbz r4,PACA_SUBCORE_SIBLING_MASK(r13) @@ -319,15 +339,6 @@ lwarx_loop2: isync b common_exit -core_idle_lock_held: - HMT_LOW -core_idle_lock_loop: - lwz r15,0(14) - andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT - bne core_idle_lock_loop - HMT_MEDIUM - b lwarx_loop2 - first_thread_in_subcore: /* First thread in subcore to wakeup */ ori r15,r15,PNV_CORE_IDLE_LOCK_BIT From d474669e907ea28024def319bfcdef6a2c21d6ac Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 10 Jul 2015 10:11:07 -0700 Subject: [PATCH 0412/2091] Revert "Input: synaptics - allocate 3 slots to keep stability in image sensors" commit dbf3c370862d73fcd2c74ca55e254bb02143238d upstream. This reverts commit 63c4fda3c0bb841b1aad1298fc7fe94058fc79f8 as it causes issues with detecting 3-finger taps. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=100481 Acked-by: Benjamin Tissoires --- drivers/input/mouse/synaptics.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 35c8d0ceabeeb..3a32caf06bf1d 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -1199,7 +1199,7 @@ static void set_input_params(struct psmouse *psmouse, ABS_MT_POSITION_Y); /* Image sensors can report per-contact pressure */ input_set_abs_params(dev, ABS_MT_PRESSURE, 0, 255, 0, 0); - input_mt_init_slots(dev, 3, INPUT_MT_POINTER | INPUT_MT_TRACK); + input_mt_init_slots(dev, 2, INPUT_MT_POINTER | INPUT_MT_TRACK); /* Image sensors can signal 4 and 5 finger clicks */ __set_bit(BTN_TOOL_QUADTAP, dev->keybit); From 78eb5efb6f79ca8fbd03cd0082e6bb22ca37fb48 Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Wed, 1 Jul 2015 17:18:37 -0400 Subject: [PATCH 0413/2091] parisc: Fix some PTE/TLB race conditions and optimize __flush_tlb_range based on timing results commit 01ab60570427caa24b9debc369e452e86cd9beb4 upstream. The increased use of pdtlb/pitlb instructions seemed to increase the frequency of random segmentation faults building packages. Further, we had a number of cases where TLB inserts would repeatedly fail and all forward progress would stop. The Haskell ghc package caused a lot of trouble in this area. The final indication of a race in pte handling was this syslog entry on sibaris (C8000): swap_free: Unused swap offset entry 00000004 BUG: Bad page map in process mysqld pte:00000100 pmd:019bbec5 addr:00000000ec464000 vm_flags:00100073 anon_vma:0000000221023828 mapping: (null) index:ec464 CPU: 1 PID: 9176 Comm: mysqld Not tainted 4.0.0-2-parisc64-smp #1 Debian 4.0.5-1 Backtrace: [<0000000040173eb0>] show_stack+0x20/0x38 [<0000000040444424>] dump_stack+0x9c/0x110 [<00000000402a0d38>] print_bad_pte+0x1a8/0x278 [<00000000402a28b8>] unmap_single_vma+0x3d8/0x770 [<00000000402a4090>] zap_page_range+0xf0/0x198 [<00000000402ba2a4>] SyS_madvise+0x404/0x8c0 Note that the pte value is 0 except for the accessed bit 0x100. 
This bit shouldn't be set without the present bit. It should be noted that the madvise system call is probably a trigger for many of the random segmentation faults. In looking at the kernel code, I found the following problems: 1) The pte_clear define didn't take TLB lock when clearing a pte. 2) We didn't test pte present bit inside lock in exception support. 3) The pte and tlb locks needed to merged in order to ensure consistency between page table and TLB. This also has the effect of serializing TLB broadcasts on SMP systems. The attached change implements the above and a few other tweaks to try to improve performance. Based on the timing code, TLB purges are very slow (e.g., ~ 209 cycles per page on rp3440). Thus, I think it beneficial to test the split_tlb variable to avoid duplicate purges. Probably, all PA 2.0 machines have combined TLBs. I dropped using __flush_tlb_range in flush_tlb_mm as I realized all applications and most threads have a stack size that is too large to make this useful. I added some comments to this effect. Since implementing 1 through 3, I haven't had any random segmentation faults on mx3210 (rp3440) in about one week of building code and running as a Debian buildd. Signed-off-by: John David Anglin Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- arch/parisc/include/asm/pgtable.h | 55 ++++++---- arch/parisc/include/asm/tlbflush.h | 53 +++++----- arch/parisc/kernel/cache.c | 105 ++++++++++++------- arch/parisc/kernel/entry.S | 163 ++++++++++++++--------------- arch/parisc/kernel/traps.c | 4 - 5 files changed, 212 insertions(+), 168 deletions(-) diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h index 0a183756d6ec2..f93c4a4e65803 100644 --- a/arch/parisc/include/asm/pgtable.h +++ b/arch/parisc/include/asm/pgtable.h @@ -16,7 +16,7 @@ #include #include -extern spinlock_t pa_dbit_lock; +extern spinlock_t pa_tlb_lock; /* * kern_addr_valid(ADDR) tests if ADDR is pointing to valid kernel @@ -33,6 +33,19 @@ extern spinlock_t pa_dbit_lock; */ #define kern_addr_valid(addr) (1) +/* Purge data and instruction TLB entries. Must be called holding + * the pa_tlb_lock. The TLB purge instructions are slow on SMP + * machines since the purge must be broadcast to all CPUs. + */ + +static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr) +{ + mtsp(mm->context, 1); + pdtlb(addr); + if (unlikely(split_tlb)) + pitlb(addr); +} + /* Certain architectures need to do special things when PTEs * within a page table are directly modified. Thus, the following * hook is made available. 
@@ -42,15 +55,20 @@ extern spinlock_t pa_dbit_lock; *(pteptr) = (pteval); \ } while(0) -extern void purge_tlb_entries(struct mm_struct *, unsigned long); +#define pte_inserted(x) \ + ((pte_val(x) & (_PAGE_PRESENT|_PAGE_ACCESSED)) \ + == (_PAGE_PRESENT|_PAGE_ACCESSED)) -#define set_pte_at(mm, addr, ptep, pteval) \ - do { \ +#define set_pte_at(mm, addr, ptep, pteval) \ + do { \ + pte_t old_pte; \ unsigned long flags; \ - spin_lock_irqsave(&pa_dbit_lock, flags); \ - set_pte(ptep, pteval); \ - purge_tlb_entries(mm, addr); \ - spin_unlock_irqrestore(&pa_dbit_lock, flags); \ + spin_lock_irqsave(&pa_tlb_lock, flags); \ + old_pte = *ptep; \ + set_pte(ptep, pteval); \ + if (pte_inserted(old_pte)) \ + purge_tlb_entries(mm, addr); \ + spin_unlock_irqrestore(&pa_tlb_lock, flags); \ } while (0) #endif /* !__ASSEMBLY__ */ @@ -268,7 +286,7 @@ extern unsigned long *empty_zero_page; #define pte_none(x) (pte_val(x) == 0) #define pte_present(x) (pte_val(x) & _PAGE_PRESENT) -#define pte_clear(mm,addr,xp) do { pte_val(*(xp)) = 0; } while (0) +#define pte_clear(mm, addr, xp) set_pte_at(mm, addr, xp, __pte(0)) #define pmd_flag(x) (pmd_val(x) & PxD_FLAG_MASK) #define pmd_address(x) ((unsigned long)(pmd_val(x) &~ PxD_FLAG_MASK) << PxD_VALUE_SHIFT) @@ -435,15 +453,15 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned if (!pte_young(*ptep)) return 0; - spin_lock_irqsave(&pa_dbit_lock, flags); + spin_lock_irqsave(&pa_tlb_lock, flags); pte = *ptep; if (!pte_young(pte)) { - spin_unlock_irqrestore(&pa_dbit_lock, flags); + spin_unlock_irqrestore(&pa_tlb_lock, flags); return 0; } set_pte(ptep, pte_mkold(pte)); purge_tlb_entries(vma->vm_mm, addr); - spin_unlock_irqrestore(&pa_dbit_lock, flags); + spin_unlock_irqrestore(&pa_tlb_lock, flags); return 1; } @@ -453,11 +471,12 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t old_pte; unsigned long flags; - spin_lock_irqsave(&pa_dbit_lock, flags); + spin_lock_irqsave(&pa_tlb_lock, flags); old_pte = *ptep; - pte_clear(mm,addr,ptep); - purge_tlb_entries(mm, addr); - spin_unlock_irqrestore(&pa_dbit_lock, flags); + set_pte(ptep, __pte(0)); + if (pte_inserted(old_pte)) + purge_tlb_entries(mm, addr); + spin_unlock_irqrestore(&pa_tlb_lock, flags); return old_pte; } @@ -465,10 +484,10 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { unsigned long flags; - spin_lock_irqsave(&pa_dbit_lock, flags); + spin_lock_irqsave(&pa_tlb_lock, flags); set_pte(ptep, pte_wrprotect(*ptep)); purge_tlb_entries(mm, addr); - spin_unlock_irqrestore(&pa_dbit_lock, flags); + spin_unlock_irqrestore(&pa_tlb_lock, flags); } #define pte_same(A,B) (pte_val(A) == pte_val(B)) diff --git a/arch/parisc/include/asm/tlbflush.h b/arch/parisc/include/asm/tlbflush.h index 9d086a599fa05..e84b96478193c 100644 --- a/arch/parisc/include/asm/tlbflush.h +++ b/arch/parisc/include/asm/tlbflush.h @@ -13,6 +13,9 @@ * active at any one time on the Merced bus. This tlb purge * synchronisation is fairly lightweight and harmless so we activate * it on all systems not just the N class. + + * It is also used to ensure PTE updates are atomic and consistent + * with the TLB. 
*/ extern spinlock_t pa_tlb_lock; @@ -24,20 +27,24 @@ extern void flush_tlb_all_local(void *); #define smp_flush_tlb_all() flush_tlb_all() +int __flush_tlb_range(unsigned long sid, + unsigned long start, unsigned long end); + +#define flush_tlb_range(vma, start, end) \ + __flush_tlb_range((vma)->vm_mm->context, start, end) + +#define flush_tlb_kernel_range(start, end) \ + __flush_tlb_range(0, start, end) + /* * flush_tlb_mm() * - * XXX This code is NOT valid for HP-UX compatibility processes, - * (although it will probably work 99% of the time). HP-UX - * processes are free to play with the space id's and save them - * over long periods of time, etc. so we have to preserve the - * space and just flush the entire tlb. We need to check the - * personality in order to do that, but the personality is not - * currently being set correctly. - * - * Of course, Linux processes could do the same thing, but - * we don't support that (and the compilers, dynamic linker, - * etc. do not do that). + * The code to switch to a new context is NOT valid for processes + * which play with the space id's. Thus, we have to preserve the + * space and just flush the entire tlb. However, the compilers, + * dynamic linker, etc, do not manipulate space id's, so there + * could be a significant performance benefit in switching contexts + * and not flushing the whole tlb. */ static inline void flush_tlb_mm(struct mm_struct *mm) @@ -45,10 +52,18 @@ static inline void flush_tlb_mm(struct mm_struct *mm) BUG_ON(mm == &init_mm); /* Should never happen */ #if 1 || defined(CONFIG_SMP) + /* Except for very small threads, flushing the whole TLB is + * faster than using __flush_tlb_range. The pdtlb and pitlb + * instructions are very slow because of the TLB broadcast. + * It might be faster to do local range flushes on all CPUs + * on PA 2.0 systems. + */ flush_tlb_all(); #else /* FIXME: currently broken, causing space id and protection ids - * to go out of sync, resulting in faults on userspace accesses. + * to go out of sync, resulting in faults on userspace accesses. + * This approach needs further investigation since running many + * small applications (e.g., GCC testsuite) is faster on HP-UX. 
*/ if (mm) { if (mm->context != 0) @@ -65,22 +80,12 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, { unsigned long flags, sid; - /* For one page, it's not worth testing the split_tlb variable */ - - mb(); sid = vma->vm_mm->context; purge_tlb_start(flags); mtsp(sid, 1); pdtlb(addr); - pitlb(addr); + if (unlikely(split_tlb)) + pitlb(addr); purge_tlb_end(flags); } - -void __flush_tlb_range(unsigned long sid, - unsigned long start, unsigned long end); - -#define flush_tlb_range(vma,start,end) __flush_tlb_range((vma)->vm_mm->context,start,end) - -#define flush_tlb_kernel_range(start, end) __flush_tlb_range(0,start,end) - #endif diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index f6448c7c62b51..cda6dbbe98426 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -342,12 +342,15 @@ EXPORT_SYMBOL(flush_data_cache_local); EXPORT_SYMBOL(flush_kernel_icache_range_asm); #define FLUSH_THRESHOLD 0x80000 /* 0.5MB */ -int parisc_cache_flush_threshold __read_mostly = FLUSH_THRESHOLD; +static unsigned long parisc_cache_flush_threshold __read_mostly = FLUSH_THRESHOLD; + +#define FLUSH_TLB_THRESHOLD (2*1024*1024) /* 2MB initial TLB threshold */ +static unsigned long parisc_tlb_flush_threshold __read_mostly = FLUSH_TLB_THRESHOLD; void __init parisc_setup_cache_timing(void) { unsigned long rangetime, alltime; - unsigned long size; + unsigned long size, start; alltime = mfctl(16); flush_data_cache(); @@ -364,14 +367,43 @@ void __init parisc_setup_cache_timing(void) /* Racy, but if we see an intermediate value, it's ok too... */ parisc_cache_flush_threshold = size * alltime / rangetime; - parisc_cache_flush_threshold = (parisc_cache_flush_threshold + L1_CACHE_BYTES - 1) &~ (L1_CACHE_BYTES - 1); + parisc_cache_flush_threshold = L1_CACHE_ALIGN(parisc_cache_flush_threshold); if (!parisc_cache_flush_threshold) parisc_cache_flush_threshold = FLUSH_THRESHOLD; if (parisc_cache_flush_threshold > cache_info.dc_size) parisc_cache_flush_threshold = cache_info.dc_size; - printk(KERN_INFO "Setting cache flush threshold to %x (%d CPUs online)\n", parisc_cache_flush_threshold, num_online_cpus()); + printk(KERN_INFO "Setting cache flush threshold to %lu kB\n", + parisc_cache_flush_threshold/1024); + + /* calculate TLB flush threshold */ + + alltime = mfctl(16); + flush_tlb_all(); + alltime = mfctl(16) - alltime; + + size = PAGE_SIZE; + start = (unsigned long) _text; + rangetime = mfctl(16); + while (start < (unsigned long) _end) { + flush_tlb_kernel_range(start, start + PAGE_SIZE); + start += PAGE_SIZE; + size += PAGE_SIZE; + } + rangetime = mfctl(16) - rangetime; + + printk(KERN_DEBUG "Whole TLB flush %lu cycles, flushing %lu bytes %lu cycles\n", + alltime, size, rangetime); + + parisc_tlb_flush_threshold = size * alltime / rangetime; + parisc_tlb_flush_threshold *= num_online_cpus(); + parisc_tlb_flush_threshold = PAGE_ALIGN(parisc_tlb_flush_threshold); + if (!parisc_tlb_flush_threshold) + parisc_tlb_flush_threshold = FLUSH_TLB_THRESHOLD; + + printk(KERN_INFO "Setting TLB flush threshold to %lu kB\n", + parisc_tlb_flush_threshold/1024); } extern void purge_kernel_dcache_page_asm(unsigned long); @@ -403,48 +435,45 @@ void copy_user_page(void *vto, void *vfrom, unsigned long vaddr, } EXPORT_SYMBOL(copy_user_page); -void purge_tlb_entries(struct mm_struct *mm, unsigned long addr) -{ - unsigned long flags; - - /* Note: purge_tlb_entries can be called at startup with - no context. 
*/ - - purge_tlb_start(flags); - mtsp(mm->context, 1); - pdtlb(addr); - pitlb(addr); - purge_tlb_end(flags); -} -EXPORT_SYMBOL(purge_tlb_entries); - -void __flush_tlb_range(unsigned long sid, unsigned long start, - unsigned long end) +/* __flush_tlb_range() + * + * returns 1 if all TLBs were flushed. + */ +int __flush_tlb_range(unsigned long sid, unsigned long start, + unsigned long end) { - unsigned long npages; + unsigned long flags, size; - npages = ((end - (start & PAGE_MASK)) + (PAGE_SIZE - 1)) >> PAGE_SHIFT; - if (npages >= 512) /* 2MB of space: arbitrary, should be tuned */ + size = (end - start); + if (size >= parisc_tlb_flush_threshold) { flush_tlb_all(); - else { - unsigned long flags; + return 1; + } + /* Purge TLB entries for small ranges using the pdtlb and + pitlb instructions. These instructions execute locally + but cause a purge request to be broadcast to other TLBs. */ + if (likely(!split_tlb)) { + while (start < end) { + purge_tlb_start(flags); + mtsp(sid, 1); + pdtlb(start); + purge_tlb_end(flags); + start += PAGE_SIZE; + } + return 0; + } + + /* split TLB case */ + while (start < end) { purge_tlb_start(flags); mtsp(sid, 1); - if (split_tlb) { - while (npages--) { - pdtlb(start); - pitlb(start); - start += PAGE_SIZE; - } - } else { - while (npages--) { - pdtlb(start); - start += PAGE_SIZE; - } - } + pdtlb(start); + pitlb(start); purge_tlb_end(flags); + start += PAGE_SIZE; } + return 0; } static void cacheflush_h_tmp_function(void *dummy) diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index 75819617f93b9..c5ef4081b01d2 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -45,7 +45,7 @@ .level 2.0 #endif - .import pa_dbit_lock,data + .import pa_tlb_lock,data /* space_to_prot macro creates a prot id from a space id */ @@ -420,8 +420,8 @@ SHLREG %r9,PxD_VALUE_SHIFT,\pmd extru \va,31-PAGE_SHIFT,ASM_BITS_PER_PTE,\index dep %r0,31,PAGE_SHIFT,\pmd /* clear offset */ - shladd \index,BITS_PER_PTE_ENTRY,\pmd,\pmd - LDREG %r0(\pmd),\pte /* pmd is now pte */ + shladd \index,BITS_PER_PTE_ENTRY,\pmd,\pmd /* pmd is now pte */ + LDREG %r0(\pmd),\pte bb,>=,n \pte,_PAGE_PRESENT_BIT,\fault .endm @@ -453,57 +453,53 @@ L2_ptep \pgd,\pte,\index,\va,\fault .endm - /* Acquire pa_dbit_lock lock. */ - .macro dbit_lock spc,tmp,tmp1 + /* Acquire pa_tlb_lock lock and recheck page is still present. */ + .macro tlb_lock spc,ptp,pte,tmp,tmp1,fault #ifdef CONFIG_SMP cmpib,COND(=),n 0,\spc,2f - load32 PA(pa_dbit_lock),\tmp + load32 PA(pa_tlb_lock),\tmp 1: LDCW 0(\tmp),\tmp1 cmpib,COND(=) 0,\tmp1,1b nop + LDREG 0(\ptp),\pte + bb,<,n \pte,_PAGE_PRESENT_BIT,2f + b \fault + stw \spc,0(\tmp) 2: #endif .endm - /* Release pa_dbit_lock lock without reloading lock address. */ - .macro dbit_unlock0 spc,tmp + /* Release pa_tlb_lock lock without reloading lock address. */ + .macro tlb_unlock0 spc,tmp #ifdef CONFIG_SMP or,COND(=) %r0,\spc,%r0 stw \spc,0(\tmp) #endif .endm - /* Release pa_dbit_lock lock. */ - .macro dbit_unlock1 spc,tmp + /* Release pa_tlb_lock lock. */ + .macro tlb_unlock1 spc,tmp #ifdef CONFIG_SMP - load32 PA(pa_dbit_lock),\tmp - dbit_unlock0 \spc,\tmp + load32 PA(pa_tlb_lock),\tmp + tlb_unlock0 \spc,\tmp #endif .endm /* Set the _PAGE_ACCESSED bit of the PTE. 
Be clever and * don't needlessly dirty the cache line if it was already set */ - .macro update_ptep spc,ptep,pte,tmp,tmp1 -#ifdef CONFIG_SMP - or,COND(=) %r0,\spc,%r0 - LDREG 0(\ptep),\pte -#endif + .macro update_accessed ptp,pte,tmp,tmp1 ldi _PAGE_ACCESSED,\tmp1 or \tmp1,\pte,\tmp and,COND(<>) \tmp1,\pte,%r0 - STREG \tmp,0(\ptep) + STREG \tmp,0(\ptp) .endm /* Set the dirty bit (and accessed bit). No need to be * clever, this is only used from the dirty fault */ - .macro update_dirty spc,ptep,pte,tmp -#ifdef CONFIG_SMP - or,COND(=) %r0,\spc,%r0 - LDREG 0(\ptep),\pte -#endif + .macro update_dirty ptp,pte,tmp ldi _PAGE_ACCESSED|_PAGE_DIRTY,\tmp or \tmp,\pte,\pte - STREG \pte,0(\ptep) + STREG \pte,0(\ptp) .endm /* bitshift difference between a PFN (based on kernel's PAGE_SIZE) @@ -1148,14 +1144,14 @@ dtlb_miss_20w: L3_ptep ptp,pte,t0,va,dtlb_check_alias_20w - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,dtlb_check_alias_20w + update_accessed ptp,pte,t0,t1 make_insert_tlb spc,pte,prot idtlbt pte,prot - dbit_unlock1 spc,t0 + tlb_unlock1 spc,t0 rfir nop @@ -1174,14 +1170,14 @@ nadtlb_miss_20w: L3_ptep ptp,pte,t0,va,nadtlb_check_alias_20w - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,nadtlb_check_alias_20w + update_accessed ptp,pte,t0,t1 make_insert_tlb spc,pte,prot idtlbt pte,prot - dbit_unlock1 spc,t0 + tlb_unlock1 spc,t0 rfir nop @@ -1202,20 +1198,20 @@ dtlb_miss_11: L2_ptep ptp,pte,t0,va,dtlb_check_alias_11 - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,dtlb_check_alias_11 + update_accessed ptp,pte,t0,t1 make_insert_tlb_11 spc,pte,prot - mfsp %sr1,t0 /* Save sr1 so we can use it in tlb inserts */ + mfsp %sr1,t1 /* Save sr1 so we can use it in tlb inserts */ mtsp spc,%sr1 idtlba pte,(%sr1,va) idtlbp prot,(%sr1,va) - mtsp t0, %sr1 /* Restore sr1 */ - dbit_unlock1 spc,t0 + mtsp t1, %sr1 /* Restore sr1 */ + tlb_unlock1 spc,t0 rfir nop @@ -1235,21 +1231,20 @@ nadtlb_miss_11: L2_ptep ptp,pte,t0,va,nadtlb_check_alias_11 - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,nadtlb_check_alias_11 + update_accessed ptp,pte,t0,t1 make_insert_tlb_11 spc,pte,prot - - mfsp %sr1,t0 /* Save sr1 so we can use it in tlb inserts */ + mfsp %sr1,t1 /* Save sr1 so we can use it in tlb inserts */ mtsp spc,%sr1 idtlba pte,(%sr1,va) idtlbp prot,(%sr1,va) - mtsp t0, %sr1 /* Restore sr1 */ - dbit_unlock1 spc,t0 + mtsp t1, %sr1 /* Restore sr1 */ + tlb_unlock1 spc,t0 rfir nop @@ -1269,16 +1264,16 @@ dtlb_miss_20: L2_ptep ptp,pte,t0,va,dtlb_check_alias_20 - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,dtlb_check_alias_20 + update_accessed ptp,pte,t0,t1 make_insert_tlb spc,pte,prot - f_extend pte,t0 + f_extend pte,t1 idtlbt pte,prot - dbit_unlock1 spc,t0 + tlb_unlock1 spc,t0 rfir nop @@ -1297,16 +1292,16 @@ nadtlb_miss_20: L2_ptep ptp,pte,t0,va,nadtlb_check_alias_20 - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,nadtlb_check_alias_20 + update_accessed ptp,pte,t0,t1 make_insert_tlb spc,pte,prot - f_extend pte,t0 + f_extend pte,t1 - idtlbt pte,prot - dbit_unlock1 spc,t0 + idtlbt pte,prot + tlb_unlock1 spc,t0 rfir nop @@ -1406,14 +1401,14 @@ itlb_miss_20w: L3_ptep ptp,pte,t0,va,itlb_fault - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,itlb_fault + update_accessed ptp,pte,t0,t1 make_insert_tlb spc,pte,prot iitlbt pte,prot - dbit_unlock1 spc,t0 + tlb_unlock1 spc,t0 rfir nop @@ -1430,14 
+1425,14 @@ naitlb_miss_20w: L3_ptep ptp,pte,t0,va,naitlb_check_alias_20w - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,naitlb_check_alias_20w + update_accessed ptp,pte,t0,t1 make_insert_tlb spc,pte,prot iitlbt pte,prot - dbit_unlock1 spc,t0 + tlb_unlock1 spc,t0 rfir nop @@ -1458,20 +1453,20 @@ itlb_miss_11: L2_ptep ptp,pte,t0,va,itlb_fault - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,itlb_fault + update_accessed ptp,pte,t0,t1 make_insert_tlb_11 spc,pte,prot - mfsp %sr1,t0 /* Save sr1 so we can use it in tlb inserts */ + mfsp %sr1,t1 /* Save sr1 so we can use it in tlb inserts */ mtsp spc,%sr1 iitlba pte,(%sr1,va) iitlbp prot,(%sr1,va) - mtsp t0, %sr1 /* Restore sr1 */ - dbit_unlock1 spc,t0 + mtsp t1, %sr1 /* Restore sr1 */ + tlb_unlock1 spc,t0 rfir nop @@ -1482,20 +1477,20 @@ naitlb_miss_11: L2_ptep ptp,pte,t0,va,naitlb_check_alias_11 - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,naitlb_check_alias_11 + update_accessed ptp,pte,t0,t1 make_insert_tlb_11 spc,pte,prot - mfsp %sr1,t0 /* Save sr1 so we can use it in tlb inserts */ + mfsp %sr1,t1 /* Save sr1 so we can use it in tlb inserts */ mtsp spc,%sr1 iitlba pte,(%sr1,va) iitlbp prot,(%sr1,va) - mtsp t0, %sr1 /* Restore sr1 */ - dbit_unlock1 spc,t0 + mtsp t1, %sr1 /* Restore sr1 */ + tlb_unlock1 spc,t0 rfir nop @@ -1516,16 +1511,16 @@ itlb_miss_20: L2_ptep ptp,pte,t0,va,itlb_fault - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,itlb_fault + update_accessed ptp,pte,t0,t1 make_insert_tlb spc,pte,prot - f_extend pte,t0 + f_extend pte,t1 iitlbt pte,prot - dbit_unlock1 spc,t0 + tlb_unlock1 spc,t0 rfir nop @@ -1536,16 +1531,16 @@ naitlb_miss_20: L2_ptep ptp,pte,t0,va,naitlb_check_alias_20 - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,naitlb_check_alias_20 + update_accessed ptp,pte,t0,t1 make_insert_tlb spc,pte,prot - f_extend pte,t0 + f_extend pte,t1 iitlbt pte,prot - dbit_unlock1 spc,t0 + tlb_unlock1 spc,t0 rfir nop @@ -1568,14 +1563,14 @@ dbit_trap_20w: L3_ptep ptp,pte,t0,va,dbit_fault - dbit_lock spc,t0,t1 - update_dirty spc,ptp,pte,t1 + tlb_lock spc,ptp,pte,t0,t1,dbit_fault + update_dirty ptp,pte,t1 make_insert_tlb spc,pte,prot idtlbt pte,prot - dbit_unlock0 spc,t0 + tlb_unlock0 spc,t0 rfir nop #else @@ -1588,8 +1583,8 @@ dbit_trap_11: L2_ptep ptp,pte,t0,va,dbit_fault - dbit_lock spc,t0,t1 - update_dirty spc,ptp,pte,t1 + tlb_lock spc,ptp,pte,t0,t1,dbit_fault + update_dirty ptp,pte,t1 make_insert_tlb_11 spc,pte,prot @@ -1600,8 +1595,8 @@ dbit_trap_11: idtlbp prot,(%sr1,va) mtsp t1, %sr1 /* Restore sr1 */ - dbit_unlock0 spc,t0 + tlb_unlock0 spc,t0 rfir nop @@ -1612,16 +1607,16 @@ dbit_trap_20: L2_ptep ptp,pte,t0,va,dbit_fault - dbit_lock spc,t0,t1 - update_dirty spc,ptp,pte,t1 + tlb_lock spc,ptp,pte,t0,t1,dbit_fault + update_dirty ptp,pte,t1 make_insert_tlb spc,pte,prot f_extend pte,t1 - idtlbt pte,prot - dbit_unlock0 spc,t0 + idtlbt pte,prot + tlb_unlock0 spc,t0 rfir nop #endif diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index 47ee620d15d27..7f67c4c96a7a3 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -43,10 +43,6 @@ #include "../math-emu/math-emu.h" /* for handle_fpe() */ -#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) -DEFINE_SPINLOCK(pa_dbit_lock); -#endif - static void parisc_show_stack(struct task_struct *task, unsigned long *sp, struct pt_regs *regs); From 
0ab58712e54f3a3258072feb15ab14bfbc42fa02 Mon Sep 17 00:00:00 2001 From: Christophe Jaillet Date: Mon, 13 Jul 2015 11:32:43 +0200 Subject: [PATCH 0414/2091] parisc: mm: Fix a memory leak related to pmd not attached to the pgd commit 4c4ac9a48ac512c6b5a6cca06cfad2ad96e8caaa upstream. Commit 0e0da48dee8d ("parisc: mm: don't count preallocated pmds") introduced a memory leak. After this commit, the 'return' statement in pmd_free is executed in all cases. Even for pmd that are not attached to the pgd. So 'free_pages' can never be called anymore, leading to a memory leak. Signed-off-by: Christophe JAILLET Acked-by: Kirill A. Shutemov Acked-by: Mikulas Patocka Acked-by: Helge Deller Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- arch/parisc/include/asm/pgalloc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h index 3a08eae3318fe..3edbb9fc91b4e 100644 --- a/arch/parisc/include/asm/pgalloc.h +++ b/arch/parisc/include/asm/pgalloc.h @@ -72,7 +72,7 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) { - if(pmd_flag(*pmd) & PxD_FLAG_ATTACHED) + if (pmd_flag(*pmd) & PxD_FLAG_ATTACHED) { /* * This is the permanent pmd attached to the pgd; * cannot free it. @@ -81,6 +81,7 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) */ mm_inc_nr_pmds(mm); return; + } free_pages((unsigned long)pmd, PMD_ORDER); } From 7185dab0dbe1f8e2cae26dd759475aa146e1f071 Mon Sep 17 00:00:00 2001 From: Robert Jarzmik Date: Sat, 11 Jul 2015 21:33:06 +0200 Subject: [PATCH 0415/2091] ARM: pxa: fix dm9000 platform data regression commit a927ef895e288e79f1bfed221f27d7bfa37e907f upstream. Since dm9000 driver added support for a vcc regulator, platform data based platforms have their ethernet broken, as the regulator claiming returns -EPROBE_DEFER and prevents dm9000 loading. This patch fixes this for all pxa boards using dm9000, by using the specific regulator_has_full_constraints() function. This was discovered and tested on the cm-x300 board. 
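For illustration only, and not part of the patch: a tiny user-space model of the failure mode described above, where a probe waiting on a never-registered supply defers forever unless the platform declares full constraints. All identifiers below are stand-ins; only the control flow mirrors the regulator behaviour.

	/* Toy model of deferred probing vs. regulator_has_full_constraints(). */
	#include <stdbool.h>
	#include <stdio.h>

	static bool full_constraints;   /* what the board init files now declare */
	static bool vcc_supply_present; /* these PXA boards never register a "vcc" supply */

	/* 0 = probe succeeds, -1 = defer and retry later (-EPROBE_DEFER) */
	static int dm9000_probe_model(void)
	{
		if (vcc_supply_present)
			return 0;               /* a real supply was found */
		if (full_constraints)
			return 0;               /* core hands out a dummy regulator */
		return -1;                      /* defer, waiting for a supply that never appears */
	}

	int main(void)
	{
		printf("without full constraints: %s\n",
		       dm9000_probe_model() ? "deferred forever" : "probed");
		full_constraints = true;        /* effect of regulator_has_full_constraints() */
		printf("with full constraints:    %s\n",
		       dm9000_probe_model() ? "deferred forever" : "probed");
		return 0;
	}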
Fixes: 7994fe55a4a2 ("dm9000: Add regulator and reset support to dm9000") Signed-off-by: Robert Jarzmik Acked-by: Igor Grinberg Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-pxa/capc7117.c | 3 +++ arch/arm/mach-pxa/cm-x2xx.c | 3 +++ arch/arm/mach-pxa/cm-x300.c | 2 ++ arch/arm/mach-pxa/colibri-pxa270.c | 3 +++ arch/arm/mach-pxa/em-x270.c | 2 ++ arch/arm/mach-pxa/icontrol.c | 3 +++ arch/arm/mach-pxa/trizeps4.c | 3 +++ arch/arm/mach-pxa/vpac270.c | 3 +++ arch/arm/mach-pxa/zeus.c | 2 ++ 9 files changed, 24 insertions(+) diff --git a/arch/arm/mach-pxa/capc7117.c b/arch/arm/mach-pxa/capc7117.c index c092730749b9d..bf366b39fa611 100644 --- a/arch/arm/mach-pxa/capc7117.c +++ b/arch/arm/mach-pxa/capc7117.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -144,6 +145,8 @@ static void __init capc7117_init(void) capc7117_uarts_init(); capc7117_ide_init(); + + regulator_has_full_constraints(); } MACHINE_START(CAPC7117, diff --git a/arch/arm/mach-pxa/cm-x2xx.c b/arch/arm/mach-pxa/cm-x2xx.c index bb99f59a36d88..a17a91eb8e9a3 100644 --- a/arch/arm/mach-pxa/cm-x2xx.c +++ b/arch/arm/mach-pxa/cm-x2xx.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -466,6 +467,8 @@ static void __init cmx2xx_init(void) cmx2xx_init_ac97(); cmx2xx_init_touchscreen(); cmx2xx_init_leds(); + + regulator_has_full_constraints(); } static void __init cmx2xx_init_irq(void) diff --git a/arch/arm/mach-pxa/cm-x300.c b/arch/arm/mach-pxa/cm-x300.c index 4d3588d26c2a1..5851f4c254c16 100644 --- a/arch/arm/mach-pxa/cm-x300.c +++ b/arch/arm/mach-pxa/cm-x300.c @@ -835,6 +835,8 @@ static void __init cm_x300_init(void) cm_x300_init_ac97(); cm_x300_init_wi2wi(); cm_x300_init_bl(); + + regulator_has_full_constraints(); } static void __init cm_x300_fixup(struct tag *tags, char **cmdline) diff --git a/arch/arm/mach-pxa/colibri-pxa270.c b/arch/arm/mach-pxa/colibri-pxa270.c index 5f9d9303b346d..3503826333c74 100644 --- a/arch/arm/mach-pxa/colibri-pxa270.c +++ b/arch/arm/mach-pxa/colibri-pxa270.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -294,6 +295,8 @@ static void __init colibri_pxa270_init(void) printk(KERN_ERR "Illegal colibri_pxa270_baseboard type %d\n", colibri_pxa270_baseboard); } + + regulator_has_full_constraints(); } /* The "Income s.r.o. 
SH-Dmaster PXA270 SBC" board can be booted either diff --git a/arch/arm/mach-pxa/em-x270.c b/arch/arm/mach-pxa/em-x270.c index 51531ecffca85..9d7072b040458 100644 --- a/arch/arm/mach-pxa/em-x270.c +++ b/arch/arm/mach-pxa/em-x270.c @@ -1306,6 +1306,8 @@ static void __init em_x270_init(void) em_x270_init_i2c(); em_x270_init_camera(); em_x270_userspace_consumers_init(); + + regulator_has_full_constraints(); } MACHINE_START(EM_X270, "Compulab EM-X270") diff --git a/arch/arm/mach-pxa/icontrol.c b/arch/arm/mach-pxa/icontrol.c index c98511c5abd10..9b0eb0252af6f 100644 --- a/arch/arm/mach-pxa/icontrol.c +++ b/arch/arm/mach-pxa/icontrol.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "generic.h" @@ -185,6 +186,8 @@ static void __init icontrol_init(void) mxm_8x10_mmc_init(); icontrol_can_init(); + + regulator_has_full_constraints(); } MACHINE_START(ICONTROL, "iControl/SafeTcam boards using Embedian MXM-8x10 CoM") diff --git a/arch/arm/mach-pxa/trizeps4.c b/arch/arm/mach-pxa/trizeps4.c index 872dcb20e7578..066e3a250ee03 100644 --- a/arch/arm/mach-pxa/trizeps4.c +++ b/arch/arm/mach-pxa/trizeps4.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -534,6 +535,8 @@ static void __init trizeps4_init(void) BCR_writew(trizeps_conxs_bcr); board_backlight_power(1); + + regulator_has_full_constraints(); } static void __init trizeps4_map_io(void) diff --git a/arch/arm/mach-pxa/vpac270.c b/arch/arm/mach-pxa/vpac270.c index aa89488f961ec..54122a983ae37 100644 --- a/arch/arm/mach-pxa/vpac270.c +++ b/arch/arm/mach-pxa/vpac270.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -711,6 +712,8 @@ static void __init vpac270_init(void) vpac270_ts_init(); vpac270_rtc_init(); vpac270_ide_init(); + + regulator_has_full_constraints(); } MACHINE_START(VPAC270, "Voipac PXA270") diff --git a/arch/arm/mach-pxa/zeus.c b/arch/arm/mach-pxa/zeus.c index ac2ae5c71ab45..6158566fa0f74 100644 --- a/arch/arm/mach-pxa/zeus.c +++ b/arch/arm/mach-pxa/zeus.c @@ -868,6 +868,8 @@ static void __init zeus_init(void) i2c_register_board_info(0, ARRAY_AND_SIZE(zeus_i2c_devices)); pxa2xx_set_spi_info(3, &pxa2xx_spi_ssp3_master_info); spi_register_board_info(zeus_spi_board_info, ARRAY_SIZE(zeus_spi_board_info)); + + regulator_has_full_constraints(); } static struct map_desc zeus_io_desc[] __initdata = { From c0c4945b2c39ca6a543664cbccce26b400d66562 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Tue, 7 Jul 2015 17:27:57 +0300 Subject: [PATCH 0416/2091] ARM: dts: dra7x-evm: Prevent glitch on DCAN1 pinmux commit 2acb5c301edf39ab6d066687ce70da1166e4de9e upstream. Driver core sets "default" pinmux on on probe and CAN driver sets "sleep" pinmux during register. This causes a small window where the CAN pins are in "default" state with the DCAN module being disabled. Change the "default" state to be like sleep so this glitch is avoided. Add a new "active" state that is used by the driver when CAN is actually active. 
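For illustration only, and not part of the patch: a sketch of how a driver would select the states named above at runtime, trying "active" (pinctrl-2) and falling back to "default" when a device tree only provides the old two states. The helper name is invented; the real driver-side change is the c_can patch later in this series.

	#include <linux/err.h>
	#include <linux/pinctrl/consumer.h>

	/* Illustrative helper: prefer the "active" pinctrl state, else "default". */
	static int dcan_pins_set_active(struct device *dev)
	{
		struct pinctrl *p;

		p = pinctrl_get_select(dev, "active");
		if (IS_ERR(p))
			return pinctrl_pm_select_default_state(dev);

		pinctrl_put(p);
		return 0;
	}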
Signed-off-by: Roger Quadros Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/dra7-evm.dts | 5 +++-- arch/arm/boot/dts/dra72-evm.dts | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/dra7-evm.dts b/arch/arm/boot/dts/dra7-evm.dts index aa465904f6cc4..096f68be99e2b 100644 --- a/arch/arm/boot/dts/dra7-evm.dts +++ b/arch/arm/boot/dts/dra7-evm.dts @@ -686,7 +686,8 @@ &dcan1 { status = "ok"; - pinctrl-names = "default", "sleep"; - pinctrl-0 = <&dcan1_pins_default>; + pinctrl-names = "default", "sleep", "active"; + pinctrl-0 = <&dcan1_pins_sleep>; pinctrl-1 = <&dcan1_pins_sleep>; + pinctrl-2 = <&dcan1_pins_default>; }; diff --git a/arch/arm/boot/dts/dra72-evm.dts b/arch/arm/boot/dts/dra72-evm.dts index ce0390f081d92..6b05f6a0ba84a 100644 --- a/arch/arm/boot/dts/dra72-evm.dts +++ b/arch/arm/boot/dts/dra72-evm.dts @@ -497,9 +497,10 @@ &dcan1 { status = "ok"; - pinctrl-names = "default", "sleep"; - pinctrl-0 = <&dcan1_pins_default>; + pinctrl-names = "default", "sleep", "active"; + pinctrl-0 = <&dcan1_pins_sleep>; pinctrl-1 = <&dcan1_pins_sleep>; + pinctrl-2 = <&dcan1_pins_default>; }; &qspi { From 0784a0b533fc6526b2f899ce9fe50d745f791fba Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Wed, 17 Jun 2015 17:52:43 +0300 Subject: [PATCH 0417/2091] ARM: dts: am57xx-beagle-x15: Provide supply for usb2_phy2 commit 9ab402aed38b95d9ce453108622be0fc6f167568 upstream. Without this USB2 breaks if USB1 is disabled or USB1 initializes after USB2 e.g. due to deferred probing. Fixes: 5a0f93c6576a ("ARM: dts: Add am57xx-beagle-x15") Signed-off-by: Roger Quadros Signed-off-by: Tony Lindgren Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/am57xx-beagle-x15.dts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm/boot/dts/am57xx-beagle-x15.dts b/arch/arm/boot/dts/am57xx-beagle-x15.dts index 7128fad991ac3..c9df40e5cd3b2 100644 --- a/arch/arm/boot/dts/am57xx-beagle-x15.dts +++ b/arch/arm/boot/dts/am57xx-beagle-x15.dts @@ -544,6 +544,10 @@ phy-supply = <&ldousb_reg>; }; +&usb2_phy2 { + phy-supply = <&ldousb_reg>; +}; + &usb1 { dr_mode = "host"; pinctrl-names = "default"; From 50c301372e86ce52a897ead3f55044811a9ebb57 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Wed, 8 Jul 2015 13:21:55 +0100 Subject: [PATCH 0418/2091] ARM: 8404/1: dma-mapping: fix off-by-one error in bitmap size check commit 462859aa7bbe1ac83ec4377a0a06fe60778f3f27 upstream. nr_bitmaps member of mapping structure stores the number of already allocated bitmaps and it is interpreted as loop iterator (it starts from 0 not from 1), so a comparison against number of possible bitmap extensions should include this fact. This patch fixes this by changing the extension failure condition. This issue has been introduced by commit 4d852ef8c2544ce21ae41414099a7504c61164a0 ("arm: dma-mapping: Add support to extend DMA IOMMU mappings"). 
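For illustration only, and not part of the patch: a stand-alone demo of the off-by-one, with the limit and helper invented for the example; only the comparison matters.

	/* "EXTENSIONS" is the maximum number of bitmaps, "nr_bitmaps" counts
	 * how many already exist, exactly as in the mapping structure. */
	#include <stdio.h>

	#define EXTENSIONS 4

	static int extend(int *nr_bitmaps, int buggy)
	{
		int refuse = buggy ? (*nr_bitmaps >  EXTENSIONS)
				   : (*nr_bitmaps >= EXTENSIONS);

		if (refuse)
			return -1;      /* -EINVAL in the kernel */
		(*nr_bitmaps)++;        /* "allocate" one more bitmap */
		return 0;
	}

	int main(void)
	{
		int n;

		for (n = 0; extend(&n, 1) == 0; )
			;
		printf("old '>' check allowed %d bitmaps (limit %d)\n", n, EXTENSIONS);

		for (n = 0; extend(&n, 0) == 0; )
			;
		printf("new '>=' check allowed %d bitmaps\n", n);
		return 0;
	}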
Reported-by: Hyungwon Hwang Signed-off-by: Marek Szyprowski Reviewed-by: Hyungwon Hwang Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/mm/dma-mapping.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 7e7583ddd6076..6e4b9ff22ef3c 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -1953,7 +1953,7 @@ static int extend_iommu_mapping(struct dma_iommu_mapping *mapping) { int next_bitmap; - if (mapping->nr_bitmaps > mapping->extensions) + if (mapping->nr_bitmaps >= mapping->extensions) return -EINVAL; next_bitmap = mapping->nr_bitmaps; From 9af14dc3d33cba85698bbfea62bcf8fee56a7c76 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Tue, 30 Jun 2015 14:48:24 +0200 Subject: [PATCH 0419/2091] ARM: imx6: gpc: always enable PU domain if CONFIG_PM is not set commit d438462c20a300139c2e5e65b96cadaa21b58d9a upstream. If CONFIG_PM is not set the PU power domain needs to be enabled always, otherwise there are two failure scenarios which will hang the system if one of the devices in the PU domain is accessed. 1. New DTs (4.1+) drop the "always-on" property from the PU regulator, so if it isn't properly enabled by the GPC code it will be disabled at the end of boot. 2. If the bootloader already disabled the PU domain the GPC explicitly needs to enable it again, even if the kernel doesn't do any power management. This is a bit hypothetical, as it requires to boot a mainline kernel on a downstream bootloader, as no mainline bootloader disables the PM domains. Signed-off-by: Lucas Stach Acked-by: Philipp Zabel Signed-off-by: Shawn Guo Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-imx/gpc.c | 27 ++++++--------------------- 1 file changed, 6 insertions(+), 21 deletions(-) diff --git a/arch/arm/mach-imx/gpc.c b/arch/arm/mach-imx/gpc.c index 6d0893a3828eb..78b6fd0b86e69 100644 --- a/arch/arm/mach-imx/gpc.c +++ b/arch/arm/mach-imx/gpc.c @@ -291,8 +291,6 @@ void __init imx_gpc_check_dt(void) } } -#ifdef CONFIG_PM_GENERIC_DOMAINS - static void _imx6q_pm_pu_power_off(struct generic_pm_domain *genpd) { int iso, iso2sw; @@ -399,7 +397,6 @@ static struct genpd_onecell_data imx_gpc_onecell_data = { static int imx_gpc_genpd_init(struct device *dev, struct regulator *pu_reg) { struct clk *clk; - bool is_off; int i; imx6q_pu_domain.reg = pu_reg; @@ -416,18 +413,13 @@ static int imx_gpc_genpd_init(struct device *dev, struct regulator *pu_reg) } imx6q_pu_domain.num_clks = i; - is_off = IS_ENABLED(CONFIG_PM); - if (is_off) { - _imx6q_pm_pu_power_off(&imx6q_pu_domain.base); - } else { - /* - * Enable power if compiled without CONFIG_PM in case the - * bootloader disabled it. - */ - imx6q_pm_pu_power_on(&imx6q_pu_domain.base); - } + /* Enable power always in case bootloader disabled it. 
*/ + imx6q_pm_pu_power_on(&imx6q_pu_domain.base); + + if (!IS_ENABLED(CONFIG_PM_GENERIC_DOMAINS)) + return 0; - pm_genpd_init(&imx6q_pu_domain.base, NULL, is_off); + pm_genpd_init(&imx6q_pu_domain.base, NULL, false); return of_genpd_add_provider_onecell(dev->of_node, &imx_gpc_onecell_data); @@ -437,13 +429,6 @@ static int imx_gpc_genpd_init(struct device *dev, struct regulator *pu_reg) return -EINVAL; } -#else -static inline int imx_gpc_genpd_init(struct device *dev, struct regulator *reg) -{ - return 0; -} -#endif /* CONFIG_PM_GENERIC_DOMAINS */ - static int imx_gpc_probe(struct platform_device *pdev) { struct regulator *pu_reg; From a6130c5e14b2f824c0b7e5e9dddd46529616c6af Mon Sep 17 00:00:00 2001 From: Markos Chandras Date: Wed, 24 Jun 2015 09:52:01 +0100 Subject: [PATCH 0420/2091] MIPS: Fix erroneous JR emulation for MIPS R6 commit 143fefc8f315cd10e046e6860913c421c3385cb1 upstream. Commit 5f9f41c474befb4ebbc40b27f65bb7d649241581 ("MIPS: kernel: Prepare the JR instruction for emulation on MIPS R6") added support for emulating the JR instruction on MIPS R6 cores but that introduced a bug which could be triggered when hitting a JALR opcode because the code used the wrong field in the 'r_format' struct to determine the instruction opcode. This lead to crashes because an emulated JALR instruction was treated as a JR one when the R6 emulator was turned off. Fixes: 5f9f41c474be ("MIPS: kernel: Prepare the JR instruction for emulation on MIPS R6") Signed-off-by: Markos Chandras Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/10583/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/math-emu/cp1emu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c index 22b9b2cb9219f..6983fcd481319 100644 --- a/arch/mips/math-emu/cp1emu.c +++ b/arch/mips/math-emu/cp1emu.c @@ -451,7 +451,7 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, /* Fall through */ case jr_op: /* For R6, JR already emulated in jalr_op */ - if (NO_R6EMU && insn.r_format.opcode == jr_op) + if (NO_R6EMU && insn.r_format.func == jr_op) break; *contpc = regs->regs[insn.r_format.rs]; return 1; From 7ed06198d9b38b8a6c8e8a0944a4a6c3f03e5a35 Mon Sep 17 00:00:00 2001 From: Markos Chandras Date: Fri, 10 Jul 2015 09:29:10 +0100 Subject: [PATCH 0421/2091] MIPS: c-r4k: Fix cache flushing for MT cores commit cccf34e9411c41b0cbfb41980fe55fc8e7c98fd2 upstream. MT_SMP is not the only SMP option for MT cores. The MT_SMP option allows more than one VPE per core to appear as a secondary CPU in the system. Because of how CM works, it propagates the address-based cache ops to the secondary cores but not the index-based ones. Because of that, the code does not use IPIs to flush the L1 caches on secondary cores because the CM would have done that already. However, the CM functionality is independent of the type of SMP kernel so even in non-MT kernels, IPIs are not necessary. As a result of which, we change the conditional to depend on the CM presence. Moreover, since VPEs on the same core share the same L1 caches, there is no need to send an IPI on all of them so we calculate a suitable cpumask with only one VPE per core. 
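For illustration only, and not part of the patch: a user-space sketch of that mask calculation with an invented topology, mirroring the nested loop added in calculate_cpu_foreign_map() below.

	/* Pick one representative CPU per (package, core) pair. */
	#include <stdio.h>

	struct cpu { int package, core; };

	int main(void)
	{
		/* 8 logical CPUs: 4 cores with 2 VPEs (hardware threads) each */
		struct cpu cpus[] = {
			{0, 0}, {0, 0}, {0, 1}, {0, 1},
			{0, 2}, {0, 2}, {0, 3}, {0, 3},
		};
		int n = sizeof(cpus) / sizeof(cpus[0]);
		unsigned int mask = 0;
		int i, k;

		for (i = 0; i < n; i++) {
			int core_present = 0;

			for (k = 0; k < n; k++)
				if ((mask & (1u << k)) &&
				    cpus[i].package == cpus[k].package &&
				    cpus[i].core == cpus[k].core)
					core_present = 1;
			if (!core_present)
				mask |= 1u << i;        /* first VPE seen on this core */
		}
		printf("IPI mask: 0x%02x (one bit per core)\n", mask);  /* prints 0x55 */
		return 0;
	}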
Signed-off-by: Markos Chandras Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/10654/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/include/asm/smp.h | 1 + arch/mips/kernel/smp.c | 44 ++++++++++++++++++++++++++++++++++++- arch/mips/mm/c-r4k.c | 14 +++++++++--- 3 files changed, 55 insertions(+), 4 deletions(-) diff --git a/arch/mips/include/asm/smp.h b/arch/mips/include/asm/smp.h index 2b25d1ba1ea03..16f1ea9ab1912 100644 --- a/arch/mips/include/asm/smp.h +++ b/arch/mips/include/asm/smp.h @@ -23,6 +23,7 @@ extern int smp_num_siblings; extern cpumask_t cpu_sibling_map[]; extern cpumask_t cpu_core_map[]; +extern cpumask_t cpu_foreign_map; #define raw_smp_processor_id() (current_thread_info()->cpu) diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index faa46ebd9ddae..d0744cc77ea7f 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -63,6 +63,13 @@ EXPORT_SYMBOL(cpu_sibling_map); cpumask_t cpu_core_map[NR_CPUS] __read_mostly; EXPORT_SYMBOL(cpu_core_map); +/* + * A logcal cpu mask containing only one VPE per core to + * reduce the number of IPIs on large MT systems. + */ +cpumask_t cpu_foreign_map __read_mostly; +EXPORT_SYMBOL(cpu_foreign_map); + /* representing cpus for which sibling maps can be computed */ static cpumask_t cpu_sibling_setup_map; @@ -103,6 +110,29 @@ static inline void set_cpu_core_map(int cpu) } } +/* + * Calculate a new cpu_foreign_map mask whenever a + * new cpu appears or disappears. + */ +static inline void calculate_cpu_foreign_map(void) +{ + int i, k, core_present; + cpumask_t temp_foreign_map; + + /* Re-calculate the mask */ + for_each_online_cpu(i) { + core_present = 0; + for_each_cpu(k, &temp_foreign_map) + if (cpu_data[i].package == cpu_data[k].package && + cpu_data[i].core == cpu_data[k].core) + core_present = 1; + if (!core_present) + cpumask_set_cpu(i, &temp_foreign_map); + } + + cpumask_copy(&cpu_foreign_map, &temp_foreign_map); +} + struct plat_smp_ops *mp_ops; EXPORT_SYMBOL(mp_ops); @@ -146,6 +176,8 @@ asmlinkage void start_secondary(void) set_cpu_sibling_map(cpu); set_cpu_core_map(cpu); + calculate_cpu_foreign_map(); + cpumask_set_cpu(cpu, &cpu_callin_map); synchronise_count_slave(cpu); @@ -173,9 +205,18 @@ void __irq_entry smp_call_function_interrupt(void) static void stop_this_cpu(void *dummy) { /* - * Remove this CPU: + * Remove this CPU. Be a bit slow here and + * set the bits for every online CPU so we don't miss + * any IPI whilst taking this VPE down. 
*/ + + cpumask_copy(&cpu_foreign_map, cpu_online_mask); + + /* Make it visible to every other CPU */ + smp_mb(); + set_cpu_online(smp_processor_id(), false); + calculate_cpu_foreign_map(); local_irq_disable(); while (1); } @@ -197,6 +238,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) mp_ops->prepare_cpus(max_cpus); set_cpu_sibling_map(0); set_cpu_core_map(0); + calculate_cpu_foreign_map(); #ifndef CONFIG_HOTPLUG_CPU init_cpu_present(cpu_possible_mask); #endif diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index 2e03ab1735911..dca0efc078c15 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -37,6 +37,7 @@ #include /* for run_uncached() */ #include #include +#include /* * Special Variant of smp_call_function for use by cache functions: @@ -51,9 +52,16 @@ static inline void r4k_on_each_cpu(void (*func) (void *info), void *info) { preempt_disable(); -#ifndef CONFIG_MIPS_MT_SMP - smp_call_function(func, info, 1); -#endif + /* + * The Coherent Manager propagates address-based cache ops to other + * cores but not index-based ops. However, r4k_on_each_cpu is used + * in both cases so there is no easy way to tell what kind of op is + * executed to the other cores. The best we can probably do is + * to restrict that call when a CM is not present because both + * CM-based SMP protocols (CMP & CPS) restrict index-based cache ops. + */ + if (!mips_cm_present()) + smp_call_function_many(&cpu_foreign_map, func, info, 1); func(info); preempt_enable(); } From 219485e36468309cc71f7580d633a0b416d8e985 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Fri, 10 Jul 2015 16:00:24 +0100 Subject: [PATCH 0422/2091] MIPS: Require O32 FP64 support for MIPS64 with O32 compat commit 4e9d324d4288b082497c30bc55b8ad13acc7cf01 upstream. MIPS32r6 code requires FP64 (ie. FR=1) support. Building a kernel with support for MIPS32r6 binaries but without support for O32 with FP64 is therefore a problem which can lead to incorrectly executed userland. CONFIG_MIPS_O32_FP64_SUPPORT is already selected when the kernel is configured for MIPS32r6, but not when the kernel is configured for MIPS64r6 with O32 compat support. Select CONFIG_MIPS_O32_FP64_SUPPORT in such configurations to prevent building kernels which execute MIPS32r6 userland incorrectly. Signed-off-by: Paul Burton Cc: Markos Chandras Cc: linux-mips@linux-mips.org Cc: Matthew Fortune Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/10674/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index f5016656494f6..a3b1ffe50aa07 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -1417,6 +1417,7 @@ config CPU_MIPS64_R6 select CPU_SUPPORTS_HIGHMEM select CPU_SUPPORTS_MSA select GENERIC_CSUM + select MIPS_O32_FP64_SUPPORT if MIPS32_O32 help Choose this option to build a kernel for release 6 or later of the MIPS64 architecture. New MIPS processors, starting with the Warrior From cea0f568222b53b21242c50445776fccd4a1b8b8 Mon Sep 17 00:00:00 2001 From: Markos Chandras Date: Thu, 16 Jul 2015 15:30:04 +0100 Subject: [PATCH 0423/2091] MIPS: fpu.h: Allow 64-bit FPU on a 64-bit MIPS R6 CPU commit fcc53b5f6c38acbf5d311ffc3e0da517491c6f7b upstream. 
Commit 6134d94923d0 ("MIPS: asm: fpu: Allow 64-bit FPU on MIPS32 R6") added support for 64-bit FPU on a 32-bit MIPS R6 processor but it missed the 64-bit CPU case leading to FPU failures when requesting FR=1 mode (which is always the case for MIPS R6 userland) when running a 32-bit kernel on a 64-bit CPU. We also fix the MIPS R2 case. Signed-off-by: Markos Chandras Fixes: 6134d94923d0 ("MIPS: asm: fpu: Allow 64-bit FPU on MIPS32 R6") Reviewed-by: Paul Burton Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/10734/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/include/asm/fpu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/include/asm/fpu.h b/arch/mips/include/asm/fpu.h index 084780b355aa5..1b06251898352 100644 --- a/arch/mips/include/asm/fpu.h +++ b/arch/mips/include/asm/fpu.h @@ -74,7 +74,7 @@ static inline int __enable_fpu(enum fpu_mode mode) goto fr_common; case FPU_64BIT: -#if !(defined(CONFIG_CPU_MIPS32_R2) || defined(CONFIG_CPU_MIPS32_R6) \ +#if !(defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6) \ || defined(CONFIG_64BIT)) /* we only have a 32-bit FPU */ return SIGFPE; From 153fa24b8f6763c51915c59feed10dad045bd880 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Fri, 26 Jun 2015 11:58:19 +0200 Subject: [PATCH 0424/2091] can: replace timestamp as unique skb attribute commit d3b58c47d330de8c29898fe9746f7530408f8a59 upstream. Commit 514ac99c64b "can: fix multiple delivery of a single CAN frame for overlapping CAN filters" requires the skb->tstamp to be set to check for identical CAN skbs. Without timestamping to be required by user space applications this timestamp was not generated which lead to commit 36c01245eb8 "can: fix loss of CAN frames in raw_rcv" - which forces the timestamp to be set in all CAN related skbuffs by introducing several __net_timestamp() calls. This forces e.g. out of tree drivers which are not using alloc_can{,fd}_skb() to add __net_timestamp() after skbuff creation to prevent the frame loss fixed in mainline Linux. This patch removes the timestamp dependency and uses an atomic counter to create an unique identifier together with the skbuff pointer. Btw: the new skbcnt element introduced in struct can_skb_priv has to be initialized with zero in out-of-tree drivers which are not using alloc_can{,fd}_skb() too. 
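For illustration only, and not part of the patch: a user-space sketch of the new duplicate check; the struct is a stand-in for can_skb_priv and everything else is simplified.

	/* Assign each skb a non-zero id from an atomic counter, then treat a
	 * frame as a duplicate only if both the skb pointer and the id match
	 * the previously seen one. */
	#include <stdatomic.h>
	#include <stdio.h>

	struct skb { int skbcnt; };             /* stand-in for can_skb_priv */

	static atomic_int skbcounter;

	static void assign_id(struct skb *skb)
	{
		/* non-zero, assigned once per skb, as in can_receive() */
		while (!skb->skbcnt)
			skb->skbcnt = atomic_fetch_add(&skbcounter, 1) + 1;
	}

	int main(void)
	{
		struct skb a = { 0 }, b = { 0 };
		struct skb *frames[] = { &a, &a, &b };  /* same skb matched twice */
		const struct skb *last_skb = NULL;
		int last_cnt = 0, i;

		for (i = 0; i < 3; i++) {
			int dup;

			assign_id(frames[i]);
			dup = (last_skb == frames[i] &&
			       last_cnt == frames[i]->skbcnt);
			printf("frame %d: %s\n", i, dup ? "duplicate, drop" : "deliver");
			last_skb = frames[i];
			last_cnt = frames[i]->skbcnt;
		}
		return 0;
	}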
Signed-off-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/dev.c | 7 ++----- drivers/net/can/slcan.c | 2 +- drivers/net/can/vcan.c | 3 --- include/linux/can/skb.h | 2 ++ net/can/af_can.c | 12 +++++++----- net/can/bcm.c | 2 ++ net/can/raw.c | 7 ++++--- 7 files changed, 18 insertions(+), 17 deletions(-) diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index e9b1810d319f0..aede704605c66 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -440,9 +440,6 @@ unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx) struct can_frame *cf = (struct can_frame *)skb->data; u8 dlc = cf->can_dlc; - if (!(skb->tstamp.tv64)) - __net_timestamp(skb); - netif_rx(priv->echo_skb[idx]); priv->echo_skb[idx] = NULL; @@ -578,7 +575,6 @@ struct sk_buff *alloc_can_skb(struct net_device *dev, struct can_frame **cf) if (unlikely(!skb)) return NULL; - __net_timestamp(skb); skb->protocol = htons(ETH_P_CAN); skb->pkt_type = PACKET_BROADCAST; skb->ip_summed = CHECKSUM_UNNECESSARY; @@ -589,6 +585,7 @@ struct sk_buff *alloc_can_skb(struct net_device *dev, struct can_frame **cf) can_skb_reserve(skb); can_skb_prv(skb)->ifindex = dev->ifindex; + can_skb_prv(skb)->skbcnt = 0; *cf = (struct can_frame *)skb_put(skb, sizeof(struct can_frame)); memset(*cf, 0, sizeof(struct can_frame)); @@ -607,7 +604,6 @@ struct sk_buff *alloc_canfd_skb(struct net_device *dev, if (unlikely(!skb)) return NULL; - __net_timestamp(skb); skb->protocol = htons(ETH_P_CANFD); skb->pkt_type = PACKET_BROADCAST; skb->ip_summed = CHECKSUM_UNNECESSARY; @@ -618,6 +614,7 @@ struct sk_buff *alloc_canfd_skb(struct net_device *dev, can_skb_reserve(skb); can_skb_prv(skb)->ifindex = dev->ifindex; + can_skb_prv(skb)->skbcnt = 0; *cfd = (struct canfd_frame *)skb_put(skb, sizeof(struct canfd_frame)); memset(*cfd, 0, sizeof(struct canfd_frame)); diff --git a/drivers/net/can/slcan.c b/drivers/net/can/slcan.c index f64f5290d6f8f..a23a7af8eb9a0 100644 --- a/drivers/net/can/slcan.c +++ b/drivers/net/can/slcan.c @@ -207,7 +207,6 @@ static void slc_bump(struct slcan *sl) if (!skb) return; - __net_timestamp(skb); skb->dev = sl->dev; skb->protocol = htons(ETH_P_CAN); skb->pkt_type = PACKET_BROADCAST; @@ -215,6 +214,7 @@ static void slc_bump(struct slcan *sl) can_skb_reserve(skb); can_skb_prv(skb)->ifindex = sl->dev->ifindex; + can_skb_prv(skb)->skbcnt = 0; memcpy(skb_put(skb, sizeof(struct can_frame)), &cf, sizeof(struct can_frame)); diff --git a/drivers/net/can/vcan.c b/drivers/net/can/vcan.c index 0ce868de855df..674f367087c54 100644 --- a/drivers/net/can/vcan.c +++ b/drivers/net/can/vcan.c @@ -78,9 +78,6 @@ static void vcan_rx(struct sk_buff *skb, struct net_device *dev) skb->dev = dev; skb->ip_summed = CHECKSUM_UNNECESSARY; - if (!(skb->tstamp.tv64)) - __net_timestamp(skb); - netif_rx_ni(skb); } diff --git a/include/linux/can/skb.h b/include/linux/can/skb.h index b6a52a4b457aa..51bb6532785c3 100644 --- a/include/linux/can/skb.h +++ b/include/linux/can/skb.h @@ -27,10 +27,12 @@ /** * struct can_skb_priv - private additional data inside CAN sk_buffs * @ifindex: ifindex of the first interface the CAN frame appeared on + * @skbcnt: atomic counter to have an unique id together with skb pointer * @cf: align to the following CAN frame at skb->data */ struct can_skb_priv { int ifindex; + int skbcnt; struct can_frame cf[0]; }; diff --git a/net/can/af_can.c b/net/can/af_can.c index 689c818ed0077..62c635f2bcfc5 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -89,6 +89,8 @@ struct 
timer_list can_stattimer; /* timer for statistics update */ struct s_stats can_stats; /* packet statistics */ struct s_pstats can_pstats; /* receive list statistics */ +static atomic_t skbcounter = ATOMIC_INIT(0); + /* * af_can socket functions */ @@ -310,12 +312,8 @@ int can_send(struct sk_buff *skb, int loop) return err; } - if (newskb) { - if (!(newskb->tstamp.tv64)) - __net_timestamp(newskb); - + if (newskb) netif_rx_ni(newskb); - } /* update statistics */ can_stats.tx_frames++; @@ -683,6 +681,10 @@ static void can_receive(struct sk_buff *skb, struct net_device *dev) can_stats.rx_frames++; can_stats.rx_frames_delta++; + /* create non-zero unique skb identifier together with *skb */ + while (!(can_skb_prv(skb)->skbcnt)) + can_skb_prv(skb)->skbcnt = atomic_inc_return(&skbcounter); + rcu_read_lock(); /* deliver the packet to sockets listening on all devices */ diff --git a/net/can/bcm.c b/net/can/bcm.c index b523453585be7..a1ba6875c2a20 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -261,6 +261,7 @@ static void bcm_can_tx(struct bcm_op *op) can_skb_reserve(skb); can_skb_prv(skb)->ifindex = dev->ifindex; + can_skb_prv(skb)->skbcnt = 0; memcpy(skb_put(skb, CFSIZ), cf, CFSIZ); @@ -1217,6 +1218,7 @@ static int bcm_tx_send(struct msghdr *msg, int ifindex, struct sock *sk) } can_skb_prv(skb)->ifindex = dev->ifindex; + can_skb_prv(skb)->skbcnt = 0; skb->dev = dev; can_skb_set_owner(skb, sk); err = can_send(skb, 1); /* send with loopback */ diff --git a/net/can/raw.c b/net/can/raw.c index 31b9748cbb4ec..2e67b1423cd32 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -75,7 +75,7 @@ MODULE_ALIAS("can-proto-1"); */ struct uniqframe { - ktime_t tstamp; + int skbcnt; const struct sk_buff *skb; unsigned int join_rx_count; }; @@ -133,7 +133,7 @@ static void raw_rcv(struct sk_buff *oskb, void *data) /* eliminate multiple filter matches for the same skb */ if (this_cpu_ptr(ro->uniq)->skb == oskb && - ktime_equal(this_cpu_ptr(ro->uniq)->tstamp, oskb->tstamp)) { + this_cpu_ptr(ro->uniq)->skbcnt == can_skb_prv(oskb)->skbcnt) { if (ro->join_filters) { this_cpu_inc(ro->uniq->join_rx_count); /* drop frame until all enabled filters matched */ @@ -144,7 +144,7 @@ static void raw_rcv(struct sk_buff *oskb, void *data) } } else { this_cpu_ptr(ro->uniq)->skb = oskb; - this_cpu_ptr(ro->uniq)->tstamp = oskb->tstamp; + this_cpu_ptr(ro->uniq)->skbcnt = can_skb_prv(oskb)->skbcnt; this_cpu_ptr(ro->uniq)->join_rx_count = 1; /* drop first frame to check all enabled filters? */ if (ro->join_filters && ro->count > 1) @@ -749,6 +749,7 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) can_skb_reserve(skb); can_skb_prv(skb)->ifindex = dev->ifindex; + can_skb_prv(skb)->skbcnt = 0; err = memcpy_from_msg(skb_put(skb, size), msg, size); if (err < 0) From 78574b4bdc49fb0d7d5d3218beb069d259b2b631 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Sat, 20 Jun 2015 03:32:46 +0300 Subject: [PATCH 0425/2091] can: rcar_can: fix IRQ check commit 5e63e6baa159fa8c787cf783dbf3d77fbea97331 upstream. rcar_can_probe() regards 0 as a wrong IRQ #, despite platform_get_irq() that it calls returns negative error code in that case. This leads to the following being printed to the console when attempting to open the device: error requesting interrupt fffffffa because rcar_can_open() calls request_irq() with a negative IRQ #, and that function naturally fails with -EINVAL. Check for the negative error codes instead and propagate them upstream instead of just returning -ENODEV. 
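For illustration only, and not part of the patch: a stand-alone sketch of why the old check misses the error; the stub below is invented, only the sign convention matters.

	/* platform_get_irq() returns a negative errno when the IRQ resource is
	 * missing; "if (!irq)" only catches 0, so the error slips through and
	 * later surfaces as the request_irq() failure quoted above. */
	#include <errno.h>
	#include <stdio.h>

	static int platform_get_irq_stub(void)
	{
		return -ENXIO;          /* no IRQ resource described */
	}

	int main(void)
	{
		int irq = platform_get_irq_stub();

		if (!irq)               /* old check: never true for -ENXIO */
			printf("old check: missing IRQ detected\n");
		else
			printf("old check: proceeds with irq=%d, request_irq() fails later\n", irq);

		if (irq < 0) {          /* new check: catch and propagate */
			printf("new check: propagate error %d\n", irq);
			return 1;
		}
		return 0;
	}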
Fixes: fd1159318e55 ("can: add Renesas R-Car CAN driver") Signed-off-by: Sergei Shtylyov Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/rcar_can.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/rcar_can.c b/drivers/net/can/rcar_can.c index 7deb80dcbe8c0..93017c09cfc34 100644 --- a/drivers/net/can/rcar_can.c +++ b/drivers/net/can/rcar_can.c @@ -758,8 +758,9 @@ static int rcar_can_probe(struct platform_device *pdev) } irq = platform_get_irq(pdev, 0); - if (!irq) { + if (irq < 0) { dev_err(&pdev->dev, "No IRQ resource\n"); + err = irq; goto fail; } From 436eacc4992c0b51ebea61a8a7ac81807f44eb6e Mon Sep 17 00:00:00 2001 From: "J.D. Schroeder" Date: Wed, 8 Jul 2015 14:38:12 +0300 Subject: [PATCH 0426/2091] can: c_can: Fix default pinmux glitch at init commit 033365191136c97f88c81b7bd0011414db28bb4e upstream. The previous change 3973c526ae9c (net: can: c_can: Disable pins when CAN interface is down) causes a slight glitch on the pinctrl settings when used. Since commit ab78029 (drivers/pinctrl: grab default handles from device core), the device core will automatically set the default pins. This causes the pins to be momentarily set to the default and then to the sleep state in register_c_can_dev(). By adding an optional "enable" state, boards can set the default pin state to be disabled and avoid the glitch when the switch from default to sleep first occurs. If the "enable" state is not available c_can_pinctrl_select_state() falls back to using the "default" pinctrl state. [Roger Q] - Forward port to v4.2 and use pinctrl_get_select(). Signed-off-by: J.D. Schroeder Signed-off-by: Roger Quadros Reviewed-by: Grygorii Strashko Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/c_can/c_can.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c index 041525d2595ce..5d214d1353320 100644 --- a/drivers/net/can/c_can/c_can.c +++ b/drivers/net/can/c_can/c_can.c @@ -592,6 +592,7 @@ static int c_can_start(struct net_device *dev) { struct c_can_priv *priv = netdev_priv(dev); int err; + struct pinctrl *p; /* basic c_can configuration */ err = c_can_chip_config(dev); @@ -604,8 +605,13 @@ static int c_can_start(struct net_device *dev) priv->can.state = CAN_STATE_ERROR_ACTIVE; - /* activate pins */ - pinctrl_pm_select_default_state(dev->dev.parent); + /* Attempt to use "active" if available else use "default" */ + p = pinctrl_get_select(priv->device, "active"); + if (!IS_ERR(p)) + pinctrl_put(p); + else + pinctrl_pm_select_default_state(priv->device); + return 0; } From 23596d7deb0401e36aef49e96d252d8095cdff79 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Sat, 20 Jun 2015 03:33:53 +0300 Subject: [PATCH 0427/2091] can: rcar_can: print signed IRQ # commit c1a4c87b06fa564d6e2760a12d4e5a09badc684b upstream. Printing IRQ # using "%x" and "%u" unsigned formats isn't quite correct as 'ndev->irq' is of type *int*, so the "%d" format needs to be used instead. While fixing this, beautify the dev_info() message in rcar_can_probe() a bit. 
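For illustration only, and not part of the patch: a quick stand-alone demo of the conversion-specifier problem, deliberately repeating the driver's mistake of printing a signed int with "%u"/"%x".

	#include <stdio.h>

	int main(void)
	{
		int irq = -6;   /* e.g. -ENXIO being printed as an "IRQ number" */

		printf("%%u -> %u\n", irq);     /* 4294967290 with 32-bit int */
		printf("%%x -> %x\n", irq);     /* fffffffa, as in the previous patch's error message */
		printf("%%d -> %d\n", irq);     /* -6 */
		return 0;
	}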
Fixes: fd1159318e55 ("can: add Renesas R-Car CAN driver") Signed-off-by: Sergei Shtylyov Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/rcar_can.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/rcar_can.c b/drivers/net/can/rcar_can.c index 93017c09cfc34..2f9ebad4ff567 100644 --- a/drivers/net/can/rcar_can.c +++ b/drivers/net/can/rcar_can.c @@ -526,7 +526,7 @@ static int rcar_can_open(struct net_device *ndev) napi_enable(&priv->napi); err = request_irq(ndev->irq, rcar_can_interrupt, 0, ndev->name, ndev); if (err) { - netdev_err(ndev, "error requesting interrupt %x\n", ndev->irq); + netdev_err(ndev, "error requesting interrupt %d\n", ndev->irq); goto out_close; } can_led_event(ndev, CAN_LED_EVENT_OPEN); @@ -824,7 +824,7 @@ static int rcar_can_probe(struct platform_device *pdev) devm_can_led_init(ndev); - dev_info(&pdev->dev, "device registered (reg_base=%p, irq=%u)\n", + dev_info(&pdev->dev, "device registered (regs @ %p, IRQ%d)\n", priv->regs, ndev->irq); return 0; From 8a5d1e374b59ffaf10bd64c319cdca7560d49c4c Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Mon, 18 May 2015 18:33:27 +0200 Subject: [PATCH 0428/2091] can: mcp251x: fix resume when device is down commit 25b401c1816ae64bcc5dcb1d39ab41812522a0ce upstream. If a valid power regulator or a dummy regulator is used (which happens to be the case when no regulator is specified), restart_work is queued no matter whether the device was running or not at suspend time. Since work queues get initialized in the ndo_open callback, resuming leads to a NULL pointer exception. Reverse exactly the steps executed at suspend time: - Enable the power regulator in any case - Enable the transceiver regulator if the device was running, even in case we have a power regulator - Queue restart_work only in case the device was running Fixes: bf66f3736a94 ("can: mcp251x: Move to threaded interrupts instead of workqueues.") Signed-off-by: Stefan Agner Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/spi/mcp251x.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/net/can/spi/mcp251x.c b/drivers/net/can/spi/mcp251x.c index bf63fee4e743a..34c625ea28017 100644 --- a/drivers/net/can/spi/mcp251x.c +++ b/drivers/net/can/spi/mcp251x.c @@ -1221,17 +1221,16 @@ static int __maybe_unused mcp251x_can_resume(struct device *dev) struct spi_device *spi = to_spi_device(dev); struct mcp251x_priv *priv = spi_get_drvdata(spi); - if (priv->after_suspend & AFTER_SUSPEND_POWER) { + if (priv->after_suspend & AFTER_SUSPEND_POWER) mcp251x_power_enable(priv->power, 1); + + if (priv->after_suspend & AFTER_SUSPEND_UP) { + mcp251x_power_enable(priv->transceiver, 1); queue_work(priv->wq, &priv->restart_work); } else { - if (priv->after_suspend & AFTER_SUSPEND_UP) { - mcp251x_power_enable(priv->transceiver, 1); - queue_work(priv->wq, &priv->restart_work); - } else { - priv->after_suspend = 0; - } + priv->after_suspend = 0; } + priv->force_quit = 0; enable_irq(spi->irq); return 0; From 025a294ac3b4e1010f2bb25304aaa3d2d4770389 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 8 Jul 2015 02:42:38 +0100 Subject: [PATCH 0429/2091] freeing unlinked file indefinitely delayed commit 75a6f82a0d10ef8f13cd8fe7212911a0252ab99e upstream. Normally opening a file, unlinking it and then closing will have the inode freed upon close() (provided that it's not otherwise busy and has no remaining links, of course). 
However, there's one case where that does *not* happen. Namely, if you open it by fhandle with cold dcache, then unlink() and close(). In normal case you get d_delete() in unlink(2) notice that dentry is busy and unhash it; on the final dput() it will be forcibly evicted from dcache, triggering iput() and inode removal. In this case, though, we end up with *two* dentries - disconnected (created by open-by-fhandle) and regular one (used by unlink()). The latter will have its reference to inode dropped just fine, but the former will not - it's considered hashed (it is on the ->s_anon list), so it will stay around until the memory pressure will finally do it in. As the result, we have the final iput() delayed indefinitely. It's trivial to reproduce - void flush_dcache(void) { system("mount -o remount,rw /"); } static char buf[20 * 1024 * 1024]; main() { int fd; union { struct file_handle f; char buf[MAX_HANDLE_SZ]; } x; int m; x.f.handle_bytes = sizeof(x); chdir("/root"); mkdir("foo", 0700); fd = open("foo/bar", O_CREAT | O_RDWR, 0600); close(fd); name_to_handle_at(AT_FDCWD, "foo/bar", &x.f, &m, 0); flush_dcache(); fd = open_by_handle_at(AT_FDCWD, &x.f, O_RDWR); unlink("foo/bar"); write(fd, buf, sizeof(buf)); system("df ."); /* 20Mb eaten */ close(fd); system("df ."); /* should've freed those 20Mb */ flush_dcache(); system("df ."); /* should be the same as #2 */ } will spit out something like Filesystem 1K-blocks Used Available Use% Mounted on /dev/root 322023 303843 1131 100% / Filesystem 1K-blocks Used Available Use% Mounted on /dev/root 322023 303843 1131 100% / Filesystem 1K-blocks Used Available Use% Mounted on /dev/root 322023 283282 21692 93% / - inode gets freed only when dentry is finally evicted (here we trigger than by remount; normally it would've happened in response to memory pressure hell knows when). Acked-by: J. Bruce Fields Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/dcache.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 50bb3c207621c..5d03eb0ec0acc 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -642,7 +642,7 @@ static inline bool fast_dput(struct dentry *dentry) /* * If we have a d_op->d_delete() operation, we sould not - * let the dentry count go to zero, so use "put__or_lock". + * let the dentry count go to zero, so use "put_or_lock". */ if (unlikely(dentry->d_flags & DCACHE_OP_DELETE)) return lockref_put_or_lock(&dentry->d_lockref); @@ -697,7 +697,7 @@ static inline bool fast_dput(struct dentry *dentry) */ smp_rmb(); d_flags = ACCESS_ONCE(dentry->d_flags); - d_flags &= DCACHE_REFERENCED | DCACHE_LRU_LIST; + d_flags &= DCACHE_REFERENCED | DCACHE_LRU_LIST | DCACHE_DISCONNECTED; /* Nothing to do? Dropping the reference was all we needed? */ if (d_flags == (DCACHE_REFERENCED | DCACHE_LRU_LIST) && !d_unhashed(dentry)) @@ -776,6 +776,9 @@ void dput(struct dentry *dentry) if (unlikely(d_unhashed(dentry))) goto kill_it; + if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED)) + goto kill_it; + if (unlikely(dentry->d_flags & DCACHE_OP_DELETE)) { if (dentry->d_op->d_delete(dentry)) goto kill_it; From 16e28fc11cd6a2d04a488486cb7c945168dc2a8d Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Thu, 2 Jul 2015 12:09:33 +0300 Subject: [PATCH 0430/2091] x86/init: Clear 'init_level4_pgt' earlier commit d0f77d4d04b222a817925d33ba3589b190bfa863 upstream. Currently x86_64_start_kernel() has two KASAN related function calls. The first call maps shadow to early_level4_pgt, the second maps shadow to init_level4_pgt. 
If we move clear_page(init_level4_pgt) earlier, we could hide KASAN low level detail from generic x86_64 initialization code. The next patch will do it. Signed-off-by: Andrey Ryabinin Cc: Alexander Popov Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Borislav Petkov Cc: Dmitry Vyukov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1435828178-10975-2-git-send-email-a.ryabinin@samsung.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/head64.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 5a4668136e989..65c8985e3eb28 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -166,6 +166,8 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data) /* clear bss before set_intr_gate with early_idt_handler */ clear_bss(); + clear_page(init_level4_pgt); + for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) set_intr_gate(i, early_idt_handler_array[i]); load_idt((const struct desc_ptr *)&idt_descr); @@ -177,7 +179,6 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data) */ load_ucode_bsp(); - clear_page(init_level4_pgt); /* set init_level4_pgt kernel high mapping*/ init_level4_pgt[511] = early_level4_pgt[511]; From 2fe36f4f2f2ae489cb7ab431b9a35947d426bbbb Mon Sep 17 00:00:00 2001 From: Alexander Popov Date: Thu, 2 Jul 2015 12:09:34 +0300 Subject: [PATCH 0431/2091] x86/kasan: Fix KASAN shadow region page tables commit 5d5aa3cfca5cf74cd928daf3674642e6004328d1 upstream. Currently KASAN shadow region page tables created without respect of physical offset (phys_base). This causes kernel halt when phys_base is not zero. So let's initialize KASAN shadow region page tables in kasan_early_init() using __pa_nodebug() which considers phys_base. This patch also separates x86_64_start_kernel() from KASAN low level details by moving kasan_map_early_shadow(init_level4_pgt) into kasan_early_init(). Remove the comment before clear_bss() which stopped bringing much profit to the code readability. Otherwise describing all the new order dependencies would be too verbose. 
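For readers unfamiliar with phys_base: on x86-64 the kernel image can be loaded at a physical address other than the one it was linked for, and __pa()/__pa_nodebug() fold that offset in. A minimal user-space sketch of the arithmetic (the addresses and the simplified macro are made-up example values, not the real kernel ones):

    #include <stdio.h>
    #include <stdint.h>

    #define START_KERNEL_MAP 0xffffffff80000000ULL  /* simplified stand-in */

    static uint64_t phys_base = 0x1000000;  /* example: image loaded 16 MiB higher */

    /* roughly what __pa_nodebug() does for kernel-image addresses */
    static uint64_t pa(uint64_t vaddr)
    {
        return vaddr - START_KERNEL_MAP + phys_base;
    }

    int main(void)
    {
        uint64_t pgtable = 0xffffffff82000000ULL;  /* some page-table symbol */

        /* link-time offset only: what the old assembly tables encoded */
        printf("without phys_base: %#llx\n",
               (unsigned long long)(pgtable - START_KERNEL_MAP));
        /* with phys_base folded in: what the hardware actually needs */
        printf("with phys_base:    %#llx\n", (unsigned long long)pa(pgtable));
        return 0;
    }

With a non-zero phys_base the two values differ, which is why the statically built shadow tables pointed at the wrong physical pages.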
Signed-off-by: Alexander Popov Signed-off-by: Andrey Ryabinin Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Borislav Petkov Cc: Dmitry Vyukov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1435828178-10975-3-git-send-email-a.ryabinin@samsung.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/kasan.h | 8 ++------ arch/x86/kernel/head64.c | 7 ++----- arch/x86/kernel/head_64.S | 29 ----------------------------- arch/x86/mm/kasan_init_64.c | 36 ++++++++++++++++++++++++++++++++++-- 4 files changed, 38 insertions(+), 42 deletions(-) diff --git a/arch/x86/include/asm/kasan.h b/arch/x86/include/asm/kasan.h index 8b22422fbad8a..74a2a8dc99089 100644 --- a/arch/x86/include/asm/kasan.h +++ b/arch/x86/include/asm/kasan.h @@ -14,15 +14,11 @@ #ifndef __ASSEMBLY__ -extern pte_t kasan_zero_pte[]; -extern pte_t kasan_zero_pmd[]; -extern pte_t kasan_zero_pud[]; - #ifdef CONFIG_KASAN -void __init kasan_map_early_shadow(pgd_t *pgd); +void __init kasan_early_init(void); void __init kasan_init(void); #else -static inline void kasan_map_early_shadow(pgd_t *pgd) { } +static inline void kasan_early_init(void) { } static inline void kasan_init(void) { } #endif diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 65c8985e3eb28..f129a9af63574 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -161,13 +161,12 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data) /* Kill off the identity-map trampoline */ reset_early_page_tables(); - kasan_map_early_shadow(early_level4_pgt); - - /* clear bss before set_intr_gate with early_idt_handler */ clear_bss(); clear_page(init_level4_pgt); + kasan_early_init(); + for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) set_intr_gate(i, early_idt_handler_array[i]); load_idt((const struct desc_ptr *)&idt_descr); @@ -182,8 +181,6 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data) /* set init_level4_pgt kernel high mapping*/ init_level4_pgt[511] = early_level4_pgt[511]; - kasan_map_early_shadow(init_level4_pgt); - x86_64_start_reservations(real_mode_data); } diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index df7e78057ae00..7e5da2cbe59ed 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -516,38 +516,9 @@ ENTRY(phys_base) /* This must match the first entry in level2_kernel_pgt */ .quad 0x0000000000000000 -#ifdef CONFIG_KASAN -#define FILL(VAL, COUNT) \ - .rept (COUNT) ; \ - .quad (VAL) ; \ - .endr - -NEXT_PAGE(kasan_zero_pte) - FILL(kasan_zero_page - __START_KERNEL_map + _KERNPG_TABLE, 512) -NEXT_PAGE(kasan_zero_pmd) - FILL(kasan_zero_pte - __START_KERNEL_map + _KERNPG_TABLE, 512) -NEXT_PAGE(kasan_zero_pud) - FILL(kasan_zero_pmd - __START_KERNEL_map + _KERNPG_TABLE, 512) - -#undef FILL -#endif - - #include "../../x86/xen/xen-head.S" __PAGE_ALIGNED_BSS NEXT_PAGE(empty_zero_page) .skip PAGE_SIZE -#ifdef CONFIG_KASAN -/* - * This page used as early shadow. We don't use empty_zero_page - * at early stages, stack instrumentation could write some garbage - * to this page. - * Latter we reuse it as zero shadow for large ranges of memory - * that allowed to access, but not instrumented by kasan - * (vmalloc/vmemmap ...). 
- */ -NEXT_PAGE(kasan_zero_page) - .skip PAGE_SIZE -#endif diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 4860906c6b9f8..0e4a05fa34d7a 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -11,7 +11,19 @@ extern pgd_t early_level4_pgt[PTRS_PER_PGD]; extern struct range pfn_mapped[E820_X_MAX]; -extern unsigned char kasan_zero_page[PAGE_SIZE]; +static pud_t kasan_zero_pud[PTRS_PER_PUD] __page_aligned_bss; +static pmd_t kasan_zero_pmd[PTRS_PER_PMD] __page_aligned_bss; +static pte_t kasan_zero_pte[PTRS_PER_PTE] __page_aligned_bss; + +/* + * This page used as early shadow. We don't use empty_zero_page + * at early stages, stack instrumentation could write some garbage + * to this page. + * Latter we reuse it as zero shadow for large ranges of memory + * that allowed to access, but not instrumented by kasan + * (vmalloc/vmemmap ...). + */ +static unsigned char kasan_zero_page[PAGE_SIZE] __page_aligned_bss; static int __init map_range(struct range *range) { @@ -36,7 +48,7 @@ static void __init clear_pgds(unsigned long start, pgd_clear(pgd_offset_k(start)); } -void __init kasan_map_early_shadow(pgd_t *pgd) +static void __init kasan_map_early_shadow(pgd_t *pgd) { int i; unsigned long start = KASAN_SHADOW_START; @@ -166,6 +178,26 @@ static struct notifier_block kasan_die_notifier = { }; #endif +void __init kasan_early_init(void) +{ + int i; + pteval_t pte_val = __pa_nodebug(kasan_zero_page) | __PAGE_KERNEL; + pmdval_t pmd_val = __pa_nodebug(kasan_zero_pte) | _KERNPG_TABLE; + pudval_t pud_val = __pa_nodebug(kasan_zero_pmd) | _KERNPG_TABLE; + + for (i = 0; i < PTRS_PER_PTE; i++) + kasan_zero_pte[i] = __pte(pte_val); + + for (i = 0; i < PTRS_PER_PMD; i++) + kasan_zero_pmd[i] = __pmd(pmd_val); + + for (i = 0; i < PTRS_PER_PUD; i++) + kasan_zero_pud[i] = __pud(pud_val); + + kasan_map_early_shadow(early_level4_pgt); + kasan_map_early_shadow(init_level4_pgt); +} + void __init kasan_init(void) { int i; From bcb87bb5a5c1f990e584c6003e50869087bc2646 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Thu, 2 Jul 2015 12:09:35 +0300 Subject: [PATCH 0432/2091] x86/kasan: Flush TLBs after switching CR3 commit 241d2c54c62fa0939fc9a9512b48ac3434e90a89 upstream. load_cr3() doesn't cause tlb_flush if PGE enabled. This may cause tons of false positive reports spamming the kernel to death. To fix this __flush_tlb_all() should be called explicitly after CR3 changed. 
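The underlying rule is that TLB entries created from PTEs with _PAGE_GLOBAL survive a CR3 write while CR4.PGE is enabled; only an explicit full flush drops them. A toy model of that invariant (pure user-space C, no resemblance to the real TLB code):

    #include <stdio.h>
    #include <stdbool.h>

    struct tlb_entry { bool valid; bool global; };

    /* a CR3 write only invalidates non-global entries */
    static void switch_cr3(struct tlb_entry *tlb, int n)
    {
        for (int i = 0; i < n; i++)
            if (!tlb[i].global)
                tlb[i].valid = false;
    }

    /* a __flush_tlb_all() style flush drops global entries as well */
    static void flush_all(struct tlb_entry *tlb, int n)
    {
        for (int i = 0; i < n; i++)
            tlb[i].valid = false;
    }

    int main(void)
    {
        struct tlb_entry tlb[2] = {
            { .valid = true, .global = false },
            { .valid = true, .global = true },  /* _PAGE_GLOBAL mapping */
        };

        switch_cr3(tlb, 2);
        printf("global entry still cached after CR3 switch: %d\n", tlb[1].valid);
        flush_all(tlb, 2);
        printf("global entry still cached after full flush: %d\n", tlb[1].valid);
        return 0;
    }

Without the explicit flush, stale translations cached as global entries keep producing the false positives described above.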
Signed-off-by: Andrey Ryabinin Cc: Alexander Popov Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Borislav Petkov Cc: Dmitry Vyukov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1435828178-10975-4-git-send-email-a.ryabinin@samsung.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/kasan_init_64.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 0e4a05fa34d7a..5d26642e4e8a0 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -208,6 +208,7 @@ void __init kasan_init(void) memcpy(early_level4_pgt, init_level4_pgt, sizeof(early_level4_pgt)); load_cr3(early_level4_pgt); + __flush_tlb_all(); clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END); @@ -234,5 +235,6 @@ void __init kasan_init(void) memset(kasan_zero_page, 0, PAGE_SIZE); load_cr3(init_level4_pgt); + __flush_tlb_all(); init_task.kasan_depth = 0; } From 6b12a75d0777a320a691a5f7a38aab2f4e80a733 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Thu, 2 Jul 2015 12:09:36 +0300 Subject: [PATCH 0433/2091] x86/kasan: Fix boot crash on AMD processors commit d4f86beacc21d538dc41e1fc75a22e084f547edf upstream. While populating zero shadow wrong bits in upper level page tables used. __PAGE_KERNEL_RO that was used for pgd/pud/pmd has _PAGE_BIT_GLOBAL set. Global bit is present only in the lowest level of the page translation hierarchy (ptes), and it should be zero in upper levels. This bug seems doesn't cause any troubles on Intel cpus, while on AMDs it cause kernel crash on boot. Use _KERNPG_TABLE bits for pgds/puds/pmds to fix this. Reported-by: Borislav Petkov Signed-off-by: Andrey Ryabinin Cc: Alexander Popov Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Dmitry Vyukov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1435828178-10975-5-git-send-email-a.ryabinin@samsung.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/kasan_init_64.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 5d26642e4e8a0..9a54dbe980643 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -85,7 +85,7 @@ static int __init zero_pmd_populate(pud_t *pud, unsigned long addr, while (IS_ALIGNED(addr, PMD_SIZE) && addr + PMD_SIZE <= end) { WARN_ON(!pmd_none(*pmd)); set_pmd(pmd, __pmd(__pa_nodebug(kasan_zero_pte) - | __PAGE_KERNEL_RO)); + | _KERNPG_TABLE)); addr += PMD_SIZE; pmd = pmd_offset(pud, addr); } @@ -111,7 +111,7 @@ static int __init zero_pud_populate(pgd_t *pgd, unsigned long addr, while (IS_ALIGNED(addr, PUD_SIZE) && addr + PUD_SIZE <= end) { WARN_ON(!pud_none(*pud)); set_pud(pud, __pud(__pa_nodebug(kasan_zero_pmd) - | __PAGE_KERNEL_RO)); + | _KERNPG_TABLE)); addr += PUD_SIZE; pud = pud_offset(pgd, addr); } @@ -136,7 +136,7 @@ static int __init zero_pgd_populate(unsigned long addr, unsigned long end) while (IS_ALIGNED(addr, PGDIR_SIZE) && addr + PGDIR_SIZE <= end) { WARN_ON(!pgd_none(*pgd)); set_pgd(pgd, __pgd(__pa_nodebug(kasan_zero_pud) - | __PAGE_KERNEL_RO)); + | _KERNPG_TABLE)); addr += PGDIR_SIZE; pgd = pgd_offset_k(addr); } From f662ffe3a409bef287c5f0b235d9095b5aa72436 Mon Sep 17 00:00:00 2001 From: "Vutla, Lokesh" Date: Thu, 2 Jul 2015 18:33:28 +0530 Subject: [PATCH 0434/2091] crypto: omap-des - Fix unmapping of dma channels commit acb33cc541d7a5495b16a133702d4c401ea4e294 upstream. 
dma_unmap_sg() is being called twice after completing the task. Looks like this is a copy paste error when creating des driver. With this the following warn appears during boot: [ 4.210457] ------------[ cut here ]------------ [ 4.215114] WARNING: CPU: 0 PID: 0 at lib/dma-debug.c:1080 check_unmap+0x710/0x9a0() [ 4.222899] omap-des 480a5000.des: DMA-API: device driver tries to free DMA memory it has not allocated [device address=0x00000000ab2ce000] [size=8 bytes] [ 4.236785] Modules linked in: [ 4.239860] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 3.14.39-02999-g1bc045a-dirty #182 [ 4.247918] [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [ 4.255710] [] (show_stack) from [] (dump_stack+0x84/0xb8) [ 4.262977] [] (dump_stack) from [] (warn_slowpath_common+0x68/0x8c) [ 4.271107] [] (warn_slowpath_common) from [] (warn_slowpath_fmt+0x30/0x40) [ 4.279854] [] (warn_slowpath_fmt) from [] (check_unmap+0x710/0x9a0) [ 4.287991] [] (check_unmap) from [] (debug_dma_unmap_sg+0x90/0x19c) [ 4.296128] [] (debug_dma_unmap_sg) from [] (omap_des_done_task+0x1cc/0x3e4) [ 4.304963] [] (omap_des_done_task) from [] (tasklet_action+0x84/0x124) [ 4.313370] [] (tasklet_action) from [] (__do_softirq+0xf0/0x20c) [ 4.321235] [] (__do_softirq) from [] (irq_exit+0x98/0xec) [ 4.328500] [] (irq_exit) from [] (handle_IRQ+0x50/0xb0) [ 4.335589] [] (handle_IRQ) from [] (gic_handle_irq+0x28/0x5c) Removing the duplicate call to dma_unmap_sg(). Reported-by: Tomi Valkeinen Signed-off-by: Lokesh Vutla Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/omap-des.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/crypto/omap-des.c b/drivers/crypto/omap-des.c index 46307098f8bab..0a70e46d54165 100644 --- a/drivers/crypto/omap-des.c +++ b/drivers/crypto/omap-des.c @@ -536,9 +536,6 @@ static int omap_des_crypt_dma_stop(struct omap_des_dev *dd) dmaengine_terminate_all(dd->dma_lch_in); dmaengine_terminate_all(dd->dma_lch_out); - dma_unmap_sg(dd->dev, dd->in_sg, dd->in_sg_len, DMA_TO_DEVICE); - dma_unmap_sg(dd->dev, dd->out_sg, dd->out_sg_len, DMA_FROM_DEVICE); - return err; } From 5e62f6843f4071e5cbb021c00737ebd0673ebf18 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 6 Jul 2015 15:02:37 +0200 Subject: [PATCH 0435/2091] s390/process: fix sfpc inline assembly commit e47994dd44bcb4a77b4152bd0eada585934703c0 upstream. The sfpc inline assembly within execve_tail() may incorrectly set bits 28-31 of the sfpc instruction to a value which is not zero. These bits however are currently unused and therefore should be zero so we won't get surprised if these bits will be used in the future. Therefore remove the second operand from the inline assembly. 
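For background, sfpc is an RRE-format instruction: a 16-bit opcode, eight unused bits, then two 4-bit register fields, of which only the first is defined for sfpc. A toy encoder shows how the two-operand mnemonic smuggles a register number into bits 28-31 (the opcode constant below is only a placeholder for illustration, not taken from the architecture manual):

    #include <stdio.h>
    #include <stdint.h>

    /* RRE layout: bits 0-15 opcode, 16-23 unused, 24-27 R1, 28-31 R2 */
    static uint32_t encode_rre(uint16_t opcode, unsigned int r1, unsigned int r2)
    {
        return ((uint32_t)opcode << 16) | (r1 << 4) | r2;
    }

    int main(void)
    {
        uint16_t opcode = 0xb384;  /* placeholder value for the example */

        printf("sfpc %%r5     -> %08x\n", encode_rre(opcode, 5, 0));
        printf("sfpc %%r5,%%r5 -> %08x\n", encode_rre(opcode, 5, 5));
        return 0;
    }

The second form leaves a non-zero R2 field, which is exactly the reserved-bits concern the patch removes.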
Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/kernel/process.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index dc5edc29b73aa..8f587d871b9f2 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -163,7 +163,7 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp, asmlinkage void execve_tail(void) { current->thread.fp_regs.fpc = 0; - asm volatile("sfpc %0,%0" : : "d" (0)); + asm volatile("sfpc %0" : : "d" (0)); } /* From 88b7166f0d995edd9884a4641d2a0bca77b52090 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 6 Jul 2015 17:58:19 +0200 Subject: [PATCH 0436/2091] s390/sclp: clear upper register halves in _sclp_print_early commit f9c87a6f46d508eae0d9ae640be98d50f237f827 upstream. If the kernel is compiled with gcc 5.1 and the XZ compression option the decompress_kernel function calls _sclp_print_early in 64-bit mode while the content of the upper register half of %r6 is non-zero. This causes a specification exception on the servc instruction in _sclp_servc. The _sclp_print_early function saves and restores the upper registers halves but it fails to clear them for the 31-bit code of the mini sclp driver. Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/kernel/sclp.S | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/s390/kernel/sclp.S b/arch/s390/kernel/sclp.S index 43c3169ea49c7..ada0c07fe1a87 100644 --- a/arch/s390/kernel/sclp.S +++ b/arch/s390/kernel/sclp.S @@ -270,6 +270,8 @@ ENTRY(_sclp_print_early) jno .Lesa2 ahi %r15,-80 stmh %r6,%r15,96(%r15) # store upper register halves + basr %r13,0 + lmh %r0,%r15,.Lzeroes-.(%r13) # clear upper register halves .Lesa2: lr %r10,%r2 # save string pointer lhi %r2,0 @@ -291,6 +293,8 @@ ENTRY(_sclp_print_early) .Lesa3: lm %r6,%r15,120(%r15) # restore registers br %r14 +.Lzeroes: + .fill 64,4,0 .LwritedataS4: .long 0x00760005 # SCLP command for write data From 36566f6df428262b8ab122e062d72c1a81ad4576 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 7 Jul 2015 08:40:49 +0200 Subject: [PATCH 0437/2091] s390/nmi: fix vector register corruption commit cad49cfc44a5160e3fa09b18e4e7f7cacd13f27d upstream. If a machine check happens, the machine has the vector facility installed and the extended save area exists, the cpu will save vector register contents into the extended save area. This is regardless of control register 0 contents, which enables and disables the vector facility during runtime. On each machine check we should validate the vector registers. The current code however tries to validate the registers only if the running task is using vector registers in user space. However even the current code is broken and causes vector register corruption on machine checks, if user space uses them: the prefix area contains a pointer (absolute address) to the machine check extended save area. In order to save some space the save area was put into an unused area of the second prefix page. When validating vector register contents the code uses the absolute address of the extended save area, which is wrong. Due to prefixing the vector instructions will then access contents using absolute addresses instead of real addresses, where the machine stored the contents. If the above would work there is still the problem that register validition would only happen if user space uses vector registers. 
If kernel space uses them also, this may also lead to vector register content corruption: if the kernel makes use of vector instructions, but the current running user space context does not, the machine check handler will validate floating point registers instead of vector registers. Given the fact that writing to a floating point register may change the upper halve of the corresponding vector register, we also experience vector register corruption in this case. Fix all of these issues, and always validate vector registers on each machine check, if the machine has the vector facility installed and the extended save area is defined. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/include/asm/ctl_reg.h | 5 +++- arch/s390/kernel/nmi.c | 51 +++++++++++++++++++-------------- 2 files changed, 34 insertions(+), 22 deletions(-) diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h index cfad7fca01d61..d7697ab802f6c 100644 --- a/arch/s390/include/asm/ctl_reg.h +++ b/arch/s390/include/asm/ctl_reg.h @@ -57,7 +57,10 @@ union ctlreg0 { unsigned long lap : 1; /* Low-address-protection control */ unsigned long : 4; unsigned long edat : 1; /* Enhanced-DAT-enablement control */ - unsigned long : 23; + unsigned long : 4; + unsigned long afp : 1; /* AFP-register control */ + unsigned long vx : 1; /* Vector enablement control */ + unsigned long : 17; }; }; diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 505c17c0ae1a6..56b550893593a 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -21,6 +21,7 @@ #include #include #include +#include struct mcck_struct { int kill_task; @@ -129,26 +130,30 @@ static int notrace s390_revalidate_registers(struct mci *mci) } else asm volatile("lfpc 0(%0)" : : "a" (fpt_creg_save_area)); - asm volatile( - " ld 0,0(%0)\n" - " ld 1,8(%0)\n" - " ld 2,16(%0)\n" - " ld 3,24(%0)\n" - " ld 4,32(%0)\n" - " ld 5,40(%0)\n" - " ld 6,48(%0)\n" - " ld 7,56(%0)\n" - " ld 8,64(%0)\n" - " ld 9,72(%0)\n" - " ld 10,80(%0)\n" - " ld 11,88(%0)\n" - " ld 12,96(%0)\n" - " ld 13,104(%0)\n" - " ld 14,112(%0)\n" - " ld 15,120(%0)\n" - : : "a" (fpt_save_area)); - /* Revalidate vector registers */ - if (MACHINE_HAS_VX && current->thread.vxrs) { + if (!MACHINE_HAS_VX) { + /* Revalidate floating point registers */ + asm volatile( + " ld 0,0(%0)\n" + " ld 1,8(%0)\n" + " ld 2,16(%0)\n" + " ld 3,24(%0)\n" + " ld 4,32(%0)\n" + " ld 5,40(%0)\n" + " ld 6,48(%0)\n" + " ld 7,56(%0)\n" + " ld 8,64(%0)\n" + " ld 9,72(%0)\n" + " ld 10,80(%0)\n" + " ld 11,88(%0)\n" + " ld 12,96(%0)\n" + " ld 13,104(%0)\n" + " ld 14,112(%0)\n" + " ld 15,120(%0)\n" + : : "a" (fpt_save_area)); + } else { + /* Revalidate vector registers */ + union ctlreg0 cr0; + if (!mci->vr) { /* * Vector registers can't be restored and therefore @@ -156,8 +161,12 @@ static int notrace s390_revalidate_registers(struct mci *mci) */ kill_task = 1; } + cr0.val = S390_lowcore.cregs_save_area[0]; + cr0.afp = cr0.vx = 1; + __ctl_load(cr0.val, 0, 0); restore_vx_regs((__vector128 *) - S390_lowcore.vector_save_area_addr); + &S390_lowcore.vector_save_area); + __ctl_load(S390_lowcore.cregs_save_area[0], 0, 0); } /* Revalidate access registers */ asm volatile( From 19d959286d62e07ce0e46c6c65ace822a6dc52e0 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Thu, 23 Jul 2015 19:26:15 +0200 Subject: [PATCH 0438/2091] s390/bpf: clear correct BPF accumulator register commit 30342fe65e511007672437741158d493472f427f upstream. 
Currently we assumed the following BPF to eBPF register mapping: - BPF_REG_A -> BPF_REG_7 - BPF_REG_X -> BPF_REG_8 Unfortunately this mapping is wrong. The correct mapping is: - BPF_REG_A -> BPF_REG_0 - BPF_REG_X -> BPF_REG_7 So clear the correct registers and use the BPF_REG_A and BPF_REG_X macros instead of BPF_REG_0/7. Fixes: 054623105728 ("s390/bpf: Add s390x eBPF JIT compiler backend") Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/net/bpf_jit_comp.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 9afb9d602f841..dc2d7aa564401 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -415,13 +415,13 @@ static void bpf_jit_prologue(struct bpf_jit *jit) EMIT6_DISP_LH(0xe3000000, 0x0004, REG_SKB_DATA, REG_0, BPF_REG_1, offsetof(struct sk_buff, data)); } - /* BPF compatibility: clear A (%b7) and X (%b8) registers */ - if (REG_SEEN(BPF_REG_7)) - /* lghi %b7,0 */ - EMIT4_IMM(0xa7090000, BPF_REG_7, 0); - if (REG_SEEN(BPF_REG_8)) - /* lghi %b8,0 */ - EMIT4_IMM(0xa7090000, BPF_REG_8, 0); + /* BPF compatibility: clear A (%b0) and X (%b7) registers */ + if (REG_SEEN(BPF_REG_A)) + /* lghi %ba,0 */ + EMIT4_IMM(0xa7090000, BPF_REG_A, 0); + if (REG_SEEN(BPF_REG_X)) + /* lghi %bx,0 */ + EMIT4_IMM(0xa7090000, BPF_REG_X, 0); } /* From a2bfecc423b5f96bbf9d5acb22fbceea084cc2d5 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 27 Jul 2015 09:53:49 +0200 Subject: [PATCH 0439/2091] s390/cachinfo: add missing facility check to init_cache_level() commit 0b991f5cdcd6201e5401f83ca3a672343c3bfc49 upstream. Stephen Powell reported the following crash on a z890 machine: Kernel BUG at 00000000001219d0 [verbose debug info unavailable] illegal operation: 0001 ilc:3 [#1] SMP Krnl PSW : 0704e00180000000 00000000001219d0 (init_cache_level+0x38/0xe0) R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:2 PM:0 EA:3 Krnl Code: 00000000001219c2: a7840056 brc 8,121a6e 00000000001219c6: a7190000 lghi %r1,0 #00000000001219ca: eb101000004c ecag %r1,%r0,0(%r1) >00000000001219d0: a7390000 lghi %r3,0 00000000001219d4: e310f0a00024 stg %r1,160(%r15) 00000000001219da: a7080000 lhi %r0,0 00000000001219de: a7b9f000 lghi %r11,-4096 00000000001219e2: c0a0002899d9 larl %r10,634d94 Call Trace: [<0000000000478ee2>] detect_cache_attributes+0x2a/0x2b8 [<000000000097c9b0>] cacheinfo_sysfs_init+0x60/0xc8 [<00000000001001c0>] do_one_initcall+0x98/0x1c8 [<000000000094fdc2>] kernel_init_freeable+0x212/0x2d8 [<000000000062352e>] kernel_init+0x26/0x118 [<000000000062fd2e>] kernel_thread_starter+0x6/0xc The illegal operation was executed because of a missing facility check, which should have made sure that the ECAG execution would only be executed on machines which have the general-instructions-extension facility installed. 
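The guard added below relies on the facility-list bitmap the machine reports, where facility N is bit (N mod 8), counted from the most significant bit, of byte N/8. A toy user-space re-implementation of that lookup (simplified; the real helper is test_facility() in the s390 headers):

    #include <stdio.h>
    #include <stdint.h>

    static int facility_installed(const uint8_t *list, unsigned int nr)
    {
        return (list[nr >> 3] & (0x80 >> (nr & 7))) != 0;
    }

    int main(void)
    {
        uint8_t facilities[8] = { 0 };

        /* pretend the machine announces facility 34, the
         * general-instructions-extension facility that ECAG needs */
        facilities[34 >> 3] |= 0x80 >> (34 & 7);

        printf("facility 34: %d\n", facility_installed(facilities, 34));
        printf("facility 35: %d\n", facility_installed(facilities, 35));
        return 0;
    }

On a machine like the z890 above the bit for facility 34 is simply absent, so init_cache_level() now returns -EOPNOTSUPP instead of executing an unavailable instruction.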
Reported-and-tested-by: Stephen Powell Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/kernel/cache.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/s390/kernel/cache.c b/arch/s390/kernel/cache.c index bff5e3b6d8223..8ba32436effe4 100644 --- a/arch/s390/kernel/cache.c +++ b/arch/s390/kernel/cache.c @@ -138,6 +138,8 @@ int init_cache_level(unsigned int cpu) union cache_topology ct; enum cache_type ctype; + if (!test_facility(34)) + return -EOPNOTSUPP; if (!this_cpu_ci) return -EINVAL; ct.raw = ecag(EXTRACT_TOPOLOGY, 0, 0); From bad8eab06cbce7ef086e60a5e5dff3923f831d61 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 18 Jun 2015 13:54:01 +0530 Subject: [PATCH 0440/2091] ARC: Override toplevel default -O2 with -O3 commit 97709069214eb75312c14946803b9da4d3814203 upstream. ARC kernels have historically been built with -O3, despite top level Makefile defaulting to -O2. This was facilitated by implicitly ordering of arch makefile include AFTER top level assigned -O2. An upstream fix to top level a1c48bb160f ("Makefile: Fix unrecognized cross-compiler command line options") changed the ordering, making ARC -O3 defunct. Fix that by NOT relying on any ordering whatsoever and use the proper arch override facility now present in kbuild (ARCH_*FLAGS) Depends-on: ("kbuild: Allow arch Makefiles to override {cpp,ld,c}flags") Suggested-by: Michal Marek Cc: Geert Uytterhoeven Signed-off-by: Vineet Gupta Signed-off-by: Greg Kroah-Hartman --- arch/arc/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arc/Makefile b/arch/arc/Makefile index db72fec0e160f..2f21e1e0ecf75 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -43,7 +43,8 @@ endif ifndef CONFIG_CC_OPTIMIZE_FOR_SIZE # Generic build system uses -O2, we want -O3 -cflags-y += -O3 +# Note: No need to add to cflags-y as that happens anyways +ARCH_CFLAGS += -O3 endif # small data is default for elf32 tool-chain. If not usable, disable it From a783168dabf7c681959353b6560da3b0b1370386 Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Mon, 13 Jul 2015 10:25:17 +0300 Subject: [PATCH 0441/2091] ARC: make sure instruction_pointer() returns unsigned value commit f51e2f1911122879eefefa4c592dea8bf794b39c upstream. Currently instruction_pointer() returns pt_regs->ret and so return value is of type "long", which implicitly stands for "signed long". While that's perfectly fine when dealing with 32-bit values if return value of instruction_pointer() gets assigned to 64-bit variable sign extension may happen. And at least in one real use-case it happens already. In perf_prepare_sample() return value of perf_instruction_pointer() (which is an alias to instruction_pointer() in case of ARC) is assigned to (struct perf_sample_data)->ip (which type is "u64"). And what we see if instuction pointer points to user-space application that in case of ARC lays below 0x8000_0000 "ip" gets set properly with leading 32 zeros. But if instruction pointer points to kernel address space that starts from 0x8000_0000 then "ip" is set with 32 leadig "f"-s. I.e. id instruction_pointer() returns 0x8100_0000, "ip" will be assigned with 0xffff_ffff__8100_0000. Which is obviously wrong. In particular that issuse broke output of perf, because perf was unable to associate addresses like 0xffff_ffff__8100_0000 with anything from /proc/kallsyms. That's what we used to see: ----------->8---------- 6.27% ls [unknown] [k] 0xffffffff8046c5cc 2.96% ls libuClibc-0.9.34-git.so [.] 
memcpy 2.25% ls libuClibc-0.9.34-git.so [.] memset 1.66% ls [unknown] [k] 0xffffffff80666536 1.54% ls libuClibc-0.9.34-git.so [.] 0x000224d6 1.18% ls libuClibc-0.9.34-git.so [.] 0x00022472 ----------->8---------- With that change perf output looks much better now: ----------->8---------- 8.21% ls [kernel.kallsyms] [k] memset 3.52% ls libuClibc-0.9.34-git.so [.] memcpy 2.11% ls libuClibc-0.9.34-git.so [.] malloc 1.88% ls libuClibc-0.9.34-git.so [.] memset 1.64% ls [kernel.kallsyms] [k] _raw_spin_unlock_irqrestore 1.41% ls [kernel.kallsyms] [k] __d_lookup_rcu ----------->8---------- Signed-off-by: Alexey Brodkin Cc: arc-linux-dev@synopsys.com Cc: linux-kernel@vger.kernel.org Signed-off-by: Vineet Gupta Signed-off-by: Greg Kroah-Hartman --- arch/arc/include/asm/ptrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arc/include/asm/ptrace.h b/arch/arc/include/asm/ptrace.h index 1bfeec2c0558c..2a58af7a2e3a4 100644 --- a/arch/arc/include/asm/ptrace.h +++ b/arch/arc/include/asm/ptrace.h @@ -63,7 +63,7 @@ struct callee_regs { long r25, r24, r23, r22, r21, r20, r19, r18, r17, r16, r15, r14, r13; }; -#define instruction_pointer(regs) ((regs)->ret) +#define instruction_pointer(regs) (unsigned long)((regs)->ret) #define profile_pc(regs) instruction_pointer(regs) /* return 1 if user mode or 0 if kernel mode */ From 32469b1200b97621d0ec24aa0eb675264b95dc7b Mon Sep 17 00:00:00 2001 From: Michal Marek Date: Wed, 1 Jul 2015 17:19:30 +0200 Subject: [PATCH 0442/2091] kbuild: Allow arch Makefiles to override {cpp,ld,c}flags commit 61754c18752ffb78145671e94f053fb202fff041 upstream. Since commit a1c48bb1 (Makefile: Fix unrecognized cross-compiler command line options), the arch Makefile is included earlier by the main Makefile, preventing the arc architecture to set its -O3 compiler option. Since there might be more use cases for an arch Makefile to fine-tune the options, add support for ARCH_CPPFLAGS, ARCH_AFLAGS and ARCH_CFLAGS variables that are appended to the respective kbuild variables. The user still has the final say via the KCPPFLAGS, KAFLAGS and KCFLAGS variables. Reported-by: Vineet Gupta Signed-off-by: Michal Marek Signed-off-by: Greg Kroah-Hartman --- Documentation/kbuild/makefiles.txt | 8 ++++++++ Makefile | 9 +++++---- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/Documentation/kbuild/makefiles.txt b/Documentation/kbuild/makefiles.txt index 74b6c6d972109..d2b1c40cb6667 100644 --- a/Documentation/kbuild/makefiles.txt +++ b/Documentation/kbuild/makefiles.txt @@ -952,6 +952,14 @@ When kbuild executes, the following steps are followed (roughly): $(KBUILD_ARFLAGS) set by the top level Makefile to "D" (deterministic mode) if this option is supported by $(AR). + ARCH_CPPFLAGS, ARCH_AFLAGS, ARCH_CFLAGS Overrides the kbuild defaults + + These variables are appended to the KBUILD_CPPFLAGS, + KBUILD_AFLAGS, and KBUILD_CFLAGS, respectively, after the + top-level Makefile has set any other flags. This provides a + means for an architecture to override the defaults. 
+ + --- 6.2 Add prerequisites to archheaders: The archheaders: rule is used to generate header files that diff --git a/Makefile b/Makefile index 36f3225cdf1fb..b451c00332e57 100644 --- a/Makefile +++ b/Makefile @@ -783,10 +783,11 @@ endif include scripts/Makefile.kasan include scripts/Makefile.extrawarn -# Add user supplied CPPFLAGS, AFLAGS and CFLAGS as the last assignments -KBUILD_CPPFLAGS += $(KCPPFLAGS) -KBUILD_AFLAGS += $(KAFLAGS) -KBUILD_CFLAGS += $(KCFLAGS) +# Add any arch overrides and user supplied CPPFLAGS, AFLAGS and CFLAGS as the +# last assignments +KBUILD_CPPFLAGS += $(ARCH_CPPFLAGS) $(KCPPFLAGS) +KBUILD_AFLAGS += $(ARCH_AFLAGS) $(KAFLAGS) +KBUILD_CFLAGS += $(ARCH_CFLAGS) $(KCFLAGS) # Use --build-id when available. LDFLAGS_BUILD_ID = $(patsubst -Wl$(comma)%,%,\ From 62a3fb2346044a21c8dc4d43a7812e7a54adb39b Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 1 Jul 2015 12:57:40 -0400 Subject: [PATCH 0443/2091] bio integrity: do not assume bio_integrity_pool exists if bioset exists commit bb8bd38b9a1685334b73e8c62e128cbedb875867 upstream. bio_integrity_alloc() and bio_integrity_free() assume that if a bio was allocated from a bioset that that bioset also had its bio_integrity_pool allocated using bioset_integrity_create(). This is a very bad assumption given that bioset_create() and bioset_integrity_create() are completely disjoint. Not all callers of bioset_create() have been trained to also call bioset_integrity_create() -- and they may not care to be. Fix this by falling back to kmalloc'ing 'struct bio_integrity_payload' rather than force all bioset consumers to (wastefully) preallocate a bio_integrity_pool that they very likely won't actually need (given the niche nature of the current block integrity support). Otherwise, a NULL pointer "Kernel BUG" with a trace like the following will be observed (as seen on s390x using zfcp storage) because dm-io doesn't use bioset_integrity_create() when creating its bioset: [ 791.643338] Call Trace: [ 791.643339] ([<00000003df98b848>] 0x3df98b848) [ 791.643341] [<00000000002c5de8>] bio_integrity_alloc+0x48/0xf8 [ 791.643348] [<00000000002c6486>] bio_integrity_prep+0xae/0x2f0 [ 791.643349] [<0000000000371e38>] blk_queue_bio+0x1c8/0x3d8 [ 791.643355] [<000000000036f8d0>] generic_make_request+0xc0/0x100 [ 791.643357] [<000000000036f9b2>] submit_bio+0xa2/0x198 [ 791.643406] [<000003ff801f9774>] dispatch_io+0x15c/0x3b0 [dm_mod] [ 791.643419] [<000003ff801f9b3e>] dm_io+0x176/0x2f0 [dm_mod] [ 791.643423] [<000003ff8074b28a>] do_reads+0x13a/0x1a8 [dm_mirror] [ 791.643425] [<000003ff8074b43a>] do_mirror+0x142/0x298 [dm_mirror] [ 791.643428] [<0000000000154fca>] process_one_work+0x18a/0x3f8 [ 791.643432] [<000000000015598a>] worker_thread+0x132/0x3b0 [ 791.643435] [<000000000015d49a>] kthread+0xd2/0xd8 [ 791.643438] [<00000000005bc0ca>] kernel_thread_starter+0x6/0xc [ 791.643446] [<00000000005bc0c4>] kernel_thread_starter+0x0/0xc Signed-off-by: Mike Snitzer Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- block/bio-integrity.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block/bio-integrity.c b/block/bio-integrity.c index 5cbd5d9ea61dd..39ce74d10e2b2 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -51,7 +51,7 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, unsigned long idx = BIO_POOL_NONE; unsigned inline_vecs; - if (!bs) { + if (!bs || !bs->bio_integrity_pool) { bip = kmalloc(sizeof(struct bio_integrity_payload) + sizeof(struct bio_vec) * nr_vecs, gfp_mask); 
inline_vecs = nr_vecs; @@ -104,7 +104,7 @@ void bio_integrity_free(struct bio *bio) kfree(page_address(bip->bip_vec->bv_page) + bip->bip_vec->bv_offset); - if (bs) { + if (bs && bs->bio_integrity_pool) { if (bip->bip_slab != BIO_POOL_NONE) bvec_free(bs->bvec_integrity_pool, bip->bip_vec, bip->bip_slab); From 39a0ac96dfeeda6e22748be7beac0813a544f511 Mon Sep 17 00:00:00 2001 From: Haggai Eran Date: Fri, 17 Jul 2015 16:24:06 -0700 Subject: [PATCH 0444/2091] dma-debug: skip debug_dma_assert_idle() when disabled commit c9d120b0b2b5069cb2ae62f8eac0cef31c8544be upstream. If dma-debug is disabled due to a memory error, DMA unmaps do not affect the dma_active_cacheline radix tree anymore, and debug_dma_assert_idle() can print false warnings. Disable debug_dma_assert_idle() when dma_debug_disabled() is true. Signed-off-by: Haggai Eran Fixes: 0abdd7a81b7e ("dma-debug: introduce debug_dma_assert_idle()") Cc: Dan Williams Cc: Joerg Roedel Cc: Vinod Koul Cc: Russell King Cc: James Bottomley Cc: Florian Fainelli Cc: Sebastian Ott Cc: Jiri Kosina Cc: Horia Geanta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- lib/dma-debug.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib/dma-debug.c b/lib/dma-debug.c index ae4b65e17e648..dace71fe41f70 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -574,6 +574,9 @@ void debug_dma_assert_idle(struct page *page) unsigned long flags; phys_addr_t cln; + if (dma_debug_disabled()) + return; + if (!page) return; From 8ee1239a023a9a8bdbb1eaeba4e956204beedacd Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 16 Jul 2015 14:10:17 +0200 Subject: [PATCH 0445/2091] genirq: Prevent resend to interrupts marked IRQ_NESTED_THREAD MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 75a06189fc508a2acf470b0b12710362ffb2c4b1 upstream. The resend mechanism happily calls the interrupt handler of interrupts which are marked IRQ_NESTED_THREAD from softirq context. This can result in crashes because the interrupt handler is not the proper way to invoke the device handlers. They must be invoked via handle_nested_irq. Prevent the resend even if the interrupt has no valid parent irq set. Its better to have a lost interrupt than a crashing machine. Reported-by: Uwe Kleine-König Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- kernel/irq/resend.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c index 9065107f083e9..7a5237a1bce5b 100644 --- a/kernel/irq/resend.c +++ b/kernel/irq/resend.c @@ -75,13 +75,21 @@ void check_irq_resend(struct irq_desc *desc, unsigned int irq) !desc->irq_data.chip->irq_retrigger(&desc->irq_data)) { #ifdef CONFIG_HARDIRQS_SW_RESEND /* - * If the interrupt has a parent irq and runs - * in the thread context of the parent irq, - * retrigger the parent. + * If the interrupt is running in the thread + * context of the parent irq we need to be + * careful, because we cannot trigger it + * directly. */ - if (desc->parent_irq && - irq_settings_is_nested_thread(desc)) + if (irq_settings_is_nested_thread(desc)) { + /* + * If the parent_irq is valid, we + * retrigger the parent, otherwise we + * do nothing. 
+ */ + if (!desc->parent_irq) + return; irq = desc->parent_irq; + } /* Set it pending and activate the softirq: */ set_bit(irq, irqs_resend); tasklet_schedule(&resend_tasklet); From d1374d7f4f70867f7d0a7b4a89be086a143e5cdd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dominic=20Sacr=C3=A9?= Date: Tue, 30 Jun 2015 17:41:33 +0200 Subject: [PATCH 0446/2091] ALSA: usb-audio: Add MIDI support for Steinberg MI2/MI4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 0689a86ae814f39af94a9736a0a5426dd82eb107 upstream. The Steinberg MI2 and MI4 interfaces are compatible with the USB class audio spec, but the MIDI part of the devices is reported as a vendor specific interface. This patch adds entries to quirks-table.h to recognize the MIDI endpoints. Audio functionality was already working and is unaffected by this change. Signed-off-by: Dominic Sacré Signed-off-by: Albert Huitsing Acked-by: Clemens Ladisch Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/quirks-table.h | 68 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h index 2f6d3e9a1bcd0..e4756651a52c8 100644 --- a/sound/usb/quirks-table.h +++ b/sound/usb/quirks-table.h @@ -2512,6 +2512,74 @@ YAMAHA_DEVICE(0x7010, "UB99"), } }, +/* Steinberg devices */ +{ + /* Steinberg MI2 */ + USB_DEVICE_VENDOR_SPEC(0x0a4e, 0x2040), + .driver_info = (unsigned long) & (const struct snd_usb_audio_quirk) { + .ifnum = QUIRK_ANY_INTERFACE, + .type = QUIRK_COMPOSITE, + .data = & (const struct snd_usb_audio_quirk[]) { + { + .ifnum = 0, + .type = QUIRK_AUDIO_STANDARD_INTERFACE + }, + { + .ifnum = 1, + .type = QUIRK_AUDIO_STANDARD_INTERFACE + }, + { + .ifnum = 2, + .type = QUIRK_AUDIO_STANDARD_INTERFACE + }, + { + .ifnum = 3, + .type = QUIRK_MIDI_FIXED_ENDPOINT, + .data = &(const struct snd_usb_midi_endpoint_info) { + .out_cables = 0x0001, + .in_cables = 0x0001 + } + }, + { + .ifnum = -1 + } + } + } +}, +{ + /* Steinberg MI4 */ + USB_DEVICE_VENDOR_SPEC(0x0a4e, 0x4040), + .driver_info = (unsigned long) & (const struct snd_usb_audio_quirk) { + .ifnum = QUIRK_ANY_INTERFACE, + .type = QUIRK_COMPOSITE, + .data = & (const struct snd_usb_audio_quirk[]) { + { + .ifnum = 0, + .type = QUIRK_AUDIO_STANDARD_INTERFACE + }, + { + .ifnum = 1, + .type = QUIRK_AUDIO_STANDARD_INTERFACE + }, + { + .ifnum = 2, + .type = QUIRK_AUDIO_STANDARD_INTERFACE + }, + { + .ifnum = 3, + .type = QUIRK_MIDI_FIXED_ENDPOINT, + .data = &(const struct snd_usb_midi_endpoint_info) { + .out_cables = 0x0001, + .in_cables = 0x0001 + } + }, + { + .ifnum = -1 + } + } + } +}, + /* TerraTec devices */ { USB_DEVICE_VENDOR_SPEC(0x0ccd, 0x0012), From 2b16e01ae6b5c0c5e1f382fcad51a3053270374a Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 14 Jul 2015 15:19:37 +0200 Subject: [PATCH 0447/2091] ALSA: line6: Fix -EBUSY error during active monitoring commit 4d0e677523a999e1dec28e55cc314c47ba09ca12 upstream. When a monitor stream is active, the next PCM stream access results in EBUSY error because of the check in line6_stream_start(). Fix this by just skipping the submission of pending URBs when the stream is already running instead. 
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=101431 Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/line6/pcm.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/sound/usb/line6/pcm.c b/sound/usb/line6/pcm.c index 8461d6bf992f8..204cc074adb96 100644 --- a/sound/usb/line6/pcm.c +++ b/sound/usb/line6/pcm.c @@ -186,12 +186,8 @@ static int line6_stream_start(struct snd_line6_pcm *line6pcm, int direction, int ret = 0; spin_lock_irqsave(&pstr->lock, flags); - if (!test_and_set_bit(type, &pstr->running)) { - if (pstr->active_urbs || pstr->unlink_urbs) { - ret = -EBUSY; - goto error; - } - + if (!test_and_set_bit(type, &pstr->running) && + !(pstr->active_urbs || pstr->unlink_urbs)) { pstr->count = 0; /* Submit all currently available URBs */ if (direction == SNDRV_PCM_STREAM_PLAYBACK) @@ -199,7 +195,6 @@ static int line6_stream_start(struct snd_line6_pcm *line6pcm, int direction, else ret = line6_submit_audio_in_all_urbs(line6pcm); } - error: if (ret < 0) clear_bit(type, &pstr->running); spin_unlock_irqrestore(&pstr->lock, flags); From 898dbc106e50ab6de4106e2415d0f40e260903c4 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 17 Jul 2015 15:22:33 +0200 Subject: [PATCH 0448/2091] ALSA: pcm: Fix lockdep warning with nonatomic PCM ops commit 67756e3191c90e7c0b94b8b2fb63de255b6cd337 upstream. With the nonatomic PCM ops, the system may spew lockdep warnings like: ============================================= [ INFO: possible recursive locking detected ] 4.2.0-rc1-jeejaval3 #12 Not tainted --------------------------------------------- aplay/4029 is trying to acquire lock: (snd_pcm_link_rwsem){.+.+.+}, at: [] snd_pcm_stream_lock+0x43/0x60 but task is already holding lock: (snd_pcm_link_rwsem){.+.+.+}, at: [] snd_pcm_action_nonatomic+0x29/0x80 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(snd_pcm_link_rwsem); lock(snd_pcm_link_rwsem); Although this is false-positive as the rwsem is taken always as read-only for these code paths, it's certainly annoying to see this at any occasion. A simple fix is to use down_read_nested() in snd_pcm_stream_lock() that can be called inside another lock. Reported-by: Vinod Koul Reported-by: Jeeja Kp Tested-by: Jeeja Kp Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/pcm_native.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index d126c03361aef..75888dd38a7fc 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -85,7 +85,7 @@ static DECLARE_RWSEM(snd_pcm_link_rwsem); void snd_pcm_stream_lock(struct snd_pcm_substream *substream) { if (substream->pcm->nonatomic) { - down_read(&snd_pcm_link_rwsem); + down_read_nested(&snd_pcm_link_rwsem, SINGLE_DEPTH_NESTING); mutex_lock(&substream->self_group.mutex); } else { read_lock(&snd_pcm_link_rwlock); From 291603b69df0e25f48bacdeeb3e4d2f041cf23f0 Mon Sep 17 00:00:00 2001 From: Mateusz Sylwestrzak Date: Sun, 19 Jul 2015 17:38:56 +0200 Subject: [PATCH 0449/2091] ALSA: hda - Add headset mic support for Acer Aspire V5-573G commit 0420694dddeb9e269a1ab2129a0119a5cea294a4 upstream. Acer Aspire V5 with the ALC282 codec is given the wrong value for the 0x19 PIN by the laptop's BIOS. Overriding it with the correct value adds support for the headset microphone which would not otherwise be visible in the system. The fix is based on commit 7819717b1134 with a similar quirk for Acer Aspire with the ALC269 codec. 
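For reference, the 32-bit pin default configuration packs several descriptive fields, which is how 0x01a1913c describes an external microphone jack. A small decoder, with the field layout as described in the HD-audio specification and the decoded meaning of this particular value in the comments:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint32_t cfg = 0x01a1913c;  /* value applied to pin 0x19 below */

        printf("connectivity : %u\n",    (unsigned int)((cfg >> 30) & 0x3));  /* 0 = jack */
        printf("location     : 0x%02x\n", (unsigned int)((cfg >> 24) & 0x3f));
        printf("device       : 0x%x\n",  (unsigned int)((cfg >> 20) & 0xf)); /* 0xa = mic in */
        printf("connection   : 0x%x\n",  (unsigned int)((cfg >> 16) & 0xf)); /* 1 = 3.5 mm jack */
        printf("color        : 0x%x\n",  (unsigned int)((cfg >> 12) & 0xf)); /* 9 = pink */
        printf("assoc/seq    : %u/%u\n", (unsigned int)((cfg >> 4) & 0xf),
               (unsigned int)(cfg & 0xf));
        return 0;
    }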
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=96201 Signed-off-by: Mateusz Sylwestrzak Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 0e75998db39fc..47af0393e83fd 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5004,7 +5004,7 @@ static const struct hda_fixup alc269_fixups[] = { { 0x14, 0x90170110 }, { 0x17, 0x40000008 }, { 0x18, 0x411111f0 }, - { 0x19, 0x411111f0 }, + { 0x19, 0x01a1913c }, { 0x1a, 0x411111f0 }, { 0x1b, 0x411111f0 }, { 0x1d, 0x40f89b2d }, From e7e08353d3e6d02d5fb1ae7f7cba775e31bbb478 Mon Sep 17 00:00:00 2001 From: Maruthi Srinivas Bayyavarapu Date: Mon, 20 Jul 2015 19:56:18 +0530 Subject: [PATCH 0450/2091] ALSA: hda: add new AMD PCI IDs with proper driver caps commit 5022813ddb28b7679e8285812d52aaeb7e1e7657 upstream. Fixes audio problems on newer asics Signed-off-by: Maruthi Bayyavarapu Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_intel.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index c403dd10d126c..44dfc7b92bc35 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2056,6 +2056,8 @@ static const struct pci_device_id azx_ids[] = { /* ATI HDMI */ { PCI_DEVICE(0x1002, 0x1308), .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS }, + { PCI_DEVICE(0x1002, 0x157a), + .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS }, { PCI_DEVICE(0x1002, 0x793b), .driver_data = AZX_DRIVER_ATIHDMI | AZX_DCAPS_PRESET_ATI_HDMI }, { PCI_DEVICE(0x1002, 0x7919), @@ -2110,8 +2112,14 @@ static const struct pci_device_id azx_ids[] = { .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS }, { PCI_DEVICE(0x1002, 0xaab0), .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS }, + { PCI_DEVICE(0x1002, 0xaac0), + .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS }, { PCI_DEVICE(0x1002, 0xaac8), .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS }, + { PCI_DEVICE(0x1002, 0xaad8), + .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS }, + { PCI_DEVICE(0x1002, 0xaae8), + .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS }, /* VIA VT8251/VT8237A */ { PCI_DEVICE(0x1106, 0x3288), .driver_data = AZX_DRIVER_VIA | AZX_DCAPS_POSFIX_VIA }, From e40f560b7faaa362f7c72bc40dde20630d9eab33 Mon Sep 17 00:00:00 2001 From: Aaron Plattner Date: Mon, 20 Jul 2015 17:14:14 -0700 Subject: [PATCH 0451/2091] ALSA: hda - Add new GPU codec ID 0x10de007d to snd-hda commit 6c3d91193d829bf58a35a10650415b05a736ca6c upstream. Vendor ID 0x10de007d is used by a yet-to-be-named GPU chip. This chip also has the 2-ch audio swapping bug, so patch_nvhdmi is appropriate here. 
Signed-off-by: Aaron Plattner Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_hdmi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 5f44f60a63897..225b78b4ef125 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -3333,6 +3333,7 @@ static const struct hda_codec_preset snd_hda_preset_hdmi[] = { { .id = 0x10de0070, .name = "GPU 70 HDMI/DP", .patch = patch_nvhdmi }, { .id = 0x10de0071, .name = "GPU 71 HDMI/DP", .patch = patch_nvhdmi }, { .id = 0x10de0072, .name = "GPU 72 HDMI/DP", .patch = patch_nvhdmi }, +{ .id = 0x10de007d, .name = "GPU 7d HDMI/DP", .patch = patch_nvhdmi }, { .id = 0x10de8001, .name = "MCP73 HDMI", .patch = patch_nvhdmi_2ch }, { .id = 0x11069f80, .name = "VX900 HDMI/DP", .patch = patch_via_hdmi }, { .id = 0x11069f81, .name = "VX900 HDMI/DP", .patch = patch_via_hdmi }, @@ -3396,6 +3397,7 @@ MODULE_ALIAS("snd-hda-codec-id:10de0067"); MODULE_ALIAS("snd-hda-codec-id:10de0070"); MODULE_ALIAS("snd-hda-codec-id:10de0071"); MODULE_ALIAS("snd-hda-codec-id:10de0072"); +MODULE_ALIAS("snd-hda-codec-id:10de007d"); MODULE_ALIAS("snd-hda-codec-id:10de8001"); MODULE_ALIAS("snd-hda-codec-id:11069f80"); MODULE_ALIAS("snd-hda-codec-id:11069f81"); From d1456c439644a6ba93245546e8c2613b0bdef990 Mon Sep 17 00:00:00 2001 From: David Henningsson Date: Wed, 22 Jul 2015 10:00:25 +0200 Subject: [PATCH 0452/2091] ALSA: hda - Add headset mic pin quirk for a Dell device commit cba59972a1191a0c1647a52fe745eed7a4b34b38 upstream. Without this patch, the headset mic will not work on this machine. BugLink: https://bugs.launchpad.net/bugs/1476987 Signed-off-by: David Henningsson Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 47af0393e83fd..90c53a4d4d054 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5381,6 +5381,17 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x17, 0x40000000}, {0x1d, 0x40700001}, {0x21, 0x02211030}), + SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, + {0x12, 0x40000000}, + {0x14, 0x90170130}, + {0x17, 0x411111f0}, + {0x18, 0x411111f0}, + {0x19, 0x411111f0}, + {0x1a, 0x411111f0}, + {0x1b, 0x01014020}, + {0x1d, 0x4054c029}, + {0x1e, 0x411111f0}, + {0x21, 0x0221103f}), SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, {0x12, 0x90a60160}, {0x14, 0x90170120}, From 96b9b980c78785631a281c73b3fb3939382b9d26 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 27 Jul 2015 10:13:37 +0200 Subject: [PATCH 0453/2091] ALSA: hda - Apply fixup for another Toshiba Satellite S50D commit b9d9c9efc292dd0ffe172780f915ed74eba3556c upstream. Toshiba Satellite S50D has another model with a different PCI SSID (1179:fa93) while the previous fixup was for 1179:fa91. Adjust the fixup entry with SND_PCI_QUIRK_MASK() to match with both devices. 
Reported-by: Tim Sample Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_sigmatel.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index 6c66d7e164391..25f0f45e66403 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -2920,7 +2920,8 @@ static const struct snd_pci_quirk stac92hd83xxx_fixup_tbl[] = { SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x148a, "HP Mini", STAC_92HD83XXX_HP_LED), SND_PCI_QUIRK_VENDOR(PCI_VENDOR_ID_HP, "HP", STAC_92HD83XXX_HP), - SND_PCI_QUIRK(PCI_VENDOR_ID_TOSHIBA, 0xfa91, + /* match both for 0xfa91 and 0xfa93 */ + SND_PCI_QUIRK_MASK(PCI_VENDOR_ID_TOSHIBA, 0xfffd, 0xfa91, "Toshiba Satellite S50D", STAC_92HD83XXX_GPIO10_EAPD), {} /* terminator */ }; From 1484f6f9ad42a4acd6755ae2bad5ee75f88bd3cc Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 29 Jul 2015 09:04:52 +0200 Subject: [PATCH 0454/2091] ALSA: hda - Apply a fixup to Dell Vostro 5480 commit 3a05d12f46cb95a6a685114819363a56e6170996 upstream. Dell Vostro 5480 (1028:069a) needs the very same quirk used for Vostro 5470 model to make bass speakers properly working. Reported-and-tested-by: Paulo Roberto de Oliveira Castro Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 90c53a4d4d054..872188e00ad34 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5114,6 +5114,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x064a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x064b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0665, "Dell XPS 13", ALC288_FIXUP_DELL_XPS_13), + SND_PCI_QUIRK(0x1028, 0x069a, "Dell Vostro 5480", ALC290_FIXUP_SUBWOOFER_HSJACK), SND_PCI_QUIRK(0x1028, 0x06c7, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x06d9, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x06da, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), From 574169a14b5cafe227a41f7a9d9bdf6acaacaae7 Mon Sep 17 00:00:00 2001 From: Yao-Wen Mao Date: Wed, 29 Jul 2015 15:13:54 +0800 Subject: [PATCH 0455/2091] ALSA: usb-audio: add dB range mapping for some devices commit 2d1cb7f658fb9c3ba8f9dab8aca297d4dfdec835 upstream. Add the correct dB ranges of Bose Companion 5 and Drangonfly DAC 1.2. 
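The underlying issue is that the USB audio class assumes volume values are in 1/256 dB units, while these devices apparently use a different step size, so the generically computed range is wrong and the map pins the correct min/max in ALSA's 0.01 dB units. A rough illustration with a hypothetical raw value (the -801 below is invented purely to show the factor-of-16 discrepancy mentioned in the map comments):

    #include <stdio.h>

    int main(void)
    {
        int raw = -801;  /* hypothetical raw volume value from the device */

        /* class-compliant interpretation: 1/256 dB per unit */
        printf("as 1/256 dB: %d (0.01 dB)\n", raw * 100 / 256); /* about -3.1 dB */
        /* what such hardware apparently means: 1/16 dB per unit */
        printf("as 1/16 dB : %d (0.01 dB)\n", raw * 100 / 16);  /* about -50.1 dB */
        return 0;
    }

The second figure lines up with the -5006 (i.e. -50.06 dB) minimum hard-coded for the Bose Companion 5 below.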
Signed-off-by: Yao-Wen Mao Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/mixer_maps.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/sound/usb/mixer_maps.c b/sound/usb/mixer_maps.c index e5000da9e9d70..6a803eff87f71 100644 --- a/sound/usb/mixer_maps.c +++ b/sound/usb/mixer_maps.c @@ -341,6 +341,20 @@ static const struct usbmix_name_map scms_usb3318_map[] = { { 0 } }; +/* Bose companion 5, the dB conversion factor is 16 instead of 256 */ +static struct usbmix_dB_map bose_companion5_dB = {-5006, -6}; +static struct usbmix_name_map bose_companion5_map[] = { + { 3, NULL, .dB = &bose_companion5_dB }, + { 0 } /* terminator */ +}; + +/* Dragonfly DAC 1.2, the dB conversion factor is 1 instead of 256 */ +static struct usbmix_dB_map dragonfly_1_2_dB = {0, 5000}; +static struct usbmix_name_map dragonfly_1_2_map[] = { + { 7, NULL, .dB = &dragonfly_1_2_dB }, + { 0 } /* terminator */ +}; + /* * Control map entries */ @@ -451,6 +465,16 @@ static struct usbmix_ctl_map usbmix_ctl_maps[] = { .id = USB_ID(0x25c4, 0x0003), .map = scms_usb3318_map, }, + { + /* Bose Companion 5 */ + .id = USB_ID(0x05a7, 0x1020), + .map = bose_companion5_map, + }, + { + /* Dragonfly DAC 1.2 */ + .id = USB_ID(0x21b4, 0x0081), + .map = dragonfly_1_2_map, + }, { 0 } /* terminator */ }; From dbd7bf9912e6635fe2dbbcb927aecf179afab630 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 30 Jul 2015 22:30:29 +0200 Subject: [PATCH 0456/2091] ALSA: hda - Fix MacBook Pro 5,2 quirk commit 649ccd08534ee26deb2e5b08509800d0e95167f5 upstream. MacBook Pro 5,2 with ALC889 codec had already a fixup entry, but this seems not working correctly, a fix for pin NID 0x15 is needed in addition. It's equivalent with the fixup for MacBook Air 1,1, so use this instead. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=102131 Reported-and-tested-by: Jeffery Miller Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 872188e00ad34..590bcfb0e82fa 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -2224,7 +2224,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK(0x106b, 0x4300, "iMac 9,1", ALC889_FIXUP_IMAC91_VREF), SND_PCI_QUIRK(0x106b, 0x4600, "MacbookPro 5,2", ALC889_FIXUP_IMAC91_VREF), SND_PCI_QUIRK(0x106b, 0x4900, "iMac 9,1 Aluminum", ALC889_FIXUP_IMAC91_VREF), - SND_PCI_QUIRK(0x106b, 0x4a00, "Macbook 5,2", ALC889_FIXUP_IMAC91_VREF), + SND_PCI_QUIRK(0x106b, 0x4a00, "Macbook 5,2", ALC889_FIXUP_MBA11_VREF), SND_PCI_QUIRK(0x1071, 0x8258, "Evesham Voyaeger", ALC882_FIXUP_EAPD), SND_PCI_QUIRK(0x1462, 0x7350, "MSI-7350", ALC889_FIXUP_CD), From 68b9e67311ca10a0bdce6ab2939ca0ff3ebac91b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 9 Jul 2015 19:23:38 +0200 Subject: [PATCH 0457/2091] x86, perf: Fix static_key bug in load_mm_cr4() commit a833581e372a4adae2319d8dc379493edbc444e9 upstream. Mikulas reported his K6-3 not booting. This is because the static_key API confusion struck and bit Andy, this wants to be static_key_false(). Reported-by: Mikulas Patocka Tested-by: Mikulas Patocka Signed-off-by: Peter Zijlstra (Intel) Cc: Andrea Arcangeli Cc: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Valdis Kletnieks Cc: Vince Weaver Cc: hillf.zj Fixes: a66734297f78 ("perf/x86: Add /sys/devices/cpu/rdpmc=2 to allow rdpmc for all tasks") Link: http://lkml.kernel.org/r/20150709172338.GC19282@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/mmu_context.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 883f6b933fa4b..e997f70f80c49 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -23,7 +23,7 @@ extern struct static_key rdpmc_always_available; static inline void load_mm_cr4(struct mm_struct *mm) { - if (static_key_true(&rdpmc_always_available) || + if (static_key_false(&rdpmc_always_available) || atomic_read(&mm->context.perf_rdpmc_allowed)) cr4_set_bits(X86_CR4_PCE); else From 9bf9f8b09de63b57c39968491ae9301b8114a43d Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 8 Jul 2015 16:08:24 -0400 Subject: [PATCH 0458/2091] Revert "dm: only run the queue on completion if congested or no requests pending" commit 621739b00e16ca2d80411dc9b111cb15b91f3ba9 upstream. This reverts commit 9a0e609e3fd8a95c96629b9fbde6b8c5b9a1456a. (Resolved a conflict during revert due to commit bfebd1cdb4 that came after) This revert is motivated by a couple failure reports on request-based DM multipath testbeds: 1) Netapp reported that their multipath fault injection test under heavy IO load can stall longer than 300 seconds. 2) IBM reported elevated lock contention in their testbed (likely due to increased back pressure due to IO not being dispatched as quickly): https://www.redhat.com/archives/dm-devel/2015-July/msg00057.html Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 2caf492890d64..e8d84566f3118 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1053,13 +1053,10 @@ static struct dm_rq_target_io *tio_from_request(struct request *rq) */ static void rq_completed(struct mapped_device *md, int rw, bool run_queue) { - int nr_requests_pending; - atomic_dec(&md->pending[rw]); /* nudge anyone waiting on suspend queue */ - nr_requests_pending = md_in_flight(md); - if (!nr_requests_pending) + if (!md_in_flight(md)) wake_up(&md->wait); /* @@ -1071,8 +1068,7 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue) if (run_queue) { if (md->queue->mq_ops) blk_mq_run_hw_queues(md->queue, true); - else if (!nr_requests_pending || - (nr_requests_pending >= md->queue->nr_congestion_on)) + else blk_run_queue_async(md->queue); } From dc59806d397a10f8a7c838f3de4fe0185d4299a3 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 17 Jul 2015 10:46:42 +0100 Subject: [PATCH 0459/2091] irqchip/gicv3-its: Fix mapping of LPIs to collections commit 591e5bec13f15feb13fc445b6c9c59954711c4ac upstream. The GICv3 ITS architecture allows a given [DevID, EventID] pair to be translated to a [LPI, Collection] pair, where DevID is the device writing the MSI, EventID is the payload being written, LPI is the actual interrupt number, and Collection is roughly equivalent to a target CPU. Each LPI can be mapped to a separate collection, but the ITS driver insists on maintaining the collection on a device basis, instead of doing it on a per interrupt basis. 
This is obviously flawed, and this patch fixes it by adding a per interrupt index that indicates which collection number is in use. Reported-by: Ian Campbell Signed-off-by: Marc Zyngier Cc: Cc: Jason Cooper Link: http://lkml.kernel.org/r/1437126402-11677-1-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- drivers/irqchip/irq-gic-v3-its.c | 111 +++++++++++++++++++++---------- 1 file changed, 75 insertions(+), 36 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 1b7e155869f6c..c00e2db351ba5 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -75,6 +75,13 @@ struct its_node { #define ITS_ITT_ALIGN SZ_256 +struct event_lpi_map { + unsigned long *lpi_map; + u16 *col_map; + irq_hw_number_t lpi_base; + int nr_lpis; +}; + /* * The ITS view of a device - belongs to an ITS, a collection, owns an * interrupt translation table, and a list of interrupts. @@ -82,11 +89,8 @@ struct its_node { struct its_device { struct list_head entry; struct its_node *its; - struct its_collection *collection; + struct event_lpi_map event_map; void *itt; - unsigned long *lpi_map; - irq_hw_number_t lpi_base; - int nr_lpis; u32 nr_ites; u32 device_id; }; @@ -99,6 +103,14 @@ static struct rdists *gic_rdists; #define gic_data_rdist() (raw_cpu_ptr(gic_rdists->rdist)) #define gic_data_rdist_rd_base() (gic_data_rdist()->rd_base) +static struct its_collection *dev_event_to_col(struct its_device *its_dev, + u32 event) +{ + struct its_node *its = its_dev->its; + + return its->collections + its_dev->event_map.col_map[event]; +} + /* * ITS command descriptors - parameters to be encoded in a command * block. @@ -134,7 +146,7 @@ struct its_cmd_desc { struct { struct its_device *dev; struct its_collection *col; - u32 id; + u32 event_id; } its_movi_cmd; struct { @@ -241,7 +253,7 @@ static struct its_collection *its_build_mapd_cmd(struct its_cmd_block *cmd, its_fixup_cmd(cmd); - return desc->its_mapd_cmd.dev->collection; + return NULL; } static struct its_collection *its_build_mapc_cmd(struct its_cmd_block *cmd, @@ -260,52 +272,72 @@ static struct its_collection *its_build_mapc_cmd(struct its_cmd_block *cmd, static struct its_collection *its_build_mapvi_cmd(struct its_cmd_block *cmd, struct its_cmd_desc *desc) { + struct its_collection *col; + + col = dev_event_to_col(desc->its_mapvi_cmd.dev, + desc->its_mapvi_cmd.event_id); + its_encode_cmd(cmd, GITS_CMD_MAPVI); its_encode_devid(cmd, desc->its_mapvi_cmd.dev->device_id); its_encode_event_id(cmd, desc->its_mapvi_cmd.event_id); its_encode_phys_id(cmd, desc->its_mapvi_cmd.phys_id); - its_encode_collection(cmd, desc->its_mapvi_cmd.dev->collection->col_id); + its_encode_collection(cmd, col->col_id); its_fixup_cmd(cmd); - return desc->its_mapvi_cmd.dev->collection; + return col; } static struct its_collection *its_build_movi_cmd(struct its_cmd_block *cmd, struct its_cmd_desc *desc) { + struct its_collection *col; + + col = dev_event_to_col(desc->its_movi_cmd.dev, + desc->its_movi_cmd.event_id); + its_encode_cmd(cmd, GITS_CMD_MOVI); its_encode_devid(cmd, desc->its_movi_cmd.dev->device_id); - its_encode_event_id(cmd, desc->its_movi_cmd.id); + its_encode_event_id(cmd, desc->its_movi_cmd.event_id); its_encode_collection(cmd, desc->its_movi_cmd.col->col_id); its_fixup_cmd(cmd); - return desc->its_movi_cmd.dev->collection; + return col; } static struct its_collection *its_build_discard_cmd(struct its_cmd_block *cmd, struct its_cmd_desc *desc) { + struct 
its_collection *col; + + col = dev_event_to_col(desc->its_discard_cmd.dev, + desc->its_discard_cmd.event_id); + its_encode_cmd(cmd, GITS_CMD_DISCARD); its_encode_devid(cmd, desc->its_discard_cmd.dev->device_id); its_encode_event_id(cmd, desc->its_discard_cmd.event_id); its_fixup_cmd(cmd); - return desc->its_discard_cmd.dev->collection; + return col; } static struct its_collection *its_build_inv_cmd(struct its_cmd_block *cmd, struct its_cmd_desc *desc) { + struct its_collection *col; + + col = dev_event_to_col(desc->its_inv_cmd.dev, + desc->its_inv_cmd.event_id); + its_encode_cmd(cmd, GITS_CMD_INV); its_encode_devid(cmd, desc->its_inv_cmd.dev->device_id); its_encode_event_id(cmd, desc->its_inv_cmd.event_id); its_fixup_cmd(cmd); - return desc->its_inv_cmd.dev->collection; + return col; } static struct its_collection *its_build_invall_cmd(struct its_cmd_block *cmd, @@ -497,7 +529,7 @@ static void its_send_movi(struct its_device *dev, desc.its_movi_cmd.dev = dev; desc.its_movi_cmd.col = col; - desc.its_movi_cmd.id = id; + desc.its_movi_cmd.event_id = id; its_send_single_command(dev->its, its_build_movi_cmd, &desc); } @@ -528,7 +560,7 @@ static void its_send_invall(struct its_node *its, struct its_collection *col) static inline u32 its_get_event_id(struct irq_data *d) { struct its_device *its_dev = irq_data_get_irq_chip_data(d); - return d->hwirq - its_dev->lpi_base; + return d->hwirq - its_dev->event_map.lpi_base; } static void lpi_set_config(struct irq_data *d, bool enable) @@ -583,7 +615,7 @@ static int its_set_affinity(struct irq_data *d, const struct cpumask *mask_val, target_col = &its_dev->its->collections[cpu]; its_send_movi(its_dev, target_col, id); - its_dev->collection = target_col; + its_dev->event_map.col_map[id] = cpu; return IRQ_SET_MASK_OK_DONE; } @@ -713,8 +745,10 @@ static unsigned long *its_lpi_alloc_chunks(int nr_irqs, int *base, int *nr_ids) return bitmap; } -static void its_lpi_free(unsigned long *bitmap, int base, int nr_ids) +static void its_lpi_free(struct event_lpi_map *map) { + int base = map->lpi_base; + int nr_ids = map->nr_lpis; int lpi; spin_lock(&lpi_lock); @@ -731,7 +765,8 @@ static void its_lpi_free(unsigned long *bitmap, int base, int nr_ids) spin_unlock(&lpi_lock); - kfree(bitmap); + kfree(map->lpi_map); + kfree(map->col_map); } /* @@ -1099,11 +1134,11 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id, struct its_device *dev; unsigned long *lpi_map; unsigned long flags; + u16 *col_map = NULL; void *itt; int lpi_base; int nr_lpis; int nr_ites; - int cpu; int sz; dev = kzalloc(sizeof(*dev), GFP_KERNEL); @@ -1117,20 +1152,24 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id, sz = max(sz, ITS_ITT_ALIGN) + ITS_ITT_ALIGN - 1; itt = kzalloc(sz, GFP_KERNEL); lpi_map = its_lpi_alloc_chunks(nvecs, &lpi_base, &nr_lpis); + if (lpi_map) + col_map = kzalloc(sizeof(*col_map) * nr_lpis, GFP_KERNEL); - if (!dev || !itt || !lpi_map) { + if (!dev || !itt || !lpi_map || !col_map) { kfree(dev); kfree(itt); kfree(lpi_map); + kfree(col_map); return NULL; } dev->its = its; dev->itt = itt; dev->nr_ites = nr_ites; - dev->lpi_map = lpi_map; - dev->lpi_base = lpi_base; - dev->nr_lpis = nr_lpis; + dev->event_map.lpi_map = lpi_map; + dev->event_map.col_map = col_map; + dev->event_map.lpi_base = lpi_base; + dev->event_map.nr_lpis = nr_lpis; dev->device_id = dev_id; INIT_LIST_HEAD(&dev->entry); @@ -1138,10 +1177,6 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id, list_add(&dev->entry, 
&its->its_device_list); raw_spin_unlock_irqrestore(&its->lock, flags); - /* Bind the device to the first possible CPU */ - cpu = cpumask_first(cpu_online_mask); - dev->collection = &its->collections[cpu]; - /* Map device to its ITT */ its_send_mapd(dev, 1); @@ -1163,12 +1198,13 @@ static int its_alloc_device_irq(struct its_device *dev, irq_hw_number_t *hwirq) { int idx; - idx = find_first_zero_bit(dev->lpi_map, dev->nr_lpis); - if (idx == dev->nr_lpis) + idx = find_first_zero_bit(dev->event_map.lpi_map, + dev->event_map.nr_lpis); + if (idx == dev->event_map.nr_lpis) return -ENOSPC; - *hwirq = dev->lpi_base + idx; - set_bit(idx, dev->lpi_map); + *hwirq = dev->event_map.lpi_base + idx; + set_bit(idx, dev->event_map.lpi_map); return 0; } @@ -1288,7 +1324,8 @@ static int its_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, irq_domain_set_hwirq_and_chip(domain, virq + i, hwirq, &its_irq_chip, its_dev); dev_dbg(info->scratchpad[1].ptr, "ID:%d pID:%d vID:%d\n", - (int)(hwirq - its_dev->lpi_base), (int)hwirq, virq + i); + (int)(hwirq - its_dev->event_map.lpi_base), + (int)hwirq, virq + i); } return 0; @@ -1300,6 +1337,9 @@ static void its_irq_domain_activate(struct irq_domain *domain, struct its_device *its_dev = irq_data_get_irq_chip_data(d); u32 event = its_get_event_id(d); + /* Bind the LPI to the first possible CPU */ + its_dev->event_map.col_map[event] = cpumask_first(cpu_online_mask); + /* Map the GIC IRQ and event to the device */ its_send_mapvi(its_dev, d->hwirq, event); } @@ -1327,17 +1367,16 @@ static void its_irq_domain_free(struct irq_domain *domain, unsigned int virq, u32 event = its_get_event_id(data); /* Mark interrupt index as unused */ - clear_bit(event, its_dev->lpi_map); + clear_bit(event, its_dev->event_map.lpi_map); /* Nuke the entry in the domain */ irq_domain_reset_irq_data(data); } /* If all interrupts have been freed, start mopping the floor */ - if (bitmap_empty(its_dev->lpi_map, its_dev->nr_lpis)) { - its_lpi_free(its_dev->lpi_map, - its_dev->lpi_base, - its_dev->nr_lpis); + if (bitmap_empty(its_dev->event_map.lpi_map, + its_dev->event_map.nr_lpis)) { + its_lpi_free(&its_dev->event_map); /* Unmap device/itt */ its_send_mapd(its_dev, 0); From a77aa615ba7c4d7511993ecf4278bd604da49991 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 13 Jul 2015 08:24:39 -0600 Subject: [PATCH 0460/2091] scsi: fix host max depth checking for the 'queue_depth' sysfs interface commit 1278dd6809b11dc298e19d81ac0916275f7318c1 upstream. Commit 1e6f2416044c0 changed the scsi sysfs 'queue_depth' code to rejects depths higher than the scsi host template setting. But lots of hosts set this to 1, and update the settings in the scsi host when the controller/devices probing happens. This breaks (at least) mpt2sas and mpt3sas runtime setting of queue depth, returning EINVAL for all settings but '1'. And once it's set to 1, there's no way to go back up. Fixes: 1e6f2416044c0 "scsi: don't allow setting of queue_depth bigger than can_queue" Signed-off-by: Jens Axboe Reviewed-by: Martin K. 
Petersen Reviewed-by: Christoph Hellwig Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/scsi_sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index 1ac38e73df7ee..9ad41168d26df 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -859,7 +859,7 @@ sdev_store_queue_depth(struct device *dev, struct device_attribute *attr, depth = simple_strtoul(buf, NULL, 0); - if (depth < 1 || depth > sht->can_queue) + if (depth < 1 || depth > sdev->host->can_queue) return -EINVAL; retval = sht->change_queue_depth(sdev, depth); From cb6fd3e6f10bd6c73075dd9a8826e74ed6fffacf Mon Sep 17 00:00:00 2001 From: Tony Battersby Date: Thu, 16 Jul 2015 11:40:41 -0400 Subject: [PATCH 0461/2091] scsi: fix memory leak with scsi-mq commit 0c958ecc69c277b25f38f72bc6d18ab145e8167c upstream. Fix a memory leak with scsi-mq triggered by commands with large data transfer length. __sg_alloc_table() sets both table->nents and table->orig_nents to the same value. When the scatterlist is DMA-mapped, table->nents is overwritten with the (possibly smaller) size of the DMA-mapped scatterlist, while table->orig_nents retains the original size of the allocated scatterlist. scsi_free_sgtable() should therefore check orig_nents instead of nents, and all code that initializes sdb->table without calling __sg_alloc_table() should set both nents and orig_nents. Fixes: d285203cf647 ("scsi: add support for a blk-mq based I/O path.") Signed-off-by: Tony Battersby Reviewed-by: Christoph Hellwig Reviewed-by: Ewan D. Milne Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/scsi_error.c | 2 +- drivers/scsi/scsi_lib.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index c95a4e943fc68..59c31bf88d922 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -944,7 +944,7 @@ void scsi_eh_prep_cmnd(struct scsi_cmnd *scmd, struct scsi_eh_save *ses, scmd->sdb.length); scmd->sdb.table.sgl = &ses->sense_sgl; scmd->sc_data_direction = DMA_FROM_DEVICE; - scmd->sdb.table.nents = 1; + scmd->sdb.table.nents = scmd->sdb.table.orig_nents = 1; scmd->cmnd[0] = REQUEST_SENSE; scmd->cmnd[4] = scmd->sdb.length; scmd->cmd_len = COMMAND_SIZE(scmd->cmnd[0]); diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index b1a263137a233..448ebdaa3d694 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -583,7 +583,7 @@ static struct scatterlist *scsi_sg_alloc(unsigned int nents, gfp_t gfp_mask) static void scsi_free_sgtable(struct scsi_data_buffer *sdb, bool mq) { - if (mq && sdb->table.nents <= SCSI_MAX_SG_SEGMENTS) + if (mq && sdb->table.orig_nents <= SCSI_MAX_SG_SEGMENTS) return; __sg_free_table(&sdb->table, SCSI_MAX_SG_SEGMENTS, mq, scsi_sg_free); } @@ -597,8 +597,8 @@ static int scsi_alloc_sgtable(struct scsi_data_buffer *sdb, int nents, bool mq) if (mq) { if (nents <= SCSI_MAX_SG_SEGMENTS) { - sdb->table.nents = nents; - sg_init_table(sdb->table.sgl, sdb->table.nents); + sdb->table.nents = sdb->table.orig_nents = nents; + sg_init_table(sdb->table.sgl, nents); return 0; } first_chunk = sdb->table.sgl; From 785807850c4b58522e37efaae4c9329be01aaa46 Mon Sep 17 00:00:00 2001 From: "Seymour, Shane M" Date: Thu, 2 Jul 2015 12:01:10 +0000 Subject: [PATCH 0462/2091] st: null pointer dereference panic caused by use after kref_put by st_open MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit commit e7ac6c6666bec0a354758a1298d3231e4a635362 upstream. Two SLES11 SP3 servers encountered similar crashes simultaneously following some kind of SAN/tape target issue: ... qla2xxx [0000:81:00.0]-801c:3: Abort command issued nexus=3:0:2 -- 1 2002. qla2xxx [0000:81:00.0]-801c:3: Abort command issued nexus=3:0:2 -- 1 2002. qla2xxx [0000:81:00.0]-8009:3: DEVICE RESET ISSUED nexus=3:0:2 cmd=ffff882f89c2c7c0. qla2xxx [0000:81:00.0]-800c:3: do_reset failed for cmd=ffff882f89c2c7c0. qla2xxx [0000:81:00.0]-800f:3: DEVICE RESET FAILED: Task management failed nexus=3:0:2 cmd=ffff882f89c2c7c0. qla2xxx [0000:81:00.0]-8009:3: TARGET RESET ISSUED nexus=3:0:2 cmd=ffff882f89c2c7c0. qla2xxx [0000:81:00.0]-800c:3: do_reset failed for cmd=ffff882f89c2c7c0. qla2xxx [0000:81:00.0]-800f:3: TARGET RESET FAILED: Task management failed nexus=3:0:2 cmd=ffff882f89c2c7c0. qla2xxx [0000:81:00.0]-8012:3: BUS RESET ISSUED nexus=3:0:2. qla2xxx [0000:81:00.0]-802b:3: BUS RESET SUCCEEDED nexus=3:0:2. qla2xxx [0000:81:00.0]-505f:3: Link is operational (8 Gbps). qla2xxx [0000:81:00.0]-8018:3: ADAPTER RESET ISSUED nexus=3:0:2. qla2xxx [0000:81:00.0]-00af:3: Performing ISP error recovery - ha=ffff88bf04d18000. rport-3:0-0: blocked FC remote port time out: removing target and saving binding qla2xxx [0000:81:00.0]-505f:3: Link is operational (8 Gbps). qla2xxx [0000:81:00.0]-8017:3: ADAPTER RESET SUCCEEDED nexus=3:0:2. rport-2:0-0: blocked FC remote port time out: removing target and saving binding sg_rq_end_io: device detached BUG: unable to handle kernel NULL pointer dereference at 00000000000002a8 IP: [] __pm_runtime_idle+0x28/0x90 PGD 7e6586f067 PUD 7e5af06067 PMD 0 [1739975.390354] Oops: 0002 [#1] SMP CPU 0 ... Supported: No, Proprietary modules are loaded [1739975.390463] Pid: 27965, comm: ABCD Tainted: PF X 3.0.101-0.29-default #1 HP ProLiant DL580 Gen8 RIP: 0010:[] [] __pm_runtime_idle+0x28/0x90 RSP: 0018:ffff8839dc1e7c68 EFLAGS: 00010202 RAX: 0000000000000000 RBX: ffff883f0592fc00 RCX: 0000000000000090 RDX: 0000000000000000 RSI: 0000000000000004 RDI: 0000000000000138 RBP: 0000000000000138 R08: 0000000000000010 R09: ffffffff81bd39d0 R10: 00000000000009c0 R11: ffffffff81025790 R12: 0000000000000001 R13: ffff883022212b80 R14: 0000000000000004 R15: ffff883022212b80 FS: 00007f8e54560720(0000) GS:ffff88407f800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 00000000000002a8 CR3: 0000007e6ced6000 CR4: 00000000001407f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process ABCD (pid: 27965, threadinfo ffff8839dc1e6000, task ffff883592e0c640) Stack: ffff883f0592fc00 00000000fffffffa 0000000000000001 ffff883022212b80 ffff883eff772400 ffffffffa03fa309 0000000000000000 0000000000000000 ffffffffa04003a0 ffff883f063196c0 ffff887f0379a930 ffffffff8115ea1e Call Trace: [] st_open+0x129/0x240 [st] [] chrdev_open+0x13e/0x200 [] __dentry_open+0x198/0x310 [] do_last+0x1f4/0x800 [] path_openat+0xd9/0x420 [] do_filp_open+0x4c/0xc0 [] do_sys_open+0x17f/0x250 [] system_call_fastpath+0x16/0x1b [<00007f8e4f617fd0>] 0x7f8e4f617fcf Code: eb d3 90 48 83 ec 28 40 f6 c6 04 48 89 6c 24 08 4c 89 74 24 20 48 89 fd 48 89 1c 24 4c 89 64 24 10 41 89 f6 4c 89 6c 24 18 74 11 ff 8f 70 01 00 00 0f 94 c0 45 31 ed 84 c0 74 2b 4c 8d a5 a0 RIP [] __pm_runtime_idle+0x28/0x90 RSP CR2: 00000000000002a8 Analysis reveals the cause of the crash to be due to STp->device being NULL. 
The pointer was NULLed via scsi_tape_put(STp) when it calls scsi_tape_release(). In st_open() we jump to err_out after scsi_block_when_processing_errors() completes and returns the device as offline (sdev_state was SDEV_DEL): 1180 /* Open the device. Needs to take the BKL only because of incrementing the SCSI host 1181 module count. */ 1182 static int st_open(struct inode *inode, struct file *filp) 1183 { 1184 int i, retval = (-EIO); 1185 int resumed = 0; 1186 struct scsi_tape *STp; 1187 struct st_partstat *STps; 1188 int dev = TAPE_NR(inode); 1189 char *name; ... 1217 if (scsi_autopm_get_device(STp->device) < 0) { 1218 retval = -EIO; 1219 goto err_out; 1220 } 1221 resumed = 1; 1222 if (!scsi_block_when_processing_errors(STp->device)) { 1223 retval = (-ENXIO); 1224 goto err_out; 1225 } ... 1264 err_out: 1265 normalize_buffer(STp->buffer); 1266 spin_lock(&st_use_lock); 1267 STp->in_use = 0; 1268 spin_unlock(&st_use_lock); 1269 scsi_tape_put(STp); <-- STp->device = 0 after this 1270 if (resumed) 1271 scsi_autopm_put_device(STp->device); 1272 return retval; The ref count for the struct scsi_tape had already been reduced to 1 when the .remove method of the st module had been called. The kref_put() in scsi_tape_put() caused scsi_tape_release() to be called: 0266 static void scsi_tape_put(struct scsi_tape *STp) 0267 { 0268 struct scsi_device *sdev = STp->device; 0269 0270 mutex_lock(&st_ref_mutex); 0271 kref_put(&STp->kref, scsi_tape_release); <-- calls this 0272 scsi_device_put(sdev); 0273 mutex_unlock(&st_ref_mutex); 0274 } In scsi_tape_release() the struct scsi_device in the struct scsi_tape gets set to NULL: 4273 static void scsi_tape_release(struct kref *kref) 4274 { 4275 struct scsi_tape *tpnt = to_scsi_tape(kref); 4276 struct gendisk *disk = tpnt->disk; 4277 4278 tpnt->device = NULL; <<<---- where the dev is nulled 4279 4280 if (tpnt->buffer) { 4281 normalize_buffer(tpnt->buffer); 4282 kfree(tpnt->buffer->reserved_pages); 4283 kfree(tpnt->buffer); 4284 } 4285 4286 disk->private_data = NULL; 4287 put_disk(disk); 4288 kfree(tpnt); 4289 return; 4290 } Although the problem was reported on SLES11.3 the problem appears in linux-next as well. The crash is fixed by reordering the code so we no longer access the struct scsi_tape after the kref_put() is done on it in st_open(). Signed-off-by: Shane Seymour Signed-off-by: Darren Lavender Reviewed-by: Johannes Thumshirn Acked-by: Kai Mäkisara Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/st.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c index 9a1c34205254f..525ab4c1f306c 100644 --- a/drivers/scsi/st.c +++ b/drivers/scsi/st.c @@ -1274,9 +1274,9 @@ static int st_open(struct inode *inode, struct file *filp) spin_lock(&st_use_lock); STp->in_use = 0; spin_unlock(&st_use_lock); - scsi_tape_put(STp); if (resumed) scsi_autopm_put_device(STp->device); + scsi_tape_put(STp); return retval; } From b7a843d6c3063389a83d3260d6ee53bafdb39ec3 Mon Sep 17 00:00:00 2001 From: Giuseppe Cavallaro Date: Tue, 23 Jun 2015 16:09:23 +0200 Subject: [PATCH 0463/2091] drivers: clk: st: Fix flexgen lock init commit 0f4f2afd4402883a51ad27a1d9e046643bb1e3cb upstream. While proving lock, the following warning happens and it is fixed after initializing lock in the setup function INFO: trying to register non-static key. the code is fine but needs lockdep annotation. turning off the locking correctness validator. 
CPU: 0 PID: 0 Comm: swapper/0 Not tainted 3.10.27-02861-g39df285-dirty #33 [] (unwind_backtrace+0x0/0xf4) from [] (show_stack+0x10/0x14) [] (show_stack+0x10/0x14) from [] (__lock_acquire+0x900/0xb14) [] (__lock_acquire+0x900/0xb14) from [] (lock_acquire+0x68/0x7c) [] (lock_acquire+0x68/0x7c) from [] (_raw_spin_lock_irqsave+0x48/0x5c) [] (_raw_spin_lock_irqsave+0x48/0x5c) from [] (clk_gate_endisable+0x28/0x88) [] (clk_gate_endisable+0x28/0x88) from [] (clk_gate_enable+0xc/0x14) [] (clk_gate_enable+0xc/0x14) from [] (flexgen_enable+0x28/0x40) [] (flexgen_enable+0x28/0x40) from [] (__clk_enable+0x5c/0x9c) [] (__clk_enable+0x5c/0x9c) from [] (clk_enable+0x18/0x2c) [] (clk_enable+0x18/0x2c) from [] (st_lpc_of_register+0xc0/0x248) [] (st_lpc_of_register+0xc0/0x248) from [] (clocksource_of_init+0x34/0x58) [] (clocksource_of_init+0x34/0x58) from [] (sti_timer_init+0x10/0x18) [] (sti_timer_init+0x10/0x18) from [] (time_init+0x20/0x30) [] (time_init+0x20/0x30) from [] (start_kernel+0x20c/0x2e8) [] (start_kernel+0x20c/0x2e8) from [<40008074>] (0x40008074) Signed-off-by: Giuseppe Cavallaro Signed-off-by: Gabriel Fernandez Fixes: b116517055b7 ("clk: st: STiH407: Support for Flexgen Clocks") Signed-off-by: Stephen Boyd Signed-off-by: Greg Kroah-Hartman --- drivers/clk/st/clk-flexgen.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/clk/st/clk-flexgen.c b/drivers/clk/st/clk-flexgen.c index bf12a25eb3a22..0f8db28353c56 100644 --- a/drivers/clk/st/clk-flexgen.c +++ b/drivers/clk/st/clk-flexgen.c @@ -303,6 +303,8 @@ void __init st_of_flexgen_setup(struct device_node *np) if (!rlock) goto err; + spin_lock_init(rlock); + for (i = 0; i < clk_data->clk_num; i++) { struct clk *clk; const char *clk_name; From 679125858bcf81b47a21e10cdcf769c66f8831d7 Mon Sep 17 00:00:00 2001 From: Gabriel Fernandez Date: Tue, 23 Jun 2015 16:09:25 +0200 Subject: [PATCH 0464/2091] drivers: clk: st: Fix mux bit-setting for Cortex A9 clocks commit 3be6d8ce639d92e60d144fb99dd74a53fe3799bb upstream. This patch fixes the mux bit-setting for ClockgenA9. Signed-off-by: Gabriel Fernandez Fixes: 13e6f2da1ddf ("clk: st: STiH407: Support for A9 MUX Clocks") Signed-off-by: Stephen Boyd Signed-off-by: Greg Kroah-Hartman --- drivers/clk/st/clkgen-mux.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/st/clkgen-mux.c b/drivers/clk/st/clkgen-mux.c index fdcff10f6d308..ef6514636bfc9 100644 --- a/drivers/clk/st/clkgen-mux.c +++ b/drivers/clk/st/clkgen-mux.c @@ -582,7 +582,7 @@ static struct clkgen_mux_data stih416_a9_mux_data = { }; static struct clkgen_mux_data stih407_a9_mux_data = { .offset = 0x1a4, - .shift = 1, + .shift = 0, .width = 2, }; From 8cc9a81362527fcd4a7249cbec8feb244b83772a Mon Sep 17 00:00:00 2001 From: Pankaj Dev Date: Tue, 7 Jul 2015 09:40:50 +0200 Subject: [PATCH 0465/2091] drivers: clk: st: Incorrect register offset used for lock_status commit 56551da9255f20ffd3a9711728a1a3ad4b7100af upstream. 
Incorrect register offset used for sthi407 clockgenC Signed-off-by: Pankaj Dev Signed-off-by: Gabriel Fernandez Fixes: 51306d56ba81 ("clk: st: STiH407: Support for clockgenC0") Signed-off-by: Stephen Boyd Signed-off-by: Greg Kroah-Hartman --- drivers/clk/st/clkgen-fsyn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/st/clkgen-fsyn.c b/drivers/clk/st/clkgen-fsyn.c index a917c4c7eaa9c..6ae068ab07c85 100644 --- a/drivers/clk/st/clkgen-fsyn.c +++ b/drivers/clk/st/clkgen-fsyn.c @@ -340,7 +340,7 @@ static const struct clkgen_quadfs_data st_fs660c32_C_407 = { CLKGEN_FIELD(0x30c, 0xf, 20), CLKGEN_FIELD(0x310, 0xf, 20) }, .lockstatus_present = true, - .lock_status = CLKGEN_FIELD(0x2A0, 0x1, 24), + .lock_status = CLKGEN_FIELD(0x2f0, 0x1, 24), .powerup_polarity = 1, .standby_polarity = 1, .pll_ops = &st_quadfs_pll_c32_ops, From 848bd661126da2386b526d3dd0193b6564a2731d Mon Sep 17 00:00:00 2001 From: Tom Hughes Date: Mon, 29 Jun 2015 19:41:49 +0100 Subject: [PATCH 0466/2091] mac80211: clear subdir_stations when removing debugfs commit 4479004e6409087d1b4986881dc98c6c15dffb28 upstream. If we don't do this, and we then fail to recreate the debugfs directory during a mode change, then we will fail later trying to add stations to this now bogus directory: BUG: unable to handle kernel NULL pointer dereference at 0000006c IP: [] mutex_lock+0x12/0x30 Call Trace: [] start_creating+0x44/0xc0 [] debugfs_create_dir+0x13/0xf0 [] ieee80211_sta_debugfs_add+0x6e/0x490 [mac80211] Signed-off-by: Tom Hughes Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/debugfs_netdev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 29236e832e444..c09c0131bfa22 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -723,6 +723,7 @@ void ieee80211_debugfs_remove_netdev(struct ieee80211_sub_if_data *sdata) debugfs_remove_recursive(sdata->vif.debugfs_dir); sdata->vif.debugfs_dir = NULL; + sdata->debugfs.subdir_stations = NULL; } void ieee80211_debugfs_rename_netdev(struct ieee80211_sub_if_data *sdata) From 59366187ef6725cf9c37aec5dd2e6c80aa05a24a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 17 Jul 2015 09:38:43 +0200 Subject: [PATCH 0467/2091] Subject: pinctrl: imx1-core: Fix debug output in .pin_config_set callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 9571b25df1dbf4db17191b54f59734e8b77fd591 upstream. imx1_pinconf_set assumes that the array of pins in struct imx1_pinctrl_soc_info can be indexed by pin id to get the pinctrl_pin_desc for a pin. This used to be correct up to commit 607af165c047 which removed some entries from the array and so made it wrong to access the array by pin id. The result of this bug is a wrong pin name in the output for small pin ids and an oops for the bigger ones. This patch is the result of a discussion that includes patches by Markus Pargmann and Chris Ruehl. 
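The mistake is plain array indexing and can be modelled outside the kernel: once a pin table has gaps, a pin id is no longer a valid index into it, and only a lookup keyed on the id (which is what going through pin_desc_get() amounts to) returns the right descriptor. A minimal user-space model with made-up pad names:

#include <stdio.h>
#include <stddef.h>

struct pin_desc { unsigned id; const char *name; };

/* Sparse table: the entry for pad 2 has been removed. */
static const struct pin_desc pins[] = {
        { 0, "PAD_A0" }, { 1, "PAD_A1" }, { 3, "PAD_A3" }, { 4, "PAD_A4" },
};

/* Correct: look the descriptor up by id instead of by position. */
static const struct pin_desc *desc_by_id(unsigned id)
{
        for (size_t i = 0; i < sizeof(pins) / sizeof(pins[0]); i++)
                if (pins[i].id == id)
                        return &pins[i];
        return NULL;
}

int main(void)
{
        unsigned pin_id = 3;

        printf("by index:  %s\n", pins[pin_id].name);         /* PAD_A4, wrong */
        printf("by lookup: %s\n", desc_by_id(pin_id)->name);  /* PAD_A3 */
        return 0;
}

For ids beyond the end of the shrunken array, the by-index variant walks off the table entirely, which matches the oops seen for the bigger pin ids.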
Fixes: 607af165c047 ("pinctrl: i.MX27: Remove nonexistent pad definitions") Reported-by: Chris Ruehl Signed-off-by: Uwe Kleine-König Reviewed-by: Markus Pargmann Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/freescale/pinctrl-imx1-core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/pinctrl/freescale/pinctrl-imx1-core.c b/drivers/pinctrl/freescale/pinctrl-imx1-core.c index 5ac59fbb2440f..d3a3be7476e19 100644 --- a/drivers/pinctrl/freescale/pinctrl-imx1-core.c +++ b/drivers/pinctrl/freescale/pinctrl-imx1-core.c @@ -403,14 +403,13 @@ static int imx1_pinconf_set(struct pinctrl_dev *pctldev, unsigned num_configs) { struct imx1_pinctrl *ipctl = pinctrl_dev_get_drvdata(pctldev); - const struct imx1_pinctrl_soc_info *info = ipctl->info; int i; for (i = 0; i != num_configs; ++i) { imx1_write_bit(ipctl, pin_id, configs[i] & 0x01, MX1_PUEN); dev_dbg(ipctl->dev, "pinconf set pullup pin %s\n", - info->pins[pin_id].name); + pin_desc_get(pctldev, pin_id)->name); } return 0; From 4647b34f1a3b08b70f8e20436c7dbec721d47f35 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 17 Jul 2015 14:15:30 -0500 Subject: [PATCH 0468/2091] mnt: Clarify and correct the disconnect logic in umount_tree commit f2d0a123bcf16d1a9cf7942ddc98e0ef77862c2b upstream. rmdir mntpoint will result in an infinite loop when there is a mount locked on the mountpoint in another mount namespace. This is because the logic to test to see if a mount should be disconnected in umount_tree is buggy. Move the logic to decide if a mount should remain connected to it's mountpoint into it's own function disconnect_mount so that clarity of expression instead of terseness of expression becomes a virtue. When the conditions where it is invalid to leave a mount connected are first ruled out, the logic for deciding if a mount should be disconnected becomes much clearer and simpler. Fixes: e0c9c0afd2fc958ffa34b697972721d81df8a56f mnt: Update detach_mounts to leave mounts connected Fixes: ce07d891a0891d3c0d0c2d73d577490486b809e1 mnt: Honor MNT_LOCKED when detaching mounts Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- fs/namespace.c | 35 +++++++++++++++++++++++++++++++---- fs/pnode.h | 2 -- 2 files changed, 31 insertions(+), 6 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 02c6875dd9457..fcd4f7bc5e88f 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1350,6 +1350,36 @@ enum umount_tree_flags { UMOUNT_PROPAGATE = 2, UMOUNT_CONNECTED = 4, }; + +static bool disconnect_mount(struct mount *mnt, enum umount_tree_flags how) +{ + /* Leaving mounts connected is only valid for lazy umounts */ + if (how & UMOUNT_SYNC) + return true; + + /* A mount without a parent has nothing to be connected to */ + if (!mnt_has_parent(mnt)) + return true; + + /* Because the reference counting rules change when mounts are + * unmounted and connected, umounted mounts may not be + * connected to mounted mounts. + */ + if (!(mnt->mnt_parent->mnt.mnt_flags & MNT_UMOUNT)) + return true; + + /* Has it been requested that the mount remain connected? */ + if (how & UMOUNT_CONNECTED) + return false; + + /* Is the mount locked such that it needs to remain connected? 
*/ + if (IS_MNT_LOCKED(mnt)) + return false; + + /* By default disconnect the mount */ + return true; +} + /* * mount_lock must be held * namespace_sem must be held for write @@ -1387,10 +1417,7 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how) if (how & UMOUNT_SYNC) p->mnt.mnt_flags |= MNT_SYNC_UMOUNT; - disconnect = !(((how & UMOUNT_CONNECTED) && - mnt_has_parent(p) && - (p->mnt_parent->mnt.mnt_flags & MNT_UMOUNT)) || - IS_MNT_LOCKED_AND_LAZY(p)); + disconnect = disconnect_mount(p, how); pin_insert_group(&p->mnt_umount, &p->mnt_parent->mnt, disconnect ? &unmounted : NULL); diff --git a/fs/pnode.h b/fs/pnode.h index 7114ce6e6b9ef..0fcdbe7ca6480 100644 --- a/fs/pnode.h +++ b/fs/pnode.h @@ -20,8 +20,6 @@ #define SET_MNT_MARK(m) ((m)->mnt.mnt_flags |= MNT_MARKED) #define CLEAR_MNT_MARK(m) ((m)->mnt.mnt_flags &= ~MNT_MARKED) #define IS_MNT_LOCKED(m) ((m)->mnt.mnt_flags & MNT_LOCKED) -#define IS_MNT_LOCKED_AND_LAZY(m) \ - (((m)->mnt.mnt_flags & (MNT_LOCKED|MNT_SYNC_UMOUNT)) == MNT_LOCKED) #define CL_EXPIRE 0x01 #define CL_SLAVE 0x02 From 3af9ac3e2251ef2424bc4745fa27458bf134da3c Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 17 Jul 2015 14:54:27 -0500 Subject: [PATCH 0469/2091] mnt: In detach_mounts detach the appropriate unmounted mount commit fe78fcc85a2046c51f1535710996860557eeec20 upstream. The handling of in detach_mounts of unmounted but connected mounts is buggy and can lead to an infinite loop. Correct the handling of unmounted mounts in detach_mount. When the mountpoint of an unmounted but connected mount is connected to a dentry, and that dentry is deleted we need to disconnect that mount from the parent mount and the deleted dentry. Nothing changes for the unmounted and connected children. They can be safely ignored. Fixes: ce07d891a0891d3c0d0c2d73d577490486b809e1 mnt: Honor MNT_LOCKED when detaching mounts Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- fs/namespace.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index fcd4f7bc5e88f..fce3cc1a3fa77 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1554,11 +1554,8 @@ void __detach_mounts(struct dentry *dentry) while (!hlist_empty(&mp->m_list)) { mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list); if (mnt->mnt.mnt_flags & MNT_UMOUNT) { - struct mount *p, *tmp; - list_for_each_entry_safe(p, tmp, &mnt->mnt_mounts, mnt_child) { - hlist_add_head(&p->mnt_umount.s_list, &unmounted); - umount_mnt(p); - } + hlist_add_head(&mnt->mnt_umount.s_list, &unmounted); + umount_mnt(mnt); } else umount_tree(mnt, UMOUNT_CONNECTED); } From 23713b4de718469da486aca7f27d8983b60d7622 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Fri, 24 Jul 2015 10:38:12 -0400 Subject: [PATCH 0470/2091] ftrace: Fix breakage of set_ftrace_pid commit e3eea1404f5ff7a2ceb7b5e7ba412a6fd94f2935 upstream. Commit 4104d326b670 ("ftrace: Remove global function list and call function directly") simplified the ftrace code by removing the global_ops list with a new design. But this cleanup also broke the filtering of PIDs that are added to the set_ftrace_pid file. Add back the proper hooks to have pid filtering working once again. 
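The hooks being added come down to a function-pointer swap: every ops keeps its registered handler in ->saved_func, and while pid filtering is active, any ops flagged FTRACE_OPS_FL_PID has ->func pointed at the pid-checking wrapper, which forwards to ->saved_func only for traced tasks. A stripped-down user-space model of that mechanism (names shortened, not kernel code):

#include <stdio.h>
#include <stdbool.h>

struct ops;
typedef void (*func_t)(struct ops *op, unsigned long ip);

struct ops {
        func_t func;            /* what gets called on a traced function */
        func_t saved_func;      /* the handler the user registered */
        bool pid_flag;          /* models FTRACE_OPS_FL_PID */
};

static bool current_traced;     /* models the per-task "trace me" flag */

static void pid_func(struct ops *op, unsigned long ip)
{
        if (!current_traced)
                return;                 /* task not in set_ftrace_pid */
        op->saved_func(op, ip);         /* hand off to the real handler */
}

static void my_handler(struct ops *op, unsigned long ip)
{
        printf("traced ip %#lx\n", ip);
}

static void update_pid_func(struct ops *op, bool pids_enabled)
{
        op->func = (pids_enabled && op->pid_flag) ? pid_func : op->saved_func;
}

int main(void)
{
        struct ops op = { .saved_func = my_handler, .pid_flag = true };

        update_pid_func(&op, false);
        op.func(&op, 0x1000);           /* no filtering: handler runs */

        update_pid_func(&op, true);
        current_traced = false;
        op.func(&op, 0x2000);           /* filtered out: nothing printed */
        current_traced = true;
        op.func(&op, 0x3000);           /* passes the pid check */
        return 0;
}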
Reported-by: Matt Fleming Reported-by: Richard Weinberger Tested-by: Matt Fleming Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- include/linux/ftrace.h | 3 +++ kernel/trace/ftrace.c | 52 +++++++++++++++++++++++++++--------------- 2 files changed, 37 insertions(+), 18 deletions(-) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 1da602982cf93..6cd8c0ee4b6f8 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -116,6 +116,7 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops); * SAVE_REGS. If another ops with this flag set is already registered * for any of the functions that this ops will be registered for, then * this ops will fail to register or set_filter_ip. + * PID - Is affected by set_ftrace_pid (allows filtering on those pids) */ enum { FTRACE_OPS_FL_ENABLED = 1 << 0, @@ -132,6 +133,7 @@ enum { FTRACE_OPS_FL_MODIFYING = 1 << 11, FTRACE_OPS_FL_ALLOC_TRAMP = 1 << 12, FTRACE_OPS_FL_IPMODIFY = 1 << 13, + FTRACE_OPS_FL_PID = 1 << 14, }; #ifdef CONFIG_DYNAMIC_FTRACE @@ -159,6 +161,7 @@ struct ftrace_ops { struct ftrace_ops *next; unsigned long flags; void *private; + ftrace_func_t saved_func; int __percpu *disabled; #ifdef CONFIG_DYNAMIC_FTRACE int nr_trampolines; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 02bece4a99ea3..eb11011b5292a 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -98,6 +98,13 @@ struct ftrace_pid { struct pid *pid; }; +static bool ftrace_pids_enabled(void) +{ + return !list_empty(&ftrace_pids); +} + +static void ftrace_update_trampoline(struct ftrace_ops *ops); + /* * ftrace_disabled is set when an anomaly is discovered. * ftrace_disabled is much stronger than ftrace_enabled. @@ -109,7 +116,6 @@ static DEFINE_MUTEX(ftrace_lock); static struct ftrace_ops *ftrace_control_list __read_mostly = &ftrace_list_end; static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end; ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; -ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub; static struct ftrace_ops global_ops; static struct ftrace_ops control_ops; @@ -183,14 +189,7 @@ static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip, if (!test_tsk_trace_trace(current)) return; - ftrace_pid_function(ip, parent_ip, op, regs); -} - -static void set_ftrace_pid_function(ftrace_func_t func) -{ - /* do not set ftrace_pid_function to itself! 
*/ - if (func != ftrace_pid_func) - ftrace_pid_function = func; + op->saved_func(ip, parent_ip, op, regs); } /** @@ -202,7 +201,6 @@ static void set_ftrace_pid_function(ftrace_func_t func) void clear_ftrace_function(void) { ftrace_trace_function = ftrace_stub; - ftrace_pid_function = ftrace_stub; } static void control_ops_disable_all(struct ftrace_ops *ops) @@ -436,6 +434,12 @@ static int __register_ftrace_function(struct ftrace_ops *ops) } else add_ftrace_ops(&ftrace_ops_list, ops); + /* Always save the function, and reset at unregistering */ + ops->saved_func = ops->func; + + if (ops->flags & FTRACE_OPS_FL_PID && ftrace_pids_enabled()) + ops->func = ftrace_pid_func; + ftrace_update_trampoline(ops); if (ftrace_enabled) @@ -463,15 +467,28 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops) if (ftrace_enabled) update_ftrace_function(); + ops->func = ops->saved_func; + return 0; } static void ftrace_update_pid_func(void) { + bool enabled = ftrace_pids_enabled(); + struct ftrace_ops *op; + /* Only do something if we are tracing something */ if (ftrace_trace_function == ftrace_stub) return; + do_for_each_ftrace_op(op, ftrace_ops_list) { + if (op->flags & FTRACE_OPS_FL_PID) { + op->func = enabled ? ftrace_pid_func : + op->saved_func; + ftrace_update_trampoline(op); + } + } while_for_each_ftrace_op(op); + update_ftrace_function(); } @@ -1133,7 +1150,8 @@ static struct ftrace_ops global_ops = { .local_hash.filter_hash = EMPTY_HASH, INIT_OPS_HASH(global_ops) .flags = FTRACE_OPS_FL_RECURSION_SAFE | - FTRACE_OPS_FL_INITIALIZED, + FTRACE_OPS_FL_INITIALIZED | + FTRACE_OPS_FL_PID, }; /* @@ -5023,7 +5041,9 @@ static void ftrace_update_trampoline(struct ftrace_ops *ops) static struct ftrace_ops global_ops = { .func = ftrace_stub, - .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_INITIALIZED, + .flags = FTRACE_OPS_FL_RECURSION_SAFE | + FTRACE_OPS_FL_INITIALIZED | + FTRACE_OPS_FL_PID, }; static int __init ftrace_nodyn_init(void) @@ -5080,11 +5100,6 @@ void ftrace_init_array_ops(struct trace_array *tr, ftrace_func_t func) if (WARN_ON(tr->ops->func != ftrace_stub)) printk("ftrace ops had %pS for function\n", tr->ops->func); - /* Only the top level instance does pid tracing */ - if (!list_empty(&ftrace_pids)) { - set_ftrace_pid_function(func); - func = ftrace_pid_func; - } } tr->ops->func = func; tr->ops->private = tr; @@ -5371,7 +5386,7 @@ static void *fpid_start(struct seq_file *m, loff_t *pos) { mutex_lock(&ftrace_lock); - if (list_empty(&ftrace_pids) && (!*pos)) + if (!ftrace_pids_enabled() && (!*pos)) return (void *) 1; return seq_list_start(&ftrace_pids, *pos); @@ -5610,6 +5625,7 @@ static struct ftrace_ops graph_ops = { .func = ftrace_stub, .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_INITIALIZED | + FTRACE_OPS_FL_PID | FTRACE_OPS_FL_STUB, #ifdef FTRACE_GRAPH_TRAMP_ADDR .trampoline = FTRACE_GRAPH_TRAMP_ADDR, From 6cbebdadb406e308d845c0b88d79fe3e2275760d Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 14 Jul 2015 14:48:53 -0600 Subject: [PATCH 0471/2091] iommu/vt-d: Fix VM domain ID leak commit 46ebb7af7b93792de65e124e1ab8b89a108a41f2 upstream. This continues the attempt to fix commit fb170fb4c548 ("iommu/vt-d: Introduce helper functions to make code symmetric for readability"). 
The previous attempt in commit 71684406905f ("iommu/vt-d: Detach domain *only* from attached iommus") overlooked the fact that dmar_domain.iommu_bmp gets cleared for VM domains when devices are detached: intel_iommu_detach_device domain_remove_one_dev_info domain_detach_iommu The domain is detached from the iommu, but the iommu is still attached to the domain, for whatever reason. Thus when we get to domain_exit(), we can't rely on iommu_bmp for VM domains to find the active iommus, we must check them all. Without that, the corresponding bit in intel_iommu.domain_ids doesn't get cleared and repeated VM domain creation and destruction will run out of domain IDs. Meanwhile we still can't call iommu_detach_domain() on arbitrary non-VM domains or we risk clearing in-use domain IDs, as 71684406905f attempted to address. It's tempting to modify iommu_detach_domain() to test the domain iommu_bmp, but the call ordering from domain_remove_one_dev_info() prevents it being able to work as fb170fb4c548 seems to have intended. Caching of unused VM domains on the iommu object seems to be the root of the problem, but this code is far too fragile for that kind of rework to be proposed for stable, so we simply revert this chunk to its state prior to fb170fb4c548. Fixes: fb170fb4c548 ("iommu/vt-d: Introduce helper functions to make code symmetric for readability") Fixes: 71684406905f ("iommu/vt-d: Detach domain *only* from attached iommus") Signed-off-by: Alex Williamson Cc: Jiang Liu Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/intel-iommu.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 5ecfaf29933ad..c87c4b1bfc005 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -1756,8 +1756,9 @@ static int domain_init(struct dmar_domain *domain, int guest_width) static void domain_exit(struct dmar_domain *domain) { + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; struct page *freelist = NULL; - int i; /* Domain 0 is reserved, so dont process it */ if (!domain) @@ -1777,8 +1778,10 @@ static void domain_exit(struct dmar_domain *domain) /* clear attached or cached domains */ rcu_read_lock(); - for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) - iommu_detach_domain(domain, g_iommus[i]); + for_each_active_iommu(iommu, drhd) + if (domain_type_is_vm(domain) || + test_bit(iommu->seq_id, domain->iommu_bmp)) + iommu_detach_domain(domain, iommu); rcu_read_unlock(); dma_free_pagelist(freelist); From 2ebb372240adabb59cc2671e64a85c2cc6f803b8 Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Tue, 16 Jun 2015 16:07:17 +0530 Subject: [PATCH 0472/2091] mmc: omap_hsmmc: Fix DTO and DCRC handling commit 408806f740497c5d71f9c305b3d6aad260ff186d upstream. DTO/DCRC errors were not being informed to the mmc core since commit ae4bf788ee9b ("mmc: omap_hsmmc: consolidate error report handling of HSMMC IRQ"). This commit made sure 'end_trans' is never set on DTO/DCRC errors. This is because after this commit 'host->data' is checked after it has been cleared to NULL by omap_hsmmc_dma_cleanup(). Because 'end_trans' is never set, omap_hsmmc_xfer_done() is never invoked making core layer not to be aware of DTO/DCRC errors. Because of this any command invoked after DTO/DCRC error leads to a hang. Fix this by checking for 'host->data' before it is actually cleared. 
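The ordering problem generalizes beyond this controller: if error cleanup tears the transfer down (and clears the pointer) before the 'was a transfer in flight' test runs, that test can never succeed. A small user-space model of the broken and fixed orderings, with invented names:

#include <stdio.h>
#include <stddef.h>

struct host { void *data; int response_busy; };

static void cleanup(struct host *h)
{
        h->data = NULL;                 /* stands in for the DMA cleanup */
        h->response_busy = 0;
}

static int irq_broken(struct host *h, int error)
{
        int end_trans = 0;

        if (error)
                cleanup(h);                     /* clears h->data ... */
        if (h->data || h->response_busy)        /* ... so this never fires */
                end_trans = 1;
        return end_trans;                       /* 0: core never told */
}

static int irq_fixed(struct host *h, int error)
{
        int end_trans = 0;

        if (h->data || h->response_busy)        /* sample the state first */
                end_trans = 1;
        if (error)
                cleanup(h);
        return end_trans;                       /* 1: completion reported */
}

int main(void)
{
        struct host a = { .data = &a }, b = { .data = &b };

        printf("broken ordering: end_trans=%d\n", irq_broken(&a, 1));
        printf("fixed ordering:  end_trans=%d\n", irq_fixed(&b, 1));
        return 0;
}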
Fixes: ae4bf788ee9b ("mmc: omap_hsmmc: consolidate error report handling of HSMMC IRQ") Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Vignesh R Tested-by: Andreas Fenkart Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/omap_hsmmc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c index 9df2b6801f767..d0abdffb0d7c2 100644 --- a/drivers/mmc/host/omap_hsmmc.c +++ b/drivers/mmc/host/omap_hsmmc.c @@ -1062,6 +1062,10 @@ static void omap_hsmmc_do_irq(struct omap_hsmmc_host *host, int status) if (status & (CTO_EN | CCRC_EN)) end_cmd = 1; + if (host->data || host->response_busy) { + end_trans = !end_cmd; + host->response_busy = 0; + } if (status & (CTO_EN | DTO_EN)) hsmmc_command_incomplete(host, -ETIMEDOUT, end_cmd); else if (status & (CCRC_EN | DCRC_EN)) @@ -1081,10 +1085,6 @@ static void omap_hsmmc_do_irq(struct omap_hsmmc_host *host, int status) } dev_dbg(mmc_dev(host->mmc), "AC12 err: 0x%x\n", ac12); } - if (host->data || host->response_busy) { - end_trans = !end_cmd; - host->response_busy = 0; - } } OMAP_HSMMC_WRITE(host->base, STAT, status); From 7a64ba1a070e33af42f21811886497ee33030670 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Mon, 22 Jun 2015 11:41:23 +0800 Subject: [PATCH 0473/2091] mmc: sdhci check parameters before call dma_free_coherent commit 7ac020366b0a436d726408841160b5dc32c19214 upstream. We should not call dma_free_coherent if host->adma_table is NULL, otherwise may trigger panic. Fixes: d1e49f77d7c7 ("mmc: sdhci: convert ADMA descriptors to a...") Signed-off-by: Peng Fan Acked-by: Adrian Hunter Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index d3dbb28057e9b..bec8a307f8cd3 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -3037,8 +3037,11 @@ int sdhci_add_host(struct sdhci_host *host) GFP_KERNEL); host->align_buffer = kmalloc(host->align_buffer_sz, GFP_KERNEL); if (!host->adma_table || !host->align_buffer) { - dma_free_coherent(mmc_dev(mmc), host->adma_table_sz, - host->adma_table, host->adma_addr); + if (host->adma_table) + dma_free_coherent(mmc_dev(mmc), + host->adma_table_sz, + host->adma_table, + host->adma_addr); kfree(host->align_buffer); pr_warn("%s: Unable to allocate ADMA buffers - falling back to standard DMA\n", mmc_hostname(mmc)); From a53ccf3cfe09ff026dd0d38a4211bf5766743ba3 Mon Sep 17 00:00:00 2001 From: Joakim Tjernlund Date: Wed, 22 Jul 2015 16:44:26 +0200 Subject: [PATCH 0474/2091] mmc: sdhci-esdhc: Make 8BIT bus work commit 8e91125ff3f57f15c6568e2a6d32743b3f7815e4 upstream. Support for 8BIT bus with was added some time ago to sdhci-esdhc but then missed to remove the 8BIT from the reserved bit mask which made 8BIT non functional. 
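The effect of the mask is simple bit arithmetic, assuming the usual filtering of val &= ~ESDHC_HOST_CONTROL_RES on host-control writes: with 0x05 the write loses bit 2 of whatever the core asked for (presumably why 8-bit mode never took effect), while 0x01 only withholds bit 0. A tiny stand-alone check:

#include <stdio.h>

#define HOST_CONTROL_RES_OLD 0x05       /* strips bits 0 and 2 */
#define HOST_CONTROL_RES_NEW 0x01       /* strips bit 0 only */

static unsigned char filter(unsigned char val, unsigned char res_mask)
{
        return val & ~res_mask;         /* assumed reserved-bit filtering */
}

int main(void)
{
        unsigned char ctrl = 0x06;      /* example request with bits 1,2 set */

        printf("old mask: 0x%02x -> 0x%02x\n", ctrl, filter(ctrl, HOST_CONTROL_RES_OLD));
        printf("new mask: 0x%02x -> 0x%02x\n", ctrl, filter(ctrl, HOST_CONTROL_RES_NEW));
        return 0;
}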
Fixes: 66b50a00992d ("mmc: esdhc: Add support for 8-bit bus width and..") Signed-off-by: Joakim Tjernlund Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-esdhc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-esdhc.h b/drivers/mmc/host/sdhci-esdhc.h index 3497cfaf683c5..a870c42731d7a 100644 --- a/drivers/mmc/host/sdhci-esdhc.h +++ b/drivers/mmc/host/sdhci-esdhc.h @@ -45,6 +45,6 @@ #define ESDHC_DMA_SYSCTL 0x40c #define ESDHC_DMA_SNOOP 0x00000040 -#define ESDHC_HOST_CONTROL_RES 0x05 +#define ESDHC_HOST_CONTROL_RES 0x01 #endif /* _DRIVERS_MMC_SDHCI_ESDHC_H */ From 5ca991392ffc8fb1aaff9008fb3a2471ce578120 Mon Sep 17 00:00:00 2001 From: Jingju Hou Date: Thu, 23 Jul 2015 17:56:23 +0800 Subject: [PATCH 0475/2091] mmc: sdhci-pxav3: fix platform_data is not initialized commit 9cd76049f0d90ae241f5ad80e311489824527000 upstream. pdev->dev.platform_data is not initialized if match is true in function sdhci_pxav3_probe. Just local variable pdata is assigned the return value from function pxav3_get_mmc_pdata(). static int sdhci_pxav3_probe(struct platform_device *pdev) { struct sdhci_pxa_platdata *pdata = pdev->dev.platform_data; ... if (match) { ret = mmc_of_parse(host->mmc); if (ret) goto err_of_parse; sdhci_get_of_property(pdev); pdata = pxav3_get_mmc_pdata(dev); } ... } Signed-off-by: Jingju Hou Fixes: b650352dd3df("mmc: sdhci-pxa: Add device tree support") Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-pxav3.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mmc/host/sdhci-pxav3.c b/drivers/mmc/host/sdhci-pxav3.c index b5103a247bc1b..065dc70caa1d1 100644 --- a/drivers/mmc/host/sdhci-pxav3.c +++ b/drivers/mmc/host/sdhci-pxav3.c @@ -411,6 +411,7 @@ static int sdhci_pxav3_probe(struct platform_device *pdev) goto err_of_parse; sdhci_get_of_property(pdev); pdata = pxav3_get_mmc_pdata(dev); + pdev->dev.platform_data = pdata; } else if (pdata) { /* on-chip device */ if (pdata->flags & PXA_FLAG_CARD_PERMANENT) From 103c46c95b48b228c98a8751726ab3bd9e90b525 Mon Sep 17 00:00:00 2001 From: Antonio Borneo Date: Sun, 21 Jun 2015 14:20:25 +0800 Subject: [PATCH 0476/2091] HID: cp2112: fix to force single data-report reply commit 6debce6f4e787a8eb4cec94e7afa85fb4f40db27 upstream. Current implementation of cp2112_raw_event() only accepts one data report at a time. If last received data report is not fully handled yet, a new incoming data report will overwrite it. In such case we don't guaranteed to propagate the correct incoming data. The trivial fix implemented here forces a single report at a time by requesting in cp2112_read() no more than 61 byte of data, which is the payload size of a single data report. 
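The 61-byte ceiling is simply the data payload of one report, so a caller that wants more has to come back for the remainder; the "trivial fix" framing above implies the read path already loops that way. A rough user-space sketch of reading through such a per-report limit (the helpers are illustrative, not the driver's API):

#include <stdio.h>
#include <string.h>

#define REPORT_PAYLOAD 61               /* single-report data payload */

/* Stand-in for one force-read transaction of at most 'len' bytes. */
static size_t one_read(unsigned char *dst, size_t len)
{
        if (len > REPORT_PAYLOAD)
                len = REPORT_PAYLOAD;   /* the clamp this patch adds */
        memset(dst, 0xab, len);         /* pretend that much data arrived */
        return len;
}

/* Larger requests are satisfied one report at a time. */
static size_t read_all(unsigned char *dst, size_t want)
{
        size_t done = 0;

        while (done < want) {
                size_t n = one_read(dst + done, want - done);

                if (!n)
                        break;
                done += n;
        }
        return done;
}

int main(void)
{
        unsigned char buf[200];

        printf("transferred %zu of %zu bytes\n", read_all(buf, sizeof(buf)), sizeof(buf));
        return 0;
}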
Signed-off-by: Antonio Borneo Tested-by: Ellen Wang Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-cp2112.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hid/hid-cp2112.c b/drivers/hid/hid-cp2112.c index 3318de690e006..a2dbbbe0d8d7e 100644 --- a/drivers/hid/hid-cp2112.c +++ b/drivers/hid/hid-cp2112.c @@ -356,6 +356,8 @@ static int cp2112_read(struct cp2112_device *dev, u8 *data, size_t size) struct cp2112_force_read_report report; int ret; + if (size > sizeof(dev->read_data)) + size = sizeof(dev->read_data); report.report = CP2112_DATA_READ_FORCE_SEND; report.length = cpu_to_be16(size); From bed0400e01d7ade4be054c909817fd1d3a591012 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sun, 31 May 2015 21:44:22 +0300 Subject: [PATCH 0477/2091] iwlwifi: mvm: fix antenna selection when BT is active commit 923a8c1d8069104726bde55c37cec66324ccc328 upstream. When BT is active, we want to avoid the shared antenna for management frame to make sure we don't disturb BT. There was a bug in that code because it chose the antenna BIT(ANT_A) where ANT_A is already a bitmap (0x1). This means that the antenna chosen in the end was ANT_B. While this is not optimal on devices with 2 antennas (it'd disturb BT), it is critical on single antenna devices like 3160 which couldn't connect at all when BT was active. This fixes: https://bugzilla.kernel.org/show_bug.cgi?id=97181 Fixes: 34c8b24ff284 ("iwlwifi: mvm: BT Coex - avoid the shared antenna for management frames") Signed-off-by: Emmanuel Grumbach Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/iwlwifi/mvm/tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/iwlwifi/mvm/tx.c b/drivers/net/wireless/iwlwifi/mvm/tx.c index ef32e177f662b..281451c274ca3 100644 --- a/drivers/net/wireless/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/iwlwifi/mvm/tx.c @@ -225,7 +225,7 @@ void iwl_mvm_set_tx_cmd_rate(struct iwl_mvm *mvm, struct iwl_tx_cmd *tx_cmd, if (info->band == IEEE80211_BAND_2GHZ && !iwl_mvm_bt_coex_is_shared_ant_avail(mvm)) - rate_flags = BIT(mvm->cfg->non_shared_ant) << RATE_MCS_ANT_POS; + rate_flags = mvm->cfg->non_shared_ant << RATE_MCS_ANT_POS; else rate_flags = BIT(mvm->mgmt_last_antenna_idx) << RATE_MCS_ANT_POS; From a8bc0fe729d868ffec8a1050925da446da32a01e Mon Sep 17 00:00:00 2001 From: Liad Kaufman Date: Wed, 1 Jul 2015 17:28:34 +0300 Subject: [PATCH 0478/2091] iwlwifi: nvm: remove mac address byte swapping in 8000 family commit be88a1ada9b97bb016196b7f4a1fc2fe2f798529 upstream. This fixes the byte order copying in the MAO (Mac Override Section) section from the PNVM, as the byte swapping is not required anymore in the 8000 family. Due to the byte swapping, the driver was reporting an incorrect MAC adddress. 
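The "214365" comment being deleted below describes a pairwise 16-bit swap; on the 8000 family the MAO section already holds the address in its final byte order, so a straight copy is all that is needed. A self-contained before/after illustration with an arbitrary example address:

#include <stdio.h>

#define ETH_ALEN 6

static void print_mac(const char *tag, const unsigned char *m)
{
        printf("%s %02x:%02x:%02x:%02x:%02x:%02x\n",
               tag, m[0], m[1], m[2], m[3], m[4], m[5]);
}

int main(void)
{
        /* Example bytes as they would sit in the NVM section. */
        const unsigned char hw_addr[ETH_ALEN] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 };
        unsigned char swapped[ETH_ALEN], copied[ETH_ALEN];
        int i;

        /* Old handling: swap each 16-bit pair, i.e. bytes 1,0,3,2,5,4. */
        for (i = 0; i < ETH_ALEN; i += 2) {
                swapped[i] = hw_addr[i + 1];
                swapped[i + 1] = hw_addr[i];
        }

        /* New handling for the 8000 family: copy as-is. */
        for (i = 0; i < ETH_ALEN; i++)
                copied[i] = hw_addr[i];

        print_mac("swapped:", swapped); /* 11:00:33:22:55:44, wrong here */
        print_mac("copied: ", copied);  /* 00:11:22:33:44:55 */
        return 0;
}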
Signed-off-by: Liad Kaufman Signed-off-by: Emmanuel Grumbach Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/iwlwifi/iwl-nvm-parse.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/iwlwifi/iwl-nvm-parse.c index 8e604a3931ca6..ef20be084b24f 100644 --- a/drivers/net/wireless/iwlwifi/iwl-nvm-parse.c +++ b/drivers/net/wireless/iwlwifi/iwl-nvm-parse.c @@ -540,13 +540,11 @@ static void iwl_set_hw_address_family_8000(struct device *dev, hw_addr = (const u8 *)(mac_override + MAC_ADDRESS_OVERRIDE_FAMILY_8000); - /* The byte order is little endian 16 bit, meaning 214365 */ - data->hw_addr[0] = hw_addr[1]; - data->hw_addr[1] = hw_addr[0]; - data->hw_addr[2] = hw_addr[3]; - data->hw_addr[3] = hw_addr[2]; - data->hw_addr[4] = hw_addr[5]; - data->hw_addr[5] = hw_addr[4]; + /* + * Store the MAC address from MAO section. + * No byte swapping is required in MAO section + */ + memcpy(data->hw_addr, hw_addr, ETH_ALEN); /* * Force the use of the OTP MAC address in case of reserved MAC From 5c5aba569ebf67a8bad4bf258af4111e42101654 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 4 Jun 2015 11:09:47 +0300 Subject: [PATCH 0479/2091] iwlwifi: pcie: prepare the device before accessing it commit f9e5554cd8ca1d1212ec922755b397a20f737923 upstream. For 8000 series, we need to access the device to know what firmware to load. Before we do so, we need to prepare the device otherwise we might not be able to access the hardware. Fixes: c278754a21e6 ("iwlwifi: mvm: support family 8000 B2/C steps") Signed-off-by: Emmanuel Grumbach Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/iwlwifi/pcie/trans.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/wireless/iwlwifi/pcie/trans.c b/drivers/net/wireless/iwlwifi/pcie/trans.c index dc179094e6a0d..37e6a6f914872 100644 --- a/drivers/net/wireless/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/iwlwifi/pcie/trans.c @@ -2515,6 +2515,12 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, trans->hw_rev = (trans->hw_rev & 0xfff0) | (CSR_HW_REV_STEP(trans->hw_rev << 2) << 2); + ret = iwl_pcie_prepare_card_hw(trans); + if (ret) { + IWL_WARN(trans, "Exit HW not ready\n"); + goto out_pci_disable_msi; + } + /* * in-order to recognize C step driver should read chip version * id located at the AUX bus MISC address space. From 4731b65de6b30b7a8e771c51e4fb4a53846f158e Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 24 Jul 2015 09:22:16 +1000 Subject: [PATCH 0480/2091] md/raid1: fix test for 'was read error from last working device'. commit 34cab6f42003cb06f48f86a86652984dec338ae9 upstream. When we get a read error from the last working device, we don't try to repair it, and don't fail the device. We simple report a read error to the caller. However the current test for 'is this the last working device' is wrong. When there is only one fully working device, it assumes that a non-faulty device is that device. However a spare which is rebuilding would be non-faulty but so not the only working device. So change the test from "!Faulty" to "In_sync". If ->degraded says there is only one fully working device and this device is in_sync, this must be the one. 
This bug has existed since we allowed read_balance to read from a recovering spare in v3.0 Reported-and-tested-by: Alexander Lyakas Fixes: 76073054c95b ("md/raid1: clean up read_balance.") Signed-off-by: NeilBrown Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 9157a29c8dbf1..cd7b0c1e882d7 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -336,7 +336,7 @@ static void raid1_end_read_request(struct bio *bio, int error) spin_lock_irqsave(&conf->device_lock, flags); if (r1_bio->mddev->degraded == conf->raid_disks || (r1_bio->mddev->degraded == conf->raid_disks-1 && - !test_bit(Faulty, &conf->mirrors[mirror].rdev->flags))) + test_bit(In_sync, &conf->mirrors[mirror].rdev->flags))) uptodate = 1; spin_unlock_irqrestore(&conf->device_lock, flags); } From 19ea7491d0b3555c333ceacaf489c51fa3d6e89e Mon Sep 17 00:00:00 2001 From: Sifan Naeem Date: Thu, 18 Jun 2015 13:50:54 +0100 Subject: [PATCH 0481/2091] spi: img-spfi: fix support for speeds up to 1/4th input clock commit 6a806a214af42ac951e2d85e64d1bf4463482e16 upstream. Setting the Same Edge bit indicates to the spfi block to receive and transmit data on the same edge of the spfi clock, which in turn doubles the operating frequency of spfi. The maximum supported frequency is limited to 1/4th of the spfi input clock, but without this bit set the maximum would be 1/8th of the input clock. The current driver calculates the divisor with maximum speed at 1/4th of the input clock, this would fail if the requested frequency is higher than 1/8 of the input clock. Any requests for 1/8th of the input clock would still pass. Fixes: 8543d0e72d43 ("spi: img-spfi: Limit bit clock to 1/4th of input clock") Signed-off-by: Sifan Naeem Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-img-spfi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/spi/spi-img-spfi.c b/drivers/spi/spi-img-spfi.c index 788e2b176a4f7..acce90ac7371d 100644 --- a/drivers/spi/spi-img-spfi.c +++ b/drivers/spi/spi-img-spfi.c @@ -40,6 +40,7 @@ #define SPFI_CONTROL_SOFT_RESET BIT(11) #define SPFI_CONTROL_SEND_DMA BIT(10) #define SPFI_CONTROL_GET_DMA BIT(9) +#define SPFI_CONTROL_SE BIT(8) #define SPFI_CONTROL_TMODE_SHIFT 5 #define SPFI_CONTROL_TMODE_MASK 0x7 #define SPFI_CONTROL_TMODE_SINGLE 0 @@ -491,6 +492,7 @@ static void img_spfi_config(struct spi_master *master, struct spi_device *spi, else if (xfer->tx_nbits == SPI_NBITS_QUAD && xfer->rx_nbits == SPI_NBITS_QUAD) val |= SPFI_CONTROL_TMODE_QUAD << SPFI_CONTROL_TMODE_SHIFT; + val |= SPFI_CONTROL_SE; spfi_writel(spfi, val, SPFI_CONTROL); } From 56e8479a04db9085278e17e0eac594927d327c5d Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Fri, 24 Jul 2015 15:01:08 +0200 Subject: [PATCH 0482/2091] spi: imx: Fix small DMA transfers commit f6ee9b582d2db652497b73c1f117591dfb6d3a90 upstream. DMA transfers must be greater than the watermark level size. spi_imx->rx_wml and spi_imx->tx_wml contain the watermark level in 32bit words whereas struct spi_transfer contains the transfer len in bytes. Fix the check if DMA is possible for a transfer accordingly. This fixes transfers with sizes between 33 and 128 bytes for which previously was claimed that DMA is possible. 
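For illustration only (not part of the patch), a standalone sketch of the broken comparison with a hypothetical watermark of 32; the driver keeps the watermark in 32-bit words while transfer->len is in bytes:

#include <stdio.h>

int main(void)
{
	unsigned int wml_words = 32;            /* hypothetical RX/TX watermark, in u32 words */
	unsigned int wml_bytes = wml_words * 4; /* i.e. wml * sizeof(u32) = 128 bytes         */
	unsigned int len;

	for (len = 32; len <= 160; len += 32) {
		int old_ok = len > wml_words;   /* bytes compared against words (broken) */
		int new_ok = len > wml_bytes;   /* bytes compared against bytes (fixed)  */
		printf("len=%3u bytes: old=%s fixed=%s\n",
		       len, old_ok ? "DMA" : "PIO", new_ok ? "DMA" : "PIO");
	}
	return 0;
}

With these assumed values the old check claims DMA for 64-, 96- and 128-byte transfers, matching the 33..128 byte range called out above.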
Fixes: f62caccd12c17e4 (spi: spi-imx: add DMA support) Signed-off-by: Sascha Hauer Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-imx.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c index f08e812b29847..412b9c86b9972 100644 --- a/drivers/spi/spi-imx.c +++ b/drivers/spi/spi-imx.c @@ -201,8 +201,9 @@ static bool spi_imx_can_dma(struct spi_master *master, struct spi_device *spi, { struct spi_imx_data *spi_imx = spi_master_get_devdata(master); - if (spi_imx->dma_is_inited && (transfer->len > spi_imx->rx_wml) - && (transfer->len > spi_imx->tx_wml)) + if (spi_imx->dma_is_inited + && transfer->len > spi_imx->rx_wml * sizeof(u32) + && transfer->len > spi_imx->tx_wml * sizeof(u32)) return true; return false; } From bec2057fee79fe271a629c98feb2201faba4ff2c Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Thu, 23 Jul 2015 14:11:09 -0400 Subject: [PATCH 0483/2091] tile: use free_bootmem_late() for initrd commit 3f81d2447b37ac697b3c600039f2c6b628c06e21 upstream. We were previously using free_bootmem() and just getting lucky that nothing too bad happened. Signed-off-by: Chris Metcalf Signed-off-by: Greg Kroah-Hartman --- arch/tile/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c index d366675e4bf88..396b5c96e2724 100644 --- a/arch/tile/kernel/setup.c +++ b/arch/tile/kernel/setup.c @@ -1139,7 +1139,7 @@ static void __init load_hv_initrd(void) void __init free_initrd_mem(unsigned long begin, unsigned long end) { - free_bootmem(__pa(begin), end - begin); + free_bootmem_late(__pa(begin), end - begin); } static int __init setup_initrd(char *str) From 106c930a30784cb783b8ad9d4e685fd1d0c0cbe7 Mon Sep 17 00:00:00 2001 From: Bernhard Bender Date: Thu, 23 Jul 2015 13:58:08 -0700 Subject: [PATCH 0484/2091] Input: usbtouchscreen - avoid unresponsive TSC-30 touch screen commit 968491709e5b1aaf429428814fff3d932fa90b60 upstream. This patch fixes a problem in the usbtouchscreen driver for DMC TSC-30 touch screen. Due to a missing delay between the RESET and SET_RATE commands, the touch screen may become unresponsive during system startup or driver loading. According to the DMC documentation, a delay is needed after the RESET command to allow the chip to complete its internal initialization. As this delay is not guaranteed, we had a system where the touch screen occasionally did not send any touch data. There was no other indication of the problem. The patch fixes the problem by adding a 150ms delay between the RESET and SET_RATE commands. 
Suggested-by: Jakob Mustafa Signed-off-by: Bernhard Bender Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/touchscreen/usbtouchscreen.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/input/touchscreen/usbtouchscreen.c b/drivers/input/touchscreen/usbtouchscreen.c index f2c6c352c55af..2c41107240dec 100644 --- a/drivers/input/touchscreen/usbtouchscreen.c +++ b/drivers/input/touchscreen/usbtouchscreen.c @@ -627,6 +627,9 @@ static int dmc_tsc10_init(struct usbtouch_usb *usbtouch) goto err_out; } + /* TSC-25 data sheet specifies a delay after the RESET command */ + msleep(150); + /* set coordinate output rate */ buf[0] = buf[1] = 0xFF; ret = usb_control_msg(dev, usb_rcvctrlpipe (dev, 0), From 1fbb15f220a6740327764b3775566771a4746fe6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 22 Jul 2015 18:05:53 -0400 Subject: [PATCH 0485/2091] blkcg: fix gendisk reference leak in blkg_conf_prep() commit 5f6c2d2b7dbb541c1e922538c49fa04c494ae3d7 upstream. When a blkcg configuration is targeted to a partition rather than a whole device, blkg_conf_prep fails with -EINVAL; unfortunately, it forgets to put the gendisk ref in that case. Fix it. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- block/blk-cgroup.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 0ac817b750dbc..6817e28960b7e 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -716,8 +716,12 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, return -EINVAL; disk = get_gendisk(MKDEV(major, minor), &part); - if (!disk || part) + if (!disk) return -EINVAL; + if (part) { + put_disk(disk); + return -EINVAL; + } rcu_read_lock(); spin_lock_irq(disk->queue->queue_lock); From e4cadcc1c4595b336d7f9f9b1fb51c5afdf9a6f5 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 24 Jun 2015 19:48:43 +0900 Subject: [PATCH 0486/2091] regulator: s2mps11: Fix GPIO suspend enable shift wrapping bug commit 32c848e33ace75fce388cceff76223d12b46eaa3 upstream. Status of enabling suspend mode for regulator was stored in bitmap-like long integer. However since adding support for S2MPU02 the number of regulators exceeded 32 so on devices with more than 32 regulators (S2MPU02 and S2MPS13) overflow happens when shifting the bit. This could lead to enabling suspend mode for completely different regulator than intended or to switching different regulator to other mode (e.g. from always enabled to controlled by PWRHOLD pin). Both cases could result in larger energy usage and issues when suspending to RAM. Fixes: 00e2573d2c10 ("regulator: s2mps11: Add support S2MPU02 regulator device") Reported-by: Dan Carpenter Signed-off-by: Krzysztof Kozlowski Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/regulator/s2mps11.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/regulator/s2mps11.c b/drivers/regulator/s2mps11.c index ff828117798fd..8de135174e820 100644 --- a/drivers/regulator/s2mps11.c +++ b/drivers/regulator/s2mps11.c @@ -34,6 +34,8 @@ #include #include +/* The highest number of possible regulators for supported devices. */ +#define S2MPS_REGULATOR_MAX S2MPS13_REGULATOR_MAX struct s2mps11_info { unsigned int rdev_num; int ramp_delay2; @@ -49,7 +51,7 @@ struct s2mps11_info { * One bit for each S2MPS13/S2MPS14/S2MPU02 regulator whether * the suspend mode was enabled. 
*/ - unsigned long long s2mps14_suspend_state:50; + DECLARE_BITMAP(suspend_state, S2MPS_REGULATOR_MAX); /* Array of size rdev_num with GPIO-s for external sleep control */ int *ext_control_gpio; @@ -500,7 +502,7 @@ static int s2mps14_regulator_enable(struct regulator_dev *rdev) switch (s2mps11->dev_type) { case S2MPS13X: case S2MPS14X: - if (s2mps11->s2mps14_suspend_state & (1 << rdev_get_id(rdev))) + if (test_bit(rdev_get_id(rdev), s2mps11->suspend_state)) val = S2MPS14_ENABLE_SUSPEND; else if (gpio_is_valid(s2mps11->ext_control_gpio[rdev_get_id(rdev)])) val = S2MPS14_ENABLE_EXT_CONTROL; @@ -508,7 +510,7 @@ static int s2mps14_regulator_enable(struct regulator_dev *rdev) val = rdev->desc->enable_mask; break; case S2MPU02: - if (s2mps11->s2mps14_suspend_state & (1 << rdev_get_id(rdev))) + if (test_bit(rdev_get_id(rdev), s2mps11->suspend_state)) val = S2MPU02_ENABLE_SUSPEND; else val = rdev->desc->enable_mask; @@ -562,7 +564,7 @@ static int s2mps14_regulator_set_suspend_disable(struct regulator_dev *rdev) if (ret < 0) return ret; - s2mps11->s2mps14_suspend_state |= (1 << rdev_get_id(rdev)); + set_bit(rdev_get_id(rdev), s2mps11->suspend_state); /* * Don't enable suspend mode if regulator is already disabled because * this would effectively for a short time turn on the regulator after @@ -960,18 +962,22 @@ static int s2mps11_pmic_probe(struct platform_device *pdev) case S2MPS11X: s2mps11->rdev_num = ARRAY_SIZE(s2mps11_regulators); regulators = s2mps11_regulators; + BUILD_BUG_ON(S2MPS_REGULATOR_MAX < s2mps11->rdev_num); break; case S2MPS13X: s2mps11->rdev_num = ARRAY_SIZE(s2mps13_regulators); regulators = s2mps13_regulators; + BUILD_BUG_ON(S2MPS_REGULATOR_MAX < s2mps11->rdev_num); break; case S2MPS14X: s2mps11->rdev_num = ARRAY_SIZE(s2mps14_regulators); regulators = s2mps14_regulators; + BUILD_BUG_ON(S2MPS_REGULATOR_MAX < s2mps11->rdev_num); break; case S2MPU02: s2mps11->rdev_num = ARRAY_SIZE(s2mpu02_regulators); regulators = s2mpu02_regulators; + BUILD_BUG_ON(S2MPS_REGULATOR_MAX < s2mps11->rdev_num); break; default: dev_err(&pdev->dev, "Invalid device type: %u\n", From 33f293b92e35ae208aefccdca05bcc268e03e4d2 Mon Sep 17 00:00:00 2001 From: Lior Amsalem Date: Tue, 30 Jun 2015 16:09:49 +0200 Subject: [PATCH 0487/2091] ata: pmp: add quirk for Marvell 4140 SATA PMP commit 945b47441d83d2392ac9f984e0267ad521f24268 upstream. This commit adds the necessary quirk to make the Marvell 4140 SATA PMP work properly. This PMP doesn't like SRST on port number 4 (the host port) so this commit marks this port as not supporting SRST. 
Signed-off-by: Lior Amsalem Reviewed-by: Nadav Haklai Signed-off-by: Thomas Petazzoni Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libata-pmp.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/ata/libata-pmp.c b/drivers/ata/libata-pmp.c index 7ccc084bf1dfb..85aa76116a305 100644 --- a/drivers/ata/libata-pmp.c +++ b/drivers/ata/libata-pmp.c @@ -460,6 +460,13 @@ static void sata_pmp_quirks(struct ata_port *ap) ATA_LFLAG_NO_SRST | ATA_LFLAG_ASSUME_ATA; } + } else if (vendor == 0x11ab && devid == 0x4140) { + /* Marvell 4140 quirks */ + ata_for_each_link(link, ap, EDGE) { + /* port 4 is for SEMB device and it doesn't like SRST */ + if (link->pmp == 4) + link->flags |= ATA_LFLAG_DISABLED; + } } } From 6cf671f37e11f4b1b32f6cb2c6094dd923bcce4f Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Mon, 6 Jul 2015 13:12:32 +0200 Subject: [PATCH 0488/2091] usb-storage: ignore ZTE MF 823 card reader in mode 0x1225 commit 5fb2c782f451a4fb9c19c076e2c442839faf0f76 upstream. This device automatically switches itself to another mode (0x1405) unless the specific access pattern of Windows is followed in its initial mode. That makes a dirty unmount of the internal storage devices inevitable if they are mounted. So the card reader of such a device should be ignored, lest an unclean removal become inevitable. This replaces an earlier patch that ignored all LUNs of this device. That patch was overly broad. Signed-off-by: Oliver Neukum Reviewed-by: Lars Melin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/unusual_devs.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index caf188800c679..87898ca2ed171 100644 --- a/drivers/usb/storage/unusual_devs.h +++ b/drivers/usb/storage/unusual_devs.h @@ -2065,6 +2065,18 @@ UNUSUAL_DEV( 0x1908, 0x3335, 0x0200, 0x0200, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_NO_READ_DISC_INFO ), +/* Reported by Oliver Neukum + * This device morphes spontaneously into another device if the access + * pattern of Windows isn't followed. Thus writable media would be dirty + * if the initial instance is used. So the device is limited to its + * virtual CD. + * And yes, the concept that BCD goes up to 9 is not heeded */ +UNUSUAL_DEV( 0x19d2, 0x1225, 0x0000, 0xffff, + "ZTE,Incorporated", + "ZTE WCDMA Technologies MSM", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_SINGLE_LUN ), + /* Reported by Sven Geggus * This encrypted pen drive returns bogus data for the initial READ(10). */ From 4eede03b97bf6d89681905668e983f99b5847827 Mon Sep 17 00:00:00 2001 From: David Jander Date: Fri, 26 Jun 2015 08:11:30 +0200 Subject: [PATCH 0489/2091] Revert "serial: imx: initialized DMA w/o HW flow enabled" commit 907eda32a36fcdb979bdb91ea097abb3dd2c23c9 upstream. This reverts commit 068500e08dc87ea9a453cc4a500cf3ab28d0f936. According to some tests, SDMA support is broken at least for i.MX6 without HW flow control. Different forms of data-corruption appear either with the ROM firmware for the SDMA controller as well as when loading Freescale provided SDMA firmware versions 1.1 or 3.1. 
Signed-off-by: David Jander Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/imx.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c index 88250395b0ce9..01aa52f574e52 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c @@ -1132,11 +1132,6 @@ static int imx_startup(struct uart_port *port) while (!(readl(sport->port.membase + UCR2) & UCR2_SRST) && (--i > 0)) udelay(1); - /* Can we enable the DMA support? */ - if (is_imx6q_uart(sport) && !uart_console(port) && - !sport->dma_is_inited) - imx_uart_dma_init(sport); - spin_lock_irqsave(&sport->port.lock, flags); /* @@ -1145,9 +1140,6 @@ static int imx_startup(struct uart_port *port) writel(USR1_RTSD, sport->port.membase + USR1); writel(USR2_ORE, sport->port.membase + USR2); - if (sport->dma_is_inited && !sport->dma_is_enabled) - imx_enable_dma(sport); - temp = readl(sport->port.membase + UCR1); temp |= UCR1_RRDYEN | UCR1_RTSDEN | UCR1_UARTEN; @@ -1318,6 +1310,11 @@ imx_set_termios(struct uart_port *port, struct ktermios *termios, } else { ucr2 |= UCR2_CTSC; } + + /* Can we enable the DMA support? */ + if (is_imx6q_uart(sport) && !uart_console(port) + && !sport->dma_is_inited) + imx_uart_dma_init(sport); } else { termios->c_cflag &= ~CRTSCTS; } @@ -1434,6 +1431,8 @@ imx_set_termios(struct uart_port *port, struct ktermios *termios, if (UART_ENABLE_MS(&sport->port, termios->c_cflag)) imx_enable_ms(&sport->port); + if (sport->dma_is_inited && !sport->dma_is_enabled) + imx_enable_dma(sport); spin_unlock_irqrestore(&sport->port.lock, flags); } From df86527517e8cd14f7ebec7302e70590543a49b5 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sun, 12 Jul 2015 21:05:26 -0400 Subject: [PATCH 0490/2091] serial: core: Fix crashes while echoing when closing commit e144c58cad6667876173dd76977e9e6557e34941 upstream. While closing, new rx data may be received after the input buffers have been flushed but before stop_rx() halts receiving [1]. The new data might not be processed by flush_to_ldisc() until after uart_shutdown() and normal input processing is re-enabled (ie., tty->closing = 0). The race is outlined below: CPU 0 | CPU 1 | uart_close() | tty_port_close_start() | tty->closing = 1 | tty_ldisc_flush() | | => IRQ | while (LSR & data ready) | uart_insert_char() | tty_flip_buffer_push() | <= EOI stop_rx() | . uart_shutdown() | . free xmit.buf | . tty_port_tty_set(NULL) | . tty->closing = 0 | . | flush_to_ldisc() | n_tty_receive_buf_common() | __receive_buf() | ... | commit_echoes() | uart_flush_chars() | __uart_start() | ** OOPS on port.tty deref ** tty_ldisc_flush() | Input processing must be prevented from echoing (tty->closing = 1) until _after_ the input buffers have been flushed again at the end of uart_close(). [1] In fact, some input may actually be buffered _after_ stop_rx() since the rx interrupt may have already triggered but not yet been handled when stop_rx() disables rx interrupts. 
Fixes: 2e758910832d ("serial: core: Flush ldisc after dropping port mutex in uart_close()") Reported-by: Robert Elliott Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/serial_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index 0b7bb12dfc68b..ec540445bb71f 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -1409,7 +1409,7 @@ static void uart_close(struct tty_struct *tty, struct file *filp) mutex_lock(&port->mutex); uart_shutdown(tty, state); tty_port_tty_set(port, NULL); - tty->closing = 0; + spin_lock_irqsave(&port->lock, flags); if (port->blocked_open) { @@ -1435,6 +1435,7 @@ static void uart_close(struct tty_struct *tty, struct file *filp) mutex_unlock(&port->mutex); tty_ldisc_flush(tty); + tty->closing = 0; } static void uart_wait_until_sent(struct tty_struct *tty, int timeout) From eb9a669517a2eeb2e55fff733191c128b2f55830 Mon Sep 17 00:00:00 2001 From: Brian Campbell Date: Tue, 21 Jul 2015 17:20:28 +0300 Subject: [PATCH 0491/2091] xhci: Calculate old endpoints correctly on device reset commit 326124a027abc9a7f43f72dc94f6f0f7a55b02b3 upstream. When resetting a device the number of active TTs may need to be corrected by xhci_update_tt_active_eps, but the number of old active endpoints supplied to it was always zero, so the number of TTs and the bandwidth reserved for them was not updated, and could rise unnecessarily. This affected systems using Intel's Patherpoint chipset, which rely on software bandwidth checking. For example, a Lenovo X230 would lose the ability to use ports on the docking station after enough suspend/resume cycles because the bandwidth calculated would rise with every cycle when a suitable device is attached. The correct number of active endpoints is calculated in the same way as in xhci_reserve_bandwidth. Signed-off-by: Brian Campbell Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 36bf089b708fe..c502c2277aebf 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -3453,6 +3453,9 @@ int xhci_discover_or_reset_device(struct usb_hcd *hcd, struct usb_device *udev) return -EINVAL; } + if (virt_dev->tt_info) + old_active_eps = virt_dev->tt_info->active_eps; + if (virt_dev->udev != udev) { /* If the virt_dev and the udev does not match, this virt_dev * may belong to another udev. From 6f0433c52944a822f338f300fff672fa2fd43ac5 Mon Sep 17 00:00:00 2001 From: Zhuang Jin Can Date: Tue, 21 Jul 2015 17:20:29 +0300 Subject: [PATCH 0492/2091] xhci: report U3 when link is in resume state commit 243292a2ad3dc365849b820a64868927168894ac upstream. xhci_hub_report_usb3_link_state() returns pls as U0 when the link is in resume state, and this causes usb core to think the link is in U0 while actually it's in resume state. When usb core transfers control request on the link, it fails with TRB error as the link is not ready for transfer. To fix the issue, report U3 when the link is in resume state, thus usb core knows the link it's not ready for transfer. 
Signed-off-by: Zhuang Jin Can Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-hub.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index 0827d7c965276..9e71c58809137 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -484,10 +484,13 @@ static void xhci_hub_report_usb3_link_state(struct xhci_hcd *xhci, u32 pls = status_reg & PORT_PLS_MASK; /* resume state is a xHCI internal state. - * Do not report it to usb core. + * Do not report it to usb core, instead, pretend to be U3, + * thus usb core knows it's not ready for transfer */ - if (pls == XDEV_RESUME) + if (pls == XDEV_RESUME) { + *status |= USB_SS_PORT_LS_U3; return; + } /* When the CAS bit is set then warm reset * should be performed on port From c65fd970bbb46be9e517d93e67b5e9f27f5e45a0 Mon Sep 17 00:00:00 2001 From: Zhuang Jin Can Date: Tue, 21 Jul 2015 17:20:30 +0300 Subject: [PATCH 0493/2091] xhci: prevent bus_suspend if SS port resuming in phase 1 commit fac4271d1126c45ceaceb7f4a336317b771eb121 upstream. When the link is just waken, it's in Resume state, and driver sets PLS to U0. This refers to Phase 1. Phase 2 refers to when the link has completed the transition from Resume state to U0. With the fix of xhci: report U3 when link is in resume state, it also exposes an issue that usb3 roothub and controller can suspend right after phase 1, and this causes a hard hang in controller. To fix the issue, we need to prevent usb3 bus suspend if any port is resuming in phase 1. [merge separate USB2 and USB3 port resume checking to one -Mathias] Signed-off-by: Zhuang Jin Can Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-hub.c | 6 +++--- drivers/usb/host/xhci-ring.c | 3 +++ drivers/usb/host/xhci.h | 1 + 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index 9e71c58809137..1cada6aa75707 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -1123,10 +1123,10 @@ int xhci_bus_suspend(struct usb_hcd *hcd) spin_lock_irqsave(&xhci->lock, flags); if (hcd->self.root_hub->do_remote_wakeup) { - if (bus_state->resuming_ports) { + if (bus_state->resuming_ports || /* USB2 */ + bus_state->port_remote_wakeup) { /* USB3 */ spin_unlock_irqrestore(&xhci->lock, flags); - xhci_dbg(xhci, "suspend failed because " - "a port is resuming\n"); + xhci_dbg(xhci, "suspend failed because a port is resuming\n"); return -EBUSY; } } diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 7d34cbfaf373b..d095677a07021 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -1546,6 +1546,9 @@ static void handle_port_status(struct xhci_hcd *xhci, usb_hcd_resume_root_hub(hcd); } + if (hcd->speed == HCD_USB3 && (temp & PORT_PLS_MASK) == XDEV_INACTIVE) + bus_state->port_remote_wakeup &= ~(1 << faked_port_index); + if ((temp & PORT_PLC) && (temp & PORT_PLS_MASK) == XDEV_RESUME) { xhci_dbg(xhci, "port resume event for port %d\n", port_id); diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 6977f8491fa7c..0f26dd2697b6e 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -285,6 +285,7 @@ struct xhci_op_regs { #define XDEV_U0 (0x0 << 5) #define XDEV_U2 (0x2 << 5) #define XDEV_U3 (0x3 << 5) +#define XDEV_INACTIVE (0x6 << 5) #define XDEV_RESUME (0xf << 5) /* true: port has power (see HCC_PPC) */ #define PORT_POWER (1 << 9) 
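The xhci link-state fixes in this series all key off the PORTSC port-link-state (PLS) field that the XDEV_* values above encode. For illustration only (not from any patch), a minimal sketch of how such a field is decoded; PORT_PLS_MASK is assumed here to be the four PLS bits at bit 5, and the register value is made up:

#include <stdio.h>

#define PORT_PLS_MASK  (0xf << 5)   /* assumption for this sketch */
#define XDEV_U0        (0x0 << 5)
#define XDEV_U3        (0x3 << 5)
#define XDEV_INACTIVE  (0x6 << 5)
#define XDEV_RESUME    (0xf << 5)

int main(void)
{
	unsigned int portsc = 0xf << 5;            /* made-up register value: PLS = resume */
	unsigned int pls = portsc & PORT_PLS_MASK;

	if (pls == XDEV_RESUME)
		printf("link in internal resume state (report U3, do not report PLC)\n");
	else if (pls == XDEV_U3)
		printf("link suspended (U3)\n");
	else if (pls == XDEV_INACTIVE)
		printf("link inactive\n");
	else
		printf("other link state: 0x%x\n", pls >> 5);
	return 0;
}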
From 81b75e85593dcbfde8a86cd44bd75cc96a22849e Mon Sep 17 00:00:00 2001 From: Zhuang Jin Can Date: Tue, 21 Jul 2015 17:20:31 +0300 Subject: [PATCH 0494/2091] xhci: do not report PLC when link is in internal resume state commit aca3a0489ac019b58cf32794d5362bb284cb9b94 upstream. Port link change with port in resume state should not be reported to usbcore, as this is an internal state to be handled by xhci driver. Reporting PLC to usbcore may cause usbcore clearing PLC first and port change event irq won't be generated. Signed-off-by: Zhuang Jin Can Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-hub.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index 1cada6aa75707..ee07ba41c8db3 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -591,7 +591,14 @@ static u32 xhci_get_port_status(struct usb_hcd *hcd, status |= USB_PORT_STAT_C_RESET << 16; /* USB3.0 only */ if (hcd->speed == HCD_USB3) { - if ((raw_port_status & PORT_PLC)) + /* Port link change with port in resume state should not be + * reported to usbcore, as this is an internal state to be + * handled by xhci driver. Reporting PLC to usbcore may + * cause usbcore clearing PLC first and port change event + * irq won't be generated. + */ + if ((raw_port_status & PORT_PLC) && + (raw_port_status & PORT_PLS_MASK) != XDEV_RESUME) status |= USB_PORT_STAT_C_LINK_STATE << 16; if ((raw_port_status & PORT_WRC)) status |= USB_PORT_STAT_C_BH_RESET << 16; From 35c8bade335f6d747f4567cfa5b678b6798a2187 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Thu, 18 Jun 2015 11:41:03 +0300 Subject: [PATCH 0495/2091] mei: prevent unloading mei hw modules while the device is opened. commit 154322f47376fed6ab1e4b350aa45fffa15a61aa upstream. chrdev_open() increases reference counter on cdev->owner. Instead of assigning the owner to mei subsystem, the owner has to be set to the underlaying HW module (mei_me or mei_txe), so once the device is opened the HW module cannot be unloaded. Signed-off-by: Tomas Winkler Signed-off-by: Alexander Usyskin Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/mei/main.c b/drivers/misc/mei/main.c index 3e29681595064..e40bcd03bd47a 100644 --- a/drivers/misc/mei/main.c +++ b/drivers/misc/mei/main.c @@ -685,7 +685,7 @@ int mei_register(struct mei_device *dev, struct device *parent) /* Fill in the data structures */ devno = MKDEV(MAJOR(mei_devt), dev->minor); cdev_init(&dev->cdev, &mei_fops); - dev->cdev.owner = mei_fops.owner; + dev->cdev.owner = parent->driver->owner; /* Add the device */ ret = cdev_add(&dev->cdev, devno, 1); From 5728ec989adce2f303000b84469750579f162c2d Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Mon, 20 Jul 2015 16:01:53 -0700 Subject: [PATCH 0496/2091] x86/mm: Add parenthesis for TLB tracepoint size calculation commit bbc03778b9954a2ec93baed63718e4df0192f130 upstream. flush_tlb_info->flush_start/end are both normal virtual addresses. When calculating 'nr_pages' (only used for the tracepoint), I neglected to put parenthesis in. Thanks to David Koufaty for pointing this out. 
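A worked example of the precedence problem, with made-up addresses and 4 KiB pages, flush_start = 0x10000 and flush_end = 0x14000:

	flush_end - flush_start / PAGE_SIZE   = 0x14000 - (0x10000 / 0x1000) = 0x13ff0  (bogus)
	(flush_end - flush_start) / PAGE_SIZE = 0x4000 / 0x1000              = 4 pages

Only the tracepoint value was affected; the flush loop itself uses flush_start/flush_end directly.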
Signed-off-by: Dave Hansen Cc: Andrew Morton Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dave@sr71.net Link: http://lkml.kernel.org/r/20150720230153.9E834081@viggo.jf.intel.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/tlb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 3250f2371aea5..90b924acd9822 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -117,7 +117,7 @@ static void flush_tlb_func(void *info) } else { unsigned long addr; unsigned long nr_pages = - f->flush_end - f->flush_start / PAGE_SIZE; + (f->flush_end - f->flush_start) / PAGE_SIZE; addr = f->flush_start; while (addr < f->flush_end) { __flush_tlb_single(addr); From 07ddeec8a096a8d6e8be2af06703b3e828c15d59 Mon Sep 17 00:00:00 2001 From: "Luck, Tony" Date: Tue, 30 Jun 2015 15:57:51 -0700 Subject: [PATCH 0497/2091] efi: Handle memory error structures produced based on old versions of standard commit 4c62360d7562a20c996836d163259c87d9378120 upstream. The memory error record structure includes as its first field a bitmask of which subsequent fields are valid. The allows new fields to be added to the structure while keeping compatibility with older software that parses these records. This mechanism was used between versions 2.2 and 2.3 to add four new fields, growing the size of the structure from 73 bytes to 80. But Linux just added all the new fields so this test: if (gdata->error_data_length >= sizeof(*mem_err)) cper_print_mem(newpfx, mem_err); else goto err_section_too_small; now make Linux complain about old format records being too short. Add a definition for the old format of the structure and use that for the minimum size check. Pass the actual size to cper_print_mem() so it can sanity check the validation_bits field to ensure that if a BIOS using the old format sets bits as if it were new, we won't access fields beyond the end of the structure. 
Signed-off-by: Tony Luck Signed-off-by: Matt Fleming Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/cper.c | 15 ++++++++++++--- include/linux/cper.h | 22 +++++++++++++++++++++- 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c index 4fd9961d552e8..d425374254384 100644 --- a/drivers/firmware/efi/cper.c +++ b/drivers/firmware/efi/cper.c @@ -305,10 +305,17 @@ const char *cper_mem_err_unpack(struct trace_seq *p, return ret; } -static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem) +static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem, + int len) { struct cper_mem_err_compact cmem; + /* Don't trust UEFI 2.1/2.2 structure with bad validation bits */ + if (len == sizeof(struct cper_sec_mem_err_old) && + (mem->validation_bits & ~(CPER_MEM_VALID_RANK_NUMBER - 1))) { + pr_err(FW_WARN "valid bits set for fields beyond structure\n"); + return; + } if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS) printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status); if (mem->validation_bits & CPER_MEM_VALID_PA) @@ -405,8 +412,10 @@ static void cper_estatus_print_section( } else if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) { struct cper_sec_mem_err *mem_err = (void *)(gdata + 1); printk("%s""section_type: memory error\n", newpfx); - if (gdata->error_data_length >= sizeof(*mem_err)) - cper_print_mem(newpfx, mem_err); + if (gdata->error_data_length >= + sizeof(struct cper_sec_mem_err_old)) + cper_print_mem(newpfx, mem_err, + gdata->error_data_length); else goto err_section_too_small; } else if (!uuid_le_cmp(*sec_type, CPER_SEC_PCIE)) { diff --git a/include/linux/cper.h b/include/linux/cper.h index 76abba4b238ec..dcacb1a72e26d 100644 --- a/include/linux/cper.h +++ b/include/linux/cper.h @@ -340,7 +340,27 @@ struct cper_ia_proc_ctx { __u64 mm_reg_addr; }; -/* Memory Error Section */ +/* Old Memory Error Section UEFI 2.1, 2.2 */ +struct cper_sec_mem_err_old { + __u64 validation_bits; + __u64 error_status; + __u64 physical_addr; + __u64 physical_addr_mask; + __u16 node; + __u16 card; + __u16 module; + __u16 bank; + __u16 device; + __u16 row; + __u16 column; + __u16 bit_pos; + __u64 requestor_id; + __u64 responder_id; + __u64 target_id; + __u8 error_type; +}; + +/* Memory Error Section UEFI >= 2.3 */ struct cper_sec_mem_err { __u64 validation_bits; __u64 error_status; From b3525bdf91087473d4490ab7acc31f59821cc04a Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sun, 26 Jul 2015 14:59:00 +0200 Subject: [PATCH 0498/2091] arm64/efi: map the entire UEFI vendor string before reading it commit f91b1feada0b6f0a4d33648155b3ded2c4e0707e upstream. At boot, the UTF-16 UEFI vendor string is copied from the system table into a char array with a size of 100 bytes. However, this size of 100 bytes is also used for memremapping() the source, which may not be sufficient if the vendor string exceeds 50 UTF-16 characters, and the placement of the vendor string inside a 4 KB page happens to leave the end unmapped. So use the correct '100 * sizeof(efi_char16_t)' for the size of the mapping. 
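In concrete numbers (sizes as given above, with 2-byte UTF-16 code units):

	sizeof(vendor)                        = 100 bytes      -> covers only 50 UTF-16 characters
	sizeof(vendor) * sizeof(efi_char16_t) = 100 * 2 = 200 bytes -> covers all 100 characters

so it was the mapping length, not the copy length, that could fall short.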
Signed-off-by: Ard Biesheuvel Fixes: f84d02755f5a ("arm64: add EFI runtime services") Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/efi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index ab21e0d582788..352962bc2e78a 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -122,12 +122,12 @@ static int __init uefi_init(void) /* Show what we know for posterity */ c16 = early_memremap(efi_to_phys(efi.systab->fw_vendor), - sizeof(vendor)); + sizeof(vendor) * sizeof(efi_char16_t)); if (c16) { for (i = 0; i < (int) sizeof(vendor) - 1 && *c16; ++i) vendor[i] = c16[i]; vendor[i] = '\0'; - early_memunmap(c16, sizeof(vendor)); + early_memunmap(c16, sizeof(vendor) * sizeof(efi_char16_t)); } pr_info("EFI v%u.%.02u by %s\n", From c54f557fa580989c304cc5c83528efa5a1819a85 Mon Sep 17 00:00:00 2001 From: Ricardo Neri Date: Wed, 15 Jul 2015 19:36:03 -0700 Subject: [PATCH 0499/2091] efi: Check for NULL efi kernel parameters commit 9115c7589b11349a1c3099758b4bded579ff69e0 upstream. Even though it is documented how to specifiy efi parameters, it is possible to cause a kernel panic due to a dereference of a NULL pointer when parsing such parameters if "efi" alone is given: PANIC: early exception 0e rip 10:ffffffff812fb361 error 0 cr2 0 [ 0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 4.2.0-rc1+ #450 [ 0.000000] ffffffff81fe20a9 ffffffff81e03d50 ffffffff8184bb0f 00000000000003f8 [ 0.000000] 0000000000000000 ffffffff81e03e08 ffffffff81f371a1 64656c62616e6520 [ 0.000000] 0000000000000069 000000000000005f 0000000000000000 0000000000000000 [ 0.000000] Call Trace: [ 0.000000] [] dump_stack+0x45/0x57 [ 0.000000] [] early_idt_handler_common+0x81/0xae [ 0.000000] [] ? parse_option_str+0x11/0x90 [ 0.000000] [] arch_parse_efi_cmdline+0x15/0x42 [ 0.000000] [] do_early_param+0x50/0x8a [ 0.000000] [] parse_args+0x1e3/0x400 [ 0.000000] [] parse_early_options+0x24/0x28 [ 0.000000] [] ? loglevel+0x31/0x31 [ 0.000000] [] parse_early_param+0x31/0x3d [ 0.000000] [] setup_arch+0x2de/0xc08 [ 0.000000] [] ? vprintk_default+0x1a/0x20 [ 0.000000] [] start_kernel+0x90/0x423 [ 0.000000] [] x86_64_start_reservations+0x2a/0x2c [ 0.000000] [] x86_64_start_kernel+0xeb/0xef [ 0.000000] RIP 0xffffffff81ba2efc This panic is not reproducible with "efi=" as this will result in a non-NULL zero-length string. Thus, verify that the pointer to the parameter string is not NULL. This is consistent with other parameter-parsing functions which check for NULL pointers. 
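For reference, the three command-line forms discussed above behave as follows (option name taken from the existing handler):

	efi            ->  str == NULL       : previously dereferenced and panicked, now rejected
	efi=           ->  str == ""         : non-NULL but zero length, never panicked
	efi=old_map    ->  str == "old_map"  : normal option parsing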
Signed-off-by: Ricardo Neri Cc: Dave Young Signed-off-by: Matt Fleming Signed-off-by: Greg Kroah-Hartman --- arch/x86/platform/efi/efi.c | 5 +++++ drivers/firmware/efi/efi.c | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 02744df576d52..841ea05e1b02c 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -946,6 +946,11 @@ u64 efi_mem_attributes(unsigned long phys_addr) static int __init arch_parse_efi_cmdline(char *str) { + if (!str) { + pr_warn("need at least one option\n"); + return -EINVAL; + } + if (parse_option_str(str, "old_map")) set_bit(EFI_OLD_MEMMAP, &efi.flags); if (parse_option_str(str, "debug")) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index e14363d126906..63226e9036a15 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -57,6 +57,11 @@ bool efi_runtime_disabled(void) static int __init parse_efi_cmdline(char *str) { + if (!str) { + pr_warn("need at least one option\n"); + return -EINVAL; + } + if (parse_option_str(str, "noruntime")) disable_runtime = true; From e8647ec68feb66acd35b0f7fa3177bb47cc6a94e Mon Sep 17 00:00:00 2001 From: Dmitry Skorodumov Date: Tue, 28 Jul 2015 18:38:32 +0400 Subject: [PATCH 0500/2091] x86/efi: Use all 64 bit of efi_memmap in setup_e820() commit 7cc03e48965453b5df1cce5062c826189b04b960 upstream. The efi_info structure stores low 32 bits of memory map in efi_memmap and high 32 bits in efi_memmap_hi. While constructing pointer in the setup_e820(), need to take into account all 64 bit of the pointer. It is because on 64bit machine the function efi_get_memory_map() may return full 64bit pointer and before the patch that pointer was truncated. The issue is triggered on Parallles virtual machine and fixed with this patch. Signed-off-by: Dmitry Skorodumov Cc: Denis V. Lunev Signed-off-by: Matt Fleming Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/eboot.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index 48304b89b601f..0cdc154a22b50 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -1193,6 +1193,10 @@ static efi_status_t setup_e820(struct boot_params *params, unsigned int e820_type = 0; unsigned long m = efi->efi_memmap; +#ifdef CONFIG_X86_64 + m |= (u64)efi->efi_memmap_hi << 32; +#endif + d = (efi_memory_desc_t *)(m + (i * efi->efi_memdesc_size)); switch (d->type) { case EFI_RESERVED_TYPE: From 3092e514c62477fa17401667d4ffb4921b14842c Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 31 Mar 2015 22:38:21 +0530 Subject: [PATCH 0501/2091] ARC: Reduce bitops lines of code using macros commit 04e2eee4b02edcafce96c9c37b31b1a3318291a4 upstream. No semantical changes ! Acked-by: Peter Zijlstra (Intel) Signed-off-by: Vineet Gupta Signed-off-by: Greg Kroah-Hartman --- arch/arc/include/asm/bitops.h | 477 ++++++++++------------------------ 1 file changed, 144 insertions(+), 333 deletions(-) diff --git a/arch/arc/include/asm/bitops.h b/arch/arc/include/asm/bitops.h index 624a9d048ca9c..f19f0abacffd3 100644 --- a/arch/arc/include/asm/bitops.h +++ b/arch/arc/include/asm/bitops.h @@ -18,83 +18,50 @@ #include #include #include +#ifndef CONFIG_ARC_HAS_LLSC +#include +#endif -/* - * Hardware assisted read-modify-write using ARC700 LLOCK/SCOND insns. 
- * The Kconfig glue ensures that in SMP, this is only set if the container - * SoC/platform has cross-core coherent LLOCK/SCOND - */ #if defined(CONFIG_ARC_HAS_LLSC) -static inline void set_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned int temp; - - m += nr >> 5; - - /* - * ARC ISA micro-optimization: - * - * Instructions dealing with bitpos only consider lower 5 bits (0-31) - * e.g (x << 33) is handled like (x << 1) by ASL instruction - * (mem pointer still needs adjustment to point to next word) - * - * Hence the masking to clamp @nr arg can be elided in general. - * - * However if @nr is a constant (above assumed it in a register), - * and greater than 31, gcc can optimize away (x << 33) to 0, - * as overflow, given the 32-bit ISA. Thus masking needs to be done - * for constant @nr, but no code is generated due to const prop. - */ - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - __asm__ __volatile__( - "1: llock %0, [%1] \n" - " bset %0, %0, %2 \n" - " scond %0, [%1] \n" - " bnz 1b \n" - : "=&r"(temp) - : "r"(m), "ir"(nr) - : "cc"); -} - -static inline void clear_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned int temp; - - m += nr >> 5; - - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - __asm__ __volatile__( - "1: llock %0, [%1] \n" - " bclr %0, %0, %2 \n" - " scond %0, [%1] \n" - " bnz 1b \n" - : "=&r"(temp) - : "r"(m), "ir"(nr) - : "cc"); -} - -static inline void change_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned int temp; - - m += nr >> 5; - - if (__builtin_constant_p(nr)) - nr &= 0x1f; +/* + * Hardware assisted Atomic-R-M-W + */ - __asm__ __volatile__( - "1: llock %0, [%1] \n" - " bxor %0, %0, %2 \n" - " scond %0, [%1] \n" - " bnz 1b \n" - : "=&r"(temp) - : "r"(m), "ir"(nr) - : "cc"); +#define BIT_OP(op, c_op, asm_op) \ +static inline void op##_bit(unsigned long nr, volatile unsigned long *m)\ +{ \ + unsigned int temp; \ + \ + m += nr >> 5; \ + \ + /* \ + * ARC ISA micro-optimization: \ + * \ + * Instructions dealing with bitpos only consider lower 5 bits \ + * e.g (x << 33) is handled like (x << 1) by ASL instruction \ + * (mem pointer still needs adjustment to point to next word) \ + * \ + * Hence the masking to clamp @nr arg can be elided in general. \ + * \ + * However if @nr is a constant (above assumed in a register), \ + * and greater than 31, gcc can optimize away (x << 33) to 0, \ + * as overflow, given the 32-bit ISA. Thus masking needs to be \ + * done for const @nr, but no code is generated due to gcc \ + * const prop. 
\ + */ \ + if (__builtin_constant_p(nr)) \ + nr &= 0x1f; \ + \ + __asm__ __volatile__( \ + "1: llock %0, [%1] \n" \ + " " #asm_op " %0, %0, %2 \n" \ + " scond %0, [%1] \n" \ + " bnz 1b \n" \ + : "=&r"(temp) /* Early clobber, to prevent reg reuse */ \ + : "r"(m), /* Not "m": llock only supports reg direct addr mode */ \ + "ir"(nr) \ + : "cc"); \ } /* @@ -108,91 +75,38 @@ static inline void change_bit(unsigned long nr, volatile unsigned long *m) * Since ARC lacks a equivalent h/w primitive, the bit is set unconditionally * and the old value of bit is returned */ -static inline int test_and_set_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned long old, temp; - - m += nr >> 5; - - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - /* - * Explicit full memory barrier needed before/after as - * LLOCK/SCOND themselves don't provide any such semantics - */ - smp_mb(); - - __asm__ __volatile__( - "1: llock %0, [%2] \n" - " bset %1, %0, %3 \n" - " scond %1, [%2] \n" - " bnz 1b \n" - : "=&r"(old), "=&r"(temp) - : "r"(m), "ir"(nr) - : "cc"); - - smp_mb(); - - return (old & (1 << nr)) != 0; -} - -static inline int -test_and_clear_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned int old, temp; - - m += nr >> 5; - - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - smp_mb(); - - __asm__ __volatile__( - "1: llock %0, [%2] \n" - " bclr %1, %0, %3 \n" - " scond %1, [%2] \n" - " bnz 1b \n" - : "=&r"(old), "=&r"(temp) - : "r"(m), "ir"(nr) - : "cc"); - - smp_mb(); - - return (old & (1 << nr)) != 0; -} - -static inline int -test_and_change_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned int old, temp; - - m += nr >> 5; - - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - smp_mb(); - - __asm__ __volatile__( - "1: llock %0, [%2] \n" - " bxor %1, %0, %3 \n" - " scond %1, [%2] \n" - " bnz 1b \n" - : "=&r"(old), "=&r"(temp) - : "r"(m), "ir"(nr) - : "cc"); - - smp_mb(); - - return (old & (1 << nr)) != 0; +#define TEST_N_BIT_OP(op, c_op, asm_op) \ +static inline int test_and_##op##_bit(unsigned long nr, volatile unsigned long *m)\ +{ \ + unsigned long old, temp; \ + \ + m += nr >> 5; \ + \ + if (__builtin_constant_p(nr)) \ + nr &= 0x1f; \ + \ + /* \ + * Explicit full memory barrier needed before/after as \ + * LLOCK/SCOND themselves don't provide any such smenatic \ + */ \ + smp_mb(); \ + \ + __asm__ __volatile__( \ + "1: llock %0, [%2] \n" \ + " " #asm_op " %1, %0, %3 \n" \ + " scond %1, [%2] \n" \ + " bnz 1b \n" \ + : "=&r"(old), "=&r"(temp) \ + : "r"(m), "ir"(nr) \ + : "cc"); \ + \ + smp_mb(); \ + \ + return (old & (1 << nr)) != 0; \ } #else /* !CONFIG_ARC_HAS_LLSC */ -#include - /* * Non hardware assisted Atomic-R-M-W * Locking would change to irq-disabling only (UP) and spinlocks (SMP) @@ -209,111 +123,43 @@ test_and_change_bit(unsigned long nr, volatile unsigned long *m) * at compile time) */ -static inline void set_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned long temp, flags; - m += nr >> 5; - - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - bitops_lock(flags); - - temp = *m; - *m = temp | (1UL << nr); - - bitops_unlock(flags); -} - -static inline void clear_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned long temp, flags; - m += nr >> 5; - - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - bitops_lock(flags); - - temp = *m; - *m = temp & ~(1UL << nr); - - bitops_unlock(flags); -} - -static inline void change_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned long temp, flags; - m += nr >> 5; - - if (__builtin_constant_p(nr)) - 
nr &= 0x1f; - - bitops_lock(flags); - - temp = *m; - *m = temp ^ (1UL << nr); - - bitops_unlock(flags); +#define BIT_OP(op, c_op, asm_op) \ +static inline void op##_bit(unsigned long nr, volatile unsigned long *m)\ +{ \ + unsigned long temp, flags; \ + m += nr >> 5; \ + \ + if (__builtin_constant_p(nr)) \ + nr &= 0x1f; \ + \ + /* \ + * spin lock/unlock provide the needed smp_mb() before/after \ + */ \ + bitops_lock(flags); \ + \ + temp = *m; \ + *m = temp c_op (1UL << nr); \ + \ + bitops_unlock(flags); \ } -static inline int test_and_set_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned long old, flags; - m += nr >> 5; - - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - /* - * spin lock/unlock provide the needed smp_mb() before/after - */ - bitops_lock(flags); - - old = *m; - *m = old | (1 << nr); - - bitops_unlock(flags); - - return (old & (1 << nr)) != 0; -} - -static inline int -test_and_clear_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned long old, flags; - m += nr >> 5; - - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - bitops_lock(flags); - - old = *m; - *m = old & ~(1 << nr); - - bitops_unlock(flags); - - return (old & (1 << nr)) != 0; -} - -static inline int -test_and_change_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned long old, flags; - m += nr >> 5; - - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - bitops_lock(flags); - - old = *m; - *m = old ^ (1 << nr); - - bitops_unlock(flags); - - return (old & (1 << nr)) != 0; +#define TEST_N_BIT_OP(op, c_op, asm_op) \ +static inline int test_and_##op##_bit(unsigned long nr, volatile unsigned long *m)\ +{ \ + unsigned long old, flags; \ + m += nr >> 5; \ + \ + if (__builtin_constant_p(nr)) \ + nr &= 0x1f; \ + \ + bitops_lock(flags); \ + \ + old = *m; \ + *m = old c_op (1 << nr); \ + \ + bitops_unlock(flags); \ + \ + return (old & (1 << nr)) != 0; \ } #endif /* CONFIG_ARC_HAS_LLSC */ @@ -322,86 +168,51 @@ test_and_change_bit(unsigned long nr, volatile unsigned long *m) * Non atomic variants **************************************/ -static inline void __set_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned long temp; - m += nr >> 5; - - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - temp = *m; - *m = temp | (1UL << nr); -} - -static inline void __clear_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned long temp; - m += nr >> 5; - - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - temp = *m; - *m = temp & ~(1UL << nr); -} - -static inline void __change_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned long temp; - m += nr >> 5; - - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - temp = *m; - *m = temp ^ (1UL << nr); -} - -static inline int -__test_and_set_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned long old; - m += nr >> 5; - - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - old = *m; - *m = old | (1 << nr); - - return (old & (1 << nr)) != 0; +#define __BIT_OP(op, c_op, asm_op) \ +static inline void __##op##_bit(unsigned long nr, volatile unsigned long *m) \ +{ \ + unsigned long temp; \ + m += nr >> 5; \ + \ + if (__builtin_constant_p(nr)) \ + nr &= 0x1f; \ + \ + temp = *m; \ + *m = temp c_op (1UL << nr); \ } -static inline int -__test_and_clear_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned long old; - m += nr >> 5; - - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - old = *m; - *m = old & ~(1 << nr); - - return (old & (1 << nr)) != 0; +#define __TEST_N_BIT_OP(op, c_op, asm_op) \ +static inline int 
__test_and_##op##_bit(unsigned long nr, volatile unsigned long *m)\ +{ \ + unsigned long old; \ + m += nr >> 5; \ + \ + if (__builtin_constant_p(nr)) \ + nr &= 0x1f; \ + \ + old = *m; \ + *m = old c_op (1 << nr); \ + \ + return (old & (1 << nr)) != 0; \ } -static inline int -__test_and_change_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned long old; - m += nr >> 5; - - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - old = *m; - *m = old ^ (1 << nr); - - return (old & (1 << nr)) != 0; -} +#define BIT_OPS(op, c_op, asm_op) \ + \ + /* set_bit(), clear_bit(), change_bit() */ \ + BIT_OP(op, c_op, asm_op) \ + \ + /* test_and_set_bit(), test_and_clear_bit(), test_and_change_bit() */\ + TEST_N_BIT_OP(op, c_op, asm_op) \ + \ + /* __set_bit(), __clear_bit(), __change_bit() */ \ + __BIT_OP(op, c_op, asm_op) \ + \ + /* __test_and_set_bit(), __test_and_clear_bit(), __test_and_change_bit() */\ + __TEST_N_BIT_OP(op, c_op, asm_op) + +BIT_OPS(set, |, bset) +BIT_OPS(clear, & ~, bclr) +BIT_OPS(change, ^, bxor) /* * This routine doesn't need to be atomic. From 26121b675728939c515594b2bb866756b7789c16 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 3 Jul 2015 11:26:22 +0530 Subject: [PATCH 0502/2091] ARC: Make ARC bitops "safer" (add anti-optimization) commit 80f420842ff42ad61f84584716d74ef635f13892 upstream. ARCompact/ARCv2 ISA provide that any instructions which deals with bitpos/count operand ASL, LSL, BSET, BCLR, BMSK .... will only consider lower 5 bits. i.e. auto-clamp the pos to 0-31. ARC Linux bitops exploited this fact by NOT explicitly masking out upper bits for @nr operand in general, saving a bunch of AND/BMSK instructions in generated code around bitops. While this micro-optimization has worked well over years it is NOT safe as shifting a number with a value, greater than native size is "undefined" per "C" spec. So as it turns outm EZChip ran into this eventually, in their massive muti-core SMP build with 64 cpus. There was a test_bit() inside a loop from 63 to 0 and gcc was weirdly optimizing away the first iteration (so it was really adhering to standard by implementing undefined behaviour vs. removing all the iterations which were phony i.e. (1 << [63..32]) | for i = 63 to 0 | X = ( 1 << i ) | if X == 0 | continue So fix the code to do the explicit masking at the expense of generating additional instructions. Fortunately, this can be mitigated to a large extent as gcc has SHIFT_COUNT_TRUNCATED which allows combiner to fold masking into shift operation itself. It is currently not enabled in ARC gcc backend, but could be done after a bit of testing. Fixes STAR 9000866918 ("unsafe "undefined behavior" code in kernel") Reported-by: Noam Camus Signed-off-by: Vineet Gupta Signed-off-by: Greg Kroah-Hartman --- arch/arc/include/asm/bitops.h | 35 +++++++++-------------------------- 1 file changed, 9 insertions(+), 26 deletions(-) diff --git a/arch/arc/include/asm/bitops.h b/arch/arc/include/asm/bitops.h index f19f0abacffd3..dae03e66fa9ea 100644 --- a/arch/arc/include/asm/bitops.h +++ b/arch/arc/include/asm/bitops.h @@ -50,8 +50,7 @@ static inline void op##_bit(unsigned long nr, volatile unsigned long *m)\ * done for const @nr, but no code is generated due to gcc \ * const prop. 
\ */ \ - if (__builtin_constant_p(nr)) \ - nr &= 0x1f; \ + nr &= 0x1f; \ \ __asm__ __volatile__( \ "1: llock %0, [%1] \n" \ @@ -82,8 +81,7 @@ static inline int test_and_##op##_bit(unsigned long nr, volatile unsigned long * \ m += nr >> 5; \ \ - if (__builtin_constant_p(nr)) \ - nr &= 0x1f; \ + nr &= 0x1f; \ \ /* \ * Explicit full memory barrier needed before/after as \ @@ -129,16 +127,13 @@ static inline void op##_bit(unsigned long nr, volatile unsigned long *m)\ unsigned long temp, flags; \ m += nr >> 5; \ \ - if (__builtin_constant_p(nr)) \ - nr &= 0x1f; \ - \ /* \ * spin lock/unlock provide the needed smp_mb() before/after \ */ \ bitops_lock(flags); \ \ temp = *m; \ - *m = temp c_op (1UL << nr); \ + *m = temp c_op (1UL << (nr & 0x1f)); \ \ bitops_unlock(flags); \ } @@ -149,17 +144,14 @@ static inline int test_and_##op##_bit(unsigned long nr, volatile unsigned long * unsigned long old, flags; \ m += nr >> 5; \ \ - if (__builtin_constant_p(nr)) \ - nr &= 0x1f; \ - \ bitops_lock(flags); \ \ old = *m; \ - *m = old c_op (1 << nr); \ + *m = old c_op (1UL << (nr & 0x1f)); \ \ bitops_unlock(flags); \ \ - return (old & (1 << nr)) != 0; \ + return (old & (1UL << (nr & 0x1f))) != 0; \ } #endif /* CONFIG_ARC_HAS_LLSC */ @@ -174,11 +166,8 @@ static inline void __##op##_bit(unsigned long nr, volatile unsigned long *m) \ unsigned long temp; \ m += nr >> 5; \ \ - if (__builtin_constant_p(nr)) \ - nr &= 0x1f; \ - \ temp = *m; \ - *m = temp c_op (1UL << nr); \ + *m = temp c_op (1UL << (nr & 0x1f)); \ } #define __TEST_N_BIT_OP(op, c_op, asm_op) \ @@ -187,13 +176,10 @@ static inline int __test_and_##op##_bit(unsigned long nr, volatile unsigned long unsigned long old; \ m += nr >> 5; \ \ - if (__builtin_constant_p(nr)) \ - nr &= 0x1f; \ - \ old = *m; \ - *m = old c_op (1 << nr); \ + *m = old c_op (1UL << (nr & 0x1f)); \ \ - return (old & (1 << nr)) != 0; \ + return (old & (1UL << (nr & 0x1f))) != 0; \ } #define BIT_OPS(op, c_op, asm_op) \ @@ -224,10 +210,7 @@ test_bit(unsigned int nr, const volatile unsigned long *addr) addr += nr >> 5; - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - mask = 1 << nr; + mask = 1UL << (nr & 0x1f); return ((mask & *addr) != 0); } From f3ef6ad3e4501dae41b0015a3bd8e179b56bdfd8 Mon Sep 17 00:00:00 2001 From: Wengang Wang Date: Mon, 6 Jul 2015 14:35:11 +0800 Subject: [PATCH 0503/2091] rds: rds_ib_device.refcount overflow commit 4fabb59449aa44a585b3603ffdadd4c5f4d0c033 upstream. Fixes: 3e0249f9c05c ("RDS/IB: add refcount tracking to struct rds_ib_device") There lacks a dropping on rds_ib_device.refcount in case rds_ib_alloc_fmr failed(mr pool running out). this lead to the refcount overflow. A complain in line 117(see following) is seen. From vmcore: s_ib_rdma_mr_pool_depleted is 2147485544 and rds_ibdev->refcount is -2147475448. That is the evidence the mr pool is used up. so rds_ib_alloc_fmr is very likely to return ERR_PTR(-EAGAIN). 115 void rds_ib_dev_put(struct rds_ib_device *rds_ibdev) 116 { 117 BUG_ON(atomic_read(&rds_ibdev->refcount) <= 0); 118 if (atomic_dec_and_test(&rds_ibdev->refcount)) 119 queue_work(rds_wq, &rds_ibdev->free_work); 120 } fix is to drop refcount when rds_ib_alloc_fmr failed. 
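For illustration only (not part of the patch), the general shape of the bug in plain C with invented names, nothing RDS-specific: every successful get must be paired with a put on each early-error return, otherwise the count never drops back and the deferred teardown never runs:

#include <stdio.h>

struct dev { int refcount; };

static void dev_get(struct dev *d) { d->refcount++; }

static void dev_put(struct dev *d)
{
	if (--d->refcount == 0)
		printf("last reference dropped, scheduling free\n");
}

/* stand-in for an allocation that can fail, e.g. a depleted pool */
static void *alloc_from_pool(int pool_empty) { return pool_empty ? NULL : (void *)1; }

static void *get_mr(struct dev *d, int pool_empty)
{
	void *mr;

	dev_get(d);                        /* reference taken up front            */
	mr = alloc_from_pool(pool_empty);
	if (!mr) {
		dev_put(d);                /* the put the patch adds on this path */
		return NULL;
	}
	return mr;                         /* reference dropped later, on free    */
}

int main(void)
{
	struct dev d = { .refcount = 1 };

	if (!get_mr(&d, 1))
		printf("allocation failed, refcount back to %d\n", d.refcount);
	return 0;
}

Without the put in the failure branch each failed allocation leaks one reference, which is how the counter in the report eventually overflowed.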
Signed-off-by: Wengang Wang Reviewed-by: Haggai Eran Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- net/rds/ib_rdma.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c index 273b8bff6ba44..657ba9f5d3086 100644 --- a/net/rds/ib_rdma.c +++ b/net/rds/ib_rdma.c @@ -759,8 +759,10 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents, } ibmr = rds_ib_alloc_fmr(rds_ibdev); - if (IS_ERR(ibmr)) + if (IS_ERR(ibmr)) { + rds_ib_dev_put(rds_ibdev); return ibmr; + } ret = rds_ib_map_fmr(rds_ibdev, ibmr, sg, nents); if (ret == 0) From ba3961ad681981dc74fcd519b8f98be8bc3ac381 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sat, 27 Jun 2015 09:21:32 -0400 Subject: [PATCH 0504/2091] n_tty: signal and flush atomically commit 3b19e032295647b7be2aa3be62510db4aaeda759 upstream. When handling signalling char, claim the termios write lock before signalling waiting readers and writers to prevent further i/o before flushing the echo and output buffers. This prevents a userspace signal handler which may output from racing the terminal flush. Reference: Bugzilla #99351 ("Output truncated in ssh session after...") Fixes: commit d2b6f44779d3 ("n_tty: Fix signal handling flushes") Reported-by: Filipe Brandenburger Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_tty.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index 396344cb011fd..16ed0b6c7f9ce 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -1108,19 +1108,29 @@ static void eraser(unsigned char c, struct tty_struct *tty) * Locking: ctrl_lock */ -static void isig(int sig, struct tty_struct *tty) +static void __isig(int sig, struct tty_struct *tty) { - struct n_tty_data *ldata = tty->disc_data; struct pid *tty_pgrp = tty_get_pgrp(tty); if (tty_pgrp) { kill_pgrp(tty_pgrp, sig, 1); put_pid(tty_pgrp); } +} - if (!L_NOFLSH(tty)) { +static void isig(int sig, struct tty_struct *tty) +{ + struct n_tty_data *ldata = tty->disc_data; + + if (L_NOFLSH(tty)) { + /* signal only */ + __isig(sig, tty); + + } else { /* signal and flush */ up_read(&tty->termios_rwsem); down_write(&tty->termios_rwsem); + __isig(sig, tty); + /* clear echo buffer */ mutex_lock(&ldata->output_lock); ldata->echo_head = ldata->echo_tail = 0; From 3425ebac158abf137ce7dcbe062baeb4c04e0097 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 16 Jul 2015 19:53:22 +0800 Subject: [PATCH 0505/2091] blk-mq: set default timeout as 30 seconds commit e56f698bd0720e17f10f39e8b0b5b446ad0ab22c upstream. It is reasonable to set default timeout of request as 30 seconds instead of 30000 ticks, which may be 300 seconds if HZ is 100, for example, some arm64 based systems may choose 100 HZ. Signed-off-by: Ming Lei Fixes: c76cbbcf4044 ("blk-mq: put blk_queue_rq_timeout together in blk_mq_init_queue()" Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- block/blk-mq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 594eea04266e6..2dc1fd6c5bdb5 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1968,7 +1968,7 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, goto err_hctxs; setup_timer(&q->timeout, blk_mq_rq_timer, (unsigned long) q); - blk_queue_rq_timeout(q, set->timeout ? set->timeout : 30000); + blk_queue_rq_timeout(q, set->timeout ? 
set->timeout : 30 * HZ); q->nr_queues = nr_cpu_ids; q->nr_hw_queues = set->nr_hw_queues; From d9aa2c20eef957e4b28c4f835bbc749035c4e5ef Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 13 Jul 2015 08:26:35 -0300 Subject: [PATCH 0506/2091] perf hists browser: Take the --comm, --dsos, etc filters into account commit 9c0fa8dd3d58de8b688fda758eea1719949c7f0a upstream. At some point: commit 2c86c7ca7606 Author: Namhyung Kim Date: Mon Mar 17 18:18:54 2014 -0300 perf report: Merge al->filtered with hist_entry->filtered We stopped dropping samples for things filtered via the --comms, --dsos, --symbols, etc, i.e. things marked as filtered in the symbol resolution routines (thread__find_addr_map(), perf_event__preprocess_sample(), etc). But then, in: commit 268397cb2a47 Author: Namhyung Kim Date: Tue Apr 22 14:49:31 2014 +0900 perf top/tui: Update nr_entries properly after a filter is applied We don't take into account entries that were filtered in perf_event__preprocess_sample() and friends, which leads to inconsistency in the browser seek routines, that expects the number of hist_entry->filtered entries to match what it thinks is the number of unfiltered, browsable entries. So, for instance, when we do: perf top --symbols ___non_existent_symbol___ the hist_browser__nr_entries() routine thinks there are no filters in place, uses the hists->nr_entries but all entries are filtered, leading to a segfault. Tested with: perf top --symbols malloc,free --percentage=relative Freezing, by pressing 'f', at any time and doing the math on the percentages ends up with 100%, ditto for: perf top --dsos libpthread-2.20.so,libxul.so --percentage=relative Both were segfaulting, all fixed now. More work needed to do away with checking if filters are in place, we should just use the nr_non_filtered_samples counter, no need to conditionally use it or hists.nr_filter, as what the browser does is just show unfiltered stuff. An audit of how it is being accounted is needed, this is the minimal fix. Reported-by: Michael Petlan Fixes: 268397cb2a47 ("perf top/tui: Update nr_entries properly after a filter is applied") Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-6w01d5q97qk0d64kuojme5in@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/ui/browsers/hists.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 995b7a8596b14..658b0a89796d2 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -45,7 +45,7 @@ static struct rb_node *hists__filter_entries(struct rb_node *nd, static bool hist_browser__has_filter(struct hist_browser *hb) { - return hists__has_filter(hb->hists) || hb->min_pcnt; + return hists__has_filter(hb->hists) || hb->min_pcnt || symbol_conf.has_filter; } static int hist_browser__get_folding(struct hist_browser *browser) From f919f7a4d004513372807cedc14fac43f0652c4c Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Tue, 21 Jul 2015 15:55:09 +0100 Subject: [PATCH 0507/2091] perf/x86/intel/cqm: Return cached counter value from IRQ context commit 2c534c0da0a68418693e10ce1c4146e085f39518 upstream. 
Peter reported the following potential crash which I was able to reproduce with his test program, [ 148.765788] ------------[ cut here ]------------ [ 148.765796] WARNING: CPU: 34 PID: 2840 at kernel/smp.c:417 smp_call_function_many+0xb6/0x260() [ 148.765797] Modules linked in: [ 148.765800] CPU: 34 PID: 2840 Comm: perf Not tainted 4.2.0-rc1+ #4 [ 148.765803] ffffffff81cdc398 ffff88085f105950 ffffffff818bdfd5 0000000000000007 [ 148.765805] 0000000000000000 ffff88085f105990 ffffffff810e413a 0000000000000000 [ 148.765807] ffffffff82301080 0000000000000022 ffffffff8107f640 ffffffff8107f640 [ 148.765809] Call Trace: [ 148.765810] [] dump_stack+0x45/0x57 [ 148.765818] [] warn_slowpath_common+0x8a/0xc0 [ 148.765822] [] ? intel_cqm_stable+0x60/0x60 [ 148.765824] [] ? intel_cqm_stable+0x60/0x60 [ 148.765825] [] warn_slowpath_null+0x1a/0x20 [ 148.765827] [] smp_call_function_many+0xb6/0x260 [ 148.765829] [] ? intel_cqm_stable+0x60/0x60 [ 148.765831] [] on_each_cpu_mask+0x28/0x60 [ 148.765832] [] intel_cqm_event_count+0x7f/0xe0 [ 148.765836] [] perf_output_read+0x2a5/0x400 [ 148.765839] [] perf_output_sample+0x31a/0x590 [ 148.765840] [] ? perf_prepare_sample+0x26d/0x380 [ 148.765841] [] perf_event_output+0x47/0x60 [ 148.765843] [] __perf_event_overflow+0x215/0x240 [ 148.765844] [] perf_event_overflow+0x14/0x20 [ 148.765847] [] intel_pmu_handle_irq+0x1d4/0x440 [ 148.765849] [] ? __perf_event_task_sched_in+0x36/0xa0 [ 148.765853] [] ? vunmap_page_range+0x19d/0x2f0 [ 148.765854] [] ? unmap_kernel_range_noflush+0x11/0x20 [ 148.765859] [] ? ghes_copy_tofrom_phys+0x11e/0x2a0 [ 148.765863] [] ? native_apic_msr_write+0x2b/0x30 [ 148.765865] [] ? x2apic_send_IPI_self+0x1d/0x20 [ 148.765869] [] ? arch_irq_work_raise+0x35/0x40 [ 148.765872] [] ? irq_work_queue+0x66/0x80 [ 148.765875] [] perf_event_nmi_handler+0x26/0x40 [ 148.765877] [] nmi_handle+0x79/0x100 [ 148.765879] [] default_do_nmi+0x42/0x100 [ 148.765880] [] do_nmi+0x83/0xb0 [ 148.765884] [] end_repeat_nmi+0x1e/0x2e [ 148.765886] [] ? __perf_event_task_sched_in+0x36/0xa0 [ 148.765888] [] ? __perf_event_task_sched_in+0x36/0xa0 [ 148.765890] [] ? __perf_event_task_sched_in+0x36/0xa0 [ 148.765891] <> [] finish_task_switch+0x156/0x210 [ 148.765898] [] __schedule+0x341/0x920 [ 148.765899] [] schedule+0x37/0x80 [ 148.765903] [] ? do_page_fault+0x2f/0x80 [ 148.765905] [] schedule_user+0x1a/0x50 [ 148.765907] [] retint_careful+0x14/0x32 [ 148.765908] ---[ end trace e33ff2be78e14901 ]--- The CQM task events are not safe to be called from within interrupt context because they require performing an IPI to read the counter value on all sockets. And performing IPIs from within IRQ context is a "no-no". Make do with the last read counter value currently event in event->count when we're invoked in this context. 
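The shape of the fix is a context guard: a read that has to IPI every socket cannot run from IRQ/NMI context, so in that case the driver serves whatever is already cached in event->count. Reduced to its skeleton, with the IPI-based read stubbed out (cached and read_remote() are illustrative names, not the CQM code):

#include <linux/hardirq.h>	/* in_interrupt() */
#include <linux/types.h>

static u64 cached;		/* last value obtained outside IRQ context */

static u64 read_remote(void)
{
	/* stand-in for the on_each_cpu_mask() based read in the driver */
	return cached;
}

static u64 event_count(void)
{
	/*
	 * Sending IPIs with interrupts disabled is what trips the
	 * smp_call_function_many() warning in the trace above, so fall
	 * back to the cached value when called in that context.
	 */
	if (in_interrupt())
		return cached;

	cached = read_remote();
	return cached;
}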
Reported-by: Peter Zijlstra Signed-off-by: Matt Fleming Cc: Thomas Gleixner Cc: Vikas Shivappa Cc: Kanaka Juvva Cc: Will Auld Link: http://lkml.kernel.org/r/1437490509-15373-1-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/perf_event_intel_cqm.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/kernel/cpu/perf_event_intel_cqm.c index e4d1b8b738fa8..cb77b11bc4143 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_cqm.c +++ b/arch/x86/kernel/cpu/perf_event_intel_cqm.c @@ -933,6 +933,14 @@ static u64 intel_cqm_event_count(struct perf_event *event) if (!cqm_group_leader(event)) return 0; + /* + * Getting up-to-date values requires an SMP IPI which is not + * possible if we're being called in interrupt context. Return + * the cached values instead. + */ + if (unlikely(in_interrupt())) + goto out; + /* * Notice that we don't perform the reading of an RMID * atomically, because we can't hold a spin lock across the From fd1a8f57bfd8686709c17cbe878470bd43fb3c4e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Fri, 17 Jul 2015 15:32:03 +0200 Subject: [PATCH 0508/2091] vhost: actually track log eventfd file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 7932c0bd7740f4cd2aa168d3ce0199e7af7d72d5 upstream. While reviewing vhost log code, I found out that log_file is never set. Note: I haven't tested the change (QEMU doesn't use LOG_FD yet). Signed-off-by: Marc-André Lureau Signed-off-by: Michael S. Tsirkin Signed-off-by: Greg Kroah-Hartman --- drivers/vhost/vhost.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 2ee28266fd070..fa49d3294cd52 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -886,6 +886,7 @@ long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp) } if (eventfp != d->log_file) { filep = d->log_file; + d->log_file = eventfp; ctx = d->log_ctx; d->log_ctx = eventfp ? eventfd_ctx_fileget(eventfp) : NULL; From 1c57a6c95d3de1ea32010d88b9b93984d3302e8d Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sat, 4 Jul 2015 13:23:42 -0700 Subject: [PATCH 0509/2091] hwmon: (nct7802) Fix integer overflow seen when writing voltage limits commit 9200bc4c28cd8992eb5379345abd6b4f0c93df16 upstream. Writing a large value into a voltage limit attribute can result in an overflow due to an auto-conversion from unsigned long to unsigned int. Cc: Constantine Shulyupin Reviewed-by: Jean Delvare Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/nct7802.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/nct7802.c b/drivers/hwmon/nct7802.c index 28fcb2e246d55..fbfc02bb2cfa1 100644 --- a/drivers/hwmon/nct7802.c +++ b/drivers/hwmon/nct7802.c @@ -195,7 +195,7 @@ static int nct7802_read_voltage(struct nct7802_data *data, int nr, int index) } static int nct7802_write_voltage(struct nct7802_data *data, int nr, int index, - unsigned int voltage) + unsigned long voltage) { int shift = 8 - REG_VOLTAGE_LIMIT_MSB_SHIFT[index - 1][nr]; int err; From fe7c1d336dff21503aa08bda735519aeed795afb Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Mon, 27 Jul 2015 10:21:46 -0700 Subject: [PATCH 0510/2091] hwmon: (nct7904) Rename pwm attributes to match hwmon ABI commit 0d6aaffc3a6db642e0a165ba4d17d6d7bbaf5201 upstream. 
pwm attributes have well defined names, which should be used. Cc: Vadim V. Vlasov Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- Documentation/hwmon/nct7904 | 4 +-- drivers/hwmon/nct7904.c | 57 +++++++++++++++++++------------------ 2 files changed, 31 insertions(+), 30 deletions(-) diff --git a/Documentation/hwmon/nct7904 b/Documentation/hwmon/nct7904 index 014f112e2a14e..57fffe33ebfcd 100644 --- a/Documentation/hwmon/nct7904 +++ b/Documentation/hwmon/nct7904 @@ -35,11 +35,11 @@ temp1_input Local temperature (1/1000 degree, temp[2-9]_input CPU temperatures (1/1000 degree, 0.125 degree resolution) -fan[1-4]_mode R/W, 0/1 for manual or SmartFan mode +pwm[1-4]_enable R/W, 1/2 for manual or SmartFan mode Setting SmartFan mode is supported only if it has been previously configured by BIOS (or configuration EEPROM) -fan[1-4]_pwm R/O in SmartFan mode, R/W in manual control mode +pwm[1-4] R/O in SmartFan mode, R/W in manual control mode The driver checks sensor control registers and does not export the sensors that are not enabled. Anyway, a sensor that is enabled may actually be not diff --git a/drivers/hwmon/nct7904.c b/drivers/hwmon/nct7904.c index b77b82f244800..6153df735e82c 100644 --- a/drivers/hwmon/nct7904.c +++ b/drivers/hwmon/nct7904.c @@ -412,8 +412,9 @@ static ssize_t show_pwm(struct device *dev, return sprintf(buf, "%d\n", val); } -static ssize_t store_mode(struct device *dev, struct device_attribute *devattr, - const char *buf, size_t count) +static ssize_t store_enable(struct device *dev, + struct device_attribute *devattr, + const char *buf, size_t count) { int index = to_sensor_dev_attr(devattr)->index; struct nct7904_data *data = dev_get_drvdata(dev); @@ -422,18 +423,18 @@ static ssize_t store_mode(struct device *dev, struct device_attribute *devattr, if (kstrtoul(buf, 10, &val) < 0) return -EINVAL; - if (val > 1 || (val && !data->fan_mode[index])) + if (val < 1 || val > 2 || (val == 2 && !data->fan_mode[index])) return -EINVAL; ret = nct7904_write_reg(data, BANK_3, FANCTL1_FMR_REG + index, - val ? data->fan_mode[index] : 0); + val == 2 ? data->fan_mode[index] : 0); return ret ? ret : count; } -/* Return 0 for manual mode or 1 for SmartFan mode */ -static ssize_t show_mode(struct device *dev, - struct device_attribute *devattr, char *buf) +/* Return 1 for manual mode or 2 for SmartFan mode */ +static ssize_t show_enable(struct device *dev, + struct device_attribute *devattr, char *buf) { int index = to_sensor_dev_attr(devattr)->index; struct nct7904_data *data = dev_get_drvdata(dev); @@ -443,36 +444,36 @@ static ssize_t show_mode(struct device *dev, if (val < 0) return val; - return sprintf(buf, "%d\n", val ? 1 : 0); + return sprintf(buf, "%d\n", val ? 
2 : 1); } /* 2 attributes per channel: pwm and mode */ -static SENSOR_DEVICE_ATTR(fan1_pwm, S_IRUGO | S_IWUSR, +static SENSOR_DEVICE_ATTR(pwm1, S_IRUGO | S_IWUSR, show_pwm, store_pwm, 0); -static SENSOR_DEVICE_ATTR(fan1_mode, S_IRUGO | S_IWUSR, - show_mode, store_mode, 0); -static SENSOR_DEVICE_ATTR(fan2_pwm, S_IRUGO | S_IWUSR, +static SENSOR_DEVICE_ATTR(pwm1_enable, S_IRUGO | S_IWUSR, + show_enable, store_enable, 0); +static SENSOR_DEVICE_ATTR(pwm2, S_IRUGO | S_IWUSR, show_pwm, store_pwm, 1); -static SENSOR_DEVICE_ATTR(fan2_mode, S_IRUGO | S_IWUSR, - show_mode, store_mode, 1); -static SENSOR_DEVICE_ATTR(fan3_pwm, S_IRUGO | S_IWUSR, +static SENSOR_DEVICE_ATTR(pwm2_enable, S_IRUGO | S_IWUSR, + show_enable, store_enable, 1); +static SENSOR_DEVICE_ATTR(pwm3, S_IRUGO | S_IWUSR, show_pwm, store_pwm, 2); -static SENSOR_DEVICE_ATTR(fan3_mode, S_IRUGO | S_IWUSR, - show_mode, store_mode, 2); -static SENSOR_DEVICE_ATTR(fan4_pwm, S_IRUGO | S_IWUSR, +static SENSOR_DEVICE_ATTR(pwm3_enable, S_IRUGO | S_IWUSR, + show_enable, store_enable, 2); +static SENSOR_DEVICE_ATTR(pwm4, S_IRUGO | S_IWUSR, show_pwm, store_pwm, 3); -static SENSOR_DEVICE_ATTR(fan4_mode, S_IRUGO | S_IWUSR, - show_mode, store_mode, 3); +static SENSOR_DEVICE_ATTR(pwm4_enable, S_IRUGO | S_IWUSR, + show_enable, store_enable, 3); static struct attribute *nct7904_fanctl_attrs[] = { - &sensor_dev_attr_fan1_pwm.dev_attr.attr, - &sensor_dev_attr_fan1_mode.dev_attr.attr, - &sensor_dev_attr_fan2_pwm.dev_attr.attr, - &sensor_dev_attr_fan2_mode.dev_attr.attr, - &sensor_dev_attr_fan3_pwm.dev_attr.attr, - &sensor_dev_attr_fan3_mode.dev_attr.attr, - &sensor_dev_attr_fan4_pwm.dev_attr.attr, - &sensor_dev_attr_fan4_mode.dev_attr.attr, + &sensor_dev_attr_pwm1.dev_attr.attr, + &sensor_dev_attr_pwm1_enable.dev_attr.attr, + &sensor_dev_attr_pwm2.dev_attr.attr, + &sensor_dev_attr_pwm2_enable.dev_attr.attr, + &sensor_dev_attr_pwm3.dev_attr.attr, + &sensor_dev_attr_pwm3_enable.dev_attr.attr, + &sensor_dev_attr_pwm4.dev_attr.attr, + &sensor_dev_attr_pwm4_enable.dev_attr.attr, NULL }; From f22d5595d56d15b7aadc0ff5cc47e8811d4c9eb1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 5 Jul 2015 11:02:53 -0400 Subject: [PATCH 0511/2091] NFS: Don't revalidate the mapping if both size and change attr are up to date commit 85a23cee3f2c928475f31777ead5a71340a12fc3 upstream. If we've ensured that the size and the change attribute are both correct, then there is no point in marking those attributes as needing revalidation again. Only do so if we know the size is incorrect and was not updated. Fixes: f2467b6f64da ("NFS: Clear NFS_INO_REVAL_PAGECACHE when...") Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/inode.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index f734562c6d244..5d25b9d97c29c 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1242,9 +1242,11 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat if (fattr->valid & NFS_ATTR_FATTR_SIZE) { cur_size = i_size_read(inode); new_isize = nfs_size_to_loff_t(fattr->size); - if (cur_size != new_isize && nfsi->nrequests == 0) + if (cur_size != new_isize) invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; } + if (nfsi->nrequests != 0) + invalid &= ~NFS_INO_REVAL_PAGECACHE; /* Have any file permissions changed? 
*/ if ((fattr->valid & NFS_ATTR_FATTR_MODE) && (inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)) @@ -1682,8 +1684,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_DATA | NFS_INO_INVALID_ACCESS - | NFS_INO_INVALID_ACL - | NFS_INO_REVAL_PAGECACHE; + | NFS_INO_INVALID_ACL; if (S_ISDIR(inode->i_mode)) nfs_force_lookup_revalidate(inode); inode->i_version = fattr->change_attr; @@ -1715,7 +1716,6 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) if ((nfsi->nrequests == 0) || new_isize > cur_isize) { i_size_write(inode, new_isize); invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; - invalid &= ~NFS_INO_REVAL_PAGECACHE; } dprintk("NFS: isize change on server for file %s/%ld " "(%Ld to %Ld)\n", From c9bb1d26fe5747a4dbea75fe37e6474f6abbcdad Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 24 Jul 2015 13:49:48 +0300 Subject: [PATCH 0512/2091] avr32: handle NULL as a valid clock object commit 5c02a4206538da12c040b51778d310df84c6bf6c upstream. Since NULL is used as valid clock object on optional clocks we have to handle this case in avr32 implementation as well. Fixes: e1824dfe0d8e (net: macb: Adjust tx_clk when link speed changes) Signed-off-by: Andy Shevchenko Acked-by: Hans-Christian Egtvedt Signed-off-by: Greg Kroah-Hartman --- arch/avr32/mach-at32ap/clock.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/arch/avr32/mach-at32ap/clock.c b/arch/avr32/mach-at32ap/clock.c index 23b1a97fae7ad..52c179bec0cc6 100644 --- a/arch/avr32/mach-at32ap/clock.c +++ b/arch/avr32/mach-at32ap/clock.c @@ -80,6 +80,9 @@ int clk_enable(struct clk *clk) { unsigned long flags; + if (!clk) + return 0; + spin_lock_irqsave(&clk_lock, flags); __clk_enable(clk); spin_unlock_irqrestore(&clk_lock, flags); @@ -106,6 +109,9 @@ void clk_disable(struct clk *clk) { unsigned long flags; + if (IS_ERR_OR_NULL(clk)) + return; + spin_lock_irqsave(&clk_lock, flags); __clk_disable(clk); spin_unlock_irqrestore(&clk_lock, flags); @@ -117,6 +123,9 @@ unsigned long clk_get_rate(struct clk *clk) unsigned long flags; unsigned long rate; + if (!clk) + return 0; + spin_lock_irqsave(&clk_lock, flags); rate = clk->get_rate(clk); spin_unlock_irqrestore(&clk_lock, flags); @@ -129,6 +138,9 @@ long clk_round_rate(struct clk *clk, unsigned long rate) { unsigned long flags, actual_rate; + if (!clk) + return 0; + if (!clk->set_rate) return -ENOSYS; @@ -145,6 +157,9 @@ int clk_set_rate(struct clk *clk, unsigned long rate) unsigned long flags; long ret; + if (!clk) + return 0; + if (!clk->set_rate) return -ENOSYS; @@ -161,6 +176,9 @@ int clk_set_parent(struct clk *clk, struct clk *parent) unsigned long flags; int ret; + if (!clk) + return 0; + if (!clk->set_parent) return -ENOSYS; @@ -174,7 +192,7 @@ EXPORT_SYMBOL(clk_set_parent); struct clk *clk_get_parent(struct clk *clk) { - return clk->parent; + return !clk ? NULL : clk->parent; } EXPORT_SYMBOL(clk_get_parent); From c7231368e696a31c78e61189273999c0b5243bbf Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 22 Jul 2015 13:46:13 -0400 Subject: [PATCH 0513/2091] NFSv4: We must set NFS_OPEN_STATE flag in nfs_resync_open_stateid_locked commit 3c38cbe2ade88240fabb585b408f779ad3b9a31b upstream. Otherwise, nfs4_select_rw_stateid() will always return the zero stateid instead of the correct open stateid. 
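The reason the missing flag matters: the stateid selector only hands out the open stateid when the state is marked open, and with the flag clear it degrades to the all-zeros (anonymous) stateid. A much simplified illustration of that decision follows; it is not the real nfs4_select_rw_stateid(), which also considers delegation and lock stateids:

static void select_rw_stateid(struct nfs4_state *state, nfs4_stateid *dst)
{
	/* Illustrative only: NFS_OPEN_STATE gates use of the open stateid. */
	if (test_bit(NFS_OPEN_STATE, &state->flags))
		nfs4_stateid_copy(dst, &state->open_stateid);
	else
		nfs4_stateid_copy(dst, &zero_stateid);	/* anonymous stateid */
}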
Fixes: f95549cf24660 ("NFSv4: More CLOSE/OPEN races") Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/nfs4proc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 55e1e3af23a3d..d3f2051266093 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1204,12 +1204,15 @@ static bool nfs_need_update_open_stateid(struct nfs4_state *state, static void nfs_resync_open_stateid_locked(struct nfs4_state *state) { + if (!(state->n_wronly || state->n_rdonly || state->n_rdwr)) + return; if (state->n_wronly) set_bit(NFS_O_WRONLY_STATE, &state->flags); if (state->n_rdonly) set_bit(NFS_O_RDONLY_STATE, &state->flags); if (state->n_rdwr) set_bit(NFS_O_RDWR_STATE, &state->flags); + set_bit(NFS_OPEN_STATE, &state->flags); } static void nfs_clear_open_stateid_locked(struct nfs4_state *state, From 13e8ae1f4da1386297c2cf65522b38f8e24fa88a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 27 Jul 2015 10:23:19 -0400 Subject: [PATCH 0514/2091] NFS: Fix a memory leak in nfs_do_recoalesce commit 03d5eb65b53889fe98a5ecddfe205c16e3093190 upstream. If the function exits early, then we must put those requests that were not processed back onto the &mirror->pg_list so they can be cleaned up by nfs_pgio_error(). Fixes: a7d42ddb30997 ("nfs: add mirroring support to pgio layer") Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/pagelist.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 282b393695106..7b45526785360 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -1110,8 +1110,11 @@ static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc) nfs_list_remove_request(req); if (__nfs_pageio_add_request(desc, req)) continue; - if (desc->pg_error < 0) + if (desc->pg_error < 0) { + list_splice_tail(&head, &mirror->pg_list); + mirror->pg_recoalesce = 1; return 0; + } break; } } while (mirror->pg_recoalesce); From d1339ce0bcb285a0756bf9037f49e258b5df636f Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 22 Jul 2015 14:30:03 -0600 Subject: [PATCH 0515/2091] IB/ipoib: Fix CONFIG_INFINIBAND_IPOIB_CM commit efc1eedbf63a194b3b576fc25776f3f1fa55a4d4 upstream. 
If the above is turned off then ipoib_cm_dev_init unconditionally returns ENOSYS, and the newly added error handling in 0b3957 prevents ipoib from coming up at all: kernel: mlx4_0: ipoib_transport_dev_init failed kernel: mlx4_0: failed to initialize port 1 (ret = -12) Fixes: 0b39578bcde4 (IB/ipoib: Use dedicated workqueues per interface) Signed-off-by: Jason Gunthorpe Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index e5cc43074196d..2d13fd08ceb74 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -176,7 +176,8 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) else size += ipoib_recvq_size * ipoib_max_conn_qp; } else - goto out_free_wq; + if (ret != -ENOSYS) + goto out_free_wq; priv->recv_cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size, 0); if (IS_ERR(priv->recv_cq)) { From 00ff0eb11dbddcc2c2ba914f5d89755a43a0aa21 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Wed, 22 Jul 2015 00:24:09 -0700 Subject: [PATCH 0516/2091] iscsi-target: Fix use-after-free during TPG session shutdown commit 417c20a9bdd1e876384127cf096d8ae8b559066c upstream. This patch fixes a use-after-free bug in iscsit_release_sessions_for_tpg() where se_portal_group->session_lock was incorrectly released/re-acquired while walking the active se_portal_group->tpg_sess_list. The can result in a NULL pointer dereference when iscsit_close_session() shutdown happens in the normal path asynchronously to this code, causing a bogus dereference of an already freed list entry to occur. To address this bug, walk the session list checking for the same state as before, but move entries to a local list to avoid dropping the lock while walking the active list. As before, signal using iscsi_session->session_restatement=1 for those list entries to be released locally by iscsit_free_session() code. 
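The fix is an instance of a standard kernel idiom: never drop and re-take a lock in the middle of walking the list it protects; move the entries of interest onto a private list under the lock, then do the heavy teardown afterwards with no lock held. A condensed sketch of the idiom with generic names; the patch below does exactly this with LIST_HEAD(free_list) and list_move_tail() before calling iscsit_free_session():

#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/types.h>

struct item {
	struct list_head list;
};

static void drain_matching(spinlock_t *lock, struct list_head *active,
			   bool (*match)(struct item *),
			   void (*destroy)(struct item *))
{
	struct item *it, *tmp;
	LIST_HEAD(free_list);		/* private, stack-local list head */

	spin_lock_bh(lock);
	list_for_each_entry_safe(it, tmp, active, list) {
		if (match(it))
			list_move_tail(&it->list, &free_list);
	}
	spin_unlock_bh(lock);

	/* Nothing else can see free_list, so sleeping teardown is safe here. */
	list_for_each_entry_safe(it, tmp, &free_list, list)
		destroy(it);
}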
Reported-by: Sunilkumar Nadumuttlu Cc: Sunilkumar Nadumuttlu Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/target/iscsi/iscsi_target.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 74e6114ff18f9..f267b6f17112b 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -4776,6 +4776,7 @@ int iscsit_release_sessions_for_tpg(struct iscsi_portal_group *tpg, int force) struct iscsi_session *sess; struct se_portal_group *se_tpg = &tpg->tpg_se_tpg; struct se_session *se_sess, *se_sess_tmp; + LIST_HEAD(free_list); int session_count = 0; spin_lock_bh(&se_tpg->session_lock); @@ -4797,14 +4798,17 @@ int iscsit_release_sessions_for_tpg(struct iscsi_portal_group *tpg, int force) } atomic_set(&sess->session_reinstatement, 1); spin_unlock(&sess->conn_lock); - spin_unlock_bh(&se_tpg->session_lock); - iscsit_free_session(sess); - spin_lock_bh(&se_tpg->session_lock); + list_move_tail(&se_sess->sess_list, &free_list); + } + spin_unlock_bh(&se_tpg->session_lock); + list_for_each_entry_safe(se_sess, se_sess_tmp, &free_list, sess_list) { + sess = (struct iscsi_session *)se_sess->fabric_sess_ptr; + + iscsit_free_session(sess); session_count++; } - spin_unlock_bh(&se_tpg->session_lock); pr_debug("Released %d iSCSI Session(s) from Target Portal" " Group: %hu\n", session_count, tpg->tpgt); From 5f4f44a51d81bba3ce116857a7bdf4f45231ed58 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Wed, 22 Jul 2015 23:14:19 -0700 Subject: [PATCH 0517/2091] iscsi-target: Fix iscsit_start_kthreads failure OOPs commit e54198657b65625085834847ab6271087323ffea upstream. This patch fixes a regression introduced with the following commit in v4.0-rc1 code, where a iscsit_start_kthreads() failure triggers a NULL pointer dereference OOPs: commit 88dcd2dab5c23b1c9cfc396246d8f476c872f0ca Author: Nicholas Bellinger Date: Thu Feb 26 22:19:15 2015 -0800 iscsi-target: Convert iscsi_thread_set usage to kthread.h To address this bug, move iscsit_start_kthreads() immediately preceeding the transmit of last login response, before signaling a successful transition into full-feature-phase within existing iscsi_target_do_tx_login_io() logic. This ensures that no target-side resource allocation failures can occur after the final login response has been successfully sent. Also, it adds a iscsi_conn->rx_login_comp to allow the RX thread to sleep to prevent other socket related failures until the final iscsi_post_login_handler() call is able to complete. 
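The ordering is enforced with a completion: the RX thread parks on conn->rx_login_comp as soon as it starts, and the login path completes it only after post-login setup has finished, so no socket I/O or failure handling can race the final response. The handshake reduced to its skeleton (generic names apart from the completion itself, which the patch adds to struct iscsi_conn):

#include <linux/completion.h>
#include <linux/kthread.h>

struct conn {
	struct completion rx_login_comp;	/* init_completion() at conn setup */
};

static int rx_thread_fn(void *arg)
{
	struct conn *c = arg;

	/* Sleep until post-login handling signals readiness; bail out
	 * cleanly if we are interrupted during an early shutdown. */
	if (wait_for_completion_interruptible(&c->rx_login_comp) < 0)
		return 0;

	/* ... only now start reading from the socket ... */
	return 0;
}

static void post_login(struct conn *c)
{
	/* session state, timers and cpumask are all set up first, then: */
	complete(&c->rx_login_comp);
}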
Cc: Sagi Grimberg Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/target/iscsi/iscsi_target.c | 18 +++++++-- drivers/target/iscsi/iscsi_target_login.c | 45 +++++++++-------------- drivers/target/iscsi/iscsi_target_login.h | 3 +- drivers/target/iscsi/iscsi_target_nego.c | 34 ++++++++++++++++- include/target/iscsi/iscsi_target_core.h | 1 + 5 files changed, 68 insertions(+), 33 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index f267b6f17112b..3a707195e5904 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -4001,7 +4001,13 @@ int iscsi_target_tx_thread(void *arg) } transport_err: - iscsit_take_action_for_connection_exit(conn); + /* + * Avoid the normal connection failure code-path if this connection + * is still within LOGIN mode, and iscsi_np process context is + * responsible for cleaning up the early connection failure. + */ + if (conn->conn_state != TARG_CONN_STATE_IN_LOGIN) + iscsit_take_action_for_connection_exit(conn); out: return 0; } @@ -4093,7 +4099,7 @@ static int iscsi_target_rx_opcode(struct iscsi_conn *conn, unsigned char *buf) int iscsi_target_rx_thread(void *arg) { - int ret; + int ret, rc; u8 buffer[ISCSI_HDR_LEN], opcode; u32 checksum = 0, digest = 0; struct iscsi_conn *conn = arg; @@ -4103,10 +4109,16 @@ int iscsi_target_rx_thread(void *arg) * connection recovery / failure event can be triggered externally. */ allow_signal(SIGINT); + /* + * Wait for iscsi_post_login_handler() to complete before allowing + * incoming iscsi/tcp socket I/O, and/or failing the connection. + */ + rc = wait_for_completion_interruptible(&conn->rx_login_comp); + if (rc < 0) + return 0; if (conn->conn_transport->transport_type == ISCSI_INFINIBAND) { struct completion comp; - int rc; init_completion(&comp); rc = wait_for_completion_interruptible(&comp); diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c index 70d799dfab03c..c3bccaddb5920 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -82,6 +82,7 @@ static struct iscsi_login *iscsi_login_init_conn(struct iscsi_conn *conn) init_completion(&conn->conn_logout_comp); init_completion(&conn->rx_half_close_comp); init_completion(&conn->tx_half_close_comp); + init_completion(&conn->rx_login_comp); spin_lock_init(&conn->cmd_lock); spin_lock_init(&conn->conn_usage_lock); spin_lock_init(&conn->immed_queue_lock); @@ -699,7 +700,7 @@ static void iscsi_post_login_start_timers(struct iscsi_conn *conn) iscsit_start_nopin_timer(conn); } -static int iscsit_start_kthreads(struct iscsi_conn *conn) +int iscsit_start_kthreads(struct iscsi_conn *conn) { int ret = 0; @@ -734,6 +735,7 @@ static int iscsit_start_kthreads(struct iscsi_conn *conn) return 0; out_tx: + send_sig(SIGINT, conn->tx_thread, 1); kthread_stop(conn->tx_thread); conn->tx_thread_active = false; out_bitmap: @@ -744,7 +746,7 @@ static int iscsit_start_kthreads(struct iscsi_conn *conn) return ret; } -int iscsi_post_login_handler( +void iscsi_post_login_handler( struct iscsi_np *np, struct iscsi_conn *conn, u8 zero_tsih) @@ -754,7 +756,6 @@ int iscsi_post_login_handler( struct se_session *se_sess = sess->se_sess; struct iscsi_portal_group *tpg = sess->tpg; struct se_portal_group *se_tpg = &tpg->tpg_se_tpg; - int rc; iscsit_inc_conn_usage_count(conn); @@ -795,10 +796,6 @@ int iscsi_post_login_handler( sess->sess_ops->InitiatorName); spin_unlock_bh(&sess->conn_lock); - rc = 
iscsit_start_kthreads(conn); - if (rc) - return rc; - iscsi_post_login_start_timers(conn); /* * Determine CPU mask to ensure connection's RX and TX kthreads @@ -807,15 +804,20 @@ int iscsi_post_login_handler( iscsit_thread_get_cpumask(conn); conn->conn_rx_reset_cpumask = 1; conn->conn_tx_reset_cpumask = 1; - + /* + * Wakeup the sleeping iscsi_target_rx_thread() now that + * iscsi_conn is in TARG_CONN_STATE_LOGGED_IN state. + */ + complete(&conn->rx_login_comp); iscsit_dec_conn_usage_count(conn); + if (stop_timer) { spin_lock_bh(&se_tpg->session_lock); iscsit_stop_time2retain_timer(sess); spin_unlock_bh(&se_tpg->session_lock); } iscsit_dec_session_usage_count(sess); - return 0; + return; } iscsi_set_session_parameters(sess->sess_ops, conn->param_list, 1); @@ -856,10 +858,6 @@ int iscsi_post_login_handler( " iSCSI Target Portal Group: %hu\n", tpg->nsessions, tpg->tpgt); spin_unlock_bh(&se_tpg->session_lock); - rc = iscsit_start_kthreads(conn); - if (rc) - return rc; - iscsi_post_login_start_timers(conn); /* * Determine CPU mask to ensure connection's RX and TX kthreads @@ -868,10 +866,12 @@ int iscsi_post_login_handler( iscsit_thread_get_cpumask(conn); conn->conn_rx_reset_cpumask = 1; conn->conn_tx_reset_cpumask = 1; - + /* + * Wakeup the sleeping iscsi_target_rx_thread() now that + * iscsi_conn is in TARG_CONN_STATE_LOGGED_IN state. + */ + complete(&conn->rx_login_comp); iscsit_dec_conn_usage_count(conn); - - return 0; } static void iscsi_handle_login_thread_timeout(unsigned long data) @@ -1436,23 +1436,12 @@ static int __iscsi_target_login_thread(struct iscsi_np *np) if (ret < 0) goto new_sess_out; - if (!conn->sess) { - pr_err("struct iscsi_conn session pointer is NULL!\n"); - goto new_sess_out; - } - iscsi_stop_login_thread_timer(np); - if (signal_pending(current)) - goto new_sess_out; - if (ret == 1) { tpg_np = conn->tpg_np; - ret = iscsi_post_login_handler(np, conn, zero_tsih); - if (ret < 0) - goto new_sess_out; - + iscsi_post_login_handler(np, conn, zero_tsih); iscsit_deaccess_np(np, tpg, tpg_np); } diff --git a/drivers/target/iscsi/iscsi_target_login.h b/drivers/target/iscsi/iscsi_target_login.h index 29d098324b7f9..55cbf4533544a 100644 --- a/drivers/target/iscsi/iscsi_target_login.h +++ b/drivers/target/iscsi/iscsi_target_login.h @@ -12,7 +12,8 @@ extern int iscsit_accept_np(struct iscsi_np *, struct iscsi_conn *); extern int iscsit_get_login_rx(struct iscsi_conn *, struct iscsi_login *); extern int iscsit_put_login_tx(struct iscsi_conn *, struct iscsi_login *, u32); extern void iscsit_free_conn(struct iscsi_np *, struct iscsi_conn *); -extern int iscsi_post_login_handler(struct iscsi_np *, struct iscsi_conn *, u8); +extern int iscsit_start_kthreads(struct iscsi_conn *); +extern void iscsi_post_login_handler(struct iscsi_np *, struct iscsi_conn *, u8); extern void iscsi_target_login_sess_out(struct iscsi_conn *, struct iscsi_np *, bool, bool); extern int iscsi_target_login_thread(void *); diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c index 8c02fa34716fa..f9cde91418367 100644 --- a/drivers/target/iscsi/iscsi_target_nego.c +++ b/drivers/target/iscsi/iscsi_target_nego.c @@ -17,6 +17,7 @@ ******************************************************************************/ #include +#include #include #include #include @@ -361,10 +362,24 @@ static int iscsi_target_do_tx_login_io(struct iscsi_conn *conn, struct iscsi_log ntohl(login_rsp->statsn), login->rsp_length); padding = ((-login->rsp_length) & 3); + /* + * Before sending the last login 
response containing the transition + * bit for full-feature-phase, go ahead and start up TX/RX threads + * now to avoid potential resource allocation failures after the + * final login response has been sent. + */ + if (login->login_complete) { + int rc = iscsit_start_kthreads(conn); + if (rc) { + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, + ISCSI_LOGIN_STATUS_NO_RESOURCES); + return -1; + } + } if (conn->conn_transport->iscsit_put_login_tx(conn, login, login->rsp_length + padding) < 0) - return -1; + goto err; login->rsp_length = 0; mutex_lock(&sess->cmdsn_mutex); @@ -373,6 +388,23 @@ static int iscsi_target_do_tx_login_io(struct iscsi_conn *conn, struct iscsi_log mutex_unlock(&sess->cmdsn_mutex); return 0; + +err: + if (login->login_complete) { + if (conn->rx_thread && conn->rx_thread_active) { + send_sig(SIGINT, conn->rx_thread, 1); + kthread_stop(conn->rx_thread); + } + if (conn->tx_thread && conn->tx_thread_active) { + send_sig(SIGINT, conn->tx_thread, 1); + kthread_stop(conn->tx_thread); + } + spin_lock(&iscsit_global->ts_bitmap_lock); + bitmap_release_region(iscsit_global->ts_bitmap, conn->bitmap_id, + get_order(1)); + spin_unlock(&iscsit_global->ts_bitmap_lock); + } + return -1; } static void iscsi_target_sk_data_ready(struct sock *sk) diff --git a/include/target/iscsi/iscsi_target_core.h b/include/target/iscsi/iscsi_target_core.h index 54e7af301888f..73abbc54063de 100644 --- a/include/target/iscsi/iscsi_target_core.h +++ b/include/target/iscsi/iscsi_target_core.h @@ -606,6 +606,7 @@ struct iscsi_conn { int bitmap_id; int rx_thread_active; struct task_struct *rx_thread; + struct completion rx_login_comp; int tx_thread_active; struct task_struct *tx_thread; /* list_head for session connection list */ From 725dbab305404b65d01d788cee4e5bedc2964150 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Thu, 23 Jul 2015 22:30:31 +0000 Subject: [PATCH 0518/2091] iscsi-target: Fix iser explicit logout TX kthread leak commit 007d038bdf95ccfe2491d0078be54040d110fd06 upstream. This patch fixes a regression introduced with the following commit in v4.0-rc1 code, where an explicit iser-target logout would result in ->tx_thread_active being incorrectly cleared by the logout post handler, and subsequent TX kthread leak: commit 88dcd2dab5c23b1c9cfc396246d8f476c872f0ca Author: Nicholas Bellinger Date: Thu Feb 26 22:19:15 2015 -0800 iscsi-target: Convert iscsi_thread_set usage to kthread.h To address this bug, change iscsit_logout_post_handler_closesession() and iscsit_logout_post_handler_samecid() to only cmpxchg() on ->tx_thread_active for traditional iscsi/tcp connections. This is required because iscsi/tcp connections are invoking logout post handler logic directly from TX kthread context, while iser connections are invoking logout post handler logic from a seperate workqueue context. 
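The cmpxchg() acts as a claim-once flag: the one caller that actually flips ->tx_thread_active from true to false gets the old value true back and owns that shutdown step; any other caller sees false and skips it. Limiting the claim to ISCSI_TCP matches where the logout post handler runs: for iscsi/tcp it runs in the TX thread itself, while for iser it runs from a workqueue and must simply sleep until iscsit_close_connection() stops both threads. The idiom in isolation (generic flag name, not the driver structures):

#include <linux/atomic.h>	/* cmpxchg() */
#include <linux/types.h>

/*
 * Returns true only for the single caller that performs the
 * true -> false transition, even with concurrent callers.
 */
static bool claim_once(int *flag)
{
	return cmpxchg(flag, true, false);
}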
Cc: Sagi Grimberg Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/target/iscsi/iscsi_target.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 3a707195e5904..305a5cbc099a6 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -4555,7 +4555,18 @@ static void iscsit_logout_post_handler_closesession( struct iscsi_conn *conn) { struct iscsi_session *sess = conn->sess; - int sleep = cmpxchg(&conn->tx_thread_active, true, false); + int sleep = 1; + /* + * Traditional iscsi/tcp will invoke this logic from TX thread + * context during session logout, so clear tx_thread_active and + * sleep if iscsit_close_connection() has not already occured. + * + * Since iser-target invokes this logic from it's own workqueue, + * always sleep waiting for RX/TX thread shutdown to complete + * within iscsit_close_connection(). + */ + if (conn->conn_transport->transport_type == ISCSI_TCP) + sleep = cmpxchg(&conn->tx_thread_active, true, false); atomic_set(&conn->conn_logout_remove, 0); complete(&conn->conn_logout_comp); @@ -4569,7 +4580,10 @@ static void iscsit_logout_post_handler_closesession( static void iscsit_logout_post_handler_samecid( struct iscsi_conn *conn) { - int sleep = cmpxchg(&conn->tx_thread_active, true, false); + int sleep = 1; + + if (conn->conn_transport->transport_type == ISCSI_TCP) + sleep = cmpxchg(&conn->tx_thread_active, true, false); atomic_set(&conn->conn_logout_remove, 0); complete(&conn->conn_logout_comp); From c97cda421f4d5c34638423fb7e07e7968703e619 Mon Sep 17 00:00:00 2001 From: Lukasz Anaczkowski Date: Tue, 21 Jul 2015 10:41:13 +0200 Subject: [PATCH 0519/2091] intel_pstate: Add get_scaling cpu_defaults param to Knights Landing commit 69cefc273f942bd7bb347a02e8b5b738d5f6e6f3 upstream. Scaling for Knights Landing is same as the default scaling (100000). When Knigts Landing support was added to the pstate driver, this parameter was omitted resulting in a kernel panic during boot. Fixes: b34ef932d79a (intel_pstate: Knights Landing support) Reported-by: Yasuaki Ishimatsu Signed-off-by: Dasaratharaman Chandramouli Signed-off-by: Lukasz Anaczkowski Acked-by: Kristen Carlson Accardi Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/intel_pstate.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index c45d274a75c8b..6f9d27f9001c7 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -678,6 +678,7 @@ static struct cpu_defaults knl_params = { .get_max = core_get_max_pstate, .get_min = core_get_min_pstate, .get_turbo = knl_get_turbo_pstate, + .get_scaling = core_get_scaling, .set = core_set_pstate, }, }; From beca4eb65b6f392577ae2a6507eb3b3579f69dd3 Mon Sep 17 00:00:00 2001 From: Saurav Kashyap Date: Wed, 10 Jun 2015 11:05:17 -0400 Subject: [PATCH 0520/2091] qla2xxx: Fix hardware lock/unlock issue causing kernel panic. commit ba9f6f64a0ff6b7ecaed72144c179061f8eca378 upstream. [ Upstream commit ef86cb2059a14b4024c7320999ee58e938873032 ] This patch fixes a kernel panic for qla2xxx Target core Module driver introduced by a fix in the qla2xxx initiator code. Commit ef86cb2 ("qla2xxx: Mark port lost when we receive an RSCN for it.") introduced the regression for qla2xxx Target driver. 
Stack trace will have following signature --- --- [ffff88081faa3cc8] _raw_spin_lock_irqsave at ffffffff815b1f03 [ffff88081faa3cd0] qlt_fc_port_deleted at ffffffffa096ccd0 [qla2xxx] [ffff88081faa3d20] qla2x00_schedule_rport_del at ffffffffa0913831[qla2xxx] [ffff88081faa3d50] qla2x00_mark_device_lost at ffffffffa09159c5[qla2xxx] [ffff88081faa3db0] qla2x00_async_event at ffffffffa0938d59 [qla2xxx] [ffff88081faa3e30] qla24xx_msix_default at ffffffffa093a326 [qla2xxx] [ffff88081faa3e90] handle_irq_event_percpu at ffffffff810a7b8d [ffff88081faa3ee0] handle_irq_event at ffffffff810a7d32 [ffff88081faa3f10] handle_edge_irq at ffffffff810ab6b9 [ffff88081faa3f30] handle_irq at ffffffff8100619c [ffff88081faa3f70] do_IRQ at ffffffff815b4b1c --- --- Signed-off-by: Saurav Kashyap Signed-off-by: Himanshu Madhani Reviewed-by: Nicholas Bellinger Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_init.c | 4 ++++ drivers/scsi/qla2xxx/qla_target.c | 6 ------ 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 285cb204f3005..3c1550adc043e 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -2924,6 +2924,7 @@ qla2x00_rport_del(void *data) struct fc_rport *rport; scsi_qla_host_t *vha = fcport->vha; unsigned long flags; + unsigned long vha_flags; spin_lock_irqsave(fcport->vha->host->host_lock, flags); rport = fcport->drport ? fcport->drport: fcport->rport; @@ -2935,7 +2936,9 @@ qla2x00_rport_del(void *data) * Release the target mode FC NEXUS in qla_target.c code * if target mod is enabled. */ + spin_lock_irqsave(&vha->hw->hardware_lock, vha_flags); qlt_fc_port_deleted(vha, fcport); + spin_unlock_irqrestore(&vha->hw->hardware_lock, vha_flags); } } @@ -3303,6 +3306,7 @@ qla2x00_reg_remote_port(scsi_qla_host_t *vha, fc_port_t *fcport) * Create target mode FC NEXUS in qla_target.c if target mode is * enabled.. */ + qlt_fc_port_added(vha, fcport); spin_lock_irqsave(fcport->vha->host->host_lock, flags); diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index fe8a8d157e225..ed39ed29515d9 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -782,10 +782,8 @@ void qlt_fc_port_added(struct scsi_qla_host *vha, fc_port_t *fcport) void qlt_fc_port_deleted(struct scsi_qla_host *vha, fc_port_t *fcport) { - struct qla_hw_data *ha = vha->hw; struct qla_tgt *tgt = vha->vha_tgt.qla_tgt; struct qla_tgt_sess *sess; - unsigned long flags; if (!vha->hw->tgt.tgt_ops) return; @@ -793,14 +791,11 @@ void qlt_fc_port_deleted(struct scsi_qla_host *vha, fc_port_t *fcport) if (!tgt || (fcport->port_type != FCT_INITIATOR)) return; - spin_lock_irqsave(&ha->hardware_lock, flags); if (tgt->tgt_stop) { - spin_unlock_irqrestore(&ha->hardware_lock, flags); return; } sess = qlt_find_sess_by_port_name(tgt, fcport->port_name); if (!sess) { - spin_unlock_irqrestore(&ha->hardware_lock, flags); return; } @@ -808,7 +803,6 @@ void qlt_fc_port_deleted(struct scsi_qla_host *vha, fc_port_t *fcport) sess->local = 1; qlt_schedule_sess_for_deletion(sess, false); - spin_unlock_irqrestore(&ha->hardware_lock, flags); } static inline int test_tgt_sess_count(struct qla_tgt *tgt) From 117118ea1a8e170b3461105439b563fb7e834fec Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Wed, 10 Jun 2015 11:05:20 -0400 Subject: [PATCH 0521/2091] qla2xxx: release request queue reservation. commit 810e30bc4658e9c069577bde52394a5af872803c upstream. 
Request IOCB queue element(s) is reserved during good path IO. Under error condition such as unable to allocate IOCB handle condition, the IOCB count that was reserved is not released. Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Reviewed-by: Nicholas Bellinger Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_target.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index ed39ed29515d9..02980e20f444b 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -2341,9 +2341,10 @@ int qlt_xmit_response(struct qla_tgt_cmd *cmd, int xmit_type, res = qlt_build_ctio_crc2_pkt(&prm, vha); else res = qlt_24xx_build_ctio_pkt(&prm, vha); - if (unlikely(res != 0)) + if (unlikely(res != 0)) { + vha->req->cnt += full_req_cnt; goto out_unmap_unlock; - + } pkt = (struct ctio7_to_24xx *)prm.pkt; @@ -2481,8 +2482,11 @@ int qlt_rdy_to_xfer(struct qla_tgt_cmd *cmd) else res = qlt_24xx_build_ctio_pkt(&prm, vha); - if (unlikely(res != 0)) + if (unlikely(res != 0)) { + vha->req->cnt += prm.req_cnt; goto out_unlock_free_unmap; + } + pkt = (struct ctio7_to_24xx *)prm.pkt; pkt->u.status0.flags |= __constant_cpu_to_le16(CTIO7_FLAGS_DATA_OUT | CTIO7_FLAGS_STATUS_MODE_0); From 6dad361baad76206bb51e9af1814bddb45a99488 Mon Sep 17 00:00:00 2001 From: Himanshu Madhani Date: Wed, 10 Jun 2015 11:05:22 -0400 Subject: [PATCH 0522/2091] qla2xxx: Remove msleep in qlt_send_term_exchange commit 6bc85dd595a5438b50ec085668e53ef26058bb90 upstream. Remove unnecessary msleep from qlt_send_term_exchange as it adds latency of 250 msec while sending terminate exchange to an aborted task. Signed-off-by: Himanshu Madhani Signed-off-by: Giridhar Malavali Reviewed-by: Nicholas Bellinger Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_target.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 02980e20f444b..340b3a06d849f 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -2715,7 +2715,7 @@ static int __qlt_send_term_exchange(struct scsi_qla_host *vha, static void qlt_send_term_exchange(struct scsi_qla_host *vha, struct qla_tgt_cmd *cmd, struct atio_from_isp *atio, int ha_locked) { - unsigned long flags; + unsigned long flags = 0; int rc; if (qlt_issue_marker(vha, ha_locked) < 0) @@ -2731,17 +2731,18 @@ static void qlt_send_term_exchange(struct scsi_qla_host *vha, rc = __qlt_send_term_exchange(vha, cmd, atio); if (rc == -ENOMEM) qlt_alloc_qfull_cmd(vha, atio, 0, 0); - spin_unlock_irqrestore(&vha->hw->hardware_lock, flags); done: if (cmd && ((cmd->state != QLA_TGT_STATE_ABORTED) || !cmd->cmd_sent_to_fw)) { - if (!ha_locked && !in_interrupt()) - msleep(250); /* just in case */ - - qlt_unmap_sg(vha, cmd); + if (cmd->sg_mapped) + qlt_unmap_sg(vha, cmd); vha->hw->tgt.tgt_ops->free_cmd(cmd); } + + if (!ha_locked) + spin_unlock_irqrestore(&vha->hw->hardware_lock, flags); + return; } From 1e6ff894cd9fd295a7447b243c4882749f0a5a0c Mon Sep 17 00:00:00 2001 From: Kanoj Sarcar Date: Wed, 10 Jun 2015 11:05:23 -0400 Subject: [PATCH 0523/2091] qla2xxx: fix command initialization in target mode. commit 9fce12540cb9f91e7f1f539a80b70f0b388bdae0 upstream. 
Signed-off-by: Kanoj Sarcar Signed-off-by: Himanshu Madhani Reviewed-by: Nicholas Bellinger Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_target.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 340b3a06d849f..4399f0976eac0 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -3346,6 +3346,11 @@ static struct qla_tgt_cmd *qlt_get_tag(scsi_qla_host_t *vha, cmd->loop_id = sess->loop_id; cmd->conf_compl_supported = sess->conf_compl_supported; + cmd->cmd_flags = 0; + cmd->jiffies_at_alloc = get_jiffies_64(); + + cmd->reset_count = vha->hw->chip_reset; + return cmd; } @@ -3452,11 +3457,6 @@ static int qlt_handle_cmd_for_atio(struct scsi_qla_host *vha, return -ENOMEM; } - cmd->cmd_flags = 0; - cmd->jiffies_at_alloc = get_jiffies_64(); - - cmd->reset_count = vha->hw->chip_reset; - cmd->cmd_in_wq = 1; cmd->cmd_flags |= BIT_0; INIT_WORK(&cmd->work, qlt_do_work); From de60f552147f87956aa3a56ecb410e4f583dc99d Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Tue, 14 Jul 2015 16:00:42 -0400 Subject: [PATCH 0524/2091] qla2xxx: kill sessions/log out initiator on RSCN and port down events commit b2032fd567326ad0b2d443bb6d96d2580ec670a5 upstream. To fix some issues talking to ESX, this patch modifies the qla2xxx driver so that it never logs into remote ports. This has the side effect of getting rid of the "rports" entirely, which means we never log out of initiators and never tear down sessions when an initiator goes away. This is mostly OK, except that we can run into trouble if we have initiator A assigned FC address X:Y:Z by the fabric talking to us, and then initiator A goes away. Some time (could be a long time) later, initiator B comes along and also gets FC address X:Y:Z (which is available again, because initiator A is gone). If initiator B starts talking to us, then we'll still have the session for initiator A, and since we look up incoming IO based on the FC address X:Y:Z, initiator B will end up using ACLs for initiator A. Fix this by: 1. Handling RSCN events somewhat differently; instead of completely skipping the processing of fcports, we look through the list, and if an fcport disappears, we tell the target code the tear down the session and tell the HBA FW to release the N_Port handle. 2. Handling "port down" events by flushing all of our sessions. The firmware was already releasing the N_Port handle but we want the target code to drop all the sessions too. 
Signed-off-by: Roland Dreier Signed-off-by: Alexei Potashnik Acked-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_dbg.c | 2 +- drivers/scsi/qla2xxx/qla_init.c | 137 ++++++++++++++++++++++++------ drivers/scsi/qla2xxx/qla_target.c | 9 +- 3 files changed, 117 insertions(+), 31 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_dbg.c b/drivers/scsi/qla2xxx/qla_dbg.c index 0e6ee3ca30e66..e9ae6b924c705 100644 --- a/drivers/scsi/qla2xxx/qla_dbg.c +++ b/drivers/scsi/qla2xxx/qla_dbg.c @@ -68,7 +68,7 @@ * | | | 0xd101-0xd1fe | * | | | 0xd214-0xd2fe | * | Target Mode | 0xe079 | | - * | Target Mode Management | 0xf072 | 0xf002 | + * | Target Mode Management | 0xf080 | 0xf002 | * | | | 0xf046-0xf049 | * | Target Mode Task Management | 0x1000b | | * ---------------------------------------------------------------------- diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 3c1550adc043e..998498e2341b3 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -3464,20 +3464,43 @@ qla2x00_configure_fabric(scsi_qla_host_t *vha) if ((fcport->flags & FCF_FABRIC_DEVICE) == 0) continue; - if (fcport->scan_state == QLA_FCPORT_SCAN && - atomic_read(&fcport->state) == FCS_ONLINE) { - qla2x00_mark_device_lost(vha, fcport, - ql2xplogiabsentdevice, 0); - if (fcport->loop_id != FC_NO_LOOP_ID && - (fcport->flags & FCF_FCP2_DEVICE) == 0 && - fcport->port_type != FCT_INITIATOR && - fcport->port_type != FCT_BROADCAST) { - ha->isp_ops->fabric_logout(vha, - fcport->loop_id, - fcport->d_id.b.domain, - fcport->d_id.b.area, - fcport->d_id.b.al_pa); - qla2x00_clear_loop_id(fcport); + if (fcport->scan_state == QLA_FCPORT_SCAN) { + if (qla_ini_mode_enabled(base_vha) && + atomic_read(&fcport->state) == FCS_ONLINE) { + qla2x00_mark_device_lost(vha, fcport, + ql2xplogiabsentdevice, 0); + if (fcport->loop_id != FC_NO_LOOP_ID && + (fcport->flags & FCF_FCP2_DEVICE) == 0 && + fcport->port_type != FCT_INITIATOR && + fcport->port_type != FCT_BROADCAST) { + ha->isp_ops->fabric_logout(vha, + fcport->loop_id, + fcport->d_id.b.domain, + fcport->d_id.b.area, + fcport->d_id.b.al_pa); + qla2x00_clear_loop_id(fcport); + } + } else if (!qla_ini_mode_enabled(base_vha)) { + /* + * In target mode, explicitly kill + * sessions and log out of devices + * that are gone, so that we don't + * end up with an initiator using the + * wrong ACL (if the fabric recycles + * an FC address and we have a stale + * session around) and so that we don't + * report initiators that are no longer + * on the fabric. + */ + ql_dbg(ql_dbg_tgt_mgt, vha, 0xf077, + "port gone, logging out/killing session: " + "%8phC state 0x%x flags 0x%x fc4_type 0x%x " + "scan_state %d\n", + fcport->port_name, + atomic_read(&fcport->state), + fcport->flags, fcport->fc4_type, + fcport->scan_state); + qlt_fc_port_deleted(vha, fcport); } } } @@ -3498,6 +3521,28 @@ qla2x00_configure_fabric(scsi_qla_host_t *vha) (fcport->flags & FCF_LOGIN_NEEDED) == 0) continue; + /* + * If we're not an initiator, skip looking for devices + * and logging in. There's no reason for us to do it, + * and it seems to actively cause problems in target + * mode if we race with the initiator logging into us + * (we might get the "port ID used" status back from + * our login command and log out the initiator, which + * seems to cause havoc). 
+ */ + if (!qla_ini_mode_enabled(base_vha)) { + if (fcport->scan_state == QLA_FCPORT_FOUND) { + ql_dbg(ql_dbg_tgt_mgt, vha, 0xf078, + "port %8phC state 0x%x flags 0x%x fc4_type 0x%x " + "scan_state %d (initiator mode disabled; skipping " + "login)\n", fcport->port_name, + atomic_read(&fcport->state), + fcport->flags, fcport->fc4_type, + fcport->scan_state); + } + continue; + } + if (fcport->loop_id == FC_NO_LOOP_ID) { fcport->loop_id = next_loopid; rval = qla2x00_find_new_loop_id( @@ -3524,16 +3569,38 @@ qla2x00_configure_fabric(scsi_qla_host_t *vha) test_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags)) break; - /* Find a new loop ID to use. */ - fcport->loop_id = next_loopid; - rval = qla2x00_find_new_loop_id(base_vha, fcport); - if (rval != QLA_SUCCESS) { - /* Ran out of IDs to use */ - break; - } + /* + * If we're not an initiator, skip looking for devices + * and logging in. There's no reason for us to do it, + * and it seems to actively cause problems in target + * mode if we race with the initiator logging into us + * (we might get the "port ID used" status back from + * our login command and log out the initiator, which + * seems to cause havoc). + */ + if (qla_ini_mode_enabled(base_vha)) { + /* Find a new loop ID to use. */ + fcport->loop_id = next_loopid; + rval = qla2x00_find_new_loop_id(base_vha, + fcport); + if (rval != QLA_SUCCESS) { + /* Ran out of IDs to use */ + break; + } - /* Login and update database */ - qla2x00_fabric_dev_login(vha, fcport, &next_loopid); + /* Login and update database */ + qla2x00_fabric_dev_login(vha, fcport, + &next_loopid); + } else { + ql_dbg(ql_dbg_tgt_mgt, vha, 0xf079, + "new port %8phC state 0x%x flags 0x%x fc4_type " + "0x%x scan_state %d (initiator mode disabled; " + "skipping login)\n", + fcport->port_name, + atomic_read(&fcport->state), + fcport->flags, fcport->fc4_type, + fcport->scan_state); + } list_move_tail(&fcport->list, &vha->vp_fcports); } @@ -3729,11 +3796,12 @@ qla2x00_find_all_fabric_devs(scsi_qla_host_t *vha, fcport->fp_speed = new_fcport->fp_speed; /* - * If address the same and state FCS_ONLINE, nothing - * changed. + * If address the same and state FCS_ONLINE + * (or in target mode), nothing changed. */ if (fcport->d_id.b24 == new_fcport->d_id.b24 && - atomic_read(&fcport->state) == FCS_ONLINE) { + (atomic_read(&fcport->state) == FCS_ONLINE || + !qla_ini_mode_enabled(base_vha))) { break; } @@ -3753,6 +3821,22 @@ qla2x00_find_all_fabric_devs(scsi_qla_host_t *vha, * Log it out if still logged in and mark it for * relogin later. */ + if (!qla_ini_mode_enabled(base_vha)) { + ql_dbg(ql_dbg_tgt_mgt, vha, 0xf080, + "port changed FC ID, %8phC" + " old %x:%x:%x (loop_id 0x%04x)-> new %x:%x:%x\n", + fcport->port_name, + fcport->d_id.b.domain, + fcport->d_id.b.area, + fcport->d_id.b.al_pa, + fcport->loop_id, + new_fcport->d_id.b.domain, + new_fcport->d_id.b.area, + new_fcport->d_id.b.al_pa); + fcport->d_id.b24 = new_fcport->d_id.b24; + break; + } + fcport->d_id.b24 = new_fcport->d_id.b24; fcport->flags |= FCF_LOGIN_NEEDED; if (fcport->loop_id != FC_NO_LOOP_ID && @@ -3772,6 +3856,7 @@ qla2x00_find_all_fabric_devs(scsi_qla_host_t *vha, if (found) continue; /* If device was not in our fcports list, then add it. */ + new_fcport->scan_state = QLA_FCPORT_FOUND; list_add_tail(&new_fcport->list, new_fcports); /* Allocate a new replacement fcport. 
*/ diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 4399f0976eac0..496a733d0ca38 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -113,6 +113,7 @@ static void qlt_abort_cmd_on_host_reset(struct scsi_qla_host *vha, static void qlt_alloc_qfull_cmd(struct scsi_qla_host *vha, struct atio_from_isp *atio, uint16_t status, int qfull); static void qlt_disable_vha(struct scsi_qla_host *vha); +static void qlt_clear_tgt_db(struct qla_tgt *tgt); /* * Global Variables */ @@ -431,10 +432,10 @@ static int qlt_reset(struct scsi_qla_host *vha, void *iocb, int mcmd) loop_id = le16_to_cpu(n->u.isp24.nport_handle); if (loop_id == 0xFFFF) { -#if 0 /* FIXME: Re-enable Global event handling.. */ /* Global event */ - atomic_inc(&ha->tgt.qla_tgt->tgt_global_resets_count); - qlt_clear_tgt_db(ha->tgt.qla_tgt); + atomic_inc(&vha->vha_tgt.qla_tgt->tgt_global_resets_count); + qlt_clear_tgt_db(vha->vha_tgt.qla_tgt); +#if 0 /* FIXME: do we need to choose a session here? */ if (!list_empty(&ha->tgt.qla_tgt->sess_list)) { sess = list_entry(ha->tgt.qla_tgt->sess_list.next, typeof(*sess), sess_list_entry); @@ -788,7 +789,7 @@ void qlt_fc_port_deleted(struct scsi_qla_host *vha, fc_port_t *fcport) if (!vha->hw->tgt.tgt_ops) return; - if (!tgt || (fcport->port_type != FCT_INITIATOR)) + if (!tgt) return; if (tgt->tgt_stop) { From 49aed64d0a9e9bce6e848440ed8bcce2d8fab558 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Mon, 29 Jun 2015 04:07:20 -0400 Subject: [PATCH 0525/2091] drm/nouveau/fbcon/nv11-: correctly account for ring space usage commit d108142c0840ce389cd9898aa76943b3fb430b83 upstream. The RING_SPACE macro accounts how much space is used up so it's important to ask it for the right amount. Incorrect accounting of this can cause page faults down the line as writes are attempted outside of the ring. Signed-off-by: Ilia Mirkin Signed-off-by: Ben Skeggs Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nv04_fbcon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nv04_fbcon.c b/drivers/gpu/drm/nouveau/nv04_fbcon.c index 4ef602c5469d2..495c57644ced9 100644 --- a/drivers/gpu/drm/nouveau/nv04_fbcon.c +++ b/drivers/gpu/drm/nouveau/nv04_fbcon.c @@ -203,7 +203,7 @@ nv04_fbcon_accel_init(struct fb_info *info) if (ret) return ret; - if (RING_SPACE(chan, 49)) { + if (RING_SPACE(chan, 49 + (device->info.chipset >= 0x11 ? 4 : 0))) { nouveau_fbcon_gpu_lockup(info); return 0; } From 140d4baa12b22909676ea2be7cfdc12fa1572720 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 28 Jul 2015 17:20:57 +1000 Subject: [PATCH 0526/2091] drm/nouveau/kms/nv50-: guard against enabling cursor on disabled heads commit 697bb728d9e2367020aa0c5af7363809d7658e43 upstream. Userspace has started doing this, which upsets the display class hw error checking in various unpleasant ways. 
Signed-off-by: Ben Skeggs Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nv50_display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c index 7da7958556a3a..981342d142ff6 100644 --- a/drivers/gpu/drm/nouveau/nv50_display.c +++ b/drivers/gpu/drm/nouveau/nv50_display.c @@ -979,7 +979,7 @@ nv50_crtc_cursor_show_hide(struct nouveau_crtc *nv_crtc, bool show, bool update) { struct nv50_mast *mast = nv50_mast(nv_crtc->base.dev); - if (show && nv_crtc->cursor.nvbo) + if (show && nv_crtc->cursor.nvbo && nv_crtc->base.enabled) nv50_crtc_cursor_show(nv_crtc); else nv50_crtc_cursor_hide(nv_crtc); From 53f092abcba808c59ae9d4744175e9c25287a81c Mon Sep 17 00:00:00 2001 From: Kamil Dudka Date: Wed, 15 Jul 2015 17:18:15 +0200 Subject: [PATCH 0527/2091] drm/nouveau: hold mutex when calling nouveau_abi16_fini() commit ac8c79304280da6ef05c348a9da03ab04898b994 upstream. This was the only access to cli->abi16 without holding the mutex. Signed-off-by: Kamil Dudka Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nouveau_drm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index 89049335b7383..cd6dae08175e4 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -863,8 +863,10 @@ nouveau_drm_preclose(struct drm_device *dev, struct drm_file *fpriv) pm_runtime_get_sync(dev->dev); + mutex_lock(&cli->mutex); if (cli->abi16) nouveau_abi16_fini(cli->abi16); + mutex_unlock(&cli->mutex); mutex_lock(&drm->client.mutex); list_del(&cli->head); From 58e8dcc3d24e8115e8d0d988c8a0c68992a1c8fb Mon Sep 17 00:00:00 2001 From: Kamil Dudka Date: Wed, 15 Jul 2015 22:57:43 +0200 Subject: [PATCH 0528/2091] drm/nouveau/drm/nv04-nv40/instmem: protect access to priv->heap by mutex commit 7512223b1ece29a5968ed8b67ccb891d21b7834b upstream. This fixes the list_del corruption reported at . 
Signed-off-by: Kamil Dudka Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv04.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv04.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv04.c index 80614f1b20747..282143f49d72e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv04.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv04.c @@ -50,7 +50,12 @@ nv04_instobj_dtor(struct nvkm_object *object) { struct nv04_instmem_priv *priv = (void *)nvkm_instmem(object); struct nv04_instobj_priv *node = (void *)object; + struct nvkm_subdev *subdev = (void *)priv; + + mutex_lock(&subdev->mutex); nvkm_mm_free(&priv->heap, &node->mem); + mutex_unlock(&subdev->mutex); + nvkm_instobj_destroy(&node->base); } @@ -62,6 +67,7 @@ nv04_instobj_ctor(struct nvkm_object *parent, struct nvkm_object *engine, struct nv04_instmem_priv *priv = (void *)nvkm_instmem(parent); struct nv04_instobj_priv *node; struct nvkm_instobj_args *args = data; + struct nvkm_subdev *subdev = (void *)priv; int ret; if (!args->align) @@ -72,8 +78,10 @@ nv04_instobj_ctor(struct nvkm_object *parent, struct nvkm_object *engine, if (ret) return ret; + mutex_lock(&subdev->mutex); ret = nvkm_mm_head(&priv->heap, 0, 1, args->size, args->size, args->align, &node->mem); + mutex_unlock(&subdev->mutex); if (ret) return ret; From ff7f8c6411b43080cd8dc5907e2ef9613934daab Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 29 Jul 2015 11:48:01 +1000 Subject: [PATCH 0529/2091] xfs: remote attribute headers contain an invalid LSN commit e3c32ee9e3e747fec01eb38e6610a9157d44c3ea upstream. In recent testing, a system that crashed failed log recovery on restart with a bad symlink buffer magic number: XFS (vda): Starting recovery (logdev: internal) XFS (vda): Bad symlink block magic! XFS: Assertion failed: 0, file: fs/xfs/xfs_log_recover.c, line: 2060 On examination of the log via xfs_logprint, none of the symlink buffers in the log had a bad magic number, nor were any other types of buffer log format headers mis-identified as symlink buffers. Tracing was used to find the buffer the kernel was tripping over, and xfs_db identified it's contents as: 000: 5841524d 00000000 00000346 64d82b48 8983e692 d71e4680 a5f49e2c b317576e 020: 00000000 00602038 00000000 006034ce d0020000 00000000 4d4d4d4d 4d4d4d4d 040: 4d4d4d4d 4d4d4d4d 4d4d4d4d 4d4d4d4d 4d4d4d4d 4d4d4d4d 4d4d4d4d 4d4d4d4d 060: 4d4d4d4d 4d4d4d4d 4d4d4d4d 4d4d4d4d 4d4d4d4d 4d4d4d4d 4d4d4d4d 4d4d4d4d ..... This is a remote attribute buffer, which are notable in that they are not logged but are instead written synchronously by the remote attribute code so that they exist on disk before the attribute transactions are committed to the journal. The above remote attribute block has an invalid LSN in it - cycle 0xd002000, block 0 - which means when log recovery comes along to determine if the transaction that writes to the underlying block should be replayed, it sees a block that has a future LSN and so does not replay the buffer data in the transaction. Instead, it validates the buffer magic number and attaches the buffer verifier to it. It is this buffer magic number check that is failing in the above assert, indicating that we skipped replay due to the LSN of the underlying buffer. 
The problem here is that the remote attribute buffers cannot have a valid LSN placed into them, because the transaction that contains the attribute tree pointer changes and the block allocation that the attribute data is being written to hasn't yet been committed. Hence the LSN field in the attribute block is completely unwritten, thereby leaving the underlying contents of the block in the LSN field. It could have any value, and hence a future overwrite of the block by log recovery may or may not work correctly. Fix this by always writing an invalid LSN to the remote attribute block, as any buffer in log recovery that needs to write over the remote attribute should occur. We are protected from having old data written over the attribute by the fact that freeing the block before the remote attribute is written will result in the buffer being marked stale in the log and so all changes prior to the buffer stale transaction will be cancelled by log recovery. Hence it is safe to ignore the LSN in the case or synchronously written, unlogged metadata such as remote attribute blocks, and to ensure we do that correctly, we need to write an invalid LSN to all remote attribute blocks to trigger immediate recovery of metadata that is written over the top. As a further protection for filesystems that may already have remote attribute blocks with bad LSNs on disk, change the log recovery code to always trigger immediate recovery of metadata over remote attribute blocks. Signed-off-by: Dave Chinner Reviewed-by: Brian Foster Signed-off-by: Dave Chinner Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_attr_remote.c | 29 +++++++++++++++++++++++------ fs/xfs/xfs_log_recover.c | 11 ++++++++--- 2 files changed, 31 insertions(+), 9 deletions(-) diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c index 20de88d1bf862..2faec26962e89 100644 --- a/fs/xfs/libxfs/xfs_attr_remote.c +++ b/fs/xfs/libxfs/xfs_attr_remote.c @@ -159,11 +159,10 @@ xfs_attr3_rmt_write_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_buf_log_item *bip = bp->b_fspriv; + int blksize = mp->m_attr_geo->blksize; char *ptr; int len; xfs_daddr_t bno; - int blksize = mp->m_attr_geo->blksize; /* no verification of non-crc buffers */ if (!xfs_sb_version_hascrc(&mp->m_sb)) @@ -175,16 +174,22 @@ xfs_attr3_rmt_write_verify( ASSERT(len >= blksize); while (len > 0) { + struct xfs_attr3_rmt_hdr *rmt = (struct xfs_attr3_rmt_hdr *)ptr; + if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) { xfs_buf_ioerror(bp, -EFSCORRUPTED); xfs_verifier_error(bp); return; } - if (bip) { - struct xfs_attr3_rmt_hdr *rmt; - rmt = (struct xfs_attr3_rmt_hdr *)ptr; - rmt->rm_lsn = cpu_to_be64(bip->bli_item.li_lsn); + /* + * Ensure we aren't writing bogus LSNs to disk. See + * xfs_attr3_rmt_hdr_set() for the explanation. + */ + if (rmt->rm_lsn != cpu_to_be64(NULLCOMMITLSN)) { + xfs_buf_ioerror(bp, -EFSCORRUPTED); + xfs_verifier_error(bp); + return; } xfs_update_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF); @@ -221,6 +226,18 @@ xfs_attr3_rmt_hdr_set( rmt->rm_owner = cpu_to_be64(ino); rmt->rm_blkno = cpu_to_be64(bno); + /* + * Remote attribute blocks are written synchronously, so we don't + * have an LSN that we can stamp in them that makes any sense to log + * recovery. To ensure that log recovery handles overwrites of these + * blocks sanely (i.e. 
once they've been freed and reallocated as some + * other type of metadata) we need to ensure that the LSN has a value + * that tells log recovery to ignore the LSN and overwrite the buffer + * with whatever is in it's log. To do this, we use the magic + * NULLCOMMITLSN to indicate that the LSN is invalid. + */ + rmt->rm_lsn = cpu_to_be64(NULLCOMMITLSN); + return sizeof(struct xfs_attr3_rmt_hdr); } diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 4f5784f85a5b2..a5d03396dda01 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -1887,9 +1887,14 @@ xlog_recover_get_buf_lsn( uuid = &((struct xfs_dir3_blk_hdr *)blk)->uuid; break; case XFS_ATTR3_RMT_MAGIC: - lsn = be64_to_cpu(((struct xfs_attr3_rmt_hdr *)blk)->rm_lsn); - uuid = &((struct xfs_attr3_rmt_hdr *)blk)->rm_uuid; - break; + /* + * Remote attr blocks are written synchronously, rather than + * being logged. That means they do not contain a valid LSN + * (i.e. transactionally ordered) in them, and hence any time we + * see a buffer to replay over the top of a remote attribute + * block we should simply do so. + */ + goto recover_immediately; case XFS_SB_MAGIC: lsn = be64_to_cpu(((struct xfs_dsb *)blk)->sb_lsn); uuid = &((struct xfs_dsb *)blk)->sb_uuid; From d00799670008b9d6745e2b4adb1f7e09b3e71069 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 29 Jul 2015 11:48:02 +1000 Subject: [PATCH 0530/2091] xfs: remote attributes need to be considered data commit df150ed102baa0e78c06e08e975dfb47147dd677 upstream. We don't log remote attribute contents, and instead write them synchronously before we commit the block allocation and attribute tree update transaction. As a result we are writing to the allocated space before the allcoation has been made permanent. As a result, we cannot consider this allocation to be a metadata allocation. Metadata allocation can take blocks from the free list and so reuse them before the transaction that freed the block is committed to disk. This behaviour is perfectly fine for journalled metadata changes as log recovery will ensure the free operation is replayed before the overwrite, but for remote attribute writes this is not the case. Hence we have to consider the remote attribute blocks to contain data and allocate accordingly. We do this by dropping the XFS_BMAPI_METADATA flag from the block allocation. This means the allocation will not use blocks that are on the busy list without first ensuring that the freeing transaction has been committed to disk and the blocks removed from the busy list. This ensures we will never overwrite a freed block without first ensuring that it is really free. Signed-off-by: Dave Chinner Reviewed-by: Brian Foster Signed-off-by: Dave Chinner Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_attr_remote.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c index 2faec26962e89..dd714037c322d 100644 --- a/fs/xfs/libxfs/xfs_attr_remote.c +++ b/fs/xfs/libxfs/xfs_attr_remote.c @@ -451,14 +451,21 @@ xfs_attr_rmtval_set( /* * Allocate a single extent, up to the size of the value. + * + * Note that we have to consider this a data allocation as we + * write the remote attribute without logging the contents. + * Hence we must ensure that we aren't using blocks that are on + * the busy list so that we don't overwrite blocks which have + * recently been freed but their transactions are not yet + * committed to disk. 
If we overwrite the contents of a busy + * extent and then crash then the block may not contain the + * correct metadata after log recovery occurs. */ xfs_bmap_init(args->flist, args->firstblock); nmap = 1; error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno, - blkcnt, - XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA, - args->firstblock, args->total, &map, &nmap, - args->flist); + blkcnt, XFS_BMAPI_ATTRFORK, args->firstblock, + args->total, &map, &nmap, args->flist); if (!error) { error = xfs_bmap_finish(&args->trans, args->flist, &committed); From ace89c9942b31342c008d26601ad7f89dfeedb06 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 13 Jul 2015 08:21:57 -0300 Subject: [PATCH 0531/2091] perf symbols: Store if there is a filter in place commit 0bc2f2f7d080561cc484d2d0a162a9396bed3383 upstream. When setting yup the symbols library we setup several filter lists, for dsos, comms, symbols, etc, and there is code that, if there are filters, do certain operations, like recalculate the number of non filtered histogram entries in the top/report TUI. But they were considering just the "Zoom" filters, when they need to take into account as well the above mentioned filters (perf top --comms, --dsos, etc). So store in symbol_conf.has_filter true if any of those filters is in place. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-f5edfmhq69vfvs1kmikq1wep@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo Cc: Andre Tomt Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/symbol.c | 2 ++ tools/perf/util/symbol.h | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 201f6c4ca738d..99378a5c57a76 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1893,6 +1893,8 @@ int setup_intlist(struct intlist **list, const char *list_str, pr_err("problems parsing %s list\n", list_name); return -1; } + + symbol_conf.has_filter = true; return 0; } diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 09561500164a0..be0217989bcce 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -105,7 +105,8 @@ struct symbol_conf { demangle_kernel, filter_relative, show_hist_headers, - branch_callstack; + branch_callstack, + has_filter; const char *vmlinux_name, *kallsyms_name, *source_prefix, From 352cb8677f83a6cf2139151578c8c79785d2d4bf Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 10 Aug 2015 12:22:34 -0700 Subject: [PATCH 0532/2091] Linux 4.1.5 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b451c00332e57..068dd690933d4 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 1 -SUBLEVEL = 4 +SUBLEVEL = 5 EXTRAVERSION = NAME = Series 4800 From 80e2e4be68de3ab6ae1c93830c786b50f8fa017d Mon Sep 17 00:00:00 2001 From: James Cowgill Date: Tue, 23 Jun 2015 12:02:00 +0100 Subject: [PATCH 0533/2091] MIPS: unaligned: Fix build error on big endian R6 kernels commit 531a6d599f4304156236ebdd531aaa80be61868d upstream. Commit eeb538950367 ("MIPS: unaligned: Prevent EVA instructions on kernel unaligned accesses") renamed the Load* and Store* defines in unaligned.c to _Load* and _Store* as part of its fix. One define was missed out which causes big endian R6 kernels to fail to build. 
arch/mips/kernel/unaligned.c:880:35: error: implicit declaration of function '_StoreDW' #define StoreDW(addr, value, res) _StoreDW(addr, value, res) ^ Signed-off-by: James Cowgill Fixes: eeb538950367 ("MIPS: unaligned: Prevent EVA instructions on kernel unaligned accesses") Cc: Markos Chandras Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/10575/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/unaligned.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/kernel/unaligned.c b/arch/mips/kernel/unaligned.c index af84bef0c90de..eb3efd137fd17 100644 --- a/arch/mips/kernel/unaligned.c +++ b/arch/mips/kernel/unaligned.c @@ -438,7 +438,7 @@ do { \ : "memory"); \ } while(0) -#define StoreDW(addr, value, res) \ +#define _StoreDW(addr, value, res) \ do { \ __asm__ __volatile__ ( \ ".set\tpush\n\t" \ From 672f42875d1ad80c6f7c58db3a6dcf5b34a57e7c Mon Sep 17 00:00:00 2001 From: James Cowgill Date: Wed, 17 Jun 2015 17:12:50 +0100 Subject: [PATCH 0534/2091] MIPS: Replace add and sub instructions in relocate_kernel.S with addiu commit a4504755e7dc8d43ed2a934397032691cd03adf7 upstream. Fixes the assembler errors generated when compiling a MIPS R6 kernel with CONFIG_KEXEC on, by replacing the offending add and sub instructions with addiu instructions. Build errors: arch/mips/kernel/relocate_kernel.S: Assembler messages: arch/mips/kernel/relocate_kernel.S:27: Error: invalid operands `dadd $16,$16,8' arch/mips/kernel/relocate_kernel.S:64: Error: invalid operands `dadd $20,$20,8' arch/mips/kernel/relocate_kernel.S:65: Error: invalid operands `dadd $18,$18,8' arch/mips/kernel/relocate_kernel.S:66: Error: invalid operands `dsub $22,$22,1' scripts/Makefile.build:294: recipe for target 'arch/mips/kernel/relocate_kernel.o' failed Signed-off-by: James Cowgill Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/10558/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/relocate_kernel.S | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/mips/kernel/relocate_kernel.S b/arch/mips/kernel/relocate_kernel.S index 74bab9ddd0e19..c6bbf21650515 100644 --- a/arch/mips/kernel/relocate_kernel.S +++ b/arch/mips/kernel/relocate_kernel.S @@ -24,7 +24,7 @@ LEAF(relocate_new_kernel) process_entry: PTR_L s2, (s0) - PTR_ADD s0, s0, SZREG + PTR_ADDIU s0, s0, SZREG /* * In case of a kdump/crash kernel, the indirection page is not @@ -61,9 +61,9 @@ copy_word: /* copy page word by word */ REG_L s5, (s2) REG_S s5, (s4) - PTR_ADD s4, s4, SZREG - PTR_ADD s2, s2, SZREG - LONG_SUB s6, s6, 1 + PTR_ADDIU s4, s4, SZREG + PTR_ADDIU s2, s2, SZREG + LONG_ADDIU s6, s6, -1 beq s6, zero, process_entry b copy_word b process_entry From 00bccfd7af02de130a6fbebb3095688a439dc561 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Fri, 17 Jul 2015 15:54:41 +0100 Subject: [PATCH 0535/2091] MIPS: Malta: Don't reinitialise RTC commit 106eccb4d20f35ebc58ff2286c170d9e79c5ff68 upstream. On Malta, since commit a87ea88d8f6c ("MIPS: Malta: initialise the RTC at boot"), the RTC is reinitialised and forced into binary coded decimal (BCD) mode during init, even if the bootloader has already initialised it, and may even have already put it into binary mode (as YAMON does). This corrupts the current time, can result in the RTC seconds being an invalid BCD (e.g. 
0x1a..0x1f) for up to 6 seconds, as well as confusing YAMON for a while after reset, enough for it to report timeouts when attempting to load from TFTP (it actually uses the RTC in that code). Therefore only initialise the RTC to the extent that is necessary so that Linux avoids interfering with the bootloader setup, while also allowing it to estimate the CPU frequency without hanging, without a bootloader necessarily having done anything with the RTC (for example when the kernel is loaded via EJTAG). The divider control is configured for a 32KHZ reference clock if necessary, and the SET bit of the RTC_CONTROL register is cleared if necessary without changing any other bits (this bit will be set when coming out of reset if the battery has been disconnected). Fixes: a87ea88d8f6c ("MIPS: Malta: initialise the RTC at boot") Signed-off-by: James Hogan Reviewed-by: Paul Burton Cc: Ralf Baechle Cc: Maciej W. Rozycki Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/10739/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/mti-malta/malta-time.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/arch/mips/mti-malta/malta-time.c b/arch/mips/mti-malta/malta-time.c index 185e68261f459..ac42a8d72bcbb 100644 --- a/arch/mips/mti-malta/malta-time.c +++ b/arch/mips/mti-malta/malta-time.c @@ -165,14 +165,17 @@ unsigned int get_c0_compare_int(void) static void __init init_rtc(void) { - /* stop the clock whilst setting it up */ - CMOS_WRITE(RTC_SET | RTC_24H, RTC_CONTROL); + unsigned char freq, ctrl; - /* 32KHz time base */ - CMOS_WRITE(RTC_REF_CLCK_32KHZ, RTC_FREQ_SELECT); + /* Set 32KHz time base if not already set */ + freq = CMOS_READ(RTC_FREQ_SELECT); + if ((freq & RTC_DIV_CTL) != RTC_REF_CLCK_32KHZ) + CMOS_WRITE(RTC_REF_CLCK_32KHZ, RTC_FREQ_SELECT); - /* start the clock */ - CMOS_WRITE(RTC_24H, RTC_CONTROL); + /* Ensure SET bit is clear so RTC can run */ + ctrl = CMOS_READ(RTC_CONTROL); + if (ctrl & RTC_SET) + CMOS_WRITE(ctrl & ~RTC_SET, RTC_CONTROL); } void __init plat_time_init(void) From 6300e59fa9852292d2fafc733e75b449b2286d8e Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 19 Jul 2015 00:38:41 +0200 Subject: [PATCH 0536/2091] MIPS: Fix sched_getaffinity with MT FPAFF enabled commit 1d62d737555e1378eb62a8bba26644f7d97139d2 upstream. p->thread.user_cpus_allowed is zero-initialized and is only filled on the first sched_setaffinity call. To avoid adding overhead in the task initialization codepath, simply OR the returned mask in sched_getaffinity with p->cpus_allowed. 
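[Editor's illustration, not part of the original changelog: a minimal userspace sketch of the affected call sequence. Before this fix, on a kernel built with CONFIG_MIPS_MT_FPAFF, a task that had never called sched_setaffinity() could get back an empty mask from the query below, because thread.user_cpus_allowed was still zero-initialized.]

  #define _GNU_SOURCE
  #include <sched.h>
  #include <stdio.h>

  int main(void)
  {
          cpu_set_t mask;

          CPU_ZERO(&mask);
          /* Query the caller's CPU affinity without ever having set it. */
          if (sched_getaffinity(0, sizeof(mask), &mask) == 0)
                  printf("CPUs in affinity mask: %d\n", CPU_COUNT(&mask));
          return 0;
  }

[With the fix, the kernel ORs p->cpus_allowed into the result (and masks with the active CPUs), so the count printed above reflects the CPUs the task may actually run on.]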
Signed-off-by: Felix Fietkau Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/10740/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/mips-mt-fpaff.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/mips/kernel/mips-mt-fpaff.c b/arch/mips/kernel/mips-mt-fpaff.c index 3e4491aa6d6b2..789d7bf4fef32 100644 --- a/arch/mips/kernel/mips-mt-fpaff.c +++ b/arch/mips/kernel/mips-mt-fpaff.c @@ -154,7 +154,7 @@ asmlinkage long mipsmt_sys_sched_getaffinity(pid_t pid, unsigned int len, unsigned long __user *user_mask_ptr) { unsigned int real_len; - cpumask_t mask; + cpumask_t allowed, mask; int retval; struct task_struct *p; @@ -173,7 +173,8 @@ asmlinkage long mipsmt_sys_sched_getaffinity(pid_t pid, unsigned int len, if (retval) goto out_unlock; - cpumask_and(&mask, &p->thread.user_cpus_allowed, cpu_possible_mask); + cpumask_or(&allowed, &p->thread.user_cpus_allowed, &p->cpus_allowed); + cpumask_and(&mask, &allowed, cpu_active_mask); out_unlock: read_unlock(&tasklist_lock); From 6adda660861c3dbdc41629bef2b9d6ffad783be6 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Thu, 23 Jul 2015 18:59:52 +0200 Subject: [PATCH 0537/2091] MIPS: Export get_c0_perfcount_int() commit 0cb0985f57783c2f3c6c8ffe7e7665e80c56bd92 upstream. get_c0_perfcount_int is tested from oprofile code. If oprofile is compiled as module, get_c0_perfcount_int needs to be exported, otherwise it cannot be resolved. Fixes: a669efc4a3b4 ("MIPS: Add hook to get C0 performance counter interrupt") Signed-off-by: Felix Fietkau Cc: linux-mips@linux-mips.org Cc: abrestic@chromium.org Patchwork: https://patchwork.linux-mips.org/patch/10763/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/ath79/setup.c | 1 + arch/mips/lantiq/irq.c | 1 + arch/mips/mti-malta/malta-time.c | 1 + arch/mips/mti-sead3/sead3-time.c | 1 + arch/mips/pistachio/time.c | 1 + arch/mips/ralink/irq.c | 1 + 6 files changed, 6 insertions(+) diff --git a/arch/mips/ath79/setup.c b/arch/mips/ath79/setup.c index 7fc8397d16f21..fd2a36a79f97c 100644 --- a/arch/mips/ath79/setup.c +++ b/arch/mips/ath79/setup.c @@ -186,6 +186,7 @@ int get_c0_perfcount_int(void) { return ATH79_MISC_IRQ(5); } +EXPORT_SYMBOL_GPL(get_c0_perfcount_int); unsigned int get_c0_compare_int(void) { diff --git a/arch/mips/lantiq/irq.c b/arch/mips/lantiq/irq.c index 6ab10573490de..d01ade63492fd 100644 --- a/arch/mips/lantiq/irq.c +++ b/arch/mips/lantiq/irq.c @@ -466,6 +466,7 @@ int get_c0_perfcount_int(void) { return ltq_perfcount_irq; } +EXPORT_SYMBOL_GPL(get_c0_perfcount_int); unsigned int get_c0_compare_int(void) { diff --git a/arch/mips/mti-malta/malta-time.c b/arch/mips/mti-malta/malta-time.c index ac42a8d72bcbb..a7f7d9ffb4025 100644 --- a/arch/mips/mti-malta/malta-time.c +++ b/arch/mips/mti-malta/malta-time.c @@ -148,6 +148,7 @@ int get_c0_perfcount_int(void) return mips_cpu_perf_irq; } +EXPORT_SYMBOL_GPL(get_c0_perfcount_int); unsigned int get_c0_compare_int(void) { diff --git a/arch/mips/mti-sead3/sead3-time.c b/arch/mips/mti-sead3/sead3-time.c index e1d69895fb1de..a120b7a5a8fe4 100644 --- a/arch/mips/mti-sead3/sead3-time.c +++ b/arch/mips/mti-sead3/sead3-time.c @@ -77,6 +77,7 @@ int get_c0_perfcount_int(void) return MIPS_CPU_IRQ_BASE + cp0_perfcount_irq; return -1; } +EXPORT_SYMBOL_GPL(get_c0_perfcount_int); unsigned int get_c0_compare_int(void) { diff --git a/arch/mips/pistachio/time.c b/arch/mips/pistachio/time.c index 67889fcea8aa8..ab73f6f405bb0 100644 --- a/arch/mips/pistachio/time.c +++ 
b/arch/mips/pistachio/time.c @@ -26,6 +26,7 @@ int get_c0_perfcount_int(void) { return gic_get_c0_perfcount_int(); } +EXPORT_SYMBOL_GPL(get_c0_perfcount_int); void __init plat_time_init(void) { diff --git a/arch/mips/ralink/irq.c b/arch/mips/ralink/irq.c index 7cf91b92e9d10..199ace4ca1ad6 100644 --- a/arch/mips/ralink/irq.c +++ b/arch/mips/ralink/irq.c @@ -89,6 +89,7 @@ int get_c0_perfcount_int(void) { return rt_perfcount_irq; } +EXPORT_SYMBOL_GPL(get_c0_perfcount_int); unsigned int get_c0_compare_int(void) { From 47650397eb480f99a1ae9c43e5d79dbb220b94c3 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Mon, 27 Jul 2015 13:50:21 +0100 Subject: [PATCH 0538/2091] MIPS: do_mcheck: Fix kernel code dump with EVA commit 55c723e181ccec30fb5c672397fe69ec35967d97 upstream. If a machine check exception is raised in kernel mode, user context, with EVA enabled, then the do_mcheck handler will attempt to read the code around the EPC using EVA load instructions, i.e. as if the reads were from user mode. This will either read random user data if the process has anything mapped at the same address, or it will cause an exception which is handled by __get_user, resulting in this output: Code: (Bad address in epc) Fix by setting the current user access mode to kernel if the saved register context indicates the exception was taken in kernel mode. This causes __get_user to use normal loads to read the kernel code. Signed-off-by: James Hogan Cc: Markos Chandras Cc: Leonid Yegoshin Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/10777/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/traps.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index d2d1c1933bc9f..4db525cc87358 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -1518,6 +1518,7 @@ asmlinkage void do_mcheck(struct pt_regs *regs) const int field = 2 * sizeof(unsigned long); int multi_match = regs->cp0_status & ST0_TS; enum ctx_state prev_state; + mm_segment_t old_fs = get_fs(); prev_state = exception_enter(); show_regs(regs); @@ -1539,8 +1540,13 @@ asmlinkage void do_mcheck(struct pt_regs *regs) dump_tlb_all(); } + if (!user_mode(regs)) + set_fs(KERNEL_DS); + show_code((unsigned int __user *) regs->cp0_epc); + set_fs(old_fs); + /* * Some chips may have other causes of machine check (e.g. SB1 * graduation timer) From c6adca1ab5a611c46f7acdb4ef4990966d34184b Mon Sep 17 00:00:00 2001 From: James Hogan Date: Mon, 27 Jul 2015 13:50:22 +0100 Subject: [PATCH 0539/2091] MIPS: show_stack: Fix stack trace with EVA commit 1e77863a51698c4319587df34171bd823691a66a upstream. The show_stack() function deals exclusively with kernel contexts, but if it gets called in user context with EVA enabled, show_stacktrace() will attempt to access the stack using EVA accesses, which will either read other user mapped data, or more likely cause an exception which will be handled by __get_user(). This is easily reproduced using SysRq t to show all task states, which results in the following stack dump output: Stack : (Bad stack address) Fix by setting the current user access mode to kernel around the call to show_stacktrace(). This causes __get_user() to use normal loads to read the kernel stack. 
Now we get the correct output, like this: Stack : 00000000 80168960 00000000 004a0000 00000000 00000000 8060016c 1f3abd0c 1f172cd8 8056f09c 7ff1e450 8014fc3c 00000001 806dd0b0 0000001d 00000002 1f17c6a0 1f17c804 1f17c6a0 8066f6e0 00000000 0000000a 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 0110e800 1f3abd6c 1f17c6a0 ... Signed-off-by: James Hogan Cc: Markos Chandras Cc: Leonid Yegoshin Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/10778/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/traps.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 4db525cc87358..5f5f44edc77d4 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -192,6 +192,7 @@ static void show_stacktrace(struct task_struct *task, void show_stack(struct task_struct *task, unsigned long *sp) { struct pt_regs regs; + mm_segment_t old_fs = get_fs(); if (sp) { regs.regs[29] = (unsigned long)sp; regs.regs[31] = 0; @@ -210,7 +211,13 @@ void show_stack(struct task_struct *task, unsigned long *sp) prepare_frametrace(®s); } } + /* + * show_stack() deals exclusively with kernel mode, so be sure to access + * the stack in the kernel (not user) address space. + */ + set_fs(KERNEL_DS); show_stacktrace(task, ®s); + set_fs(old_fs); } static void show_code(unsigned int __user *pc) From 701af4aeccff8d196859dd66b1e298cacd454414 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 28 Jul 2015 19:24:24 -0700 Subject: [PATCH 0540/2091] Revert "MIPS: BCM63xx: Provide a plat_post_dma_flush hook" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 247bfb65d731350093f5d1a0a8b3d65e49c17baa upstream. This reverts commit 3cf29543413207d3ab1c3f62a88c09bb46f2264e ("MIPS: BCM63xx: Provide a plat_post_dma_flush hook") since this commit was found to prevent BCM6358 (early BMIPS4350 cores) and some BCM6368 (BMIPS4380 cores) from booting reliably. Alvaro was able to track this down to an issue specifically located to devices that use the second thread (TP1) when booting. Since BCM63xx did not have a need for plat_post_dma_flush() hook before, let's just keep things the way they were. 
Reported-by: Álvaro Fernández Rojas Reported-by: Jonas Gorski Signed-off-by: Florian Fainelli Cc: Kevin Cernekee Cc: Nicolas Schichan Cc: linux-mips@linux-mips.org Cc: blogic@openwrt.org Cc: noltari@gmail.com Cc: jogo@openwrt.org Cc: Florian Fainelli Cc: stable@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/10804/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/include/asm/mach-bcm63xx/dma-coherence.h | 10 ---------- 1 file changed, 10 deletions(-) delete mode 100644 arch/mips/include/asm/mach-bcm63xx/dma-coherence.h diff --git a/arch/mips/include/asm/mach-bcm63xx/dma-coherence.h b/arch/mips/include/asm/mach-bcm63xx/dma-coherence.h deleted file mode 100644 index 11d3b572b1b3d..0000000000000 --- a/arch/mips/include/asm/mach-bcm63xx/dma-coherence.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef __ASM_MACH_BCM63XX_DMA_COHERENCE_H -#define __ASM_MACH_BCM63XX_DMA_COHERENCE_H - -#include - -#define plat_post_dma_flush bmips_post_dma_flush - -#include - -#endif /* __ASM_MACH_BCM63XX_DMA_COHERENCE_H */ From 9bc226acd9f692206851b9323d42c7dc02c2dbdd Mon Sep 17 00:00:00 2001 From: James Hogan Date: Fri, 31 Jul 2015 16:29:38 +0100 Subject: [PATCH 0541/2091] MIPS: Flush RPS on kernel entry with EVA commit 3aff47c062b944a5e1f9af56a37a23f5295628fc upstream. When EVA is enabled, flush the Return Prediction Stack (RPS) present on some MIPS cores on entry to the kernel from user mode. This is important specifically for interAptiv with EVA enabled, otherwise kernel mode RPS mispredicts may trigger speculative fetches of user return addresses, which may be sensitive in the kernel address space due to EVA's overlapping user/kernel address spaces. Signed-off-by: James Hogan Cc: Ralf Baechle Cc: Markos Chandras Cc: Leonid Yegoshin Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/10812/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/include/asm/stackframe.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/arch/mips/include/asm/stackframe.h b/arch/mips/include/asm/stackframe.h index 28d6d9364bd1f..a71da576883c8 100644 --- a/arch/mips/include/asm/stackframe.h +++ b/arch/mips/include/asm/stackframe.h @@ -152,6 +152,31 @@ .set noreorder bltz k0, 8f move k1, sp +#ifdef CONFIG_EVA + /* + * Flush interAptiv's Return Prediction Stack (RPS) by writing + * EntryHi. Toggling Config7.RPS is slower and less portable. + * + * The RPS isn't automatically flushed when exceptions are + * taken, which can result in kernel mode speculative accesses + * to user addresses if the RPS mispredicts. That's harmless + * when user and kernel share the same address space, but with + * EVA the same user segments may be unmapped to kernel mode, + * even containing sensitive MMIO regions or invalid memory. + * + * This can happen when the kernel sets the return address to + * ret_from_* and jr's to the exception handler, which looks + * more like a tail call than a function call. If nested calls + * don't evict the last user address in the RPS, it will + * mispredict the return and fetch from a user controlled + * address into the icache. + * + * More recent EVA-capable cores with MAAR to restrict + * speculative accesses aren't affected. + */ + MFC0 k0, CP0_ENTRYHI + MTC0 k0, CP0_ENTRYHI +#endif .set reorder /* Called from user mode, new stack. 
*/ get_saved_sp From f797296322df8950e358d18e1f0168fe8d7a9e24 Mon Sep 17 00:00:00 2001 From: David Daney Date: Mon, 3 Aug 2015 17:48:43 -0700 Subject: [PATCH 0542/2091] MIPS: Make set_pte() SMP safe. commit 46011e6ea39235e4aca656673c500eac81a07a17 upstream. On MIPS the GLOBAL bit of the PTE must have the same value in any aligned pair of PTEs. These pairs of PTEs are referred to as "buddies". In a SMP system is is possible for two CPUs to be calling set_pte() on adjacent PTEs at the same time. There is a race between setting the PTE and a different CPU setting the GLOBAL bit in its buddy PTE. This race can be observed when multiple CPUs are executing vmap()/vfree() at the same time. Make setting the buddy PTE's GLOBAL bit an atomic operation to close the race condition. The case of CONFIG_64BIT_PHYS_ADDR && CONFIG_CPU_MIPS32 is *not* handled. Signed-off-by: David Daney Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/10835/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/include/asm/pgtable.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h index 819af9d057a8b..70f6e7f073b04 100644 --- a/arch/mips/include/asm/pgtable.h +++ b/arch/mips/include/asm/pgtable.h @@ -182,8 +182,39 @@ static inline void set_pte(pte_t *ptep, pte_t pteval) * Make sure the buddy is global too (if it's !none, * it better already be global) */ +#ifdef CONFIG_SMP + /* + * For SMP, multiple CPUs can race, so we need to do + * this atomically. + */ +#ifdef CONFIG_64BIT +#define LL_INSN "lld" +#define SC_INSN "scd" +#else /* CONFIG_32BIT */ +#define LL_INSN "ll" +#define SC_INSN "sc" +#endif + unsigned long page_global = _PAGE_GLOBAL; + unsigned long tmp; + + __asm__ __volatile__ ( + " .set push\n" + " .set noreorder\n" + "1: " LL_INSN " %[tmp], %[buddy]\n" + " bnez %[tmp], 2f\n" + " or %[tmp], %[tmp], %[global]\n" + " " SC_INSN " %[tmp], %[buddy]\n" + " beqz %[tmp], 1b\n" + " nop\n" + "2:\n" + " .set pop" + : [buddy] "+m" (buddy->pte), + [tmp] "=&r" (tmp) + : [global] "r" (page_global)); +#else /* !CONFIG_SMP */ if (pte_none(*buddy)) pte_val(*buddy) = pte_val(*buddy) | _PAGE_GLOBAL; +#endif /* CONFIG_SMP */ } #endif } From 14e69b5a7229454f662b976d4518e84f6eb7d825 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 6 Aug 2015 15:46:42 -0700 Subject: [PATCH 0543/2091] fsnotify: fix oops in fsnotify_clear_marks_by_group_flags() commit 8f2f3eb59dff4ec538de55f2e0592fec85966aab upstream. fsnotify_clear_marks_by_group_flags() can race with fsnotify_destroy_marks() so that when fsnotify_destroy_mark_locked() drops mark_mutex, a mark from the list iterated by fsnotify_clear_marks_by_group_flags() can be freed and thus the next entry pointer we have cached may become stale and we dereference free memory. Fix the problem by first moving marks to free to a special private list and then always free the first entry in the special list. This method is safe even when entries from the list can disappear once we drop the lock. 
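[Editor's illustration, not part of the original changelog: the unsafe shape being removed here, sketched with the names used in the diff below, is roughly the following.]

  /* UNSAFE: list_for_each_entry_safe() caches the next entry, but
   * fsnotify_destroy_mark_locked() temporarily drops mark_mutex, so a
   * concurrent fsnotify_destroy_marks() can free that cached entry
   * before the next loop iteration dereferences it. */
  mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING);
  list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) {
          if (mark->flags & flags) {
                  fsnotify_get_mark(mark);
                  fsnotify_destroy_mark_locked(mark, group);
                  fsnotify_put_mark(mark);
          }
  }
  mutex_unlock(&group->mark_mutex);

[The replacement below instead collects the matching marks on a private to_free list while holding the mutex, then frees only the first entry of that list on each pass, so no cached next pointer can outlive an unlock.]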
Signed-off-by: Jan Kara Reported-by: Ashish Sangwan Reviewed-by: Ashish Sangwan Cc: Lino Sanfilippo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/notify/mark.c | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/fs/notify/mark.c b/fs/notify/mark.c index 92e48c70f0f05..39ddcaf0918f1 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -412,16 +412,36 @@ void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group, unsigned int flags) { struct fsnotify_mark *lmark, *mark; + LIST_HEAD(to_free); + /* + * We have to be really careful here. Anytime we drop mark_mutex, e.g. + * fsnotify_clear_marks_by_inode() can come and free marks. Even in our + * to_free list so we have to use mark_mutex even when accessing that + * list. And freeing mark requires us to drop mark_mutex. So we can + * reliably free only the first mark in the list. That's why we first + * move marks to free to to_free list in one go and then free marks in + * to_free list one by one. + */ mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING); list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) { - if (mark->flags & flags) { - fsnotify_get_mark(mark); - fsnotify_destroy_mark_locked(mark, group); - fsnotify_put_mark(mark); - } + if (mark->flags & flags) + list_move(&mark->g_list, &to_free); } mutex_unlock(&group->mark_mutex); + + while (1) { + mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING); + if (list_empty(&to_free)) { + mutex_unlock(&group->mark_mutex); + break; + } + mark = list_first_entry(&to_free, struct fsnotify_mark, g_list); + fsnotify_get_mark(mark); + fsnotify_destroy_mark_locked(mark, group); + mutex_unlock(&group->mark_mutex); + fsnotify_put_mark(mark); + } } /* From 57689a9f86b856fcdfd3102cb95b7e930856524a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 28 Jun 2015 09:19:26 +0100 Subject: [PATCH 0544/2091] drm/i915: Declare the swizzling unknown for L-shaped configurations commit 5eb3e5a5e11d14f9deb2a4b83555443b69ab9940 upstream. The old style of memory interleaving swizzled upto the end of the first even bank of memory, and then used the remainder as unswizzled on the unpaired bank - i.e. swizzling is not constant for all memory. This causes problems when we try to migrate memory and so the kernel prevents migration at all when we detect L-shaped inconsistent swizzling. However, this issue also extends to userspace who try to manually detile into memory as the swizzling for an individual page is unknown (it depends on its physical address only known to the kernel), userspace cannot correctly swizzle. Note that this is a new attempt for the previously merged one, reverted in commit d82c0ba6e306f079407f07003e53c262d683397b Author: Daniel Vetter Date: Tue Jul 14 12:29:27 2015 +0200 Revert "drm/i915: Declare the swizzling unknown for L-shaped configurations" This is cc: stable since we need it to fix up troubles with wc cpu mmaps that userspace recently started to use widely. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91105 Signed-off-by: Chris Wilson Cc: Daniel Vetter [danvet: Add note about previous (failed attempt).] 
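[Editor's illustration, not part of the original commit message: userspace that manually detiles can check the reported physical swizzle mode and fall back when it is unknown. A hedged sketch, assuming the usual libdrm include paths and the uapi names visible in the diff below (struct drm_i915_gem_get_tiling, I915_BIT_6_SWIZZLE_UNKNOWN), could look like this.]

  #include <stdint.h>
  #include <xf86drm.h>
  #include <i915_drm.h>

  /* Returns 1 if userspace may safely compute bit-6 swizzling itself. */
  static int can_manually_detile(int fd, uint32_t handle)
  {
          struct drm_i915_gem_get_tiling get = { .handle = handle };

          if (drmIoctl(fd, DRM_IOCTL_I915_GEM_GET_TILING, &get))
                  return 0;
          /* On L-shaped memory the kernel now reports UNKNOWN here,
           * because swizzling is not constant across all pages. */
          return get.phys_swizzle_mode != I915_BIT_6_SWIZZLE_UNKNOWN;
  }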
Signed-off-by: Daniel Vetter Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_gem_tiling.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index 6377b22269ad1..7ee23d1d1e744 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -464,7 +464,10 @@ i915_gem_get_tiling(struct drm_device *dev, void *data, } /* Hide bit 17 from the user -- see comment in i915_gem_set_tiling */ - args->phys_swizzle_mode = args->swizzle_mode; + if (dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) + args->phys_swizzle_mode = I915_BIT_6_SWIZZLE_UNKNOWN; + else + args->phys_swizzle_mode = args->swizzle_mode; if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_17) args->swizzle_mode = I915_BIT_6_SWIZZLE_9; if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_10_17) From 7f62822ffdb19561d89f9da7956d125f95300eed Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Jul 2015 09:50:42 +0100 Subject: [PATCH 0545/2091] drm/i915: Replace WARN inside I915_READ64_2x32 with retry loop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit ee0a227b7ac6e75f28e10269f81c7ec6eb600952 upstream. Since we may conceivably encounter situations where the upper part of the 64bit register changes between reads, for example when a timestamp counter overflows, change the WARN into a retry loop. Signed-off-by: Chris Wilson Cc: Michał Winiarski Signed-off-by: Daniel Vetter Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_drv.h | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 8ae6f7f06b3a0..683a9b004c117 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3190,15 +3190,14 @@ int intel_freq_opcode(struct drm_i915_private *dev_priv, int val); #define I915_READ64(reg) dev_priv->uncore.funcs.mmio_readq(dev_priv, (reg), true) #define I915_READ64_2x32(lower_reg, upper_reg) ({ \ - u32 upper = I915_READ(upper_reg); \ - u32 lower = I915_READ(lower_reg); \ - u32 tmp = I915_READ(upper_reg); \ - if (upper != tmp) { \ - upper = tmp; \ - lower = I915_READ(lower_reg); \ - WARN_ON(I915_READ(upper_reg) != upper); \ - } \ - (u64)upper << 32 | lower; }) + u32 upper, lower, tmp; \ + tmp = I915_READ(upper_reg); \ + do { \ + upper = tmp; \ + lower = I915_READ(lower_reg); \ + tmp = I915_READ(upper_reg); \ + } while (upper != tmp); \ + (u64)upper << 32 | lower; }) #define POSTING_READ(reg) (void)I915_READ_NOTRACE(reg) #define POSTING_READ16(reg) (void)I915_READ16_NOTRACE(reg) From 6f9dbb525abc7b21b76bdee6558b6e706425d1a8 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 23 Jul 2015 10:01:09 -0400 Subject: [PATCH 0546/2091] drm/radeon: rework audio detect (v4) commit d0ea397e22f9ad0113c1dbdaab14eded050472eb upstream. 1. Always assign audio function pointers even if the display does not support audio. We need to properly disable the audio stream when when using a non-audio capable monitor. Fixes purple line on some hdmi monitors. 2. Check if a pin is in use by another encoder before disabling it. v2: make sure we've fetched the edid before checking audio and look up the encoder before calling audio_detect since connector->encoder may not be assigned yet. Separate pin and afmt. They are allocated at different times and have no dependency on eachother. 
v3: fix connector fetching in encoder functions v4: fix missed dig->pin check in dce6_afmt_write_latency_fields bugs: https://bugzilla.kernel.org/show_bug.cgi?id=93701 https://bugzilla.redhat.com/show_bug.cgi?id=1236337 https://bugs.freedesktop.org/show_bug.cgi?id=91041 Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/dce6_afmt.c | 62 ++++----- drivers/gpu/drm/radeon/radeon_audio.c | 143 +++++++++------------ drivers/gpu/drm/radeon/radeon_audio.h | 3 +- drivers/gpu/drm/radeon/radeon_connectors.c | 18 ++- drivers/gpu/drm/radeon/radeon_mode.h | 2 +- 5 files changed, 105 insertions(+), 123 deletions(-) diff --git a/drivers/gpu/drm/radeon/dce6_afmt.c b/drivers/gpu/drm/radeon/dce6_afmt.c index 68fd9fc677e35..44480c1b9738c 100644 --- a/drivers/gpu/drm/radeon/dce6_afmt.c +++ b/drivers/gpu/drm/radeon/dce6_afmt.c @@ -93,30 +93,26 @@ void dce6_afmt_select_pin(struct drm_encoder *encoder) struct radeon_device *rdev = encoder->dev->dev_private; struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv; - u32 offset; - if (!dig || !dig->afmt || !dig->afmt->pin) + if (!dig || !dig->afmt || !dig->pin) return; - offset = dig->afmt->offset; - - WREG32(AFMT_AUDIO_SRC_CONTROL + offset, - AFMT_AUDIO_SRC_SELECT(dig->afmt->pin->id)); + WREG32(AFMT_AUDIO_SRC_CONTROL + dig->afmt->offset, + AFMT_AUDIO_SRC_SELECT(dig->pin->id)); } void dce6_afmt_write_latency_fields(struct drm_encoder *encoder, - struct drm_connector *connector, struct drm_display_mode *mode) + struct drm_connector *connector, + struct drm_display_mode *mode) { struct radeon_device *rdev = encoder->dev->dev_private; struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv; - u32 tmp = 0, offset; + u32 tmp = 0; - if (!dig || !dig->afmt || !dig->afmt->pin) + if (!dig || !dig->afmt || !dig->pin) return; - offset = dig->afmt->pin->offset; - if (mode->flags & DRM_MODE_FLAG_INTERLACE) { if (connector->latency_present[1]) tmp = VIDEO_LIPSYNC(connector->video_latency[1]) | @@ -130,24 +126,24 @@ void dce6_afmt_write_latency_fields(struct drm_encoder *encoder, else tmp = VIDEO_LIPSYNC(0) | AUDIO_LIPSYNC(0); } - WREG32_ENDPOINT(offset, AZ_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC, tmp); + WREG32_ENDPOINT(dig->pin->offset, + AZ_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC, tmp); } void dce6_afmt_hdmi_write_speaker_allocation(struct drm_encoder *encoder, - u8 *sadb, int sad_count) + u8 *sadb, int sad_count) { struct radeon_device *rdev = encoder->dev->dev_private; struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv; - u32 offset, tmp; + u32 tmp; - if (!dig || !dig->afmt || !dig->afmt->pin) + if (!dig || !dig->afmt || !dig->pin) return; - offset = dig->afmt->pin->offset; - /* program the speaker allocation */ - tmp = RREG32_ENDPOINT(offset, AZ_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER); + tmp = RREG32_ENDPOINT(dig->pin->offset, + AZ_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER); tmp &= ~(DP_CONNECTION | SPEAKER_ALLOCATION_MASK); /* set HDMI mode */ tmp |= HDMI_CONNECTION; @@ -155,24 +151,24 @@ void dce6_afmt_hdmi_write_speaker_allocation(struct drm_encoder *encoder, tmp |= SPEAKER_ALLOCATION(sadb[0]); else tmp |= SPEAKER_ALLOCATION(5); /* stereo */ - WREG32_ENDPOINT(offset, AZ_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, tmp); + WREG32_ENDPOINT(dig->pin->offset, + AZ_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, tmp); } void 
dce6_afmt_dp_write_speaker_allocation(struct drm_encoder *encoder, - u8 *sadb, int sad_count) + u8 *sadb, int sad_count) { struct radeon_device *rdev = encoder->dev->dev_private; struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv; - u32 offset, tmp; + u32 tmp; - if (!dig || !dig->afmt || !dig->afmt->pin) + if (!dig || !dig->afmt || !dig->pin) return; - offset = dig->afmt->pin->offset; - /* program the speaker allocation */ - tmp = RREG32_ENDPOINT(offset, AZ_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER); + tmp = RREG32_ENDPOINT(dig->pin->offset, + AZ_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER); tmp &= ~(HDMI_CONNECTION | SPEAKER_ALLOCATION_MASK); /* set DP mode */ tmp |= DP_CONNECTION; @@ -180,13 +176,13 @@ void dce6_afmt_dp_write_speaker_allocation(struct drm_encoder *encoder, tmp |= SPEAKER_ALLOCATION(sadb[0]); else tmp |= SPEAKER_ALLOCATION(5); /* stereo */ - WREG32_ENDPOINT(offset, AZ_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, tmp); + WREG32_ENDPOINT(dig->pin->offset, + AZ_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, tmp); } void dce6_afmt_write_sad_regs(struct drm_encoder *encoder, - struct cea_sad *sads, int sad_count) + struct cea_sad *sads, int sad_count) { - u32 offset; int i; struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv; @@ -206,11 +202,9 @@ void dce6_afmt_write_sad_regs(struct drm_encoder *encoder, { AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR13, HDMI_AUDIO_CODING_TYPE_WMA_PRO }, }; - if (!dig || !dig->afmt || !dig->afmt->pin) + if (!dig || !dig->afmt || !dig->pin) return; - offset = dig->afmt->pin->offset; - for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) { u32 value = 0; u8 stereo_freqs = 0; @@ -237,7 +231,7 @@ void dce6_afmt_write_sad_regs(struct drm_encoder *encoder, value |= SUPPORTED_FREQUENCIES_STEREO(stereo_freqs); - WREG32_ENDPOINT(offset, eld_reg_to_type[i][0], value); + WREG32_ENDPOINT(dig->pin->offset, eld_reg_to_type[i][0], value); } } @@ -253,7 +247,7 @@ void dce6_audio_enable(struct radeon_device *rdev, } void dce6_hdmi_audio_set_dto(struct radeon_device *rdev, - struct radeon_crtc *crtc, unsigned int clock) + struct radeon_crtc *crtc, unsigned int clock) { /* Two dtos; generally use dto0 for HDMI */ u32 value = 0; @@ -272,7 +266,7 @@ void dce6_hdmi_audio_set_dto(struct radeon_device *rdev, } void dce6_dp_audio_set_dto(struct radeon_device *rdev, - struct radeon_crtc *crtc, unsigned int clock) + struct radeon_crtc *crtc, unsigned int clock) { /* Two dtos; generally use dto1 for DP */ u32 value = 0; diff --git a/drivers/gpu/drm/radeon/radeon_audio.c b/drivers/gpu/drm/radeon/radeon_audio.c index fa719c53449bc..59b3d3221294c 100644 --- a/drivers/gpu/drm/radeon/radeon_audio.c +++ b/drivers/gpu/drm/radeon/radeon_audio.c @@ -245,6 +245,28 @@ static struct radeon_audio_funcs dce6_dp_funcs = { static void radeon_audio_enable(struct radeon_device *rdev, struct r600_audio_pin *pin, u8 enable_mask) { + struct drm_encoder *encoder; + struct radeon_encoder *radeon_encoder; + struct radeon_encoder_atom_dig *dig; + int pin_count = 0; + + if (!pin) + return; + + if (rdev->mode_info.mode_config_initialized) { + list_for_each_entry(encoder, &rdev->ddev->mode_config.encoder_list, head) { + if (radeon_encoder_is_digital(encoder)) { + radeon_encoder = to_radeon_encoder(encoder); + dig = radeon_encoder->enc_priv; + if (dig->pin == pin) + pin_count++; + } + } + + if ((pin_count > 1) && (enable_mask == 0)) + return; + } + if (rdev->audio.funcs->enable) 
rdev->audio.funcs->enable(rdev, pin, enable_mask); } @@ -336,24 +358,13 @@ void radeon_audio_endpoint_wreg(struct radeon_device *rdev, u32 offset, static void radeon_audio_write_sad_regs(struct drm_encoder *encoder) { - struct radeon_encoder *radeon_encoder; - struct drm_connector *connector; - struct radeon_connector *radeon_connector = NULL; + struct drm_connector *connector = radeon_get_connector_for_encoder(encoder); + struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); struct cea_sad *sads; int sad_count; - list_for_each_entry(connector, - &encoder->dev->mode_config.connector_list, head) { - if (connector->encoder == encoder) { - radeon_connector = to_radeon_connector(connector); - break; - } - } - - if (!radeon_connector) { - DRM_ERROR("Couldn't find encoder's connector\n"); + if (!connector) return; - } sad_count = drm_edid_to_sad(radeon_connector_edid(connector), &sads); if (sad_count <= 0) { @@ -362,8 +373,6 @@ static void radeon_audio_write_sad_regs(struct drm_encoder *encoder) } BUG_ON(!sads); - radeon_encoder = to_radeon_encoder(encoder); - if (radeon_encoder->audio && radeon_encoder->audio->write_sad_regs) radeon_encoder->audio->write_sad_regs(encoder, sads, sad_count); @@ -372,27 +381,16 @@ static void radeon_audio_write_sad_regs(struct drm_encoder *encoder) static void radeon_audio_write_speaker_allocation(struct drm_encoder *encoder) { + struct drm_connector *connector = radeon_get_connector_for_encoder(encoder); struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); - struct drm_connector *connector; - struct radeon_connector *radeon_connector = NULL; u8 *sadb = NULL; int sad_count; - list_for_each_entry(connector, - &encoder->dev->mode_config.connector_list, head) { - if (connector->encoder == encoder) { - radeon_connector = to_radeon_connector(connector); - break; - } - } - - if (!radeon_connector) { - DRM_ERROR("Couldn't find encoder's connector\n"); + if (!connector) return; - } - sad_count = drm_edid_to_speaker_allocation( - radeon_connector_edid(connector), &sadb); + sad_count = drm_edid_to_speaker_allocation(radeon_connector_edid(connector), + &sadb); if (sad_count < 0) { DRM_DEBUG("Couldn't read Speaker Allocation Data Block: %d\n", sad_count); @@ -406,26 +404,13 @@ static void radeon_audio_write_speaker_allocation(struct drm_encoder *encoder) } static void radeon_audio_write_latency_fields(struct drm_encoder *encoder, - struct drm_display_mode *mode) + struct drm_display_mode *mode) { - struct radeon_encoder *radeon_encoder; - struct drm_connector *connector; - struct radeon_connector *radeon_connector = 0; - - list_for_each_entry(connector, - &encoder->dev->mode_config.connector_list, head) { - if (connector->encoder == encoder) { - radeon_connector = to_radeon_connector(connector); - break; - } - } + struct drm_connector *connector = radeon_get_connector_for_encoder(encoder); + struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); - if (!radeon_connector) { - DRM_ERROR("Couldn't find encoder's connector\n"); + if (!connector) return; - } - - radeon_encoder = to_radeon_encoder(encoder); if (radeon_encoder->audio && radeon_encoder->audio->write_latency_fields) radeon_encoder->audio->write_latency_fields(encoder, connector, mode); @@ -451,29 +436,23 @@ static void radeon_audio_select_pin(struct drm_encoder *encoder) } void radeon_audio_detect(struct drm_connector *connector, + struct drm_encoder *encoder, enum drm_connector_status status) { - struct radeon_device *rdev; - struct radeon_encoder *radeon_encoder; + struct 
drm_device *dev = connector->dev; + struct radeon_device *rdev = dev->dev_private; + struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); struct radeon_encoder_atom_dig *dig; - if (!connector || !connector->encoder) + if (!radeon_audio_chipset_supported(rdev)) return; - rdev = connector->encoder->dev->dev_private; - - if (!radeon_audio_chipset_supported(rdev)) + if (!radeon_encoder_is_digital(encoder)) return; - radeon_encoder = to_radeon_encoder(connector->encoder); dig = radeon_encoder->enc_priv; if (status == connector_status_connected) { - if (!drm_detect_monitor_audio(radeon_connector_edid(connector))) { - radeon_encoder->audio = NULL; - return; - } - if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort) { struct radeon_connector *radeon_connector = to_radeon_connector(connector); @@ -486,11 +465,17 @@ void radeon_audio_detect(struct drm_connector *connector, radeon_encoder->audio = rdev->audio.hdmi_funcs; } - dig->afmt->pin = radeon_audio_get_pin(connector->encoder); - radeon_audio_enable(rdev, dig->afmt->pin, 0xf); + if (drm_detect_monitor_audio(radeon_connector_edid(connector))) { + if (!dig->pin) + dig->pin = radeon_audio_get_pin(encoder); + radeon_audio_enable(rdev, dig->pin, 0xf); + } else { + radeon_audio_enable(rdev, dig->pin, 0); + dig->pin = NULL; + } } else { - radeon_audio_enable(rdev, dig->afmt->pin, 0); - dig->afmt->pin = NULL; + radeon_audio_enable(rdev, dig->pin, 0); + dig->pin = NULL; } } @@ -518,29 +503,18 @@ static void radeon_audio_set_dto(struct drm_encoder *encoder, unsigned int clock } static int radeon_audio_set_avi_packet(struct drm_encoder *encoder, - struct drm_display_mode *mode) + struct drm_display_mode *mode) { struct radeon_device *rdev = encoder->dev->dev_private; struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv; - struct drm_connector *connector; - struct radeon_connector *radeon_connector = NULL; + struct drm_connector *connector = radeon_get_connector_for_encoder(encoder); u8 buffer[HDMI_INFOFRAME_HEADER_SIZE + HDMI_AVI_INFOFRAME_SIZE]; struct hdmi_avi_infoframe frame; int err; - list_for_each_entry(connector, - &encoder->dev->mode_config.connector_list, head) { - if (connector->encoder == encoder) { - radeon_connector = to_radeon_connector(connector); - break; - } - } - - if (!radeon_connector) { - DRM_ERROR("Couldn't find encoder's connector\n"); - return -ENOENT; - } + if (!connector) + return -EINVAL; err = drm_hdmi_avi_infoframe_from_display_mode(&frame, mode); if (err < 0) { @@ -563,8 +537,8 @@ static int radeon_audio_set_avi_packet(struct drm_encoder *encoder, return err; } - if (dig && dig->afmt && - radeon_encoder->audio && radeon_encoder->audio->set_avi_packet) + if (dig && dig->afmt && radeon_encoder->audio && + radeon_encoder->audio->set_avi_packet) radeon_encoder->audio->set_avi_packet(rdev, dig->afmt->offset, buffer, sizeof(buffer)); @@ -745,7 +719,7 @@ static void radeon_audio_hdmi_mode_set(struct drm_encoder *encoder, } static void radeon_audio_dp_mode_set(struct drm_encoder *encoder, - struct drm_display_mode *mode) + struct drm_display_mode *mode) { struct drm_device *dev = encoder->dev; struct radeon_device *rdev = dev->dev_private; @@ -756,6 +730,9 @@ static void radeon_audio_dp_mode_set(struct drm_encoder *encoder, struct radeon_connector_atom_dig *dig_connector = radeon_connector->con_priv; + if (!connector) + return; + if (!dig || !dig->afmt) return; @@ -774,7 +751,7 @@ static void radeon_audio_dp_mode_set(struct 
drm_encoder *encoder, } void radeon_audio_mode_set(struct drm_encoder *encoder, - struct drm_display_mode *mode) + struct drm_display_mode *mode) { struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); diff --git a/drivers/gpu/drm/radeon/radeon_audio.h b/drivers/gpu/drm/radeon/radeon_audio.h index 8438304f71395..059cc3012062a 100644 --- a/drivers/gpu/drm/radeon/radeon_audio.h +++ b/drivers/gpu/drm/radeon/radeon_audio.h @@ -68,7 +68,8 @@ struct radeon_audio_funcs int radeon_audio_init(struct radeon_device *rdev); void radeon_audio_detect(struct drm_connector *connector, - enum drm_connector_status status); + struct drm_encoder *encoder, + enum drm_connector_status status); u32 radeon_audio_endpoint_rreg(struct radeon_device *rdev, u32 offset, u32 reg); void radeon_audio_endpoint_wreg(struct radeon_device *rdev, diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index cebb65e07e1d1..94b21ae70ef72 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -1379,8 +1379,16 @@ radeon_dvi_detect(struct drm_connector *connector, bool force) /* updated in get modes as well since we need to know if it's analog or digital */ radeon_connector_update_scratch_regs(connector, ret); - if (radeon_audio != 0) - radeon_audio_detect(connector, ret); + if ((radeon_audio != 0) && radeon_connector->use_digital) { + const struct drm_connector_helper_funcs *connector_funcs = + connector->helper_private; + + encoder = connector_funcs->best_encoder(connector); + if (encoder && (encoder->encoder_type == DRM_MODE_ENCODER_TMDS)) { + radeon_connector_get_edid(connector); + radeon_audio_detect(connector, encoder, ret); + } + } exit: pm_runtime_mark_last_busy(connector->dev->dev); @@ -1717,8 +1725,10 @@ radeon_dp_detect(struct drm_connector *connector, bool force) radeon_connector_update_scratch_regs(connector, ret); - if (radeon_audio != 0) - radeon_audio_detect(connector, ret); + if ((radeon_audio != 0) && encoder) { + radeon_connector_get_edid(connector); + radeon_audio_detect(connector, encoder, ret); + } out: pm_runtime_mark_last_busy(connector->dev->dev); diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h index f01c797b78cf0..9af2d8398e90f 100644 --- a/drivers/gpu/drm/radeon/radeon_mode.h +++ b/drivers/gpu/drm/radeon/radeon_mode.h @@ -237,7 +237,6 @@ struct radeon_afmt { int offset; bool last_buffer_filled_status; int id; - struct r600_audio_pin *pin; }; struct radeon_mode_info { @@ -439,6 +438,7 @@ struct radeon_encoder_atom_dig { uint8_t backlight_level; int panel_mode; struct radeon_afmt *afmt; + struct r600_audio_pin *pin; int active_mst_links; }; From 7b449d0568bc9522969009d52337b46f6ce8a2f1 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 27 Jul 2015 19:24:31 -0400 Subject: [PATCH 0547/2091] drm/radeon/combios: add some validation of lvds values commit 0a90a0cff9f429f886f423967ae053150dce9259 upstream. Fixes a broken hsync start value uncovered by: abc0b1447d4974963548777a5ba4a4457c82c426 (drm: Perform basic sanity checks on probed modes) The driver handled the bad hsync start elsewhere, but the above commit prevented it from getting added. 
bug: https://bugs.freedesktop.org/show_bug.cgi?id=91401 Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/radeon_combios.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c index 3e5f6b71f3ada..c097d3a82bda7 100644 --- a/drivers/gpu/drm/radeon/radeon_combios.c +++ b/drivers/gpu/drm/radeon/radeon_combios.c @@ -1255,10 +1255,15 @@ struct radeon_encoder_lvds *radeon_combios_get_lvds_info(struct radeon_encoder if ((RBIOS16(tmp) == lvds->native_mode.hdisplay) && (RBIOS16(tmp + 2) == lvds->native_mode.vdisplay)) { + u32 hss = (RBIOS16(tmp + 21) - RBIOS16(tmp + 19) - 1) * 8; + + if (hss > lvds->native_mode.hdisplay) + hss = (10 - 1) * 8; + lvds->native_mode.htotal = lvds->native_mode.hdisplay + (RBIOS16(tmp + 17) - RBIOS16(tmp + 19)) * 8; lvds->native_mode.hsync_start = lvds->native_mode.hdisplay + - (RBIOS16(tmp + 21) - RBIOS16(tmp + 19) - 1) * 8; + hss; lvds->native_mode.hsync_end = lvds->native_mode.hsync_start + (RBIOS8(tmp + 23) * 8); From 220dad5a140841103c0f25ccc659e4ec6035dc61 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 3 Aug 2015 17:24:10 +0200 Subject: [PATCH 0548/2091] drm/dp-mst: Remove debug WARN_ON commit 42639ba554655c280ae6cb72df0522b1201f2961 upstream. Apparently been in there since forever and fairly easy to hit when hotplugging really fast. I can do that since my mst hub has a manual button to flick the hpd line for reprobing. The resulting WARNING spam isn't pretty. Cc: Dave Airlie Reviewed-by: Thierry Reding Reviewed-by: Ander Conselvan de Oliveira Signed-off-by: Daniel Vetter Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_dp_mst_topology.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index 778bbb6425b80..b0487c9f018cf 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -1294,7 +1294,6 @@ static int drm_dp_send_sideband_msg(struct drm_dp_mst_topology_mgr *mgr, goto retry; } DRM_DEBUG_KMS("failed to dpcd write %d %d\n", tosend, ret); - WARN(1, "fail\n"); return -EIO; } From 65f3a9d893f17796575aaa0c43898edf1c709eb6 Mon Sep 17 00:00:00 2001 From: Brian King Date: Tue, 14 Jul 2015 11:41:29 -0500 Subject: [PATCH 0549/2091] ipr: Fix locking for unit attention handling commit 36b8e180e1e929e00b351c3b72aab3147fc14116 upstream. Make sure we have the host lock held when calling scsi_report_bus_reset. Fixes a crash seen as the __devices list in the scsi host was changing as we were iterating through it. Reviewed-by: Wen Xiong Reviewed-by: Gabriel Krisman Bertazi Signed-off-by: Brian King Reviewed-by: Martin K. 
Petersen Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/ipr.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index 882744852aacb..486ffaf5109a9 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -6263,21 +6263,23 @@ static void ipr_scsi_done(struct ipr_cmnd *ipr_cmd) struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; struct scsi_cmnd *scsi_cmd = ipr_cmd->scsi_cmd; u32 ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc); - unsigned long hrrq_flags; + unsigned long lock_flags; scsi_set_resid(scsi_cmd, be32_to_cpu(ipr_cmd->s.ioasa.hdr.residual_data_len)); if (likely(IPR_IOASC_SENSE_KEY(ioasc) == 0)) { scsi_dma_unmap(scsi_cmd); - spin_lock_irqsave(ipr_cmd->hrrq->lock, hrrq_flags); + spin_lock_irqsave(ipr_cmd->hrrq->lock, lock_flags); list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); scsi_cmd->scsi_done(scsi_cmd); - spin_unlock_irqrestore(ipr_cmd->hrrq->lock, hrrq_flags); + spin_unlock_irqrestore(ipr_cmd->hrrq->lock, lock_flags); } else { - spin_lock_irqsave(ipr_cmd->hrrq->lock, hrrq_flags); + spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); + spin_lock(&ipr_cmd->hrrq->_lock); ipr_erp_start(ioa_cfg, ipr_cmd); - spin_unlock_irqrestore(ipr_cmd->hrrq->lock, hrrq_flags); + spin_unlock(&ipr_cmd->hrrq->_lock); + spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); } } From b91250c3f7be2ac7705027f06a0edb98194c33ed Mon Sep 17 00:00:00 2001 From: Brian King Date: Tue, 14 Jul 2015 11:41:31 -0500 Subject: [PATCH 0550/2091] ipr: Fix incorrect trace indexing commit bb7c54339e6a10ecce5c4961adf5e75b3cf0af30 upstream. When ipr's internal driver trace was changed to an atomic, a signed/unsigned bug slipped in which results in us indexing backwards in our memory buffer writing on memory that does not belong to us. This patch fixes this by removing the modulo and instead just mask off the low bits. Tested-by: Wen Xiong Reviewed-by: Wen Xiong Reviewed-by: Gabriel Krisman Bertazi Signed-off-by: Brian King Reviewed-by: Martin K. 
Petersen Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/ipr.c | 5 +++-- drivers/scsi/ipr.h | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index 486ffaf5109a9..7ae9cd743eb27 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -599,9 +599,10 @@ static void ipr_trc_hook(struct ipr_cmnd *ipr_cmd, { struct ipr_trace_entry *trace_entry; struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; + unsigned int trace_index; - trace_entry = &ioa_cfg->trace[atomic_add_return - (1, &ioa_cfg->trace_index)%IPR_NUM_TRACE_ENTRIES]; + trace_index = atomic_add_return(1, &ioa_cfg->trace_index) & IPR_TRACE_INDEX_MASK; + trace_entry = &ioa_cfg->trace[trace_index]; trace_entry->time = jiffies; trace_entry->op_code = ipr_cmd->ioarcb.cmd_pkt.cdb[0]; trace_entry->type = type; diff --git a/drivers/scsi/ipr.h b/drivers/scsi/ipr.h index 73790a1d09690..6b97ee45c7b46 100644 --- a/drivers/scsi/ipr.h +++ b/drivers/scsi/ipr.h @@ -1486,6 +1486,7 @@ struct ipr_ioa_cfg { #define IPR_NUM_TRACE_INDEX_BITS 8 #define IPR_NUM_TRACE_ENTRIES (1 << IPR_NUM_TRACE_INDEX_BITS) +#define IPR_TRACE_INDEX_MASK (IPR_NUM_TRACE_ENTRIES - 1) #define IPR_TRACE_SIZE (sizeof(struct ipr_trace_entry) * IPR_NUM_TRACE_ENTRIES) char trace_start[8]; #define IPR_TRACE_START_LABEL "trace" From ccd98d0cc1bd2c31a065258256727217e112db96 Mon Sep 17 00:00:00 2001 From: Brian King Date: Tue, 14 Jul 2015 11:41:33 -0500 Subject: [PATCH 0551/2091] ipr: Fix invalid array indexing for HRRQ commit 3f1c0581310d5d94bd72740231507e763a6252a4 upstream. Fixes another signed / unsigned array indexing bug in the ipr driver. Currently, when hrrq_index wraps, it becomes a negative number. We do the modulo, but still have a negative number, so we end up indexing backwards in the array. Given where the hrrq array is located in memory, we probably won't actually reference memory we don't own, but nonetheless ipr is still looking at data within struct ipr_ioa_cfg and interpreting it as struct ipr_hrr_queue data, so bad things could certainly happen. Each ipr adapter has anywhere from 1 to 16 HRRQs. By default, we use 2 on new adapters. Let's take an example: Assume ioa_cfg->hrrq_index=0x7fffffffe and ioa_cfg->hrrq_num=4: The atomic_add_return will then return -1. We mod this with 3 and get -2, add one and get -1 for an array index. On adapters which support more than a single HRRQ, we dedicate HRRQ to adapter initialization and error interrupts so that we can optimize the other queues for fast path I/O. So all normal I/O uses HRRQ 1-15. So we want to spread the I/O requests across those HRRQs. With the default module parameter settings, this bug won't hit, only when someone sets the ipr.number_of_msix parameter to a value larger than 3 is when bad things start to happen. Tested-by: Wen Xiong Reviewed-by: Wen Xiong Reviewed-by: Gabriel Krisman Bertazi Signed-off-by: Brian King Reviewed-by: Martin K. 
Petersen Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/ipr.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index 7ae9cd743eb27..a9aa38903efef 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -1052,10 +1052,15 @@ static void ipr_send_blocking_cmd(struct ipr_cmnd *ipr_cmd, static int ipr_get_hrrq_index(struct ipr_ioa_cfg *ioa_cfg) { + unsigned int hrrq; + if (ioa_cfg->hrrq_num == 1) - return 0; - else - return (atomic_add_return(1, &ioa_cfg->hrrq_index) % (ioa_cfg->hrrq_num - 1)) + 1; + hrrq = 0; + else { + hrrq = atomic_add_return(1, &ioa_cfg->hrrq_index); + hrrq = (hrrq % (ioa_cfg->hrrq_num - 1)) + 1; + } + return hrrq; } /** From 4e200fcacc37eebe93acfc3d751b6b7b79bf85a1 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Mon, 20 Jul 2015 20:31:25 +0300 Subject: [PATCH 0552/2091] Bluetooth: Fix NULL pointer dereference in smp_conn_security commit 25ba265390c09b0a2b2f3fd9ba82e37248b7a371 upstream. The l2cap_conn->smp pointer may be NULL for various valid reasons where SMP has failed to initialize properly. One such scenario is when crypto support is missing, another when the adapter has been powered on through a legacy method. The smp_conn_security() function should have the appropriate check for this situation to avoid NULL pointer dereferences. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/smp.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 1ab3dc9c8f99b..7b815bcc8c9b5 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -2295,6 +2295,10 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level) return 1; chan = conn->smp; + if (!chan) { + BT_ERR("SMP security requested but not available"); + return 1; + } if (!hci_dev_test_flag(hcon->hdev, HCI_LE_ENABLED)) return 1; From 9cd4b78cd56489755fcba685b5e1a243cea6e6cf Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 15 Jun 2015 17:25:16 +0900 Subject: [PATCH 0553/2091] dmaengine: pl330: Fix overflow when reporting residue in memcpy commit ae128293d97404f491dc76f1843c7adacfec3441 upstream. During memcpy operations the residue was always set to an u32 overflowed value. In pl330_tx_status() function number of currently transferred bytes was subtracted from internal "bytes_requested" field. However this "bytes_requested" was not initialized at start to length of memcpy buffer so transferred bytes were subtracted from 0 causing overflow. Signed-off-by: Krzysztof Kozlowski Fixes: aee4d1fac887 ("dmaengine: pl330: improve pl330_tx_status() function") Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/pl330.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index 340f9e607cd8b..8652388e77d69 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -2621,6 +2621,7 @@ pl330_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dst, desc->rqcfg.brst_len = 1; desc->rqcfg.brst_len = get_burst_len(desc, len); + desc->bytes_requested = len; desc->txd.flags = flags; From e925654834c583e62d08777c6993e20a362a69ac Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 15 Jun 2015 23:00:09 +0900 Subject: [PATCH 0554/2091] dmaengine: pl330: Really fix choppy sound because of wrong residue calculation commit 5dd90e5b91e0f5c925b12b132c7cd27538870256 upstream. 
When pl330 driver was used during sound playback, after some time or after a number of plays the sound became choppy or totally noisy. For example on Odroid XU3 board the first four executions of aplay with small WAVE worked fine, but fifth was unrecognizable with errors: $ aplay /usr/share/sounds/alsa/Front_Right.wava underrun!!! (at least 0.095 ms long) Issue was caused by wrong residue reported by pl330 driver to pcm_dmaengine for its cyclic dma transfers. The pl330_tx_status(), residue reporting function, used a "last" flag in a descriptor to indicate that there is no more data to send. The pl330_tx_submit() iterated over descriptors trying to remove this flag from them and then mark last descriptor as "last". However when iterating it actually removed the flag not from descriptors but always from last of it (and then reset it). Thus effectively once some descriptor was marked as last, then it stayed like this forever causing residue to be reported too low. Signed-off-by: Krzysztof Kozlowski Fixes: aee4d1fac887 ("dmaengine: pl330: improve pl330_tx_status() function") Reported-by: gabriel@unseen.is Suggested-by: Marek Szyprowski Tested-by: Lars-Peter Clausen Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/pl330.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index 8652388e77d69..3dabc52b96154 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -2328,7 +2328,7 @@ static dma_cookie_t pl330_tx_submit(struct dma_async_tx_descriptor *tx) desc->txd.callback = last->txd.callback; desc->txd.callback_param = last->txd.callback_param; } - last->last = false; + desc->last = false; dma_cookie_assign(&desc->txd); From 5c09786f13e2d0db8d0e8d0175a87c5a19bb3349 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Mon, 3 Aug 2015 16:07:48 +0300 Subject: [PATCH 0555/2091] xhci: fix off by one error in TRB DMA address boundary check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 7895086afde2a05fa24a0e410d8e6b75ca7c8fdd upstream. We need to check that a TRB is part of the current segment before calculating its DMA address. Previously a ring segment didn't use a full memory page, and every new ring segment got a new memory page, so the off by one error in checking the upper bound was never seen. Now that we use a full memory page, 256 TRBs (4096 bytes), the off by one didn't catch the case when a TRB was the first element of the next segment. This is triggered if the virtual memory pages for a ring segment are next to each in increasing order where the ring buffer wraps around and causes errors like: [ 106.398223] xhci_hcd 0000:00:14.0: ERROR Transfer event TRB DMA ptr not part of current TD ep_index 0 comp_code 1 [ 106.398230] xhci_hcd 0000:00:14.0: Looking for event-dma fffd3000 trb-start fffd4fd0 trb-end fffd5000 seg-start fffd4000 seg-end fffd4ff0 The trb-end address is one outside the end-seg address. 
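To make the off-by-one concrete, here is a minimal illustrative check in plain C (not the driver function; TRBS_PER_SEGMENT taken as 256 per the figures above): valid TRB offsets within a segment run from 0 to TRBS_PER_SEGMENT - 1, so an offset equal to TRBS_PER_SEGMENT already names the first TRB of the next segment, which is exactly the trb-end versus seg-end mismatch in the log.

#include <stdbool.h>

#define TRBS_PER_SEGMENT 256

/* Valid offsets are 0 .. TRBS_PER_SEGMENT - 1; anything else must be rejected. */
static bool trb_offset_in_segment(long segment_offset)
{
	return segment_offset >= 0 && segment_offset < TRBS_PER_SEGMENT;
}

Rejecting segment_offset >= TRBS_PER_SEGMENT here is equivalent to the comparison the hunk below changes from ">" to ">=".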
Tested-by: Arkadiusz Miśkiewicz Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index d095677a07021..b3a0a2275f5a4 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -82,7 +82,7 @@ dma_addr_t xhci_trb_virt_to_dma(struct xhci_segment *seg, return 0; /* offset in TRBs */ segment_offset = trb - seg->trbs; - if (segment_offset > TRBS_PER_SEGMENT) + if (segment_offset >= TRBS_PER_SEGMENT) return 0; return seg->dma + (segment_offset * sizeof(*trb)); } From 57e5880a312a6985e76e32d959de76deeb770d76 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Mon, 3 Aug 2015 16:07:49 +0300 Subject: [PATCH 0556/2091] drivers/usb: Delete XHCI command timer if necessary commit ffe5adcb7661d94e952d6b5ed7f493cb4ef0c7bc upstream. When xhci_mem_cleanup() is called, it's possible that the command timer isn't initialized and scheduled. For those cases, to delete the command timer causes soft-lockup as below stack dump shows. The patch avoids deleting the command timer if it's not scheduled with the help of timer_pending(). NMI watchdog: BUG: soft lockup - CPU#40 stuck for 23s! [kworker/40:1:8140] : NIP [c000000000150b30] lock_timer_base.isra.34+0x90/0xa0 LR [c000000000150c24] try_to_del_timer_sync+0x34/0xa0 Call Trace: [c000000f67c975e0] [c0000000015b84f8] mon_ops+0x0/0x8 (unreliable) [c000000f67c97620] [c000000000150c24] try_to_del_timer_sync+0x34/0xa0 [c000000f67c97660] [c000000000150cf0] del_timer_sync+0x60/0x80 [c000000f67c97690] [c00000000070ac0c] xhci_mem_cleanup+0x5c/0x5e0 [c000000f67c97740] [c00000000070c2e8] xhci_mem_init+0x1158/0x13b0 [c000000f67c97860] [c000000000700978] xhci_init+0x88/0x110 [c000000f67c978e0] [c000000000701644] xhci_gen_setup+0x2b4/0x590 [c000000f67c97970] [c0000000006d4410] xhci_pci_setup+0x40/0x190 [c000000f67c979f0] [c0000000006b1af8] usb_add_hcd+0x418/0xba0 [c000000f67c97ab0] [c0000000006cb15c] usb_hcd_pci_probe+0x1dc/0x5c0 [c000000f67c97b50] [c0000000006d3ba4] xhci_pci_probe+0x64/0x1f0 [c000000f67c97ba0] [c0000000004fe9ac] local_pci_probe+0x6c/0x130 [c000000f67c97c30] [c0000000000e5ce8] work_for_cpu_fn+0x38/0x60 [c000000f67c97c60] [c0000000000eacb8] process_one_work+0x198/0x470 [c000000f67c97cf0] [c0000000000eb6ac] worker_thread+0x37c/0x5a0 [c000000f67c97d80] [c0000000000f2730] kthread+0x110/0x130 [c000000f67c97e30] [c000000000009660] ret_from_kernel_thread+0x5c/0x7c Reported-by: Priya M. A Signed-off-by: Gavin Shan Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mem.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index 3e442f77a2b93..9a8c936cd42c1 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -1792,7 +1792,8 @@ void xhci_mem_cleanup(struct xhci_hcd *xhci) int size; int i, j, num_ports; - del_timer_sync(&xhci->cmd_timer); + if (timer_pending(&xhci->cmd_timer)) + del_timer_sync(&xhci->cmd_timer); /* Free the Event Ring Segment Table and the actual Event Ring */ size = sizeof(struct xhci_erst_entry)*(xhci->erst.num_entries); From b5c270edd80f00434b8b15ae12e9ecbd5c038b2a Mon Sep 17 00:00:00 2001 From: Dirk Behme Date: Mon, 27 Jul 2015 08:56:05 +0200 Subject: [PATCH 0557/2091] USB: sierra: add 1199:68AB device ID commit 74472233233f577eaa0ca6d6e17d9017b6e53150 upstream. 
Add support for the Sierra Wireless AR8550 device with USB descriptor 0x1199, 0x68AB. It is common with MC879x modules 1199:683c/683d which also are composite devices with 7 interfaces (0..6) and also MDM62xx based as the AR8550. The major difference are only the interface attributes 02/02/01 on interfaces 3 and 4 on the AR8550. They are vendor specific ff/ff/ff on MC879x modules. lsusb reports: Bus 001 Device 004: ID 1199:68ab Sierra Wireless, Inc. Device Descriptor: bLength 18 bDescriptorType 1 bcdUSB 2.00 bDeviceClass 0 (Defined at Interface level) bDeviceSubClass 0 bDeviceProtocol 0 bMaxPacketSize0 64 idVendor 0x1199 Sierra Wireless, Inc. idProduct 0x68ab bcdDevice 0.06 iManufacturer 3 Sierra Wireless, Incorporated iProduct 2 AR8550 iSerial 0 bNumConfigurations 1 Configuration Descriptor: bLength 9 bDescriptorType 2 wTotalLength 198 bNumInterfaces 7 bConfigurationValue 1 iConfiguration 1 Sierra Configuration bmAttributes 0xe0 Self Powered Remote Wakeup MaxPower 0mA Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 0 bAlternateSetting 0 bNumEndpoints 2 bInterfaceClass 255 Vendor Specific Class bInterfaceSubClass 255 Vendor Specific Subclass bInterfaceProtocol 255 Vendor Specific Protocol iInterface 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x81 EP 1 IN bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 32 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x01 EP 1 OUT bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 32 Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 1 bAlternateSetting 0 bNumEndpoints 2 bInterfaceClass 255 Vendor Specific Class bInterfaceSubClass 255 Vendor Specific Subclass bInterfaceProtocol 255 Vendor Specific Protocol iInterface 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x82 EP 2 IN bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 32 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x02 EP 2 OUT bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 32 Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 2 bAlternateSetting 0 bNumEndpoints 2 bInterfaceClass 255 Vendor Specific Class bInterfaceSubClass 255 Vendor Specific Subclass bInterfaceProtocol 255 Vendor Specific Protocol iInterface 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x83 EP 3 IN bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 32 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x03 EP 3 OUT bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 32 Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 3 bAlternateSetting 0 bNumEndpoints 3 bInterfaceClass 2 Communications bInterfaceSubClass 2 Abstract (modem) bInterfaceProtocol 1 AT-commands (v.25ter) iInterface 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x84 EP 4 IN bmAttributes 3 Transfer Type Interrupt Synch Type None Usage Type Data wMaxPacketSize 0x0040 1x 64 bytes bInterval 5 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x85 EP 5 IN bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 32 Endpoint Descriptor: 
bLength 7 bDescriptorType 5 bEndpointAddress 0x04 EP 4 OUT bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 32 Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 4 bAlternateSetting 0 bNumEndpoints 3 bInterfaceClass 2 Communications bInterfaceSubClass 2 Abstract (modem) bInterfaceProtocol 1 AT-commands (v.25ter) iInterface 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x86 EP 6 IN bmAttributes 3 Transfer Type Interrupt Synch Type None Usage Type Data wMaxPacketSize 0x0040 1x 64 bytes bInterval 5 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x87 EP 7 IN bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 32 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x05 EP 5 OUT bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 32 Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 5 bAlternateSetting 0 bNumEndpoints 3 bInterfaceClass 255 Vendor Specific Class bInterfaceSubClass 255 Vendor Specific Subclass bInterfaceProtocol 255 Vendor Specific Protocol iInterface 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x88 EP 8 IN bmAttributes 3 Transfer Type Interrupt Synch Type None Usage Type Data wMaxPacketSize 0x0040 1x 64 bytes bInterval 5 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x89 EP 9 IN bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 32 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x06 EP 6 OUT bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 32 Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 6 bAlternateSetting 0 bNumEndpoints 3 bInterfaceClass 255 Vendor Specific Class bInterfaceSubClass 255 Vendor Specific Subclass bInterfaceProtocol 255 Vendor Specific Protocol iInterface 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x8a EP 10 IN bmAttributes 3 Transfer Type Interrupt Synch Type None Usage Type Data wMaxPacketSize 0x0040 1x 64 bytes bInterval 5 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x8b EP 11 IN bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 32 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x07 EP 7 OUT bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 32 Device Qualifier (for other device speed): bLength 10 bDescriptorType 6 bcdUSB 2.00 bDeviceClass 0 (Defined at Interface level) bDeviceSubClass 0 bDeviceProtocol 0 bMaxPacketSize0 64 bNumConfigurations 1 Device Status: 0x0001 Self Powered Signed-off-by: Dirk Behme Cc: Lars Melin Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/sierra.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/sierra.c b/drivers/usb/serial/sierra.c index 46179a0828ebc..07d1ecd564f79 100644 --- a/drivers/usb/serial/sierra.c +++ b/drivers/usb/serial/sierra.c @@ -289,6 +289,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE_AND_INTERFACE_INFO(0x1199, 0x68AA, 0xFF, 0xFF, 0xFF), .driver_info = (kernel_ulong_t)&direct_ip_interface_blacklist }, + { USB_DEVICE(0x1199, 0x68AB) }, /* Sierra Wireless AR8550 */ /* AT&T Direct IP LTE modems 
*/ { USB_DEVICE_AND_INTERFACE_INFO(0x0F3D, 0x68AA, 0xFF, 0xFF, 0xFF), .driver_info = (kernel_ulong_t)&direct_ip_interface_blacklist From 89dd0d8ca5069df6d18493a852285cc2dfb3bdfe Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 16 Jan 2015 11:32:51 -0500 Subject: [PATCH 0558/2091] usb: udc: core: add device_del() call to error pathway commit c93e64e91248becd0edb8f01723dff9da890e2ab upstream. This patch fixes a bug in the error pathway of usb_add_gadget_udc_release() in udc-core.c. If the udc registration fails, the gadget registration is not fully undone; there's a put_device(&gadget->dev) call but no device_del(). Acked-by: Peter Chen Signed-off-by: Alan Stern Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/udc-core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/gadget/udc/udc-core.c b/drivers/usb/gadget/udc/udc-core.c index d69c35558f685..7d69931cf45d0 100644 --- a/drivers/usb/gadget/udc/udc-core.c +++ b/drivers/usb/gadget/udc/udc-core.c @@ -321,6 +321,7 @@ int usb_add_gadget_udc_release(struct device *parent, struct usb_gadget *gadget, err3: put_device(&udc->dev); + device_del(&gadget->dev); err2: put_device(&gadget->dev); From 87a73d80571ac563db67c72b0827c22f979c5936 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Tue, 21 Jul 2015 09:51:29 +0800 Subject: [PATCH 0559/2091] usb: chipidea: ehci_init_driver is intended to call one time commit 2f01a33bd26545c16fea7592697f7f15c416402b upstream. The ehci_init_driver is used to initialize hcd APIs for each ehci controller driver, it is designed to be called only one time and before driver register is called. The current design will cause ehci_init_driver is called multiple times at probe process, it will cause hc_driver's initialization affect current running hcd. We run out NULL pointer dereference problem when one hcd is started by module_init, and the other is started by otg thread at SMP platform. The reason for this problem is ehci_init_driver will do memory copy for current uniform hc_driver, and this memory copy will do memset (as 0) first, so when the first hcd is running usb_add_hcd, and the second hcd may clear the uniform hc_driver's space (at ehci_init_driver), then the first hcd will meet NULL pointer at the same time. 
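The shape of the fix can be restated as a minimal, self-contained sketch with invented example_* names (this is not the chipidea code): do the one-time setup of the shared hc_driver template from module_init(), before the platform driver is registered, so that later probes only read the template and can no longer race against the memcpy()/memset() performed by ehci_init_driver().

#include <linux/module.h>
#include <linux/platform_device.h>

/* One-time setup of the shared host-driver template would go here. */
static void example_host_driver_init(void)
{
}

static struct platform_driver example_driver = {
	/* .probe / .remove elided for brevity */
	.driver = {
		.name = "example_hdrc",
	},
};

static int __init example_platform_register(void)
{
	example_host_driver_init();	/* runs exactly once, before any probe */
	return platform_driver_register(&example_driver);
}
module_init(example_platform_register);

static void __exit example_platform_unregister(void)
{
	platform_driver_unregister(&example_driver);
}
module_exit(example_platform_unregister);

MODULE_LICENSE("GPL v2");

With the template initialized once up front, a second controller coming up (for example via the OTG work queue) can no longer zero the hc_driver that the first controller is already using.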
See below two logs: LOG_1: ci_hdrc ci_hdrc.0: EHCI Host Controller ci_hdrc ci_hdrc.0: new USB bus registered, assigned bus number 1 ci_hdrc ci_hdrc.1: doesn't support gadget Unable to handle kernel NULL pointer dereference at virtual address 00000014 pgd = 80004000 [00000014] *pgd=00000000 Internal error: Oops: 805 [#1] PREEMPT SMP ARM Modules linked in: CPU: 0 PID: 108 Comm: kworker/u8:2 Not tainted 3.14.38-222193-g24b2734-dirty #25 Workqueue: ci_otg ci_otg_work task: d839ec00 ti: d8400000 task.ti: d8400000 PC is at ehci_run+0x4c/0x284 LR is at _raw_spin_unlock_irqrestore+0x28/0x54 pc : [<8041f9a0>] lr : [<8070ea84>] psr: 60000113 sp : d8401e30 ip : 00000000 fp : d8004400 r10: 00000001 r9 : 00000001 r8 : 00000000 r7 : 00000000 r6 : d8419940 r5 : 80dd24c0 r4 : d8419800 r3 : 8001d060 r2 : 00000000 r1 : 00000001 r0 : 00000000 Flags: nZCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment kernel Control: 10c53c7d Table: 1000404a DAC: 00000015 Process kworker/u8:2 (pid: 108, stack limit = 0xd8400238) Stack: (0xd8401e30 to 0xd8402000) 1e20: d87523c0 d8401e48 66667562 d8419800 1e40: 00000000 00000000 d8419800 00000000 00000000 00000000 d84198b0 8040fcdc 1e60: 00000000 80dd320c d8477610 d8419c00 d803d010 d8419800 00000000 00000000 1e80: d8004400 00000000 d8400008 80431494 80431374 d803d100 d803d010 d803d1ac 1ea0: 00000000 80432428 804323d4 d803d100 00000001 80435eb8 80e0d0bc d803d100 1ec0: 00000006 80436458 00000000 d803d100 80e92ec8 80436f44 d803d010 d803d100 1ee0: d83fde00 8043292c d8752710 d803d1f4 d803d010 8042ddfc 8042ddb8 d83f3b00 1f00: d803d1f4 80042b60 00000000 00000003 00000001 00000001 80054598 d83f3b00 1f20: d8004400 d83f3b18 d8004414 d8400000 80e3957b 00000089 d8004400 80043814 1f40: d839ec00 00000000 d83fcd80 d83f3b00 800436e4 00000000 00000000 00000000 1f60: 00000000 80048f34 00000000 00000000 00000000 d83f3b00 00000000 00000000 1f80: d8401f80 d8401f80 00000000 00000000 d8401f90 d8401f90 d8401fac d83fcd80 1fa0: 80048e68 00000000 00000000 8000e538 00000000 00000000 00000000 00000000 1fc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 1fe0: 00000000 00000000 00000000 00000000 00000013 00000000 00000000 00000000 [<8041f9a0>] (ehci_run) from [<8040fcdc>] (usb_add_hcd+0x248/0x6e8) [<8040fcdc>] (usb_add_hcd) from [<80431494>] (host_start+0x120/0x2e4) [<80431494>] (host_start) from [<80432428>] (ci_otg_start_host+0x54/0xbc) [<80432428>] (ci_otg_start_host) from [<80435eb8>] (otg_set_protocol+0xa4/0xd0) [<80435eb8>] (otg_set_protocol) from [<80436458>] (otg_set_state+0x574/0xc58) [<80436458>] (otg_set_state) from [<80436f44>] (otg_statemachine+0x408/0x46c) [<80436f44>] (otg_statemachine) from [<8043292c>] (ci_otg_fsm_work+0x3c/0x190) [<8043292c>] (ci_otg_fsm_work) from [<8042ddfc>] (ci_otg_work+0x44/0x1c4) [<8042ddfc>] (ci_otg_work) from [<80042b60>] (process_one_work+0xf4/0x35c) [<80042b60>] (process_one_work) from [<80043814>] (worker_thread+0x130/0x3bc) [<80043814>] (worker_thread) from [<80048f34>] (kthread+0xcc/0xe4) [<80048f34>] (kthread) from [<8000e538>] (ret_from_fork+0x14/0x3c) Code: e5953018 e3530000 0a000000 e12fff33 (e5878014) LOG_2: ci_hdrc ci_hdrc.0: EHCI Host Controller ci_hdrc ci_hdrc.0: new USB bus registered, assigned bus number 1 ci_hdrc ci_hdrc.1: doesn't support gadget Unable to handle kernel NULL pointer dereference at virtual address 00000000 pgd = 80004000 [00000000] *pgd=00000000 In Online 00:00ternal e Offline rror: Oops: 80000005 [#1] PREEMPT SMP ARM Modules linked in: CPU: 0 PID: 108 Comm: kworker/u8:2 Not tainted 3.14.38-02007-g24b2734-dirty 
#127 Workque Online 00:00ue: ci_o Offline tg ci_otg_work Online 00:00task: d8 Offline 39ec00 ti: d83ea000 task.ti: d83ea000 PC is at 0x0 LR is at usb_add_hcd+0x248/0x6e8 pc : [<00000000>] lr : [<8040f644>] psr: 60000113 sp : d83ebe60 ip : 00000000 fp : d8004400 r10: 00000001 r9 : 00000001 r8 : d85fd4b0 r7 : 00000000 r6 : 00000000 r5 : 00000000 r4 : d85fd400 r3 : 00000000 r2 : d85fd4f4 r1 : 80410178 r0 : d85fd400 Flags: nZCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment kernel Control: 10c53c7d Table: 1000404a DAC: 00000015 Process kworker/u8:2 (pid: 108, stack limit = 0xd83ea238) Stack: (0xd83ebe60 to 0xd83ec000) be60: 00000000 80dd920c d8654e10 d85fd800 d803e010 d85fd400 00000000 00000000 be80: d8004400 00000000 d83ea008 80430e34 80430d14 d803e100 d803e010 d803e1ac bea0: 00000000 80431dc8 80431d74 d803e100 00000001 80435858 80e130bc d803e100 bec0: 00000006 80435df8 00000000 d803e100 80e98ec8 804368e4 d803e010 d803e100 bee0: d86e8100 804322cc d86cf050 d803e1f4 d803e010 8042d79c 8042d758 d83cf900 bf00: d803e1f4 80042b78 00000000 00000003 00000001 00000001 800545e8 d83cf900 bf20: d8004400 d83cf918 d8004414 d83ea000 80e3f57b 00000089 d8004400 8004382c bf40: d839ec00 00000000 d8393780 d83cf900 800436fc 00000000 00000000 00000000 bf60: 00000000 80048f50 80e019f4 00000000 0000264c d83cf900 00000000 00000000 bf80: d83ebf80 d83ebf80 00000000 00000000 d83ebf90 d83ebf90 d83ebfac d8393780 bfa0: 80048e84 00000000 00000000 8000e538 00000000 00000000 00000000 00000000 bfc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 bfe0: 00000000 00000000 00000000 00000000 00000013 00000000 ee66e85d 133ebd03 [<804 Online 00:000f644>] Offline (usb_add_hcd) from [<80430e34>] (host_start+0x120/0x2e4) [<80430e34>] (host_start) from [<80431dc8>] (ci_otg_start_host+0x54/0xbc) [<80431dc8>] (ci_otg_start_host) from [<80435858>] (otg_set_protocol+0xa4/0xd0) [<80435858>] (otg_set_protocol) from [<80435df8>] (otg_set_state+0x574/0xc58) [<80435df8>] (otg_set_state) from [<804368e4>] (otg_statemachine+0x408/0x46c) [<804368e4>] (otg_statemachine) from [<804322cc>] (ci_otg_fsm_work+0x3c/0x190) [<804322cc>] (ci_otg_fsm_work) from [<8042d79c>] (ci_otg_work+0x44/0x1c4) [<8042d79c>] (ci_otg_work) from [<80042b78>] (process_one_work+0xf4/0x35c) [<80042b78>] (process_one_work) from [<8004382c>] (worker_thread+0x130/0x3bc) [<8004382c>] (worker_thread) from [<80048f50>] (kthread+0xcc/0xe4) [<80048f50>] (kthread) from [<8000e538>] (ret_from_fork+0x14/0x3c) Code: bad PC value Cc: Jun Li Cc: Alan Stern Acked-by: Alan Stern Signed-off-by: Peter Chen Signed-off-by: Greg Kroah-Hartman --- drivers/usb/chipidea/core.c | 13 ++++++++++++- drivers/usb/chipidea/host.c | 7 +++++-- drivers/usb/chipidea/host.h | 6 ++++++ 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/drivers/usb/chipidea/core.c b/drivers/usb/chipidea/core.c index 74fea4fa41b15..3ad48e1c0c57e 100644 --- a/drivers/usb/chipidea/core.c +++ b/drivers/usb/chipidea/core.c @@ -1024,7 +1024,18 @@ static struct platform_driver ci_hdrc_driver = { }, }; -module_platform_driver(ci_hdrc_driver); +static int __init ci_hdrc_platform_register(void) +{ + ci_hdrc_host_driver_init(); + return platform_driver_register(&ci_hdrc_driver); +} +module_init(ci_hdrc_platform_register); + +static void __exit ci_hdrc_platform_unregister(void) +{ + platform_driver_unregister(&ci_hdrc_driver); +} +module_exit(ci_hdrc_platform_unregister); MODULE_ALIAS("platform:ci_hdrc"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/usb/chipidea/host.c b/drivers/usb/chipidea/host.c index 
21fe1a3143135..2f8af40e87ca7 100644 --- a/drivers/usb/chipidea/host.c +++ b/drivers/usb/chipidea/host.c @@ -237,9 +237,12 @@ int ci_hdrc_host_init(struct ci_hdrc *ci) rdrv->name = "host"; ci->roles[CI_ROLE_HOST] = rdrv; + return 0; +} + +void ci_hdrc_host_driver_init(void) +{ ehci_init_driver(&ci_ehci_hc_driver, &ehci_ci_overrides); orig_bus_suspend = ci_ehci_hc_driver.bus_suspend; ci_ehci_hc_driver.bus_suspend = ci_ehci_bus_suspend; - - return 0; } diff --git a/drivers/usb/chipidea/host.h b/drivers/usb/chipidea/host.h index 5707bf379bfb4..0f12f131bdd3f 100644 --- a/drivers/usb/chipidea/host.h +++ b/drivers/usb/chipidea/host.h @@ -5,6 +5,7 @@ int ci_hdrc_host_init(struct ci_hdrc *ci); void ci_hdrc_host_destroy(struct ci_hdrc *ci); +void ci_hdrc_host_driver_init(void); #else @@ -18,6 +19,11 @@ static inline void ci_hdrc_host_destroy(struct ci_hdrc *ci) } +static void ci_hdrc_host_driver_init(void) +{ + +} + #endif #endif /* __DRIVERS_USB_CHIPIDEA_HOST_H */ From 73f7d1ff06aaee5a51ef26fbe48252878c98bd0e Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 16 Apr 2015 18:03:04 +1000 Subject: [PATCH 0560/2091] phy: twl4030-usb: make runtime pm more reliable. commit 56301df6bcaaed31e77b8c500ca1b437f46a3158 upstream. A construct like: if (pm_runtime_suspended(twl->dev)) pm_runtime_get_sync(twl->dev); is against the spirit of the runtime_pm interface as it makes the internal refcounting useless. In this case it is also racy, particularly as 'put_autosuspend' is used to drop a reference. When that happens a timer is started and the device is runtime-suspended after the timeout. If the above code runs in this window, the device will not be found to be suspended so no pm_runtime reference is taken. When the timer expires the device will be suspended, which is against the intention of the code. So be more direct is taking and dropping references. If twl->linkstat is VBUS_VALID or ID_GROUND, then hold a pm_runtime reference, otherwise don't. Define "cable_present()" to test for this condition. Tested-by: Tony Lindgren Signed-off-by: NeilBrown Signed-off-by: Kishon Vijay Abraham I Cc: Alexander Holler Signed-off-by: Greg Kroah-Hartman --- drivers/phy/phy-twl4030-usb.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/drivers/phy/phy-twl4030-usb.c b/drivers/phy/phy-twl4030-usb.c index 8882afbef6885..6285f46f3ddb4 100644 --- a/drivers/phy/phy-twl4030-usb.c +++ b/drivers/phy/phy-twl4030-usb.c @@ -144,6 +144,16 @@ #define PMBR1 0x0D #define GPIO_USB_4PIN_ULPI_2430C (3 << 0) +/* + * If VBUS is valid or ID is ground, then we know a + * cable is present and we need to be runtime-enabled + */ +static inline bool cable_present(enum omap_musb_vbus_id_status stat) +{ + return stat == OMAP_MUSB_VBUS_VALID || + stat == OMAP_MUSB_ID_GROUND; +} + struct twl4030_usb { struct usb_phy phy; struct device *dev; @@ -536,8 +546,10 @@ static irqreturn_t twl4030_usb_irq(int irq, void *_twl) mutex_lock(&twl->lock); if (status >= 0 && status != twl->linkstat) { + status_changed = + cable_present(twl->linkstat) != + cable_present(status); twl->linkstat = status; - status_changed = true; } mutex_unlock(&twl->lock); @@ -553,15 +565,11 @@ static irqreturn_t twl4030_usb_irq(int irq, void *_twl) * USB_LINK_VBUS state. musb_hdrc won't care until it * starts to handle softconnect right. 
*/ - if ((status == OMAP_MUSB_VBUS_VALID) || - (status == OMAP_MUSB_ID_GROUND)) { - if (pm_runtime_suspended(twl->dev)) - pm_runtime_get_sync(twl->dev); + if (cable_present(status)) { + pm_runtime_get_sync(twl->dev); } else { - if (pm_runtime_active(twl->dev)) { - pm_runtime_mark_last_busy(twl->dev); - pm_runtime_put_autosuspend(twl->dev); - } + pm_runtime_mark_last_busy(twl->dev); + pm_runtime_put_autosuspend(twl->dev); } omap_musb_mailbox(status); } @@ -766,6 +774,9 @@ static int twl4030_usb_remove(struct platform_device *pdev) /* disable complete OTG block */ twl4030_usb_clear_bits(twl, POWER_CTRL, POWER_CTRL_OTG_ENAB); + + if (cable_present(twl->linkstat)) + pm_runtime_put_noidle(twl->dev); pm_runtime_mark_last_busy(twl->dev); pm_runtime_put(twl->dev); From 33afeac21b9cb79ad8fc5caf239af89c79e25e1e Mon Sep 17 00:00:00 2001 From: Benjamin Randazzo Date: Sat, 25 Jul 2015 16:36:50 +0200 Subject: [PATCH 0561/2091] md: use kzalloc() when bitmap is disabled commit b6878d9e03043695dbf3fa1caa6dfc09db225b16 upstream. In drivers/md/md.c get_bitmap_file() uses kmalloc() for creating a mdu_bitmap_file_t called "file". 5769 file = kmalloc(sizeof(*file), GFP_NOIO); 5770 if (!file) 5771 return -ENOMEM; This structure is copied to user space at the end of the function. 5786 if (err == 0 && 5787 copy_to_user(arg, file, sizeof(*file))) 5788 err = -EFAULT But if bitmap is disabled only the first byte of "file" is initialized with zero, so it's possible to read some bytes (up to 4095) of kernel space memory from user space. This is an information leak. 5775 /* bitmap disabled, zero the first byte and copy out */ 5776 if (!mddev->bitmap_info.file) 5777 file->pathname[0] = '\0'; Signed-off-by: Benjamin Randazzo Signed-off-by: NeilBrown Signed-off-by: Greg Kroah-Hartman --- drivers/md/md.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index b9200282fd779..e4621511d118b 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -5740,7 +5740,7 @@ static int get_bitmap_file(struct mddev *mddev, void __user * arg) char *ptr; int err; - file = kmalloc(sizeof(*file), GFP_NOIO); + file = kzalloc(sizeof(*file), GFP_NOIO); if (!file) return -ENOMEM; From b235edce5cbe4e978d44d8c57ec15afc4ba27d50 Mon Sep 17 00:00:00 2001 From: Michal Kazior Date: Mon, 20 Apr 2015 09:20:41 +0000 Subject: [PATCH 0562/2091] ath10k: fix qca61x4 hw2.1 support commit 11a002efbaa7fbd9f6e616695ab42aa9f1caf060 upstream. During initialization firmware does some sort of memory switch between DRAM and IRAM. If configuration value for bank switching isn't correct device crashes during init. The new value prevents firmware 11.0.0.302 (and possibly others) for qca61x4 hw2.1 from crashing during init. 
Signed-off-by: Michal Kazior Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath10k/pci.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c index 7681237fe298a..ead5432821287 100644 --- a/drivers/net/wireless/ath/ath10k/pci.c +++ b/drivers/net/wireless/ath/ath10k/pci.c @@ -1524,12 +1524,11 @@ static int ath10k_pci_get_num_banks(struct ath10k *ar) switch (MS(ar->chip_id, SOC_CHIP_ID_REV)) { case QCA6174_HW_1_0_CHIP_ID_REV: case QCA6174_HW_1_1_CHIP_ID_REV: + case QCA6174_HW_2_1_CHIP_ID_REV: + case QCA6174_HW_2_2_CHIP_ID_REV: return 3; case QCA6174_HW_1_3_CHIP_ID_REV: return 2; - case QCA6174_HW_2_1_CHIP_ID_REV: - case QCA6174_HW_2_2_CHIP_ID_REV: - return 6; case QCA6174_HW_3_0_CHIP_ID_REV: case QCA6174_HW_3_1_CHIP_ID_REV: case QCA6174_HW_3_2_CHIP_ID_REV: From 4e1fb62bc119a4a130b01ff538c2b9a4c0853acc Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 4 Jun 2015 13:24:29 -0700 Subject: [PATCH 0563/2091] x86/asm/entry/64: Remove pointless jump to irq_return commit 5ca6f70f387b4f82903037cc3c5488e2c97dcdbc upstream. INTERRUPT_RETURN turns into a jmp instruction. There's no need for extra indirection. Signed-off-by: Andy Lutomirski Cc: Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/2f2318653dbad284a59311f13f08cea71298fd7c.1433449436.git.luto@kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/entry_64.S | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 02c2eff7478da..b32f3465f105a 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -793,8 +793,6 @@ retint_kernel: restore_c_regs_and_iret: RESTORE_C_REGS REMOVE_PT_GPREGS_FROM_STACK 8 - -irq_return: INTERRUPT_RETURN ENTRY(native_iret) @@ -1640,7 +1638,7 @@ nmi_restore: /* Clear the NMI executing stack variable */ movq $0, 5*8(%rsp) - jmp irq_return + INTERRUPT_RETURN CFI_ENDPROC END(nmi) From e0146756cb2b7122adbe9b5bb97da4a99c5437b3 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 15 Jul 2015 10:29:33 -0700 Subject: [PATCH 0564/2091] x86/nmi: Enable nested do_nmi() handling for 64-bit kernels commit 9d05041679904b12c12421cbcf9cb5f4860a8d7b upstream. 32-bit kernels handle nested NMIs in C. Enable the exact same handling on 64-bit kernels as well. This isn't currently necessary, but it will become necessary once the asm code starts allowing limited nesting. Signed-off-by: Andy Lutomirski Reviewed-by: Steven Rostedt Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/nmi.c | 123 ++++++++++++++++++------------------------ 1 file changed, 52 insertions(+), 71 deletions(-) diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index c3e985d1751ce..d8766b1c99745 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -408,15 +408,15 @@ static void default_do_nmi(struct pt_regs *regs) NOKPROBE_SYMBOL(default_do_nmi); /* - * NMIs can hit breakpoints which will cause it to lose its - * NMI context with the CPU when the breakpoint does an iret. 
- */ -#ifdef CONFIG_X86_32 -/* - * For i386, NMIs use the same stack as the kernel, and we can - * add a workaround to the iret problem in C (preventing nested - * NMIs if an NMI takes a trap). Simply have 3 states the NMI - * can be in: + * NMIs can hit breakpoints which will cause it to lose its NMI context + * with the CPU when the breakpoint or page fault does an IRET. + * + * As a result, NMIs can nest if NMIs get unmasked due an IRET during + * NMI processing. On x86_64, the asm glue protects us from nested NMIs + * if the outer NMI came from kernel mode, but we can still nest if the + * outer NMI came from user mode. + * + * To handle these nested NMIs, we have three states: * * 1) not running * 2) executing @@ -430,15 +430,14 @@ NOKPROBE_SYMBOL(default_do_nmi); * (Note, the latch is binary, thus multiple NMIs triggering, * when one is running, are ignored. Only one NMI is restarted.) * - * If an NMI hits a breakpoint that executes an iret, another - * NMI can preempt it. We do not want to allow this new NMI - * to run, but we want to execute it when the first one finishes. - * We set the state to "latched", and the exit of the first NMI will - * perform a dec_return, if the result is zero (NOT_RUNNING), then - * it will simply exit the NMI handler. If not, the dec_return - * would have set the state to NMI_EXECUTING (what we want it to - * be when we are running). In this case, we simply jump back - * to rerun the NMI handler again, and restart the 'latched' NMI. + * If an NMI executes an iret, another NMI can preempt it. We do not + * want to allow this new NMI to run, but we want to execute it when the + * first one finishes. We set the state to "latched", and the exit of + * the first NMI will perform a dec_return, if the result is zero + * (NOT_RUNNING), then it will simply exit the NMI handler. If not, the + * dec_return would have set the state to NMI_EXECUTING (what we want it + * to be when we are running). In this case, we simply jump back to + * rerun the NMI handler again, and restart the 'latched' NMI. * * No trap (breakpoint or page fault) should be hit before nmi_restart, * thus there is no race between the first check of state for NOT_RUNNING @@ -461,49 +460,36 @@ enum nmi_states { static DEFINE_PER_CPU(enum nmi_states, nmi_state); static DEFINE_PER_CPU(unsigned long, nmi_cr2); -#define nmi_nesting_preprocess(regs) \ - do { \ - if (this_cpu_read(nmi_state) != NMI_NOT_RUNNING) { \ - this_cpu_write(nmi_state, NMI_LATCHED); \ - return; \ - } \ - this_cpu_write(nmi_state, NMI_EXECUTING); \ - this_cpu_write(nmi_cr2, read_cr2()); \ - } while (0); \ - nmi_restart: - -#define nmi_nesting_postprocess() \ - do { \ - if (unlikely(this_cpu_read(nmi_cr2) != read_cr2())) \ - write_cr2(this_cpu_read(nmi_cr2)); \ - if (this_cpu_dec_return(nmi_state)) \ - goto nmi_restart; \ - } while (0) -#else /* x86_64 */ +#ifdef CONFIG_X86_64 /* - * In x86_64 things are a bit more difficult. This has the same problem - * where an NMI hitting a breakpoint that calls iret will remove the - * NMI context, allowing a nested NMI to enter. What makes this more - * difficult is that both NMIs and breakpoints have their own stack. - * When a new NMI or breakpoint is executed, the stack is set to a fixed - * point. If an NMI is nested, it will have its stack set at that same - * fixed address that the first NMI had, and will start corrupting the - * stack. This is handled in entry_64.S, but the same problem exists with - * the breakpoint stack. 
+ * In x86_64, we need to handle breakpoint -> NMI -> breakpoint. Without + * some care, the inner breakpoint will clobber the outer breakpoint's + * stack. * - * If a breakpoint is being processed, and the debug stack is being used, - * if an NMI comes in and also hits a breakpoint, the stack pointer - * will be set to the same fixed address as the breakpoint that was - * interrupted, causing that stack to be corrupted. To handle this case, - * check if the stack that was interrupted is the debug stack, and if - * so, change the IDT so that new breakpoints will use the current stack - * and not switch to the fixed address. On return of the NMI, switch back - * to the original IDT. + * If a breakpoint is being processed, and the debug stack is being + * used, if an NMI comes in and also hits a breakpoint, the stack + * pointer will be set to the same fixed address as the breakpoint that + * was interrupted, causing that stack to be corrupted. To handle this + * case, check if the stack that was interrupted is the debug stack, and + * if so, change the IDT so that new breakpoints will use the current + * stack and not switch to the fixed address. On return of the NMI, + * switch back to the original IDT. */ static DEFINE_PER_CPU(int, update_debug_stack); +#endif -static inline void nmi_nesting_preprocess(struct pt_regs *regs) +dotraplinkage notrace void +do_nmi(struct pt_regs *regs, long error_code) { + if (this_cpu_read(nmi_state) != NMI_NOT_RUNNING) { + this_cpu_write(nmi_state, NMI_LATCHED); + return; + } + this_cpu_write(nmi_state, NMI_EXECUTING); + this_cpu_write(nmi_cr2, read_cr2()); +nmi_restart: + +#ifdef CONFIG_X86_64 /* * If we interrupted a breakpoint, it is possible that * the nmi handler will have breakpoints too. We need to @@ -514,22 +500,8 @@ static inline void nmi_nesting_preprocess(struct pt_regs *regs) debug_stack_set_zero(); this_cpu_write(update_debug_stack, 1); } -} - -static inline void nmi_nesting_postprocess(void) -{ - if (unlikely(this_cpu_read(update_debug_stack))) { - debug_stack_reset(); - this_cpu_write(update_debug_stack, 0); - } -} #endif -dotraplinkage notrace void -do_nmi(struct pt_regs *regs, long error_code) -{ - nmi_nesting_preprocess(regs); - nmi_enter(); inc_irq_stat(__nmi_count); @@ -539,8 +511,17 @@ do_nmi(struct pt_regs *regs, long error_code) nmi_exit(); - /* On i386, may loop back to preprocess */ - nmi_nesting_postprocess(); +#ifdef CONFIG_X86_64 + if (unlikely(this_cpu_read(update_debug_stack))) { + debug_stack_reset(); + this_cpu_write(update_debug_stack, 0); + } +#endif + + if (unlikely(this_cpu_read(nmi_cr2) != read_cr2())) + write_cr2(this_cpu_read(nmi_cr2)); + if (this_cpu_dec_return(nmi_state)) + goto nmi_restart; } NOKPROBE_SYMBOL(do_nmi); From f163d838c24833d4493e693a612be275b723d5e8 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 15 Jul 2015 10:29:34 -0700 Subject: [PATCH 0565/2091] x86/nmi/64: Remove asm code that saves CR2 commit 0e181bb58143cb4a2e8f01c281b0816cd0e4798e upstream. Now that do_nmi saves CR2, we don't need to save it in asm. 
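For reference, the C-side flow after this and the previous patch looks roughly as follows (condensed from the do_nmi() hunks above, with the debug-stack fixup and some bookkeeping left out; not a verbatim copy). CR2 is captured on entry and restored before a possible restart, which is what makes the asm-level save into %r12, deleted below, redundant:

dotraplinkage notrace void
do_nmi(struct pt_regs *regs, long error_code)
{
	if (this_cpu_read(nmi_state) != NMI_NOT_RUNNING) {
		/* Nested entry: latch it, the outer invocation reruns the handler. */
		this_cpu_write(nmi_state, NMI_LATCHED);
		return;
	}
	this_cpu_write(nmi_state, NMI_EXECUTING);
	this_cpu_write(nmi_cr2, read_cr2());		/* remember the faulting address */
nmi_restart:

	nmi_enter();
	inc_irq_stat(__nmi_count);
	default_do_nmi(regs);				/* a nested #PF in here may clobber CR2 */
	nmi_exit();

	if (unlikely(this_cpu_read(nmi_cr2) != read_cr2()))
		write_cr2(this_cpu_read(nmi_cr2));	/* undo the clobber */
	if (this_cpu_dec_return(nmi_state))		/* was LATCHED: run once more */
		goto nmi_restart;
}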
Signed-off-by: Andy Lutomirski Reviewed-by: Steven Rostedt Acked-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/entry_64.S | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index b32f3465f105a..34f07d7807d8f 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1603,29 +1603,11 @@ end_repeat_nmi: call paranoid_entry DEFAULT_FRAME 0 - /* - * Save off the CR2 register. If we take a page fault in the NMI then - * it could corrupt the CR2 value. If the NMI preempts a page fault - * handler before it was able to read the CR2 register, and then the - * NMI itself takes a page fault, the page fault that was preempted - * will read the information from the NMI page fault and not the - * origin fault. Save it off and restore it if it changes. - * Use the r12 callee-saved register. - */ - movq %cr2, %r12 - /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ movq %rsp,%rdi movq $-1,%rsi call do_nmi - /* Did the NMI take a page fault? Restore cr2 if it did */ - movq %cr2, %rcx - cmpq %rcx, %r12 - je 1f - movq %r12, %cr2 -1: - testl %ebx,%ebx /* swapgs needed? */ jnz nmi_restore nmi_swapgs: From 60e6cbaf875edd9aef40948d0790decb8e1a77cc Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 15 Jul 2015 10:29:35 -0700 Subject: [PATCH 0566/2091] x86/nmi/64: Switch stacks on userspace NMI entry commit 9b6e6a8334d56354853f9c255d1395c2ba570e0a upstream. Returning to userspace is tricky: IRET can fail, and ESPFIX can rearrange the stack prior to IRET. The NMI nesting fixup relies on a precise stack layout and atomic IRET. Rather than trying to teach the NMI nesting fixup to handle ESPFIX and failed IRET, punt: run NMIs that came from user mode on the normal kernel stack. This will make some nested NMIs visible to C code, but the C code is okay with that. As a side effect, this should speed up perf: it eliminates an RDMSR when NMIs come from user mode. Signed-off-by: Andy Lutomirski Reviewed-by: Steven Rostedt Reviewed-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/entry_64.S | 61 +++++++++++++++++++++++++++++++++++--- 1 file changed, 57 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 34f07d7807d8f..7a0b8008727b9 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1424,19 +1424,72 @@ ENTRY(nmi) * a nested NMI that updated the copy interrupt stack frame, a * jump will be made to the repeat_nmi code that will handle the second * NMI. + * + * However, espfix prevents us from directly returning to userspace + * with a single IRET instruction. Similarly, IRET to user mode + * can fault. We therefore handle NMIs from user space like + * other IST entries. */ /* Use %rdx as our temp variable throughout */ pushq_cfi %rdx CFI_REL_OFFSET rdx, 0 + testb $3, CS-RIP+8(%rsp) + jz .Lnmi_from_kernel + /* - * If %cs was not the kernel segment, then the NMI triggered in user - * space, which means it is definitely not nested. + * NMI from user mode. We need to run on the thread stack, but we + * can't go through the normal entry paths: NMIs are masked, and + * we don't want to enable interrupts, because then we'll end + * up in an awkward situation in which IRQs are on but NMIs + * are off. 
*/ - cmpl $__KERNEL_CS, 16(%rsp) - jne first_nmi + SWAPGS + cld + movq %rsp, %rdx + movq PER_CPU_VAR(kernel_stack), %rsp + pushq 5*8(%rdx) /* pt_regs->ss */ + pushq 4*8(%rdx) /* pt_regs->rsp */ + pushq 3*8(%rdx) /* pt_regs->flags */ + pushq 2*8(%rdx) /* pt_regs->cs */ + pushq 1*8(%rdx) /* pt_regs->rip */ + pushq $-1 /* pt_regs->orig_ax */ + pushq %rdi /* pt_regs->di */ + pushq %rsi /* pt_regs->si */ + pushq (%rdx) /* pt_regs->dx */ + pushq %rcx /* pt_regs->cx */ + pushq %rax /* pt_regs->ax */ + pushq %r8 /* pt_regs->r8 */ + pushq %r9 /* pt_regs->r9 */ + pushq %r10 /* pt_regs->r10 */ + pushq %r11 /* pt_regs->r11 */ + pushq %rbx /* pt_regs->rbx */ + pushq %rbp /* pt_regs->rbp */ + pushq %r12 /* pt_regs->r12 */ + pushq %r13 /* pt_regs->r13 */ + pushq %r14 /* pt_regs->r14 */ + pushq %r15 /* pt_regs->r15 */ + + /* + * At this point we no longer need to worry about stack damage + * due to nesting -- we're on the normal thread stack and we're + * done with the NMI stack. + */ + movq %rsp, %rdi + movq $-1, %rsi + call do_nmi + + /* + * Return back to user mode. We must *not* do the normal exit + * work, because we don't want to enable interrupts. Fortunately, + * do_nmi doesn't modify pt_regs. + */ + SWAPGS + jmp restore_c_regs_and_iret + +.Lnmi_from_kernel: /* * Check the special variable on the stack to see if NMIs are * executing. From 1dd191d72fdfb46988d8c97c8b19fc6d83d44d3e Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 15 Jul 2015 10:29:36 -0700 Subject: [PATCH 0567/2091] x86/nmi/64: Improve nested NMI comments commit 0b22930ebad563ae97ff3f8d7b9f12060b4c6e6b upstream. I found the nested NMI documentation to be difficult to follow. Improve the comments. Signed-off-by: Andy Lutomirski Reviewed-by: Steven Rostedt Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/entry_64.S | 160 +++++++++++++++++++++---------------- arch/x86/kernel/nmi.c | 4 +- 2 files changed, 94 insertions(+), 70 deletions(-) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 7a0b8008727b9..0cc6af62f34dc 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1411,11 +1411,12 @@ ENTRY(nmi) * If the variable is not set and the stack is not the NMI * stack then: * o Set the special variable on the stack - * o Copy the interrupt frame into a "saved" location on the stack - * o Copy the interrupt frame into a "copy" location on the stack + * o Copy the interrupt frame into an "outermost" location on the + * stack + * o Copy the interrupt frame into an "iret" location on the stack * o Continue processing the NMI * If the variable is set or the previous stack is the NMI stack: - * o Modify the "copy" location to jump to the repeate_nmi + * o Modify the "iret" location to jump to the repeat_nmi * o return back to the first NMI * * Now on exit of the first NMI, we first clear the stack variable @@ -1491,18 +1492,60 @@ ENTRY(nmi) .Lnmi_from_kernel: /* - * Check the special variable on the stack to see if NMIs are - * executing. 
+ * Here's what our stack frame will look like: + * +---------------------------------------------------------+ + * | original SS | + * | original Return RSP | + * | original RFLAGS | + * | original CS | + * | original RIP | + * +---------------------------------------------------------+ + * | temp storage for rdx | + * +---------------------------------------------------------+ + * | "NMI executing" variable | + * +---------------------------------------------------------+ + * | iret SS } Copied from "outermost" frame | + * | iret Return RSP } on each loop iteration; overwritten | + * | iret RFLAGS } by a nested NMI to force another | + * | iret CS } iteration if needed. | + * | iret RIP } | + * +---------------------------------------------------------+ + * | outermost SS } initialized in first_nmi; | + * | outermost Return RSP } will not be changed before | + * | outermost RFLAGS } NMI processing is done. | + * | outermost CS } Copied to "iret" frame on each | + * | outermost RIP } iteration. | + * +---------------------------------------------------------+ + * | pt_regs | + * +---------------------------------------------------------+ + * + * The "original" frame is used by hardware. Before re-enabling + * NMIs, we need to be done with it, and we need to leave enough + * space for the asm code here. + * + * We return by executing IRET while RSP points to the "iret" frame. + * That will either return for real or it will loop back into NMI + * processing. + * + * The "outermost" frame is copied to the "iret" frame on each + * iteration of the loop, so each iteration starts with the "iret" + * frame pointing to the final return target. + */ + + /* + * Determine whether we're a nested NMI. + * + * First check "NMI executing". If it's set, then we're nested. + * This will not detect if we interrupted an outer NMI just + * before IRET. */ cmpl $1, -8(%rsp) je nested_nmi /* - * Now test if the previous stack was an NMI stack. - * We need the double check. We check the NMI stack to satisfy the - * race when the first NMI clears the variable before returning. - * We check the variable because the first NMI could be in a - * breakpoint routine using a breakpoint stack. + * Now test if the previous stack was an NMI stack. This covers + * the case where we interrupt an outer NMI after it clears + * "NMI executing" but before IRET. */ lea 6*8(%rsp), %rdx /* Compare the NMI stack (rdx) with the stack we came from (4*8(%rsp)) */ @@ -1519,9 +1562,11 @@ ENTRY(nmi) nested_nmi: /* - * Do nothing if we interrupted the fixup in repeat_nmi. - * It's about to repeat the NMI handler, so we are fine - * with ignoring this one. + * If we interrupted an NMI that is between repeat_nmi and + * end_repeat_nmi, then we must not modify the "iret" frame + * because it's being written by the outer NMI. That's okay; + * the outer NMI handler is about to call do_nmi anyway, + * so we can just resume the outer NMI. */ movq $repeat_nmi, %rdx cmpq 8(%rsp), %rdx @@ -1531,7 +1576,10 @@ nested_nmi: ja nested_nmi_out 1: - /* Set up the interrupted NMIs stack to jump to repeat_nmi */ + /* + * Modify the "iret" frame to point to repeat_nmi, forcing another + * iteration of NMI handling. + */ leaq -1*8(%rsp), %rdx movq %rdx, %rsp CFI_ADJUST_CFA_OFFSET 1*8 @@ -1550,60 +1598,23 @@ nested_nmi_out: popq_cfi %rdx CFI_RESTORE rdx - /* No need to check faults here */ + /* We are returning to kernel mode, so this cannot result in a fault. 
*/ INTERRUPT_RETURN CFI_RESTORE_STATE first_nmi: - /* - * Because nested NMIs will use the pushed location that we - * stored in rdx, we must keep that space available. - * Here's what our stack frame will look like: - * +-------------------------+ - * | original SS | - * | original Return RSP | - * | original RFLAGS | - * | original CS | - * | original RIP | - * +-------------------------+ - * | temp storage for rdx | - * +-------------------------+ - * | NMI executing variable | - * +-------------------------+ - * | copied SS | - * | copied Return RSP | - * | copied RFLAGS | - * | copied CS | - * | copied RIP | - * +-------------------------+ - * | Saved SS | - * | Saved Return RSP | - * | Saved RFLAGS | - * | Saved CS | - * | Saved RIP | - * +-------------------------+ - * | pt_regs | - * +-------------------------+ - * - * The saved stack frame is used to fix up the copied stack frame - * that a nested NMI may change to make the interrupted NMI iret jump - * to the repeat_nmi. The original stack frame and the temp storage - * is also used by nested NMIs and can not be trusted on exit. - */ - /* Do not pop rdx, nested NMIs will corrupt that part of the stack */ + /* Restore rdx. */ movq (%rsp), %rdx CFI_RESTORE rdx - /* Set the NMI executing variable on the stack. */ + /* Set "NMI executing" on the stack. */ pushq_cfi $1 - /* - * Leave room for the "copied" frame - */ + /* Leave room for the "iret" frame */ subq $(5*8), %rsp CFI_ADJUST_CFA_OFFSET 5*8 - /* Copy the stack frame to the Saved frame */ + /* Copy the "original" frame to the "outermost" frame */ .rept 5 pushq_cfi 11*8(%rsp) .endr @@ -1611,6 +1622,7 @@ first_nmi: /* Everything up to here is safe from nested NMIs */ +repeat_nmi: /* * If there was a nested NMI, the first NMI's iret will return * here. But NMIs are still enabled and we can take another @@ -1619,16 +1631,21 @@ first_nmi: * it will just return, as we are about to repeat an NMI anyway. * This makes it safe to copy to the stack frame that a nested * NMI will update. - */ -repeat_nmi: - /* - * Update the stack variable to say we are still in NMI (the update - * is benign for the non-repeat case, where 1 was pushed just above - * to this very stack slot). + * + * RSP is pointing to "outermost RIP". gsbase is unknown, but, if + * we're repeating an NMI, gsbase has the same value that it had on + * the first iteration. paranoid_entry will load the kernel + * gsbase if needed before we call do_nmi. + * + * Set "NMI executing" in case we came back here via IRET. */ movq $1, 10*8(%rsp) - /* Make another copy, this one may be modified by nested NMIs */ + /* + * Copy the "outermost" frame to the "iret" frame. NMIs that nest + * here must not modify the "iret" frame while we're writing to + * it or it will end up containing garbage. + */ addq $(10*8), %rsp CFI_ADJUST_CFA_OFFSET -10*8 .rept 5 @@ -1639,9 +1656,9 @@ repeat_nmi: end_repeat_nmi: /* - * Everything below this point can be preempted by a nested - * NMI if the first NMI took an exception and reset our iret stack - * so that we repeat another NMI. + * Everything below this point can be preempted by a nested NMI. + * If this happens, then the inner NMI will change the "iret" + * frame to point back to repeat_nmi. */ pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ ALLOC_PT_GPREGS_ON_STACK @@ -1668,11 +1685,18 @@ nmi_swapgs: nmi_restore: RESTORE_EXTRA_REGS RESTORE_C_REGS - /* Pop the extra iret frame at once */ + + /* Point RSP at the "iret" frame. 
*/ REMOVE_PT_GPREGS_FROM_STACK 6*8 - /* Clear the NMI executing stack variable */ + /* Clear "NMI executing". */ movq $0, 5*8(%rsp) + + /* + * INTERRUPT_RETURN reads the "iret" frame and exits the NMI + * stack in a single instruction. We are returning to kernel + * mode, so this cannot result in a fault. + */ INTERRUPT_RETURN CFI_ENDPROC END(nmi) diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index d8766b1c99745..d05bd2e2ee91e 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -408,8 +408,8 @@ static void default_do_nmi(struct pt_regs *regs) NOKPROBE_SYMBOL(default_do_nmi); /* - * NMIs can hit breakpoints which will cause it to lose its NMI context - * with the CPU when the breakpoint or page fault does an IRET. + * NMIs can page fault or hit breakpoints which will cause it to lose + * its NMI context with the CPU when the breakpoint or page fault does an IRET. * * As a result, NMIs can nest if NMIs get unmasked due an IRET during * NMI processing. On x86_64, the asm glue protects us from nested NMIs From d8246ca4e3ce08c9ed98ebe292f36ee2bc5f54ab Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 15 Jul 2015 10:29:37 -0700 Subject: [PATCH 0568/2091] x86/nmi/64: Reorder nested NMI checks commit a27507ca2d796cfa8d907de31ad730359c8a6d06 upstream. Check the repeat_nmi .. end_repeat_nmi special case first. The next patch will rework the RSP check and, as a side effect, the RSP check will no longer detect repeat_nmi .. end_repeat_nmi, so we'll need this ordering of the checks. Note: this is more subtle than it appears. The check for repeat_nmi .. end_repeat_nmi jumps straight out of the NMI code instead of adjusting the "iret" frame to force a repeat. This is necessary, because the code between repeat_nmi and end_repeat_nmi sets "NMI executing" and then writes to the "iret" frame itself. If a nested NMI comes in and modifies the "iret" frame while repeat_nmi is also modifying it, we'll end up with garbage. The old code got this right, as does the new code, but the new code is a bit more explicit. If we were to move the check right after the "NMI executing" check, then we'd get it wrong and have random crashes. ( Because the "NMI executing" check would jump to the code that would modify the "iret" frame without checking if the interrupted NMI was currently modifying it. ) Signed-off-by: Andy Lutomirski Reviewed-by: Steven Rostedt Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/entry_64.S | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 0cc6af62f34dc..3824867c89933 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1535,7 +1535,24 @@ ENTRY(nmi) /* * Determine whether we're a nested NMI. * - * First check "NMI executing". If it's set, then we're nested. + * If we interrupted kernel code between repeat_nmi and + * end_repeat_nmi, then we are a nested NMI. We must not + * modify the "iret" frame because it's being written by + * the outer NMI. That's okay; the outer NMI handler is + * about to about to call do_nmi anyway, so we can just + * resume the outer NMI. + */ + + movq $repeat_nmi, %rdx + cmpq 8(%rsp), %rdx + ja 1f + movq $end_repeat_nmi, %rdx + cmpq 8(%rsp), %rdx + ja nested_nmi_out +1: + + /* + * Now check "NMI executing". If it's set, then we're nested. 
* This will not detect if we interrupted an outer NMI just * before IRET. */ @@ -1561,21 +1578,6 @@ ENTRY(nmi) CFI_REMEMBER_STATE nested_nmi: - /* - * If we interrupted an NMI that is between repeat_nmi and - * end_repeat_nmi, then we must not modify the "iret" frame - * because it's being written by the outer NMI. That's okay; - * the outer NMI handler is about to call do_nmi anyway, - * so we can just resume the outer NMI. - */ - movq $repeat_nmi, %rdx - cmpq 8(%rsp), %rdx - ja 1f - movq $end_repeat_nmi, %rdx - cmpq 8(%rsp), %rdx - ja nested_nmi_out - -1: /* * Modify the "iret" frame to point to repeat_nmi, forcing another * iteration of NMI handling. From 37df1cab0c4d4ec0f4bec868b2e26b84e725c478 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 15 Jul 2015 10:29:38 -0700 Subject: [PATCH 0569/2091] x86/nmi/64: Use DF to avoid userspace RSP confusing nested NMI detection commit 810bc075f78ff2c221536eb3008eac6a492dba2d upstream. We have a tricky bug in the nested NMI code: if we see RSP pointing to the NMI stack on NMI entry from kernel mode, we assume that we are executing a nested NMI. This isn't quite true. A malicious userspace program can point RSP at the NMI stack, issue SYSCALL, and arrange for an NMI to happen while RSP is still pointing at the NMI stack. Fix it with a sneaky trick. Set DF in the region of code that the RSP check is intended to detect. IRET will clear DF atomically. ( Note: other than paravirt, there's little need for all this complexity. We could check RIP instead of RSP. ) Signed-off-by: Andy Lutomirski Reviewed-by: Steven Rostedt Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/entry_64.S | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 3824867c89933..4bd6c197563dd 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1562,7 +1562,14 @@ ENTRY(nmi) /* * Now test if the previous stack was an NMI stack. This covers * the case where we interrupt an outer NMI after it clears - * "NMI executing" but before IRET. + * "NMI executing" but before IRET. We need to be careful, though: + * there is one case in which RSP could point to the NMI stack + * despite there being no NMI active: naughty userspace controls + * RSP at the very beginning of the SYSCALL targets. We can + * pull a fast one on naughty userspace, though: we program + * SYSCALL to mask DF, so userspace cannot cause DF to be set + * if it controls the kernel's RSP. We set DF before we clear + * "NMI executing". */ lea 6*8(%rsp), %rdx /* Compare the NMI stack (rdx) with the stack we came from (4*8(%rsp)) */ @@ -1573,7 +1580,13 @@ ENTRY(nmi) cmpq %rdx, 4*8(%rsp) /* If it is below the NMI stack, it is a normal NMI */ jb first_nmi - /* Ah, it is within the NMI stack, treat it as nested */ + + /* Ah, it is within the NMI stack. */ + + testb $(X86_EFLAGS_DF >> 8), (3*8 + 1)(%rsp) + jz first_nmi /* RSP was user controlled. */ + + /* This is a nested NMI. */ CFI_REMEMBER_STATE @@ -1691,8 +1704,16 @@ nmi_restore: /* Point RSP at the "iret" frame. */ REMOVE_PT_GPREGS_FROM_STACK 6*8 - /* Clear "NMI executing". */ - movq $0, 5*8(%rsp) + /* + * Clear "NMI executing". Set DF first so that we can easily + * distinguish the remaining code between here and IRET from + * the SYSCALL entry and exit paths. 
On a native kernel, we + * could just inspect RIP, but, on paravirt kernels, + * INTERRUPT_RETURN can translate into a jump into a + * hypercall page. + */ + std + movq $0, 5*8(%rsp) /* clear "NMI executing" */ /* * INTERRUPT_RETURN reads the "iret" frame and exits the NMI From c0e0ec977861b29059691d51a68bf563d3bc218a Mon Sep 17 00:00:00 2001 From: Cyrille Pitchen Date: Tue, 30 Jun 2015 14:36:57 +0200 Subject: [PATCH 0570/2091] dmaengine: at_xdmac: fix transfer data width in at_xdmac_prep_slave_sg() commit 1c8a38b1268aebc1a903b21b11575077e02d2cf7 upstream. This patch adds the missing update of the transfer data width in at_xdmac_prep_slave_sg(). Indeed, for each item in the scatter-gather list, we check whether the transfer length is aligned with the data width provided by dmaengine_slave_config(). If so, we directly use this data width for the current part of the transfer we are preparing. Otherwise, the data width is reduced to 8 bits (1 byte). Of course, the actual number of register accesses must also be updated to match the new data width. So one chunk was missing in the original patch (see Fixes tag below): the number of register accesses was correctly set to (len >> fixed_dwidth) in mbr_ubc but the real data width was not updated in mbr_cfg. Since mbr_cfg may change for each part of the scatter-gather transfer this also explains why the original patch used the Descriptor View 2 instead of the Descriptor View 1. Let's take the example of a DMA transfer to write 8bit data into an Atmel USART with FIFOs. When FIFOs are enabled in the USART, its Transmit Holding Register (THR) works in multidata mode, that is to say that up to 4 8bit data can be written into the THR in a single 32bit access and it is still possible to write only one data with a 8bit access. To take advantage of this new feature, the DMA driver was modified to allow multiple dwidths when doing slave transfers. For instance, when the total length is 22 bytes, the USART driver splits the transfer into 2 parts: First part: 20 bytes transferred through 5 32bit writes into THR Second part: 2 bytes transferred though 2 8bit writes into THR For the second part, the data width was first set to 4_BYTES by the USART driver thanks to dmaengine_slave_config() then at_xdmac_prep_slave_sg() reduces this data width to 1_BYTE because the 2 byte length is not aligned with the original 4_BYTES data width. Since the data width is modified, the actual number of writes into THR must be set accordingly. Signed-off-by: Cyrille Pitchen Fixes: 6d3a7d9e3ada ("dmaengine: at_xdmac: allow muliple dwidths when doing slave transfers") Cc: stable@vger.kernel.org #4.0 and later Acked-by: Nicolas Ferre Acked-by: Ludovic Desroches Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/at_xdmac.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index 7992164ea9ec2..c89a7abb523fa 100644 --- a/drivers/dma/at_xdmac.c +++ b/drivers/dma/at_xdmac.c @@ -648,16 +648,17 @@ at_xdmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, desc->lld.mbr_sa = mem; desc->lld.mbr_da = atchan->sconfig.dst_addr; } - desc->lld.mbr_cfg = atchan->cfg; - dwidth = at_xdmac_get_dwidth(desc->lld.mbr_cfg); + dwidth = at_xdmac_get_dwidth(atchan->cfg); fixed_dwidth = IS_ALIGNED(len, 1 << dwidth) - ? at_xdmac_get_dwidth(desc->lld.mbr_cfg) + ? 
dwidth : AT_XDMAC_CC_DWIDTH_BYTE; desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV2 /* next descriptor view */ | AT_XDMAC_MBR_UBC_NDEN /* next descriptor dst parameter update */ | AT_XDMAC_MBR_UBC_NSEN /* next descriptor src parameter update */ | (i == sg_len - 1 ? 0 : AT_XDMAC_MBR_UBC_NDE) /* descriptor fetch */ | (len >> fixed_dwidth); /* microblock length */ + desc->lld.mbr_cfg = (atchan->cfg & ~AT_XDMAC_CC_DWIDTH_MASK) | + AT_XDMAC_CC_DWIDTH(fixed_dwidth); dev_dbg(chan2dev(chan), "%s: lld: mbr_sa=%pad, mbr_da=%pad, mbr_ubc=0x%08x\n", __func__, &desc->lld.mbr_sa, &desc->lld.mbr_da, desc->lld.mbr_ubc); From b2b6e1667f52af6477f4a360cefe8c463dfb60fa Mon Sep 17 00:00:00 2001 From: Leonidas Da Silva Barbosa Date: Thu, 23 Apr 2015 17:40:30 -0300 Subject: [PATCH 0571/2091] crypto: nx - Fixing NX data alignment with nx_sg list commit c3365ce130e50176533debe1cabebcdb8e61156c upstream. The NX coprocessor always requires an nx_sg_list whose size is a multiple of 16. The trim function takes care of this and ensures every nx_sg_list is a multiple of 16 in size, but the data length was not taken into account when the crop was done. This caused a mismatch between the size of the list and the size of the data, corrupting csbcpb fields and returning a -23 H_ST_PARM error or an invalid operation. This patch fixes this by recalculating how much data should be put back into the to_process variable, which keeps the size of the sg_list consistent with the size of the data. Signed-off-by: Leonidas S. Barbosa Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/nx/nx.c | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/nx/nx.c b/drivers/crypto/nx/nx.c index 1da6dc59d0dd1..4856f7287eae8 100644 --- a/drivers/crypto/nx/nx.c +++ b/drivers/crypto/nx/nx.c @@ -215,8 +215,15 @@ struct nx_sg *nx_walk_and_build(struct nx_sg *nx_dst, * @delta: is the amount we need to crop in order to bound the list. * */ -static long int trim_sg_list(struct nx_sg *sg, struct nx_sg *end, unsigned int delta) +static long int trim_sg_list(struct nx_sg *sg, + struct nx_sg *end, + unsigned int delta, + unsigned int *nbytes) { + long int oplen; + long int data_back; + unsigned int is_delta = delta; + while (delta && end > sg) { struct nx_sg *last = end - 1; @@ -228,7 +235,20 @@ static long int trim_sg_list(struct nx_sg *sg, struct nx_sg *end, unsigned int d delta -= last->len; } } - return (sg - end) * sizeof(struct nx_sg); + + /* There are cases where we need to crop list in order to make it + * a block size multiple, but we also need to align data.
In order to + * that we need to calculate how much we need to put back to be + * processed + */ + oplen = (sg - end) * sizeof(struct nx_sg); + if (is_delta) { + data_back = (abs(oplen) / AES_BLOCK_SIZE) * sg->len; + data_back = *nbytes - (data_back & ~(AES_BLOCK_SIZE - 1)); + *nbytes -= data_back; + } + + return oplen; } /** @@ -330,8 +350,8 @@ int nx_build_sg_lists(struct nx_crypto_ctx *nx_ctx, /* these lengths should be negative, which will indicate to phyp that * the input and output parameters are scatterlists, not linear * buffers */ - nx_ctx->op.inlen = trim_sg_list(nx_ctx->in_sg, nx_insg, delta); - nx_ctx->op.outlen = trim_sg_list(nx_ctx->out_sg, nx_outsg, delta); + nx_ctx->op.inlen = trim_sg_list(nx_ctx->in_sg, nx_insg, delta, nbytes); + nx_ctx->op.outlen = trim_sg_list(nx_ctx->out_sg, nx_outsg, delta, nbytes); return 0; } From 71a0c1ecf7004a5949cceaa4bb1240923e9d2000 Mon Sep 17 00:00:00 2001 From: Leonidas Da Silva Barbosa Date: Thu, 23 Apr 2015 17:41:43 -0300 Subject: [PATCH 0572/2091] crypto: nx - Fixing SHA update bug commit 10d87b730e1d9f1442cae6487bb3aef8632bed23 upstream. Bug happens when a data size less than SHA block size is passed. Since first attempt will be saved in buffer, second round attempt get into two step to calculate op.inlen and op.outlen. The issue resides in this step. A wrong value of op.inlen and outlen was being calculated. This patch fix this eliminate the nx_sha_build_sg_list, that is useless in SHA's algorithm context. Instead we call nx_build_sg_list directly and pass a previous calculated max_sg_len to it. Signed-off-by: Leonidas S. Barbosa Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/nx/nx-sha256.c | 84 +++++++++++++++++++--------------- drivers/crypto/nx/nx-sha512.c | 85 ++++++++++++++++++++--------------- drivers/crypto/nx/nx.c | 47 ------------------- drivers/crypto/nx/nx.h | 2 - 4 files changed, 99 insertions(+), 119 deletions(-) diff --git a/drivers/crypto/nx/nx-sha256.c b/drivers/crypto/nx/nx-sha256.c index 23621da624c35..4e91bdb83c594 100644 --- a/drivers/crypto/nx/nx-sha256.c +++ b/drivers/crypto/nx/nx-sha256.c @@ -33,8 +33,9 @@ static int nx_sha256_init(struct shash_desc *desc) { struct sha256_state *sctx = shash_desc_ctx(desc); struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); + struct nx_sg *out_sg; int len; - int rc; + u32 max_sg_len; nx_ctx_init(nx_ctx, HCOP_FC_SHA); @@ -44,15 +45,18 @@ static int nx_sha256_init(struct shash_desc *desc) NX_CPB_SET_DIGEST_SIZE(nx_ctx->csbcpb, NX_DS_SHA256); + max_sg_len = min_t(u64, nx_ctx->ap->sglen, + nx_driver.of.max_sg_len/sizeof(struct nx_sg)); + max_sg_len = min_t(u64, max_sg_len, + nx_ctx->ap->databytelen/NX_PAGE_SIZE); + len = SHA256_DIGEST_SIZE; - rc = nx_sha_build_sg_list(nx_ctx, nx_ctx->out_sg, - &nx_ctx->op.outlen, - &len, - (u8 *) sctx->state, - NX_DS_SHA256); + out_sg = nx_build_sg_list(nx_ctx->out_sg, (u8 *)sctx->state, + &len, max_sg_len); + nx_ctx->op.outlen = (nx_ctx->out_sg - out_sg) * sizeof(struct nx_sg); - if (rc) - goto out; + if (len != SHA256_DIGEST_SIZE) + return -EINVAL; sctx->state[0] = __cpu_to_be32(SHA256_H0); sctx->state[1] = __cpu_to_be32(SHA256_H1); @@ -64,7 +68,6 @@ static int nx_sha256_init(struct shash_desc *desc) sctx->state[7] = __cpu_to_be32(SHA256_H7); sctx->count = 0; -out: return 0; } @@ -74,10 +77,12 @@ static int nx_sha256_update(struct shash_desc *desc, const u8 *data, struct sha256_state *sctx = shash_desc_ctx(desc); struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); struct nx_csbcpb *csbcpb = 
(struct nx_csbcpb *)nx_ctx->csbcpb; + struct nx_sg *in_sg; u64 to_process = 0, leftover, total; unsigned long irq_flags; int rc = 0; int data_len; + u32 max_sg_len; u64 buf_len = (sctx->count % SHA256_BLOCK_SIZE); spin_lock_irqsave(&nx_ctx->lock, irq_flags); @@ -97,6 +102,12 @@ static int nx_sha256_update(struct shash_desc *desc, const u8 *data, NX_CPB_FDM(csbcpb) |= NX_FDM_INTERMEDIATE; NX_CPB_FDM(csbcpb) |= NX_FDM_CONTINUATION; + in_sg = nx_ctx->in_sg; + max_sg_len = min_t(u64, nx_ctx->ap->sglen, + nx_driver.of.max_sg_len/sizeof(struct nx_sg)); + max_sg_len = min_t(u64, max_sg_len, + nx_ctx->ap->databytelen/NX_PAGE_SIZE); + do { /* * to_process: the SHA256_BLOCK_SIZE data chunk to process in @@ -108,25 +119,22 @@ static int nx_sha256_update(struct shash_desc *desc, const u8 *data, if (buf_len) { data_len = buf_len; - rc = nx_sha_build_sg_list(nx_ctx, nx_ctx->in_sg, - &nx_ctx->op.inlen, - &data_len, - (u8 *) sctx->buf, - NX_DS_SHA256); + in_sg = nx_build_sg_list(nx_ctx->in_sg, + (u8 *) sctx->buf, + &data_len, + max_sg_len); - if (rc || data_len != buf_len) + if (data_len != buf_len) { + rc = -EINVAL; goto out; + } } data_len = to_process - buf_len; - rc = nx_sha_build_sg_list(nx_ctx, nx_ctx->in_sg, - &nx_ctx->op.inlen, - &data_len, - (u8 *) data, - NX_DS_SHA256); + in_sg = nx_build_sg_list(in_sg, (u8 *) data, + &data_len, max_sg_len); - if (rc) - goto out; + nx_ctx->op.inlen = (nx_ctx->in_sg - in_sg) * sizeof(struct nx_sg); to_process = (data_len + buf_len); leftover = total - to_process; @@ -173,12 +181,19 @@ static int nx_sha256_final(struct shash_desc *desc, u8 *out) struct sha256_state *sctx = shash_desc_ctx(desc); struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); struct nx_csbcpb *csbcpb = (struct nx_csbcpb *)nx_ctx->csbcpb; + struct nx_sg *in_sg, *out_sg; unsigned long irq_flags; - int rc; + u32 max_sg_len; + int rc = 0; int len; spin_lock_irqsave(&nx_ctx->lock, irq_flags); + max_sg_len = min_t(u64, nx_ctx->ap->sglen, + nx_driver.of.max_sg_len/sizeof(struct nx_sg)); + max_sg_len = min_t(u64, max_sg_len, + nx_ctx->ap->databytelen/NX_PAGE_SIZE); + /* final is represented by continuing the operation and indicating that * this is not an intermediate operation */ if (sctx->count >= SHA256_BLOCK_SIZE) { @@ -195,25 +210,24 @@ static int nx_sha256_final(struct shash_desc *desc, u8 *out) csbcpb->cpb.sha256.message_bit_length = (u64) (sctx->count * 8); len = sctx->count & (SHA256_BLOCK_SIZE - 1); - rc = nx_sha_build_sg_list(nx_ctx, nx_ctx->in_sg, - &nx_ctx->op.inlen, - &len, - (u8 *) sctx->buf, - NX_DS_SHA256); + in_sg = nx_build_sg_list(nx_ctx->in_sg, (u8 *) sctx->buf, + &len, max_sg_len); - if (rc || len != (sctx->count & (SHA256_BLOCK_SIZE - 1))) + if (len != (sctx->count & (SHA256_BLOCK_SIZE - 1))) { + rc = -EINVAL; goto out; + } len = SHA256_DIGEST_SIZE; - rc = nx_sha_build_sg_list(nx_ctx, nx_ctx->out_sg, - &nx_ctx->op.outlen, - &len, - out, - NX_DS_SHA256); + out_sg = nx_build_sg_list(nx_ctx->out_sg, out, &len, max_sg_len); - if (rc || len != SHA256_DIGEST_SIZE) + if (len != SHA256_DIGEST_SIZE) { + rc = -EINVAL; goto out; + } + nx_ctx->op.inlen = (nx_ctx->in_sg - in_sg) * sizeof(struct nx_sg); + nx_ctx->op.outlen = (nx_ctx->out_sg - out_sg) * sizeof(struct nx_sg); if (!nx_ctx->op.outlen) { rc = -EINVAL; goto out; diff --git a/drivers/crypto/nx/nx-sha512.c b/drivers/crypto/nx/nx-sha512.c index b3adf10226733..e6a58d2ee6289 100644 --- a/drivers/crypto/nx/nx-sha512.c +++ b/drivers/crypto/nx/nx-sha512.c @@ -32,8 +32,9 @@ static int nx_sha512_init(struct shash_desc *desc) { 
struct sha512_state *sctx = shash_desc_ctx(desc); struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); + struct nx_sg *out_sg; int len; - int rc; + u32 max_sg_len; nx_ctx_init(nx_ctx, HCOP_FC_SHA); @@ -43,15 +44,18 @@ static int nx_sha512_init(struct shash_desc *desc) NX_CPB_SET_DIGEST_SIZE(nx_ctx->csbcpb, NX_DS_SHA512); + max_sg_len = min_t(u64, nx_ctx->ap->sglen, + nx_driver.of.max_sg_len/sizeof(struct nx_sg)); + max_sg_len = min_t(u64, max_sg_len, + nx_ctx->ap->databytelen/NX_PAGE_SIZE); + len = SHA512_DIGEST_SIZE; - rc = nx_sha_build_sg_list(nx_ctx, nx_ctx->out_sg, - &nx_ctx->op.outlen, - &len, - (u8 *)sctx->state, - NX_DS_SHA512); + out_sg = nx_build_sg_list(nx_ctx->out_sg, (u8 *)sctx->state, + &len, max_sg_len); + nx_ctx->op.outlen = (nx_ctx->out_sg - out_sg) * sizeof(struct nx_sg); - if (rc || len != SHA512_DIGEST_SIZE) - goto out; + if (len != SHA512_DIGEST_SIZE) + return -EINVAL; sctx->state[0] = __cpu_to_be64(SHA512_H0); sctx->state[1] = __cpu_to_be64(SHA512_H1); @@ -63,7 +67,6 @@ static int nx_sha512_init(struct shash_desc *desc) sctx->state[7] = __cpu_to_be64(SHA512_H7); sctx->count[0] = 0; -out: return 0; } @@ -73,10 +76,12 @@ static int nx_sha512_update(struct shash_desc *desc, const u8 *data, struct sha512_state *sctx = shash_desc_ctx(desc); struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); struct nx_csbcpb *csbcpb = (struct nx_csbcpb *)nx_ctx->csbcpb; + struct nx_sg *in_sg; u64 to_process, leftover = 0, total; unsigned long irq_flags; int rc = 0; int data_len; + u32 max_sg_len; u64 buf_len = (sctx->count[0] % SHA512_BLOCK_SIZE); spin_lock_irqsave(&nx_ctx->lock, irq_flags); @@ -96,6 +101,12 @@ static int nx_sha512_update(struct shash_desc *desc, const u8 *data, NX_CPB_FDM(csbcpb) |= NX_FDM_INTERMEDIATE; NX_CPB_FDM(csbcpb) |= NX_FDM_CONTINUATION; + in_sg = nx_ctx->in_sg; + max_sg_len = min_t(u64, nx_ctx->ap->sglen, + nx_driver.of.max_sg_len/sizeof(struct nx_sg)); + max_sg_len = min_t(u64, max_sg_len, + nx_ctx->ap->databytelen/NX_PAGE_SIZE); + do { /* * to_process: the SHA512_BLOCK_SIZE data chunk to process in @@ -108,25 +119,26 @@ static int nx_sha512_update(struct shash_desc *desc, const u8 *data, if (buf_len) { data_len = buf_len; - rc = nx_sha_build_sg_list(nx_ctx, nx_ctx->in_sg, - &nx_ctx->op.inlen, - &data_len, - (u8 *) sctx->buf, - NX_DS_SHA512); + in_sg = nx_build_sg_list(nx_ctx->in_sg, + (u8 *) sctx->buf, + &data_len, max_sg_len); - if (rc || data_len != buf_len) + if (data_len != buf_len) { + rc = -EINVAL; goto out; + } } data_len = to_process - buf_len; - rc = nx_sha_build_sg_list(nx_ctx, nx_ctx->in_sg, - &nx_ctx->op.inlen, - &data_len, - (u8 *) data, - NX_DS_SHA512); + in_sg = nx_build_sg_list(in_sg, (u8 *) data, + &data_len, max_sg_len); - if (rc || data_len != (to_process - buf_len)) + nx_ctx->op.inlen = (nx_ctx->in_sg - in_sg) * sizeof(struct nx_sg); + + if (data_len != (to_process - buf_len)) { + rc = -EINVAL; goto out; + } to_process = (data_len + buf_len); leftover = total - to_process; @@ -172,13 +184,20 @@ static int nx_sha512_final(struct shash_desc *desc, u8 *out) struct sha512_state *sctx = shash_desc_ctx(desc); struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); struct nx_csbcpb *csbcpb = (struct nx_csbcpb *)nx_ctx->csbcpb; + struct nx_sg *in_sg, *out_sg; + u32 max_sg_len; u64 count0; unsigned long irq_flags; - int rc; + int rc = 0; int len; spin_lock_irqsave(&nx_ctx->lock, irq_flags); + max_sg_len = min_t(u64, nx_ctx->ap->sglen, + nx_driver.of.max_sg_len/sizeof(struct nx_sg)); + max_sg_len = min_t(u64, 
max_sg_len, + nx_ctx->ap->databytelen/NX_PAGE_SIZE); + /* final is represented by continuing the operation and indicating that * this is not an intermediate operation */ if (sctx->count[0] >= SHA512_BLOCK_SIZE) { @@ -200,24 +219,20 @@ static int nx_sha512_final(struct shash_desc *desc, u8 *out) csbcpb->cpb.sha512.message_bit_length_lo = count0; len = sctx->count[0] & (SHA512_BLOCK_SIZE - 1); - rc = nx_sha_build_sg_list(nx_ctx, nx_ctx->in_sg, - &nx_ctx->op.inlen, - &len, - (u8 *)sctx->buf, - NX_DS_SHA512); + in_sg = nx_build_sg_list(nx_ctx->in_sg, sctx->buf, &len, + max_sg_len); - if (rc || len != (sctx->count[0] & (SHA512_BLOCK_SIZE - 1))) + if (len != (sctx->count[0] & (SHA512_BLOCK_SIZE - 1))) { + rc = -EINVAL; goto out; + } len = SHA512_DIGEST_SIZE; - rc = nx_sha_build_sg_list(nx_ctx, nx_ctx->out_sg, - &nx_ctx->op.outlen, - &len, - out, - NX_DS_SHA512); + out_sg = nx_build_sg_list(nx_ctx->out_sg, out, &len, + max_sg_len); - if (rc) - goto out; + nx_ctx->op.inlen = (nx_ctx->in_sg - in_sg) * sizeof(struct nx_sg); + nx_ctx->op.outlen = (nx_ctx->out_sg - out_sg) * sizeof(struct nx_sg); if (!nx_ctx->op.outlen) { rc = -EINVAL; diff --git a/drivers/crypto/nx/nx.c b/drivers/crypto/nx/nx.c index 4856f7287eae8..2e2529ce8d313 100644 --- a/drivers/crypto/nx/nx.c +++ b/drivers/crypto/nx/nx.c @@ -251,53 +251,6 @@ static long int trim_sg_list(struct nx_sg *sg, return oplen; } -/** - * nx_sha_build_sg_list - walk and build sg list to sha modes - * using right bounds and limits. - * @nx_ctx: NX crypto context for the lists we're building - * @nx_sg: current sg list in or out list - * @op_len: current op_len to be used in order to build a sg list - * @nbytes: number or bytes to be processed - * @offset: buf offset - * @mode: SHA256 or SHA512 - */ -int nx_sha_build_sg_list(struct nx_crypto_ctx *nx_ctx, - struct nx_sg *nx_in_outsg, - s64 *op_len, - unsigned int *nbytes, - u8 *offset, - u32 mode) -{ - unsigned int delta = 0; - unsigned int total = *nbytes; - struct nx_sg *nx_insg = nx_in_outsg; - unsigned int max_sg_len; - - max_sg_len = min_t(u64, nx_ctx->ap->sglen, - nx_driver.of.max_sg_len/sizeof(struct nx_sg)); - max_sg_len = min_t(u64, max_sg_len, - nx_ctx->ap->databytelen/NX_PAGE_SIZE); - - *nbytes = min_t(u64, *nbytes, nx_ctx->ap->databytelen); - nx_insg = nx_build_sg_list(nx_insg, offset, nbytes, max_sg_len); - - switch (mode) { - case NX_DS_SHA256: - if (*nbytes < total) - delta = *nbytes - (*nbytes & ~(SHA256_BLOCK_SIZE - 1)); - break; - case NX_DS_SHA512: - if (*nbytes < total) - delta = *nbytes - (*nbytes & ~(SHA512_BLOCK_SIZE - 1)); - break; - default: - return -EINVAL; - } - *op_len = trim_sg_list(nx_in_outsg, nx_insg, delta); - - return 0; -} - /** * nx_build_sg_lists - walk the input scatterlists and build arrays of NX * scatterlists based on them. 
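As a reading aid for these hunks: the pattern that replaces nx_sha_build_sg_list() throughout nx-sha256.c and nx-sha512.c boils down to the sketch below (variable names as in nx_sha256_update() above; locking and the remaining error paths omitted):

        /* Clamp the SG list length once, up front, exactly as in the hunks above. */
        max_sg_len = min_t(u64, nx_ctx->ap->sglen,
                           nx_driver.of.max_sg_len / sizeof(struct nx_sg));
        max_sg_len = min_t(u64, max_sg_len,
                           nx_ctx->ap->databytelen / NX_PAGE_SIZE);

        /* Build the list directly; the returned cursor yields the (negative) op length. */
        data_len = buf_len;
        in_sg = nx_build_sg_list(nx_ctx->in_sg, (u8 *)sctx->buf,
                                 &data_len, max_sg_len);
        if (data_len != buf_len)
                return -EINVAL;         /* list could not cover the whole buffer */
        nx_ctx->op.inlen = (nx_ctx->in_sg - in_sg) * sizeof(struct nx_sg);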
diff --git a/drivers/crypto/nx/nx.h b/drivers/crypto/nx/nx.h index 6c9ecaaead52f..41b87ee03fe26 100644 --- a/drivers/crypto/nx/nx.h +++ b/drivers/crypto/nx/nx.h @@ -153,8 +153,6 @@ void nx_crypto_ctx_exit(struct crypto_tfm *tfm); void nx_ctx_init(struct nx_crypto_ctx *nx_ctx, unsigned int function); int nx_hcall_sync(struct nx_crypto_ctx *ctx, struct vio_pfo_op *op, u32 may_sleep); -int nx_sha_build_sg_list(struct nx_crypto_ctx *, struct nx_sg *, - s64 *, unsigned int *, u8 *, u32); struct nx_sg *nx_build_sg_list(struct nx_sg *, u8 *, unsigned int *, u32); int nx_build_sg_lists(struct nx_crypto_ctx *, struct blkcipher_desc *, struct scatterlist *, struct scatterlist *, unsigned int *, From 2d089963836a8e884df3788fad73d5048d0d88b1 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 7 Jul 2015 17:30:25 +0800 Subject: [PATCH 0573/2091] crypto: nx - Fix reentrancy bugs commit 030f4e968741d65aea9cd5f7814d1164967801ef upstream. This patch fixes a host of reentrancy bugs in the nx driver. The following algorithms are affected: * CCM * GCM * CTR * XCBC * SHA256 * SHA512 The crypto API allows a single transform to be used by multiple threads simultaneously. For example, IPsec will use a single tfm to process packets for a given SA. As packets may arrive on multiple CPUs that tfm must be reentrant. The nx driver does try to deal with this by using a spin lock. Unfortunately only the basic AES/CBC/ECB algorithms do this in the correct way. The symptom of these bugs may range from the generation of incorrect output to memory corruption. Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/nx/nx-aes-ccm.c | 6 ++- drivers/crypto/nx/nx-aes-ctr.c | 7 ++-- drivers/crypto/nx/nx-aes-gcm.c | 17 ++++---- drivers/crypto/nx/nx-aes-xcbc.c | 70 +++++++++++++++++++++------------ drivers/crypto/nx/nx-sha256.c | 43 +++++++++++--------- drivers/crypto/nx/nx-sha512.c | 44 ++++++++++++--------- drivers/crypto/nx/nx.c | 2 + drivers/crypto/nx/nx.h | 14 +++++-- 8 files changed, 124 insertions(+), 79 deletions(-) diff --git a/drivers/crypto/nx/nx-aes-ccm.c b/drivers/crypto/nx/nx-aes-ccm.c index 67f80813a06f9..e4311ce0cd78c 100644 --- a/drivers/crypto/nx/nx-aes-ccm.c +++ b/drivers/crypto/nx/nx-aes-ccm.c @@ -494,8 +494,9 @@ static int ccm_nx_encrypt(struct aead_request *req, static int ccm4309_aes_nx_encrypt(struct aead_request *req) { struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(req->base.tfm); + struct nx_gcm_rctx *rctx = aead_request_ctx(req); struct blkcipher_desc desc; - u8 *iv = nx_ctx->priv.ccm.iv; + u8 *iv = rctx->iv; iv[0] = 3; memcpy(iv + 1, nx_ctx->priv.ccm.nonce, 3); @@ -525,8 +526,9 @@ static int ccm_aes_nx_encrypt(struct aead_request *req) static int ccm4309_aes_nx_decrypt(struct aead_request *req) { struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(req->base.tfm); + struct nx_gcm_rctx *rctx = aead_request_ctx(req); struct blkcipher_desc desc; - u8 *iv = nx_ctx->priv.ccm.iv; + u8 *iv = rctx->iv; iv[0] = 3; memcpy(iv + 1, nx_ctx->priv.ccm.nonce, 3); diff --git a/drivers/crypto/nx/nx-aes-ctr.c b/drivers/crypto/nx/nx-aes-ctr.c index 2617cd4d54dd2..dd7e9f3f5b6b2 100644 --- a/drivers/crypto/nx/nx-aes-ctr.c +++ b/drivers/crypto/nx/nx-aes-ctr.c @@ -72,7 +72,7 @@ static int ctr3686_aes_nx_set_key(struct crypto_tfm *tfm, if (key_len < CTR_RFC3686_NONCE_SIZE) return -EINVAL; - memcpy(nx_ctx->priv.ctr.iv, + memcpy(nx_ctx->priv.ctr.nonce, in_key + key_len - CTR_RFC3686_NONCE_SIZE, CTR_RFC3686_NONCE_SIZE); @@ -131,14 +131,15 @@ static int ctr3686_aes_nx_crypt(struct blkcipher_desc *desc, unsigned int 
nbytes) { struct nx_crypto_ctx *nx_ctx = crypto_blkcipher_ctx(desc->tfm); - u8 *iv = nx_ctx->priv.ctr.iv; + u8 iv[16]; + memcpy(iv, nx_ctx->priv.ctr.nonce, CTR_RFC3686_IV_SIZE); memcpy(iv + CTR_RFC3686_NONCE_SIZE, desc->info, CTR_RFC3686_IV_SIZE); iv[12] = iv[13] = iv[14] = 0; iv[15] = 1; - desc->info = nx_ctx->priv.ctr.iv; + desc->info = iv; return ctr_aes_nx_crypt(desc, dst, src, nbytes); } diff --git a/drivers/crypto/nx/nx-aes-gcm.c b/drivers/crypto/nx/nx-aes-gcm.c index 88c562434bc0b..c6ebeb644db4c 100644 --- a/drivers/crypto/nx/nx-aes-gcm.c +++ b/drivers/crypto/nx/nx-aes-gcm.c @@ -330,6 +330,7 @@ static int gcm_empty(struct aead_request *req, struct blkcipher_desc *desc, static int gcm_aes_nx_crypt(struct aead_request *req, int enc) { struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(req->base.tfm); + struct nx_gcm_rctx *rctx = aead_request_ctx(req); struct nx_csbcpb *csbcpb = nx_ctx->csbcpb; struct blkcipher_desc desc; unsigned int nbytes = req->cryptlen; @@ -339,7 +340,7 @@ static int gcm_aes_nx_crypt(struct aead_request *req, int enc) spin_lock_irqsave(&nx_ctx->lock, irq_flags); - desc.info = nx_ctx->priv.gcm.iv; + desc.info = rctx->iv; /* initialize the counter */ *(u32 *)(desc.info + NX_GCM_CTR_OFFSET) = 1; @@ -434,8 +435,8 @@ static int gcm_aes_nx_crypt(struct aead_request *req, int enc) static int gcm_aes_nx_encrypt(struct aead_request *req) { - struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(req->base.tfm); - char *iv = nx_ctx->priv.gcm.iv; + struct nx_gcm_rctx *rctx = aead_request_ctx(req); + char *iv = rctx->iv; memcpy(iv, req->iv, 12); @@ -444,8 +445,8 @@ static int gcm_aes_nx_encrypt(struct aead_request *req) static int gcm_aes_nx_decrypt(struct aead_request *req) { - struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(req->base.tfm); - char *iv = nx_ctx->priv.gcm.iv; + struct nx_gcm_rctx *rctx = aead_request_ctx(req); + char *iv = rctx->iv; memcpy(iv, req->iv, 12); @@ -455,7 +456,8 @@ static int gcm_aes_nx_decrypt(struct aead_request *req) static int gcm4106_aes_nx_encrypt(struct aead_request *req) { struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(req->base.tfm); - char *iv = nx_ctx->priv.gcm.iv; + struct nx_gcm_rctx *rctx = aead_request_ctx(req); + char *iv = rctx->iv; char *nonce = nx_ctx->priv.gcm.nonce; memcpy(iv, nonce, NX_GCM4106_NONCE_LEN); @@ -467,7 +469,8 @@ static int gcm4106_aes_nx_encrypt(struct aead_request *req) static int gcm4106_aes_nx_decrypt(struct aead_request *req) { struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(req->base.tfm); - char *iv = nx_ctx->priv.gcm.iv; + struct nx_gcm_rctx *rctx = aead_request_ctx(req); + char *iv = rctx->iv; char *nonce = nx_ctx->priv.gcm.nonce; memcpy(iv, nonce, NX_GCM4106_NONCE_LEN); diff --git a/drivers/crypto/nx/nx-aes-xcbc.c b/drivers/crypto/nx/nx-aes-xcbc.c index 8c2faffab4a35..c2f7d4befb559 100644 --- a/drivers/crypto/nx/nx-aes-xcbc.c +++ b/drivers/crypto/nx/nx-aes-xcbc.c @@ -42,6 +42,7 @@ static int nx_xcbc_set_key(struct crypto_shash *desc, unsigned int key_len) { struct nx_crypto_ctx *nx_ctx = crypto_shash_ctx(desc); + struct nx_csbcpb *csbcpb = nx_ctx->csbcpb; switch (key_len) { case AES_KEYSIZE_128: @@ -51,7 +52,7 @@ static int nx_xcbc_set_key(struct crypto_shash *desc, return -EINVAL; } - memcpy(nx_ctx->priv.xcbc.key, in_key, key_len); + memcpy(csbcpb->cpb.aes_xcbc.key, in_key, key_len); return 0; } @@ -148,32 +149,29 @@ static int nx_xcbc_empty(struct shash_desc *desc, u8 *out) return rc; } -static int nx_xcbc_init(struct shash_desc *desc) +static int nx_crypto_ctx_aes_xcbc_init2(struct crypto_tfm *tfm) { - struct xcbc_state 
*sctx = shash_desc_ctx(desc); - struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); + struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(tfm); struct nx_csbcpb *csbcpb = nx_ctx->csbcpb; - struct nx_sg *out_sg; - int len; + int err; - nx_ctx_init(nx_ctx, HCOP_FC_AES); + err = nx_crypto_ctx_aes_xcbc_init(tfm); + if (err) + return err; - memset(sctx, 0, sizeof *sctx); + nx_ctx_init(nx_ctx, HCOP_FC_AES); NX_CPB_SET_KEY_SIZE(csbcpb, NX_KS_AES_128); csbcpb->cpb.hdr.mode = NX_MODE_AES_XCBC_MAC; - memcpy(csbcpb->cpb.aes_xcbc.key, nx_ctx->priv.xcbc.key, AES_BLOCK_SIZE); - memset(nx_ctx->priv.xcbc.key, 0, sizeof *nx_ctx->priv.xcbc.key); - - len = AES_BLOCK_SIZE; - out_sg = nx_build_sg_list(nx_ctx->out_sg, (u8 *)sctx->state, - &len, nx_ctx->ap->sglen); + return 0; +} - if (len != AES_BLOCK_SIZE) - return -EINVAL; +static int nx_xcbc_init(struct shash_desc *desc) +{ + struct xcbc_state *sctx = shash_desc_ctx(desc); - nx_ctx->op.outlen = (nx_ctx->out_sg - out_sg) * sizeof(struct nx_sg); + memset(sctx, 0, sizeof *sctx); return 0; } @@ -186,6 +184,7 @@ static int nx_xcbc_update(struct shash_desc *desc, struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); struct nx_csbcpb *csbcpb = nx_ctx->csbcpb; struct nx_sg *in_sg; + struct nx_sg *out_sg; u32 to_process = 0, leftover, total; unsigned int max_sg_len; unsigned long irq_flags; @@ -213,6 +212,17 @@ static int nx_xcbc_update(struct shash_desc *desc, max_sg_len = min_t(u64, max_sg_len, nx_ctx->ap->databytelen/NX_PAGE_SIZE); + data_len = AES_BLOCK_SIZE; + out_sg = nx_build_sg_list(nx_ctx->out_sg, (u8 *)sctx->state, + &len, nx_ctx->ap->sglen); + + if (data_len != AES_BLOCK_SIZE) { + rc = -EINVAL; + goto out; + } + + nx_ctx->op.outlen = (nx_ctx->out_sg - out_sg) * sizeof(struct nx_sg); + do { to_process = total - to_process; to_process = to_process & ~(AES_BLOCK_SIZE - 1); @@ -235,8 +245,10 @@ static int nx_xcbc_update(struct shash_desc *desc, (u8 *) sctx->buffer, &data_len, max_sg_len); - if (data_len != sctx->count) - return -EINVAL; + if (data_len != sctx->count) { + rc = -EINVAL; + goto out; + } } data_len = to_process - sctx->count; @@ -245,8 +257,10 @@ static int nx_xcbc_update(struct shash_desc *desc, &data_len, max_sg_len); - if (data_len != to_process - sctx->count) - return -EINVAL; + if (data_len != to_process - sctx->count) { + rc = -EINVAL; + goto out; + } nx_ctx->op.inlen = (nx_ctx->in_sg - in_sg) * sizeof(struct nx_sg); @@ -325,15 +339,19 @@ static int nx_xcbc_final(struct shash_desc *desc, u8 *out) in_sg = nx_build_sg_list(nx_ctx->in_sg, (u8 *)sctx->buffer, &len, nx_ctx->ap->sglen); - if (len != sctx->count) - return -EINVAL; + if (len != sctx->count) { + rc = -EINVAL; + goto out; + } len = AES_BLOCK_SIZE; out_sg = nx_build_sg_list(nx_ctx->out_sg, out, &len, nx_ctx->ap->sglen); - if (len != AES_BLOCK_SIZE) - return -EINVAL; + if (len != AES_BLOCK_SIZE) { + rc = -EINVAL; + goto out; + } nx_ctx->op.inlen = (nx_ctx->in_sg - in_sg) * sizeof(struct nx_sg); nx_ctx->op.outlen = (nx_ctx->out_sg - out_sg) * sizeof(struct nx_sg); @@ -372,7 +390,7 @@ struct shash_alg nx_shash_aes_xcbc_alg = { .cra_blocksize = AES_BLOCK_SIZE, .cra_module = THIS_MODULE, .cra_ctxsize = sizeof(struct nx_crypto_ctx), - .cra_init = nx_crypto_ctx_aes_xcbc_init, + .cra_init = nx_crypto_ctx_aes_xcbc_init2, .cra_exit = nx_crypto_ctx_exit, } }; diff --git a/drivers/crypto/nx/nx-sha256.c b/drivers/crypto/nx/nx-sha256.c index 4e91bdb83c594..08f8d5cd63349 100644 --- a/drivers/crypto/nx/nx-sha256.c +++ b/drivers/crypto/nx/nx-sha256.c @@ -29,34 +29,28 @@ #include 
"nx.h" -static int nx_sha256_init(struct shash_desc *desc) +static int nx_crypto_ctx_sha256_init(struct crypto_tfm *tfm) { - struct sha256_state *sctx = shash_desc_ctx(desc); - struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); - struct nx_sg *out_sg; - int len; - u32 max_sg_len; + struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(tfm); + int err; - nx_ctx_init(nx_ctx, HCOP_FC_SHA); + err = nx_crypto_ctx_sha_init(tfm); + if (err) + return err; - memset(sctx, 0, sizeof *sctx); + nx_ctx_init(nx_ctx, HCOP_FC_SHA); nx_ctx->ap = &nx_ctx->props[NX_PROPS_SHA256]; NX_CPB_SET_DIGEST_SIZE(nx_ctx->csbcpb, NX_DS_SHA256); - max_sg_len = min_t(u64, nx_ctx->ap->sglen, - nx_driver.of.max_sg_len/sizeof(struct nx_sg)); - max_sg_len = min_t(u64, max_sg_len, - nx_ctx->ap->databytelen/NX_PAGE_SIZE); + return 0; +} - len = SHA256_DIGEST_SIZE; - out_sg = nx_build_sg_list(nx_ctx->out_sg, (u8 *)sctx->state, - &len, max_sg_len); - nx_ctx->op.outlen = (nx_ctx->out_sg - out_sg) * sizeof(struct nx_sg); +static int nx_sha256_init(struct shash_desc *desc) { + struct sha256_state *sctx = shash_desc_ctx(desc); - if (len != SHA256_DIGEST_SIZE) - return -EINVAL; + memset(sctx, 0, sizeof *sctx); sctx->state[0] = __cpu_to_be32(SHA256_H0); sctx->state[1] = __cpu_to_be32(SHA256_H1); @@ -78,6 +72,7 @@ static int nx_sha256_update(struct shash_desc *desc, const u8 *data, struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); struct nx_csbcpb *csbcpb = (struct nx_csbcpb *)nx_ctx->csbcpb; struct nx_sg *in_sg; + struct nx_sg *out_sg; u64 to_process = 0, leftover, total; unsigned long irq_flags; int rc = 0; @@ -108,6 +103,16 @@ static int nx_sha256_update(struct shash_desc *desc, const u8 *data, max_sg_len = min_t(u64, max_sg_len, nx_ctx->ap->databytelen/NX_PAGE_SIZE); + data_len = SHA256_DIGEST_SIZE; + out_sg = nx_build_sg_list(nx_ctx->out_sg, (u8 *)sctx->state, + &data_len, max_sg_len); + nx_ctx->op.outlen = (nx_ctx->out_sg - out_sg) * sizeof(struct nx_sg); + + if (data_len != SHA256_DIGEST_SIZE) { + rc = -EINVAL; + goto out; + } + do { /* * to_process: the SHA256_BLOCK_SIZE data chunk to process in @@ -282,7 +287,7 @@ struct shash_alg nx_shash_sha256_alg = { .cra_blocksize = SHA256_BLOCK_SIZE, .cra_module = THIS_MODULE, .cra_ctxsize = sizeof(struct nx_crypto_ctx), - .cra_init = nx_crypto_ctx_sha_init, + .cra_init = nx_crypto_ctx_sha256_init, .cra_exit = nx_crypto_ctx_exit, } }; diff --git a/drivers/crypto/nx/nx-sha512.c b/drivers/crypto/nx/nx-sha512.c index e6a58d2ee6289..aff0fe58eac0b 100644 --- a/drivers/crypto/nx/nx-sha512.c +++ b/drivers/crypto/nx/nx-sha512.c @@ -28,34 +28,29 @@ #include "nx.h" -static int nx_sha512_init(struct shash_desc *desc) +static int nx_crypto_ctx_sha512_init(struct crypto_tfm *tfm) { - struct sha512_state *sctx = shash_desc_ctx(desc); - struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); - struct nx_sg *out_sg; - int len; - u32 max_sg_len; + struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(tfm); + int err; - nx_ctx_init(nx_ctx, HCOP_FC_SHA); + err = nx_crypto_ctx_sha_init(tfm); + if (err) + return err; - memset(sctx, 0, sizeof *sctx); + nx_ctx_init(nx_ctx, HCOP_FC_SHA); nx_ctx->ap = &nx_ctx->props[NX_PROPS_SHA512]; NX_CPB_SET_DIGEST_SIZE(nx_ctx->csbcpb, NX_DS_SHA512); - max_sg_len = min_t(u64, nx_ctx->ap->sglen, - nx_driver.of.max_sg_len/sizeof(struct nx_sg)); - max_sg_len = min_t(u64, max_sg_len, - nx_ctx->ap->databytelen/NX_PAGE_SIZE); + return 0; +} - len = SHA512_DIGEST_SIZE; - out_sg = nx_build_sg_list(nx_ctx->out_sg, (u8 *)sctx->state, - &len, max_sg_len); - 
nx_ctx->op.outlen = (nx_ctx->out_sg - out_sg) * sizeof(struct nx_sg); +static int nx_sha512_init(struct shash_desc *desc) +{ + struct sha512_state *sctx = shash_desc_ctx(desc); - if (len != SHA512_DIGEST_SIZE) - return -EINVAL; + memset(sctx, 0, sizeof *sctx); sctx->state[0] = __cpu_to_be64(SHA512_H0); sctx->state[1] = __cpu_to_be64(SHA512_H1); @@ -77,6 +72,7 @@ static int nx_sha512_update(struct shash_desc *desc, const u8 *data, struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); struct nx_csbcpb *csbcpb = (struct nx_csbcpb *)nx_ctx->csbcpb; struct nx_sg *in_sg; + struct nx_sg *out_sg; u64 to_process, leftover = 0, total; unsigned long irq_flags; int rc = 0; @@ -107,6 +103,16 @@ static int nx_sha512_update(struct shash_desc *desc, const u8 *data, max_sg_len = min_t(u64, max_sg_len, nx_ctx->ap->databytelen/NX_PAGE_SIZE); + data_len = SHA512_DIGEST_SIZE; + out_sg = nx_build_sg_list(nx_ctx->out_sg, (u8 *)sctx->state, + &data_len, max_sg_len); + nx_ctx->op.outlen = (nx_ctx->out_sg - out_sg) * sizeof(struct nx_sg); + + if (data_len != SHA512_DIGEST_SIZE) { + rc = -EINVAL; + goto out; + } + do { /* * to_process: the SHA512_BLOCK_SIZE data chunk to process in @@ -288,7 +294,7 @@ struct shash_alg nx_shash_sha512_alg = { .cra_blocksize = SHA512_BLOCK_SIZE, .cra_module = THIS_MODULE, .cra_ctxsize = sizeof(struct nx_crypto_ctx), - .cra_init = nx_crypto_ctx_sha_init, + .cra_init = nx_crypto_ctx_sha512_init, .cra_exit = nx_crypto_ctx_exit, } }; diff --git a/drivers/crypto/nx/nx.c b/drivers/crypto/nx/nx.c index 2e2529ce8d313..737d33dc50b8e 100644 --- a/drivers/crypto/nx/nx.c +++ b/drivers/crypto/nx/nx.c @@ -635,12 +635,14 @@ static int nx_crypto_ctx_init(struct nx_crypto_ctx *nx_ctx, u32 fc, u32 mode) /* entry points from the crypto tfm initializers */ int nx_crypto_ctx_aes_ccm_init(struct crypto_tfm *tfm) { + tfm->crt_aead.reqsize = sizeof(struct nx_ccm_rctx); return nx_crypto_ctx_init(crypto_tfm_ctx(tfm), NX_FC_AES, NX_MODE_AES_CCM); } int nx_crypto_ctx_aes_gcm_init(struct crypto_tfm *tfm) { + tfm->crt_aead.reqsize = sizeof(struct nx_gcm_rctx); return nx_crypto_ctx_init(crypto_tfm_ctx(tfm), NX_FC_AES, NX_MODE_AES_GCM); } diff --git a/drivers/crypto/nx/nx.h b/drivers/crypto/nx/nx.h index 41b87ee03fe26..c3ed83764fef1 100644 --- a/drivers/crypto/nx/nx.h +++ b/drivers/crypto/nx/nx.h @@ -2,6 +2,8 @@ #ifndef __NX_H__ #define __NX_H__ +#include + #define NX_NAME "nx-crypto" #define NX_STRING "IBM Power7+ Nest Accelerator Crypto Driver" #define NX_VERSION "1.0" @@ -91,8 +93,11 @@ struct nx_crypto_driver { #define NX_GCM4106_NONCE_LEN (4) #define NX_GCM_CTR_OFFSET (12) -struct nx_gcm_priv { +struct nx_gcm_rctx { u8 iv[16]; +}; + +struct nx_gcm_priv { u8 iauth_tag[16]; u8 nonce[NX_GCM4106_NONCE_LEN]; }; @@ -100,8 +105,11 @@ struct nx_gcm_priv { #define NX_CCM_AES_KEY_LEN (16) #define NX_CCM4309_AES_KEY_LEN (19) #define NX_CCM4309_NONCE_LEN (3) -struct nx_ccm_priv { +struct nx_ccm_rctx { u8 iv[16]; +}; + +struct nx_ccm_priv { u8 b0[16]; u8 iauth_tag[16]; u8 oauth_tag[16]; @@ -113,7 +121,7 @@ struct nx_xcbc_priv { }; struct nx_ctr_priv { - u8 iv[16]; + u8 nonce[CTR_RFC3686_NONCE_SIZE]; }; struct nx_crypto_ctx { From b75513b0f1c734b1e084a6e9952ea6260d4724e3 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 6 Aug 2015 19:13:25 -0700 Subject: [PATCH 0574/2091] sparc64: Fix userspace FPU register corruptions. 
[ Upstream commit 44922150d87cef616fd183220d43d8fde4d41390 ] If we have a series of events from userspace, with %fprs=FPRS_FEF, as follows: ETRAP ETRAP VIS_ENTRY(fprs=0x4) VIS_EXIT RTRAP (kernel FPU restore with fpu_saved=0x4) RTRAP We will not restore the user registers that were clobbered by the FPU-using kernel code in the inner-most trap. Traps allocate FPU save slots in the thread struct, and FPU-using sequences save the "dirty" FPU registers only. This works at the initial trap level because all of the registers get recorded into the top-level FPU save area, and we'll return to userspace with the FPU disabled so that any FPU use by the user will take an FPU disabled trap wherein we'll load the registers back up properly. But this is not how trap returns from kernel to kernel operate. The simplest fix for this bug is to always save all FPU register state for anything other than the top-most FPU save area. Getting rid of the optimized inner-slot FPU saving code ends up making VISEntryHalf degenerate into plain VISEntry. Longer term we need to do something smarter to reinstate the partial save optimizations. Perhaps the fundamental error is having trap entry and exit allocate FPU save slots and restore register state. Instead, the VISEntry et al. calls should be doing that work. This bug is about two decades old. Reported-by: James Y Knight Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- arch/sparc/include/asm/visasm.h | 16 +++----- arch/sparc/lib/NG4memcpy.S | 5 ++- arch/sparc/lib/VISsave.S | 67 +-------------------------------- arch/sparc/lib/ksyms.c | 4 -- 4 files changed, 11 insertions(+), 81 deletions(-) diff --git a/arch/sparc/include/asm/visasm.h b/arch/sparc/include/asm/visasm.h index 1f0aa2024e94b..6424249d5f785 100644 --- a/arch/sparc/include/asm/visasm.h +++ b/arch/sparc/include/asm/visasm.h @@ -28,16 +28,10 @@ * Must preserve %o5 between VISEntryHalf and VISExitHalf */ #define VISEntryHalf \ - rd %fprs, %o5; \ - andcc %o5, FPRS_FEF, %g0; \ - be,pt %icc, 297f; \ - sethi %hi(298f), %g7; \ - sethi %hi(VISenterhalf), %g1; \ - jmpl %g1 + %lo(VISenterhalf), %g0; \ - or %g7, %lo(298f), %g7; \ - clr %o5; \ -297: wr %o5, FPRS_FEF, %fprs; \ -298: + VISEntry + +#define VISExitHalf \ + VISExit #define VISEntryHalfFast(fail_label) \ rd %fprs, %o5; \ andcc %o5, FPRS_FEF, %g0; \ be,pt %icc, 297f; \ wr %g0, FPRS_FEF, %fprs; \ ba,a,pt %xcc, fail_label; \ 297: wr %o5, FPRS_FEF, %fprs; -#define VISExitHalf \ +#define VISExitHalfFast \ wr %o5, 0, %fprs; #ifndef __ASSEMBLY__ diff --git a/arch/sparc/lib/NG4memcpy.S b/arch/sparc/lib/NG4memcpy.S index 140527a20e7df..83aeeb1dffdb3 100644 --- a/arch/sparc/lib/NG4memcpy.S +++ b/arch/sparc/lib/NG4memcpy.S @@ -240,8 +240,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ add %o0, 0x40, %o0 bne,pt %icc, 1b LOAD(prefetch, %g1 + 0x200, #n_reads_strong) +#ifdef NON_USER_COPY + VISExitHalfFast +#else VISExitHalf - +#endif brz,pn %o2, .Lexit cmp %o2, 19 ble,pn %icc, .Lsmall_unaligned diff --git a/arch/sparc/lib/VISsave.S b/arch/sparc/lib/VISsave.S index b320ae9e2e2e8..a063d84336d63 100644 --- a/arch/sparc/lib/VISsave.S +++ b/arch/sparc/lib/VISsave.S @@ -44,9 +44,8 @@ vis1: ldub [%g6 + TI_FPSAVED], %g3 stx %g3, [%g6 + TI_GSR] 2: add %g6, %g1, %g3 - cmp %o5, FPRS_DU - be,pn %icc, 6f - sll %g1, 3, %g1 + mov FPRS_DU | FPRS_DL | FPRS_FEF, %o5 + sll %g1, 3, %g1 stb %o5, [%g3 + TI_FPSAVED] rd %gsr, %g2 add %g6, %g1, %g3 @@ -80,65 +79,3 @@ vis1: ldub [%g6 + TI_FPSAVED], %g3 .align 32 80: jmpl %g7 + %g0, %g0 nop - -6: ldub [%g3 + TI_FPSAVED], %o5 - or %o5, FPRS_DU, %o5 - add %g6, TI_FPREGS+0x80, %g2 - stb
%o5, [%g3 + TI_FPSAVED] - - sll %g1, 5, %g1 - add %g6, TI_FPREGS+0xc0, %g3 - wr %g0, FPRS_FEF, %fprs - membar #Sync - stda %f32, [%g2 + %g1] ASI_BLK_P - stda %f48, [%g3 + %g1] ASI_BLK_P - membar #Sync - ba,pt %xcc, 80f - nop - - .align 32 -80: jmpl %g7 + %g0, %g0 - nop - - .align 32 -VISenterhalf: - ldub [%g6 + TI_FPDEPTH], %g1 - brnz,a,pn %g1, 1f - cmp %g1, 1 - stb %g0, [%g6 + TI_FPSAVED] - stx %fsr, [%g6 + TI_XFSR] - clr %o5 - jmpl %g7 + %g0, %g0 - wr %g0, FPRS_FEF, %fprs - -1: bne,pn %icc, 2f - srl %g1, 1, %g1 - ba,pt %xcc, vis1 - sub %g7, 8, %g7 -2: addcc %g6, %g1, %g3 - sll %g1, 3, %g1 - andn %o5, FPRS_DU, %g2 - stb %g2, [%g3 + TI_FPSAVED] - - rd %gsr, %g2 - add %g6, %g1, %g3 - stx %g2, [%g3 + TI_GSR] - add %g6, %g1, %g2 - stx %fsr, [%g2 + TI_XFSR] - sll %g1, 5, %g1 -3: andcc %o5, FPRS_DL, %g0 - be,pn %icc, 4f - add %g6, TI_FPREGS, %g2 - - add %g6, TI_FPREGS+0x40, %g3 - membar #Sync - stda %f0, [%g2 + %g1] ASI_BLK_P - stda %f16, [%g3 + %g1] ASI_BLK_P - membar #Sync - ba,pt %xcc, 4f - nop - - .align 32 -4: and %o5, FPRS_DU, %o5 - jmpl %g7 + %g0, %g0 - wr %o5, FPRS_FEF, %fprs diff --git a/arch/sparc/lib/ksyms.c b/arch/sparc/lib/ksyms.c index 1d649a95660c8..8069ce12f20b1 100644 --- a/arch/sparc/lib/ksyms.c +++ b/arch/sparc/lib/ksyms.c @@ -135,10 +135,6 @@ EXPORT_SYMBOL(copy_user_page); void VISenter(void); EXPORT_SYMBOL(VISenter); -/* CRYPTO code needs this */ -void VISenterhalf(void); -EXPORT_SYMBOL(VISenterhalf); - extern void xor_vis_2(unsigned long, unsigned long *, unsigned long *); extern void xor_vis_3(unsigned long, unsigned long *, unsigned long *, unsigned long *); From c6fdd1b52bb30abd861708f18509493cbb84ec44 Mon Sep 17 00:00:00 2001 From: Murali Karicheri Date: Fri, 29 May 2015 12:04:12 -0400 Subject: [PATCH 0575/2091] clk: keystone: add support for post divider register for main pll commit 02fdfd708fd252a778709beb6c65d5e7360341ac upstream. Main PLL controller has post divider bits in a separate register in pll controller. Use the value from this register instead of fixed divider when available. Signed-off-by: Murali Karicheri Signed-off-by: Michael Turquette Signed-off-by: Greg Kroah-Hartman --- .../bindings/clock/keystone-pll.txt | 8 ++++---- drivers/clk/keystone/pll.c | 20 +++++++++++++++++-- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/Documentation/devicetree/bindings/clock/keystone-pll.txt b/Documentation/devicetree/bindings/clock/keystone-pll.txt index 225990f79b7c5..47570d2072159 100644 --- a/Documentation/devicetree/bindings/clock/keystone-pll.txt +++ b/Documentation/devicetree/bindings/clock/keystone-pll.txt @@ -15,8 +15,8 @@ Required properties: - compatible : shall be "ti,keystone,main-pll-clock" or "ti,keystone,pll-clock" - clocks : parent clock phandle - reg - pll control0 and pll multipler registers -- reg-names : control and multiplier. The multiplier is applicable only for - main pll clock +- reg-names : control, multiplier and post-divider. The multiplier and + post-divider registers are applicable only for main pll clock - fixed-postdiv : fixed post divider value. 
If absent, use clkod register bits for postdiv @@ -25,8 +25,8 @@ Example: #clock-cells = <0>; compatible = "ti,keystone,main-pll-clock"; clocks = <&refclksys>; - reg = <0x02620350 4>, <0x02310110 4>; - reg-names = "control", "multiplier"; + reg = <0x02620350 4>, <0x02310110 4>, <0x02310108 4>; + reg-names = "control", "multiplier", "post-divider"; fixed-postdiv = <2>; }; diff --git a/drivers/clk/keystone/pll.c b/drivers/clk/keystone/pll.c index 0dd8a4b12747b..4a375ead70e9f 100644 --- a/drivers/clk/keystone/pll.c +++ b/drivers/clk/keystone/pll.c @@ -37,7 +37,8 @@ * Main PLL or any other PLLs in the device such as ARM PLL, DDR PLL * or PA PLL available on keystone2. These PLLs are controlled by * this register. Main PLL is controlled by a PLL controller. - * @pllm: PLL register map address + * @pllm: PLL register map address for multiplier bits + * @pllod: PLL register map address for post divider bits * @pll_ctl0: PLL controller map address * @pllm_lower_mask: multiplier lower mask * @pllm_upper_mask: multiplier upper mask @@ -53,6 +54,7 @@ struct clk_pll_data { u32 phy_pllm; u32 phy_pll_ctl0; void __iomem *pllm; + void __iomem *pllod; void __iomem *pll_ctl0; u32 pllm_lower_mask; u32 pllm_upper_mask; @@ -102,7 +104,11 @@ static unsigned long clk_pllclk_recalc(struct clk_hw *hw, /* read post divider from od bits*/ postdiv = ((val & pll_data->clkod_mask) >> pll_data->clkod_shift) + 1; - else + else if (pll_data->pllod) { + postdiv = readl(pll_data->pllod); + postdiv = ((postdiv & pll_data->clkod_mask) >> + pll_data->clkod_shift) + 1; + } else postdiv = pll_data->postdiv; rate /= (prediv + 1); @@ -172,12 +178,21 @@ static void __init _of_pll_clk_init(struct device_node *node, bool pllctrl) /* assume the PLL has output divider register bits */ pll_data->clkod_mask = CLKOD_MASK; pll_data->clkod_shift = CLKOD_SHIFT; + + /* + * Check if there is an post-divider register. If not + * assume od bits are part of control register. + */ + i = of_property_match_string(node, "reg-names", + "post-divider"); + pll_data->pllod = of_iomap(node, i); } i = of_property_match_string(node, "reg-names", "control"); pll_data->pll_ctl0 = of_iomap(node, i); if (!pll_data->pll_ctl0) { pr_err("%s: ioremap failed\n", __func__); + iounmap(pll_data->pllod); goto out; } @@ -193,6 +208,7 @@ static void __init _of_pll_clk_init(struct device_node *node, bool pllctrl) pll_data->pllm = of_iomap(node, i); if (!pll_data->pllm) { iounmap(pll_data->pll_ctl0); + iounmap(pll_data->pllod); goto out; } } From 40114a3e4210f30b5d16788e6e7b928868881cfd Mon Sep 17 00:00:00 2001 From: Murali Karicheri Date: Fri, 29 May 2015 12:04:13 -0400 Subject: [PATCH 0576/2091] ARM: dts: keystone: fix dt bindings to use post div register for mainpll commit c1bfa985ded82cacdfc6403e78f329c44e35534a upstream. All of the keystone devices have a separate register to hold post divider value for main pll clock. Currently the fixed-postdiv value used for k2hk/l/e SoCs works by sheer luck as u-boot happens to use a value of 2 for this. Now that we have fixed this in the pll clock driver change the dt bindings for the same. 
Signed-off-by: Murali Karicheri Acked-by: Santosh Shilimkar Signed-off-by: Olof Johansson Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/k2e-clocks.dtsi | 5 ++--- arch/arm/boot/dts/k2hk-clocks.dtsi | 5 ++--- arch/arm/boot/dts/k2l-clocks.dtsi | 5 ++--- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/arch/arm/boot/dts/k2e-clocks.dtsi b/arch/arm/boot/dts/k2e-clocks.dtsi index 4773d6af66a0a..d56d68fe7ffc6 100644 --- a/arch/arm/boot/dts/k2e-clocks.dtsi +++ b/arch/arm/boot/dts/k2e-clocks.dtsi @@ -13,9 +13,8 @@ clocks { #clock-cells = <0>; compatible = "ti,keystone,main-pll-clock"; clocks = <&refclksys>; - reg = <0x02620350 4>, <0x02310110 4>; - reg-names = "control", "multiplier"; - fixed-postdiv = <2>; + reg = <0x02620350 4>, <0x02310110 4>, <0x02310108 4>; + reg-names = "control", "multiplier", "post-divider"; }; papllclk: papllclk@2620358 { diff --git a/arch/arm/boot/dts/k2hk-clocks.dtsi b/arch/arm/boot/dts/k2hk-clocks.dtsi index d5adee3c00675..af9b7190533aa 100644 --- a/arch/arm/boot/dts/k2hk-clocks.dtsi +++ b/arch/arm/boot/dts/k2hk-clocks.dtsi @@ -22,9 +22,8 @@ clocks { #clock-cells = <0>; compatible = "ti,keystone,main-pll-clock"; clocks = <&refclksys>; - reg = <0x02620350 4>, <0x02310110 4>; - reg-names = "control", "multiplier"; - fixed-postdiv = <2>; + reg = <0x02620350 4>, <0x02310110 4>, <0x02310108 4>; + reg-names = "control", "multiplier", "post-divider"; }; papllclk: papllclk@2620358 { diff --git a/arch/arm/boot/dts/k2l-clocks.dtsi b/arch/arm/boot/dts/k2l-clocks.dtsi index eb1e3e29f0738..ef8464bb11ffd 100644 --- a/arch/arm/boot/dts/k2l-clocks.dtsi +++ b/arch/arm/boot/dts/k2l-clocks.dtsi @@ -22,9 +22,8 @@ clocks { #clock-cells = <0>; compatible = "ti,keystone,main-pll-clock"; clocks = <&refclksys>; - reg = <0x02620350 4>, <0x02310110 4>; - reg-names = "control", "multiplier"; - fixed-postdiv = <2>; + reg = <0x02620350 4>, <0x02310110 4>, <0x02310108 4>; + reg-names = "control", "multiplier", "post-divider"; }; papllclk: papllclk@2620358 { From ba8d2b484d19090e0c19b38c4f3d8d1ab65107bb Mon Sep 17 00:00:00 2001 From: Shilpa Sreeramalu Date: Wed, 15 Jul 2015 07:58:09 -0700 Subject: [PATCH 0577/2091] ASoC: Intel: Get correct usage_count value to load firmware commit 412efa73dcd3bd03c1838c91e094533a95529039 upstream. The usage_count variable was read before it was set to the correct value, due to which the firmware load was failing. Because of this IPC messages sent to the firmware were timing out causing a delay of about 1 second while playing audio from the internal speakers. With this patch the usage_count is read after the function call pm_runtime_get_sync which will increment the usage_count variable and the firmware load is successful and all the IPC messages are processed correctly. 
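For illustration only (this is a sketch, not part of the diff below), the ordering the fix establishes, assuming a runtime-PM-enabled device pointer dev and the GET_USAGE_COUNT() helper the patch introduces:

	int ret, usage_count;

	/*
	 * Sampling the count before pm_runtime_get_sync() sees the value
	 * from before the reference is taken (0 on first use), so the
	 * "first user -> download firmware" path is never entered.
	 */
	ret = pm_runtime_get_sync(dev);		/* takes the reference */
	usage_count = GET_USAGE_COUNT(dev);	/* now reads >= 1      */

	if (ret >= 0 && usage_count == 1) {
		/* first active user: safe point to load the firmware */
	}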
Signed-off-by: Shilpa Sreeramalu Signed-off-by: Fang, Yang A Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/intel/atom/sst/sst_drv_interface.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/sound/soc/intel/atom/sst/sst_drv_interface.c b/sound/soc/intel/atom/sst/sst_drv_interface.c index 7b50a9d17ec1b..edc1869083587 100644 --- a/sound/soc/intel/atom/sst/sst_drv_interface.c +++ b/sound/soc/intel/atom/sst/sst_drv_interface.c @@ -42,6 +42,11 @@ #define MIN_FRAGMENT_SIZE (50 * 1024) #define MAX_FRAGMENT_SIZE (1024 * 1024) #define SST_GET_BYTES_PER_SAMPLE(pcm_wd_sz) (((pcm_wd_sz + 15) >> 4) << 1) +#ifdef CONFIG_PM +#define GET_USAGE_COUNT(dev) (atomic_read(&dev->power.usage_count)) +#else +#define GET_USAGE_COUNT(dev) 1 +#endif int free_stream_context(struct intel_sst_drv *ctx, unsigned int str_id) { @@ -141,15 +146,9 @@ static int sst_power_control(struct device *dev, bool state) int ret = 0; int usage_count = 0; -#ifdef CONFIG_PM - usage_count = atomic_read(&dev->power.usage_count); -#else - usage_count = 1; -#endif - if (state == true) { ret = pm_runtime_get_sync(dev); - + usage_count = GET_USAGE_COUNT(dev); dev_dbg(ctx->dev, "Enable: pm usage count: %d\n", usage_count); if (ret < 0) { dev_err(ctx->dev, "Runtime get failed with err: %d\n", ret); @@ -164,6 +163,7 @@ static int sst_power_control(struct device *dev, bool state) } } } else { + usage_count = GET_USAGE_COUNT(dev); dev_dbg(ctx->dev, "Disable: pm usage count: %d\n", usage_count); return sst_pm_runtime_put(ctx); } From b0e4d399f6c4d463250cd28c0be6cccf4c7127d5 Mon Sep 17 00:00:00 2001 From: Ben Zhang Date: Tue, 21 Jul 2015 14:46:26 -0700 Subject: [PATCH 0578/2091] ASoC: ssm4567: Keep TDM_BCLKS in ssm4567_set_dai_fmt commit a6c2a32ac83567f15e9af3dcbc73148ce68b2ced upstream. The regmap_write in ssm4567_set_dai_fmt accidentally clears the TDM_BCLKS field which was set earlier by ssm4567_set_tdm_slot. This patch fixes it by using regmap_update_bits with proper mask. Signed-off-by: Ben Zhang Acked-by: Lars-Peter Clausen Acked-by: Anatol Pomozov Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/ssm4567.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/ssm4567.c b/sound/soc/codecs/ssm4567.c index a984485108cd1..f7549cc7ea855 100644 --- a/sound/soc/codecs/ssm4567.c +++ b/sound/soc/codecs/ssm4567.c @@ -315,7 +315,13 @@ static int ssm4567_set_dai_fmt(struct snd_soc_dai *dai, unsigned int fmt) if (invert_fclk) ctrl1 |= SSM4567_SAI_CTRL_1_FSYNC; - return regmap_write(ssm4567->regmap, SSM4567_REG_SAI_CTRL_1, ctrl1); + return regmap_update_bits(ssm4567->regmap, SSM4567_REG_SAI_CTRL_1, + SSM4567_SAI_CTRL_1_BCLK | + SSM4567_SAI_CTRL_1_FSYNC | + SSM4567_SAI_CTRL_1_LJ | + SSM4567_SAI_CTRL_1_TDM | + SSM4567_SAI_CTRL_1_PDM, + ctrl1); } static int ssm4567_set_power(struct ssm4567 *ssm4567, bool enable) From e52e67a7832367967b3a1b363eb2f90c4f5e153a Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Thu, 23 Jul 2015 23:22:26 +0800 Subject: [PATCH 0579/2091] ASoC: pcm1681: Fix setting de-emphasis sampling rate selection commit fa8173a3ef0570affde7da352de202190b3786c2 upstream. The de-emphasis sampling rate selection is controlled by BIT[3:4] of PCM1681_DEEMPH_CONTROL register. Do proper left shift to set it. 
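A worked example of the shift (illustration only, not part of the diff below), assuming PCM1681_DEEMPH_RATE_MASK covers bits [4:3] of the register, i.e. 0x18:

	/*
	 * The rate-select field sits in bits [4:3], so the table index
	 * must be moved into place before masking:
	 *
	 *   val = 1  ->  val << 3 = 0x08   (bit 3 set)
	 *   val = 2  ->  val << 3 = 0x10   (bit 4 set)
	 *
	 * The unshifted value would land in bits [1:0] and be thrown
	 * away by the 0x18 mask, leaving de-emphasis misconfigured.
	 */
	regmap_update_bits(priv->regmap, PCM1681_DEEMPH_CONTROL,
			   PCM1681_DEEMPH_RATE_MASK, val << 3);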
Signed-off-by: Axel Lin Acked-by: Marek Belisko Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/pcm1681.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/pcm1681.c b/sound/soc/codecs/pcm1681.c index 477e13d309713..e7ba557979cb2 100644 --- a/sound/soc/codecs/pcm1681.c +++ b/sound/soc/codecs/pcm1681.c @@ -102,7 +102,7 @@ static int pcm1681_set_deemph(struct snd_soc_codec *codec) if (val != -1) { regmap_update_bits(priv->regmap, PCM1681_DEEMPH_CONTROL, - PCM1681_DEEMPH_RATE_MASK, val); + PCM1681_DEEMPH_RATE_MASK, val << 3); enable = 1; } else enable = 0; From d90d06680f8287f11a5ad6d83680790e5da86f08 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Mon, 6 Jul 2015 17:01:24 +0200 Subject: [PATCH 0580/2091] ASoC: dapm: Lock during userspace access commit e50b1e06b79e9d51efbff9627b4dd407184ef43f upstream. The DAPM lock must be held when accessing the DAPM graph status through sysfs or debugfs, otherwise concurrent changes to the graph can result in undefined behaviour. Signed-off-by: Lars-Peter Clausen Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/soc-dapm.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 158204d089249..b51fe7c785932 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -1811,6 +1811,7 @@ static ssize_t dapm_widget_power_read_file(struct file *file, size_t count, loff_t *ppos) { struct snd_soc_dapm_widget *w = file->private_data; + struct snd_soc_card *card = w->dapm->card; char *buf; int in, out; ssize_t ret; @@ -1820,6 +1821,8 @@ static ssize_t dapm_widget_power_read_file(struct file *file, if (!buf) return -ENOMEM; + mutex_lock(&card->dapm_mutex); + /* Supply widgets are not handled by is_connected_{input,output}_ep() */ if (w->is_supply) { in = 0; @@ -1866,6 +1869,8 @@ static ssize_t dapm_widget_power_read_file(struct file *file, p->sink->name); } + mutex_unlock(&card->dapm_mutex); + ret = simple_read_from_buffer(user_buf, count, ppos, buf, ret); kfree(buf); @@ -2140,11 +2145,15 @@ static ssize_t dapm_widget_show(struct device *dev, struct snd_soc_pcm_runtime *rtd = dev_get_drvdata(dev); int i, count = 0; + mutex_lock(&rtd->card->dapm_mutex); + for (i = 0; i < rtd->num_codecs; i++) { struct snd_soc_codec *codec = rtd->codec_dais[i]->codec; count += dapm_widget_show_codec(codec, buf + count); } + mutex_unlock(&rtd->card->dapm_mutex); + return count; } From 07d74a4b2fd2d3ec063b5350a185a18cfedc3712 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Tue, 21 Jul 2015 11:51:35 +0200 Subject: [PATCH 0581/2091] ASoC: dapm: Don't add prefix to widget stream name commit a798c24a69b64f09e2d323ac8155a36373e5d5fd upstream. Commit fdb6eb0a1287 ("ASoC: dapm: Modify widget stream name according to prefix") fixed the case where a DAPM route between a DAI widget and a DAC/ADC/AIF widget with a matching stream name was not created when the DAPM context was using a prefix. Unfortunately the patch introduced a few issues on its own like leaking the dynamically allocated stream name memory and also not checking whether the allocation succeeded in the first place. It is also incomplete in that it still does not handle the case where stream name of the widget is a substring of the stream name of the DAI, which is explicitly allowed and works fine if no DAPM prefix is used. Revert the commit and take a slightly different approach to solving the issue. 
Instead of comparing the widget's stream name to the name of the DAI widget compare it to the stream name of the DAI widget. The stream name of the DAI widget is identical to the name of the DAI widget except that it wont have the DAPM prefix added. So this approach behaves identical regardless to whether the DAPM context uses a prefix or not. We don't have to worry about potentially matching with a widget with the same stream name, but from a different DAPM context with a different prefix, since the code already makes sure that both the DAI widget and the matched widget are from the same DAPM context. Fixes: fdb6eb0a1287 ("ASoC: dapm: Modify widget stream name according to prefix") Signed-off-by: Lars-Peter Clausen Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/soc-dapm.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index b51fe7c785932..b6c12dccb2591 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -3109,16 +3109,10 @@ snd_soc_dapm_new_control(struct snd_soc_dapm_context *dapm, } prefix = soc_dapm_prefix(dapm); - if (prefix) { + if (prefix) w->name = kasprintf(GFP_KERNEL, "%s %s", prefix, widget->name); - if (widget->sname) - w->sname = kasprintf(GFP_KERNEL, "%s %s", prefix, - widget->sname); - } else { + else w->name = kasprintf(GFP_KERNEL, "%s", widget->name); - if (widget->sname) - w->sname = kasprintf(GFP_KERNEL, "%s", widget->sname); - } if (w->name == NULL) { kfree(w); return NULL; @@ -3566,7 +3560,7 @@ int snd_soc_dapm_link_dai_widgets(struct snd_soc_card *card) break; } - if (!w->sname || !strstr(w->sname, dai_w->name)) + if (!w->sname || !strstr(w->sname, dai_w->sname)) continue; if (dai_w->id == snd_soc_dapm_dai_in) { From d24944adc3fb3e128ddb9c695f339b395e798cfb Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 30 Jul 2015 14:31:31 -0700 Subject: [PATCH 0582/2091] x86/xen: Probe target addresses in set_aliased_prot() before the hypercall commit aa1acff356bbedfd03b544051f5b371746735d89 upstream. The update_va_mapping hypercall can fail if the VA isn't present in the guest's page tables. Under certain loads, this can result in an OOPS when the target address is in unpopulated vmap space. While we're at it, add comments to help explain what's going on. This isn't a great long-term fix. This code should probably be changed to use something like set_memory_ro. Signed-off-by: Andy Lutomirski Cc: Andrew Cooper Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: David Vrabel Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Jan Beulich Cc: Konrad Rzeszutek Wilk Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Sasha Levin Cc: Steven Rostedt Cc: Thomas Gleixner Cc: security@kernel.org Cc: xen-devel Link: http://lkml.kernel.org/r/0b0e55b995cda11e7829f140b833ef932fcabe3a.1438291540.git.luto@kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/xen/enlighten.c | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 46957ead3060e..a671e837228d1 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -483,6 +483,7 @@ static void set_aliased_prot(void *v, pgprot_t prot) pte_t pte; unsigned long pfn; struct page *page; + unsigned char dummy; ptep = lookup_address((unsigned long)v, &level); BUG_ON(ptep == NULL); @@ -492,6 +493,32 @@ static void set_aliased_prot(void *v, pgprot_t prot) pte = pfn_pte(pfn, prot); + /* + * Careful: update_va_mapping() will fail if the virtual address + * we're poking isn't populated in the page tables. We don't + * need to worry about the direct map (that's always in the page + * tables), but we need to be careful about vmap space. In + * particular, the top level page table can lazily propagate + * entries between processes, so if we've switched mms since we + * vmapped the target in the first place, we might not have the + * top-level page table entry populated. + * + * We disable preemption because we want the same mm active when + * we probe the target and when we issue the hypercall. We'll + * have the same nominal mm, but if we're a kernel thread, lazy + * mm dropping could change our pgd. + * + * Out of an abundance of caution, this uses __get_user() to fault + * in the target address just in case there's some obscure case + * in which the target address isn't readable. + */ + + preempt_disable(); + + pagefault_disable(); /* Avoid warnings due to being atomic. */ + __get_user(dummy, (unsigned char __user __force *)v); + pagefault_enable(); + if (HYPERVISOR_update_va_mapping((unsigned long)v, pte, 0)) BUG(); @@ -503,6 +530,8 @@ static void set_aliased_prot(void *v, pgprot_t prot) BUG(); } else kmap_flush_unused(); + + preempt_enable(); } static void xen_alloc_ldt(struct desc_struct *ldt, unsigned entries) @@ -510,6 +539,17 @@ static void xen_alloc_ldt(struct desc_struct *ldt, unsigned entries) const unsigned entries_per_page = PAGE_SIZE / LDT_ENTRY_SIZE; int i; + /* + * We need to mark the all aliases of the LDT pages RO. We + * don't need to call vm_flush_aliases(), though, since that's + * only responsible for flushing aliases out the TLBs, not the + * page tables, and Xen will flush the TLB for us if needed. + * + * To avoid confusing future readers: none of this is necessary + * to load the LDT. The hypervisor only checks this when the + * LDT is faulted in due to subsequent descriptor access. + */ + for(i = 0; i < entries; i += entries_per_page) set_aliased_prot(ldt + i, PAGE_KERNEL_RO); } From f90781aa88aaf93f78d8ef7ad002f6ae526ac132 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Fri, 26 Jun 2015 03:28:24 +0200 Subject: [PATCH 0583/2091] xen/gntdevt: Fix race condition in gntdev_release() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 30b03d05e07467b8c6ec683ea96b5bffcbcd3931 upstream. 
While gntdev_release() is called the MMU notifier is still registered and can traverse priv->maps list even if no pages are mapped (which is the case -- gntdev_release() is called after all). But gntdev_release() will clear that list, so make sure that only one of those things happens at the same time. Signed-off-by: Marek Marczykowski-Górecki Signed-off-by: David Vrabel Signed-off-by: Greg Kroah-Hartman --- drivers/xen/gntdev.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 89274850741b5..4bd23bba816fb 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -568,12 +568,14 @@ static int gntdev_release(struct inode *inode, struct file *flip) pr_debug("priv %p\n", priv); + mutex_lock(&priv->lock); while (!list_empty(&priv->maps)) { map = list_entry(priv->maps.next, struct grant_map, next); list_del(&map->next); gntdev_put_map(NULL /* already removed */, map); } WARN_ON(!list_empty(&priv->freeable_maps)); + mutex_unlock(&priv->lock); if (use_ptemod) mmu_notifier_unregister(&priv->mn, priv->mm); From 640e60174d90fe6043fd79c645dce206eb2d1ab9 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 24 Jul 2015 13:13:30 +0200 Subject: [PATCH 0584/2091] hwrng: core - correct error check of kthread_run call commit 17fb874dee093139923af8ed36061faa92cc8e79 upstream. The kthread_run() function can return two different error values but the hwrng core only checks for -ENOMEM. If the other error value -EINTR is returned it is assigned to hwrng_fill and later used on a kthread_stop() call which naturally crashes. Signed-off-by: Martin Schwidefsky Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/char/hw_random/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index da8faf78536a3..5643b65cee204 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -429,7 +429,7 @@ static int hwrng_fillfn(void *unused) static void start_khwrngd(void) { hwrng_fill = kthread_run(hwrng_fillfn, NULL, "hwrng"); - if (hwrng_fill == ERR_PTR(-ENOMEM)) { + if (IS_ERR(hwrng_fill)) { pr_err("hwrng_fill thread creation failed"); hwrng_fill = NULL; } From 7047312d383203f7b8447261f1a473cf54aedec3 Mon Sep 17 00:00:00 2001 From: Tadeusz Struk Date: Tue, 21 Jul 2015 22:07:47 -0700 Subject: [PATCH 0585/2091] crypto: qat - Fix invalid synchronization between register/unregister sym algs commit 6f043b50da8e03bdcc5703fd37ea45bc6892432f upstream. The synchronization method used atomic was bogus. Use a proper synchronization with mutex. 
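For illustration only (not part of the diff below), the window the atomic counter leaves open, and the mutex-protected reference count the patch uses instead:

	/*
	 * With the atomic counter the check and the work are two steps:
	 *
	 *   CPU0 (device remove)              CPU1 (device probe)
	 *   atomic_sub_return() == 0          atomic_add_return() == 1
	 *   crypto_unregister_algs(...)       crypto_register_algs(...)
	 *
	 * and both now operate on the same qat_algs[] array concurrently.
	 * Holding one mutex across the test and the (un)registration
	 * closes that window:
	 */
	mutex_lock(&algs_lock);
	if (--active_devs == 0)
		ret = crypto_unregister_algs(qat_algs, ARRAY_SIZE(qat_algs));
	mutex_unlock(&algs_lock);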
Signed-off-by: Tadeusz Struk Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/qat/qat_common/qat_algs.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/drivers/crypto/qat/qat_common/qat_algs.c b/drivers/crypto/qat/qat_common/qat_algs.c index 1dc5b0a17cf72..34139a8894a01 100644 --- a/drivers/crypto/qat/qat_common/qat_algs.c +++ b/drivers/crypto/qat/qat_common/qat_algs.c @@ -73,7 +73,8 @@ ICP_QAT_HW_CIPHER_KEY_CONVERT, \ ICP_QAT_HW_CIPHER_DECRYPT) -static atomic_t active_dev; +static DEFINE_MUTEX(algs_lock); +static unsigned int active_devs; struct qat_alg_buf { uint32_t len; @@ -1271,7 +1272,10 @@ static struct crypto_alg qat_algs[] = { { int qat_algs_register(void) { - if (atomic_add_return(1, &active_dev) == 1) { + int ret = 0; + + mutex_lock(&algs_lock); + if (++active_devs == 1) { int i; for (i = 0; i < ARRAY_SIZE(qat_algs); i++) @@ -1280,21 +1284,25 @@ int qat_algs_register(void) CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC : CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC; - return crypto_register_algs(qat_algs, ARRAY_SIZE(qat_algs)); + ret = crypto_register_algs(qat_algs, ARRAY_SIZE(qat_algs)); } - return 0; + mutex_unlock(&algs_lock); + return ret; } int qat_algs_unregister(void) { - if (atomic_sub_return(1, &active_dev) == 0) - return crypto_unregister_algs(qat_algs, ARRAY_SIZE(qat_algs)); - return 0; + int ret = 0; + + mutex_lock(&algs_lock); + if (--active_devs == 0) + ret = crypto_unregister_algs(qat_algs, ARRAY_SIZE(qat_algs)); + mutex_unlock(&algs_lock); + return ret; } int qat_algs_init(void) { - atomic_set(&active_dev, 0); crypto_get_default_rng(); return 0; } From 593b1559bb5024f922724b86c161a13006948d63 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 22 Jul 2015 18:05:35 +0800 Subject: [PATCH 0586/2091] crypto: ixp4xx - Remove bogus BUG_ON on scattered dst buffer commit f898c522f0e9ac9f3177d0762b76e2ab2d2cf9c0 upstream. This patch removes a bogus BUG_ON in the ablkcipher path that triggers when the destination buffer is different from the source buffer and is scattered. Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/ixp4xx_crypto.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/crypto/ixp4xx_crypto.c b/drivers/crypto/ixp4xx_crypto.c index 48f453555f1fe..ede9e9e3c419c 100644 --- a/drivers/crypto/ixp4xx_crypto.c +++ b/drivers/crypto/ixp4xx_crypto.c @@ -904,7 +904,6 @@ static int ablk_perform(struct ablkcipher_request *req, int encrypt) crypt->mode |= NPE_OP_NOT_IN_PLACE; /* This was never tested by Intel * for more than one dst buffer, I think. */ - BUG_ON(req->dst->length < nbytes); req_ctx->dst = NULL; if (!chainup_buffers(dev, req->dst, nbytes, &dst_hook, flags, DMA_FROM_DEVICE)) From 1fc5d70706d0de5dba99b541689c1c6a66727711 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 16 Jul 2015 17:36:11 +0300 Subject: [PATCH 0587/2091] rbd: fix copyup completion race commit 2761713d35e370fd640b5781109f753066b746c4 upstream. For write/discard obj_requests that involved a copyup method call, the opcode of the first op is CEPH_OSD_OP_CALL and the ->callback is rbd_img_obj_copyup_callback(). The latter frees copyup pages, sets ->xferred and delegates to rbd_img_obj_callback(), the "normal" image object callback, for reporting to block layer and putting refs. 
rbd_osd_req_callback() however treats CEPH_OSD_OP_CALL as a trivial op, which means obj_request is marked done in rbd_osd_trivial_callback(), *before* ->callback is invoked and rbd_img_obj_copyup_callback() has a chance to run. Marking obj_request done essentially means giving rbd_img_obj_callback() a license to end it at any moment, so if another obj_request from the same img_request is being completed concurrently, rbd_img_obj_end_request() may very well be called on such prematurally marked done request: handle_reply() rbd_osd_req_callback() rbd_osd_trivial_callback() rbd_obj_request_complete() rbd_img_obj_copyup_callback() rbd_img_obj_callback() handle_reply() rbd_osd_req_callback() rbd_osd_trivial_callback() for_each_obj_request(obj_request->img_request) { rbd_img_obj_end_request(obj_request-1/2) rbd_img_obj_end_request(obj_request-2/2) <-- } Calling rbd_img_obj_end_request() on such a request leads to trouble, in particular because its ->xfferred is 0. We report 0 to the block layer with blk_update_request(), get back 1 for "this request has more data in flight" and then trip on rbd_assert(more ^ (which == img_request->obj_request_count)); with rhs (which == ...) being 1 because rbd_img_obj_end_request() has been called for both requests and lhs (more) being 1 because we haven't got a chance to set ->xfferred in rbd_img_obj_copyup_callback() yet. To fix this, leverage that rbd wants to call class methods in only two cases: one is a generic method call wrapper (obj_request is standalone) and the other is a copyup (obj_request is part of an img_request). So make a dedicated handler for CEPH_OSD_OP_CALL and directly invoke rbd_img_obj_copyup_callback() from it if obj_request is part of an img_request, similar to how CEPH_OSD_OP_READ handler invokes rbd_img_obj_request_read_callback(). Since rbd_img_obj_copyup_callback() is now being called from the OSD request callback (only), it is renamed to rbd_osd_copyup_callback(). Cc: Alex Elder Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder Signed-off-by: Greg Kroah-Hartman --- drivers/block/rbd.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 53f253574abe7..010ce0b1f517f 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -522,6 +522,7 @@ void rbd_warn(struct rbd_device *rbd_dev, const char *fmt, ...) 
# define rbd_assert(expr) ((void) 0) #endif /* !RBD_DEBUG */ +static void rbd_osd_copyup_callback(struct rbd_obj_request *obj_request); static int rbd_img_obj_request_submit(struct rbd_obj_request *obj_request); static void rbd_img_parent_read(struct rbd_obj_request *obj_request); static void rbd_dev_remove_parent(struct rbd_device *rbd_dev); @@ -1797,6 +1798,16 @@ static void rbd_osd_stat_callback(struct rbd_obj_request *obj_request) obj_request_done_set(obj_request); } +static void rbd_osd_call_callback(struct rbd_obj_request *obj_request) +{ + dout("%s: obj %p\n", __func__, obj_request); + + if (obj_request_img_data_test(obj_request)) + rbd_osd_copyup_callback(obj_request); + else + obj_request_done_set(obj_request); +} + static void rbd_osd_req_callback(struct ceph_osd_request *osd_req, struct ceph_msg *msg) { @@ -1845,6 +1856,8 @@ static void rbd_osd_req_callback(struct ceph_osd_request *osd_req, rbd_osd_discard_callback(obj_request); break; case CEPH_OSD_OP_CALL: + rbd_osd_call_callback(obj_request); + break; case CEPH_OSD_OP_NOTIFY_ACK: case CEPH_OSD_OP_WATCH: rbd_osd_trivial_callback(obj_request); @@ -2509,13 +2522,15 @@ static int rbd_img_request_fill(struct rbd_img_request *img_request, } static void -rbd_img_obj_copyup_callback(struct rbd_obj_request *obj_request) +rbd_osd_copyup_callback(struct rbd_obj_request *obj_request) { struct rbd_img_request *img_request; struct rbd_device *rbd_dev; struct page **pages; u32 page_count; + dout("%s: obj %p\n", __func__, obj_request); + rbd_assert(obj_request->type == OBJ_REQUEST_BIO || obj_request->type == OBJ_REQUEST_NODATA); rbd_assert(obj_request_img_data_test(obj_request)); @@ -2542,9 +2557,7 @@ rbd_img_obj_copyup_callback(struct rbd_obj_request *obj_request) if (!obj_request->result) obj_request->xferred = obj_request->length; - /* Finish up with the normal image object callback */ - - rbd_img_obj_callback(obj_request); + obj_request_done_set(obj_request); } static void @@ -2629,7 +2642,6 @@ rbd_img_obj_parent_read_full_callback(struct rbd_img_request *img_request) /* All set, send it off. */ - orig_request->callback = rbd_img_obj_copyup_callback; osdc = &rbd_dev->rbd_client->client->osdc; img_result = rbd_obj_request_submit(osdc, orig_request); if (!img_result) From d13a03e5e2ae19babd18be7814ec52992c618c69 Mon Sep 17 00:00:00 2001 From: Denis Carikli Date: Thu, 23 Jul 2015 10:31:12 +0200 Subject: [PATCH 0588/2091] ARM: dts: i.MX35: Fix can support. commit e053f96b1a00022b4e2c7ceb7ac0229646626507 upstream. Since commit 3d42a379b6fa5b46058e3302b1802b29f64865bb ("can: flexcan: add 2nd clock to support imx53 and newer") the can driver requires a dt nodes to have a second clock. Add them to imx35 to fix probing the flex can driver on the respective platforms. 
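For illustration only (not part of the diff below), a sketch of the probe-side lookup that makes the second clock entry mandatory; the exact error handling in the flexcan driver differs, this only shows why a missing "per" clock stops the probe:

	/*
	 * Since "can: flexcan: add 2nd clock to support imx53 and newer"
	 * the driver looks up both clocks by name; on i.MX35 both names
	 * can point at the same gate, but both entries must exist.
	 */
	clk_ipg = devm_clk_get(&pdev->dev, "ipg");
	clk_per = devm_clk_get(&pdev->dev, "per");
	if (IS_ERR(clk_ipg) || IS_ERR(clk_per))
		return -ENODEV;	/* probe fails without the DT fix */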
Signed-off-by: Denis Carikli Signed-off-by: Shawn Guo Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/imx35.dtsi | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/imx35.dtsi b/arch/arm/boot/dts/imx35.dtsi index b6478e97d6a7e..e6540b5cfa4ca 100644 --- a/arch/arm/boot/dts/imx35.dtsi +++ b/arch/arm/boot/dts/imx35.dtsi @@ -286,8 +286,8 @@ can1: can@53fe4000 { compatible = "fsl,imx35-flexcan", "fsl,p1010-flexcan"; reg = <0x53fe4000 0x1000>; - clocks = <&clks 33>; - clock-names = "ipg"; + clocks = <&clks 33>, <&clks 33>; + clock-names = "ipg", "per"; interrupts = <43>; status = "disabled"; }; @@ -295,8 +295,8 @@ can2: can@53fe8000 { compatible = "fsl,imx35-flexcan", "fsl,p1010-flexcan"; reg = <0x53fe8000 0x1000>; - clocks = <&clks 34>; - clock-names = "ipg"; + clocks = <&clks 34>, <&clks 34>; + clock-names = "ipg", "per"; interrupts = <44>; status = "disabled"; }; From dcee583335047bcc1501e3b49d4537a8320022c6 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Thu, 16 Jul 2015 16:16:44 +0300 Subject: [PATCH 0589/2091] ARM: OMAP2+: hwmod: Fix _wait_target_ready() for hwmods without sysc commit 9a258afa928b45e6dd2efcac46ccf7eea705d35a upstream. For hwmods without sysc, _init_mpu_rt_base(oh) won't be called and so _find_mpu_rt_port(oh) will return NULL thus preventing ready state check on those modules after the module is enabled. This can potentially cause a bus access error if the module is accessed before the module is ready. Fix this by unconditionally calling _init_mpu_rt_base() during hwmod _init(). Do ioremap only if we need SYSC access. Eventhough _wait_target_ready() check doesn't really need MPU RT port but just the PRCM registers, we still mandate that the hwmod must have an MPU RT port if ready state check needs to be done. Else it would mean that the module is not accessible by MPU so there is no point in waiting for target to be ready. e.g. this fixes the below DCAN bus access error on AM437x-gp-evm. 
[ 16.672978] ------------[ cut here ]------------ [ 16.677885] WARNING: CPU: 0 PID: 1580 at drivers/bus/omap_l3_noc.c:147 l3_interrupt_handler+0x234/0x35c() [ 16.687946] 44000000.ocp:L3 Custom Error: MASTER M2 (64-bit) TARGET L4_PER_0 (Read): Data Access in User mode during Functional access [ 16.700654] Modules linked in: xhci_hcd btwilink ti_vpfe dwc3 videobuf2_core ov2659 bluetooth v4l2_common videodev ti_am335x_adc kfifo_buf industrialio c_can_platform videobuf2_dma_contig media snd_soc_tlv320aic3x pixcir_i2c_ts c_can dc [ 16.731144] CPU: 0 PID: 1580 Comm: rpc.statd Not tainted 3.14.26-02561-gf733aa036398 #180 [ 16.739747] Backtrace: [ 16.742336] [] (dump_backtrace) from [] (show_stack+0x18/0x1c) [ 16.750285] r6:00000093 r5:00000009 r4:eab5b8a8 r3:00000000 [ 16.756252] [] (show_stack) from [] (dump_stack+0x20/0x28) [ 16.763870] [] (dump_stack) from [] (warn_slowpath_common+0x6c/0x8c) [ 16.772408] [] (warn_slowpath_common) from [] (warn_slowpath_fmt+0x38/0x40) [ 16.781550] r8:c05d1f90 r7:c0730844 r6:c0730448 r5:80080003 r4:ed0cd210 [ 16.788626] [] (warn_slowpath_fmt) from [] (l3_interrupt_handler+0x234/0x35c) [ 16.797968] r3:ed0cd480 r2:c0730508 [ 16.801747] [] (l3_interrupt_handler) from [] (handle_irq_event_percpu+0x54/0x1bc) [ 16.811533] r10:ed005600 r9:c084855b r8:0000002a r7:00000000 r6:00000000 r5:0000002a [ 16.819780] r4:ed0e6d80 [ 16.822453] [] (handle_irq_event_percpu) from [] (handle_irq_event+0x30/0x40) [ 16.831789] r10:eb2b6938 r9:eb2b6960 r8:bf011420 r7:fa240100 r6:00000000 r5:0000002a [ 16.840052] r4:ed005600 [ 16.842744] [] (handle_irq_event) from [] (handle_fasteoi_irq+0x74/0x128) [ 16.851702] r4:ed005600 r3:00000000 [ 16.855479] [] (handle_fasteoi_irq) from [] (generic_handle_irq+0x28/0x38) [ 16.864523] r4:0000002a r3:c0066164 [ 16.868294] [] (generic_handle_irq) from [] (handle_IRQ+0x38/0x8c) [ 16.876612] r4:c081c640 r3:00000202 [ 16.880380] [] (handle_IRQ) from [] (gic_handle_irq+0x30/0x5c) [ 16.888328] r6:eab5ba38 r5:c0804460 r4:fa24010c r3:00000100 [ 16.894303] [] (gic_handle_irq) from [] (__irq_svc+0x40/0x50) [ 16.902193] Exception stack(0xeab5ba38 to 0xeab5ba80) [ 16.907499] ba20: 00000000 00000006 [ 16.916108] ba40: fa1d0000 fa1d0008 ed3d3000 eab5bab4 ed3d3460 c0842af4 bf011420 eb2b6960 [ 16.924716] ba60: eb2b6938 eab5ba8c eab5ba90 eab5ba80 bf035220 bf07702c 600f0013 ffffffff [ 16.933317] r7:eab5ba6c r6:ffffffff r5:600f0013 r4:bf07702c [ 16.939317] [] (c_can_plat_read_reg_aligned_to_16bit [c_can_platform]) from [] (c_can_get_berr_counter+0x38/0x64 [c_can]) [ 16.952696] [] (c_can_get_berr_counter [c_can]) from [] (can_fill_info+0x124/0x15c [can_dev]) [ 16.963480] r5:ec8c9740 r4:ed3d3000 [ 16.967253] [] (can_fill_info [can_dev]) from [] (rtnl_fill_ifinfo+0x58c/0x8fc) [ 16.976749] r6:ec8c9740 r5:ed3d3000 r4:eb2b6780 [ 16.981613] [] (rtnl_fill_ifinfo) from [] (rtnl_dump_ifinfo+0xf0/0x1dc) [ 16.990401] r10:ec8c9740 r9:00000000 r8:00000000 r7:00000000 r6:ebd4d1b4 r5:ed3d3000 [ 16.998671] r4:00000000 [ 17.001342] [] (rtnl_dump_ifinfo) from [] (netlink_dump+0xa8/0x1e0) [ 17.009772] r10:00000000 r9:00000000 r8:c0503318 r7:ebf3e6c0 r6:ebd4d1b4 r5:ec8c9740 [ 17.018050] r4:ebd4d000 [ 17.020714] [] (netlink_dump) from [] (__netlink_dump_start+0x104/0x154) [ 17.029591] r6:eab5bd34 r5:ec8c9980 r4:ebd4d000 [ 17.034454] [] (__netlink_dump_start) from [] (rtnetlink_rcv_msg+0x110/0x1f4) [ 17.043778] r7:00000000 r6:ec8c9980 r5:00000f40 r4:ebf3e6c0 [ 17.049743] [] (rtnetlink_rcv_msg) from [] (netlink_rcv_skb+0xb4/0xc8) [ 17.058449] r8:eab5bdac r7:ec8c9980 r6:c05054f4 r5:ec8c9980 
r4:ebf3e6c0 [ 17.065534] [] (netlink_rcv_skb) from [] (rtnetlink_rcv+0x24/0x2c) [ 17.073854] r6:ebd4d000 r5:00000014 r4:ec8c9980 r3:c0504110 [ 17.079846] [] (rtnetlink_rcv) from [] (netlink_unicast+0x180/0x1ec) [ 17.088363] r4:ed0c6800 r3:c0504110 [ 17.092113] [] (netlink_unicast) from [] (netlink_sendmsg+0x2ac/0x380) [ 17.100813] r10:00000000 r8:00000008 r7:ec8c9980 r6:ebd4d000 r5:eab5be70 r4:eab5bee4 [ 17.109083] [] (netlink_sendmsg) from [] (sock_sendmsg+0x90/0xb0) [ 17.117305] r10:00000000 r9:eab5a000 r8:becdda3c r7:0000000c r6:ea978400 r5:eab5be70 [ 17.125563] r4:c05103c4 [ 17.128225] [] (sock_sendmsg) from [] (SyS_sendto+0xb8/0xdc) [ 17.136001] r6:becdda5c r5:00000014 r4:ecd37040 [ 17.140876] [] (SyS_sendto) from [] (ret_fast_syscall+0x0/0x30) [ 17.148923] r10:00000000 r8:c000e804 r7:00000122 r6:becdda5c r5:0000000c r4:becdda5c [ 17.157169] ---[ end trace 2b71e15b38f58bad ]--- Fixes: 6423d6df1440 ("ARM: OMAP2+: hwmod: check for module address space during init") Signed-off-by: Roger Quadros Signed-off-by: Paul Walmsley Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-omap2/omap_hwmod.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c index 752969ff9de04..5286e7773ed40 100644 --- a/arch/arm/mach-omap2/omap_hwmod.c +++ b/arch/arm/mach-omap2/omap_hwmod.c @@ -2373,6 +2373,9 @@ static int of_dev_hwmod_lookup(struct device_node *np, * registers. This address is needed early so the OCP registers that * are part of the device's address space can be ioremapped properly. * + * If SYSC access is not needed, the registers will not be remapped + * and non-availability of MPU access is not treated as an error. + * * Returns 0 on success, -EINVAL if an invalid hwmod is passed, and * -ENXIO on absent or invalid register target address space. */ @@ -2387,6 +2390,11 @@ static int __init _init_mpu_rt_base(struct omap_hwmod *oh, void *data, _save_mpu_port_index(oh); + /* if we don't need sysc access we don't need to ioremap */ + if (!oh->class->sysc) + return 0; + + /* we can't continue without MPU PORT if we need sysc access */ if (oh->_int_flags & _HWMOD_NO_MPU_PORT) return -ENXIO; @@ -2396,8 +2404,10 @@ static int __init _init_mpu_rt_base(struct omap_hwmod *oh, void *data, oh->name); /* Extract the IO space from device tree blob */ - if (!np) + if (!np) { + pr_err("omap_hwmod: %s: no dt node\n", oh->name); return -ENXIO; + } va_start = of_iomap(np, index + oh->mpu_rt_idx); } else { @@ -2456,13 +2466,11 @@ static int __init _init(struct omap_hwmod *oh, void *data) oh->name, np->name); } - if (oh->class->sysc) { - r = _init_mpu_rt_base(oh, NULL, index, np); - if (r < 0) { - WARN(1, "omap_hwmod: %s: doesn't have mpu register target base\n", - oh->name); - return 0; - } + r = _init_mpu_rt_base(oh, NULL, index, np); + if (r < 0) { + WARN(1, "omap_hwmod: %s: doesn't have mpu register target base\n", + oh->name); + return 0; } r = _init_clocks(oh, NULL); From 66832d90e275ca52f348d6599f4769f9a99aea1a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 25 Jul 2015 03:03:38 +0300 Subject: [PATCH 0590/2091] ALSA: hda - fix cs4210_spdif_automute() commit 44008f0896ae205b02b0882dbf807f0de149efc4 upstream. Smatch complains that we have nested checks for "spdif_present". It turns out the current behavior isn't correct, we should remove the first check and keep the second. 
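To spell out why the nested test is wrong (illustration only, not part of the diff below): while the outer if (spdif_present) is there, the helper only runs when SPDIF is present, so the 0 (mute) branch of the ternary is dead code and the pin is never switched off again on unplug. The intended, unconditional update is simply:

	/* enable the pin while SPDIF is present, clear it when it is not */
	snd_hda_set_pin_ctl(codec, spdif_pin,
			    spdif_present ? PIN_OUT : 0);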
Fixes: 1077a024812d ('ALSA: hda - Use generic parser for Cirrus codec driver') Signed-off-by: Dan Carpenter Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_cirrus.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sound/pci/hda/patch_cirrus.c b/sound/pci/hda/patch_cirrus.c index 50e9dd6755797..3a24f7739aaaa 100644 --- a/sound/pci/hda/patch_cirrus.c +++ b/sound/pci/hda/patch_cirrus.c @@ -1001,9 +1001,7 @@ static void cs4210_spdif_automute(struct hda_codec *codec, spec->spdif_present = spdif_present; /* SPDIF TX on/off */ - if (spdif_present) - snd_hda_set_pin_ctl(codec, spdif_pin, - spdif_present ? PIN_OUT : 0); + snd_hda_set_pin_ctl(codec, spdif_pin, spdif_present ? PIN_OUT : 0); cs_automute(codec); } From 852fbebb7c8aa17fdea227299f45be119a65bd1c Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Wed, 5 Aug 2015 18:03:34 +0800 Subject: [PATCH 0591/2091] ALSA: hda - one Dell machine needs the headphone white noise fixup commit 73851b36fe73819f8c201971e913324d4846a7ea upstream. The fixup ALC292_FIXUP_DISABLE_AAMIX can fix the white noise of the headphone on this Dell machine. Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 590bcfb0e82fa..1e99f075a5ab7 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5118,6 +5118,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x06c7, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x06d9, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x06da, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1028, 0x06de, "Dell", ALC292_FIXUP_DISABLE_AAMIX), SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2), From 4a4a6ddbd97f8942916c88dba7f6a189434fd222 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Wed, 5 Aug 2015 09:21:05 +0900 Subject: [PATCH 0592/2091] ALSA: fireworks/firewire-lib: add support for recent firmware quirk commit 18f5ed365d3f188a91149d528c853000330a4a58 upstream. Fireworks uses TSB43CB43(IceLynx-Micro) as its IEC 61883-1/6 interface. This chip includes ARM7 core, and loads and runs program. The firmware is stored in on-board memory and loaded every powering-on from it. Echo Audio ships several versions of firmwares for each model. These firmwares have each quirk and the quirk changes a sequence of packets. As long as I investigated, AudioFire2/AudioFire4/AudioFirePre8 have a quirk to transfer a first packet with 0x02 in its dbc field. This causes ALSA Fireworks driver to detect discontinuity. In this case, firmware version 5.7.0, 5.7.3 and 5.8.0 are used. Payload CIP CIP quadlets header1 header2 02 00050002 90ffffff <- 42 0005000a 90013000 42 00050012 90014400 42 0005001a 90015800 02 0005001a 90ffffff 42 00050022 90019000 42 0005002a 9001a400 42 00050032 9001b800 02 00050032 90ffffff 42 0005003a 9001d000 42 00050042 9001e400 42 0005004a 9001f800 02 0005004a 90ffffff (AudioFire2 with firmware version 5.7.) $ dmesg snd-fireworks fw1.0: Detect discontinuity of CIP: 00 02 These models, AudioFire8 (since Jul 2009 ) and Gibson Robot Interface Pack series uses the same ARM binary as their firmware. 
Thus, this quirk may be observed among them. This commit adds a new member for AMDTP structure. This member represents the value of dbc field in a first AMDTP packet. Drivers can set it with a preferred value according to model's quirk. Tested-by: Johannes Oertei Signed-off-by: Takashi Sakamoto Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/firewire/amdtp.c | 5 +++-- sound/firewire/amdtp.h | 2 ++ sound/firewire/fireworks/fireworks.c | 8 ++++++++ sound/firewire/fireworks/fireworks.h | 1 + sound/firewire/fireworks/fireworks_stream.c | 9 +++++++++ 5 files changed, 23 insertions(+), 2 deletions(-) diff --git a/sound/firewire/amdtp.c b/sound/firewire/amdtp.c index e061355f535f0..bf20593d30852 100644 --- a/sound/firewire/amdtp.c +++ b/sound/firewire/amdtp.c @@ -730,8 +730,9 @@ static void handle_in_packet(struct amdtp_stream *s, s->data_block_counter != UINT_MAX) data_block_counter = s->data_block_counter; - if (((s->flags & CIP_SKIP_DBC_ZERO_CHECK) && data_block_counter == 0) || - (s->data_block_counter == UINT_MAX)) { + if (((s->flags & CIP_SKIP_DBC_ZERO_CHECK) && + data_block_counter == s->tx_first_dbc) || + s->data_block_counter == UINT_MAX) { lost = false; } else if (!(s->flags & CIP_DBC_IS_END_EVENT)) { lost = data_block_counter != s->data_block_counter; diff --git a/sound/firewire/amdtp.h b/sound/firewire/amdtp.h index 8a03a91e728b0..25c9055376580 100644 --- a/sound/firewire/amdtp.h +++ b/sound/firewire/amdtp.h @@ -153,6 +153,8 @@ struct amdtp_stream { /* quirk: fixed interval of dbc between previos/current packets. */ unsigned int tx_dbc_interval; + /* quirk: indicate the value of dbc field in a first packet. */ + unsigned int tx_first_dbc; bool callbacked; wait_queue_head_t callback_wait; diff --git a/sound/firewire/fireworks/fireworks.c b/sound/firewire/fireworks/fireworks.c index 2682e7e3e5c98..c94a432f7cc65 100644 --- a/sound/firewire/fireworks/fireworks.c +++ b/sound/firewire/fireworks/fireworks.c @@ -248,8 +248,16 @@ efw_probe(struct fw_unit *unit, err = get_hardware_info(efw); if (err < 0) goto error; + /* AudioFire8 (since 2009) and AudioFirePre8 */ if (entry->model_id == MODEL_ECHO_AUDIOFIRE_9) efw->is_af9 = true; + /* These models uses the same firmware. */ + if (entry->model_id == MODEL_ECHO_AUDIOFIRE_2 || + entry->model_id == MODEL_ECHO_AUDIOFIRE_4 || + entry->model_id == MODEL_ECHO_AUDIOFIRE_9 || + entry->model_id == MODEL_GIBSON_RIP || + entry->model_id == MODEL_GIBSON_GOLDTOP) + efw->is_fireworks3 = true; snd_efw_proc_init(efw); diff --git a/sound/firewire/fireworks/fireworks.h b/sound/firewire/fireworks/fireworks.h index 4f0201a95222a..084d414b228cf 100644 --- a/sound/firewire/fireworks/fireworks.h +++ b/sound/firewire/fireworks/fireworks.h @@ -71,6 +71,7 @@ struct snd_efw { /* for quirks */ bool is_af9; + bool is_fireworks3; u32 firmware_version; unsigned int midi_in_ports; diff --git a/sound/firewire/fireworks/fireworks_stream.c b/sound/firewire/fireworks/fireworks_stream.c index c55db1bddc80a..7e353f1f7bff3 100644 --- a/sound/firewire/fireworks/fireworks_stream.c +++ b/sound/firewire/fireworks/fireworks_stream.c @@ -172,6 +172,15 @@ int snd_efw_stream_init_duplex(struct snd_efw *efw) efw->tx_stream.flags |= CIP_DBC_IS_END_EVENT; /* Fireworks reset dbc at bus reset. */ efw->tx_stream.flags |= CIP_SKIP_DBC_ZERO_CHECK; + /* + * But Recent firmwares starts packets with non-zero dbc. + * Driver version 5.7.6 installs firmware version 5.7.3. 
+ */ + if (efw->is_fireworks3 && + (efw->firmware_version == 0x5070000 || + efw->firmware_version == 0x5070300 || + efw->firmware_version == 0x5080000)) + efw->tx_stream.tx_first_dbc = 0x02; /* AudioFire9 always reports wrong dbs. */ if (efw->is_af9) efw->tx_stream.flags |= CIP_WRONG_DBS; From f03bde70841d33615dc6921cca369f4035ee6c68 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Thu, 30 Jul 2015 18:18:39 +0200 Subject: [PATCH 0593/2091] hwmon: (nct7904) Export I2C module alias information commit 1252be9ce0ab4f622b8692b648894d09c0df71ce upstream. The I2C core always reports the MODALIAS uevent as "i2c: Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/nct7904.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hwmon/nct7904.c b/drivers/hwmon/nct7904.c index 6153df735e82c..08ff89d222e5f 100644 --- a/drivers/hwmon/nct7904.c +++ b/drivers/hwmon/nct7904.c @@ -575,6 +575,7 @@ static const struct i2c_device_id nct7904_id[] = { {"nct7904", 0}, {} }; +MODULE_DEVICE_TABLE(i2c, nct7904_id); static struct i2c_driver nct7904_driver = { .class = I2C_CLASS_HWMON, From 25ab1617bf735b8a411104c60852651cae6769fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Thu, 30 Jul 2015 20:41:57 +0200 Subject: [PATCH 0594/2091] hwmon: (dell-smm) Blacklist Dell Studio XPS 8100 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit a4b45b25f18d1e798965efec429ba5fc01b9f0b6 upstream. CPU fan speed going up and down on Dell Studio XPS 8100 for unknown reasons. Without further debugging on the affected machine, it is not possible to find the problem. Link: https://bugzilla.kernel.org/show_bug.cgi?id=100121 Signed-off-by: Pali Rohár Tested-by: Jan C Peters [groeck: cleaned up description, comments] Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/char/i8k.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/char/i8k.c b/drivers/char/i8k.c index a43048b5b05fd..3c1a123f909cc 100644 --- a/drivers/char/i8k.c +++ b/drivers/char/i8k.c @@ -900,6 +900,21 @@ static struct dmi_system_id i8k_dmi_table[] __initdata = { MODULE_DEVICE_TABLE(dmi, i8k_dmi_table); +static struct dmi_system_id i8k_blacklist_dmi_table[] __initdata = { + { + /* + * CPU fan speed going up and down on Dell Studio XPS 8100 + * for unknown reasons. + */ + .ident = "Dell Studio XPS 8100", + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Studio XPS 8100"), + }, + }, + { } +}; + /* * Probe for the presence of a supported laptop. */ @@ -911,7 +926,8 @@ static int __init i8k_probe(void) /* * Get DMI information */ - if (!dmi_check_system(i8k_dmi_table)) { + if (!dmi_check_system(i8k_dmi_table) || + dmi_check_system(i8k_blacklist_dmi_table)) { if (!ignore_dmi && !force) return -ENODEV; From dbbf0fec573be060c7869674ec112b4c3de58957 Mon Sep 17 00:00:00 2001 From: Marcus Gelderie Date: Thu, 6 Aug 2015 15:46:10 -0700 Subject: [PATCH 0595/2091] ipc: modify message queue accounting to not take kernel data structures into account commit de54b9ac253787c366bbfb28d901a31954eb3511 upstream. A while back, the message queue implementation in the kernel was improved to use btrees to speed up retrieval of messages, in commit d6629859b36d ("ipc/mqueue: improve performance of send/recv"). 
That patch introducing the improved kernel handling of message queues (using btrees) has, as a by-product, changed the meaning of the QSIZE field in the pseudo-file created for the queue. Before, this field reflected the size of the user-data in the queue. Since, it also takes kernel data structures into account. For example, if 13 bytes of user data are in the queue, on my machine the file reports a size of 61 bytes. There was some discussion on this topic before (for example https://lkml.org/lkml/2014/10/1/115). Commenting on a th lkml, Michael Kerrisk gave the following background (https://lkml.org/lkml/2015/6/16/74): The pseudofiles in the mqueue filesystem (usually mounted at /dev/mqueue) expose fields with metadata describing a message queue. One of these fields, QSIZE, as originally implemented, showed the total number of bytes of user data in all messages in the message queue, and this feature was documented from the beginning in the mq_overview(7) page. In 3.5, some other (useful) work happened to break the user-space API in a couple of places, including the value exposed via QSIZE, which now includes a measure of kernel overhead bytes for the queue, a figure that renders QSIZE useless for its original purpose, since there's no way to deduce the number of overhead bytes consumed by the implementation. (The other user-space breakage was subsequently fixed.) This patch removes the accounting of kernel data structures in the queue. Reporting the size of these data-structures in the QSIZE field was a breaking change (see Michael's comment above). Without the QSIZE field reporting the total size of user-data in the queue, there is no way to deduce this number. It should be noted that the resource limit RLIMIT_MSGQUEUE is counted against the worst-case size of the queue (in both the old and the new implementation). Therefore, the kernel overhead accounting in QSIZE is not necessary to help the user understand the limitations RLIMIT imposes on the processes. 
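For illustration only (not part of the diff below), a minimal userspace check of the restored semantics, assuming an mqueue filesystem mounted at /dev/mqueue; build with -lrt:

#include <fcntl.h>
#include <mqueue.h>
#include <stdlib.h>

int main(void)
{
	mqd_t q = mq_open("/qsize-demo", O_CREAT | O_WRONLY, 0600, NULL);

	if (q == (mqd_t)-1)
		return 1;
	mq_send(q, "hello, queue!", 13, 0);	/* 13 bytes of user data */
	/*
	 * With this patch QSIZE reports 13 -- the user data only.
	 * Before it, the same line also counted per-message kernel
	 * bookkeeping and showed a larger, unusable figure.
	 */
	system("cat /dev/mqueue/qsize-demo");
	mq_close(q);
	mq_unlink("/qsize-demo");
	return 0;
}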
Signed-off-by: Marcus Gelderie Acked-by: Doug Ledford Acked-by: Michael Kerrisk Acked-by: Davidlohr Bueso Cc: David Howells Cc: Alexander Viro Cc: John Duffy Cc: Arto Bendiken Cc: Manfred Spraul Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- ipc/mqueue.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 3aaea7ffd077c..c3fc5c2b63f34 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -143,7 +143,6 @@ static int msg_insert(struct msg_msg *msg, struct mqueue_inode_info *info) if (!leaf) return -ENOMEM; INIT_LIST_HEAD(&leaf->msg_list); - info->qsize += sizeof(*leaf); } leaf->priority = msg->m_type; rb_link_node(&leaf->rb_node, parent, p); @@ -188,7 +187,6 @@ static inline struct msg_msg *msg_get(struct mqueue_inode_info *info) "lazy leaf delete!\n"); rb_erase(&leaf->rb_node, &info->msg_tree); if (info->node_cache) { - info->qsize -= sizeof(*leaf); kfree(leaf); } else { info->node_cache = leaf; @@ -201,7 +199,6 @@ static inline struct msg_msg *msg_get(struct mqueue_inode_info *info) if (list_empty(&leaf->msg_list)) { rb_erase(&leaf->rb_node, &info->msg_tree); if (info->node_cache) { - info->qsize -= sizeof(*leaf); kfree(leaf); } else { info->node_cache = leaf; @@ -1026,7 +1023,6 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr, /* Save our speculative allocation into the cache */ INIT_LIST_HEAD(&new_leaf->msg_list); info->node_cache = new_leaf; - info->qsize += sizeof(*new_leaf); new_leaf = NULL; } else { kfree(new_leaf); @@ -1133,7 +1129,6 @@ SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr, /* Save our speculative allocation into the cache */ INIT_LIST_HEAD(&new_leaf->msg_list); info->node_cache = new_leaf; - info->qsize += sizeof(*new_leaf); } else { kfree(new_leaf); } From c42af788a7e6a33ff8d19b964d9e91df204fde9b Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Thu, 6 Aug 2015 15:46:23 -0700 Subject: [PATCH 0596/2091] ocfs2: fix BUG in ocfs2_downconvert_thread_do_work() commit 209f7512d007980fd111a74a064d70a3656079cf upstream. The "BUG_ON(list_empty(&osb->blocked_lock_list))" in ocfs2_downconvert_thread_do_work can be triggered in the following case: ocfs2dc has firstly saved osb->blocked_lock_count to local varibale processed, and then processes the dentry lockres. During the dentry put, it calls iput and then deletes rw, inode and open lockres from blocked list in ocfs2_mark_lockres_freeing. And this causes the variable `processed' to not reflect the number of blocked lockres to be processed, which triggers the BUG. Signed-off-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/dlmglue.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 8b23aa2f52dda..23157e40dd740 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -4025,9 +4025,13 @@ static void ocfs2_downconvert_thread_do_work(struct ocfs2_super *osb) osb->dc_work_sequence = osb->dc_wake_sequence; processed = osb->blocked_lock_count; - while (processed) { - BUG_ON(list_empty(&osb->blocked_lock_list)); - + /* + * blocked lock processing in this loop might call iput which can + * remove items off osb->blocked_lock_list. Downconvert up to + * 'processed' number of locks, but stop short if we had some + * removed in ocfs2_mark_lockres_freeing when downconverting. 
+ */ + while (processed && !list_empty(&osb->blocked_lock_list)) { lockres = list_entry(osb->blocked_lock_list.next, struct ocfs2_lock_res, l_blocked_list); list_del_init(&lockres->l_blocked_list); From 4b6bf170e83e666974ce00d7251dd0d4b5ca5439 Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Thu, 6 Aug 2015 15:46:48 -0700 Subject: [PATCH 0597/2091] ocfs2: fix shift left overflow commit 32e5a2a2be6b085febaac36efff495ad65a55e6c upstream. When using a large volume, for example 9T volume with 2T already used, frequent creation of small files with O_DIRECT when the IO is not cluster aligned may clear sectors in the wrong place. This will cause filesystem corruption. This is because p_cpos is a u32. When calculating the corresponding sector it should be converted to u64 first, otherwise it may overflow. Signed-off-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/aops.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index f906a250da6ad..9ea70127074d1 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -686,7 +686,7 @@ static int ocfs2_direct_IO_zero_extend(struct ocfs2_super *osb, if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) { u64 s = i_size_read(inode); - sector_t sector = (p_cpos << (osb->s_clustersize_bits - 9)) + + sector_t sector = ((u64)p_cpos << (osb->s_clustersize_bits - 9)) + (do_div(s, osb->s_clustersize) >> 9); ret = blkdev_issue_zeroout(osb->sb->s_bdev, sector, @@ -911,7 +911,7 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb, BUG_ON(!p_cpos || (ext_flags & OCFS2_EXT_UNWRITTEN)); ret = blkdev_issue_zeroout(osb->sb->s_bdev, - p_cpos << (osb->s_clustersize_bits - 9), + (u64)p_cpos << (osb->s_clustersize_bits - 9), zero_len_head >> 9, GFP_NOFS, false); if (ret < 0) mlog_errno(ret); From c7e6f05156402364f34669e0fa6fd69b834f994b Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Tue, 7 Jul 2015 10:16:37 +0800 Subject: [PATCH 0598/2091] nfsd: Drop BUG_ON and ignore SECLABEL on absent filesystem commit c2227a39a078473115910512aa0f8d53bd915e60 upstream. On an absent filesystem (one served by another server), we need to be able to handle requests for certain attributest (like fs_locations, so the client can find out which server does have the filesystem), but others we can't. We forgot to take that into account when adding another attribute bitmask work for the SECURITY_LABEL attribute. There an export entry with the "refer" option can result in: [ 88.414272] kernel BUG at fs/nfsd/nfs4xdr.c:2249! [ 88.414828] invalid opcode: 0000 [#1] SMP [ 88.415368] Modules linked in: rpcsec_gss_krb5 nfsv4 dns_resolver nfs fscache nfsd xfs libcrc32c iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi iosf_mbi ppdev btrfs coretemp crct10dif_pclmul crc32_pclmul crc32c_intel xor ghash_clmulni_intel raid6_pq vmw_balloon parport_pc parport i2c_piix4 shpchp vmw_vmci acpi_cpufreq auth_rpcgss nfs_acl lockd grace sunrpc vmwgfx drm_kms_helper ttm drm mptspi mptscsih serio_raw mptbase e1000 scsi_transport_spi ata_generic pata_acpi [last unloaded: nfsd] [ 88.417827] CPU: 0 PID: 2116 Comm: nfsd Not tainted 4.0.7-300.fc22.x86_64 #1 [ 88.418448] Hardware name: VMware, Inc. 
VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 05/20/2014 [ 88.419093] task: ffff880079146d50 ti: ffff8800785d8000 task.ti: ffff8800785d8000 [ 88.419729] RIP: 0010:[] [] nfsd4_encode_fattr+0x820/0x1f00 [nfsd] [ 88.420376] RSP: 0000:ffff8800785db998 EFLAGS: 00010206 [ 88.421027] RAX: 0000000000000001 RBX: 000000000018091a RCX: ffff88006668b980 [ 88.421676] RDX: 00000000fffef7fc RSI: 0000000000000000 RDI: ffff880078d05000 [ 88.422315] RBP: ffff8800785dbb58 R08: ffff880078d043f8 R09: ffff880078d4a000 [ 88.422968] R10: 0000000000010000 R11: 0000000000000002 R12: 0000000000b0a23a [ 88.423612] R13: ffff880078d05000 R14: ffff880078683100 R15: ffff88006668b980 [ 88.424295] FS: 0000000000000000(0000) GS:ffff88007c600000(0000) knlGS:0000000000000000 [ 88.424944] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 88.425597] CR2: 00007f40bc370f90 CR3: 0000000035af5000 CR4: 00000000001407f0 [ 88.426285] Stack: [ 88.426921] ffff8800785dbaa8 ffffffffa049e4af ffff8800785dba08 ffffffff813298f0 [ 88.427585] ffff880078683300 ffff8800769b0de8 0000089d00000001 0000000087f805e0 [ 88.428228] ffff880000000000 ffff880079434a00 0000000000000000 ffff88006668b980 [ 88.428877] Call Trace: [ 88.429527] [] ? exp_get_by_name+0x7f/0xb0 [nfsd] [ 88.430168] [] ? inode_doinit_with_dentry+0x210/0x6a0 [ 88.430807] [] ? d_lookup+0x2e/0x60 [ 88.431449] [] ? dput+0x33/0x230 [ 88.432097] [] ? mntput+0x24/0x40 [ 88.432719] [] ? path_put+0x22/0x30 [ 88.433340] [] ? nfsd_cross_mnt+0xb7/0x1c0 [nfsd] [ 88.433954] [] nfsd4_encode_dirent+0x1b0/0x3d0 [nfsd] [ 88.434601] [] ? nfsd4_encode_getattr+0x40/0x40 [nfsd] [ 88.435172] [] nfsd_readdir+0x1c1/0x2a0 [nfsd] [ 88.435710] [] ? nfsd_direct_splice_actor+0x20/0x20 [nfsd] [ 88.436447] [] nfsd4_encode_readdir+0x120/0x220 [nfsd] [ 88.437011] [] nfsd4_encode_operation+0x7d/0x190 [nfsd] [ 88.437566] [] nfsd4_proc_compound+0x24d/0x6f0 [nfsd] [ 88.438157] [] nfsd_dispatch+0xc3/0x220 [nfsd] [ 88.438680] [] svc_process_common+0x43b/0x690 [sunrpc] [ 88.439192] [] svc_process+0x103/0x1b0 [sunrpc] [ 88.439694] [] nfsd+0x117/0x190 [nfsd] [ 88.440194] [] ? nfsd_destroy+0x90/0x90 [nfsd] [ 88.440697] [] kthread+0xd8/0xf0 [ 88.441260] [] ? kthread_worker_fn+0x180/0x180 [ 88.441762] [] ret_from_fork+0x58/0x90 [ 88.442322] [] ? kthread_worker_fn+0x180/0x180 [ 88.442879] Code: 0f 84 93 05 00 00 83 f8 ea c7 85 a0 fe ff ff 00 00 27 30 0f 84 ba fe ff ff 85 c0 0f 85 a5 fe ff ff e9 e3 f9 ff ff 0f 1f 44 00 00 <0f> 0b 66 0f 1f 44 00 00 be 04 00 00 00 4c 89 ef 4c 89 8d 68 fe [ 88.444052] RIP [] nfsd4_encode_fattr+0x820/0x1f00 [nfsd] [ 88.444658] RSP [ 88.445232] ---[ end trace 6cb9d0487d94a29f ]--- Signed-off-by: Kinglong Mee Signed-off-by: J. 
Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4xdr.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 158badf945df1..d4d84451e0e63 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2142,6 +2142,7 @@ nfsd4_encode_aclname(struct xdr_stream *xdr, struct svc_rqst *rqstp, #define WORD0_ABSENT_FS_ATTRS (FATTR4_WORD0_FS_LOCATIONS | FATTR4_WORD0_FSID | \ FATTR4_WORD0_RDATTR_ERROR) #define WORD1_ABSENT_FS_ATTRS FATTR4_WORD1_MOUNTED_ON_FILEID +#define WORD2_ABSENT_FS_ATTRS 0 #ifdef CONFIG_NFSD_V4_SECURITY_LABEL static inline __be32 @@ -2170,7 +2171,7 @@ nfsd4_encode_security_label(struct xdr_stream *xdr, struct svc_rqst *rqstp, { return 0; } #endif -static __be32 fattr_handle_absent_fs(u32 *bmval0, u32 *bmval1, u32 *rdattr_err) +static __be32 fattr_handle_absent_fs(u32 *bmval0, u32 *bmval1, u32 *bmval2, u32 *rdattr_err) { /* As per referral draft: */ if (*bmval0 & ~WORD0_ABSENT_FS_ATTRS || @@ -2183,6 +2184,7 @@ static __be32 fattr_handle_absent_fs(u32 *bmval0, u32 *bmval1, u32 *rdattr_err) } *bmval0 &= WORD0_ABSENT_FS_ATTRS; *bmval1 &= WORD1_ABSENT_FS_ATTRS; + *bmval2 &= WORD2_ABSENT_FS_ATTRS; return 0; } @@ -2246,8 +2248,7 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, BUG_ON(bmval2 & ~nfsd_suppattrs2(minorversion)); if (exp->ex_fslocs.migrated) { - BUG_ON(bmval[2]); - status = fattr_handle_absent_fs(&bmval0, &bmval1, &rdattr_err); + status = fattr_handle_absent_fs(&bmval0, &bmval1, &bmval2, &rdattr_err); if (status) goto out; } @@ -2290,8 +2291,8 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, } #ifdef CONFIG_NFSD_V4_SECURITY_LABEL - if ((bmval[2] & FATTR4_WORD2_SECURITY_LABEL) || - bmval[0] & FATTR4_WORD0_SUPPORTED_ATTRS) { + if ((bmval2 & FATTR4_WORD2_SECURITY_LABEL) || + bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) { err = security_inode_getsecctx(d_inode(dentry), &context, &contextlen); contextsupport = (err == 0); From be424ace3767c8b632e88d110341e2a9fba8c9bb Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Tue, 14 Jul 2015 18:27:46 -0500 Subject: [PATCH 0599/2091] PCI: Restore PCI_MSIX_FLAGS_BIRMASK definition commit c9ddbac9c89110f77cb0fa07e634aaf1194899aa upstream. 09a2c73ddfc7 ("PCI: Remove unused PCI_MSIX_FLAGS_BIRMASK definition") removed PCI_MSIX_FLAGS_BIRMASK from an exported header because it was unused in the kernel. But that breaks user programs that were using it (QEMU in particular). Restore the PCI_MSIX_FLAGS_BIRMASK definition. [bhelgaas: changelog] Signed-off-by: Michael S. Tsirkin Signed-off-by: Bjorn Helgaas Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/pci_regs.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index efe3443572baa..413417f3707bb 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -319,6 +319,7 @@ #define PCI_MSIX_PBA 8 /* Pending Bit Array offset */ #define PCI_MSIX_PBA_BIR 0x00000007 /* BAR index */ #define PCI_MSIX_PBA_OFFSET 0xfffffff8 /* Offset into specified BAR */ +#define PCI_MSIX_FLAGS_BIRMASK PCI_MSIX_PBA_BIR /* deprecated */ #define PCI_CAP_MSIX_SIZEOF 12 /* size of MSIX registers */ /* MSI-X Table entry format */ From 24a9fa79d5b9f3cf555c24336f56c0fcbbdd98bf Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 27 Jul 2015 11:48:52 +1000 Subject: [PATCH 0600/2091] md/raid1: extend spinlock to protect raid1_end_read_request against inconsistencies commit 423f04d63cf421ea436bcc5be02543d549ce4b28 upstream. 
raid1_end_read_request() assumes that the In_sync bits are consistent with the ->degaded count. raid1_spare_active updates the In_sync bit before the ->degraded count and so exposes an inconsistency, as does error() So extend the spinlock in raid1_spare_active() and error() to hide those inconsistencies. This should probably be part of Commit: 34cab6f42003 ("md/raid1: fix test for 'was read error from last working device'.") as it addresses the same issue. It fixes the same bug and should go to -stable for same reasons. Fixes: 76073054c95b ("md/raid1: clean up read_balance.") Signed-off-by: NeilBrown Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid1.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index cd7b0c1e882d7..5ce3cd5c4e1d9 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1475,6 +1475,7 @@ static void error(struct mddev *mddev, struct md_rdev *rdev) { char b[BDEVNAME_SIZE]; struct r1conf *conf = mddev->private; + unsigned long flags; /* * If it is not operational, then we have already marked it as dead @@ -1494,14 +1495,13 @@ static void error(struct mddev *mddev, struct md_rdev *rdev) return; } set_bit(Blocked, &rdev->flags); + spin_lock_irqsave(&conf->device_lock, flags); if (test_and_clear_bit(In_sync, &rdev->flags)) { - unsigned long flags; - spin_lock_irqsave(&conf->device_lock, flags); mddev->degraded++; set_bit(Faulty, &rdev->flags); - spin_unlock_irqrestore(&conf->device_lock, flags); } else set_bit(Faulty, &rdev->flags); + spin_unlock_irqrestore(&conf->device_lock, flags); /* * if recovery is running, make sure it aborts. */ @@ -1567,7 +1567,10 @@ static int raid1_spare_active(struct mddev *mddev) * Find all failed disks within the RAID1 configuration * and mark them readable. * Called under mddev lock, so rcu protection not needed. + * device_lock used to avoid races with raid1_end_read_request + * which expects 'In_sync' flags and ->degraded to be consistent. */ + spin_lock_irqsave(&conf->device_lock, flags); for (i = 0; i < conf->raid_disks; i++) { struct md_rdev *rdev = conf->mirrors[i].rdev; struct md_rdev *repl = conf->mirrors[conf->raid_disks + i].rdev; @@ -1598,7 +1601,6 @@ static int raid1_spare_active(struct mddev *mddev) sysfs_notify_dirent_safe(rdev->sysfs_state); } } - spin_lock_irqsave(&conf->device_lock, flags); mddev->degraded -= count; spin_unlock_irqrestore(&conf->device_lock, flags); From 8ce686edc53a87b40c53597f03dc9c72877518a4 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Mon, 3 Aug 2015 09:54:58 -0400 Subject: [PATCH 0601/2091] dm: fix dm_merge_bvec regression on 32 bit systems commit bd4aaf8f9b85d6b2df3231fd62b219ebb75d3568 upstream. A DM regression on 32 bit systems was reported against v4.2-rc3 here: https://lkml.org/lkml/2015/7/29/401 Fix this by reverting both commit 1c220c69 ("dm: fix casting bug in dm_merge_bvec()") and 148e51ba ("dm: improve documentation and code clarity in dm_merge_bvec"). This combined revert is done to eliminate the possibility of a partial revert in stable@ kernels. In hindsight the correct fix, at the time 1c220c69 was applied to fix the regression that 148e51ba introduced, should've been to simply revert 148e51ba. 
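As a stand-alone sketch of the truncation hazard behind the two different clamps (not the dm code path itself; the 3 GiB figure is arbitrary and the wrap-around assumes a typical 32-bit two's-complement int): a byte count computed in a 64-bit type can exceed INT_MAX, so the variant being reverted had to cap a sector_t at INT_MAX, whereas the restored int-based code clamps negative results to 0.

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            /* A byte count held in a 64-bit type can exceed INT_MAX;
             * 3 GiB is only chosen to make the effect visible. */
            uint64_t bytes = 3ULL * 1024 * 1024 * 1024;

            /* The restored code computes max_size as an int ... */
            int max_size = (int)bytes;  /* wraps to a negative value */

            printf("64-bit byte count: %llu\n", (unsigned long long)bytes);
            printf("as int:            %d\n", max_size);

            /* ... which is why it clamps negative results to 0, where the
             * reverted variant had to cap a sector_t at INT_MAX instead. */
            if (max_size < 0)
                    max_size = 0;
            printf("after clamp:       %d\n", max_size);
            return 0;
    }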
Reported-by: Josh Boyer Tested-by: Adam Williamson Acked-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm.c | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index e8d84566f3118..697f34fba06b7 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1719,7 +1719,8 @@ static int dm_merge_bvec(struct request_queue *q, struct mapped_device *md = q->queuedata; struct dm_table *map = dm_get_live_table_fast(md); struct dm_target *ti; - sector_t max_sectors, max_size = 0; + sector_t max_sectors; + int max_size = 0; if (unlikely(!map)) goto out; @@ -1732,18 +1733,10 @@ static int dm_merge_bvec(struct request_queue *q, * Find maximum amount of I/O that won't need splitting */ max_sectors = min(max_io_len(bvm->bi_sector, ti), - (sector_t) queue_max_sectors(q)); + (sector_t) BIO_MAX_SECTORS); max_size = (max_sectors << SECTOR_SHIFT) - bvm->bi_size; - - /* - * FIXME: this stop-gap fix _must_ be cleaned up (by passing a sector_t - * to the targets' merge function since it holds sectors not bytes). - * Just doing this as an interim fix for stable@ because the more - * comprehensive cleanup of switching to sector_t will impact every - * DM target that implements a ->merge hook. - */ - if (max_size > INT_MAX) - max_size = INT_MAX; + if (max_size < 0) + max_size = 0; /* * merge_bvec_fn() returns number of bytes @@ -1751,13 +1744,13 @@ static int dm_merge_bvec(struct request_queue *q, * max is precomputed maximal io size */ if (max_size && ti->type->merge) - max_size = ti->type->merge(ti, bvm, biovec, (int) max_size); + max_size = ti->type->merge(ti, bvm, biovec, max_size); /* * If the target doesn't support merge method and some of the devices - * provided their merge_bvec method (we know this by looking for the - * max_hw_sectors that dm_set_device_limits may set), then we can't - * allow bios with multiple vector entries. So always set max_size - * to 0, and the code below allows just one page. + * provided their merge_bvec method (we know this by looking at + * queue_max_hw_sectors), then we can't allow bios with multiple vector + * entries. So always set max_size to 0, and the code below allows + * just one page. */ else if (queue_max_hw_sectors(q) <= PAGE_SIZE >> 9) max_size = 0; From 18d08fe123ad241d9cc9c3ab0562a607bb3189ef Mon Sep 17 00:00:00 2001 From: Malcolm Priestley Date: Sun, 2 Aug 2015 12:34:46 +0100 Subject: [PATCH 0602/2091] staging: vt6655: vnt_bss_info_changed check conf->beacon_rate is not NULL commit 1f17124006b65482d9084c01e252b59dbca8db8f upstream. conf->beacon_rate can be NULL on association. So check conf->beacon_rate BSS_CHANGED_BEACON_INFO needs to flagged in changed as the beacon_rate will appear later. 
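A minimal stand-alone sketch of the guard this change applies, using hypothetical stand-ins for the mac80211 structures (the driver's additional op_mode check is omitted): react to either changed flag, but never dereference beacon_rate until it is actually present.

    #include <stdio.h>
    #include <stddef.h>

    /* Hypothetical stand-ins for the mac80211 types involved (illustration only). */
    struct rate { int hw_value; };
    struct bss_conf { int assoc; struct rate *beacon_rate; };

    #define CHANGED_ASSOC       0x1u
    #define CHANGED_BEACON_INFO 0x2u

    static void update_tsf(int hw_value)
    {
            printf("update TSF with beacon rate %d\n", hw_value);
    }

    static void bss_info_changed(struct bss_conf *conf, unsigned changed)
    {
            /* The fixed pattern: handle either flag, and only touch
             * beacon_rate once it is non-NULL. */
            if ((changed & (CHANGED_ASSOC | CHANGED_BEACON_INFO)) &&
                conf->assoc && conf->beacon_rate)
                    update_tsf(conf->beacon_rate->hw_value);
    }

    int main(void)
    {
            struct rate r = { .hw_value = 2 };
            struct bss_conf conf = { .assoc = 1, .beacon_rate = NULL };

            bss_info_changed(&conf, CHANGED_ASSOC);       /* safe: no rate yet */
            conf.beacon_rate = &r;
            bss_info_changed(&conf, CHANGED_BEACON_INFO); /* rate arrives later */
            return 0;
    }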
Signed-off-by: Malcolm Priestley Signed-off-by: Greg Kroah-Hartman --- drivers/staging/vt6655/device_main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/staging/vt6655/device_main.c b/drivers/staging/vt6655/device_main.c index 15baacb126ad1..376e4a0c15c66 100644 --- a/drivers/staging/vt6655/device_main.c +++ b/drivers/staging/vt6655/device_main.c @@ -1486,8 +1486,9 @@ static void vnt_bss_info_changed(struct ieee80211_hw *hw, } } - if (changed & BSS_CHANGED_ASSOC && priv->op_mode != NL80211_IFTYPE_AP) { - if (conf->assoc) { + if (changed & (BSS_CHANGED_ASSOC | BSS_CHANGED_BEACON_INFO) && + priv->op_mode != NL80211_IFTYPE_AP) { + if (conf->assoc && conf->beacon_rate) { CARDbUpdateTSF(priv, conf->beacon_rate->hw_value, conf->sync_tsf); From 7f49ff48592f08bc2419c0b87103c6e62bb5385a Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sat, 1 Aug 2015 07:01:24 -0700 Subject: [PATCH 0603/2091] staging: lustre: Include unaligned.h instead of access_ok.h commit fb1de5a4c825a389f054cc3803e06116d2fbdc7e upstream. Including access_ok.h causes the ia64:allmodconfig build (and maybe others) to fail with include/linux/unaligned/le_struct.h:6:19: error: redefinition of 'get_unaligned_le16' include/linux/unaligned/access_ok.h:7:19: note: previous definition of 'get_unaligned_le16' was here include/linux/unaligned/le_struct.h:26:20: error: redefinition of 'put_unaligned_le32' include/linux/unaligned/access_ok.h:42:20: note: previous definition of 'put_unaligned_le32' was here include/linux/unaligned/le_struct.h:31:20: error: redefinition of 'put_unaligned_le64' include/linux/unaligned/access_ok.h:47:20: note: previous definition of 'put_unaligned_le64' was here Include unaligned.h instead and leave it up to the architecture to decide how to implement unaligned accesses. Fixes: 8c4f136497315 ("Staging: lustre: Use put_unaligned_le64") Cc: Vaishali Thakkar Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/staging/lustre/lustre/obdclass/debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/lustre/lustre/obdclass/debug.c b/drivers/staging/lustre/lustre/obdclass/debug.c index 9c934e6d2ea11..c61add46b4268 100644 --- a/drivers/staging/lustre/lustre/obdclass/debug.c +++ b/drivers/staging/lustre/lustre/obdclass/debug.c @@ -40,7 +40,7 @@ #define DEBUG_SUBSYSTEM D_OTHER -#include +#include #include "../include/obd_support.h" #include "../include/lustre_debug.h" From 49b38223eaf3947542f18fec68c03ae6588c4abb Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Mon, 27 Jul 2015 14:51:47 +0800 Subject: [PATCH 0604/2091] usb: gadget: f_uac2: fix calculation of uac2->p_interval commit c41b7767673cb76adeb2b5fde220209f717ea13c upstream. The p_interval should be less if the 'bInterval' at the descriptor is larger, eg, if 'bInterval' is 5 for HS, the p_interval should be 8000 / 16 = 500. 
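As a stand-alone sketch of the corrected arithmetic (assuming the factors used by the driver: 1000 frames/s for full speed, 8000 microframes/s for high speed), packets per second come out as factor / 2^(bInterval-1):

    #include <stdio.h>

    /* Not the driver code itself, just the arithmetic of the fix:
     * p_interval = factor / 2^(bInterval - 1). */
    static unsigned p_interval(unsigned factor, unsigned bInterval)
    {
            return factor / (1u << (bInterval - 1));
    }

    int main(void)
    {
            /* The example from the commit message: HS, bInterval = 5 -> 8000/16 = 500 */
            printf("HS, bInterval=5: %u packets/s\n", p_interval(8000, 5));
            /* FS, bInterval = 1 -> 1000/1 = 1000 packets/s */
            printf("FS, bInterval=1: %u packets/s\n", p_interval(1000, 1));
            return 0;
    }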
It fixes the patch 9bb87f168931 ("usb: gadget: f_uac2: send reasonably sized packets") Fixes: 9bb87f168931 ("usb: gadget: f_uac2: send reasonably sized packets") Acked-by: Daniel Mack Signed-off-by: Peter Chen Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_uac2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/function/f_uac2.c b/drivers/usb/gadget/function/f_uac2.c index 6d3eb8b00a488..5318615472533 100644 --- a/drivers/usb/gadget/function/f_uac2.c +++ b/drivers/usb/gadget/function/f_uac2.c @@ -1162,14 +1162,14 @@ afunc_set_alt(struct usb_function *fn, unsigned intf, unsigned alt) factor = 1000; } else { ep_desc = &hs_epin_desc; - factor = 125; + factor = 8000; } /* pre-compute some values for iso_complete() */ uac2->p_framesize = opts->p_ssize * num_channels(opts->p_chmask); rate = opts->p_srate * uac2->p_framesize; - uac2->p_interval = (1 << (ep_desc->bInterval - 1)) * factor; + uac2->p_interval = factor / (1 << (ep_desc->bInterval - 1)); uac2->p_pktsize = min_t(unsigned int, rate / uac2->p_interval, prm->max_psize); From 8dc811bbac76eb7433d9731de2e7d0f91e0f9bc2 Mon Sep 17 00:00:00 2001 From: Reinhard Speyerer Date: Tue, 14 Jul 2015 22:55:06 +0200 Subject: [PATCH 0605/2091] USB: qcserial/option: make AT URCs work for Sierra Wireless MC7305/MC7355 commit 653cdc13a340ad1cef29f1bab0d05d0771fa1d57 upstream. Tests with a Sierra Wireless MC7355 have shown that 1199:9041 devices also require the option_send_setup() code to be used on the USB interface for the AT port to make unsolicited response codes work correctly. Move these devices from the qcserial driver to the option driver like it has been done for the 1199:68c0 devices in commit d80c0d14183516f184a5ac88e11008ee4c7d2a2e ("USB: qcserial/option: make AT URCs work for Sierra Wireless MC73xx"). 
Signed-off-by: Reinhard Speyerer Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 2 ++ drivers/usb/serial/qcserial.c | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 19b85ee98a724..876423b8892c9 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1099,6 +1099,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x9000)}, /* SIMCom SIM5218 */ { USB_DEVICE_INTERFACE_CLASS(SIERRA_VENDOR_ID, 0x68c0, 0xff), .driver_info = (kernel_ulong_t)&sierra_mc73xx_blacklist }, /* MC73xx */ + { USB_DEVICE_INTERFACE_CLASS(SIERRA_VENDOR_ID, 0x9041, 0xff), + .driver_info = (kernel_ulong_t)&sierra_mc73xx_blacklist }, /* MC7305/MC7355 */ { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6001) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CMU_300) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6003), diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c index 9c63897b3a564..552dc9a7f523b 100644 --- a/drivers/usb/serial/qcserial.c +++ b/drivers/usb/serial/qcserial.c @@ -145,7 +145,6 @@ static const struct usb_device_id id_table[] = { {DEVICE_SWI(0x1199, 0x901c)}, /* Sierra Wireless EM7700 */ {DEVICE_SWI(0x1199, 0x901f)}, /* Sierra Wireless EM7355 */ {DEVICE_SWI(0x1199, 0x9040)}, /* Sierra Wireless Modem */ - {DEVICE_SWI(0x1199, 0x9041)}, /* Sierra Wireless MC7305/MC7355 */ {DEVICE_SWI(0x1199, 0x9051)}, /* Netgear AirCard 340U */ {DEVICE_SWI(0x1199, 0x9053)}, /* Sierra Wireless Modem */ {DEVICE_SWI(0x1199, 0x9054)}, /* Sierra Wireless Modem */ From a1e6e21f9ed08a436290af81bc3e8b51c8b2cfff Mon Sep 17 00:00:00 2001 From: Pieter Hollants Date: Mon, 20 Jul 2015 11:56:17 +0200 Subject: [PATCH 0606/2091] USB: qcserial: Add support for Dell Wireless 5809e 4G Modem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 6da3700c98cdc8360f55c5510915efae1d66deea upstream. Added the USB IDs 0x413c:0x81b1 for the "Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card", a Dell-branded Sierra Wireless EM7305 LTE card in M.2 form factor, used eg. in Dell's Latitude E7540 Notebook series. "lsusb -v" output for this device: Bus 002 Device 003: ID 413c:81b1 Dell Computer Corp. Device Descriptor: bLength 18 bDescriptorType 1 bcdUSB 2.00 bDeviceClass 0 bDeviceSubClass 0 bDeviceProtocol 0 bMaxPacketSize0 64 idVendor 0x413c Dell Computer Corp. 
idProduct 0x81b1 bcdDevice 0.06 iManufacturer 1 Sierra Wireless, Incorporated iProduct 2 Dell Wireless 5809e Gobi™ 4G LTE Mobile Broadband Card iSerial 3 bNumConfigurations 2 Configuration Descriptor: bLength 9 bDescriptorType 2 wTotalLength 204 bNumInterfaces 4 bConfigurationValue 1 iConfiguration 0 bmAttributes 0xe0 Self Powered Remote Wakeup MaxPower 500mA Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 0 bAlternateSetting 0 bNumEndpoints 2 bInterfaceClass 255 Vendor Specific Class bInterfaceSubClass 255 Vendor Specific Subclass bInterfaceProtocol 255 Vendor Specific Protocol iInterface 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x81 EP 1 IN bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x01 EP 1 OUT bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 0 Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 2 bAlternateSetting 0 bNumEndpoints 3 bInterfaceClass 255 Vendor Specific Class bInterfaceSubClass 0 bInterfaceProtocol 0 iInterface 0 ** UNRECOGNIZED: 05 24 00 10 01 ** UNRECOGNIZED: 05 24 01 00 00 ** UNRECOGNIZED: 04 24 02 02 ** UNRECOGNIZED: 05 24 06 00 00 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x83 EP 3 IN bmAttributes 3 Transfer Type Interrupt Synch Type None Usage Type Data wMaxPacketSize 0x000c 1x 12 bytes bInterval 9 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x82 EP 2 IN bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x02 EP 2 OUT bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 0 Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 3 bAlternateSetting 0 bNumEndpoints 3 bInterfaceClass 255 Vendor Specific Class bInterfaceSubClass 0 bInterfaceProtocol 0 iInterface 0 ** UNRECOGNIZED: 05 24 00 10 01 ** UNRECOGNIZED: 05 24 01 00 00 ** UNRECOGNIZED: 04 24 02 02 ** UNRECOGNIZED: 05 24 06 00 00 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x85 EP 5 IN bmAttributes 3 Transfer Type Interrupt Synch Type None Usage Type Data wMaxPacketSize 0x000c 1x 12 bytes bInterval 9 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x84 EP 4 IN bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x03 EP 3 OUT bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 0 Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 8 bAlternateSetting 0 bNumEndpoints 3 bInterfaceClass 255 Vendor Specific Class bInterfaceSubClass 255 Vendor Specific Subclass bInterfaceProtocol 255 Vendor Specific Protocol iInterface 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x87 EP 7 IN bmAttributes 3 Transfer Type Interrupt Synch Type None Usage Type Data wMaxPacketSize 0x000a 1x 10 bytes bInterval 9 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x86 EP 6 IN bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x04 EP 4 OUT 
bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 0 ** UNRECOGNIZED: 2c ff 42 49 53 54 00 01 07 f5 40 f6 00 00 00 00 01 f7 c4 09 02 f8 c4 09 03 f9 88 13 04 fa 10 27 05 fb 10 27 06 fc c4 09 07 fd c4 09 Configuration Descriptor: bLength 9 bDescriptorType 2 wTotalLength 95 bNumInterfaces 2 bConfigurationValue 2 iConfiguration 0 bmAttributes 0xe0 Self Powered Remote Wakeup MaxPower 500mA Interface Association: bLength 8 bDescriptorType 11 bFirstInterface 12 bInterfaceCount 2 bFunctionClass 2 Communications bFunctionSubClass 14 bFunctionProtocol 0 iFunction 0 Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 12 bAlternateSetting 0 bNumEndpoints 1 bInterfaceClass 2 Communications bInterfaceSubClass 14 bInterfaceProtocol 0 iInterface 0 CDC Header: bcdCDC 1.10 CDC Union: bMasterInterface 12 bSlaveInterface 13 CDC MBIM: bcdMBIMVersion 1.00 wMaxControlMessage 4096 bNumberFilters 32 bMaxFilterSize 128 wMaxSegmentSize 1500 bmNetworkCapabilities 0x20 8-byte ntb input size CDC MBIM Extended: bcdMBIMExtendedVersion 1.00 bMaxOutstandingCommandMessages 64 wMTU 1500 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x82 EP 2 IN bmAttributes 3 Transfer Type Interrupt Synch Type None Usage Type Data wMaxPacketSize 0x0040 1x 64 bytes bInterval 9 Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 13 bAlternateSetting 0 bNumEndpoints 0 bInterfaceClass 10 CDC Data bInterfaceSubClass 0 bInterfaceProtocol 2 iInterface 0 Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 13 bAlternateSetting 1 bNumEndpoints 2 bInterfaceClass 10 CDC Data bInterfaceSubClass 0 bInterfaceProtocol 2 iInterface 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x81 EP 1 IN bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x01 EP 1 OUT bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 0 Device Qualifier (for other device speed): bLength 10 bDescriptorType 6 bcdUSB 2.00 bDeviceClass 0 bDeviceSubClass 0 bDeviceProtocol 0 bMaxPacketSize0 64 bNumConfigurations 2 Device Status: 0x0000 (Bus Powered) Signed-off-by: Pieter Hollants Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/qcserial.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c index 552dc9a7f523b..d156545728c2a 100644 --- a/drivers/usb/serial/qcserial.c +++ b/drivers/usb/serial/qcserial.c @@ -157,6 +157,7 @@ static const struct usb_device_id id_table[] = { {DEVICE_SWI(0x413c, 0x81a4)}, /* Dell Wireless 5570e HSPA+ (42Mbps) Mobile Broadband Card */ {DEVICE_SWI(0x413c, 0x81a8)}, /* Dell Wireless 5808 Gobi(TM) 4G LTE Mobile Broadband Card */ {DEVICE_SWI(0x413c, 0x81a9)}, /* Dell Wireless 5808e Gobi(TM) 4G LTE Mobile Broadband Card */ + {DEVICE_SWI(0x413c, 0x81b1)}, /* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card */ /* Huawei devices */ {DEVICE_HWI(0x03f0, 0x581d)}, /* HP lt4112 LTE/HSPA+ Gobi 4G Modem (Huawei me906e) */ From be9a404609b44c0fd35b40491d387ee5646da2d1 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Fri, 26 Jun 2015 19:43:58 -0500 Subject: [PATCH 0607/2091] mtd: nand: Fix NAND_USE_BOUNCE_BUFFER flag conflict commit 5f867db63473f32cce1b868e281ebd42a41f8fad upstream. 
Commit 66507c7bc8895f0da6b ("mtd: nand: Add support to use nand_base poi databuf as bounce buffer") added a flag NAND_USE_BOUNCE_BUFFER using the same bit value as the existing NAND_BUSWIDTH_AUTO. Cc: Kamal Dasu Fixes: 66507c7bc8895f0da6b ("mtd: nand: Add support to use nand_base poi databuf as bounce buffer") Signed-off-by: Scott Wood Signed-off-by: Brian Norris Signed-off-by: Greg Kroah-Hartman --- include/linux/mtd/nand.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 3d4ea7eb2b68b..12b75f3ba0a0c 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -175,11 +175,6 @@ typedef enum { #define NAND_OWN_BUFFERS 0x00020000 /* Chip may not exist, so silence any errors in scan */ #define NAND_SCAN_SILENT_NODEV 0x00040000 -/* - * This option could be defined by controller drivers to protect against - * kmap'ed, vmalloc'ed highmem buffers being passed from upper layers - */ -#define NAND_USE_BOUNCE_BUFFER 0x00080000 /* * Autodetect nand buswidth with readid/onfi. * This suppose the driver will configure the hardware in 8 bits mode @@ -187,6 +182,11 @@ typedef enum { * before calling nand_scan_tail. */ #define NAND_BUSWIDTH_AUTO 0x00080000 +/* + * This option could be defined by controller drivers to protect against + * kmap'ed, vmalloc'ed highmem buffers being passed from upper layers + */ +#define NAND_USE_BOUNCE_BUFFER 0x00100000 /* Options set by nand scan */ /* Nand scan has allocated controller struct */ From 1770acb5356360a373c58835a8d1e786506a3f76 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 3 Aug 2015 14:06:24 -0700 Subject: [PATCH 0608/2091] Input: alps - only Dell laptops have separate button bits for v2 dualpoint sticks commit 073e570d7c2caae9910a993d56f340be4548a4a8 upstream. It turns out that only Dell laptops have the separate button bits for v2 dualpoint sticks and that commit 92bac83dd79e ("Input: alps - non interleaved V2 dualpoint has separate stick button bits") causes regressions on Toshiba laptops. This commit adds a check for Dell laptops to the code for handling these extra button bits, fixing this regression. This patch has been tested on a Dell Latitude D620 to make sure that it does not reintroduce the original problem. Reported-and-tested-by: Douglas Christman Signed-off-by: Hans de Goede Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- Documentation/input/alps.txt | 6 ++++-- drivers/input/mouse/alps.c | 8 ++++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/Documentation/input/alps.txt b/Documentation/input/alps.txt index c86f2f1ae4f6a..1fec1135791d9 100644 --- a/Documentation/input/alps.txt +++ b/Documentation/input/alps.txt @@ -119,8 +119,10 @@ ALPS Absolute Mode - Protocol Version 2 byte 5: 0 z6 z5 z4 z3 z2 z1 z0 Protocol Version 2 DualPoint devices send standard PS/2 mouse packets for -the DualPoint Stick. For non interleaved dualpoint devices the pointingstick -buttons get reported separately in the PSM, PSR and PSL bits. +the DualPoint Stick. The M, R and L bits signal the combined status of both +the pointingstick and touchpad buttons, except for Dell dualpoint devices +where the pointingstick buttons get reported separately in the PSM, PSR +and PSL bits. 
Dualpoint device -- interleaved packet format --------------------------------------------- diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c index a353b7de6d22e..bc7eed67998aa 100644 --- a/drivers/input/mouse/alps.c +++ b/drivers/input/mouse/alps.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "psmouse.h" #include "alps.h" @@ -99,6 +100,7 @@ static const struct alps_nibble_commands alps_v6_nibble_commands[] = { #define ALPS_FOUR_BUTTONS 0x40 /* 4 direction button present */ #define ALPS_PS2_INTERLEAVED 0x80 /* 3-byte PS/2 packet interleaved with 6-byte ALPS packet */ +#define ALPS_DELL 0x100 /* device is a Dell laptop */ #define ALPS_BUTTONPAD 0x200 /* device is a clickpad */ static const struct alps_model_info alps_model_data[] = { @@ -251,9 +253,9 @@ static void alps_process_packet_v1_v2(struct psmouse *psmouse) return; } - /* Non interleaved V2 dualpoint has separate stick button bits */ + /* Dell non interleaved V2 dualpoint has separate stick button bits */ if (priv->proto_version == ALPS_PROTO_V2 && - priv->flags == (ALPS_PASS | ALPS_DUALPOINT)) { + priv->flags == (ALPS_DELL | ALPS_PASS | ALPS_DUALPOINT)) { left |= packet[0] & 1; right |= packet[0] & 2; middle |= packet[0] & 4; @@ -2542,6 +2544,8 @@ static int alps_set_protocol(struct psmouse *psmouse, priv->byte0 = protocol->byte0; priv->mask0 = protocol->mask0; priv->flags = protocol->flags; + if (dmi_name_in_vendors("Dell")) + priv->flags |= ALPS_DELL; priv->x_max = 2000; priv->y_max = 1400; From 00d707ae64f062c2c4b488c76306278a1298557a Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 8 Jun 2015 10:35:49 +0900 Subject: [PATCH 0609/2091] thermal: exynos: Disable the regulator on probe failure commit 5f09a5cbd14ae16e93866040fa44d930ff885650 upstream. During probe the regulator (if present) was enabled but not disabled in case of failure. So an unsuccessful probe lead to enabling the regulator which was actually not needed because the device was not enabled. Additionally each deferred probe lead to increase of regulator enable count so it would not be effectively disabled during removal of the device. Test HW: Exynos4412 - Trats2 board Signed-off-by: Krzysztof Kozlowski Fixes: 498d22f616f6 ("thermal: exynos: Support for TMU regulator defined at device tree") Reviewed-by: Javier Martinez Canillas Signed-off-by: Lukasz Majewski Tested-by: Lukasz Majewski Signed-off-by: Eduardo Valentin Signed-off-by: Greg Kroah-Hartman --- drivers/thermal/samsung/exynos_tmu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/thermal/samsung/exynos_tmu.c b/drivers/thermal/samsung/exynos_tmu.c index 1d30b09756515..67098a8a7a021 100644 --- a/drivers/thermal/samsung/exynos_tmu.c +++ b/drivers/thermal/samsung/exynos_tmu.c @@ -1209,6 +1209,8 @@ static int exynos_tmu_probe(struct platform_device *pdev) if (!IS_ERR(data->clk_sec)) clk_unprepare(data->clk_sec); err_sensor: + if (!IS_ERR_OR_NULL(data->regulator)) + regulator_disable(data->regulator); thermal_zone_of_sensor_unregister(&pdev->dev, data->tzd); return ret; From 7f488aad4f9dbaced8a4529817a7404658406a96 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Tue, 4 Aug 2015 14:36:58 -0700 Subject: [PATCH 0610/2091] mm, vmscan: Do not wait for page writeback for GFP_NOFS allocations commit ecf5fc6e9654cd7a268c782a523f072b2f1959f9 upstream. 
Nikolay has reported a hang when a memcg reclaim got stuck with the following backtrace: PID: 18308 TASK: ffff883d7c9b0a30 CPU: 1 COMMAND: "rsync" #0 __schedule at ffffffff815ab152 #1 schedule at ffffffff815ab76e #2 schedule_timeout at ffffffff815ae5e5 #3 io_schedule_timeout at ffffffff815aad6a #4 bit_wait_io at ffffffff815abfc6 #5 __wait_on_bit at ffffffff815abda5 #6 wait_on_page_bit at ffffffff8111fd4f #7 shrink_page_list at ffffffff81135445 #8 shrink_inactive_list at ffffffff81135845 #9 shrink_lruvec at ffffffff81135ead #10 shrink_zone at ffffffff811360c3 #11 shrink_zones at ffffffff81136eff #12 do_try_to_free_pages at ffffffff8113712f #13 try_to_free_mem_cgroup_pages at ffffffff811372be #14 try_charge at ffffffff81189423 #15 mem_cgroup_try_charge at ffffffff8118c6f5 #16 __add_to_page_cache_locked at ffffffff8112137d #17 add_to_page_cache_lru at ffffffff81121618 #18 pagecache_get_page at ffffffff8112170b #19 grow_dev_page at ffffffff811c8297 #20 __getblk_slow at ffffffff811c91d6 #21 __getblk_gfp at ffffffff811c92c1 #22 ext4_ext_grow_indepth at ffffffff8124565c #23 ext4_ext_create_new_leaf at ffffffff81246ca8 #24 ext4_ext_insert_extent at ffffffff81246f09 #25 ext4_ext_map_blocks at ffffffff8124a848 #26 ext4_map_blocks at ffffffff8121a5b7 #27 mpage_map_one_extent at ffffffff8121b1fa #28 mpage_map_and_submit_extent at ffffffff8121f07b #29 ext4_writepages at ffffffff8121f6d5 #30 do_writepages at ffffffff8112c490 #31 __filemap_fdatawrite_range at ffffffff81120199 #32 filemap_flush at ffffffff8112041c #33 ext4_alloc_da_blocks at ffffffff81219da1 #34 ext4_rename at ffffffff81229b91 #35 ext4_rename2 at ffffffff81229e32 #36 vfs_rename at ffffffff811a08a5 #37 SYSC_renameat2 at ffffffff811a3ffc #38 sys_renameat2 at ffffffff811a408e #39 sys_rename at ffffffff8119e51e #40 system_call_fastpath at ffffffff815afa89 Dave Chinner has properly pointed out that this is a deadlock in the reclaim code because ext4 doesn't submit pages which are marked by PG_writeback right away. The heuristic was introduced by commit e62e384e9da8 ("memcg: prevent OOM with too many dirty pages") and it was applied only when may_enter_fs was specified. The code has been changed by c3b94f44fcb0 ("memcg: further prevent OOM with too many dirty pages") which has removed the __GFP_FS restriction with a reasoning that we do not get into the fs code. But this is not sufficient apparently because the fs doesn't necessarily submit pages marked PG_writeback for IO right away. ext4_bio_write_page calls io_submit_add_bh but that doesn't necessarily submit the bio. Instead it tries to map more pages into the bio and mpage_map_one_extent might trigger memcg charge which might end up waiting on a page which is marked PG_writeback but hasn't been submitted yet so we would end up waiting for something that never finishes. Fix this issue by replacing __GFP_IO by may_enter_fs check (for case 2) before we go to wait on the writeback. The page fault path, which is the only path that triggers memcg oom killer since 3.12, shouldn't require GFP_NOFS and so we shouldn't reintroduce the premature OOM killer issue which was originally addressed by the heuristic. As per David Chinner the xfs is doing similar thing since 2.6.15 already so ext4 is not the only affected filesystem. Moreover he notes: : For example: IO completion might require unwritten extent conversion : which executes filesystem transactions and GFP_NOFS allocations. The : writeback flag on the pages can not be cleared until unwritten : extent conversion completes. 
Hence memory reclaim cannot wait on : page writeback to complete in GFP_NOFS context because it is not : safe to do so, memcg reclaim or otherwise. Cc: stable@vger.kernel.org # 3.9+ [tytso@mit.edu: corrected the control flow] Fixes: c3b94f44fcb0 ("memcg: further prevent OOM with too many dirty pages") Reported-by: Nikolay Borisov Signed-off-by: Michal Hocko Signed-off-by: Hugh Dickins Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/vmscan.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index 5e8eadd71bac7..0d024fc8aa8ef 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -937,21 +937,17 @@ static unsigned long shrink_page_list(struct list_head *page_list, * * 2) Global reclaim encounters a page, memcg encounters a * page that is not marked for immediate reclaim or - * the caller does not have __GFP_IO. In this case mark + * the caller does not have __GFP_FS (or __GFP_IO if it's + * simply going to swap, not to fs). In this case mark * the page for immediate reclaim and continue scanning. * - * __GFP_IO is checked because a loop driver thread might + * Require may_enter_fs because we would wait on fs, which + * may not have submitted IO yet. And the loop driver might * enter reclaim, and deadlock if it waits on a page for * which it is needed to do the write (loop masks off * __GFP_IO|__GFP_FS for this reason); but more thought * would probably show more reasons. * - * Don't require __GFP_FS, since we're not going into the - * FS, just waiting on its writeback completion. Worryingly, - * ext4 gfs2 and xfs allocate pages with - * grab_cache_page_write_begin(,,AOP_FLAG_NOFS), so testing - * may_enter_fs here is liable to OOM on them. - * * 3) memcg encounters a page that is not already marked * PageReclaim. memcg does not have any dirty pages * throttling so we could easily OOM just because too many @@ -968,7 +964,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, /* Case 2 above */ } else if (global_reclaim(sc) || - !PageReclaim(page) || !(sc->gfp_mask & __GFP_IO)) { + !PageReclaim(page) || !may_enter_fs) { /* * This is slightly racy - end_page_writeback() * might have just cleared PageReclaim, then From 8a97f0e58abc93f67f1f6ca571e4ff758739cedf Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Thu, 6 Aug 2015 15:46:33 -0700 Subject: [PATCH 0611/2091] signalfd: fix information leak in signalfd_copyinfo commit 3ead7c52bdb0ab44f4bb1feed505a8323cc12ba7 upstream. This function may copy the si_addr_lsb field to user mode when it hasn't been initialized, which can leak kernel stack data to user mode. Just checking the value of si_code is insufficient because the same si_code value is shared between multiple signals. This is solved by checking the value of si_signo in addition to si_code. Signed-off-by: Amanieu d'Antras Cc: Oleg Nesterov Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/signalfd.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/signalfd.c b/fs/signalfd.c index 7e412ad748363..270221fcef42c 100644 --- a/fs/signalfd.c +++ b/fs/signalfd.c @@ -121,8 +121,9 @@ static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo, * Other callers might not initialize the si_lsb field, * so check explicitly for the right codes here. 
*/ - if (kinfo->si_code == BUS_MCEERR_AR || - kinfo->si_code == BUS_MCEERR_AO) + if (kinfo->si_signo == SIGBUS && + (kinfo->si_code == BUS_MCEERR_AR || + kinfo->si_code == BUS_MCEERR_AO)) err |= __put_user((short) kinfo->si_addr_lsb, &uinfo->ssi_addr_lsb); #endif From c08a75d950725cdd87c19232a5a3850c51520359 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Thu, 6 Aug 2015 15:46:29 -0700 Subject: [PATCH 0612/2091] signal: fix information leak in copy_siginfo_to_user commit 26135022f85105ad725cda103fa069e29e83bd16 upstream. This function may copy the si_addr_lsb, si_lower and si_upper fields to user mode when they haven't been initialized, which can leak kernel stack data to user mode. Just checking the value of si_code is insufficient because the same si_code value is shared between multiple signals. This is solved by checking the value of si_signo in addition to si_code. Signed-off-by: Amanieu d'Antras Cc: Oleg Nesterov Cc: Ingo Molnar Cc: Russell King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/signal32.c | 3 ++- kernel/signal.c | 9 ++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index d26fcd4cd6e62..56e6ba74628d7 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -168,7 +168,8 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) * Other callers might not initialize the si_lsb field, * so check explicitely for the right codes here. */ - if (from->si_code == BUS_MCEERR_AR || from->si_code == BUS_MCEERR_AO) + if (from->si_signo == SIGBUS && + (from->si_code == BUS_MCEERR_AR || from->si_code == BUS_MCEERR_AO)) err |= __put_user(from->si_addr_lsb, &to->si_addr_lsb); #endif break; diff --git a/kernel/signal.c b/kernel/signal.c index d51c5ddd855c8..ac4c24ea2048d 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2753,12 +2753,15 @@ int copy_siginfo_to_user(siginfo_t __user *to, const siginfo_t *from) * Other callers might not initialize the si_lsb field, * so check explicitly for the right codes here. */ - if (from->si_code == BUS_MCEERR_AR || from->si_code == BUS_MCEERR_AO) + if (from->si_signo == SIGBUS && + (from->si_code == BUS_MCEERR_AR || from->si_code == BUS_MCEERR_AO)) err |= __put_user(from->si_addr_lsb, &to->si_addr_lsb); #endif #ifdef SEGV_BNDERR - err |= __put_user(from->si_lower, &to->si_lower); - err |= __put_user(from->si_upper, &to->si_upper); + if (from->si_signo == SIGSEGV && from->si_code == SEGV_BNDERR) { + err |= __put_user(from->si_lower, &to->si_lower); + err |= __put_user(from->si_upper, &to->si_upper); + } #endif break; case __SI_CHLD: From 52124831a34a52764a2ce76f1ba0703ecf9d220a Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Thu, 6 Aug 2015 15:46:26 -0700 Subject: [PATCH 0613/2091] signal: fix information leak in copy_siginfo_from_user32 commit 3c00cb5e68dc719f2fc73a33b1b230aadfcb1309 upstream. This function can leak kernel stack data when the user siginfo_t has a positive si_code value. The top 16 bits of si_code descibe which fields in the siginfo_t union are active, but they are treated inconsistently between copy_siginfo_from_user32, copy_siginfo_to_user32 and copy_siginfo_to_user. copy_siginfo_from_user32 is called from rt_sigqueueinfo and rt_tgsigqueueinfo in which the user has full control overthe top 16 bits of si_code. 
This fixes the following information leaks: x86: 8 bytes leaked when sending a signal from a 32-bit process to itself. This leak grows to 16 bytes if the process uses x32. (si_code = __SI_CHLD) x86: 100 bytes leaked when sending a signal from a 32-bit process to a 64-bit process. (si_code = -1) sparc: 4 bytes leaked when sending a signal from a 32-bit process to a 64-bit process. (si_code = any) parsic and s390 have similar bugs, but they are not vulnerable because rt_[tg]sigqueueinfo have checks that prevent sending a positive si_code to a different process. These bugs are also fixed for consistency. Signed-off-by: Amanieu d'Antras Cc: Oleg Nesterov Cc: Ingo Molnar Cc: Russell King Cc: Ralf Baechle Cc: Benjamin Herrenschmidt Cc: Chris Metcalf Cc: Paul Mackerras Cc: Michael Ellerman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/signal32.c | 2 -- arch/mips/kernel/signal32.c | 2 -- arch/powerpc/kernel/signal_32.c | 2 -- arch/tile/kernel/compat_signal.c | 2 -- kernel/signal.c | 4 ++-- 5 files changed, 2 insertions(+), 10 deletions(-) diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index 56e6ba74628d7..c0cff3410166e 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -202,8 +202,6 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) { - memset(to, 0, sizeof *to); - if (copy_from_user(to, from, __ARCH_SI_PREAMBLE_SIZE) || copy_from_user(to->_sifields._pad, from->_sifields._pad, SI_PAD_SIZE)) diff --git a/arch/mips/kernel/signal32.c b/arch/mips/kernel/signal32.c index 19a7705f2a015..5d7f2634996fd 100644 --- a/arch/mips/kernel/signal32.c +++ b/arch/mips/kernel/signal32.c @@ -409,8 +409,6 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) { - memset(to, 0, sizeof *to); - if (copy_from_user(to, from, 3*sizeof(int)) || copy_from_user(to->_sifields._pad, from->_sifields._pad, SI_PAD_SIZE32)) diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index d3a831ac0f927..da50e0c9c57e6 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -966,8 +966,6 @@ int copy_siginfo_to_user32(struct compat_siginfo __user *d, const siginfo_t *s) int copy_siginfo_from_user32(siginfo_t *to, struct compat_siginfo __user *from) { - memset(to, 0, sizeof *to); - if (copy_from_user(to, from, 3*sizeof(int)) || copy_from_user(to->_sifields._pad, from->_sifields._pad, SI_PAD_SIZE32)) diff --git a/arch/tile/kernel/compat_signal.c b/arch/tile/kernel/compat_signal.c index e8c2c04143cda..c667e104a0c25 100644 --- a/arch/tile/kernel/compat_signal.c +++ b/arch/tile/kernel/compat_signal.c @@ -113,8 +113,6 @@ int copy_siginfo_from_user32(siginfo_t *to, struct compat_siginfo __user *from) if (!access_ok(VERIFY_READ, from, sizeof(struct compat_siginfo))) return -EFAULT; - memset(to, 0, sizeof(*to)); - err = __get_user(to->si_signo, &from->si_signo); err |= __get_user(to->si_errno, &from->si_errno); err |= __get_user(to->si_code, &from->si_code); diff --git a/kernel/signal.c b/kernel/signal.c index ac4c24ea2048d..0206be728dacb 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -3025,7 +3025,7 @@ COMPAT_SYSCALL_DEFINE3(rt_sigqueueinfo, int, sig, struct compat_siginfo __user *, uinfo) { - siginfo_t info; + siginfo_t info = {}; int ret = 
copy_siginfo_from_user32(&info, uinfo); if (unlikely(ret)) return ret; @@ -3069,7 +3069,7 @@ COMPAT_SYSCALL_DEFINE4(rt_tgsigqueueinfo, int, sig, struct compat_siginfo __user *, uinfo) { - siginfo_t info; + siginfo_t info = {}; if (copy_siginfo_from_user32(&info, uinfo)) return -EFAULT; From f627ab0afcd983b3cb5f6d47c5006fd14cfc9a01 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Sat, 30 May 2015 14:31:24 +0200 Subject: [PATCH 0614/2091] kvm: x86: fix kvm_apic_has_events to check for NULL pointer commit ce40cd3fc7fa40a6119e5fe6c0f2bc0eb4541009 upstream. Malicious (or egregiously buggy) userspace can trigger it, but it should never happen in normal operation. Signed-off-by: Paolo Bonzini Signed-off-by: Wang Kai Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/lapic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 9d28383fc1e70..c4ea87eedf8a7 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -150,7 +150,7 @@ static inline bool kvm_apic_vid_enabled(struct kvm *kvm) static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu) { - return vcpu->arch.apic->pending_events; + return kvm_vcpu_has_lapic(vcpu) && vcpu->arch.apic->pending_events; } bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector); From 3b5c2aed0e5557c6bc4a305e7627a16a764b4cdb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 18 Jun 2015 16:44:59 +0200 Subject: [PATCH 0615/2091] nfsd: refactor nfs4_preprocess_stateid_op commit a0649b2d3fffb1cde8745568c767f3a55a3462bc upstream. Split out two self contained helpers to make the function more readable. Signed-off-by: Christoph Hellwig Signed-off-by: J. Bruce Fields Cc: Jeff Layton Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 97 ++++++++++++++++++++++++--------------------- 1 file changed, 52 insertions(+), 45 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 039f9c8a95e84..ae1df45c7f3ec 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4574,20 +4574,51 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, return nfs_ok; } +static struct file * +nfs4_find_file(struct nfs4_stid *s, int flags) +{ + switch (s->sc_type) { + case NFS4_DELEG_STID: + if (WARN_ON_ONCE(!s->sc_file->fi_deleg_file)) + return NULL; + return get_file(s->sc_file->fi_deleg_file); + case NFS4_OPEN_STID: + case NFS4_LOCK_STID: + if (flags & RD_STATE) + return find_readable_file(s->sc_file); + else + return find_writeable_file(s->sc_file); + break; + } + + return NULL; +} + +static __be32 +nfs4_check_olstateid(struct svc_fh *fhp, struct nfs4_ol_stateid *ols, int flags) +{ + __be32 status; + + status = nfs4_check_fh(fhp, ols); + if (status) + return status; + status = nfsd4_check_openowner_confirmed(ols); + if (status) + return status; + return nfs4_check_openmode(ols, flags); +} + /* -* Checks for stateid operations -*/ + * Checks for stateid operations + */ __be32 nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate, stateid_t *stateid, int flags, struct file **filpp) { - struct nfs4_stid *s; - struct nfs4_ol_stateid *stp = NULL; - struct nfs4_delegation *dp = NULL; - struct svc_fh *current_fh = &cstate->current_fh; - struct inode *ino = d_inode(current_fh->fh_dentry); + struct svc_fh *fhp = &cstate->current_fh; + struct inode *ino = d_inode(fhp->fh_dentry); struct nfsd_net *nn = net_generic(net, nfsd_net_id); - struct file *file = NULL; + struct nfs4_stid *s; __be32 status; if (filpp) @@ -4597,60 +4628,36 @@ 
nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate, return nfserr_grace; if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) - return check_special_stateids(net, current_fh, stateid, flags); + return check_special_stateids(net, fhp, stateid, flags); status = nfsd4_lookup_stateid(cstate, stateid, NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID, &s, nn); if (status) return status; - status = check_stateid_generation(stateid, &s->sc_stateid, nfsd4_has_session(cstate)); + status = check_stateid_generation(stateid, &s->sc_stateid, + nfsd4_has_session(cstate)); if (status) goto out; + switch (s->sc_type) { case NFS4_DELEG_STID: - dp = delegstateid(s); - status = nfs4_check_delegmode(dp, flags); - if (status) - goto out; - if (filpp) { - file = dp->dl_stid.sc_file->fi_deleg_file; - if (!file) { - WARN_ON_ONCE(1); - status = nfserr_serverfault; - goto out; - } - get_file(file); - } + status = nfs4_check_delegmode(delegstateid(s), flags); break; case NFS4_OPEN_STID: case NFS4_LOCK_STID: - stp = openlockstateid(s); - status = nfs4_check_fh(current_fh, stp); - if (status) - goto out; - status = nfsd4_check_openowner_confirmed(stp); - if (status) - goto out; - status = nfs4_check_openmode(stp, flags); - if (status) - goto out; - if (filpp) { - struct nfs4_file *fp = stp->st_stid.sc_file; - - if (flags & RD_STATE) - file = find_readable_file(fp); - else - file = find_writeable_file(fp); - } + status = nfs4_check_olstateid(fhp, openlockstateid(s), flags); break; default: status = nfserr_bad_stateid; - goto out; + break; + } + + if (!status && filpp) { + *filpp = nfs4_find_file(s, flags); + if (!*filpp) + status = nfserr_serverfault; } - status = nfs_ok; - if (file) - *filpp = file; out: nfs4_put_stid(s); return status; From 1ccdd6c6e9a342c2ed4ced38faa67303226a2a6a Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 30 Jul 2015 06:57:46 -0400 Subject: [PATCH 0616/2091] nfsd: do nfs4_check_fh in nfs4_check_file instead of nfs4_check_olstateid commit 8fcd461db7c09337b6d2e22d25eb411123f379e3 upstream. Currently, preprocess_stateid_op calls nfs4_check_olstateid which verifies that the open stateid corresponds to the current filehandle in the call by calling nfs4_check_fh. If the stateid is a NFS4_DELEG_STID however, then no such check is done. This could cause incorrect enforcement of permissions, because the nfsd_permission() call in nfs4_check_file uses current the current filehandle, but any subsequent IO operation will use the file descriptor in the stateid. Move the call to nfs4_check_fh into nfs4_check_file instead so that it can be done for all stateid types. Signed-off-by: Jeff Layton [bfields: moved fh check to avoid NULL deref in special stateid case] Signed-off-by: J. 
Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index ae1df45c7f3ec..6e13504f736e2 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4397,9 +4397,9 @@ laundromat_main(struct work_struct *laundry) queue_delayed_work(laundry_wq, &nn->laundromat_work, t*HZ); } -static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_ol_stateid *stp) +static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp) { - if (!fh_match(&fhp->fh_handle, &stp->st_stid.sc_file->fi_fhandle)) + if (!fh_match(&fhp->fh_handle, &stp->sc_file->fi_fhandle)) return nfserr_bad_stateid; return nfs_ok; } @@ -4599,9 +4599,6 @@ nfs4_check_olstateid(struct svc_fh *fhp, struct nfs4_ol_stateid *ols, int flags) { __be32 status; - status = nfs4_check_fh(fhp, ols); - if (status) - return status; status = nfsd4_check_openowner_confirmed(ols); if (status) return status; @@ -4652,6 +4649,9 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate, status = nfserr_bad_stateid; break; } + if (status) + goto out; + status = nfs4_check_fh(fhp, s); if (!status && filpp) { *filpp = nfs4_find_file(s, flags); @@ -4761,7 +4761,7 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_ status = check_stateid_generation(stateid, &stp->st_stid.sc_stateid, nfsd4_has_session(cstate)); if (status) return status; - return nfs4_check_fh(current_fh, stp); + return nfs4_check_fh(current_fh, &stp->st_stid); } /* From 4ff62ca06c0c0b084f585f7a2cfcf832b21d94fc Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 16 Aug 2015 20:52:51 -0700 Subject: [PATCH 0617/2091] Linux 4.1.6 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 068dd690933d4..838dabcb7f48b 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 1 -SUBLEVEL = 5 +SUBLEVEL = 6 EXTRAVERSION = NAME = Series 4800 From 7be83cf01024dd5186ae9743a5438904da1cb6ca Mon Sep 17 00:00:00 2001 From: "Herton R. Krzesinski" Date: Fri, 14 Aug 2015 15:35:02 -0700 Subject: [PATCH 0618/2091] ipc,sem: fix use after free on IPC_RMID after a task using same semaphore set exits commit 602b8593d2b4138c10e922eeaafe306f6b51817b upstream. The current semaphore code allows a potential use after free: in exit_sem we may free the task's sem_undo_list while there is still another task looping through the same semaphore set and cleaning the sem_undo list at freeary function (the task called IPC_RMID for the same semaphore set). For example, with a test program [1] running which keeps forking a lot of processes (which then do a semop call with SEM_UNDO flag), and with the parent right after removing the semaphore set with IPC_RMID, and a kernel built with CONFIG_SLAB, CONFIG_SLAB_DEBUG and CONFIG_DEBUG_SPINLOCK, you can easily see something like the following in the kernel log: Slab corruption (Not tainted): kmalloc-64 start=ffff88003b45c1c0, len=64 000: 6b 6b 6b 6b 6b 6b 6b 6b 00 6b 6b 6b 6b 6b 6b 6b kkkkkkkk.kkkkkkk 010: ff ff ff ff 6b 6b 6b 6b ff ff ff ff ff ff ff ff ....kkkk........ Prev obj: start=ffff88003b45c180, len=64 000: 00 00 00 00 ad 4e ad de ff ff ff ff 5a 5a 5a 5a .....N......ZZZZ 010: ff ff ff ff ff ff ff ff c0 fb 01 37 00 88 ff ff ...........7.... 
Next obj: start=ffff88003b45c200, len=64 000: 00 00 00 00 ad 4e ad de ff ff ff ff 5a 5a 5a 5a .....N......ZZZZ 010: ff ff ff ff ff ff ff ff 68 29 a7 3c 00 88 ff ff ........h).<.... BUG: spinlock wrong CPU on CPU#2, test/18028 general protection fault: 0000 [#1] SMP Modules linked in: 8021q mrp garp stp llc nf_conntrack_ipv4 nf_defrag_ipv4 ip6t_REJECT nf_reject_ipv6 nf_conntrack_ipv6 nf_defrag_ipv6 xt_state nf_conntrack ip6table_filter ip6_tables binfmt_misc ppdev input_leds joydev parport_pc parport floppy serio_raw virtio_balloon virtio_rng virtio_console virtio_net iosf_mbi crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcspkr qxl ttm drm_kms_helper drm snd_hda_codec_generic i2c_piix4 snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_seq snd_seq_device snd_pcm snd_timer snd soundcore crc32c_intel virtio_pci virtio_ring virtio pata_acpi ata_generic [last unloaded: speedstep_lib] CPU: 2 PID: 18028 Comm: test Not tainted 4.2.0-rc5+ #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.8.1-20150318_183358- 04/01/2014 RIP: spin_dump+0x53/0xc0 Call Trace: spin_bug+0x30/0x40 do_raw_spin_unlock+0x71/0xa0 _raw_spin_unlock+0xe/0x10 freeary+0x82/0x2a0 ? _raw_spin_lock+0xe/0x10 semctl_down.clone.0+0xce/0x160 ? __do_page_fault+0x19a/0x430 ? __audit_syscall_entry+0xa8/0x100 SyS_semctl+0x236/0x2c0 ? syscall_trace_leave+0xde/0x130 entry_SYSCALL_64_fastpath+0x12/0x71 Code: 8b 80 88 03 00 00 48 8d 88 60 05 00 00 48 c7 c7 a0 2c a4 81 31 c0 65 8b 15 eb 40 f3 7e e8 08 31 68 00 4d 85 e4 44 8b 4b 08 74 5e <45> 8b 84 24 88 03 00 00 49 8d 8c 24 60 05 00 00 8b 53 04 48 89 RIP [] spin_dump+0x53/0xc0 RSP ---[ end trace 783ebb76612867a0 ]--- NMI watchdog: BUG: soft lockup - CPU#3 stuck for 22s! [test:18053] Modules linked in: 8021q mrp garp stp llc nf_conntrack_ipv4 nf_defrag_ipv4 ip6t_REJECT nf_reject_ipv6 nf_conntrack_ipv6 nf_defrag_ipv6 xt_state nf_conntrack ip6table_filter ip6_tables binfmt_misc ppdev input_leds joydev parport_pc parport floppy serio_raw virtio_balloon virtio_rng virtio_console virtio_net iosf_mbi crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcspkr qxl ttm drm_kms_helper drm snd_hda_codec_generic i2c_piix4 snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_seq snd_seq_device snd_pcm snd_timer snd soundcore crc32c_intel virtio_pci virtio_ring virtio pata_acpi ata_generic [last unloaded: speedstep_lib] CPU: 3 PID: 18053 Comm: test Tainted: G D 4.2.0-rc5+ #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.8.1-20150318_183358- 04/01/2014 RIP: native_read_tsc+0x0/0x20 Call Trace: ? delay_tsc+0x40/0x70 __delay+0xf/0x20 do_raw_spin_lock+0x96/0x140 _raw_spin_lock+0xe/0x10 sem_lock_and_putref+0x11/0x70 SYSC_semtimedop+0x7bf/0x960 ? handle_mm_fault+0xbf6/0x1880 ? dequeue_task_fair+0x79/0x4a0 ? __do_page_fault+0x19a/0x430 ? kfree_debugcheck+0x16/0x40 ? __do_page_fault+0x19a/0x430 ? __audit_syscall_entry+0xa8/0x100 ? do_audit_syscall_entry+0x66/0x70 ? 
syscall_trace_enter_phase1+0x139/0x160 SyS_semtimedop+0xe/0x10 SyS_semop+0x10/0x20 entry_SYSCALL_64_fastpath+0x12/0x71 Code: 47 10 83 e8 01 85 c0 89 47 10 75 08 65 48 89 3d 1f 74 ff 7e c9 c3 0f 1f 44 00 00 55 48 89 e5 e8 87 17 04 00 66 90 c9 c3 0f 1f 00 <55> 48 89 e5 0f 31 89 c1 48 89 d0 48 c1 e0 20 89 c9 48 09 c8 c9 Kernel panic - not syncing: softlockup: hung tasks I wasn't able to trigger any badness on a recent kernel without the proper config debugs enabled, however I have softlockup reports on some kernel versions, in the semaphore code, which are similar as above (the scenario is seen on some servers running IBM DB2 which uses semaphore syscalls). The patch here fixes the race against freeary, by acquiring or waiting on the sem_undo_list lock as necessary (exit_sem can race with freeary, while freeary sets un->semid to -1 and removes the same sem_undo from list_proc or when it removes the last sem_undo). After the patch I'm unable to reproduce the problem using the test case [1]. [1] Test case used below: #include #include #include #include #include #include #include #include #include #define NSEM 1 #define NSET 5 int sid[NSET]; void thread() { struct sembuf op; int s; uid_t pid = getuid(); s = rand() % NSET; op.sem_num = pid % NSEM; op.sem_op = 1; op.sem_flg = SEM_UNDO; semop(sid[s], &op, 1); exit(EXIT_SUCCESS); } void create_set() { int i, j; pid_t p; union { int val; struct semid_ds *buf; unsigned short int *array; struct seminfo *__buf; } un; /* Create and initialize semaphore set */ for (i = 0; i < NSET; i++) { sid[i] = semget(IPC_PRIVATE , NSEM, 0644 | IPC_CREAT); if (sid[i] < 0) { perror("semget"); exit(EXIT_FAILURE); } } un.val = 0; for (i = 0; i < NSET; i++) { for (j = 0; j < NSEM; j++) { if (semctl(sid[i], j, SETVAL, un) < 0) perror("semctl"); } } /* Launch threads that operate on semaphore set */ for (i = 0; i < NSEM * NSET * NSET; i++) { p = fork(); if (p < 0) perror("fork"); if (p == 0) thread(); } /* Free semaphore set */ for (i = 0; i < NSET; i++) { if (semctl(sid[i], NSEM, IPC_RMID)) perror("IPC_RMID"); } /* Wait for forked processes to exit */ while (wait(NULL)) { if (errno == ECHILD) break; }; } int main(int argc, char **argv) { pid_t p; srand(time(NULL)); while (1) { p = fork(); if (p < 0) { perror("fork"); exit(EXIT_FAILURE); } if (p == 0) { create_set(); goto end; } /* Wait for forked processes to exit */ while (wait(NULL)) { if (errno == ECHILD) break; }; } end: return 0; } [akpm@linux-foundation.org: use normal comment layout] Signed-off-by: Herton R. Krzesinski Acked-by: Manfred Spraul Cc: Davidlohr Bueso Cc: Rafael Aquini CC: Aristeu Rozanski Cc: David Jeffery Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman Signed-off-by: Linus Torvalds --- ipc/sem.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/ipc/sem.c b/ipc/sem.c index d1a6edd17eba2..7fbe64bae0f30 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -2074,17 +2074,28 @@ void exit_sem(struct task_struct *tsk) rcu_read_lock(); un = list_entry_rcu(ulp->list_proc.next, struct sem_undo, list_proc); - if (&un->list_proc == &ulp->list_proc) - semid = -1; - else - semid = un->semid; + if (&un->list_proc == &ulp->list_proc) { + /* + * We must wait for freeary() before freeing this ulp, + * in case we raced with last sem_undo. There is a small + * possibility where we exit while freeary() didn't + * finish unlocking sem_undo_list. 
+ */ + spin_unlock_wait(&ulp->lock); + rcu_read_unlock(); + break; + } + spin_lock(&ulp->lock); + semid = un->semid; + spin_unlock(&ulp->lock); + /* exit_sem raced with IPC_RMID, nothing to do */ if (semid == -1) { rcu_read_unlock(); - break; + continue; } - sma = sem_obtain_object_check(tsk->nsproxy->ipc_ns, un->semid); + sma = sem_obtain_object_check(tsk->nsproxy->ipc_ns, semid); /* exit_sem raced with IPC_RMID, nothing to do */ if (IS_ERR(sma)) { rcu_read_unlock(); From b6805da60f0196d8703963ef0d476001bcea12a4 Mon Sep 17 00:00:00 2001 From: Manfred Spraul Date: Fri, 14 Aug 2015 15:35:10 -0700 Subject: [PATCH 0619/2091] ipc/sem.c: update/correct memory barriers commit 3ed1f8a99d70ea1cd1508910eb107d0edcae5009 upstream. sem_lock() did not properly pair memory barriers: !spin_is_locked() and spin_unlock_wait() are both only control barriers. The code needs an acquire barrier, otherwise the cpu might perform read operations before the lock test. As no primitive exists inside and since it seems noone wants another primitive, the code creates a local primitive within ipc/sem.c. With regards to -stable: The change of sem_wait_array() is a bugfix, the change to sem_lock() is a nop (just a preprocessor redefinition to improve the readability). The bugfix is necessary for all kernels that use sem_wait_array() (i.e.: starting from 3.10). Signed-off-by: Manfred Spraul Reported-by: Oleg Nesterov Acked-by: Peter Zijlstra (Intel) Cc: "Paul E. McKenney" Cc: Kirill Tkhai Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: Davidlohr Bueso Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- ipc/sem.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/ipc/sem.c b/ipc/sem.c index 7fbe64bae0f30..c50aa5755c626 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -252,6 +252,16 @@ static void sem_rcu_free(struct rcu_head *head) ipc_rcu_free(head); } +/* + * spin_unlock_wait() and !spin_is_locked() are not memory barriers, they + * are only control barriers. + * The code must pair with spin_unlock(&sem->lock) or + * spin_unlock(&sem_perm.lock), thus just the control barrier is insufficient. + * + * smp_rmb() is sufficient, as writes cannot pass the control barrier. + */ +#define ipc_smp_acquire__after_spin_is_unlocked() smp_rmb() + /* * Wait until all currently ongoing simple ops have completed. * Caller must own sem_perm.lock. @@ -275,6 +285,7 @@ static void sem_wait_array(struct sem_array *sma) sem = sma->sem_base + i; spin_unlock_wait(&sem->lock); } + ipc_smp_acquire__after_spin_is_unlocked(); } /* @@ -327,13 +338,12 @@ static inline int sem_lock(struct sem_array *sma, struct sembuf *sops, /* Then check that the global lock is free */ if (!spin_is_locked(&sma->sem_perm.lock)) { /* - * The ipc object lock check must be visible on all - * cores before rechecking the complex count. Otherwise - * we can race with another thread that does: + * We need a memory barrier with acquire semantics, + * otherwise we can race with another thread that does: * complex_count++; * spin_unlock(sem_perm.lock); */ - smp_rmb(); + ipc_smp_acquire__after_spin_is_unlocked(); /* * Now repeat the test of complex_count: From 0ebc1f07b7570ca97734c408b0ba87bf340b2cb0 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Fri, 14 Aug 2015 15:34:56 -0700 Subject: [PATCH 0620/2091] mm/hwpoison: fix page refcount of unknown non LRU page commit 4f32be677b124a49459e2603321c7a5605ceb9f8 upstream. 
After trying to drain pages from pagevec/pageset, we try to get reference count of the page again, however, the reference count of the page is not reduced if the page is still not on LRU list. Fix it by adding the put_page() to drop the page reference which is from __get_any_page(). Signed-off-by: Wanpeng Li Acked-by: Naoya Horiguchi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/memory-failure.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 501820c815b33..79fa29332fa39 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1558,6 +1558,8 @@ static int get_any_page(struct page *page, unsigned long pfn, int flags) */ ret = __get_any_page(page, pfn, 0); if (!PageLRU(page)) { + /* Drop page reference which is from __get_any_page() */ + put_page(page); pr_info("soft_offline: %#lx: unknown non LRU page type %lx\n", pfn, page->flags); return -EIO; From d73eba8c9e33c0d5b487c04a0fc498432f074fea Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Fri, 14 Aug 2015 15:34:59 -0700 Subject: [PATCH 0621/2091] mm/hwpoison: fix fail isolate hugetlbfs page w/ refcount held commit 036138080a4376e5f3e5d0cca8ac99084c5cf06e upstream. Hugetlbfs pages will get a refcount in get_any_page() or madvise_hwpoison() if soft offlining through madvise. The refcount which is held by the soft offline path should be released if we fail to isolate hugetlbfs pages. Fix it by reducing the refcount for both isolation success and failure. Signed-off-by: Wanpeng Li Acked-by: Naoya Horiguchi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/memory-failure.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 79fa29332fa39..9f48145c884fc 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1589,13 +1589,12 @@ static int soft_offline_huge_page(struct page *page, int flags) unlock_page(hpage); ret = isolate_huge_page(hpage, &pagelist); - if (ret) { - /* - * get_any_page() and isolate_huge_page() takes a refcount each, - * so need to drop one here. - */ - put_page(hpage); - } else { + /* + * get_any_page() and isolate_huge_page() takes a refcount each, + * so need to drop one here. + */ + put_page(hpage); + if (!ret) { pr_info("soft offline: %#lx hugepage failed to isolate\n", pfn); return -EBUSY; } From 48f8d0f5a4fec334ec2c3c5c6ec6867a885e2329 Mon Sep 17 00:00:00 2001 From: Robert Jarzmik Date: Tue, 4 Aug 2015 08:21:33 +0200 Subject: [PATCH 0622/2091] clk: pxa: pxa3xx: fix CKEN register access commit b93028c9af807b9474789e6aba34a6135b6cb708 upstream. Clocks 0 to 31 are on CKENA, and not CKENB. The clock register names were inadequately inverted. As a consequence, all clock operations were happening on CKENB, because almost all but 2 clocks are on CKENA. As the clocks were activated by the bootloader in the former tests, it escaped the testing that the wrong clock gate was manipulated. The error was revealed by changing the pxa3xx-nand driver to a module, where upon unloading, the wrong clock was disabled in CKENB. 
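The effect of the one-character fix is easier to see outside the driver. Below is a minimal userspace sketch (made-up CKENA/CKENB variables and a cken_reg() helper, not the driver's MMIO code) of the selection rule the patch restores: enable bits 0-31 live in CKENA and everything above lives in CKENB.

	#include <stdint.h>
	#include <stdio.h>

	static uint32_t CKENA, CKENB;	/* stand-ins for the two enable registers */

	/* Pick the register that actually holds this clock's enable bit. */
	static uint32_t *cken_reg(unsigned int bit)
	{
		return (bit > 31) ? &CKENB : &CKENA;	/* the fix: bits above 31 are in CKENB */
	}

	static void clk_enable(unsigned int bit)
	{
		*cken_reg(bit) |= 1u << (bit & 31);
	}

	int main(void)
	{
		clk_enable(15);		/* lands in CKENA */
		clk_enable(40);		/* lands in CKENB */
		printf("CKENA=%08x CKENB=%08x\n", (unsigned)CKENA, (unsigned)CKENB);
		return 0;
	}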
Fixes: 9bbb8a338fb2 ("clk: pxa: add pxa3xx clock driver") Signed-off-by: Robert Jarzmik Signed-off-by: Stephen Boyd Signed-off-by: Greg Kroah-Hartman --- drivers/clk/pxa/clk-pxa3xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/pxa/clk-pxa3xx.c b/drivers/clk/pxa/clk-pxa3xx.c index 4b93a1efb36d1..ac03ba49e9d19 100644 --- a/drivers/clk/pxa/clk-pxa3xx.c +++ b/drivers/clk/pxa/clk-pxa3xx.c @@ -126,7 +126,7 @@ PARENTS(pxa3xx_ac97_bus) = { "ring_osc_60mhz", "ac97" }; PARENTS(pxa3xx_sbus) = { "ring_osc_60mhz", "system_bus" }; PARENTS(pxa3xx_smemcbus) = { "ring_osc_60mhz", "smemc" }; -#define CKEN_AB(bit) ((CKEN_ ## bit > 31) ? &CKENA : &CKENB) +#define CKEN_AB(bit) ((CKEN_ ## bit > 31) ? &CKENB : &CKENA) #define PXA3XX_CKEN(dev_id, con_id, parents, mult_lp, div_lp, mult_hp, \ div_hp, bit, is_lp, flags) \ PXA_CKEN(dev_id, con_id, bit, parents, mult_lp, div_lp, \ From 8fab9f94b4b4d5487bb3e2b683f7f85e8aba5c09 Mon Sep 17 00:00:00 2001 From: Bob Liu Date: Wed, 22 Jul 2015 14:40:09 +0800 Subject: [PATCH 0623/2091] xen-blkfront: don't add indirect pages to list when !feature_persistent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 7b0767502b5db11cb1f0daef2d01f6d71b1192dc upstream. We should consider info->feature_persistent when adding indirect page to list info->indirect_pages, else the BUG_ON() in blkif_free() would be triggered. When we are using persistent grants the indirect_pages list should always be empty because blkfront has pre-allocated enough persistent pages to fill all requests on the ring. Acked-by: Roger Pau Monné Signed-off-by: Bob Liu Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Greg Kroah-Hartman --- drivers/block/xen-blkfront.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 2c61cf8c6f61d..89c7371ab2dcc 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -1118,8 +1118,10 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info, * Add the used indirect page back to the list of * available pages for indirect grefs. */ - indirect_page = pfn_to_page(s->indirect_grants[i]->pfn); - list_add(&indirect_page->lru, &info->indirect_pages); + if (!info->feature_persistent) { + indirect_page = pfn_to_page(s->indirect_grants[i]->pfn); + list_add(&indirect_page->lru, &info->indirect_pages); + } s->indirect_grants[i]->gref = GRANT_INVALID_REF; list_add_tail(&s->indirect_grants[i]->node, &info->grants); } From e46ebf0b0f94429f6813d8e4162dd060ef7ca7fd Mon Sep 17 00:00:00 2001 From: Bob Liu Date: Wed, 22 Jul 2015 14:40:10 +0800 Subject: [PATCH 0624/2091] xen-blkback: replace work_pending with work_busy in purge_persistent_gnt() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 53bc7dc004fecf39e0ba70f2f8d120a1444315d3 upstream. The BUG_ON() in purge_persistent_gnt() will be triggered when previous purge work haven't finished. There is a work_pending() before this BUG_ON, but it doesn't account if the work is still currently running. 
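The distinction matters because a handler that has already been dequeued and is executing no longer counts as pending, while work_busy() reports both the queued and the running case. A rough sketch of the guard, written as a hypothetical module using only the generic workqueue API (not xen-blkback's code):

	#include <linux/module.h>
	#include <linux/workqueue.h>
	#include <linux/delay.h>

	static void purge_fn(struct work_struct *work)
	{
		msleep(100);		/* stand-in for walking and purging a list */
	}

	static DECLARE_WORK(purge_work, purge_fn);

	static void try_purge(void)
	{
		/* Skip if the previous purge is still queued *or* still running. */
		if (work_busy(&purge_work)) {
			pr_info_ratelimited("previous purge still busy, skipping\n");
			return;
		}
		schedule_work(&purge_work);
	}

	static int __init demo_init(void)
	{
		try_purge();
		try_purge();	/* skipped while the first purge is queued or running */
		return 0;
	}

	static void __exit demo_exit(void)
	{
		flush_work(&purge_work);
	}

	module_init(demo_init);
	module_exit(demo_exit);
	MODULE_LICENSE("GPL");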
Acked-by: Roger Pau Monné Signed-off-by: Bob Liu Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Greg Kroah-Hartman --- drivers/block/xen-blkback/blkback.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 713fc9ff11492..3e9ec9523f735 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -362,8 +362,8 @@ static void purge_persistent_gnt(struct xen_blkif *blkif) return; } - if (work_pending(&blkif->persistent_purge_work)) { - pr_alert_ratelimited("Scheduled work from previous purge is still pending, cannot purge list\n"); + if (work_busy(&blkif->persistent_purge_work)) { + pr_alert_ratelimited("Scheduled work from previous purge is still busy, cannot purge list\n"); return; } From 57e25b602cab84202e69f22bce932336cb0db03b Mon Sep 17 00:00:00 2001 From: Mike Looijmans Date: Tue, 28 Jul 2015 07:51:01 +0200 Subject: [PATCH 0625/2091] rsi: Fix failure to load firmware after memory leak fix and fix the leak commit 5d5cd85ff441534a52f23f821d0a7c644d3b6cce upstream. Fixes commit eae79b4f3e82 ("rsi: fix memory leak in rsi_load_ta_instructions()") which stopped the driver from functioning. Firmware data has been allocated using vmalloc(), resulting in memory that cannot be used for DMA. Hence the firmware was first copied to a buffer allocated with kmalloc() in the original code. This patch reverts the commit and only calls "kfree()" to release the buffer after sending the data. This fixes the memory leak without breaking the driver. Add a comment to the kmemdup() calls to explain why this is done, and abort if memory allocation fails. Tested on a Topic Miami-Florida board which contains the rsi SDIO chip. Also added the same kfree() call to the USB glue driver. This was not tested on actual hardware though, as I only have the SDIO version. 
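The underlying pattern is independent of rsi: request_firmware() may return vmalloc()ed data, which must not be handed to DMA, so take a kmalloc copy, push that to the hardware, then free both. The sketch below assumes a copy_to_card() callback standing in for the bus-specific transfer routine; it illustrates the shape of the fix rather than the driver's actual function.

	#include <linux/device.h>
	#include <linux/firmware.h>
	#include <linux/slab.h>

	static int load_fw(struct device *dev, const char *name,
			   int (*copy_to_card)(const u8 *data, size_t len))
	{
		const struct firmware *fw_entry;
		u8 *fw;
		int status;

		status = request_firmware(&fw_entry, name, dev);
		if (status)
			return status;

		/* Copy firmware into DMA-capable (kmalloc) memory */
		fw = kmemdup(fw_entry->data, fw_entry->size, GFP_KERNEL);
		if (!fw) {
			release_firmware(fw_entry);
			return -ENOMEM;
		}

		status = copy_to_card(fw, fw_entry->size);

		kfree(fw);			/* always freed: no leak on either path */
		release_firmware(fw_entry);
		return status;
	}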
Fixes: eae79b4f3e82 ("rsi: fix memory leak in rsi_load_ta_instructions()") Signed-off-by: Mike Looijmans Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/rsi/rsi_91x_sdio_ops.c | 8 +++++++- drivers/net/wireless/rsi/rsi_91x_usb_ops.c | 4 ++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/rsi/rsi_91x_sdio_ops.c b/drivers/net/wireless/rsi/rsi_91x_sdio_ops.c index b6cc9ff47fc2e..1c6788aecc626 100644 --- a/drivers/net/wireless/rsi/rsi_91x_sdio_ops.c +++ b/drivers/net/wireless/rsi/rsi_91x_sdio_ops.c @@ -172,6 +172,7 @@ static int rsi_load_ta_instructions(struct rsi_common *common) (struct rsi_91x_sdiodev *)adapter->rsi_dev; u32 len; u32 num_blocks; + const u8 *fw; const struct firmware *fw_entry = NULL; u32 block_size = dev->tx_blk_size; int status = 0; @@ -200,6 +201,10 @@ static int rsi_load_ta_instructions(struct rsi_common *common) return status; } + /* Copy firmware into DMA-accessible memory */ + fw = kmemdup(fw_entry->data, fw_entry->size, GFP_KERNEL); + if (!fw) + return -ENOMEM; len = fw_entry->size; if (len % 4) @@ -210,7 +215,8 @@ static int rsi_load_ta_instructions(struct rsi_common *common) rsi_dbg(INIT_ZONE, "%s: Instruction size:%d\n", __func__, len); rsi_dbg(INIT_ZONE, "%s: num blocks: %d\n", __func__, num_blocks); - status = rsi_copy_to_card(common, fw_entry->data, len, num_blocks); + status = rsi_copy_to_card(common, fw, len, num_blocks); + kfree(fw); release_firmware(fw_entry); return status; } diff --git a/drivers/net/wireless/rsi/rsi_91x_usb_ops.c b/drivers/net/wireless/rsi/rsi_91x_usb_ops.c index 1106ce76707e1..30c2cf7fa93b0 100644 --- a/drivers/net/wireless/rsi/rsi_91x_usb_ops.c +++ b/drivers/net/wireless/rsi/rsi_91x_usb_ops.c @@ -146,7 +146,10 @@ static int rsi_load_ta_instructions(struct rsi_common *common) return status; } + /* Copy firmware into DMA-accessible memory */ fw = kmemdup(fw_entry->data, fw_entry->size, GFP_KERNEL); + if (!fw) + return -ENOMEM; len = fw_entry->size; if (len % 4) @@ -158,6 +161,7 @@ static int rsi_load_ta_instructions(struct rsi_common *common) rsi_dbg(INIT_ZONE, "%s: num blocks: %d\n", __func__, num_blocks); status = rsi_copy_to_card(common, fw, len, num_blocks); + kfree(fw); release_firmware(fw_entry); return status; } From 75d370fe0b1159aedf960c843b514b39c82e822f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 11 Jun 2015 10:32:01 +0200 Subject: [PATCH 0626/2091] perf: Fix fasync handling on inherited events commit fed66e2cdd4f127a43fd11b8d92a99bdd429528c upstream. Vince reported that the fasync signal stuff doesn't work proper for inherited events. So fix that. Installing fasync allocates memory and sets filp->f_flags |= FASYNC, which upon the demise of the file descriptor ensures the allocation is freed and state is updated. Now for perf, we can have the events stick around for a while after the original FD is dead because of references from child events. So we cannot copy the fasync pointer around. We can however consistently use the parent's fasync, as that will be updated. 
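The shape of that helper, using a simplified stand-in structure rather than the real struct perf_event (only the two fields relevant to signal delivery are modeled), is roughly:

	#include <linux/fs.h>
	#include <linux/signal.h>

	struct demo_event {
		struct demo_event *parent;	/* NULL on the original event */
		struct fasync_struct *fasync;	/* only the parent's is ever valid */
	};

	static struct fasync_struct **demo_event_fasync(struct demo_event *event)
	{
		/* inherited (child) events defer to the parent's fasync state */
		if (event->parent)
			event = event->parent;
		return &event->fasync;
	}

	static void demo_event_wakeup(struct demo_event *event)
	{
		kill_fasync(demo_event_fasync(event), SIGIO, POLL_IN);
	}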
Reported-and-Tested-by: Vince Weaver Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho deMelo Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: eranian@google.com Link: http://lkml.kernel.org/r/1434011521.1495.71.camel@twins Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/events/core.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 0ceb386777ae7..130a5f637c0ba 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -4766,12 +4766,20 @@ static const struct file_operations perf_fops = { * to user-space before waking everybody up. */ +static inline struct fasync_struct **perf_event_fasync(struct perf_event *event) +{ + /* only the parent has fasync state */ + if (event->parent) + event = event->parent; + return &event->fasync; +} + void perf_event_wakeup(struct perf_event *event) { ring_buffer_wakeup(event); if (event->pending_kill) { - kill_fasync(&event->fasync, SIGIO, event->pending_kill); + kill_fasync(perf_event_fasync(event), SIGIO, event->pending_kill); event->pending_kill = 0; } } @@ -6117,7 +6125,7 @@ static int __perf_event_overflow(struct perf_event *event, else perf_event_output(event, data, regs); - if (event->fasync && event->pending_kill) { + if (*perf_event_fasync(event) && event->pending_kill) { event->pending_wakeup = 1; irq_work_queue(&event->pending); } From b8cae722c4afc4c116dc34e013c82cd5bbc68bb8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 27 Jul 2015 10:35:07 +0200 Subject: [PATCH 0627/2091] perf: Fix running time accounting commit 00a2916f7f82c348a2a94dbb572874173bc308a3 upstream. A recent fix to the shadow timestamp inadvertly broke the running time accounting. We must not update the running timestamp if we fail to schedule the event, the event will not have ran. This can (and did) result in negative total runtime because the stopped timestamp was before the running timestamp (we 'started' but never stopped the event -- because it never really started we didn't have to stop it either). Reported-and-Tested-by: Vince Weaver Fixes: 72f669c0086f ("perf: Update shadow timestamp before add event") Signed-off-by: Peter Zijlstra (Intel) Cc: Shaohua Li Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- kernel/events/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 130a5f637c0ba..26f387f243702 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1886,8 +1886,6 @@ event_sched_in(struct perf_event *event, perf_pmu_disable(event->pmu); - event->tstamp_running += tstamp - event->tstamp_stopped; - perf_set_shadow_time(event, ctx, tstamp); perf_log_itrace_start(event); @@ -1899,6 +1897,8 @@ event_sched_in(struct perf_event *event, goto out; } + event->tstamp_running += tstamp - event->tstamp_stopped; + if (!is_software_event(event)) cpuctx->active_oncpu++; if (!ctx->nr_active++) From db8d87e1cd13809687f39a3da567cb823e1b7576 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 27 Jul 2015 00:31:08 +0100 Subject: [PATCH 0628/2091] perf: Fix double-free of the AUX buffer commit ee9397a6fb9bc4e52677f5e33eed4abee0f515e6 upstream. If rb->aux_refcount is decremented to zero before rb->refcount, __rb_free_aux() may be called twice resulting in a double free of rb->aux_pages. Fix this by adding a check to __rb_free_aux(). 
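In other words, the teardown is made idempotent: once the pages are gone, a second call sees a zero count and does nothing. A userspace model of that guard, with simplified stand-ins for the ring_buffer fields rather than the perf code itself:

	#include <stdlib.h>

	struct rb {
		void **aux_pages;
		int aux_nr_pages;
	};

	static void rb_free_aux_pages(struct rb *rb)
	{
		int pg;

		if (rb->aux_nr_pages) {		/* guard: already torn down? */
			for (pg = 0; pg < rb->aux_nr_pages; pg++)
				free(rb->aux_pages[pg]);

			free(rb->aux_pages);
			rb->aux_pages = NULL;
			rb->aux_nr_pages = 0;	/* a second call is now a no-op */
		}
	}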
Signed-off-by: Ben Hutchings Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 57ffc5ca679f ("perf: Fix AUX buffer refcounting") Link: http://lkml.kernel.org/r/1437953468.12842.17.camel@decadent.org.uk Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/events/ring_buffer.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 725c416085e31..a7604c81168e2 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -547,11 +547,13 @@ static void __rb_free_aux(struct ring_buffer *rb) rb->aux_priv = NULL; } - for (pg = 0; pg < rb->aux_nr_pages; pg++) - rb_free_aux_page(rb, pg); + if (rb->aux_nr_pages) { + for (pg = 0; pg < rb->aux_nr_pages; pg++) + rb_free_aux_page(rb, pg); - kfree(rb->aux_pages); - rb->aux_nr_pages = 0; + kfree(rb->aux_pages); + rb->aux_nr_pages = 0; + } } void rb_free_aux(struct ring_buffer *rb) From 58b9cca673e06a9f98a902d95ff4cda951a7f234 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 4 Aug 2015 19:22:49 +0200 Subject: [PATCH 0629/2091] perf: Fix PERF_EVENT_IOC_PERIOD migration race commit c7999c6f3fed9e383d3131474588f282ae6d56b9 upstream. I ran the perf fuzzer, which triggered some WARN()s which are due to trying to stop/restart an event on the wrong CPU. Use the normal IPI pattern to ensure we run the code on the correct CPU. Signed-off-by: Peter Zijlstra (Intel) Cc: Vince Weaver Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: bad7192b842c ("perf: Fix PERF_EVENT_IOC_PERIOD to force-reset the period") Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/events/core.c | 75 ++++++++++++++++++++++++++++++++------------ 1 file changed, 55 insertions(+), 20 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 26f387f243702..94817491407b0 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3976,28 +3976,21 @@ static void perf_event_for_each(struct perf_event *event, perf_event_for_each_child(sibling, func); } -static int perf_event_period(struct perf_event *event, u64 __user *arg) -{ - struct perf_event_context *ctx = event->ctx; - int ret = 0, active; +struct period_event { + struct perf_event *event; u64 value; +}; - if (!is_sampling_event(event)) - return -EINVAL; - - if (copy_from_user(&value, arg, sizeof(value))) - return -EFAULT; - - if (!value) - return -EINVAL; +static int __perf_event_period(void *info) +{ + struct period_event *pe = info; + struct perf_event *event = pe->event; + struct perf_event_context *ctx = event->ctx; + u64 value = pe->value; + bool active; - raw_spin_lock_irq(&ctx->lock); + raw_spin_lock(&ctx->lock); if (event->attr.freq) { - if (value > sysctl_perf_event_sample_rate) { - ret = -EINVAL; - goto unlock; - } - event->attr.sample_freq = value; } else { event->attr.sample_period = value; @@ -4016,11 +4009,53 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg) event->pmu->start(event, PERF_EF_RELOAD); perf_pmu_enable(ctx->pmu); } + raw_spin_unlock(&ctx->lock); -unlock: + return 0; +} + +static int perf_event_period(struct perf_event *event, u64 __user *arg) +{ + struct period_event pe = { .event = event, }; + struct perf_event_context *ctx = event->ctx; + struct task_struct *task; + u64 value; + + if (!is_sampling_event(event)) + return -EINVAL; + + if (copy_from_user(&value, arg, sizeof(value))) + return -EFAULT; + + if 
(!value) + return -EINVAL; + + if (event->attr.freq && value > sysctl_perf_event_sample_rate) + return -EINVAL; + + task = ctx->task; + pe.value = value; + + if (!task) { + cpu_function_call(event->cpu, __perf_event_period, &pe); + return 0; + } + +retry: + if (!task_function_call(task, __perf_event_period, &pe)) + return 0; + + raw_spin_lock_irq(&ctx->lock); + if (ctx->is_active) { + raw_spin_unlock_irq(&ctx->lock); + task = ctx->task; + goto retry; + } + + __perf_event_period(&pe); raw_spin_unlock_irq(&ctx->lock); - return ret; + return 0; } static const struct file_operations perf_fops; From c82f14951c4d67eb6f6bcb28e5cf5a85ae242e64 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Mon, 20 Jul 2015 12:14:39 +0300 Subject: [PATCH 0630/2091] iwlwifi: pcie: fix prepare card flow commit c9fdec9f3970eeaa1b176422f46167f5f5158804 upstream. When the card is not owned by the PCIe bus, we need to acquire ownership first. This flow is implemented in iwl_pcie_prepare_card_hw. Because of a hardware bug, we need to disable link power management before we can request ownership otherwise the other user of the device won't get notified that we are requesting the device which will prevent us from acquire ownership. Same holds for the down flow where we need to make sure that any other potential user is notified that the driver is going down. Signed-off-by: Emmanuel Grumbach Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/iwlwifi/pcie/trans.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/pcie/trans.c b/drivers/net/wireless/iwlwifi/pcie/trans.c index 37e6a6f914872..699a4802835fa 100644 --- a/drivers/net/wireless/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/iwlwifi/pcie/trans.c @@ -457,10 +457,16 @@ static void iwl_pcie_apm_stop(struct iwl_trans *trans, bool op_mode_leave) if (trans->cfg->device_family == IWL_DEVICE_FAMILY_7000) iwl_set_bits_prph(trans, APMG_PCIDEV_STT_REG, APMG_PCIDEV_STT_VAL_WAKE_ME); - else if (trans->cfg->device_family == IWL_DEVICE_FAMILY_8000) + else if (trans->cfg->device_family == IWL_DEVICE_FAMILY_8000) { + iwl_set_bit(trans, CSR_DBG_LINK_PWR_MGMT_REG, + CSR_RESET_LINK_PWR_MGMT_DISABLED); iwl_set_bit(trans, CSR_HW_IF_CONFIG_REG, CSR_HW_IF_CONFIG_REG_PREPARE | CSR_HW_IF_CONFIG_REG_ENABLE_PME); + mdelay(1); + iwl_clear_bit(trans, CSR_DBG_LINK_PWR_MGMT_REG, + CSR_RESET_LINK_PWR_MGMT_DISABLED); + } mdelay(5); } @@ -555,6 +561,10 @@ static int iwl_pcie_prepare_card_hw(struct iwl_trans *trans) if (ret >= 0) return 0; + iwl_set_bit(trans, CSR_DBG_LINK_PWR_MGMT_REG, + CSR_RESET_LINK_PWR_MGMT_DISABLED); + msleep(1); + for (iter = 0; iter < 10; iter++) { /* If HW is not ready, prepare the conditions to check again */ iwl_set_bit(trans, CSR_HW_IF_CONFIG_REG, @@ -562,8 +572,10 @@ static int iwl_pcie_prepare_card_hw(struct iwl_trans *trans) do { ret = iwl_pcie_set_hw_ready(trans); - if (ret >= 0) - return 0; + if (ret >= 0) { + ret = 0; + goto out; + } usleep_range(200, 1000); t += 200; @@ -573,6 +585,10 @@ static int iwl_pcie_prepare_card_hw(struct iwl_trans *trans) IWL_ERR(trans, "Couldn't prepare the card\n"); +out: + iwl_clear_bit(trans, CSR_DBG_LINK_PWR_MGMT_REG, + CSR_RESET_LINK_PWR_MGMT_DISABLED); + return ret; } From c9557ae624bdb05c44b12a42195076a3191455e6 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Sun, 2 Aug 2015 13:24:13 -0500 Subject: [PATCH 0631/2091] rtlwifi: rtl8723be: Add module parameter for MSI interrupts commit 741e3b9902d11585e18bfc7f8d47e913616bb070 upstream. 
The driver code allows for the disabling of MSI interrupts; however the module_parm line was missed and the option fails to show with modinfo. Signed-off-by: Larry Finger Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/rtlwifi/rtl8723be/sw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/rtlwifi/rtl8723be/sw.c b/drivers/net/wireless/rtlwifi/rtl8723be/sw.c index 1017f02d7bf75..7bf88d9dcdc3f 100644 --- a/drivers/net/wireless/rtlwifi/rtl8723be/sw.c +++ b/drivers/net/wireless/rtlwifi/rtl8723be/sw.c @@ -385,6 +385,7 @@ module_param_named(debug, rtl8723be_mod_params.debug, int, 0444); module_param_named(ips, rtl8723be_mod_params.inactiveps, bool, 0444); module_param_named(swlps, rtl8723be_mod_params.swctrl_lps, bool, 0444); module_param_named(fwlps, rtl8723be_mod_params.fwctrl_lps, bool, 0444); +module_param_named(msi, rtl8723be_mod_params.msi_support, bool, 0444); module_param_named(disable_watchdog, rtl8723be_mod_params.disable_watchdog, bool, 0444); MODULE_PARM_DESC(swenc, "Set to 1 for software crypto (default 0)\n"); From 6d61ce6dc7c831e690a3d44228b38d155db441fc Mon Sep 17 00:00:00 2001 From: Luis Felipe Dominguez Vega Date: Wed, 29 Jul 2015 21:11:20 -0500 Subject: [PATCH 0632/2091] rtlwifi: Fix NULL dereference when PCI driver used as an AP commit 7c62940165e9ae4004ce4e6b5117330bab94df68 upstream. In commit 33511b157bbcebaef853cc1811992b664a2e5862 ("rtlwifi: add support to send beacon frame"), the mechanism for sending beacons was established. That patch works correctly for rtl8192cu, but there is a possibility of getting the following warnings in the PCI drivers: WARNING: CPU: 1 PID: 2439 at net/mac80211/driver-ops.h:12 ieee80211_bss_info_change_notify+0x179/0x1d0 [mac80211]() wlp5s0: Failed check-sdata-in-driver check, flags: 0x0 The warning is followed by a NULL pointer dereference as follows: BUG: unable to handle kernel NULL pointer dereference at 0000000000000006 IP: [] rtl_get_tcb_desc+0x5e/0x760 [rtlwifi] This problem was reported at http://thread.gmane.org/gmane.linux.kernel.wireless.general/138645, but no solution was found at that time. The problem was also reported at https://bugzilla.kernel.org/show_bug.cgi?id=9744 and this solution was developed and tested there. The USB driver works with a NULL final argument in the adapter_tx() callback; however, the PCI drivers need a struct rtl_tcb_desc in that position. Fixes: 33511b157bbc ("rtlwifi: add support to send beacon frame.") Signed-off-by: Luis Felipe Dominguez Vega Signed-off-by: Larry Finger Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/rtlwifi/core.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/rtlwifi/core.c b/drivers/net/wireless/rtlwifi/core.c index 3b3a88b53b119..585d0883c7e58 100644 --- a/drivers/net/wireless/rtlwifi/core.c +++ b/drivers/net/wireless/rtlwifi/core.c @@ -1015,9 +1015,12 @@ static void send_beacon_frame(struct ieee80211_hw *hw, { struct rtl_priv *rtlpriv = rtl_priv(hw); struct sk_buff *skb = ieee80211_beacon_get(hw, vif); + struct rtl_tcb_desc tcb_desc; - if (skb) - rtlpriv->intf_ops->adapter_tx(hw, NULL, skb, NULL); + if (skb) { + memset(&tcb_desc, 0, sizeof(struct rtl_tcb_desc)); + rtlpriv->intf_ops->adapter_tx(hw, NULL, skb, &tcb_desc); + } } static void rtl_op_bss_info_changed(struct ieee80211_hw *hw, From 0733a428056a18a6fbb1b57664a98cee4761d634 Mon Sep 17 00:00:00 2001 From: "Jason A. 
Donenfeld" Date: Mon, 10 Aug 2015 15:40:27 +0200 Subject: [PATCH 0633/2091] x86/xen: build "Xen PV" APIC driver for domU as well commit fc5fee86bdd3d720e2d1d324e4fae0c35845fa63 upstream. It turns out that a PV domU also requires the "Xen PV" APIC driver. Otherwise, the flat driver is used and we get stuck in busy loops that never exit, such as in this stack trace: (gdb) target remote localhost:9999 Remote debugging using localhost:9999 __xapic_wait_icr_idle () at ./arch/x86/include/asm/ipi.h:56 56 while (native_apic_mem_read(APIC_ICR) & APIC_ICR_BUSY) (gdb) bt #0 __xapic_wait_icr_idle () at ./arch/x86/include/asm/ipi.h:56 #1 __default_send_IPI_shortcut (shortcut=, dest=, vector=) at ./arch/x86/include/asm/ipi.h:75 #2 apic_send_IPI_self (vector=246) at arch/x86/kernel/apic/probe_64.c:54 #3 0xffffffff81011336 in arch_irq_work_raise () at arch/x86/kernel/irq_work.c:47 #4 0xffffffff8114990c in irq_work_queue (work=0xffff88000fc0e400) at kernel/irq_work.c:100 #5 0xffffffff8110c29d in wake_up_klogd () at kernel/printk/printk.c:2633 #6 0xffffffff8110ca60 in vprintk_emit (facility=0, level=, dict=0x0 , dictlen=, fmt=, args=) at kernel/printk/printk.c:1778 #7 0xffffffff816010c8 in printk (fmt=) at kernel/printk/printk.c:1868 #8 0xffffffffc00013ea in ?? () #9 0x0000000000000000 in ?? () Mailing-list-thread: https://lkml.org/lkml/2015/8/4/755 Signed-off-by: Jason A. Donenfeld Signed-off-by: David Vrabel Signed-off-by: Greg Kroah-Hartman --- arch/x86/xen/Makefile | 4 ++-- arch/x86/xen/xen-ops.h | 6 ++---- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index 7322755f337af..4b6e29ac0968c 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile @@ -13,13 +13,13 @@ CFLAGS_mmu.o := $(nostackp) obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \ time.o xen-asm.o xen-asm_$(BITS).o \ grant-table.o suspend.o platform-pci-unplug.o \ - p2m.o + p2m.o apic.o obj-$(CONFIG_EVENT_TRACING) += trace.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o -obj-$(CONFIG_XEN_DOM0) += apic.o vga.o +obj-$(CONFIG_XEN_DOM0) += vga.o obj-$(CONFIG_SWIOTLB_XEN) += pci-swiotlb-xen.o obj-$(CONFIG_XEN_EFI) += efi.o diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 9e195c683549d..bef30cbb56c47 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -101,17 +101,15 @@ struct dom0_vga_console_info; #ifdef CONFIG_XEN_DOM0 void __init xen_init_vga(const struct dom0_vga_console_info *, size_t size); -void __init xen_init_apic(void); #else static inline void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size) { } -static inline void __init xen_init_apic(void) -{ -} #endif +void __init xen_init_apic(void); + #ifdef CONFIG_XEN_EFI extern void xen_efi_init(void); #else From eb63f85afac03722b2229d61c39fec3083fcfa53 Mon Sep 17 00:00:00 2001 From: Julien Grall Date: Mon, 10 Aug 2015 19:10:38 +0100 Subject: [PATCH 0634/2091] xen/xenbus: Don't leak memory when unmapping the ring on HVM backend commit c22fe519e7e2b94ad173e0ea3b89c1a7d8be8d00 upstream. The commit ccc9d90a9a8b5c4ad7e9708ec41f75ff9e98d61d "xenbus_client: Extend interface to support multi-page ring" removes the call to free_xenballooned_pages() in xenbus_unmap_ring_vfree_hvm(), leaking a page for every shared ring. Only with backends running in HVM domains were affected. 
Signed-off-by: Julien Grall Reviewed-by: Boris Ostrovsky Reviewed-by: Wei Liu Signed-off-by: David Vrabel Signed-off-by: Greg Kroah-Hartman --- drivers/xen/xenbus/xenbus_client.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c index 96b2011d25f35..658be6cc3db60 100644 --- a/drivers/xen/xenbus/xenbus_client.c +++ b/drivers/xen/xenbus/xenbus_client.c @@ -814,8 +814,10 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr) rv = xenbus_unmap_ring(dev, node->handles, node->nr_handles, addrs); - if (!rv) + if (!rv) { vunmap(vaddr); + free_xenballooned_pages(node->nr_handles, node->hvm.pages); + } else WARN(1, "Leaking %p, size %u page(s)\n", vaddr, node->nr_handles); From 32d063ab015630c0fe3d0d7de6eaaf095f87cc22 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Wed, 12 Aug 2015 15:10:21 +0100 Subject: [PATCH 0635/2091] dm thin metadata: delete btrees when releasing metadata snapshot commit 7f518ad0a212e2a6fd68630e176af1de395070a7 upstream. The device details and mapping trees were just being decremented before. Now btree_del() is called to do a deep delete. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-thin-metadata.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 79f694120ddf0..cde1d6749017c 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -1295,8 +1295,8 @@ static int __release_metadata_snap(struct dm_pool_metadata *pmd) return r; disk_super = dm_block_data(copy); - dm_sm_dec_block(pmd->metadata_sm, le64_to_cpu(disk_super->data_mapping_root)); - dm_sm_dec_block(pmd->metadata_sm, le64_to_cpu(disk_super->device_details_root)); + dm_btree_del(&pmd->info, le64_to_cpu(disk_super->data_mapping_root)); + dm_btree_del(&pmd->details_info, le64_to_cpu(disk_super->device_details_root)); dm_sm_dec_block(pmd->metadata_sm, held_root); return dm_tm_unlock(pmd->tm, copy); From c329597eab7c6bb451d4a8f7e53906248d4e0411 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Mon, 27 Jul 2015 00:06:55 +0200 Subject: [PATCH 0636/2091] localmodconfig: Use Kbuild files too commit c0ddc8c745b7f89c50385fd7aa03c78dc543fa7a upstream. In kbuild it is allowed to define objects in files named "Makefile" and "Kbuild". Currently localmodconfig reads objects only from "Makefile"s and misses modules like nouveau. 
Link: http://lkml.kernel.org/r/1437948415-16290-1-git-send-email-richard@nod.at Reported-and-tested-by: Leonidas Spyropoulos Signed-off-by: Richard Weinberger Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- scripts/kconfig/streamline_config.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/kconfig/streamline_config.pl b/scripts/kconfig/streamline_config.pl index 9cb8522d8d22a..f3d3fb42b8735 100755 --- a/scripts/kconfig/streamline_config.pl +++ b/scripts/kconfig/streamline_config.pl @@ -137,7 +137,7 @@ sub read_config { my $kconfig = $ARGV[1]; my $lsmod_file = $ENV{'LSMOD'}; -my @makefiles = `find $ksource -name Makefile 2>/dev/null`; +my @makefiles = `find $ksource -name Makefile -or -name Kbuild 2>/dev/null`; chomp @makefiles; my %depends; From 0d2808f59028f624b18529bc445a1dcdcec56ae9 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Tue, 21 Jul 2015 11:00:53 +0200 Subject: [PATCH 0637/2091] EDAC, ppc4xx: Access mci->csrows array elements properly commit 5c16179b550b9fd8114637a56b153c9768ea06a5 upstream. The commit de3910eb79ac ("edac: change the mem allocation scheme to make Documentation/kobject.txt happy") changed the memory allocation for the csrows member. But ppc4xx_edac was forgotten in the patch. Fix it. Signed-off-by: Michael Walle Cc: linux-edac Cc: Mauro Carvalho Chehab Link: http://lkml.kernel.org/r/1437469253-8611-1-git-send-email-michael@walle.cc Signed-off-by: Borislav Petkov Signed-off-by: Greg Kroah-Hartman --- drivers/edac/ppc4xx_edac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/edac/ppc4xx_edac.c b/drivers/edac/ppc4xx_edac.c index 3515b381c1312..711d8ad74f116 100644 --- a/drivers/edac/ppc4xx_edac.c +++ b/drivers/edac/ppc4xx_edac.c @@ -920,7 +920,7 @@ static int ppc4xx_edac_init_csrows(struct mem_ctl_info *mci, u32 mcopt1) */ for (row = 0; row < mci->nr_csrows; row++) { - struct csrow_info *csi = &mci->csrows[row]; + struct csrow_info *csi = mci->csrows[row]; /* * Get the configuration settings for this From 731fedc9a4c9192e0250e8615d2db08134749b3d Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 3 Aug 2015 14:57:30 +0900 Subject: [PATCH 0638/2091] HID: hid-input: Fix accessing freed memory during device disconnect commit 0621809e37936e7c2b3eac9165cf2aad7f9189eb upstream. During unbinding the driver was dereferencing a pointer to memory already freed by power_supply_unregister(). Driver was freeing its internal description of battery through pointers stored in power_supply structure. However, because the core owns the power supply instance, after calling power_supply_unregister() this memory is freed and the driver cannot access these members. Fix this by storing the pointer to internal description of battery in a local variable before calling power_supply_unregister(), so the pointer remains valid. Signed-off-by: Krzysztof Kozlowski Reported-by: H.J. 
Lu Fixes: 297d716f6260 ("power_supply: Change ownership from driver to core") Reviewed-by: Dmitry Torokhov Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-input.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index 008e89bf6f3c3..32d52d29cc68d 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -462,12 +462,15 @@ static bool hidinput_setup_battery(struct hid_device *dev, unsigned report_type, static void hidinput_cleanup_battery(struct hid_device *dev) { + const struct power_supply_desc *psy_desc; + if (!dev->battery) return; + psy_desc = dev->battery->desc; power_supply_unregister(dev->battery); - kfree(dev->battery->desc->name); - kfree(dev->battery->desc); + kfree(psy_desc->name); + kfree(psy_desc); dev->battery = NULL; } #else /* !CONFIG_HID_BATTERY_STRENGTH */ From e0a829ebdcc4732ceec327fc00c300430257b8f8 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 29 Jul 2015 13:16:06 +0300 Subject: [PATCH 0639/2091] HID: uclogic: fix limit in uclogic_tablet_enable() commit 4a8e70f5d0d80675fc17b9ba1e62db8ca6b91775 upstream. The limit should be ARRAY_SIZE(params) (5 elements) here instead of sizeof(params) (20 bytes). Fixes: 08177f40bd00 ('HID: uclogic: merge hid-huion driver in hid-uclogic') Signed-off-by: Dan Carpenter Reviewed-by: Nikolai Kondrashov Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-uclogic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-uclogic.c b/drivers/hid/hid-uclogic.c index 94167310e15a4..b905d501e752d 100644 --- a/drivers/hid/hid-uclogic.c +++ b/drivers/hid/hid-uclogic.c @@ -858,7 +858,7 @@ static int uclogic_tablet_enable(struct hid_device *hdev) for (p = drvdata->rdesc; p <= drvdata->rdesc + drvdata->rsize - 4;) { if (p[0] == 0xFE && p[1] == 0xED && p[2] == 0x1D && - p[3] < sizeof(params)) { + p[3] < ARRAY_SIZE(params)) { v = params[p[3]]; put_unaligned(cpu_to_le32(v), (s32 *)p); p += 4; From 39d202573c4d5ec3b3a3930b31dc0c621e771414 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 10 Aug 2015 15:28:49 -0400 Subject: [PATCH 0640/2091] drm/radeon: add new OLAND pci id commit e037239e5e7b61007763984aa35a8329596d8c88 upstream. Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- include/drm/drm_pciids.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h index 45c39a37f9249..8bc073d297db2 100644 --- a/include/drm/drm_pciids.h +++ b/include/drm/drm_pciids.h @@ -172,6 +172,7 @@ {0x1002, 0x6610, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6611, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6613, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x6617, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6620, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6621, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6623, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ From 3f52099b9579b39c8e6bfbbd1a92edc303c6d9fd Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Tue, 11 Aug 2015 22:31:17 -0700 Subject: [PATCH 0641/2091] drm/vmwgfx: Fix execbuf locking issues commit 3e04e2fe6d87807d27521ad6ebb9e7919d628f25 upstream. 
This addresses two issues that cause problems with viewperf maya-03 in situation with memory pressure. The first issue causes attempts to unreserve buffers if batched reservation fails due to, for example, a signal pending. While previously the ttm_eu api was resistant against this type of error, it is no longer and the lockdep code will complain about attempting to unreserve buffers that are not reserved. The issue is resolved by avoid calling ttm_eu_backoff_reservation in the buffer reserve error path. The second issue is that the binding_mutex may be held when user-space fence objects are created and hence during memory reclaims. This may cause recursive attempts to grab the binding mutex. The issue is resolved by not holding the binding mutex across fence creation and submission. Signed-off-by: Thomas Hellstrom Reviewed-by: Sinclair Yeh Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index 654c8daeb5ab3..97ad3bcb99a75 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -2492,7 +2492,7 @@ int vmw_execbuf_process(struct drm_file *file_priv, ret = ttm_eu_reserve_buffers(&ticket, &sw_context->validate_nodes, true, NULL); if (unlikely(ret != 0)) - goto out_err; + goto out_err_nores; ret = vmw_validate_buffers(dev_priv, sw_context); if (unlikely(ret != 0)) @@ -2536,6 +2536,7 @@ int vmw_execbuf_process(struct drm_file *file_priv, vmw_resource_relocations_free(&sw_context->res_relocations); vmw_fifo_commit(dev_priv, command_size); + mutex_unlock(&dev_priv->binding_mutex); vmw_query_bo_switch_commit(dev_priv, sw_context); ret = vmw_execbuf_fence_commands(file_priv, dev_priv, @@ -2551,7 +2552,6 @@ int vmw_execbuf_process(struct drm_file *file_priv, DRM_ERROR("Fence submission error. Syncing.\n"); vmw_resource_list_unreserve(&sw_context->resource_list, false); - mutex_unlock(&dev_priv->binding_mutex); ttm_eu_fence_buffer_objects(&ticket, &sw_context->validate_nodes, (void *) fence); From 9ddd673ff000280299c0ae654f33c519aa52789b Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 5 Jun 2015 14:20:46 -0700 Subject: [PATCH 0642/2091] libfc: Fix fc_exch_recv_req() error path commit f6979adeaab578f8ca14fdd32b06ddee0d9d3314 upstream. Due to patch "libfc: Do not invoke the response handler after fc_exch_done()" (commit ID 7030fd62) the lport_recv() call in fc_exch_recv_req() is passed a dangling pointer. Avoid this by moving the fc_frame_free() call from fc_invoke_resp() to its callers. 
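The ownership rule behind that change can be modeled in isolation: the dispatch helper reports whether a handler consumed the frame, and the caller frees it only when nothing did. The sketch below uses simplified stand-in types (struct frame, invoke_resp()), not libfc's fc_frame or fc_invoke_resp():

	#include <stdbool.h>
	#include <stdlib.h>

	struct frame { char payload[64]; };

	typedef void (*resp_handler)(struct frame *fp, void *arg);

	/* Returns true only if a handler took responsibility for the frame. */
	static bool invoke_resp(resp_handler resp, struct frame *fp, void *arg)
	{
		if (!resp)
			return false;
		resp(fp, arg);
		return true;
	}

	static void recv_frame(resp_handler resp, struct frame *fp, void *arg)
	{
		if (!invoke_resp(resp, fp, arg))
			free(fp);	/* nobody consumed it, so the caller frees it */
	}

	static void handler(struct frame *fp, void *arg)
	{
		(void)arg;
		free(fp);		/* the handler owns the frame it was given */
	}

	int main(void)
	{
		recv_frame(handler, calloc(1, sizeof(struct frame)), NULL);
		recv_frame(NULL, calloc(1, sizeof(struct frame)), NULL);  /* freed by caller */
		return 0;
	}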
This patch fixes the following crash: general protection fault: 0000 [#3] PREEMPT SMP RIP: fc_lport_recv_req+0x72/0x280 [libfc] Call Trace: fc_exch_recv+0x642/0xde0 [libfc] fcoe_percpu_receive_thread+0x46a/0x5ed [fcoe] kthread+0x10a/0x120 ret_from_fork+0x42/0x70 Signed-off-by: Bart Van Assche Signed-off-by: Vasu Dev Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/libfc/fc_exch.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/libfc/fc_exch.c b/drivers/scsi/libfc/fc_exch.c index 1b3a094734522..30f9ef0c0d4f8 100644 --- a/drivers/scsi/libfc/fc_exch.c +++ b/drivers/scsi/libfc/fc_exch.c @@ -733,8 +733,6 @@ static bool fc_invoke_resp(struct fc_exch *ep, struct fc_seq *sp, if (resp) { resp(sp, fp, arg); res = true; - } else if (!IS_ERR(fp)) { - fc_frame_free(fp); } spin_lock_bh(&ep->ex_lock); @@ -1596,7 +1594,8 @@ static void fc_exch_recv_seq_resp(struct fc_exch_mgr *mp, struct fc_frame *fp) * If new exch resp handler is valid then call that * first. */ - fc_invoke_resp(ep, sp, fp); + if (!fc_invoke_resp(ep, sp, fp)) + fc_frame_free(fp); fc_exch_release(ep); return; @@ -1695,7 +1694,8 @@ static void fc_exch_abts_resp(struct fc_exch *ep, struct fc_frame *fp) fc_exch_hold(ep); if (!rc) fc_exch_delete(ep); - fc_invoke_resp(ep, sp, fp); + if (!fc_invoke_resp(ep, sp, fp)) + fc_frame_free(fp); if (has_rec) fc_exch_timer_set(ep, ep->r_a_tov); fc_exch_release(ep); From 1070185064814a5e9807fd5f2639f7bd2698502f Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 5 Jun 2015 14:20:51 -0700 Subject: [PATCH 0643/2091] libfc: Fix fc_fcp_cleanup_each_cmd() commit 8f2777f53e3d5ad8ef2a176a4463a5c8e1a16431 upstream. Since fc_fcp_cleanup_cmd() can sleep this function must not be called while holding a spinlock. 
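The locking shape the fix adopts is: mark the packet complete under the lock, drop the lock around the call that may sleep, then retake it for the final completion step. A simplified sketch with a stand-in pkt type and stub helpers, not the libfc code:

	#include <linux/kernel.h>
	#include <linux/spinlock.h>

	#define PKT_COMPL	1

	struct pkt {
		spinlock_t lock;
		unsigned int state;
	};

	static void pkt_cleanup(struct pkt *p)	{ might_sleep(); }	/* may sleep */
	static void pkt_complete(struct pkt *p)	{ }			/* needs p->lock held */

	static void cleanup_one(struct pkt *p)
	{
		spin_lock_bh(&p->lock);
		if (!(p->state & PKT_COMPL)) {
			p->state |= PKT_COMPL;

			/* drop the lock: the cleanup below may schedule */
			spin_unlock_bh(&p->lock);
			pkt_cleanup(p);

			spin_lock_bh(&p->lock);
			pkt_complete(p);
		}
		spin_unlock_bh(&p->lock);
	}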
This patch avoids that fc_fcp_cleanup_each_cmd() triggers the following bug: BUG: scheduling while atomic: sg_reset/1512/0x00000202 1 lock held by sg_reset/1512: #0: (&(&fsp->scsi_pkt_lock)->rlock){+.-...}, at: [] fc_fcp_cleanup_each_cmd.isra.21+0xa5/0x150 [libfc] Preemption disabled at:[] fc_fcp_cleanup_each_cmd.isra.21+0xa5/0x150 [libfc] Call Trace: [] dump_stack+0x4f/0x7b [] __schedule_bug+0x6c/0xd0 [] __schedule+0x71a/0xa10 [] schedule+0x32/0x80 [] fc_seq_set_resp+0xac/0x100 [libfc] [] fc_exch_done+0x41/0x60 [libfc] [] fc_fcp_cleanup_each_cmd.isra.21+0xcf/0x150 [libfc] [] fc_eh_device_reset+0x1c3/0x270 [libfc] [] scsi_try_bus_device_reset+0x29/0x60 [] scsi_ioctl_reset+0x258/0x2d0 [] scsi_ioctl+0x150/0x440 [] sd_ioctl+0xad/0x120 [] blkdev_ioctl+0x1b6/0x810 [] block_ioctl+0x38/0x40 [] do_vfs_ioctl+0x2f8/0x530 [] SyS_ioctl+0x81/0xa0 [] system_call_fastpath+0x16/0x7a Signed-off-by: Bart Van Assche Signed-off-by: Vasu Dev Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/libfc/fc_fcp.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/libfc/fc_fcp.c b/drivers/scsi/libfc/fc_fcp.c index c6795941b45d9..2d5909c4685ca 100644 --- a/drivers/scsi/libfc/fc_fcp.c +++ b/drivers/scsi/libfc/fc_fcp.c @@ -1039,11 +1039,26 @@ static void fc_fcp_cleanup_each_cmd(struct fc_lport *lport, unsigned int id, fc_fcp_pkt_hold(fsp); spin_unlock_irqrestore(&si->scsi_queue_lock, flags); - if (!fc_fcp_lock_pkt(fsp)) { + spin_lock_bh(&fsp->scsi_pkt_lock); + if (!(fsp->state & FC_SRB_COMPL)) { + fsp->state |= FC_SRB_COMPL; + /* + * TODO: dropping scsi_pkt_lock and then reacquiring + * again around fc_fcp_cleanup_cmd() is required, + * since fc_fcp_cleanup_cmd() calls into + * fc_seq_set_resp() and that func preempts cpu using + * schedule. May be schedule and related code should be + * removed instead of unlocking here to avoid scheduling + * while atomic bug. + */ + spin_unlock_bh(&fsp->scsi_pkt_lock); + fc_fcp_cleanup_cmd(fsp, error); + + spin_lock_bh(&fsp->scsi_pkt_lock); fc_io_compl(fsp); - fc_fcp_unlock_pkt(fsp); } + spin_unlock_bh(&fsp->scsi_pkt_lock); fc_fcp_pkt_release(fsp); spin_lock_irqsave(&si->scsi_queue_lock, flags); From cdda95937ec1f62e29f6e89c87600555594f3dec Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Wed, 5 Aug 2015 18:54:37 +0200 Subject: [PATCH 0644/2091] ARM: imx6: correct i.MX6 PCIe interrupt routing commit 1a9fa190956f45c1e58c4d8bfa5ac051691ea590 upstream. The PCIe interrupts are also routed through the GPC. This has been missed from the conversion to stacked IRQ domains as the PCIe controller uses an explicit interrupt map and thus doesn't inherit the SoC global interrupt parent. 
Signed-off-by: Lucas Stach Signed-off-by: Shawn Guo Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/imx6qdl.dtsi | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/imx6qdl.dtsi b/arch/arm/boot/dts/imx6qdl.dtsi index f74a8ded515f2..38c786018a09d 100644 --- a/arch/arm/boot/dts/imx6qdl.dtsi +++ b/arch/arm/boot/dts/imx6qdl.dtsi @@ -153,10 +153,10 @@ interrupt-names = "msi"; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0x7>; - interrupt-map = <0 0 0 1 &intc GIC_SPI 123 IRQ_TYPE_LEVEL_HIGH>, - <0 0 0 2 &intc GIC_SPI 122 IRQ_TYPE_LEVEL_HIGH>, - <0 0 0 3 &intc GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>, - <0 0 0 4 &intc GIC_SPI 120 IRQ_TYPE_LEVEL_HIGH>; + interrupt-map = <0 0 0 1 &gpc GIC_SPI 123 IRQ_TYPE_LEVEL_HIGH>, + <0 0 0 2 &gpc GIC_SPI 122 IRQ_TYPE_LEVEL_HIGH>, + <0 0 0 3 &gpc GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>, + <0 0 0 4 &gpc GIC_SPI 120 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clks IMX6QDL_CLK_PCIE_AXI>, <&clks IMX6QDL_CLK_LVDS1_GATE>, <&clks IMX6QDL_CLK_PCIE_REF_125M>; From 374f87fdfece43a29dd8bf7ced27a16a2de36dff Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Mon, 27 Jul 2015 17:46:38 +0530 Subject: [PATCH 0645/2091] ARM: dts: omap243x: Fix broken pbias device creation commit 4317c8c9121e4685561422ac267b292df8e80806 upstream. commit <72b10ac00eb1> ("ARM: dts: omap24xx: add minimal l4 bus layout with control module support") moved pbias_regulator dt node from being a child node of ocp to be the child node of scm_conf. After this device for pbias_regulator is not created. Fix it by adding "simple-bus" compatible property to scm_conf dt node. Fixes: 72b10ac00eb1 ("ARM: dts: omap24xx: add minimal l4 bus layout with control module support") Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Tony Lindgren Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/omap2430.dtsi | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/omap2430.dtsi b/arch/arm/boot/dts/omap2430.dtsi index 11a7963be0035..2390f387c2716 100644 --- a/arch/arm/boot/dts/omap2430.dtsi +++ b/arch/arm/boot/dts/omap2430.dtsi @@ -51,7 +51,8 @@ }; scm_conf: scm_conf@270 { - compatible = "syscon"; + compatible = "syscon", + "simple-bus"; reg = <0x270 0x240>; #address-cells = <1>; #size-cells = <1>; From 12fd9b8085a741711e1cb1a5375b307ea6c779e1 Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Mon, 27 Jul 2015 17:46:41 +0530 Subject: [PATCH 0646/2091] ARM: dts: dra7: Fix broken pbias device creation commit cd4556733b30cc363adc7b1cea3bffa7e2dd0c7c upstream. commit ("ARM: dts: dra7: add minimal l4 bus layout with control module support") moved pbias_regulator dt node from being a child node of ocp to be the child node of scm_conf. After this device for pbias_regulator is not created. Fix it by adding "simple-bus" compatible property to scm_conf dt node. 
Fixes: d919501feffa ("ARM: dts: dra7: add minimal l4 bus layout with control module support") Suggested-by: Tero Kristo Signed-off-by: Kishon Vijay Abraham I Tested-by: Grygorii Strashko Signed-off-by: Tony Lindgren Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/dra7.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi index f03a091cd0766..dfcc0dd637e5b 100644 --- a/arch/arm/boot/dts/dra7.dtsi +++ b/arch/arm/boot/dts/dra7.dtsi @@ -116,7 +116,7 @@ ranges = <0 0x2000 0x2000>; scm_conf: scm_conf@0 { - compatible = "syscon"; + compatible = "syscon", "simple-bus"; reg = <0x0 0x1400>; #address-cells = <1>; #size-cells = <1>; From 1d38be7b2c53fb4ac2af60bd137ea66541dfdecd Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Mon, 27 Jul 2015 17:46:39 +0530 Subject: [PATCH 0647/2091] ARM: dts: OMAP4: Fix broken pbias device creation commit 89a898df87e114952191ab0e061aa18e3c617880 upstream. commit <7415b0b4c645> ("ARM: dts: omap4: add minimal l4 bus layout with control module support") moved pbias_regulator dt node from being a child node of ocp to be the child node of omap4_padconf_global. After this device for pbias_regulator is not created. Fix it by adding "simple-bus" compatible property to omap4_padconf_global dt node. Fixes: 7415b0b4c645 ("ARM: dts: omap4: add minimal l4 bus layout with control module support") Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Tony Lindgren Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/omap4.dtsi | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/omap4.dtsi b/arch/arm/boot/dts/omap4.dtsi index f884d6adb71e8..84be9da74c7e5 100644 --- a/arch/arm/boot/dts/omap4.dtsi +++ b/arch/arm/boot/dts/omap4.dtsi @@ -191,7 +191,8 @@ }; omap4_padconf_global: omap4_padconf_global@5a0 { - compatible = "syscon"; + compatible = "syscon", + "simple-bus"; reg = <0x5a0 0x170>; #address-cells = <1>; #size-cells = <1>; From a6040ad81032cc6d90e761eeb81d65a6880b73c7 Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Mon, 27 Jul 2015 17:46:40 +0530 Subject: [PATCH 0648/2091] ARM: dts: OMAP5: Fix broken pbias device creation commit 70caac3f25291cf715cf8f2d8c7db46f6cbefe7c upstream. commit ("ARM: dts: omap5: add minimal l4 bus layout with control module support") moved pbias_regulator dt node from being a child node of ocp to be the child node of omap5_padconf_global. After this device for pbias_regulator is not created. Fix it by adding "simple-bus" compatible property to omap5_padconf_global dt node. Fixes: ed8509edddeb ("ARM: dts: omap5: add minimal l4 bus layout with control module support") Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Tony Lindgren Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/omap5.dtsi | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/omap5.dtsi b/arch/arm/boot/dts/omap5.dtsi index 7d24ae0306b56..874a26f9dc0ff 100644 --- a/arch/arm/boot/dts/omap5.dtsi +++ b/arch/arm/boot/dts/omap5.dtsi @@ -180,7 +180,8 @@ }; omap5_padconf_global: omap5_padconf_global@5a0 { - compatible = "syscon"; + compatible = "syscon", + "simple-bus"; reg = <0x5a0 0xec>; #address-cells = <1>; #size-cells = <1>; From 2f9204986c71763ea48ad02882b3db805357907a Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Wed, 3 Jun 2015 00:46:04 +0100 Subject: [PATCH 0649/2091] ARM: 8385/1: VDSO: group link options commit d33ce23b2160d26b27a47092da5d556b5b11a12a upstream. 
Currently the VDSO's link options are kind of a mess spread between ccflags-y and cmd_vdsold. Collect linker directives into one variable, VDSO_LDFLAGS, and use that in cmd_vdsold. Signed-off-by: Nathan Lynch Signed-off-by: Russell King Cc: Alexander Kochetkov Signed-off-by: Greg Kroah-Hartman --- arch/arm/vdso/Makefile | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/arch/arm/vdso/Makefile b/arch/arm/vdso/Makefile index 8aa7910510299..89182ad44ee8c 100644 --- a/arch/arm/vdso/Makefile +++ b/arch/arm/vdso/Makefile @@ -6,9 +6,14 @@ obj-vdso := vgettimeofday.o datapage.o targets := $(obj-vdso) vdso.so vdso.so.dbg vdso.so.raw vdso.lds obj-vdso := $(addprefix $(obj)/, $(obj-vdso)) -ccflags-y := -shared -fPIC -fno-common -fno-builtin -fno-stack-protector -ccflags-y += -nostdlib -Wl,-soname=linux-vdso.so.1 -DDISABLE_BRANCH_PROFILING -ccflags-y += -Wl,--no-undefined $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) +ccflags-y := -fPIC -fno-common -fno-builtin -fno-stack-protector +ccflags-y += -DDISABLE_BRANCH_PROFILING + +VDSO_LDFLAGS := -Wl,-Bsymbolic -Wl,--no-undefined -Wl,-soname=linux-vdso.so.1 +VDSO_LDFLAGS += -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096 +VDSO_LDFLAGS += -nostdlib -shared +VDSO_LDFLAGS += $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) +VDSO_LDFLAGS += $(call cc-ldoption, -Wl$(comma)--build-id) obj-$(CONFIG_VDSO) += vdso.o extra-$(CONFIG_VDSO) += vdso.lds @@ -40,10 +45,8 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE # Actual build commands quiet_cmd_vdsold = VDSO $@ - cmd_vdsold = $(CC) $(c_flags) -Wl,-T $(filter %.lds,$^) $(filter %.o,$^) \ - $(call cc-ldoption, -Wl$(comma)--build-id) \ - -Wl,-Bsymbolic -Wl,-z,max-page-size=4096 \ - -Wl,-z,common-page-size=4096 -o $@ + cmd_vdsold = $(CC) $(c_flags) $(VDSO_LDFLAGS) \ + -Wl,-T $(filter %.lds,$^) $(filter %.o,$^) -o $@ quiet_cmd_vdsomunge = MUNGE $@ cmd_vdsomunge = $(objtree)/$(obj)/vdsomunge $< $@ From 5211aabc75f8173cc0d9b0c990737756eae7f6c6 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Wed, 3 Jun 2015 00:41:15 +0100 Subject: [PATCH 0650/2091] ARM: 8384/1: VDSO: force use of BFD linker commit d2b30cd4b7223a96e606dfc8120626f66d81e091 upstream. When using a toolchain with gold as the default linker, the VDSO build fails: VDSO arch/arm/vdso/vdso.so.raw HOSTCC arch/arm/vdso/vdsomunge MUNGE arch/arm/vdso/vdso.so.dbg OBJCOPY arch/arm/vdso/vdso.so BFD: arch/arm/vdso/vdso.so: Not enough room for program headers, try linking with -N For whatever reason, ld.gold is omitting an exidx program header that ld.bfd emits, and even when I work around that, I don't get a working VDSO. For now, instead of supporting gold (which will fail to link the kernel anyway since it does not implement --pic-veneer), direct the compiler to use the traditional bfd linker. This is accomplished by using -fuse-ld, which is implemented in GCC 4.8 and later. Note: one limitation of this is that if the toolchain is configured to use gold by default, and the bfd linker is not in $PATH, the VDSO build will fail: VDSO arch/arm/vdso/vdso.so.raw collect2: fatal error: cannot find 'ld' This will happen if CROSS_COMPILE begins with a path such as /opt/bin/arm-linux-gnu- but /opt/bin is not in $PATH. This is considered an acceptable corner-case limitation and is easily worked around. Additonal note: we use cc-option instead of cc-ldoption so that -fuse-ld=bfd is placed in the command line if the compiler recognizes the option. 
Using cc-ldoption results in an attempt to link, which fails in the situation just described, causing -fuse-ld=bfd to be omitted and gold to be used for the VDSO link, which is what we're trying to prevent. Reported-by: Stefan Agner Signed-off-by: Nathan Lynch Signed-off-by: Russell King Cc: Alexander Kochetkov Signed-off-by: Greg Kroah-Hartman --- arch/arm/vdso/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/vdso/Makefile b/arch/arm/vdso/Makefile index 89182ad44ee8c..9d259d94e429c 100644 --- a/arch/arm/vdso/Makefile +++ b/arch/arm/vdso/Makefile @@ -14,6 +14,7 @@ VDSO_LDFLAGS += -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096 VDSO_LDFLAGS += -nostdlib -shared VDSO_LDFLAGS += $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) VDSO_LDFLAGS += $(call cc-ldoption, -Wl$(comma)--build-id) +VDSO_LDFLAGS += $(call cc-option, -fuse-ld=bfd) obj-$(CONFIG_VDSO) += vdso.o extra-$(CONFIG_VDSO) += vdso.lds From a3595b864a2d64cf5084cbd822be622a4b0f5664 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 19 May 2015 17:06:44 +0100 Subject: [PATCH 0651/2091] ARM: v7 setup function should invalidate L1 cache commit 02b4e2756e01c623cc4dbceae4b07be75252db5b upstream. All ARMv5 and older CPUs invalidate their caches in the early assembly setup function, prior to enabling the MMU. This is because the L1 cache should not contain any data relevant to the execution of the kernel at this point; all data should have been flushed out to memory. This requirement should also be true for ARMv6 and ARMv7 CPUs - indeed, these typically do not search their caches when caching is disabled (as it needs to be when the MMU is disabled) so this change should be safe. ARMv7 allows there to be CPUs which search their caches while caching is disabled, and it's permitted that the cache is uninitialised at boot; for these, the architecture reference manual requires that an implementation specific code sequence is used immediately after reset to ensure that the cache is placed into a sane state. Such functionality is definitely outside the remit of the Linux kernel, and must be done by the SoC's firmware before _any_ CPU gets to the Linux kernel. Changing the data cache clean+invalidate to a mere invalidate allows us to get rid of a lot of platform specific hacks around this issue for their secondary CPU bringup paths - some of which were buggy. 
Reviewed-by: Florian Fainelli Tested-by: Florian Fainelli Tested-by: Heiko Stuebner Tested-by: Dinh Nguyen Acked-by: Sebastian Hesselbarth Tested-by: Sebastian Hesselbarth Acked-by: Shawn Guo Tested-by: Thierry Reding Acked-by: Thierry Reding Tested-by: Geert Uytterhoeven Tested-by: Michal Simek Tested-by: Wei Xu Signed-off-by: Russell King Cc: Alexander Kochetkov Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-bcm/Makefile | 2 +- arch/arm/mach-bcm/brcmstb.h | 19 --------------- arch/arm/mach-bcm/headsmp-brcmstb.S | 33 --------------------------- arch/arm/mach-bcm/platsmp-brcmstb.c | 4 +--- arch/arm/mach-berlin/headsmp.S | 6 ----- arch/arm/mach-berlin/platsmp.c | 3 +-- arch/arm/mach-hisi/Makefile | 2 +- arch/arm/mach-hisi/core.h | 1 - arch/arm/mach-hisi/headsmp.S | 16 ------------- arch/arm/mach-hisi/platsmp.c | 4 ++-- arch/arm/mach-imx/headsmp.S | 1 - arch/arm/mach-mvebu/headsmp-a9.S | 1 - arch/arm/mach-prima2/headsmp.S | 1 - arch/arm/mach-rockchip/core.h | 1 - arch/arm/mach-rockchip/headsmp.S | 8 ------- arch/arm/mach-rockchip/platsmp.c | 5 ++-- arch/arm/mach-shmobile/common.h | 1 - arch/arm/mach-shmobile/headsmp-scu.S | 4 ++-- arch/arm/mach-shmobile/headsmp.S | 7 ------ arch/arm/mach-shmobile/platsmp-apmu.c | 2 +- arch/arm/mach-socfpga/core.h | 1 - arch/arm/mach-socfpga/headsmp.S | 5 ---- arch/arm/mach-socfpga/platsmp.c | 2 +- arch/arm/mach-tegra/Makefile | 2 +- arch/arm/mach-tegra/headsmp.S | 12 ---------- arch/arm/mach-tegra/reset.c | 2 +- arch/arm/mach-tegra/reset.h | 1 - arch/arm/mach-zynq/common.h | 2 -- arch/arm/mach-zynq/headsmp.S | 5 ---- arch/arm/mach-zynq/platsmp.c | 5 ++-- arch/arm/mm/proc-v7.S | 2 +- 31 files changed, 17 insertions(+), 143 deletions(-) delete mode 100644 arch/arm/mach-bcm/brcmstb.h delete mode 100644 arch/arm/mach-bcm/headsmp-brcmstb.S delete mode 100644 arch/arm/mach-hisi/headsmp.S delete mode 100644 arch/arm/mach-tegra/headsmp.S diff --git a/arch/arm/mach-bcm/Makefile b/arch/arm/mach-bcm/Makefile index 4c38674c73ecb..54d274da7ccba 100644 --- a/arch/arm/mach-bcm/Makefile +++ b/arch/arm/mach-bcm/Makefile @@ -43,5 +43,5 @@ obj-$(CONFIG_ARCH_BCM_63XX) := bcm63xx.o ifeq ($(CONFIG_ARCH_BRCMSTB),y) CFLAGS_platsmp-brcmstb.o += -march=armv7-a obj-y += brcmstb.o -obj-$(CONFIG_SMP) += headsmp-brcmstb.o platsmp-brcmstb.o +obj-$(CONFIG_SMP) += platsmp-brcmstb.o endif diff --git a/arch/arm/mach-bcm/brcmstb.h b/arch/arm/mach-bcm/brcmstb.h deleted file mode 100644 index ec0c3d112b367..0000000000000 --- a/arch/arm/mach-bcm/brcmstb.h +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Copyright (C) 2013-2014 Broadcom Corporation - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation version 2. - * - * This program is distributed "as is" WITHOUT ANY WARRANTY of any - * kind, whether express or implied; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -#ifndef __BRCMSTB_H__ -#define __BRCMSTB_H__ - -void brcmstb_secondary_startup(void); - -#endif /* __BRCMSTB_H__ */ diff --git a/arch/arm/mach-bcm/headsmp-brcmstb.S b/arch/arm/mach-bcm/headsmp-brcmstb.S deleted file mode 100644 index 199c1ea582480..0000000000000 --- a/arch/arm/mach-bcm/headsmp-brcmstb.S +++ /dev/null @@ -1,33 +0,0 @@ -/* - * SMP boot code for secondary CPUs - * Based on arch/arm/mach-tegra/headsmp.S - * - * Copyright (C) 2010 NVIDIA, Inc. 
- * Copyright (C) 2013-2014 Broadcom Corporation - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation version 2. - * - * This program is distributed "as is" WITHOUT ANY WARRANTY of any - * kind, whether express or implied; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -#include -#include -#include - - .section ".text.head", "ax" - -ENTRY(brcmstb_secondary_startup) - /* - * Ensure CPU is in a sane state by disabling all IRQs and switching - * into SVC mode. - */ - setmode PSR_I_BIT | PSR_F_BIT | SVC_MODE, r0 - - bl v7_invalidate_l1 - b secondary_startup -ENDPROC(brcmstb_secondary_startup) diff --git a/arch/arm/mach-bcm/platsmp-brcmstb.c b/arch/arm/mach-bcm/platsmp-brcmstb.c index e209e6fc7cafa..44d6bddf7a4e7 100644 --- a/arch/arm/mach-bcm/platsmp-brcmstb.c +++ b/arch/arm/mach-bcm/platsmp-brcmstb.c @@ -30,8 +30,6 @@ #include #include -#include "brcmstb.h" - enum { ZONE_MAN_CLKEN_MASK = BIT(0), ZONE_MAN_RESET_CNTL_MASK = BIT(1), @@ -153,7 +151,7 @@ static void brcmstb_cpu_boot(u32 cpu) * Set the reset vector to point to the secondary_startup * routine */ - cpu_set_boot_addr(cpu, virt_to_phys(brcmstb_secondary_startup)); + cpu_set_boot_addr(cpu, virt_to_phys(secondary_startup)); /* Unhalt the cpu */ cpu_rst_cfg_set(cpu, 0); diff --git a/arch/arm/mach-berlin/headsmp.S b/arch/arm/mach-berlin/headsmp.S index 4a4c56a58ad35..dc82a3486b05e 100644 --- a/arch/arm/mach-berlin/headsmp.S +++ b/arch/arm/mach-berlin/headsmp.S @@ -12,12 +12,6 @@ #include #include -ENTRY(berlin_secondary_startup) - ARM_BE8(setend be) - bl v7_invalidate_l1 - b secondary_startup -ENDPROC(berlin_secondary_startup) - /* * If the following instruction is set in the reset exception vector, CPUs * will fetch the value of the software reset address vector when being diff --git a/arch/arm/mach-berlin/platsmp.c b/arch/arm/mach-berlin/platsmp.c index 702e7982015ab..34a3753e73564 100644 --- a/arch/arm/mach-berlin/platsmp.c +++ b/arch/arm/mach-berlin/platsmp.c @@ -22,7 +22,6 @@ #define RESET_VECT 0x00 #define SW_RESET_ADDR 0x94 -extern void berlin_secondary_startup(void); extern u32 boot_inst; static void __iomem *cpu_ctrl; @@ -85,7 +84,7 @@ static void __init berlin_smp_prepare_cpus(unsigned int max_cpus) * Write the secondary startup address into the SW reset address * vector. This is used by boot_inst. 
*/ - writel(virt_to_phys(berlin_secondary_startup), vectors_base + SW_RESET_ADDR); + writel(virt_to_phys(secondary_startup), vectors_base + SW_RESET_ADDR); iounmap(vectors_base); unmap_scu: diff --git a/arch/arm/mach-hisi/Makefile b/arch/arm/mach-hisi/Makefile index 6b7b3033de0bc..659db1933ed36 100644 --- a/arch/arm/mach-hisi/Makefile +++ b/arch/arm/mach-hisi/Makefile @@ -6,4 +6,4 @@ CFLAGS_platmcpm.o := -march=armv7-a obj-y += hisilicon.o obj-$(CONFIG_MCPM) += platmcpm.o -obj-$(CONFIG_SMP) += platsmp.o hotplug.o headsmp.o +obj-$(CONFIG_SMP) += platsmp.o hotplug.o diff --git a/arch/arm/mach-hisi/core.h b/arch/arm/mach-hisi/core.h index 92a682d8e9394..c7648ef1825c7 100644 --- a/arch/arm/mach-hisi/core.h +++ b/arch/arm/mach-hisi/core.h @@ -12,7 +12,6 @@ extern void hi3xxx_cpu_die(unsigned int cpu); extern int hi3xxx_cpu_kill(unsigned int cpu); extern void hi3xxx_set_cpu(int cpu, bool enable); -extern void hisi_secondary_startup(void); extern struct smp_operations hix5hd2_smp_ops; extern void hix5hd2_set_cpu(int cpu, bool enable); extern void hix5hd2_cpu_die(unsigned int cpu); diff --git a/arch/arm/mach-hisi/headsmp.S b/arch/arm/mach-hisi/headsmp.S deleted file mode 100644 index 81e35b159e75e..0000000000000 --- a/arch/arm/mach-hisi/headsmp.S +++ /dev/null @@ -1,16 +0,0 @@ -/* - * Copyright (c) 2014 Hisilicon Limited. - * Copyright (c) 2014 Linaro Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include -#include - - __CPUINIT - -ENTRY(hisi_secondary_startup) - bl v7_invalidate_l1 - b secondary_startup diff --git a/arch/arm/mach-hisi/platsmp.c b/arch/arm/mach-hisi/platsmp.c index 8880c8e8b296f..51744127db666 100644 --- a/arch/arm/mach-hisi/platsmp.c +++ b/arch/arm/mach-hisi/platsmp.c @@ -118,7 +118,7 @@ static int hix5hd2_boot_secondary(unsigned int cpu, struct task_struct *idle) { phys_addr_t jumpaddr; - jumpaddr = virt_to_phys(hisi_secondary_startup); + jumpaddr = virt_to_phys(secondary_startup); hix5hd2_set_scu_boot_addr(HIX5HD2_BOOT_ADDRESS, jumpaddr); hix5hd2_set_cpu(cpu, true); arch_send_wakeup_ipi_mask(cpumask_of(cpu)); @@ -156,7 +156,7 @@ static int hip01_boot_secondary(unsigned int cpu, struct task_struct *idle) struct device_node *node; - jumpaddr = virt_to_phys(hisi_secondary_startup); + jumpaddr = virt_to_phys(secondary_startup); hip01_set_boot_addr(HIP01_BOOT_ADDRESS, jumpaddr); node = of_find_compatible_node(NULL, NULL, "hisilicon,hip01-sysctrl"); diff --git a/arch/arm/mach-imx/headsmp.S b/arch/arm/mach-imx/headsmp.S index de5047c8a6c87..b5e976816b63c 100644 --- a/arch/arm/mach-imx/headsmp.S +++ b/arch/arm/mach-imx/headsmp.S @@ -25,7 +25,6 @@ diag_reg_offset: .endm ENTRY(v7_secondary_startup) - bl v7_invalidate_l1 set_diag_reg b secondary_startup ENDPROC(v7_secondary_startup) diff --git a/arch/arm/mach-mvebu/headsmp-a9.S b/arch/arm/mach-mvebu/headsmp-a9.S index 08d5ed46b996b..48e4c4b3cd1c9 100644 --- a/arch/arm/mach-mvebu/headsmp-a9.S +++ b/arch/arm/mach-mvebu/headsmp-a9.S @@ -21,7 +21,6 @@ ENTRY(mvebu_cortex_a9_secondary_startup) ARM_BE8(setend be) - bl v7_invalidate_l1 bl armada_38x_scu_power_up b secondary_startup ENDPROC(mvebu_cortex_a9_secondary_startup) diff --git a/arch/arm/mach-prima2/headsmp.S b/arch/arm/mach-prima2/headsmp.S index d86fe33c5f538..209d9fc5c16cf 100644 --- a/arch/arm/mach-prima2/headsmp.S +++ b/arch/arm/mach-prima2/headsmp.S @@ -15,7 +15,6 @@ * ready for them to initialise. 
*/ ENTRY(sirfsoc_secondary_startup) - bl v7_invalidate_l1 mrc p15, 0, r0, c0, c0, 5 and r0, r0, #15 adr r4, 1f diff --git a/arch/arm/mach-rockchip/core.h b/arch/arm/mach-rockchip/core.h index 39bca96b555a6..492c048813da6 100644 --- a/arch/arm/mach-rockchip/core.h +++ b/arch/arm/mach-rockchip/core.h @@ -17,4 +17,3 @@ extern char rockchip_secondary_trampoline; extern char rockchip_secondary_trampoline_end; extern unsigned long rockchip_boot_fn; -extern void rockchip_secondary_startup(void); diff --git a/arch/arm/mach-rockchip/headsmp.S b/arch/arm/mach-rockchip/headsmp.S index 46c22dedf632a..d69708b072829 100644 --- a/arch/arm/mach-rockchip/headsmp.S +++ b/arch/arm/mach-rockchip/headsmp.S @@ -15,14 +15,6 @@ #include #include -ENTRY(rockchip_secondary_startup) - mrc p15, 0, r0, c0, c0, 0 @ read main ID register - ldr r1, =0x00000c09 @ Cortex-A9 primary part number - teq r0, r1 - beq v7_invalidate_l1 - b secondary_startup -ENDPROC(rockchip_secondary_startup) - ENTRY(rockchip_secondary_trampoline) ldr pc, 1f ENDPROC(rockchip_secondary_trampoline) diff --git a/arch/arm/mach-rockchip/platsmp.c b/arch/arm/mach-rockchip/platsmp.c index 5b4ca3c3c8797..2e6ab67e22844 100644 --- a/arch/arm/mach-rockchip/platsmp.c +++ b/arch/arm/mach-rockchip/platsmp.c @@ -149,8 +149,7 @@ static int __cpuinit rockchip_boot_secondary(unsigned int cpu, * sram_base_addr + 8: start address for pc * */ udelay(10); - writel(virt_to_phys(rockchip_secondary_startup), - sram_base_addr + 8); + writel(virt_to_phys(secondary_startup), sram_base_addr + 8); writel(0xDEADBEAF, sram_base_addr + 4); dsb_sev(); } @@ -189,7 +188,7 @@ static int __init rockchip_smp_prepare_sram(struct device_node *node) } /* set the boot function for the sram code */ - rockchip_boot_fn = virt_to_phys(rockchip_secondary_startup); + rockchip_boot_fn = virt_to_phys(secondary_startup); /* copy the trampoline to sram, that runs during startup of the core */ memcpy(sram_base_addr, &rockchip_secondary_trampoline, trampoline_sz); diff --git a/arch/arm/mach-shmobile/common.h b/arch/arm/mach-shmobile/common.h index afc60bad6fd6b..476092b86c6e4 100644 --- a/arch/arm/mach-shmobile/common.h +++ b/arch/arm/mach-shmobile/common.h @@ -14,7 +14,6 @@ extern void shmobile_smp_sleep(void); extern void shmobile_smp_hook(unsigned int cpu, unsigned long fn, unsigned long arg); extern int shmobile_smp_cpu_disable(unsigned int cpu); -extern void shmobile_invalidate_start(void); extern void shmobile_boot_scu(void); extern void shmobile_smp_scu_prepare_cpus(unsigned int max_cpus); extern void shmobile_smp_scu_cpu_die(unsigned int cpu); diff --git a/arch/arm/mach-shmobile/headsmp-scu.S b/arch/arm/mach-shmobile/headsmp-scu.S index 69df8bfac1672..fa5248c52399c 100644 --- a/arch/arm/mach-shmobile/headsmp-scu.S +++ b/arch/arm/mach-shmobile/headsmp-scu.S @@ -22,7 +22,7 @@ * Boot code for secondary CPUs. * * First we turn on L1 cache coherency for our CPU. Then we jump to - * shmobile_invalidate_start that invalidates the cache and hands over control + * secondary_startup that invalidates the cache and hands over control * to the common ARM startup code. 
*/ ENTRY(shmobile_boot_scu) @@ -36,7 +36,7 @@ ENTRY(shmobile_boot_scu) bic r2, r2, r3 @ Clear bits of our CPU (Run Mode) str r2, [r0, #8] @ write back - b shmobile_invalidate_start + b secondary_startup ENDPROC(shmobile_boot_scu) .text diff --git a/arch/arm/mach-shmobile/headsmp.S b/arch/arm/mach-shmobile/headsmp.S index 50c491567e11c..330c1fc63197d 100644 --- a/arch/arm/mach-shmobile/headsmp.S +++ b/arch/arm/mach-shmobile/headsmp.S @@ -16,13 +16,6 @@ #include #include -#ifdef CONFIG_SMP -ENTRY(shmobile_invalidate_start) - bl v7_invalidate_l1 - b secondary_startup -ENDPROC(shmobile_invalidate_start) -#endif - /* * Reset vector for secondary CPUs. * This will be mapped at address 0 by SBAR register. diff --git a/arch/arm/mach-shmobile/platsmp-apmu.c b/arch/arm/mach-shmobile/platsmp-apmu.c index f483b560b066a..b0790fc322824 100644 --- a/arch/arm/mach-shmobile/platsmp-apmu.c +++ b/arch/arm/mach-shmobile/platsmp-apmu.c @@ -133,7 +133,7 @@ void __init shmobile_smp_apmu_prepare_cpus(unsigned int max_cpus, int shmobile_smp_apmu_boot_secondary(unsigned int cpu, struct task_struct *idle) { /* For this particular CPU register boot vector */ - shmobile_smp_hook(cpu, virt_to_phys(shmobile_invalidate_start), 0); + shmobile_smp_hook(cpu, virt_to_phys(secondary_startup), 0); return apmu_wrap(cpu, apmu_power_on); } diff --git a/arch/arm/mach-socfpga/core.h b/arch/arm/mach-socfpga/core.h index a0f3b1cd497cc..767c09e954a0f 100644 --- a/arch/arm/mach-socfpga/core.h +++ b/arch/arm/mach-socfpga/core.h @@ -31,7 +31,6 @@ #define RSTMGR_MPUMODRST_CPU1 0x2 /* CPU1 Reset */ -extern void socfpga_secondary_startup(void); extern void __iomem *socfpga_scu_base_addr; extern void socfpga_init_clocks(void); diff --git a/arch/arm/mach-socfpga/headsmp.S b/arch/arm/mach-socfpga/headsmp.S index f65ea0af4af37..5bb0164271076 100644 --- a/arch/arm/mach-socfpga/headsmp.S +++ b/arch/arm/mach-socfpga/headsmp.S @@ -30,8 +30,3 @@ ENTRY(secondary_trampoline) 1: .long . 
.long socfpga_cpu1start_addr ENTRY(secondary_trampoline_end) - -ENTRY(socfpga_secondary_startup) - bl v7_invalidate_l1 - b secondary_startup -ENDPROC(socfpga_secondary_startup) diff --git a/arch/arm/mach-socfpga/platsmp.c b/arch/arm/mach-socfpga/platsmp.c index c64d89b7c0ca8..79c5336c569ff 100644 --- a/arch/arm/mach-socfpga/platsmp.c +++ b/arch/arm/mach-socfpga/platsmp.c @@ -40,7 +40,7 @@ static int socfpga_boot_secondary(unsigned int cpu, struct task_struct *idle) memcpy(phys_to_virt(0), &secondary_trampoline, trampoline_size); - writel(virt_to_phys(socfpga_secondary_startup), + writel(virt_to_phys(secondary_startup), sys_manager_base_addr + (socfpga_cpu1start_addr & 0x000000ff)); flush_cache_all(); diff --git a/arch/arm/mach-tegra/Makefile b/arch/arm/mach-tegra/Makefile index e48a74458c258..fffad2426ee4b 100644 --- a/arch/arm/mach-tegra/Makefile +++ b/arch/arm/mach-tegra/Makefile @@ -19,7 +19,7 @@ obj-$(CONFIG_ARCH_TEGRA_3x_SOC) += pm-tegra30.o ifeq ($(CONFIG_CPU_IDLE),y) obj-$(CONFIG_ARCH_TEGRA_3x_SOC) += cpuidle-tegra30.o endif -obj-$(CONFIG_SMP) += platsmp.o headsmp.o +obj-$(CONFIG_SMP) += platsmp.o obj-$(CONFIG_HOTPLUG_CPU) += hotplug.o obj-$(CONFIG_ARCH_TEGRA_114_SOC) += sleep-tegra30.o diff --git a/arch/arm/mach-tegra/headsmp.S b/arch/arm/mach-tegra/headsmp.S deleted file mode 100644 index 2072e7322c398..0000000000000 --- a/arch/arm/mach-tegra/headsmp.S +++ /dev/null @@ -1,12 +0,0 @@ -#include -#include - -#include "sleep.h" - - .section ".text.head", "ax" - -ENTRY(tegra_secondary_startup) - check_cpu_part_num 0xc09, r8, r9 - bleq v7_invalidate_l1 - b secondary_startup -ENDPROC(tegra_secondary_startup) diff --git a/arch/arm/mach-tegra/reset.c b/arch/arm/mach-tegra/reset.c index 894c5c472184f..6fd9db54887ee 100644 --- a/arch/arm/mach-tegra/reset.c +++ b/arch/arm/mach-tegra/reset.c @@ -94,7 +94,7 @@ void __init tegra_cpu_reset_handler_init(void) __tegra_cpu_reset_handler_data[TEGRA_RESET_MASK_PRESENT] = *((u32 *)cpu_possible_mask); __tegra_cpu_reset_handler_data[TEGRA_RESET_STARTUP_SECONDARY] = - virt_to_phys((void *)tegra_secondary_startup); + virt_to_phys((void *)secondary_startup); #endif #ifdef CONFIG_PM_SLEEP diff --git a/arch/arm/mach-tegra/reset.h b/arch/arm/mach-tegra/reset.h index 29c3dec0126a4..9c479c7925b85 100644 --- a/arch/arm/mach-tegra/reset.h +++ b/arch/arm/mach-tegra/reset.h @@ -37,7 +37,6 @@ void __tegra_cpu_reset_handler_start(void); void __tegra_cpu_reset_handler(void); void __tegra20_cpu1_resettable_status_offset(void); void __tegra_cpu_reset_handler_end(void); -void tegra_secondary_startup(void); #ifdef CONFIG_PM_SLEEP #define tegra_cpu_lp1_mask \ diff --git a/arch/arm/mach-zynq/common.h b/arch/arm/mach-zynq/common.h index 382c60e9aa160..7038cae95ddcd 100644 --- a/arch/arm/mach-zynq/common.h +++ b/arch/arm/mach-zynq/common.h @@ -17,8 +17,6 @@ #ifndef __MACH_ZYNQ_COMMON_H__ #define __MACH_ZYNQ_COMMON_H__ -void zynq_secondary_startup(void); - extern int zynq_slcr_init(void); extern int zynq_early_slcr_init(void); extern void zynq_slcr_system_reset(void); diff --git a/arch/arm/mach-zynq/headsmp.S b/arch/arm/mach-zynq/headsmp.S index dd8c071941e7f..045c72720a4d5 100644 --- a/arch/arm/mach-zynq/headsmp.S +++ b/arch/arm/mach-zynq/headsmp.S @@ -22,8 +22,3 @@ zynq_secondary_trampoline_jump: .globl zynq_secondary_trampoline_end zynq_secondary_trampoline_end: ENDPROC(zynq_secondary_trampoline) - -ENTRY(zynq_secondary_startup) - bl v7_invalidate_l1 - b secondary_startup -ENDPROC(zynq_secondary_startup) diff --git a/arch/arm/mach-zynq/platsmp.c 
b/arch/arm/mach-zynq/platsmp.c index 52d768ff78571..f66816c491869 100644 --- a/arch/arm/mach-zynq/platsmp.c +++ b/arch/arm/mach-zynq/platsmp.c @@ -87,10 +87,9 @@ int zynq_cpun_start(u32 address, int cpu) } EXPORT_SYMBOL(zynq_cpun_start); -static int zynq_boot_secondary(unsigned int cpu, - struct task_struct *idle) +static int zynq_boot_secondary(unsigned int cpu, struct task_struct *idle) { - return zynq_cpun_start(virt_to_phys(zynq_secondary_startup), cpu); + return zynq_cpun_start(virt_to_phys(secondary_startup), cpu); } /* diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index 3d1054f11a8ae..75ae72160099a 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -336,7 +336,7 @@ __v7_pj4b_setup: __v7_setup: adr r12, __v7_setup_stack @ the local stack stmia r12, {r0-r5, r7, r9, r11, lr} - bl v7_flush_dcache_louis + bl v7_invalidate_l1 ldmia r12, {r0-r5, r7, r9, r11, lr} mrc p15, 0, r0, c0, c0, 0 @ read main ID register From 3a9570eadcc10d5952c94ff3180da66c0a0c685e Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 9 Jul 2015 00:30:24 +0100 Subject: [PATCH 0652/2091] ARM: invalidate L1 before enabling coherency commit bac51ad9d14f6baed3730ef53bedc1eb2238563a upstream. We must invalidate the L1 cache before enabling coherency, otherwise secondary CPUs can inject invalid cache lines into the coherent CPU cluster, which could then be migrated to other CPUs. This fixes a recent regression with SoCFPGA randomly failing to boot. Fixes: 02b4e2756e01 ("ARM: v7 setup function should invalidate L1 cache") Signed-off-by: Russell King Cc: Alexander Kochetkov Signed-off-by: Greg Kroah-Hartman --- arch/arm/mm/proc-v7.S | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index 75ae72160099a..7911f14c2157a 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -268,7 +268,10 @@ __v7_ca15mp_setup: __v7_b15mp_setup: __v7_ca17mp_setup: mov r10, #0 -1: +1: adr r12, __v7_setup_stack @ the local stack + stmia r12, {r0-r5, lr} @ v7_invalidate_l1 touches r0-r6 + bl v7_invalidate_l1 + ldmia r12, {r0-r5, lr} #ifdef CONFIG_SMP ALT_SMP(mrc p15, 0, r0, c1, c0, 1) ALT_UP(mov r0, #(1 << 6)) @ fake it for UP @@ -277,7 +280,7 @@ __v7_ca17mp_setup: orreq r0, r0, r10 @ Enable CPU-specific SMP bits mcreq p15, 0, r0, c1, c0, 1 #endif - b __v7_setup + b __v7_setup_cont __v7_pj4b_setup: #ifdef CONFIG_CPU_PJ4B @@ -335,10 +338,11 @@ __v7_pj4b_setup: __v7_setup: adr r12, __v7_setup_stack @ the local stack - stmia r12, {r0-r5, r7, r9, r11, lr} + stmia r12, {r0-r5, lr} @ v7_invalidate_l1 touches r0-r6 bl v7_invalidate_l1 - ldmia r12, {r0-r5, r7, r9, r11, lr} + ldmia r12, {r0-r5, lr} +__v7_setup_cont: mrc p15, 0, r0, c0, c0, 0 @ read main ID register and r10, r0, #0xff000000 @ ARM? teq r10, #0x41000000 @@ -460,7 +464,7 @@ ENDPROC(__v7_setup) .align 2 __v7_setup_stack: - .space 4 * 11 @ 11 registers + .space 4 * 7 @ 12 registers __INITDATA From e8dd540d01845c634350182e3a5489ff95a130a0 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Sun, 14 Jun 2015 15:41:50 +0100 Subject: [PATCH 0653/2091] mfd: arizona: Fix initialisation of the PM runtime commit 72e43164fd472f6c2659c8313b87da962322dbcf upstream. The PM runtime core by default assumes a chip is suspended when runtime PM is enabled. Currently the arizona driver enables runtime PM when the chip is fully active and then disables the DCVDD regulator at the end of arizona_dev_init. 
This however has several problems, firstly the if we reach the end of arizona_dev_init, we did not properly follow all the proceedures for shutting down the chip, and most notably we never marked the chip as cache only so any writes occurring between then and the next PM runtime resume will be lost. Secondly, if we are already resumed when we reach the end of dev_init, then at best we get unbalanced regulator enable/disables at work we lose DCVDD whilst we need it. Additionally, since the commit 4f0216409f7c ("mfd: arizona: Add better support for system suspend"), the PM runtime operations may disable/enable the IRQ, so the IRQs must now be enabled before we call any PM operations. This patch adds a call to pm_runtime_set_active to inform the PM core that the device is starting up active and moves the PM enabling to around the IRQ initialisation to avoid any PM callbacks happening until the IRQs are initialised. Signed-off-by: Charles Keepax Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/mfd/arizona-core.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/mfd/arizona-core.c b/drivers/mfd/arizona-core.c index 6ca6dfab50ebf..6523903e15fe5 100644 --- a/drivers/mfd/arizona-core.c +++ b/drivers/mfd/arizona-core.c @@ -912,10 +912,6 @@ int arizona_dev_init(struct arizona *arizona) arizona->pdata.gpio_defaults[i]); } - pm_runtime_set_autosuspend_delay(arizona->dev, 100); - pm_runtime_use_autosuspend(arizona->dev); - pm_runtime_enable(arizona->dev); - /* Chip default */ if (!arizona->pdata.clk32k_src) arizona->pdata.clk32k_src = ARIZONA_32KZ_MCLK2; @@ -1012,11 +1008,17 @@ int arizona_dev_init(struct arizona *arizona) arizona->pdata.spk_fmt[i]); } + pm_runtime_set_active(arizona->dev); + pm_runtime_enable(arizona->dev); + /* Set up for interrupts */ ret = arizona_irq_init(arizona); if (ret != 0) goto err_reset; + pm_runtime_set_autosuspend_delay(arizona->dev, 100); + pm_runtime_use_autosuspend(arizona->dev); + arizona_request_irq(arizona, ARIZONA_IRQ_CLKGEN_ERR, "CLKGEN error", arizona_clkgen_err, arizona); arizona_request_irq(arizona, ARIZONA_IRQ_OVERCLOCKED, "Overclocked", @@ -1045,10 +1047,6 @@ int arizona_dev_init(struct arizona *arizona) goto err_irq; } -#ifdef CONFIG_PM - regulator_disable(arizona->dcvdd); -#endif - return 0; err_irq: From 1cc4bf08d789b5daf679244d93f4adebb19ff64d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 13 Aug 2015 08:25:20 -0700 Subject: [PATCH 0654/2091] Revert x86 sigcontext cleanups commit ed596cde9425509ec6ce88e19f03e9b13b6f518b upstream. This reverts commits 9a036b93a344 ("x86/signal/64: Remove 'fs' and 'gs' from sigcontext") and c6f2062935c8 ("x86/signal/64: Fix SS handling for signals delivered to 64-bit programs"). They were cleanups, but they break dosemu by changing the signal return behavior (and removing 'fs' and 'gs' from the sigcontext struct - while not actually changing any behavior - causes build problems). 
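To make the dosemu-style breakage concrete, here is a minimal, hypothetical consumer of the saved register state a signal handler receives (plain glibc ucontext API, x86-64 only; this is not dosemu's code and not part of the revert). Programs like this are sensitive both to the sigcontext layout and to which registers the kernel saves and restores around a signal:

#define _GNU_SOURCE
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <ucontext.h>
#include <unistd.h>

static void handler(int sig, siginfo_t *info, void *uc_void)
{
        ucontext_t *uc = uc_void;

        /* gregs[] is the glibc view of the kernel's sigcontext.
         * (fprintf is not async-signal-safe, but fine for a demo.) */
        fprintf(stderr, "sig %d at %p, rip=%#llx rsp=%#llx\n",
                sig, info->si_addr,
                (unsigned long long)uc->uc_mcontext.gregs[REG_RIP],
                (unsigned long long)uc->uc_mcontext.gregs[REG_RSP]);
        _exit(0);
}

int main(void)
{
        struct sigaction sa;

        memset(&sa, 0, sizeof(sa));
        sa.sa_sigaction = handler;
        sa.sa_flags = SA_SIGINFO;
        sigaction(SIGSEGV, &sa, NULL);

        *(volatile int *)0 = 0;        /* fault on purpose */
        return 1;
}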
Reported-and-tested-by: Stas Sergeev Acked-by: Andy Lutomirski Cc: Ingo Molnar Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/sigcontext.h | 6 +++--- arch/x86/include/uapi/asm/sigcontext.h | 21 +++------------------ arch/x86/kernel/signal.c | 26 +++++++++++--------------- 3 files changed, 17 insertions(+), 36 deletions(-) diff --git a/arch/x86/include/asm/sigcontext.h b/arch/x86/include/asm/sigcontext.h index 6fe6b182c9981..9dfce4e0417d9 100644 --- a/arch/x86/include/asm/sigcontext.h +++ b/arch/x86/include/asm/sigcontext.h @@ -57,9 +57,9 @@ struct sigcontext { unsigned long ip; unsigned long flags; unsigned short cs; - unsigned short __pad2; /* Was called gs, but was always zero. */ - unsigned short __pad1; /* Was called fs, but was always zero. */ - unsigned short ss; + unsigned short gs; + unsigned short fs; + unsigned short __pad0; unsigned long err; unsigned long trapno; unsigned long oldmask; diff --git a/arch/x86/include/uapi/asm/sigcontext.h b/arch/x86/include/uapi/asm/sigcontext.h index 16dc4e8a2cd34..d8b9f9081e86f 100644 --- a/arch/x86/include/uapi/asm/sigcontext.h +++ b/arch/x86/include/uapi/asm/sigcontext.h @@ -177,24 +177,9 @@ struct sigcontext { __u64 rip; __u64 eflags; /* RFLAGS */ __u16 cs; - - /* - * Prior to 2.5.64 ("[PATCH] x86-64 updates for 2.5.64-bk3"), - * Linux saved and restored fs and gs in these slots. This - * was counterproductive, as fsbase and gsbase were never - * saved, so arch_prctl was presumably unreliable. - * - * If these slots are ever needed for any other purpose, there - * is some risk that very old 64-bit binaries could get - * confused. I doubt that many such binaries still work, - * though, since the same patch in 2.5.64 also removed the - * 64-bit set_thread_area syscall, so it appears that there is - * no TLS API that works in both pre- and post-2.5.64 kernels. - */ - __u16 __pad2; /* Was gs. */ - __u16 __pad1; /* Was fs. */ - - __u16 ss; + __u16 gs; + __u16 fs; + __u16 __pad0; __u64 err; __u64 trapno; __u64 oldmask; diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 1ea14fd53933b..e0fd5f47fbb9a 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -93,8 +93,15 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc) COPY(r15); #endif /* CONFIG_X86_64 */ +#ifdef CONFIG_X86_32 COPY_SEG_CPL3(cs); COPY_SEG_CPL3(ss); +#else /* !CONFIG_X86_32 */ + /* Kernel saves and restores only the CS segment register on signals, + * which is the bare minimum needed to allow mixed 32/64-bit code. + * App's signal handler can save/restore other segments if needed. */ + COPY_SEG_CPL3(cs); +#endif /* CONFIG_X86_32 */ get_user_ex(tmpflags, &sc->flags); regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); @@ -154,9 +161,8 @@ int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, #else /* !CONFIG_X86_32 */ put_user_ex(regs->flags, &sc->flags); put_user_ex(regs->cs, &sc->cs); - put_user_ex(0, &sc->__pad2); - put_user_ex(0, &sc->__pad1); - put_user_ex(regs->ss, &sc->ss); + put_user_ex(0, &sc->gs); + put_user_ex(0, &sc->fs); #endif /* CONFIG_X86_32 */ put_user_ex(fpstate, &sc->fpstate); @@ -450,19 +456,9 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig, regs->sp = (unsigned long)frame; - /* - * Set up the CS and SS registers to run signal handlers in - * 64-bit mode, even if the handler happens to be interrupting - * 32-bit or 16-bit code. - * - * SS is subtle. 
In 64-bit mode, we don't need any particular - * SS descriptor, but we do need SS to be valid. It's possible - * that the old SS is entirely bogus -- this can happen if the - * signal we're trying to deliver is #GP or #SS caused by a bad - * SS value. - */ + /* Set up the CS register to run signal handlers in 64-bit mode, + even if the handler happens to be interrupting 32-bit code. */ regs->cs = __USER_CS; - regs->ss = __USER_DS; return 0; } From fd1090c256f53c12d3840056a15112264ee26c0a Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 26 Jul 2015 21:34:50 -0700 Subject: [PATCH 0655/2091] regmap: regcache-rbtree: Clean new present bits on present bitmap resize commit 8ef9724bf9718af81cfc5132253372f79c71b7e2 upstream. When inserting a new register into a block, the present bit map size is increased using krealloc. krealloc does not clear the additionally allocated memory, leaving it filled with random values. Result is that some registers are considered cached even though this is not the case. Fix the problem by clearing the additionally allocated memory. Also, if the bitmap size does not increase, do not reallocate the bitmap at all to reduce overhead. Fixes: 3f4ff561bc88 ("regmap: rbtree: Make cache_present bitmap per node") Signed-off-by: Guenter Roeck Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/base/regmap/regcache-rbtree.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/base/regmap/regcache-rbtree.c b/drivers/base/regmap/regcache-rbtree.c index 81751a49d8bf2..56486d92c4e72 100644 --- a/drivers/base/regmap/regcache-rbtree.c +++ b/drivers/base/regmap/regcache-rbtree.c @@ -296,11 +296,20 @@ static int regcache_rbtree_insert_to_block(struct regmap *map, if (!blk) return -ENOMEM; - present = krealloc(rbnode->cache_present, - BITS_TO_LONGS(blklen) * sizeof(*present), GFP_KERNEL); - if (!present) { - kfree(blk); - return -ENOMEM; + if (BITS_TO_LONGS(blklen) > BITS_TO_LONGS(rbnode->blklen)) { + present = krealloc(rbnode->cache_present, + BITS_TO_LONGS(blklen) * sizeof(*present), + GFP_KERNEL); + if (!present) { + kfree(blk); + return -ENOMEM; + } + + memset(present + BITS_TO_LONGS(rbnode->blklen), 0, + (BITS_TO_LONGS(blklen) - BITS_TO_LONGS(rbnode->blklen)) + * sizeof(*present)); + } else { + present = rbnode->cache_present; } /* insert the register value in the correct place in the rbnode block */ From 6829ed43ba4fb8b4957bc6a6915e8372ad848d97 Mon Sep 17 00:00:00 2001 From: Markos Chandras Date: Thu, 13 Aug 2015 08:47:59 +0100 Subject: [PATCH 0656/2091] MIPS: Fix seccomp syscall argument for MIPS64 commit 9f161439e4104b641a7bfb9b89581d801159fec8 upstream. Commit 4c21b8fd8f14 ("MIPS: seccomp: Handle indirect system calls (o32)") fixed indirect system calls on O32 but it also introduced a bug for MIPS64 where it erroneously modified the v0 (syscall) register with the assumption that the sycall offset hasn't been taken into consideration. This breaks seccomp on MIPS64 n64 and n32 ABIs. We fix this by replacing the addition with a move instruction. 
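Since the broken value was the syscall number handed to the seccomp/trace entry hook, it is worth remembering that seccomp filters classify system calls purely by seccomp_data.nr; a doubly-offset number therefore never matches the installed rules. A minimal, hypothetical filter (generic Linux userspace C, not MIPS-specific and not part of this patch) that shows the dependence:

#include <linux/filter.h>
#include <linux/seccomp.h>
#include <stddef.h>
#include <stdio.h>
#include <sys/prctl.h>
#include <sys/syscall.h>
#include <sys/utsname.h>

int main(void)
{
        struct sock_filter filter[] = {
                /* Load the syscall number the kernel reports. */
                BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
                         offsetof(struct seccomp_data, nr)),
                /* Kill on uname(2), allow everything else. */
                BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_uname, 0, 1),
                BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL),
                BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
        };
        struct sock_fprog prog = {
                .len = sizeof(filter) / sizeof(filter[0]),
                .filter = filter,
        };
        struct utsname buf;

        if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) ||
            prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) {
                perror("prctl");
                return 1;
        }

        /* The filter compares against the ABI's full __NR_* value, so this
         * call is caught only if the kernel reports that same numbering. */
        uname(&buf);
        return 0;
}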
Fixes: 4c21b8fd8f14 ("MIPS: seccomp: Handle indirect system calls (o32)") Reviewed-by: James Hogan Signed-off-by: Markos Chandras Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/10951/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/scall64-64.S | 2 +- arch/mips/kernel/scall64-n32.S | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S index ad4d44635c760..a6f6b762c47a4 100644 --- a/arch/mips/kernel/scall64-64.S +++ b/arch/mips/kernel/scall64-64.S @@ -80,7 +80,7 @@ syscall_trace_entry: SAVE_STATIC move s0, t2 move a0, sp - daddiu a1, v0, __NR_64_Linux + move a1, v0 jal syscall_trace_enter bltz v0, 2f # seccomp failed? Skip syscall diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index 446cc654da56c..4b2010654c463 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -72,7 +72,7 @@ n32_syscall_trace_entry: SAVE_STATIC move s0, t2 move a0, sp - daddiu a1, v0, __NR_N32_Linux + move a1, v0 jal syscall_trace_enter bltz v0, 2f # seccomp failed? Skip syscall From 71b6a23ea944c6a30958bd15a69211e1fd521e1e Mon Sep 17 00:00:00 2001 From: John Soni Jose Date: Wed, 24 Jun 2015 06:41:58 +0530 Subject: [PATCH 0657/2091] libiscsi: Fix host busy blocking during connection teardown commit 660d0831d1494a6837b2f810d08b5be092c1f31d upstream. In case of hw iscsi offload, an host can have N-number of active connections. There can be IO's running on some connections which make host->host_busy always TRUE. Now if logout from a connection is tried then the code gets into an infinite loop as host->host_busy is always TRUE. iscsi_conn_teardown(....) { ......... /* * Block until all in-progress commands for this connection * time out or fail. */ for (;;) { spin_lock_irqsave(session->host->host_lock, flags); if (!atomic_read(&session->host->host_busy)) { /* OK for ERL == 0 */ spin_unlock_irqrestore(session->host->host_lock, flags); break; } spin_unlock_irqrestore(session->host->host_lock, flags); msleep_interruptible(500); iscsi_conn_printk(KERN_INFO, conn, "iscsi conn_destroy(): " "host_busy %d host_failed %d\n", atomic_read(&session->host->host_busy), session->host->host_failed); ................ ............... } } This is not an issue with software-iscsi/iser as each cxn is a separate host. Fix: Acquiring eh_mutex in iscsi_conn_teardown() before setting session->state = ISCSI_STATE_TERMINATE. Signed-off-by: John Soni Jose Reviewed-by: Mike Christie Reviewed-by: Chris Leech Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/libiscsi.c | 25 ++----------------------- 1 file changed, 2 insertions(+), 23 deletions(-) diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index 8053f24f03499..98d9bb6ff725f 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -2941,10 +2941,10 @@ void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn) { struct iscsi_conn *conn = cls_conn->dd_data; struct iscsi_session *session = conn->session; - unsigned long flags; del_timer_sync(&conn->transport_timer); + mutex_lock(&session->eh_mutex); spin_lock_bh(&session->frwd_lock); conn->c_stage = ISCSI_CONN_CLEANUP_WAIT; if (session->leadconn == conn) { @@ -2956,28 +2956,6 @@ void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn) } spin_unlock_bh(&session->frwd_lock); - /* - * Block until all in-progress commands for this connection - * time out or fail. 
- */ - for (;;) { - spin_lock_irqsave(session->host->host_lock, flags); - if (!atomic_read(&session->host->host_busy)) { /* OK for ERL == 0 */ - spin_unlock_irqrestore(session->host->host_lock, flags); - break; - } - spin_unlock_irqrestore(session->host->host_lock, flags); - msleep_interruptible(500); - iscsi_conn_printk(KERN_INFO, conn, "iscsi conn_destroy(): " - "host_busy %d host_failed %d\n", - atomic_read(&session->host->host_busy), - session->host->host_failed); - /* - * force eh_abort() to unblock - */ - wake_up(&conn->ehwait); - } - /* flush queued up work because we free the connection below */ iscsi_suspend_tx(conn); @@ -2994,6 +2972,7 @@ void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn) if (session->leadconn == conn) session->leadconn = NULL; spin_unlock_bh(&session->frwd_lock); + mutex_unlock(&session->eh_mutex); iscsi_destroy_conn(cls_conn); } From 2a51c43d1d6824c3fce1511e0f30db51274e7cd7 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Tue, 23 Jun 2015 12:13:59 -0400 Subject: [PATCH 0658/2091] sd: Fix maximum I/O size for BLOCK_PC requests commit 4f258a46346c03fa0bbb6199ffaf4e1f9f599660 upstream. Commit bcdb247c6b6a ("sd: Limit transfer length") clamped the maximum size of an I/O request to the MAXIMUM TRANSFER LENGTH field in the BLOCK LIMITS VPD. This had the unfortunate effect of also limiting the maximum size of non-filesystem requests sent to the device through sg/bsg. Avoid using blk_queue_max_hw_sectors() and set the max_sectors queue limit directly. Also update the comment in blk_limits_max_hw_sectors() to clarify that max_hw_sectors defines the limit for the I/O controller only. Signed-off-by: Martin K. Petersen Reported-by: Brian King Tested-by: Brian King Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- block/blk-settings.c | 4 ++-- drivers/scsi/sd.c | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/block/blk-settings.c b/block/blk-settings.c index 12600bfffca93..e0057d035200c 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -241,8 +241,8 @@ EXPORT_SYMBOL(blk_queue_bounce_limit); * Description: * Enables a low level driver to set a hard upper limit, * max_hw_sectors, on the size of requests. max_hw_sectors is set by - * the device driver based upon the combined capabilities of I/O - * controller and storage device. + * the device driver based upon the capabilities of the I/O + * controller. * * max_sectors is a soft limit imposed by the block layer for * filesystem type requests. This value can be overridden on a diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 7f9d65fe4fd9a..11ea52b2c36b3 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -2770,9 +2770,9 @@ static int sd_revalidate_disk(struct gendisk *disk) max_xfer = sdkp->max_xfer_blocks; max_xfer <<= ilog2(sdp->sector_size) - 9; - max_xfer = min_not_zero(queue_max_hw_sectors(sdkp->disk->queue), - max_xfer); - blk_queue_max_hw_sectors(sdkp->disk->queue, max_xfer); + sdkp->disk->queue->limits.max_sectors = + min_not_zero(queue_max_hw_sectors(sdkp->disk->queue), max_xfer); + set_capacity(disk, sdkp->capacity); sd_config_write_same(sdkp); kfree(buffer); From 686cb03a4e6bf3aef1dd46bf29ebb20866dbbd12 Mon Sep 17 00:00:00 2001 From: Jan Stancek Date: Sat, 8 Aug 2015 08:47:28 +0200 Subject: [PATCH 0659/2091] crypto: nx - respect sg limit bounds when building sg lists for SHA commit d3392f41f6d3cd0a034bd0aca47fabea2b47218e upstream. 
Commit 000851119e80 changed sha256/512 update functions to pass more data to nx_build_sg_list(), which ends with sg list overflows and usually with update functions failing for data larger than max_sg_len * NX_PAGE_SIZE. This happens because: - both "total" and "to_process" are updated, which leads to "to_process" getting overflowed for some data lengths For example: In first iteration "total" is 50, and let's assume "to_process" is 30 due to sg limits. At the end of first iteration "total" is set to 20. At start of 2nd iteration "to_process" overflows on: to_process = total - to_process; - "in_sg" is not reset to nx_ctx->in_sg after each iteration - nx_build_sg_list() is hitting overflow because the amount of data passed to it would require more than sgmax elements - as consequence of previous item, data stored in overflowed sg list may no longer be aligned to SHA*_BLOCK_SIZE This patch changes sha256/512 update functions so that "to_process" respects sg limits and never tries to pass more data to nx_build_sg_list() to avoid overflows. "to_process" is calculated as minimum of "total" and sg limits at start of every iteration. Fixes: 000851119e80 ("crypto: nx - Fix SHA concurrence issue and sg limit bounds") Signed-off-by: Jan Stancek Cc: Leonidas Da Silva Barbosa Cc: Marcelo Henrique Cerri Cc: Fionnuala Gunter Cc: "David S. Miller" Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/nx/nx-sha256.c | 27 ++++++++++++++++----------- drivers/crypto/nx/nx-sha512.c | 28 ++++++++++++++++------------ 2 files changed, 32 insertions(+), 23 deletions(-) diff --git a/drivers/crypto/nx/nx-sha256.c b/drivers/crypto/nx/nx-sha256.c index 08f8d5cd63349..becb738c897b1 100644 --- a/drivers/crypto/nx/nx-sha256.c +++ b/drivers/crypto/nx/nx-sha256.c @@ -71,7 +71,6 @@ static int nx_sha256_update(struct shash_desc *desc, const u8 *data, struct sha256_state *sctx = shash_desc_ctx(desc); struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); struct nx_csbcpb *csbcpb = (struct nx_csbcpb *)nx_ctx->csbcpb; - struct nx_sg *in_sg; struct nx_sg *out_sg; u64 to_process = 0, leftover, total; unsigned long irq_flags; @@ -97,7 +96,6 @@ static int nx_sha256_update(struct shash_desc *desc, const u8 *data, NX_CPB_FDM(csbcpb) |= NX_FDM_INTERMEDIATE; NX_CPB_FDM(csbcpb) |= NX_FDM_CONTINUATION; - in_sg = nx_ctx->in_sg; max_sg_len = min_t(u64, nx_ctx->ap->sglen, nx_driver.of.max_sg_len/sizeof(struct nx_sg)); max_sg_len = min_t(u64, max_sg_len, @@ -114,17 +112,12 @@ static int nx_sha256_update(struct shash_desc *desc, const u8 *data, } do { - /* - * to_process: the SHA256_BLOCK_SIZE data chunk to process in - * this update. This value is also restricted by the sg list - * limits. - */ - to_process = total - to_process; - to_process = to_process & ~(SHA256_BLOCK_SIZE - 1); + int used_sgs = 0; + struct nx_sg *in_sg = nx_ctx->in_sg; if (buf_len) { data_len = buf_len; - in_sg = nx_build_sg_list(nx_ctx->in_sg, + in_sg = nx_build_sg_list(in_sg, (u8 *) sctx->buf, &data_len, max_sg_len); @@ -133,15 +126,27 @@ static int nx_sha256_update(struct shash_desc *desc, const u8 *data, rc = -EINVAL; goto out; } + used_sgs = in_sg - nx_ctx->in_sg; } + /* to_process: SHA256_BLOCK_SIZE aligned chunk to be + * processed in this iteration. This value is restricted + * by sg list limits and number of sgs we already used + * for leftover data. (see above) + * In ideal case, we could allow NX_PAGE_SIZE * max_sg_len, + * but because data may not be aligned, we need to account + * for that too. 
*/ + to_process = min_t(u64, total, + (max_sg_len - 1 - used_sgs) * NX_PAGE_SIZE); + to_process = to_process & ~(SHA256_BLOCK_SIZE - 1); + data_len = to_process - buf_len; in_sg = nx_build_sg_list(in_sg, (u8 *) data, &data_len, max_sg_len); nx_ctx->op.inlen = (nx_ctx->in_sg - in_sg) * sizeof(struct nx_sg); - to_process = (data_len + buf_len); + to_process = data_len + buf_len; leftover = total - to_process; /* diff --git a/drivers/crypto/nx/nx-sha512.c b/drivers/crypto/nx/nx-sha512.c index aff0fe58eac0b..b6e183d58d73d 100644 --- a/drivers/crypto/nx/nx-sha512.c +++ b/drivers/crypto/nx/nx-sha512.c @@ -71,7 +71,6 @@ static int nx_sha512_update(struct shash_desc *desc, const u8 *data, struct sha512_state *sctx = shash_desc_ctx(desc); struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); struct nx_csbcpb *csbcpb = (struct nx_csbcpb *)nx_ctx->csbcpb; - struct nx_sg *in_sg; struct nx_sg *out_sg; u64 to_process, leftover = 0, total; unsigned long irq_flags; @@ -97,7 +96,6 @@ static int nx_sha512_update(struct shash_desc *desc, const u8 *data, NX_CPB_FDM(csbcpb) |= NX_FDM_INTERMEDIATE; NX_CPB_FDM(csbcpb) |= NX_FDM_CONTINUATION; - in_sg = nx_ctx->in_sg; max_sg_len = min_t(u64, nx_ctx->ap->sglen, nx_driver.of.max_sg_len/sizeof(struct nx_sg)); max_sg_len = min_t(u64, max_sg_len, @@ -114,18 +112,12 @@ static int nx_sha512_update(struct shash_desc *desc, const u8 *data, } do { - /* - * to_process: the SHA512_BLOCK_SIZE data chunk to process in - * this update. This value is also restricted by the sg list - * limits. - */ - to_process = total - leftover; - to_process = to_process & ~(SHA512_BLOCK_SIZE - 1); - leftover = total - to_process; + int used_sgs = 0; + struct nx_sg *in_sg = nx_ctx->in_sg; if (buf_len) { data_len = buf_len; - in_sg = nx_build_sg_list(nx_ctx->in_sg, + in_sg = nx_build_sg_list(in_sg, (u8 *) sctx->buf, &data_len, max_sg_len); @@ -133,8 +125,20 @@ static int nx_sha512_update(struct shash_desc *desc, const u8 *data, rc = -EINVAL; goto out; } + used_sgs = in_sg - nx_ctx->in_sg; } + /* to_process: SHA512_BLOCK_SIZE aligned chunk to be + * processed in this iteration. This value is restricted + * by sg list limits and number of sgs we already used + * for leftover data. (see above) + * In ideal case, we could allow NX_PAGE_SIZE * max_sg_len, + * but because data may not be aligned, we need to account + * for that too. */ + to_process = min_t(u64, total, + (max_sg_len - 1 - used_sgs) * NX_PAGE_SIZE); + to_process = to_process & ~(SHA512_BLOCK_SIZE - 1); + data_len = to_process - buf_len; in_sg = nx_build_sg_list(in_sg, (u8 *) data, &data_len, max_sg_len); @@ -146,7 +150,7 @@ static int nx_sha512_update(struct shash_desc *desc, const u8 *data, goto out; } - to_process = (data_len + buf_len); + to_process = data_len + buf_len; leftover = total - to_process; /* From 47c47104b9908154a09565122f3dc6cf82182ee2 Mon Sep 17 00:00:00 2001 From: Horia Geant? Date: Tue, 11 Aug 2015 20:19:20 +0300 Subject: [PATCH 0660/2091] crypto: caam - fix memory corruption in ahash_final_ctx commit b310c178e6d897f82abb9da3af1cd7c02b09f592 upstream. When doing pointer operation for accessing the HW S/G table, a value representing number of entries (and not number of bytes) must be used. Fixes: 045e36780f115 ("crypto: caam - ahash hmac support") Signed-off-by: Horia Geant? 
Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/caam/caamhash.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c index 332c8ef8dae2c..0436997e054b3 100644 --- a/drivers/crypto/caam/caamhash.c +++ b/drivers/crypto/caam/caamhash.c @@ -909,13 +909,14 @@ static int ahash_final_ctx(struct ahash_request *req) state->buflen_1; u32 *sh_desc = ctx->sh_desc_fin, *desc; dma_addr_t ptr = ctx->sh_desc_fin_dma; - int sec4_sg_bytes; + int sec4_sg_bytes, sec4_sg_src_index; int digestsize = crypto_ahash_digestsize(ahash); struct ahash_edesc *edesc; int ret = 0; int sh_len; - sec4_sg_bytes = (1 + (buflen ? 1 : 0)) * sizeof(struct sec4_sg_entry); + sec4_sg_src_index = 1 + (buflen ? 1 : 0); + sec4_sg_bytes = sec4_sg_src_index * sizeof(struct sec4_sg_entry); /* allocate space for base edesc and hw desc commands, link tables */ edesc = kmalloc(sizeof(struct ahash_edesc) + DESC_JOB_IO_LEN + @@ -942,7 +943,7 @@ static int ahash_final_ctx(struct ahash_request *req) state->buf_dma = try_buf_map_to_sec4_sg(jrdev, edesc->sec4_sg + 1, buf, state->buf_dma, buflen, last_buflen); - (edesc->sec4_sg + sec4_sg_bytes - 1)->len |= SEC4_SG_LEN_FIN; + (edesc->sec4_sg + sec4_sg_src_index - 1)->len |= SEC4_SG_LEN_FIN; edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg, sec4_sg_bytes, DMA_TO_DEVICE); From e344b9213af5de206f78bf5b8789351dc4cbabef Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Aug 2015 11:41:33 -0400 Subject: [PATCH 0661/2091] Revert "libata-eh: Set 'information' field for autosense" commit fe16d4f202c59a560533a223bc6375739ee30944 upstream. This reverts commit a1524f226a02aa6edebd90ae0752e97cfd78b159. As implemented, ACS-4 sense reporting for ATA devices bypasses error diagnosis and handling in libata degrading EH behavior significantly. Revert the related changes for now. Signed-off-by: Tejun Heo Cc: Hannes Reinecke Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libata-core.c | 4 ++-- drivers/ata/libata-eh.c | 3 --- drivers/ata/libata-scsi.c | 12 ------------ drivers/ata/libata.h | 5 +---- drivers/scsi/scsi_error.c | 31 ------------------------------- include/scsi/scsi_eh.h | 1 - 6 files changed, 3 insertions(+), 53 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 41c99be9bd412..336eb23f00b1e 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -694,11 +694,11 @@ static int ata_rwcmd_protocol(struct ata_taskfile *tf, struct ata_device *dev) * RETURNS: * Block address read from @tf. 
*/ -u64 ata_tf_read_block(const struct ata_taskfile *tf, struct ata_device *dev) +u64 ata_tf_read_block(struct ata_taskfile *tf, struct ata_device *dev) { u64 block = 0; - if (!dev || tf->flags & ATA_TFLAG_LBA) { + if (tf->flags & ATA_TFLAG_LBA) { if (tf->flags & ATA_TFLAG_LBA48) { block |= (u64)tf->hob_lbah << 40; block |= (u64)tf->hob_lbam << 32; diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 7465031a893c6..af08d32af4e0c 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -1864,7 +1864,6 @@ void ata_eh_analyze_ncq_error(struct ata_link *link) ata_dev_dbg(dev, "NCQ Autosense %02x/%02x/%02x\n", sense_key, asc, ascq); ata_scsi_set_sense(qc->scsicmd, sense_key, asc, ascq); - ata_scsi_set_sense_information(qc->scsicmd, &qc->result_tf); qc->flags |= ATA_QCFLAG_SENSE_VALID; } @@ -1907,8 +1906,6 @@ static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc, tmp = ata_eh_request_sense(qc, qc->scsicmd); if (tmp) qc->err_mask |= tmp; - else - ata_scsi_set_sense_information(qc->scsicmd, tf); } else { ata_dev_warn(qc->dev, "sense data available but port frozen\n"); } diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 641a61a59e89c..e1ecd2ab37249 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -280,18 +280,6 @@ void ata_scsi_set_sense(struct scsi_cmnd *cmd, u8 sk, u8 asc, u8 ascq) scsi_build_sense_buffer(0, cmd->sense_buffer, sk, asc, ascq); } -void ata_scsi_set_sense_information(struct scsi_cmnd *cmd, - const struct ata_taskfile *tf) -{ - u64 information; - - if (!cmd) - return; - - information = ata_tf_read_block(tf, NULL); - scsi_set_sense_information(cmd->sense_buffer, information); -} - static ssize_t ata_scsi_em_message_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h index a998a175f9f14..8cfdd9616d16c 100644 --- a/drivers/ata/libata.h +++ b/drivers/ata/libata.h @@ -67,8 +67,7 @@ extern struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev, int tag); extern int ata_build_rw_tf(struct ata_taskfile *tf, struct ata_device *dev, u64 block, u32 n_block, unsigned int tf_flags, unsigned int tag); -extern u64 ata_tf_read_block(const struct ata_taskfile *tf, - struct ata_device *dev); +extern u64 ata_tf_read_block(struct ata_taskfile *tf, struct ata_device *dev); extern unsigned ata_exec_internal(struct ata_device *dev, struct ata_taskfile *tf, const u8 *cdb, int dma_dir, void *buf, unsigned int buflen, @@ -139,8 +138,6 @@ extern int ata_scsi_add_hosts(struct ata_host *host, extern void ata_scsi_scan_host(struct ata_port *ap, int sync); extern int ata_scsi_offline_dev(struct ata_device *dev); extern void ata_scsi_set_sense(struct scsi_cmnd *cmd, u8 sk, u8 asc, u8 ascq); -extern void ata_scsi_set_sense_information(struct scsi_cmnd *cmd, - const struct ata_taskfile *tf); extern void ata_scsi_media_change_notify(struct ata_device *dev); extern void ata_scsi_hotplug(struct work_struct *work); extern void ata_schedule_scsi_eh(struct Scsi_Host *shost); diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 59c31bf88d922..ce6c770d74d51 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include @@ -2587,33 +2586,3 @@ void scsi_build_sense_buffer(int desc, u8 *buf, u8 key, u8 asc, u8 ascq) } } EXPORT_SYMBOL(scsi_build_sense_buffer); - -/** - * scsi_set_sense_information - set the information field in a - * formatted 
sense data buffer - * @buf: Where to build sense data - * @info: 64-bit information value to be set - * - **/ -void scsi_set_sense_information(u8 *buf, u64 info) -{ - if ((buf[0] & 0x7f) == 0x72) { - u8 *ucp, len; - - len = buf[7]; - ucp = (char *)scsi_sense_desc_find(buf, len + 8, 0); - if (!ucp) { - buf[7] = len + 0xa; - ucp = buf + 8 + len; - } - ucp[0] = 0; - ucp[1] = 0xa; - ucp[2] = 0x80; /* Valid bit */ - ucp[3] = 0; - put_unaligned_be64(info, &ucp[4]); - } else if ((buf[0] & 0x7f) == 0x70) { - buf[0] |= 0x80; - put_unaligned_be64(info, &buf[3]); - } -} -EXPORT_SYMBOL(scsi_set_sense_information); diff --git a/include/scsi/scsi_eh.h b/include/scsi/scsi_eh.h index 5a4bb5bb66b3b..1e1421b06565c 100644 --- a/include/scsi/scsi_eh.h +++ b/include/scsi/scsi_eh.h @@ -59,7 +59,6 @@ extern int scsi_get_sense_info_fld(const u8 * sense_buffer, int sb_len, u64 * info_out); extern void scsi_build_sense_buffer(int desc, u8 *buf, u8 key, u8 asc, u8 ascq); -extern void scsi_set_sense_information(u8 *buf, u64 info); extern int scsi_ioctl_reset(struct scsi_device *, int __user *); From 0a72deb0923c023b3e84087a5ae2a94fd4daf43c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Aug 2015 11:45:34 -0400 Subject: [PATCH 0662/2091] Revert "libata: Implement support for sense data reporting" commit 84ded2f8e7dda336fc2fb3570726ceb3b3b3590f upstream. This reverts commit fe7173c206de63fc28475ee6ae42ff95c05692de. As implemented, ACS-4 sense reporting for ATA devices bypasses error diagnosis and handling in libata degrading EH behavior significantly. Revert the related changes for now. ATA_ID_COMMAND_SET_3/4 constants are not reverted as they're used by later changes. Signed-off-by: Tejun Heo Cc: Hannes Reinecke Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libata-core.c | 20 +-------- drivers/ata/libata-eh.c | 86 ++------------------------------------- include/linux/ata.h | 16 -------- 3 files changed, 4 insertions(+), 118 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 336eb23f00b1e..fb2a208736ba6 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -2147,24 +2147,6 @@ static int ata_dev_config_ncq(struct ata_device *dev, return 0; } -static void ata_dev_config_sense_reporting(struct ata_device *dev) -{ - unsigned int err_mask; - - if (!ata_id_has_sense_reporting(dev->id)) - return; - - if (ata_id_sense_reporting_enabled(dev->id)) - return; - - err_mask = ata_dev_set_feature(dev, SETFEATURE_SENSE_DATA, 0x1); - if (err_mask) { - ata_dev_dbg(dev, - "failed to enable Sense Data Reporting, Emask 0x%x\n", - err_mask); - } -} - /** * ata_dev_configure - Configure the specified ATA/ATAPI device * @dev: Target device to configure @@ -2387,7 +2369,7 @@ int ata_dev_configure(struct ata_device *dev) dev->devslp_timing[i] = sata_setting[j]; } } - ata_dev_config_sense_reporting(dev); + dev->cdb_len = 16; } diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index af08d32af4e0c..16125be34893b 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -1629,70 +1629,6 @@ unsigned int atapi_eh_tur(struct ata_device *dev, u8 *r_sense_key) return err_mask; } -/** - * ata_eh_request_sense - perform REQUEST_SENSE_DATA_EXT - * @dev: device to perform REQUEST_SENSE_SENSE_DATA_EXT to - * @sense_buf: result sense data buffer (SCSI_SENSE_BUFFERSIZE bytes long) - * @dfl_sense_key: default sense key to use - * - * Perform REQUEST_SENSE_DATA_EXT after the device reported CHECK - * SENSE. This function is EH helper. 
- * - * LOCKING: - * Kernel thread context (may sleep). - * - * RETURNS: - * encoded sense data on success, 0 on failure or if sense data - * is not available. - */ -static u32 ata_eh_request_sense(struct ata_queued_cmd *qc, - struct scsi_cmnd *cmd) -{ - struct ata_device *dev = qc->dev; - struct ata_taskfile tf; - unsigned int err_mask; - - if (!cmd) - return 0; - - DPRINTK("ATA request sense\n"); - ata_dev_warn(dev, "request sense\n"); - if (!ata_id_sense_reporting_enabled(dev->id)) { - ata_dev_warn(qc->dev, "sense data reporting disabled\n"); - return 0; - } - ata_tf_init(dev, &tf); - - tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE; - tf.flags |= ATA_TFLAG_LBA | ATA_TFLAG_LBA48; - tf.command = ATA_CMD_REQ_SENSE_DATA; - tf.protocol = ATA_PROT_NODATA; - - err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0); - /* - * ACS-4 states: - * The device may set the SENSE DATA AVAILABLE bit to one in the - * STATUS field and clear the ERROR bit to zero in the STATUS field - * to indicate that the command returned completion without an error - * and the sense data described in table 306 is available. - * - * IOW the 'ATA_SENSE' bit might not be set even though valid - * sense data is available. - * So check for both. - */ - if ((tf.command & ATA_SENSE) || - tf.lbah != 0 || tf.lbam != 0 || tf.lbal != 0) { - ata_scsi_set_sense(cmd, tf.lbah, tf.lbam, tf.lbal); - qc->flags |= ATA_QCFLAG_SENSE_VALID; - ata_dev_warn(dev, "sense data %02x/%02x/%02x\n", - tf.lbah, tf.lbam, tf.lbal); - } else { - ata_dev_warn(dev, "request sense failed stat %02x emask %x\n", - tf.command, err_mask); - } - return err_mask; -} - /** * atapi_eh_request_sense - perform ATAPI REQUEST_SENSE * @dev: device to perform REQUEST_SENSE to @@ -1896,22 +1832,7 @@ static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc, return ATA_EH_RESET; } - /* - * Sense data reporting does not work if the - * device fault bit is set. - */ - if ((stat & ATA_SENSE) && !(stat & ATA_DF) && - !(qc->flags & ATA_QCFLAG_SENSE_VALID)) { - if (!(qc->ap->pflags & ATA_PFLAG_FROZEN)) { - tmp = ata_eh_request_sense(qc, qc->scsicmd); - if (tmp) - qc->err_mask |= tmp; - } else { - ata_dev_warn(qc->dev, "sense data available but port frozen\n"); - } - } - - /* Set by NCQ autosense or request sense above */ + /* Set by NCQ autosense */ if (qc->flags & ATA_QCFLAG_SENSE_VALID) return 0; @@ -2658,15 +2579,14 @@ static void ata_eh_link_report(struct ata_link *link) #ifdef CONFIG_ATA_VERBOSE_ERROR if (res->command & (ATA_BUSY | ATA_DRDY | ATA_DF | ATA_DRQ | - ATA_SENSE | ATA_ERR)) { + ATA_ERR)) { if (res->command & ATA_BUSY) ata_dev_err(qc->dev, "status: { Busy }\n"); else - ata_dev_err(qc->dev, "status: { %s%s%s%s%s}\n", + ata_dev_err(qc->dev, "status: { %s%s%s%s}\n", res->command & ATA_DRDY ? "DRDY " : "", res->command & ATA_DF ? "DF " : "", res->command & ATA_DRQ ? "DRQ " : "", - res->command & ATA_SENSE ? "SENSE " : "", res->command & ATA_ERR ? 
"ERR " : ""); } diff --git a/include/linux/ata.h b/include/linux/ata.h index 533dbb6428f5a..3395536ca89b6 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -385,8 +385,6 @@ enum { SATA_SSP = 0x06, /* Software Settings Preservation */ SATA_DEVSLP = 0x09, /* Device Sleep */ - SETFEATURE_SENSE_DATA = 0xC3, /* Sense Data Reporting feature */ - /* feature values for SET_MAX */ ATA_SET_MAX_ADDR = 0x00, ATA_SET_MAX_PASSWD = 0x01, @@ -710,20 +708,6 @@ static inline bool ata_id_has_read_log_dma_ext(const u16 *id) return id[ATA_ID_COMMAND_SET_3] & (1 << 3); } -static inline bool ata_id_has_sense_reporting(const u16 *id) -{ - if (!(id[ATA_ID_CFS_ENABLE_2] & (1 << 15))) - return false; - return id[ATA_ID_COMMAND_SET_3] & (1 << 6); -} - -static inline bool ata_id_sense_reporting_enabled(const u16 *id) -{ - if (!(id[ATA_ID_CFS_ENABLE_2] & (1 << 15))) - return false; - return id[ATA_ID_COMMAND_SET_4] & (1 << 6); -} - /** * ata_id_major_version - get ATA level of drive * @id: Identify data From 4cd8e7b8c016b3ba1104341fa86c000af33850aa Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Aug 2015 11:46:39 -0400 Subject: [PATCH 0663/2091] Revert "libata: Implement NCQ autosense" commit 74a80d67b8316eb3fbeb73dafc060a5a0a708587 upstream. This reverts commit 42b966fbf35da9c87f08d98f9b8978edf9e717cf. As implemented, ACS-4 sense reporting for ATA devices bypasses error diagnosis and handling in libata degrading EH behavior significantly. Revert the related changes for now. Signed-off-by: Tejun Heo Cc: Hannes Reinecke Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libata-eh.c | 18 ------------------ drivers/ata/libata-scsi.c | 9 ++------- drivers/ata/libata.h | 1 - include/linux/ata.h | 2 -- 4 files changed, 2 insertions(+), 28 deletions(-) diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 16125be34893b..cb0508af1459a 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -1592,8 +1592,6 @@ static int ata_eh_read_log_10h(struct ata_device *dev, tf->hob_lbah = buf[10]; tf->nsect = buf[12]; tf->hob_nsect = buf[13]; - if (ata_id_has_ncq_autosense(dev->id)) - tf->auxiliary = buf[14] << 16 | buf[15] << 8 | buf[16]; return 0; } @@ -1791,18 +1789,6 @@ void ata_eh_analyze_ncq_error(struct ata_link *link) memcpy(&qc->result_tf, &tf, sizeof(tf)); qc->result_tf.flags = ATA_TFLAG_ISADDR | ATA_TFLAG_LBA | ATA_TFLAG_LBA48; qc->err_mask |= AC_ERR_DEV | AC_ERR_NCQ; - if (qc->result_tf.auxiliary) { - char sense_key, asc, ascq; - - sense_key = (qc->result_tf.auxiliary >> 16) & 0xff; - asc = (qc->result_tf.auxiliary >> 8) & 0xff; - ascq = qc->result_tf.auxiliary & 0xff; - ata_dev_dbg(dev, "NCQ Autosense %02x/%02x/%02x\n", - sense_key, asc, ascq); - ata_scsi_set_sense(qc->scsicmd, sense_key, asc, ascq); - qc->flags |= ATA_QCFLAG_SENSE_VALID; - } - ehc->i.err_mask &= ~AC_ERR_DEV; } @@ -1832,10 +1818,6 @@ static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc, return ATA_EH_RESET; } - /* Set by NCQ autosense */ - if (qc->flags & ATA_QCFLAG_SENSE_VALID) - return 0; - if (stat & (ATA_ERR | ATA_DF)) qc->err_mask |= AC_ERR_DEV; else diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index e1ecd2ab37249..0d7f0da3a2692 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -270,11 +270,8 @@ DEVICE_ATTR(unload_heads, S_IRUGO | S_IWUSR, ata_scsi_park_show, ata_scsi_park_store); EXPORT_SYMBOL_GPL(dev_attr_unload_heads); -void ata_scsi_set_sense(struct scsi_cmnd *cmd, u8 sk, u8 asc, u8 ascq) +static void ata_scsi_set_sense(struct scsi_cmnd *cmd, u8 sk, 
u8 asc, u8 ascq) { - if (!cmd) - return; - cmd->result = (DRIVER_SENSE << 24) | SAM_STAT_CHECK_CONDITION; scsi_build_sense_buffer(0, cmd->sense_buffer, sk, asc, ascq); @@ -1780,9 +1777,7 @@ static void ata_scsi_qc_complete(struct ata_queued_cmd *qc) ((cdb[2] & 0x20) || need_sense)) { ata_gen_passthru_sense(qc); } else { - if (qc->flags & ATA_QCFLAG_SENSE_VALID) { - cmd->result = SAM_STAT_CHECK_CONDITION; - } else if (!need_sense) { + if (!need_sense) { cmd->result = SAM_STAT_GOOD; } else { /* TODO: decide which descriptor format to use diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h index 8cfdd9616d16c..f840ca18a7c01 100644 --- a/drivers/ata/libata.h +++ b/drivers/ata/libata.h @@ -137,7 +137,6 @@ extern int ata_scsi_add_hosts(struct ata_host *host, struct scsi_host_template *sht); extern void ata_scsi_scan_host(struct ata_port *ap, int sync); extern int ata_scsi_offline_dev(struct ata_device *dev); -extern void ata_scsi_set_sense(struct scsi_cmnd *cmd, u8 sk, u8 asc, u8 ascq); extern void ata_scsi_media_change_notify(struct ata_device *dev); extern void ata_scsi_hotplug(struct work_struct *work); extern void ata_schedule_scsi_eh(struct Scsi_Host *shost); diff --git a/include/linux/ata.h b/include/linux/ata.h index 3395536ca89b6..5dfbcd8887bb5 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -528,8 +528,6 @@ struct ata_bmdma_prd { #define ata_id_cdb_intr(id) (((id)[ATA_ID_CONFIG] & 0x60) == 0x20) #define ata_id_has_da(id) ((id)[ATA_ID_SATA_CAPABILITY_2] & (1 << 4)) #define ata_id_has_devslp(id) ((id)[ATA_ID_FEATURE_SUPP] & (1 << 8)) -#define ata_id_has_ncq_autosense(id) \ - ((id)[ATA_ID_FEATURE_SUPP] & (1 << 7)) static inline bool ata_id_has_hipm(const u16 *id) { From 2f9de0cc23d064bcae6756976a7560a55290514e Mon Sep 17 00:00:00 2001 From: Alban Crequy Date: Thu, 6 Aug 2015 16:21:05 +0200 Subject: [PATCH 0664/2091] cpuset: use trialcs->mems_allowed as a temp variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 24ee3cf89bef04e8bc23788aca4e029a3f0f06d9 upstream. The comment says it's using trialcs->mems_allowed as a temp variable but it didn't match the code. Change the code to match the comment. This fixes an issue when writing in cpuset.mems when a sub-directory exists: we need to write several times for the information to persist: | root@alban:/sys/fs/cgroup/cpuset# mkdir footest9 | root@alban:/sys/fs/cgroup/cpuset# cd footest9 | root@alban:/sys/fs/cgroup/cpuset/footest9# mkdir aa | root@alban:/sys/fs/cgroup/cpuset/footest9# cat cpuset.mems | | root@alban:/sys/fs/cgroup/cpuset/footest9# echo 0 > cpuset.mems | root@alban:/sys/fs/cgroup/cpuset/footest9# cat cpuset.mems | | root@alban:/sys/fs/cgroup/cpuset/footest9# echo 0 > cpuset.mems | root@alban:/sys/fs/cgroup/cpuset/footest9# cat cpuset.mems | 0 | root@alban:/sys/fs/cgroup/cpuset/footest9# cat aa/cpuset.mems | | root@alban:/sys/fs/cgroup/cpuset/footest9# echo 0 > aa/cpuset.mems | root@alban:/sys/fs/cgroup/cpuset/footest9# cat aa/cpuset.mems | 0 | root@alban:/sys/fs/cgroup/cpuset/footest9# This should help to fix the following issue in Docker: https://github.com/opencontainers/runc/issues/133 In some conditions, a Docker container needs to be started twice in order to work. 
Signed-off-by: Alban Crequy Tested-by: Iago López Galeiras Acked-by: Li Zefan Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- kernel/cpuset.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/cpuset.c b/kernel/cpuset.c index ee14e3a35a299..f0acff0f66c91 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1223,7 +1223,7 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs, spin_unlock_irq(&callback_lock); /* use trialcs->mems_allowed as a temp variable */ - update_nodemasks_hier(cs, &cs->mems_allowed); + update_nodemasks_hier(cs, &trialcs->mems_allowed); done: return retval; } From 56577410e1e08afcc1a37d0b75d529e8be2f03d0 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 19 Aug 2015 07:20:14 +0200 Subject: [PATCH 0665/2091] ALSA: usb-audio: Fix runtime PM unbalance commit 9003ebb13f61e8c78a641e0dda7775183ada0625 upstream. The fix for deadlock in PM in commit [1ee23fe07ee8: ALSA: usb-audio: Fix deadlocks at resuming] introduced a new check of in_pm flag. However, the brainless patch author evaluated it in a wrong way (logical AND instead of logical OR), thus usb_autopm_get_interface() is wrongly called at probing, leading to unbalance of runtime PM refcount. This patch fixes it by correcting the logic. Reported-by: Hans Yang Fixes: 1ee23fe07ee8 ('ALSA: usb-audio: Fix deadlocks at resuming') Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/card.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/usb/card.c b/sound/usb/card.c index 1fab9778807a0..0450593980fd3 100644 --- a/sound/usb/card.c +++ b/sound/usb/card.c @@ -638,7 +638,7 @@ int snd_usb_autoresume(struct snd_usb_audio *chip) int err = -ENODEV; down_read(&chip->shutdown_rwsem); - if (chip->probing && chip->in_pm) + if (chip->probing || chip->in_pm) err = 0; else if (!chip->shutdown) err = usb_autopm_get_interface(chip->pm_intf); From 2e4728c6359824fcbd46dad5a3acc85f441ecf6f Mon Sep 17 00:00:00 2001 From: Woodrow Shen Date: Thu, 13 Aug 2015 11:20:40 +0800 Subject: [PATCH 0666/2091] ALSA: hda - Fix the white noise on Dell laptop commit 7ccb0a9917a511de1d5f92980f26885484d9a914 upstream. Dell laptop causes the white noise by login screen and headphone, and the fixup function ALC292_FIXUP_DISABLE_AAMIX can eliminate this noise. 
Codec: Realtek ALC3235 Vendor Id: 0x10ec0293 Subsystem Id: 0x102806db BugLink: https://bugs.launchpad.net/bugs/1484334 Signed-off-by: Woodrow Shen Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 1e99f075a5ab7..91f6928560e18 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5119,6 +5119,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x06d9, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x06da, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x06de, "Dell", ALC292_FIXUP_DISABLE_AAMIX), + SND_PCI_QUIRK(0x1028, 0x06db, "Dell", ALC292_FIXUP_DISABLE_AAMIX), SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2), From e6212cca33e130cf5c5222a3f5b3608564c2cd49 Mon Sep 17 00:00:00 2001 From: Jurgen Kramer Date: Fri, 21 Aug 2015 09:48:35 +0200 Subject: [PATCH 0667/2091] ALSA: usb: Add native DSD support for Gustard DAC-X20U commit 9544f8b6e2ee9ed02d2322ff018837b185f51d45 upstream. This patch adds native DSD support for the Gustard DAC-X20U. Signed-off-by: Jurgen Kramer Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 754e689596a21..00ebc0ca008e0 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1268,6 +1268,7 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip, return SNDRV_PCM_FMTBIT_DSD_U32_BE; break; + case USB_ID(0x20b1, 0x000a): /* Gustard DAC-X20U */ case USB_ID(0x20b1, 0x2009): /* DIYINHK DSD DXD 384kHz USB to I2S/DSD */ case USB_ID(0x20b1, 0x2023): /* JLsounds I2SoverUSB */ if (fp->altsetting == 3) From f27db157e2bfc330e7e6599b9427c7e9821d646f Mon Sep 17 00:00:00 2001 From: David Henningsson Date: Fri, 21 Aug 2015 09:42:35 +0200 Subject: [PATCH 0668/2091] ALSA: hda - Shutdown CX20722 on reboot/free to avoid spurious noises commit f6b28e4ded45bb91bc4cd115d55e35badedfce5f upstream. On shutdown/reboot of CX20722, first shut down all EAPDs, then shut down the afg node to D3. Failure to do so can lead to spurious noises from the internal speaker directly after reboot (and before the codec is reinitialized again, i e in BIOS setup or GRUB menus). 
BugLink: https://bugs.launchpad.net/bugs/1487345 Signed-off-by: David Henningsson Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_conexant.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index 78b719b5b34dd..06cc9d57ba3da 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -200,12 +200,33 @@ static int cx_auto_init(struct hda_codec *codec) return 0; } -#define cx_auto_free snd_hda_gen_free +static void cx_auto_reboot_notify(struct hda_codec *codec) +{ + struct conexant_spec *spec = codec->spec; + + if (codec->core.vendor_id != 0x14f150f2) + return; + + /* Turn the CX20722 codec into D3 to avoid spurious noises + from the internal speaker during (and after) reboot */ + cx_auto_turn_eapd(codec, spec->num_eapds, spec->eapds, false); + + snd_hda_codec_set_power_to_all(codec, codec->core.afg, AC_PWRST_D3); + snd_hda_codec_write(codec, codec->core.afg, 0, + AC_VERB_SET_POWER_STATE, AC_PWRST_D3); +} + +static void cx_auto_free(struct hda_codec *codec) +{ + cx_auto_reboot_notify(codec); + snd_hda_gen_free(codec); +} static const struct hda_codec_ops cx_auto_patch_ops = { .build_controls = cx_auto_build_controls, .build_pcms = snd_hda_gen_build_pcms, .init = cx_auto_init, + .reboot_notify = cx_auto_reboot_notify, .free = cx_auto_free, .unsol_event = snd_hda_jack_unsol_event, #ifdef CONFIG_PM From 93bebe57621df3753f8b533dbfbd055438d52534 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 24 Aug 2015 10:45:27 +0200 Subject: [PATCH 0669/2091] ALSA: hda - Check all inputs for is_active_nid_for_any() commit 9d2b48f7304aafaefbf0794a556ab4e307929d24 upstream. The is_active_nid_for_any() function in the generic parser is supposed to check all connections from/to the given widget, but the current code checks only the first input connection (index = 0). This patch corrects the code to check all inputs by passing -1 to index argument. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=102521 Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_generic.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c index ac0db1679f098..7e896092c6211 100644 --- a/sound/pci/hda/hda_generic.c +++ b/sound/pci/hda/hda_generic.c @@ -671,7 +671,8 @@ static bool is_active_nid(struct hda_codec *codec, hda_nid_t nid, } for (i = 0; i < path->depth; i++) { if (path->path[i] == nid) { - if (dir == HDA_OUTPUT || path->idx[i] == idx) + if (dir == HDA_OUTPUT || idx == -1 || + path->idx[i] == idx) return true; break; } @@ -682,7 +683,7 @@ static bool is_active_nid(struct hda_codec *codec, hda_nid_t nid, /* check whether the NID is referred by any active paths */ #define is_active_nid_for_any(codec, nid) \ - is_active_nid(codec, nid, HDA_OUTPUT, 0) + is_active_nid(codec, nid, HDA_OUTPUT, -1) /* get the default amp value for the target state */ static int get_amp_val_to_activate(struct hda_codec *codec, hda_nid_t nid, From ac44e50e61aaa6ef939970ad1532ce7a274a0ca5 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 24 Aug 2015 10:52:06 +0200 Subject: [PATCH 0670/2091] ALSA: hda - Fix path power activation commit c7cd0ef66aade29e37ee08821a0e195ee776c6e6 upstream. The widget power-saving code tries to turn up/down the power of each widget in the I/O paths that are modified at each jack plug/unplug. 
The recent report revealed that the power activation leaves some widgets unpowered after plugging. This is because snd_hda_activate_path() turns on path->active flag at the end of the function while the path power management is done before that. Then it's regarded as if nothing is active, and the driver turns off the power. The fix is simply to set the flag at the beginning of the function, before trying to power up. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=102521 Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_generic.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c index 7e896092c6211..5bc7f2e2715cb 100644 --- a/sound/pci/hda/hda_generic.c +++ b/sound/pci/hda/hda_generic.c @@ -884,8 +884,7 @@ void snd_hda_activate_path(struct hda_codec *codec, struct nid_path *path, struct hda_gen_spec *spec = codec->spec; int i; - if (!enable) - path->active = false; + path->active = enable; /* make sure the widget is powered up */ if (enable && (spec->power_down_unused || codec->power_save_node)) @@ -903,9 +902,6 @@ void snd_hda_activate_path(struct hda_codec *codec, struct nid_path *path, if (has_amp_out(codec, path, i)) activate_amp_out(codec, path, i, enable); } - - if (enable) - path->active = true; } EXPORT_SYMBOL_GPL(snd_hda_activate_path); From 360c2a1448754b76fbf742b6270f5da1d4db3700 Mon Sep 17 00:00:00 2001 From: Markus Osterhoff Date: Mon, 24 Aug 2015 14:11:39 +0200 Subject: [PATCH 0671/2091] ALSA: hda: fix possible NULL dereference commit c7e69ae6b4ff49edf50180c0a32f3dd9d7967e31 upstream. After a for-loop was replaced by list_for_each_entry, see Commit bbbc7e8502c9 ("ALSA: hda - Allocate hda_pcm objects dynamically"), Commit 751e2216899c ("ALSA: hda: fix possible null dereference"), a possible NULL pointer dereference has been introduced; this patch adds the NULL check on pcm->pcm, while leaving a potentially superfluous check on pcm itself untouched. Signed-off-by: Markus Osterhoff Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_codec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index 5645481af3d95..36e8f12366371 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -3259,7 +3259,7 @@ static int add_std_chmaps(struct hda_codec *codec) struct snd_pcm_chmap *chmap; const struct snd_pcm_chmap_elem *elem; - if (!pcm || pcm->own_chmap || + if (!pcm || !pcm->pcm || pcm->own_chmap || !hinfo->substreams) continue; elem = hinfo->chmap ? hinfo->chmap : snd_pcm_std_chmaps; From 82cb8352e372319b2fc57bd6ed34c726a37fe24b Mon Sep 17 00:00:00 2001 From: Adrien Schildknecht Date: Tue, 28 Jul 2015 10:30:16 +0200 Subject: [PATCH 0672/2091] mac80211: fix invalid read in minstrel_sort_best_tp_rates() commit f5eeb5fa191fd7b634cbc4883ac58f3b2184dbc5 upstream. At the last iteration of the loop, j may equal zero and thus tp_list[j - 1] causes an invalid read. Change the logic of the loop so that j - 1 is always >= 0. 
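The bound reasoning above is easier to see on a toy version of the loop. The sketch below is not the mac80211 code and its data is invented; it only reproduces the rewritten loop shape to show that j is tested against 0 before list[j - 1] is ever read, whereas the old while-loop could decrement j to 0 on its final pass and then index the array at -1:

  #include <stdio.h>

  #define MAX_THR_RATES 4

  /* Illustrative only: slot where candidate rate 'cand' would be inserted;
   * a return of MAX_THR_RATES means it does not make the top list. */
  static int find_insert_pos(const int *tp_avg, int cand,
                             const unsigned char *list)
  {
          int j;

          for (j = MAX_THR_RATES; j > 0; --j) {
                  if (tp_avg[cand] <= tp_avg[list[j - 1]])
                          break;  /* list[j - 1] is at least as good */
          }
          return j;
  }

  int main(void)
  {
          int tp_avg[] = { 50, 90, 10, 70, 30 };               /* per-rate throughput */
          unsigned char list[MAX_THR_RATES] = { 1, 3, 0, 4 };  /* best first */

          printf("candidate 2 would land in slot %d\n",
                 find_insert_pos(tp_avg, 2, list));
          return 0;
  }
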
Signed-off-by: Adrien Schildknecht Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/rc80211_minstrel.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index 247552a7f6c2f..3ece7d1034c81 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -92,14 +92,15 @@ int minstrel_get_tp_avg(struct minstrel_rate *mr, int prob_ewma) static inline void minstrel_sort_best_tp_rates(struct minstrel_sta_info *mi, int i, u8 *tp_list) { - int j = MAX_THR_RATES; - struct minstrel_rate_stats *tmp_mrs = &mi->r[j - 1].stats; + int j; + struct minstrel_rate_stats *tmp_mrs; struct minstrel_rate_stats *cur_mrs = &mi->r[i].stats; - while (j > 0 && (minstrel_get_tp_avg(&mi->r[i], cur_mrs->prob_ewma) > - minstrel_get_tp_avg(&mi->r[tp_list[j - 1]], tmp_mrs->prob_ewma))) { - j--; + for (j = MAX_THR_RATES; j > 0; --j) { tmp_mrs = &mi->r[tp_list[j - 1]].stats; + if (minstrel_get_tp_avg(&mi->r[i], cur_mrs->prob_ewma) <= + minstrel_get_tp_avg(&mi->r[tp_list[j - 1]], tmp_mrs->prob_ewma)) + break; } if (j < MAX_THR_RATES - 1) From e1768fd6fdba1345321ab350f60ec109cb832c01 Mon Sep 17 00:00:00 2001 From: Alexei Potashnik Date: Tue, 21 Jul 2015 15:07:56 -0700 Subject: [PATCH 0673/2091] target/iscsi: Fix double free of a TUR followed by a solicited NOPOUT commit 9547308bda296b6f69876c840a0291fcfbeddbb8 upstream. Make sure all non-READ SCSI commands get targ_xfer_tag initialized to 0xffffffff, not just WRITEs. Double-free of a TUR cmd object occurs under the following scenario: 1. TUR received (targ_xfer_tag is uninitialized and left at 0) 2. TUR status sent 3. First unsolicited NOPIN is sent to initiator (gets targ_xfer_tag of 0) 4. NOPOUT for NOPIN (with TTT=0) arrives - its ExpStatSN acks TUR status, TUR is queued for removal - LIO tries to find NOPIN with TTT=0, but finds the same TUR instead, TUR is queued for removal for the 2nd time (Drop unbalanced conditional bracket usage - nab) Signed-off-by: Alexei Potashnik Signed-off-by: Spencer Baugh Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/target/iscsi/iscsi_target.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 305a5cbc099a6..0ab6e2efd28ce 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -968,9 +968,9 @@ int iscsit_setup_scsi_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, cmd->cmd_flags |= ICF_NON_IMMEDIATE_UNSOLICITED_DATA; conn->sess->init_task_tag = cmd->init_task_tag = hdr->itt; - if (hdr->flags & ISCSI_FLAG_CMD_READ) { + if (hdr->flags & ISCSI_FLAG_CMD_READ) cmd->targ_xfer_tag = session_get_next_ttt(conn->sess); - } else if (hdr->flags & ISCSI_FLAG_CMD_WRITE) + else cmd->targ_xfer_tag = 0xFFFFFFFF; cmd->cmd_sn = be32_to_cpu(hdr->cmdsn); cmd->exp_stat_sn = be32_to_cpu(hdr->exp_statsn); From 7aadce453a529fdca8d8e2d4d7d494f9646748c4 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 20 Aug 2015 00:08:15 -0500 Subject: [PATCH 0674/2091] PCI: Don't use 64-bit bus addresses on PA-RISC commit 45ea2a5fed6dacb9bb0558d8b21eacc1c45d5bb4 upstream. Meelis and Helge reported that 3a9ad0b4fdcd ("PCI: Add pci_bus_addr_t") caused HPMCs on A500 and hangs on rp5470. PA-RISC does not set ARCH_DMA_ADDR_T_64BIT, even for 64-bit kernels, so prior to 3a9ad0b4fdcd, we always used 32-bit PCI addresses. 
After 3a9ad0b4fdcd, we do use 64-bit PCI addresses in 64-bit kernels, and apparently there's some PA-RISC problem related to them. Fixes: 3a9ad0b4fdcd ("PCI: Add pci_bus_addr_t") Link: http://lkml.kernel.org/r/alpine.LRH.2.11.1507260929000.30065@math.ut.ee Reported-by: Meelis Roos Reported-by: Helge Deller Tested-by: Helge Deller Based-on-idea-by: Yinghai Lu Signed-off-by: Bjorn Helgaas Acked-by: Yinghai Lu Signed-off-by: Greg Kroah-Hartman --- drivers/pci/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig index 73de4efcbe6ed..944f50015ed07 100644 --- a/drivers/pci/Kconfig +++ b/drivers/pci/Kconfig @@ -2,7 +2,7 @@ # PCI configuration # config PCI_BUS_ADDR_T_64BIT - def_bool y if (ARCH_DMA_ADDR_T_64BIT || 64BIT) + def_bool y if (ARCH_DMA_ADDR_T_64BIT || (64BIT && !PARISC)) depends on PCI config PCI_MSI From ca1868ab9206637d98316a19e56a477b7a69432d Mon Sep 17 00:00:00 2001 From: Vincent Pelletier Date: Thu, 20 Aug 2015 12:00:19 -0700 Subject: [PATCH 0675/2091] Input: gpio_keys_polled - request GPIO pin as input. commit 1ae5ddb6f8837558928a1a694c7b8af7f09fdd21 upstream. GPIOF_IN flag was lost in: Commit 633a21d80b4a("input: gpio_keys_polled: Add support for GPIO descriptors"). Without this flag, legacy code path (for non-descriptor GPIO declarations) would configure GPIO as output (0 meaning GPIOF_DIR_OUT | GPIOF_INIT_LOW). Signed-off-by: Vincent Pelletier Reviewed-by: Mika Westerberg Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/keyboard/gpio_keys_polled.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/keyboard/gpio_keys_polled.c b/drivers/input/keyboard/gpio_keys_polled.c index 097d7216d98ee..c6dc644aa5806 100644 --- a/drivers/input/keyboard/gpio_keys_polled.c +++ b/drivers/input/keyboard/gpio_keys_polled.c @@ -246,7 +246,7 @@ static int gpio_keys_polled_probe(struct platform_device *pdev) * convert it to descriptor. */ if (!button->gpiod && gpio_is_valid(button->gpio)) { - unsigned flags = 0; + unsigned flags = GPIOF_IN; if (button->active_low) flags |= GPIOF_ACTIVE_LOW; From c407ed50a8f9d0be5839a7af9a4a6e14aea8cb33 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 14 Aug 2015 13:58:20 +0200 Subject: [PATCH 0676/2091] drm/atmel-hlcdc: Compile suspend/resume for PM_SLEEP only commit dbb3df2d58754e4df58620e60370d166c2cb6744 upstream. If PM is enabled but PM_SLEEP is disabled, the suspend/resume functions are still unused and produce a compiler warning. Signed-off-by: Thierry Reding Signed-off-by: Boris Brezillon Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c index 60b0c13d7ff5c..aebc4595afa02 100644 --- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c +++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c @@ -559,7 +559,7 @@ static int atmel_hlcdc_dc_drm_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM +#ifdef CONFIG_PM_SLEEP static int atmel_hlcdc_dc_drm_suspend(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); From 8de8663a93eee38e82bcdd511d2b4f9e99de02a0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 14 Aug 2015 12:59:19 +0100 Subject: [PATCH 0677/2091] drm/i915: Flag the execlists context object as dirty after every use commit 903ecd0bb970438c3a60c2c33ec9032d6443bf67 upstream. 
Everytime we use the logical context with execlists it becomes dirty (as the hardware will write the new register values afterwards, as well as the GPU state that will be used). We need to then flag the context as dirty everytime since after a swap-out/swap-in cycle the dirty flag will be cleared, and a further swap-out cycle will then loose the most recent GPU state. Signed-off-by: Chris Wilson Reviewed-by: Daniel Vetter Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_lrc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 424e621977871..9ab7c1c758aeb 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -848,6 +848,8 @@ static int intel_lr_context_pin(struct intel_engine_cs *ring, ret = intel_pin_and_map_ringbuffer_obj(ring->dev, ringbuf); if (ret) goto unpin_ctx_obj; + + ctx_obj->dirty = true; } return ret; From d0cd6730fd070442e97dd6bc32fc315dc40f1068 Mon Sep 17 00:00:00 2001 From: "Thulasimani,Sivakumar" Date: Tue, 18 Aug 2015 11:07:57 +0530 Subject: [PATCH 0678/2091] drm/i915: remove HBR2 from chv supported list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 5e86dfe39f54ab13fd8079ac3d6cb100318909a3 upstream. This patch removes 5.4Gbps from supported link rate for CHV since it is not supported in it. v2: change the ordering for better readability (Ville) Reviewed-by: Ville Syrjälä Signed-off-by: Sivakumar Thulasimani Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_dp.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index d714a4b5711e4..dd58375c73395 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1163,11 +1163,12 @@ intel_dp_source_rates(struct drm_device *dev, const int **source_rates) *source_rates = default_rates; + /* WaDisableHBR2:skl */ if (IS_SKYLAKE(dev) && INTEL_REVID(dev) <= SKL_REVID_B0) - /* WaDisableHBR2:skl */ return (DP_LINK_BW_2_7 >> 3) + 1; - else if (INTEL_INFO(dev)->gen >= 8 || - (IS_HASWELL(dev) && !IS_HSW_ULX(dev))) + + if ((IS_HASWELL(dev) && !IS_HSW_ULX(dev)) || IS_BROADWELL(dev) || + (INTEL_INFO(dev)->gen >= 9)) return (DP_LINK_BW_5_4 >> 3) + 1; else return (DP_LINK_BW_2_7 >> 3) + 1; From 5598cbeda80ea71125855e9ffa6777ded745af5c Mon Sep 17 00:00:00 2001 From: "Thulasimani,Sivakumar" Date: Tue, 18 Aug 2015 15:30:37 +0530 Subject: [PATCH 0679/2091] drm/i915: Avoid TP3 on CHV MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit ed63baaf849e91c84ac3e042b1fd6a0af07c16f3 upstream. This patch removes TP3 support on CHV since there is no support for HBR2 on this platform. v2: rename the function to indicate it checks source rates (Jani) v3: update comment to indicate TP3 dependency on HBR2 supported hardware (Jani) Reviewed-by: Ville Syrjälä Signed-off-by: Sivakumar Thulasimani [Jani: fixed a couple of checkpatch warnings.] 
Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_dp.c | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index dd58375c73395..b1fe32b119ef8 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1150,6 +1150,19 @@ intel_dp_sink_rates(struct intel_dp *intel_dp, const int **sink_rates) return (intel_dp_max_link_bw(intel_dp) >> 3) + 1; } +static bool intel_dp_source_supports_hbr2(struct drm_device *dev) +{ + /* WaDisableHBR2:skl */ + if (IS_SKYLAKE(dev) && INTEL_REVID(dev) <= SKL_REVID_B0) + return false; + + if ((IS_HASWELL(dev) && !IS_HSW_ULX(dev)) || IS_BROADWELL(dev) || + (INTEL_INFO(dev)->gen >= 9)) + return true; + else + return false; +} + static int intel_dp_source_rates(struct drm_device *dev, const int **source_rates) { @@ -1163,12 +1176,8 @@ intel_dp_source_rates(struct drm_device *dev, const int **source_rates) *source_rates = default_rates; - /* WaDisableHBR2:skl */ - if (IS_SKYLAKE(dev) && INTEL_REVID(dev) <= SKL_REVID_B0) - return (DP_LINK_BW_2_7 >> 3) + 1; - - if ((IS_HASWELL(dev) && !IS_HSW_ULX(dev)) || IS_BROADWELL(dev) || - (INTEL_INFO(dev)->gen >= 9)) + /* This depends on the fact that 5.4 is last value in the array */ + if (intel_dp_source_supports_hbr2(dev)) return (DP_LINK_BW_5_4 >> 3) + 1; else return (DP_LINK_BW_2_7 >> 3) + 1; @@ -3784,10 +3793,15 @@ intel_dp_get_dpcd(struct intel_dp *intel_dp) } } - /* Training Pattern 3 support, both source and sink */ + /* Training Pattern 3 support, Intel platforms that support HBR2 alone + * have support for TP3 hence that check is used along with dpcd check + * to ensure TP3 can be enabled. + * SKL < B0: due it's WaDisableHBR2 is the only exception where TP3 is + * supported but still not enabled. + */ if (intel_dp->dpcd[DP_DPCD_REV] >= 0x12 && intel_dp->dpcd[DP_MAX_LANE_COUNT] & DP_TPS3_SUPPORTED && - (IS_HASWELL(dev_priv) || INTEL_INFO(dev_priv)->gen >= 8)) { + intel_dp_source_supports_hbr2(dev)) { intel_dp->use_tps3 = true; DRM_DEBUG_KMS("Displayport TPS3 supported\n"); } else From e18d264c4e706faf0dcb25abb3ebaa82ff4e8210 Mon Sep 17 00:00:00 2001 From: Vincent Bernat Date: Sat, 15 Aug 2015 15:49:13 +0200 Subject: [PATCH 0680/2091] 9p: ensure err is initialized to 0 in p9_client_read/write commit 999b8b88c6060adf7a9b7907740ae86ace65291e upstream. Some use of those functions were providing unitialized values to those functions. Notably, when reading 0 bytes from an empty file on a 9P filesystem, the return code of read() was not 0. 
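The test program quoted just below lost the header names inside its #include directives somewhere along the way. A compilable reconstruction follows; the exact header list is an assumption, inferred from the calls the program makes (open, read, assert):

  /* Header list reconstructed; the original names were lost in formatting. */
  #include <sys/types.h>
  #include <sys/stat.h>
  #include <fcntl.h>
  #include <unistd.h>
  #include <assert.h>

  int main(int argc, const char **argv)
  {
          assert(argc == 2);
          char buffer[256];
          int fd = open(argv[1], O_RDONLY|O_NOCTTY);
          assert(fd >= 0);
          assert(read(fd, buffer, 0) == 0);   /* reading 0 bytes must return 0 */
          return 0;
  }
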
Tested with this simple program: #include #include #include #include #include int main(int argc, const char **argv) { assert(argc == 2); char buffer[256]; int fd = open(argv[1], O_RDONLY|O_NOCTTY); assert(fd >= 0); assert(read(fd, buffer, 0) == 0); return 0; } Signed-off-by: Vincent Bernat Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- net/9p/client.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/9p/client.c b/net/9p/client.c index 81925b9233185..fcf6fe063d826 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1541,6 +1541,7 @@ p9_client_read(struct p9_fid *fid, u64 offset, struct iov_iter *to, int *err) struct p9_client *clnt = fid->clnt; struct p9_req_t *req; int total = 0; + *err = 0; p9_debug(P9_DEBUG_9P, ">>> TREAD fid %d offset %llu %d\n", fid->fid, (unsigned long long) offset, (int)iov_iter_count(to)); @@ -1616,6 +1617,7 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err) struct p9_client *clnt = fid->clnt; struct p9_req_t *req; int total = 0; + *err = 0; p9_debug(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu count %zd\n", fid->fid, (unsigned long long) offset, From f10bb883e421d4a35e9849515bdf1926278d8cd8 Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Fri, 14 Aug 2015 15:20:27 +0300 Subject: [PATCH 0681/2091] irqchip/crossbar: Restore the irq_set_type() mechanism commit e269ec42328783e51be08c191aa935dba56141fc upstream. The conversion of the crossbar irqchip to hierarchical irq domains failed to provide a mechanism to properly set the trigger type of an interrupt. The crossbar irq chip itself has no mechanism and therefor no irq_set_type() callback. The code before the conversion relayed the trigger configuration directly to the underlying GIC. Restore the correct behaviour by setting the crossbar irq_set_type callback to irq_chip_set_type_parent(). This propagates the set_trigger() call to the underlying GIC irqchip. [ tglx: Massaged changelog ] Fixes: 783d31863fb8 ('irqchip: crossbar: Convert dra7 crossbar...') Signed-off-by: Grygorii Strashko Cc: Sudeep Holla Cc: Cc: Cc: Cc: Cc: Cc: Cc: Link: http://lkml.kernel.org/r/1439554830-19502-4-git-send-email-grygorii.strashko@ti.com Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- drivers/irqchip/irq-crossbar.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/irqchip/irq-crossbar.c b/drivers/irqchip/irq-crossbar.c index 692fe2bc81979..3ba58e7b4724c 100644 --- a/drivers/irqchip/irq-crossbar.c +++ b/drivers/irqchip/irq-crossbar.c @@ -69,6 +69,7 @@ static struct irq_chip crossbar_chip = { .irq_unmask = irq_chip_unmask_parent, .irq_retrigger = irq_chip_retrigger_hierarchy, .irq_set_wake = irq_chip_set_wake_parent, + .irq_set_type = irq_chip_set_type_parent, #ifdef CONFIG_SMP .irq_set_affinity = irq_chip_set_affinity_parent, #endif From 0c7ff29d25807ae4be8fbea3b2b1d3d8fef8455d Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Fri, 14 Aug 2015 15:20:29 +0300 Subject: [PATCH 0682/2091] irqchip/crossbar: Restore the mask on suspend behaviour commit 4fd8f47e7e5b64a74b60f23c2e08ba8234d659d1 upstream. The ARM GIC requires that all interrupts which are not used as a wakeup source have to be masked during suspend. The conversion of the crossbar irqchip to hierarchical irq domains failed to mark the crossbar irqchip with the IRQCHIP_MASK_ON_SUSPEND flag and therefor broke the suspend requirement of the GIC. Before the conversion the flags were visible because the GIC was the top level irqchip. 
After the conversion the crossbar irqchip is the top level irq chip whose flags are evaluated in suspend_device_irq(). As the flag is not set the masking of the non-wakeup irqs is not invoked which breaks suspend. Add the IRQCHIP_MASK_ON_SUSPEND flag to the crossbar irqchip, so the GIC interrupts get masked properly. [ tglx: Massaged changelog ] Fixes: 783d31863fb8 ('irqchip: crossbar: Convert dra7 crossbar...') Signed-off-by: Grygorii Strashko Cc: Sudeep Holla Cc: Cc: Cc: Cc: Cc: Cc: Cc: Link: http://lkml.kernel.org/r/1439554830-19502-6-git-send-email-grygorii.strashko@ti.com Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- drivers/irqchip/irq-crossbar.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/irqchip/irq-crossbar.c b/drivers/irqchip/irq-crossbar.c index 3ba58e7b4724c..f5a72ccb4376a 100644 --- a/drivers/irqchip/irq-crossbar.c +++ b/drivers/irqchip/irq-crossbar.c @@ -70,6 +70,7 @@ static struct irq_chip crossbar_chip = { .irq_retrigger = irq_chip_retrigger_hierarchy, .irq_set_wake = irq_chip_set_wake_parent, .irq_set_type = irq_chip_set_type_parent, + .flags = IRQCHIP_MASK_ON_SUSPEND, #ifdef CONFIG_SMP .irq_set_affinity = irq_chip_set_affinity_parent, #endif From 8e5c21ff49d4f033c102ffd6ff1c76a660f71715 Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Fri, 14 Aug 2015 15:20:30 +0300 Subject: [PATCH 0683/2091] irqchip/crossbar: Restore set_wake functionality commit 8200fe4347870d4ad6475048bcdf3e7c106c5268 upstream. The TI crossbar irqchip doesn't provides any facility to configure the wakeup sources, but the conversion to hierarchical irqdomains set the irq_set_wake callback to irq_chip_set_wake_parent. The parent chip (OMAP wakeupgen) has no irq_set_wake function either so the call will fail with -ENOSYS. As a result the irq_set_wake() call in the resume path will trigger an 'Unbalanced wake disable' warning. Before the conversion the GIC irqchip was the top level irqchip and correctly flagged with IRQCHIP_SKIP_SET_WAKE. Restore the correct behaviour by removing the irq_set_type callback from the crossbar irqchip and set the IRQCHIP_SKIP_SET_WAKE flag which lets the irq_set_irq_wake() call from the driver succeed. [ tglx: Massaged changelog ] Fixes: 783d31863fb8 ('irqchip: crossbar: Convert dra7 crossbar...') Signed-off-by: Grygorii Strashko Cc: Sudeep Holla Cc: Cc: Cc: Cc: Cc: Cc: Cc: Link: http://lkml.kernel.org/r/1439554830-19502-7-git-send-email-grygorii.strashko@ti.com Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- drivers/irqchip/irq-crossbar.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-crossbar.c b/drivers/irqchip/irq-crossbar.c index f5a72ccb4376a..c12bb93334ff9 100644 --- a/drivers/irqchip/irq-crossbar.c +++ b/drivers/irqchip/irq-crossbar.c @@ -68,9 +68,9 @@ static struct irq_chip crossbar_chip = { .irq_mask = irq_chip_mask_parent, .irq_unmask = irq_chip_unmask_parent, .irq_retrigger = irq_chip_retrigger_hierarchy, - .irq_set_wake = irq_chip_set_wake_parent, .irq_set_type = irq_chip_set_type_parent, - .flags = IRQCHIP_MASK_ON_SUSPEND, + .flags = IRQCHIP_MASK_ON_SUSPEND | + IRQCHIP_SKIP_SET_WAKE, #ifdef CONFIG_SMP .irq_set_affinity = irq_chip_set_affinity_parent, #endif From f683ad037883682204b83185991fbd03282b079b Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Fri, 14 Aug 2015 15:20:28 +0300 Subject: [PATCH 0684/2091] ARM: OMAP: wakeupgen: Restore the irq_set_type() mechanism commit 63059a272398ef5dc1bd7065a036e8b6e82d1af7 upstream. 
The conversion of the wakeupgen irqchip to hierarchical irq domains failed to provide a mechanism to properly set the trigger type of an interrupt. The wakeupgen irq chip itself has no mechanism and therefor no irq_set_type() callback. The code before the conversion relayed the trigger configuration directly to the underlying GIC. Restore the correct behaviour by setting the wakeupgen irq_set_type callback to irq_chip_set_type_parent(). This propagates the set_trigger() call to the underlying GIC irqchip. [ tglx: Massaged changelog ] Fixes: 7136d457f365 ('ARM: omap: convert wakeupgen to stacked domains') Signed-off-by: Grygorii Strashko Acked-by: Tony Lindgren Cc: Sudeep Holla Cc: Cc: Cc: Cc: Cc: Cc: Link: http://lkml.kernel.org/r/1439554830-19502-5-git-send-email-grygorii.strashko@ti.com Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-omap2/omap-wakeupgen.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-omap2/omap-wakeupgen.c b/arch/arm/mach-omap2/omap-wakeupgen.c index 3b56722dfd8a9..6833df45d7b1d 100644 --- a/arch/arm/mach-omap2/omap-wakeupgen.c +++ b/arch/arm/mach-omap2/omap-wakeupgen.c @@ -392,6 +392,7 @@ static struct irq_chip wakeupgen_chip = { .irq_mask = wakeupgen_mask, .irq_unmask = wakeupgen_unmask, .irq_retrigger = irq_chip_retrigger_hierarchy, + .irq_set_type = irq_chip_set_type_parent, .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MASK_ON_SUSPEND, #ifdef CONFIG_SMP .irq_set_affinity = irq_chip_set_affinity_parent, From 533eb4c087ba86e232ac9b49db9e553e1bdb4c59 Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Fri, 14 Aug 2015 15:20:25 +0300 Subject: [PATCH 0685/2091] genirq: Don't return ENOSYS in irq_chip_retrigger_hierarchy commit 6d4affea7d5aa5ca5ff4c3e5fbf3ee16801cc527 upstream. irq_chip_retrigger_hierarchy() returns -ENOSYS if it was not able to find at least one .irq_retrigger() callback implemented in the IRQ domain hierarchy. That's wrong, because check_irq_resend() expects a 0 return value from the callback in case that the hardware assisted resend was not possible. If the return value is non zero the core code assumes hardware resend success and the software resend is not invoked. This results in lost interrupts on platforms where none of the parent irq chips in the hierarchy implements the retrigger callback. This is observable on TI OMAP, where the hierarchy is: ARM GIC <- OMAP wakeupgen <- TI Crossbar Return 0 instead so the software resend mechanism gets invoked. [ tglx: Massaged changelog ] Fixes: 85f08c17de26 ('genirq: Introduce helper functions...') Signed-off-by: Grygorii Strashko Reviewed-by: Marc Zyngier Reviewed-by: Jiang Liu Cc: Sudeep Holla Cc: Cc: Cc: Cc: Cc: Cc: Link: http://lkml.kernel.org/r/1439554830-19502-2-git-send-email-grygorii.strashko@ti.com Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- kernel/irq/chip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index eb9a4ea394ab3..3e03c79c1829d 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -946,7 +946,7 @@ int irq_chip_retrigger_hierarchy(struct irq_data *data) if (data->chip && data->chip->irq_retrigger) return data->chip->irq_retrigger(data); - return -ENOSYS; + return 0; } /** From 0c7e8b8a0a4c216d5f9d1b6115858f70ede814ed Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Fri, 14 Aug 2015 15:20:26 +0300 Subject: [PATCH 0686/2091] genirq: Introduce irq_chip_set_type_parent() helper commit b7560de198222994374c1340a389f12d5efb244a upstream. 
This helper is required for irq chips which do not implement a irq_set_type callback and need to call down the irq domain hierarchy for the actual trigger type change. This helper is required to fix further wreckage caused by the conversion of TI OMAP to hierarchical irq domains and therefor tagged for stable. [ tglx: Massaged changelog ] Signed-off-by: Grygorii Strashko Cc: Sudeep Holla Cc: Cc: Cc: Cc: Cc: Cc: Cc: Cc: stable@vger.kernel.org # 4.1 Link: http://lkml.kernel.org/r/1439554830-19502-3-git-send-email-grygorii.strashko@ti.com Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- include/linux/irq.h | 1 + kernel/irq/chip.c | 17 +++++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/include/linux/irq.h b/include/linux/irq.h index 62c6901cab550..3532dca843f46 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -467,6 +467,7 @@ extern int irq_chip_set_affinity_parent(struct irq_data *data, const struct cpumask *dest, bool force); extern int irq_chip_set_wake_parent(struct irq_data *data, unsigned int on); +extern int irq_chip_set_type_parent(struct irq_data *data, unsigned int type); #endif /* Handling of unhandled and spurious interrupts: */ diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 3e03c79c1829d..94bbd8fee90da 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -933,6 +933,23 @@ int irq_chip_set_affinity_parent(struct irq_data *data, return -ENOSYS; } +/** + * irq_chip_set_type_parent - Set IRQ type on the parent interrupt + * @data: Pointer to interrupt specific data + * @type: IRQ_TYPE_{LEVEL,EDGE}_* value - see include/linux/irq.h + * + * Conditional, as the underlying parent chip might not implement it. + */ +int irq_chip_set_type_parent(struct irq_data *data, unsigned int type) +{ + data = data->parent_data; + + if (data->chip->irq_set_type) + return data->chip->irq_set_type(data, type); + + return -ENOSYS; +} + /** * irq_chip_retrigger_hierarchy - Retrigger an interrupt in hardware * @data: Pointer to interrupt specific data From ec970fe98aa5d81215c3ba9729793d166b152966 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 17 Aug 2015 11:02:42 -0400 Subject: [PATCH 0687/2091] SCSI: Fix NULL pointer dereference in runtime PM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 49718f0fb8c9af192b33d8af3a2826db04025371 upstream. The routines in scsi_rpm.c assume that if a runtime-PM callback is invoked for a SCSI device, it can only mean that the device's driver has asked the block layer to handle the runtime power management (by calling blk_pm_runtime_init(), which among other things sets q->dev). However, this assumption turns out to be wrong for things like the ses driver. Normally ses devices are not allowed to do runtime PM, but userspace can override this setting. If this happens, the kernel gets a NULL pointer dereference when blk_post_runtime_resume() tries to use the uninitialized q->dev pointer. This patch fixes the problem by calling the block layer's runtime-PM routines only if the device's driver really does have a runtime-PM callback routine. Since ses doesn't define any such callbacks, the crash won't occur. This fixes Bugzilla #101371. 
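The shape of the fix is easy to show in isolation. The sketch below is a user-space analogy, not the SCSI code, and every type and name in it is invented; it only demonstrates the guard being added: the helpers that dereference q->dev run solely when the driver really supplies a runtime-PM callback, so a device like ses, whose driver never did the block-layer PM setup, never has q->dev touched.

  #include <stdio.h>
  #include <stddef.h>

  /* All structures below are invented stand-ins for illustration. */
  struct queue {
          const char *dev;   /* stays NULL unless the driver set up block-layer PM */
  };

  struct pm_ops {
          int (*runtime_suspend)(void);
  };

  /* Stand-in for the block-layer post-suspend helper: touches q->dev. */
  static void post_suspend(struct queue *q, int err)
  {
          printf("post_suspend for %s, err=%d\n", q->dev, err);
  }

  static int sdev_runtime_suspend_sketch(struct queue *q, const struct pm_ops *pm)
  {
          int err = 0;

          if (pm && pm->runtime_suspend) {   /* the guard the fix adds */
                  err = pm->runtime_suspend();
                  post_suspend(q, err);
          }
          return err;   /* no callback (the ses case): q->dev is never touched */
  }

  int main(void)
  {
          struct queue q = { .dev = NULL };  /* block-layer PM init never ran */

          return sdev_runtime_suspend_sketch(&q, NULL);  /* safe: guard short-circuits */
  }
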
Signed-off-by: Alan Stern Reported-by: Stanisław Pitucha Reported-by: Ilan Cohen Tested-by: Ilan Cohen Reviewed-by: Johannes Thumshirn Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/scsi_pm.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/scsi/scsi_pm.c b/drivers/scsi/scsi_pm.c index 9e43ae1d2163d..e4b7998379485 100644 --- a/drivers/scsi/scsi_pm.c +++ b/drivers/scsi/scsi_pm.c @@ -217,15 +217,15 @@ static int sdev_runtime_suspend(struct device *dev) { const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; struct scsi_device *sdev = to_scsi_device(dev); - int err; + int err = 0; - err = blk_pre_runtime_suspend(sdev->request_queue); - if (err) - return err; - if (pm && pm->runtime_suspend) + if (pm && pm->runtime_suspend) { + err = blk_pre_runtime_suspend(sdev->request_queue); + if (err) + return err; err = pm->runtime_suspend(dev); - blk_post_runtime_suspend(sdev->request_queue, err); - + blk_post_runtime_suspend(sdev->request_queue, err); + } return err; } @@ -248,11 +248,11 @@ static int sdev_runtime_resume(struct device *dev) const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; int err = 0; - blk_pre_runtime_resume(sdev->request_queue); - if (pm && pm->runtime_resume) + if (pm && pm->runtime_resume) { + blk_pre_runtime_resume(sdev->request_queue); err = pm->runtime_resume(dev); - blk_post_runtime_resume(sdev->request_queue, err); - + blk_post_runtime_resume(sdev->request_queue, err); + } return err; } From cad4bab399358cbcfe3c73f9da69bc05456ffa12 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Thu, 6 Aug 2015 09:48:34 +0200 Subject: [PATCH 0688/2091] can: pcan_usb: don't provide CAN FD bittimings by non-FD adapters commit 06b23f7fbbf26a025fd68395c7586949db586b47 upstream. The CAN FD data bittiming constants are provided via netlink only when there are valid CAN FD constants available in priv->data_bittiming_const. Due to the indirection of pointer assignments in the peak_usb driver the priv->data_bittiming_const never becomes NULL - not even for non-FD adapters. The data_bittiming_const points to zero'ed data which leads to this result when running 'ip -details link show can0': 35: can0: mtu 16 qdisc noop state DOWN mode DEFAULT group default qlen 10 link/can promiscuity 0 can state STOPPED restart-ms 0 pcan_usb: tseg1 1..16 tseg2 1..8 sjw 1..4 brp 1..64 brp-inc 1 : dtseg1 0..0 dtseg2 0..0 dsjw 1..0 dbrp 0..0 dbrp-inc 0 <== BROKEN! clock 8000000 This patch changes the struct peak_usb_adapter::bittiming_const and struct peak_usb_adapter::data_bittiming_const to pointers to fix the assignemnt problems. 
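A short standalone sketch makes the pointer-versus-embedded-member point concrete. It is not the driver code and all names in it are invented: the address of an embedded constants struct is never NULL, even when its contents are all zeroes, so only a pointer member that non-FD adapters leave NULL lets the core tell "no CAN FD" apart from "CAN FD with these constants".

  #include <stdio.h>
  #include <stddef.h>

  /* Invented stand-ins for illustration only. */
  struct bt_const {
          int tseg1_max;
  };

  struct adapter_embedded {
          struct bt_const data_bt;          /* always present, possibly all zeroes */
  };

  struct adapter_with_ptr {
          const struct bt_const *data_bt;   /* NULL => no CAN FD support */
  };

  static const struct bt_const *fd_const_embedded(const struct adapter_embedded *a)
  {
          return &a->data_bt;   /* never NULL, even for a non-FD adapter */
  }

  static const struct bt_const *fd_const_ptr(const struct adapter_with_ptr *a)
  {
          return a->data_bt;    /* NULL when the adapter has no CAN FD */
  }

  int main(void)
  {
          struct adapter_embedded plain = { { 0 } };
          struct adapter_with_ptr fixed = { NULL };

          printf("embedded member: %s\n",
                 fd_const_embedded(&plain) ? "looks like CAN FD (bogus)" : "no CAN FD");
          printf("pointer member:  %s\n",
                 fd_const_ptr(&fixed) ? "CAN FD constants" : "no CAN FD");
          return 0;
  }
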
Reported-by: Oliver Hartkopp Tested-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/usb/peak_usb/pcan_usb.c | 24 ++--- drivers/net/can/usb/peak_usb/pcan_usb_core.c | 4 +- drivers/net/can/usb/peak_usb/pcan_usb_core.h | 4 +- drivers/net/can/usb/peak_usb/pcan_usb_fd.c | 96 +++++++++++--------- drivers/net/can/usb/peak_usb/pcan_usb_pro.c | 24 ++--- 5 files changed, 82 insertions(+), 70 deletions(-) diff --git a/drivers/net/can/usb/peak_usb/pcan_usb.c b/drivers/net/can/usb/peak_usb/pcan_usb.c index 72427f21edffa..edfec540c8939 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb.c @@ -855,6 +855,18 @@ static int pcan_usb_probe(struct usb_interface *intf) /* * describe the PCAN-USB adapter */ +static const struct can_bittiming_const pcan_usb_const = { + .name = "pcan_usb", + .tseg1_min = 1, + .tseg1_max = 16, + .tseg2_min = 1, + .tseg2_max = 8, + .sjw_max = 4, + .brp_min = 1, + .brp_max = 64, + .brp_inc = 1, +}; + const struct peak_usb_adapter pcan_usb = { .name = "PCAN-USB", .device_id = PCAN_USB_PRODUCT_ID, @@ -863,17 +875,7 @@ const struct peak_usb_adapter pcan_usb = { .clock = { .freq = PCAN_USB_CRYSTAL_HZ / 2 , }, - .bittiming_const = { - .name = "pcan_usb", - .tseg1_min = 1, - .tseg1_max = 16, - .tseg2_min = 1, - .tseg2_max = 8, - .sjw_max = 4, - .brp_min = 1, - .brp_max = 64, - .brp_inc = 1, - }, + .bittiming_const = &pcan_usb_const, /* size of device private data */ .sizeof_dev_private = sizeof(struct pcan_usb), diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.c b/drivers/net/can/usb/peak_usb/pcan_usb_core.c index 7921cff93a63b..5a2e341a6d1ea 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_core.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.c @@ -792,9 +792,9 @@ static int peak_usb_create_dev(const struct peak_usb_adapter *peak_usb_adapter, dev->ep_msg_out = peak_usb_adapter->ep_msg_out[ctrl_idx]; dev->can.clock = peak_usb_adapter->clock; - dev->can.bittiming_const = &peak_usb_adapter->bittiming_const; + dev->can.bittiming_const = peak_usb_adapter->bittiming_const; dev->can.do_set_bittiming = peak_usb_set_bittiming; - dev->can.data_bittiming_const = &peak_usb_adapter->data_bittiming_const; + dev->can.data_bittiming_const = peak_usb_adapter->data_bittiming_const; dev->can.do_set_data_bittiming = peak_usb_set_data_bittiming; dev->can.do_set_mode = peak_usb_set_mode; dev->can.do_get_berr_counter = peak_usb_adapter->do_get_berr_counter; diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.h b/drivers/net/can/usb/peak_usb/pcan_usb_core.h index 9e624f05ad4d9..506fe506c9d37 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_core.h +++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.h @@ -48,8 +48,8 @@ struct peak_usb_adapter { u32 device_id; u32 ctrlmode_supported; struct can_clock clock; - const struct can_bittiming_const bittiming_const; - const struct can_bittiming_const data_bittiming_const; + const struct can_bittiming_const * const bittiming_const; + const struct can_bittiming_const * const data_bittiming_const; unsigned int ctrl_count; int (*intf_probe)(struct usb_interface *intf); diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c index 09d14e70abd74..ce44a033f63bb 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c @@ -990,6 +990,30 @@ static void pcan_usb_fd_free(struct peak_usb_device *dev) } /* describes the PCAN-USB FD adapter */ +static const struct 
can_bittiming_const pcan_usb_fd_const = { + .name = "pcan_usb_fd", + .tseg1_min = 1, + .tseg1_max = 64, + .tseg2_min = 1, + .tseg2_max = 16, + .sjw_max = 16, + .brp_min = 1, + .brp_max = 1024, + .brp_inc = 1, +}; + +static const struct can_bittiming_const pcan_usb_fd_data_const = { + .name = "pcan_usb_fd", + .tseg1_min = 1, + .tseg1_max = 16, + .tseg2_min = 1, + .tseg2_max = 8, + .sjw_max = 4, + .brp_min = 1, + .brp_max = 1024, + .brp_inc = 1, +}; + const struct peak_usb_adapter pcan_usb_fd = { .name = "PCAN-USB FD", .device_id = PCAN_USBFD_PRODUCT_ID, @@ -999,28 +1023,8 @@ const struct peak_usb_adapter pcan_usb_fd = { .clock = { .freq = PCAN_UFD_CRYSTAL_HZ, }, - .bittiming_const = { - .name = "pcan_usb_fd", - .tseg1_min = 1, - .tseg1_max = 64, - .tseg2_min = 1, - .tseg2_max = 16, - .sjw_max = 16, - .brp_min = 1, - .brp_max = 1024, - .brp_inc = 1, - }, - .data_bittiming_const = { - .name = "pcan_usb_fd", - .tseg1_min = 1, - .tseg1_max = 16, - .tseg2_min = 1, - .tseg2_max = 8, - .sjw_max = 4, - .brp_min = 1, - .brp_max = 1024, - .brp_inc = 1, - }, + .bittiming_const = &pcan_usb_fd_const, + .data_bittiming_const = &pcan_usb_fd_data_const, /* size of device private data */ .sizeof_dev_private = sizeof(struct pcan_usb_fd_device), @@ -1058,6 +1062,30 @@ const struct peak_usb_adapter pcan_usb_fd = { }; /* describes the PCAN-USB Pro FD adapter */ +static const struct can_bittiming_const pcan_usb_pro_fd_const = { + .name = "pcan_usb_pro_fd", + .tseg1_min = 1, + .tseg1_max = 64, + .tseg2_min = 1, + .tseg2_max = 16, + .sjw_max = 16, + .brp_min = 1, + .brp_max = 1024, + .brp_inc = 1, +}; + +static const struct can_bittiming_const pcan_usb_pro_fd_data_const = { + .name = "pcan_usb_pro_fd", + .tseg1_min = 1, + .tseg1_max = 16, + .tseg2_min = 1, + .tseg2_max = 8, + .sjw_max = 4, + .brp_min = 1, + .brp_max = 1024, + .brp_inc = 1, +}; + const struct peak_usb_adapter pcan_usb_pro_fd = { .name = "PCAN-USB Pro FD", .device_id = PCAN_USBPROFD_PRODUCT_ID, @@ -1067,28 +1095,8 @@ const struct peak_usb_adapter pcan_usb_pro_fd = { .clock = { .freq = PCAN_UFD_CRYSTAL_HZ, }, - .bittiming_const = { - .name = "pcan_usb_pro_fd", - .tseg1_min = 1, - .tseg1_max = 64, - .tseg2_min = 1, - .tseg2_max = 16, - .sjw_max = 16, - .brp_min = 1, - .brp_max = 1024, - .brp_inc = 1, - }, - .data_bittiming_const = { - .name = "pcan_usb_pro_fd", - .tseg1_min = 1, - .tseg1_max = 16, - .tseg2_min = 1, - .tseg2_max = 8, - .sjw_max = 4, - .brp_min = 1, - .brp_max = 1024, - .brp_inc = 1, - }, + .bittiming_const = &pcan_usb_pro_fd_const, + .data_bittiming_const = &pcan_usb_pro_fd_data_const, /* size of device private data */ .sizeof_dev_private = sizeof(struct pcan_usb_fd_device), diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_pro.c b/drivers/net/can/usb/peak_usb/pcan_usb_pro.c index dec51717635e9..a5ad2e6aa73a4 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_pro.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb_pro.c @@ -1004,6 +1004,18 @@ int pcan_usb_pro_probe(struct usb_interface *intf) /* * describe the PCAN-USB Pro adapter */ +static const struct can_bittiming_const pcan_usb_pro_const = { + .name = "pcan_usb_pro", + .tseg1_min = 1, + .tseg1_max = 16, + .tseg2_min = 1, + .tseg2_max = 8, + .sjw_max = 4, + .brp_min = 1, + .brp_max = 1024, + .brp_inc = 1, +}; + const struct peak_usb_adapter pcan_usb_pro = { .name = "PCAN-USB Pro", .device_id = PCAN_USBPRO_PRODUCT_ID, @@ -1012,17 +1024,7 @@ const struct peak_usb_adapter pcan_usb_pro = { .clock = { .freq = PCAN_USBPRO_CRYSTAL_HZ, }, - .bittiming_const = { - .name = "pcan_usb_pro", - 
.tseg1_min = 1, - .tseg1_max = 16, - .tseg2_min = 1, - .tseg2_max = 8, - .sjw_max = 4, - .brp_min = 1, - .brp_max = 1024, - .brp_inc = 1, - }, + .bittiming_const = &pcan_usb_pro_const, /* size of device private data */ .sizeof_dev_private = sizeof(struct pcan_usb_pro_device), From df56874d1424bdc220a8197dbfff59c4a2b529e6 Mon Sep 17 00:00:00 2001 From: "Guillermo A. Amaral" Date: Tue, 25 Aug 2015 23:29:13 -0700 Subject: [PATCH 0689/2091] Add factory recertified Crucial M500s to blacklist commit 7a7184b01aa9deb86df661c6f7cbcf69a95b728c upstream. The Crucial M500 is known to have issues with queued TRIM commands, the factory recertified SSDs use a different model number naming convention which causes them to get ignored by the blacklist. The new naming convention boils down to: s/Crucial_/FC/ Signed-off-by: Guillermo A. Amaral Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libata-core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index fb2a208736ba6..e0064d180f041 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4230,6 +4230,8 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { ATA_HORKAGE_ZERO_AFTER_TRIM, }, { "Samsung SSD 8*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | ATA_HORKAGE_ZERO_AFTER_TRIM, }, + { "FCCT*M500*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | + ATA_HORKAGE_ZERO_AFTER_TRIM, }, /* devices that don't properly handle TRIM commands */ { "SuperSSpeed S238*", NULL, ATA_HORKAGE_NOTRIM, }, From 6d02086227037e9dea0e15ae0b59cb9910f047e1 Mon Sep 17 00:00:00 2001 From: Hiral Shah Date: Tue, 14 Jul 2015 07:08:57 -0700 Subject: [PATCH 0690/2091] fnic: Use the local variable instead of I/O flag to acquire io_req_lock in fnic_queuecommand() to avoid deadloack commit db196935d9562abec4510f48d887bc1f1e054fcf upstream. We added changes in fnic driver patch 1.6.0.16 to acquire io_req_lock in fnic_queuecommand() before issuing I/O so that io completion is serialized. But when releasing the lock we check for the I/O flag and this could be modified if IO abort occurs before I/O completion. In this case we wont release the lock and causes deadlock in some scenerios. Using the local variable to check the IO lock status will resolve the problem. Fixes: 41df7b02db82cf6c14f094757bac3830d10a827f Signed-off-by: Hiral Shah Signed-off-by: Sesidhar Baddela Signed-off-by: Anil Chintalapati Reviewed-by: Martin K. 
Petersen Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/fnic/fnic.h | 2 +- drivers/scsi/fnic/fnic_scsi.c | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/fnic/fnic.h b/drivers/scsi/fnic/fnic.h index 26270c351624f..ce129e595b55b 100644 --- a/drivers/scsi/fnic/fnic.h +++ b/drivers/scsi/fnic/fnic.h @@ -39,7 +39,7 @@ #define DRV_NAME "fnic" #define DRV_DESCRIPTION "Cisco FCoE HBA Driver" -#define DRV_VERSION "1.6.0.17" +#define DRV_VERSION "1.6.0.17a" #define PFX DRV_NAME ": " #define DFX DRV_NAME "%d: " diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c index 155b286f1a9d3..25436cd2860cc 100644 --- a/drivers/scsi/fnic/fnic_scsi.c +++ b/drivers/scsi/fnic/fnic_scsi.c @@ -425,6 +425,7 @@ static int fnic_queuecommand_lck(struct scsi_cmnd *sc, void (*done)(struct scsi_ unsigned long ptr; struct fc_rport_priv *rdata; spinlock_t *io_lock = NULL; + int io_lock_acquired = 0; if (unlikely(fnic_chk_state_flags_locked(fnic, FNIC_FLAGS_IO_BLOCKED))) return SCSI_MLQUEUE_HOST_BUSY; @@ -518,6 +519,7 @@ static int fnic_queuecommand_lck(struct scsi_cmnd *sc, void (*done)(struct scsi_ spin_lock_irqsave(io_lock, flags); /* initialize rest of io_req */ + io_lock_acquired = 1; io_req->port_id = rport->port_id; io_req->start_time = jiffies; CMD_STATE(sc) = FNIC_IOREQ_CMD_PENDING; @@ -571,7 +573,7 @@ static int fnic_queuecommand_lck(struct scsi_cmnd *sc, void (*done)(struct scsi_ (((u64)CMD_FLAGS(sc) >> 32) | CMD_STATE(sc))); /* if only we issued IO, will we have the io lock */ - if (CMD_FLAGS(sc) & FNIC_IO_INITIALIZED) + if (io_lock_acquired) spin_unlock_irqrestore(io_lock, flags); atomic_dec(&fnic->in_flight); From f223ee716ec995fd863e5ba1006150c3ea03defc Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 27 Aug 2015 16:10:01 +0100 Subject: [PATCH 0691/2091] arm64: KVM: Fix host crash when injecting a fault into a 32bit guest commit 126c69a0bd0e441bf6766a5d9bf20de011be9f68 upstream. When injecting a fault into a misbehaving 32bit guest, it seems rather idiotic to also inject a 64bit fault that is only going to corrupt the guest state. This leads to a situation where we perform an illegal exception return at EL2 causing the host to crash instead of killing the guest. Just fix the stupid bug that has been there from day 1. 
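For illustration only: HCR_EL2.RW cleared means the guest's lower exception levels run AArch32, so fault injection has to be an either/or decision. A stripped-down sketch of the intended dispatch (the complete fix is in the hunks below):

/*
 * Sketch only. HCR_EL2.RW == 0 => guest EL1/EL0 are AArch32, so inject
 * the 32-bit abort and nothing else; otherwise inject the AArch64 one.
 * Injecting both corrupts the 32-bit guest's state.
 */
static void example_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr)
{
        if (!(vcpu->arch.hcr_el2 & HCR_RW))
                inject_abt32(vcpu, false, addr);
        else
                inject_abt64(vcpu, false, addr);
}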
Reported-by: Russell King Tested-by: Russell King Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kvm/inject_fault.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index f02530e726f69..85c57158dcd96 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -168,8 +168,8 @@ void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr) { if (!(vcpu->arch.hcr_el2 & HCR_RW)) inject_abt32(vcpu, false, addr); - - inject_abt64(vcpu, false, addr); + else + inject_abt64(vcpu, false, addr); } /** @@ -184,8 +184,8 @@ void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr) { if (!(vcpu->arch.hcr_el2 & HCR_RW)) inject_abt32(vcpu, true, addr); - - inject_abt64(vcpu, true, addr); + else + inject_abt64(vcpu, true, addr); } /** @@ -198,6 +198,6 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu) { if (!(vcpu->arch.hcr_el2 & HCR_RW)) inject_undef32(vcpu); - - inject_undef64(vcpu); + else + inject_undef64(vcpu); } From 4925e8b95623c85ed10fbf7263f9f2a4a6eb90ac Mon Sep 17 00:00:00 2001 From: Shannon Zhao Date: Mon, 29 Jun 2015 09:02:40 +0100 Subject: [PATCH 0692/2091] arm64: perf: fix unassigned cpu_pmu->plat_device when probing PMU PPIs commit b265da5a45ce60bd3d7505cc0eaa6cfba50946a1 upstream. Commit d795ef9aa831 ("arm64: perf: don't warn about missing interrupt-affinity property for PPIs") added a check for PPIs so that we avoid parsing the interrupt-affinity property for these naturally affine interrupts. Unfortunately, this check can trigger an early (successful) return and we will not assign the value of cpu_pmu->plat_device. This patch fixes the issue. Signed-off-by: Shannon Zhao Signed-off-by: Will Deacon Cc: Kefeng Wang Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/perf_event.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index cce18c85d2e8e..7778453762d8c 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -1318,7 +1318,7 @@ static int armpmu_device_probe(struct platform_device *pdev) /* Don't bother with PPIs; they're already affine */ irq = platform_get_irq(pdev, 0); if (irq >= 0 && irq_is_percpu(irq)) - return 0; + goto out; irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL); if (!irqs) @@ -1355,6 +1355,7 @@ static int armpmu_device_probe(struct platform_device *pdev) else kfree(irqs); +out: cpu_pmu->plat_device = pdev; return 0; } From 5c9c2d3c9e6fdd7cc94e833b81f950068edfac95 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Thu, 20 Aug 2015 11:33:41 +0100 Subject: [PATCH 0693/2091] x86/xen: make CONFIG_XEN depend on CONFIG_X86_LOCAL_APIC commit 87ffd2b9bb74061c120f450e4d0f3409bb603ae0 upstream. Since commit feb44f1f7a4ac299d1ab1c3606860e70b9b89d69 (x86/xen: Provide a "Xen PV" APIC driver to support >255 VCPUs) Xen guests need a full APIC driver and thus should depend on X86_LOCAL_APIC. This fixes an i386 build failure with !SMP && !CONFIG_X86_UP_APIC by disabling Xen support in this configuration. Users needing Xen support in a non-SMP i386 kernel will need to enable CONFIG_X86_UP_APIC. 
Signed-off-by: David Vrabel Signed-off-by: Greg Kroah-Hartman --- arch/x86/xen/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index e88fda867a33b..484145368a241 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig @@ -8,7 +8,7 @@ config XEN select PARAVIRT_CLOCK select XEN_HAVE_PVMMU depends on X86_64 || (X86_32 && X86_PAE) - depends on X86_TSC + depends on X86_LOCAL_APIC && X86_TSC help This is the Linux Xen port. Enabling this will allow the kernel to boot in a paravirtualized environment under the @@ -17,7 +17,7 @@ config XEN config XEN_DOM0 def_bool y depends on XEN && PCI_XEN && SWIOTLB_XEN - depends on X86_LOCAL_APIC && X86_IO_APIC && ACPI && PCI + depends on X86_IO_APIC && ACPI && PCI config XEN_PVHVM def_bool y From 07a015ad4dd3a5a8651fca4cc1477ed8f69c7393 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 22 Aug 2015 16:41:17 +0200 Subject: [PATCH 0694/2091] x86/apic: Fix fallout from x2apic cleanup commit a57e456a7b28431b55e407e5ab78ebd5b378d19e upstream. In the recent x2apic cleanup I got two things really wrong: 1) The safety check in __disable_x2apic which allows the function to be called unconditionally is backwards. The check is there to prevent access to the apic MSR in case that the machine has no apic. Though right now it returns if the machine has an apic and therefor the disabling of x2apic is never invoked. 2) x2apic_disable() sets x2apic_mode to 0 after registering the local apic. That's wrong, because register_lapic_address() checks x2apic mode and therefor takes the wrong code path. This results in boot failures on machines with x2apic preenabled by BIOS and can also lead to an fatal MSR access on machines without apic. The solutions are simple: 1) Correct the sanity check for apic availability 2) Clear x2apic_mode _before_ calling register_lapic_address() Fixes: 659006bf3ae3 'x86/x2apic: Split enable and setup function' Reported-and-tested-by: Javier Monteagudo Signed-off-by: Thomas Gleixner Link: https://bugzilla.redhat.com/show_bug.cgi?id=1224764 Cc: Laura Abbott Cc: Jiang Liu Cc: Joerg Roedel Cc: Tony Luck Cc: Borislav Petkov Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/apic/apic.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index dcb52850a28fc..cde732c1b4959 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1424,7 +1424,7 @@ static inline void __x2apic_disable(void) { u64 msr; - if (cpu_has_apic) + if (!cpu_has_apic) return; rdmsrl(MSR_IA32_APICBASE, msr); @@ -1483,10 +1483,13 @@ void x2apic_setup(void) static __init void x2apic_disable(void) { - u32 x2apic_id; + u32 x2apic_id, state = x2apic_state; - if (x2apic_state != X2APIC_ON) - goto out; + x2apic_mode = 0; + x2apic_state = X2APIC_DISABLED; + + if (state != X2APIC_ON) + return; x2apic_id = read_apic_id(); if (x2apic_id >= 255) @@ -1494,9 +1497,6 @@ static __init void x2apic_disable(void) __x2apic_disable(); register_lapic_address(mp_lapic_addr); -out: - x2apic_state = X2APIC_DISABLED; - x2apic_mode = 0; } static __init void x2apic_enable(void) From e9da4d6ac69af64671ea76662b74a522a6e54dd2 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Thu, 20 Aug 2015 12:54:39 +0800 Subject: [PATCH 0695/2091] x86/idle: Restore trace_cpu_idle to mwait_idle() calls commit e43d0189ac02415fe4487f79fc35e8f147e9ea0d upstream. 
Commit b253149b843f ("sched/idle/x86: Restore mwait_idle() to fix boot hangs, to improve power savings and to improve performance") restores mwait_idle(), but the trace_cpu_idle related calls are missing. This causes powertop on my old desktop powered by Intel Core2 E6550 to report zero wakeups and zero events. Add them back to restore the proper behaviour. Fixes: b253149b843f ("sched/idle/x86: Restore mwait_idle() to ...") Signed-off-by: Jisheng Zhang Cc: Link: http://lkml.kernel.org/r/1440046479-4262-1-git-send-email-jszhang@marvell.com Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/process.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 6e338e3b1dc04..9717437742489 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -453,6 +453,7 @@ static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c) static void mwait_idle(void) { if (!current_set_polling_and_test()) { + trace_cpu_idle_rcuidle(1, smp_processor_id()); if (this_cpu_has(X86_BUG_CLFLUSH_MONITOR)) { smp_mb(); /* quirk */ clflush((void *)¤t_thread_info()->flags); @@ -464,6 +465,7 @@ static void mwait_idle(void) __sti_mwait(0, 0); else local_irq_enable(); + trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); } else { local_irq_enable(); } From c84847f370c56509b2bddc357f43bd0dd4ffb404 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Fri, 17 Jul 2015 21:40:28 +0100 Subject: [PATCH 0696/2091] ARM: 8405/1: VDSO: fix regression with toolchains lacking ld.bfd executable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 3473f26592c1c365d376aee29433d7db75f14d1e upstream. The Sourcery CodeBench Lite 2014.05 toolchain (gcc 4.8.3, binutils 2.24.51) has a GCC which implements -fuse-ld, and it doesn't include the gold linker, but it lacks an ld.bfd executable in its installation. This means that passing -fuse-ld=bfd fails with: VDSO arch/arm/vdso/vdso.so.raw collect2: fatal error: cannot find 'ld' Arguably this is a deficiency in the toolchain, but I suspect it's commonly used enough that it's worth accommodating: just use cc-ldoption (to cause a link attempt) instead of cc-option to test whether we can use -fuse-ld. So -fuse-ld=bfd won't be used with this toolchain, but the build will rightly succeed, just as it does for toolchains which don't implement -fuse-ld (and don't use gold as the default linker). Note: this will change the failure mode for a corner case I was trying to handle in d2b30cd4b722, where the toolchain defaults to the gold linker and the BFD linker is not found in PATH, from: VDSO arch/arm/vdso/vdso.so.raw collect2: fatal error: cannot find 'ld' i.e. the BFD linker is not found, to: OBJCOPY arch/arm/vdso/vdso.so BFD: arch/arm/vdso/vdso.so: Not enough room for program headers, try linking with -N that is, we fail to prevent gold from being used as the linker, and it produces an object that objcopy can't digest. 
Reported-by: Baruch Siach Tested-by: Baruch Siach Tested-by: Raphaël Poggi Fixes: d2b30cd4b722 ("ARM: 8384/1: VDSO: force use of BFD linker") Cc: stable@vger.kernel.org Signed-off-by: Nathan Lynch Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/vdso/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/vdso/Makefile b/arch/arm/vdso/Makefile index 9d259d94e429c..1160434eece05 100644 --- a/arch/arm/vdso/Makefile +++ b/arch/arm/vdso/Makefile @@ -14,7 +14,7 @@ VDSO_LDFLAGS += -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096 VDSO_LDFLAGS += -nostdlib -shared VDSO_LDFLAGS += $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) VDSO_LDFLAGS += $(call cc-ldoption, -Wl$(comma)--build-id) -VDSO_LDFLAGS += $(call cc-option, -fuse-ld=bfd) +VDSO_LDFLAGS += $(call cc-ldoption, -fuse-ld=bfd) obj-$(CONFIG_VDSO) += vdso.o extra-$(CONFIG_VDSO) += vdso.lds From 0c5c1f1a4f991ee015da85cce6d2d9f9c9380b4f Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 13 Sep 2015 09:12:06 -0700 Subject: [PATCH 0697/2091] Linux 4.1.7 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 838dabcb7f48b..b8591e5f79b8b 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 1 -SUBLEVEL = 6 +SUBLEVEL = 7 EXTRAVERSION = NAME = Series 4800 From 618683abc90f828b980a969d4f55d0d322155c8f Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 30 Jul 2015 14:31:32 -0700 Subject: [PATCH 0698/2091] x86/ldt: Make modify_ldt synchronous commit 37868fe113ff2ba814b3b4eb12df214df555f8dc upstream. modify_ldt() has questionable locking and does not synchronize threads. Improve it: redesign the locking and synchronize all threads' LDTs using an IPI on all modifications. This will dramatically slow down modify_ldt in multithreaded programs, but there shouldn't be any multithreaded programs that care about modify_ldt's performance in the first place. This fixes some fallout from the CVE-2015-5157 fixes. Signed-off-by: Andy Lutomirski Reviewed-by: Borislav Petkov Cc: Andrew Cooper Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Jan Beulich Cc: Konrad Rzeszutek Wilk Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Sasha Levin Cc: Steven Rostedt Cc: Thomas Gleixner Cc: security@kernel.org Cc: xen-devel Link: http://lkml.kernel.org/r/4c6978476782160600471bd865b318db34c7b628.1438291540.git.luto@kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/desc.h | 15 -- arch/x86/include/asm/mmu.h | 3 +- arch/x86/include/asm/mmu_context.h | 54 +++++- arch/x86/kernel/cpu/common.c | 4 +- arch/x86/kernel/cpu/perf_event.c | 12 +- arch/x86/kernel/ldt.c | 262 ++++++++++++++++------------- arch/x86/kernel/process_64.c | 4 +- arch/x86/kernel/step.c | 6 +- arch/x86/power/cpu.c | 3 +- 9 files changed, 210 insertions(+), 153 deletions(-) diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index a0bf89fd26470..4e10d73cf0184 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -280,21 +280,6 @@ static inline void clear_LDT(void) set_ldt(NULL, 0); } -/* - * load one particular LDT into the current CPU - */ -static inline void load_LDT_nolock(mm_context_t *pc) -{ - set_ldt(pc->ldt, pc->size); -} - -static inline void load_LDT(mm_context_t *pc) -{ - preempt_disable(); - load_LDT_nolock(pc); - preempt_enable(); -} - static inline unsigned long get_desc_base(const struct desc_struct *desc) { return (unsigned)(desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24)); diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h index 09b9620a73b48..364d27481a52a 100644 --- a/arch/x86/include/asm/mmu.h +++ b/arch/x86/include/asm/mmu.h @@ -9,8 +9,7 @@ * we put the segment information here. */ typedef struct { - void *ldt; - int size; + struct ldt_struct *ldt; #ifdef CONFIG_X86_64 /* True if mm supports a task running in 32 bit compatibility mode. */ diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index e997f70f80c49..80d67dd803511 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -33,6 +33,50 @@ static inline void load_mm_cr4(struct mm_struct *mm) static inline void load_mm_cr4(struct mm_struct *mm) {} #endif +/* + * ldt_structs can be allocated, used, and freed, but they are never + * modified while live. + */ +struct ldt_struct { + /* + * Xen requires page-aligned LDTs with special permissions. This is + * needed to prevent us from installing evil descriptors such as + * call gates. On native, we could merge the ldt_struct and LDT + * allocations, but it's not worth trying to optimize. + */ + struct desc_struct *entries; + int size; +}; + +static inline void load_mm_ldt(struct mm_struct *mm) +{ + struct ldt_struct *ldt; + + /* lockless_dereference synchronizes with smp_store_release */ + ldt = lockless_dereference(mm->context.ldt); + + /* + * Any change to mm->context.ldt is followed by an IPI to all + * CPUs with the mm active. The LDT will not be freed until + * after the IPI is handled by all such CPUs. This means that, + * if the ldt_struct changes before we return, the values we see + * will be safe, and the new values will be loaded before we run + * any user code. + * + * NB: don't try to convert this to use RCU without extreme care. + * We would still need IRQs off, because we don't want to change + * the local LDT after an IPI loaded a newer value than the one + * that we can see. + */ + + if (unlikely(ldt)) + set_ldt(ldt->entries, ldt->size); + else + clear_LDT(); + + DEBUG_LOCKS_WARN_ON(preemptible()); +} + /* * Used for LDT copy/destruction. 
*/ @@ -78,12 +122,12 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, * was called and then modify_ldt changed * prev->context.ldt but suppressed an IPI to this CPU. * In this case, prev->context.ldt != NULL, because we - * never free an LDT while the mm still exists. That - * means that next->context.ldt != prev->context.ldt, - * because mms never share an LDT. + * never set context.ldt to NULL while the mm still + * exists. That means that next->context.ldt != + * prev->context.ldt, because mms never share an LDT. */ if (unlikely(prev->context.ldt != next->context.ldt)) - load_LDT_nolock(&next->context); + load_mm_ldt(next); } #ifdef CONFIG_SMP else { @@ -106,7 +150,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, load_cr3(next->pgd); trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); load_mm_cr4(next); - load_LDT_nolock(&next->context); + load_mm_ldt(next); } } #endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index a62cf04dac8ae..205e0f3df501b 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1434,7 +1434,7 @@ void cpu_init(void) load_sp0(t, ¤t->thread); set_tss_desc(cpu, t); load_TR_desc(); - load_LDT(&init_mm.context); + load_mm_ldt(&init_mm); clear_all_debug_regs(); dbg_restore_debug_regs(); @@ -1483,7 +1483,7 @@ void cpu_init(void) load_sp0(t, thread); set_tss_desc(cpu, t); load_TR_desc(); - load_LDT(&init_mm.context); + load_mm_ldt(&init_mm); t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index aa4e3a74e5410..4cc98a4e8ea95 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -2170,21 +2170,25 @@ static unsigned long get_segment_base(unsigned int segment) int idx = segment >> 3; if ((segment & SEGMENT_TI_MASK) == SEGMENT_LDT) { + struct ldt_struct *ldt; + if (idx > LDT_ENTRIES) return 0; - if (idx > current->active_mm->context.size) + /* IRQs are off, so this synchronizes with smp_store_release */ + ldt = lockless_dereference(current->active_mm->context.ldt); + if (!ldt || idx > ldt->size) return 0; - desc = current->active_mm->context.ldt; + desc = &ldt->entries[idx]; } else { if (idx > GDT_ENTRIES) return 0; - desc = raw_cpu_ptr(gdt_page.gdt); + desc = raw_cpu_ptr(gdt_page.gdt) + idx; } - return get_desc_base(desc + idx); + return get_desc_base(desc); } #ifdef CONFIG_COMPAT diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index c37886d759cca..2bcc0525f1c10 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -20,82 +21,82 @@ #include #include -#ifdef CONFIG_SMP +/* context.lock is held for us, so we don't need any locking. */ static void flush_ldt(void *current_mm) { - if (current->active_mm == current_mm) - load_LDT(¤t->active_mm->context); + mm_context_t *pc; + + if (current->active_mm != current_mm) + return; + + pc = ¤t->active_mm->context; + set_ldt(pc->ldt->entries, pc->ldt->size); } -#endif -static int alloc_ldt(mm_context_t *pc, int mincount, int reload) +/* The caller must call finalize_ldt_struct on the result. LDT starts zeroed. 
*/ +static struct ldt_struct *alloc_ldt_struct(int size) { - void *oldldt, *newldt; - int oldsize; - - if (mincount <= pc->size) - return 0; - oldsize = pc->size; - mincount = (mincount + (PAGE_SIZE / LDT_ENTRY_SIZE - 1)) & - (~(PAGE_SIZE / LDT_ENTRY_SIZE - 1)); - if (mincount * LDT_ENTRY_SIZE > PAGE_SIZE) - newldt = vmalloc(mincount * LDT_ENTRY_SIZE); + struct ldt_struct *new_ldt; + int alloc_size; + + if (size > LDT_ENTRIES) + return NULL; + + new_ldt = kmalloc(sizeof(struct ldt_struct), GFP_KERNEL); + if (!new_ldt) + return NULL; + + BUILD_BUG_ON(LDT_ENTRY_SIZE != sizeof(struct desc_struct)); + alloc_size = size * LDT_ENTRY_SIZE; + + /* + * Xen is very picky: it requires a page-aligned LDT that has no + * trailing nonzero bytes in any page that contains LDT descriptors. + * Keep it simple: zero the whole allocation and never allocate less + * than PAGE_SIZE. + */ + if (alloc_size > PAGE_SIZE) + new_ldt->entries = vzalloc(alloc_size); else - newldt = (void *)__get_free_page(GFP_KERNEL); - - if (!newldt) - return -ENOMEM; + new_ldt->entries = kzalloc(PAGE_SIZE, GFP_KERNEL); - if (oldsize) - memcpy(newldt, pc->ldt, oldsize * LDT_ENTRY_SIZE); - oldldt = pc->ldt; - memset(newldt + oldsize * LDT_ENTRY_SIZE, 0, - (mincount - oldsize) * LDT_ENTRY_SIZE); + if (!new_ldt->entries) { + kfree(new_ldt); + return NULL; + } - paravirt_alloc_ldt(newldt, mincount); + new_ldt->size = size; + return new_ldt; +} -#ifdef CONFIG_X86_64 - /* CHECKME: Do we really need this ? */ - wmb(); -#endif - pc->ldt = newldt; - wmb(); - pc->size = mincount; - wmb(); - - if (reload) { -#ifdef CONFIG_SMP - preempt_disable(); - load_LDT(pc); - if (!cpumask_equal(mm_cpumask(current->mm), - cpumask_of(smp_processor_id()))) - smp_call_function(flush_ldt, current->mm, 1); - preempt_enable(); -#else - load_LDT(pc); -#endif - } - if (oldsize) { - paravirt_free_ldt(oldldt, oldsize); - if (oldsize * LDT_ENTRY_SIZE > PAGE_SIZE) - vfree(oldldt); - else - put_page(virt_to_page(oldldt)); - } - return 0; +/* After calling this, the LDT is immutable. */ +static void finalize_ldt_struct(struct ldt_struct *ldt) +{ + paravirt_alloc_ldt(ldt->entries, ldt->size); } -static inline int copy_ldt(mm_context_t *new, mm_context_t *old) +/* context.lock is held */ +static void install_ldt(struct mm_struct *current_mm, + struct ldt_struct *ldt) { - int err = alloc_ldt(new, old->size, 0); - int i; + /* Synchronizes with lockless_dereference in load_mm_ldt. */ + smp_store_release(¤t_mm->context.ldt, ldt); + + /* Activate the LDT for all CPUs using current_mm. 
*/ + on_each_cpu_mask(mm_cpumask(current_mm), flush_ldt, current_mm, true); +} - if (err < 0) - return err; +static void free_ldt_struct(struct ldt_struct *ldt) +{ + if (likely(!ldt)) + return; - for (i = 0; i < old->size; i++) - write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE); - return 0; + paravirt_free_ldt(ldt->entries, ldt->size); + if (ldt->size * LDT_ENTRY_SIZE > PAGE_SIZE) + vfree(ldt->entries); + else + kfree(ldt->entries); + kfree(ldt); } /* @@ -104,17 +105,37 @@ static inline int copy_ldt(mm_context_t *new, mm_context_t *old) */ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) { + struct ldt_struct *new_ldt; struct mm_struct *old_mm; int retval = 0; mutex_init(&mm->context.lock); - mm->context.size = 0; old_mm = current->mm; - if (old_mm && old_mm->context.size > 0) { - mutex_lock(&old_mm->context.lock); - retval = copy_ldt(&mm->context, &old_mm->context); - mutex_unlock(&old_mm->context.lock); + if (!old_mm) { + mm->context.ldt = NULL; + return 0; } + + mutex_lock(&old_mm->context.lock); + if (!old_mm->context.ldt) { + mm->context.ldt = NULL; + goto out_unlock; + } + + new_ldt = alloc_ldt_struct(old_mm->context.ldt->size); + if (!new_ldt) { + retval = -ENOMEM; + goto out_unlock; + } + + memcpy(new_ldt->entries, old_mm->context.ldt->entries, + new_ldt->size * LDT_ENTRY_SIZE); + finalize_ldt_struct(new_ldt); + + mm->context.ldt = new_ldt; + +out_unlock: + mutex_unlock(&old_mm->context.lock); return retval; } @@ -125,53 +146,47 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) */ void destroy_context(struct mm_struct *mm) { - if (mm->context.size) { -#ifdef CONFIG_X86_32 - /* CHECKME: Can this ever happen ? */ - if (mm == current->active_mm) - clear_LDT(); -#endif - paravirt_free_ldt(mm->context.ldt, mm->context.size); - if (mm->context.size * LDT_ENTRY_SIZE > PAGE_SIZE) - vfree(mm->context.ldt); - else - put_page(virt_to_page(mm->context.ldt)); - mm->context.size = 0; - } + free_ldt_struct(mm->context.ldt); + mm->context.ldt = NULL; } static int read_ldt(void __user *ptr, unsigned long bytecount) { - int err; + int retval; unsigned long size; struct mm_struct *mm = current->mm; - if (!mm->context.size) - return 0; + mutex_lock(&mm->context.lock); + + if (!mm->context.ldt) { + retval = 0; + goto out_unlock; + } + if (bytecount > LDT_ENTRY_SIZE * LDT_ENTRIES) bytecount = LDT_ENTRY_SIZE * LDT_ENTRIES; - mutex_lock(&mm->context.lock); - size = mm->context.size * LDT_ENTRY_SIZE; + size = mm->context.ldt->size * LDT_ENTRY_SIZE; if (size > bytecount) size = bytecount; - err = 0; - if (copy_to_user(ptr, mm->context.ldt, size)) - err = -EFAULT; - mutex_unlock(&mm->context.lock); - if (err < 0) - goto error_return; + if (copy_to_user(ptr, mm->context.ldt->entries, size)) { + retval = -EFAULT; + goto out_unlock; + } + if (size != bytecount) { - /* zero-fill the rest */ - if (clear_user(ptr + size, bytecount - size) != 0) { - err = -EFAULT; - goto error_return; + /* Zero-fill the rest and pretend we read bytecount bytes. 
*/ + if (clear_user(ptr + size, bytecount - size)) { + retval = -EFAULT; + goto out_unlock; } } - return bytecount; -error_return: - return err; + retval = bytecount; + +out_unlock: + mutex_unlock(&mm->context.lock); + return retval; } static int read_default_ldt(void __user *ptr, unsigned long bytecount) @@ -195,6 +210,8 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) struct desc_struct ldt; int error; struct user_desc ldt_info; + int oldsize, newsize; + struct ldt_struct *new_ldt, *old_ldt; error = -EINVAL; if (bytecount != sizeof(ldt_info)) @@ -213,34 +230,39 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) goto out; } - mutex_lock(&mm->context.lock); - if (ldt_info.entry_number >= mm->context.size) { - error = alloc_ldt(¤t->mm->context, - ldt_info.entry_number + 1, 1); - if (error < 0) - goto out_unlock; - } - - /* Allow LDTs to be cleared by the user. */ - if (ldt_info.base_addr == 0 && ldt_info.limit == 0) { - if (oldmode || LDT_empty(&ldt_info)) { - memset(&ldt, 0, sizeof(ldt)); - goto install; + if ((oldmode && !ldt_info.base_addr && !ldt_info.limit) || + LDT_empty(&ldt_info)) { + /* The user wants to clear the entry. */ + memset(&ldt, 0, sizeof(ldt)); + } else { + if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) { + error = -EINVAL; + goto out; } + + fill_ldt(&ldt, &ldt_info); + if (oldmode) + ldt.avl = 0; } - if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) { - error = -EINVAL; + mutex_lock(&mm->context.lock); + + old_ldt = mm->context.ldt; + oldsize = old_ldt ? old_ldt->size : 0; + newsize = max((int)(ldt_info.entry_number + 1), oldsize); + + error = -ENOMEM; + new_ldt = alloc_ldt_struct(newsize); + if (!new_ldt) goto out_unlock; - } - fill_ldt(&ldt, &ldt_info); - if (oldmode) - ldt.avl = 0; + if (old_ldt) + memcpy(new_ldt->entries, old_ldt->entries, oldsize * LDT_ENTRY_SIZE); + new_ldt->entries[ldt_info.entry_number] = ldt; + finalize_ldt_struct(new_ldt); - /* Install the new entry ... */ -install: - write_ldt_entry(mm->context.ldt, ldt_info.entry_number, &ldt); + install_ldt(mm, new_ldt); + free_ldt_struct(old_ldt); error = 0; out_unlock: diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index ddfdbf74f1744..5e0bf57d9944a 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -122,11 +122,11 @@ void __show_regs(struct pt_regs *regs, int all) void release_thread(struct task_struct *dead_task) { if (dead_task->mm) { - if (dead_task->mm->context.size) { + if (dead_task->mm->context.ldt) { pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n", dead_task->comm, dead_task->mm->context.ldt, - dead_task->mm->context.size); + dead_task->mm->context.ldt->size); BUG(); } } diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index 9b4d51d0c0d01..6273324186ac5 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c @@ -5,6 +5,7 @@ #include #include #include +#include unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs) { @@ -30,10 +31,11 @@ unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *re seg &= ~7UL; mutex_lock(&child->mm->context.lock); - if (unlikely((seg >> 3) >= child->mm->context.size)) + if (unlikely(!child->mm->context.ldt || + (seg >> 3) >= child->mm->context.ldt->size)) addr = -1L; /* bogus selector, access would fault */ else { - desc = child->mm->context.ldt + seg; + desc = &child->mm->context.ldt->entries[seg]; base = get_desc_base(desc); /* 16-bit code segment? 
*/ diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 757678fb26e1a..bf9384488399c 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -23,6 +23,7 @@ #include #include /* pcntxt_mask */ #include +#include #ifdef CONFIG_X86_32 __visible unsigned long saved_context_ebx; @@ -154,7 +155,7 @@ static void fix_processor_context(void) syscall_init(); /* This sets MSR_*STAR and related */ #endif load_TR_desc(); /* This does ltr */ - load_LDT(¤t->active_mm->context); /* This does lldt */ + load_mm_ldt(current->active_mm); /* This does lldt */ } /** From 7a1644e246c65eb58b04c3167dd5307de7ac86c9 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 6 Aug 2015 10:04:38 +0200 Subject: [PATCH 0699/2091] x86/ldt: Correct LDT access in single stepping logic commit 136d9d83c07c5e30ac49fc83b27e8c4842f108fc upstream. Commit 37868fe113ff ("x86/ldt: Make modify_ldt synchronous") introduced a new struct ldt_struct anchored at mm->context.ldt. convert_ip_to_linear() was changed to reflect this, but indexing into the ldt has to be changed as the pointer is no longer void *. Signed-off-by: Juergen Gross Reviewed-by: Andy Lutomirski Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: bp@suse.de Link: http://lkml.kernel.org/r/1438848278-12906-1-git-send-email-jgross@suse.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/step.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index 6273324186ac5..0ccb53a9fcd93 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c @@ -28,11 +28,11 @@ unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *re struct desc_struct *desc; unsigned long base; - seg &= ~7UL; + seg >>= 3; mutex_lock(&child->mm->context.lock); if (unlikely(!child->mm->context.ldt || - (seg >> 3) >= child->mm->context.ldt->size)) + seg >= child->mm->context.ldt->size)) addr = -1L; /* bogus selector, access would fault */ else { desc = &child->mm->context.ldt->entries[seg]; From b33c0beeedf3fd7e329b0a81add3a15920184019 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 6 Aug 2015 19:54:34 +0200 Subject: [PATCH 0700/2091] x86/ldt: Correct FPU emulation access to LDT commit 4809146b86c3d41ce588fdb767d021e2a80600dd upstream. Commit 37868fe113ff ("x86/ldt: Make modify_ldt synchronous") introduced a new struct ldt_struct anchored at mm->context.ldt. Adapt the x86 fpu emulation code to use that new structure. 
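For illustration only (not part of these patches): the idiom the ldt_struct rework above introduces, and which this follow-up and the next ones build on, is publish-by-pointer. Writers build a complete, never-again-modified ldt_struct and make it visible with a single release store; readers either hold context.lock or pair a lockless_dereference() with the IPI that follows every update. A stripped-down, self-contained sketch, with example_* names standing in for the real mm->context fields:

#include <linux/compiler.h>     /* lockless_dereference() */
#include <asm/barrier.h>        /* smp_store_release() */
#include <asm/desc.h>           /* set_ldt(), clear_LDT() */

/* Stand-ins for struct ldt_struct and mm->context.ldt. */
struct example_ldt {
        struct desc_struct *entries;
        int size;
};

struct example_ctx {
        struct example_ldt *ldt;
};

static void example_install(struct example_ctx *ctx, struct example_ldt *new)
{
        /* Publish the fully initialised, immutable object; pairs with
         * lockless_dereference() in example_load(). The real install_ldt()
         * then IPIs every CPU using the mm (flush_ldt). */
        smp_store_release(&ctx->ldt, new);
}

static void example_load(struct example_ctx *ctx)
{
        struct example_ldt *ldt = lockless_dereference(ctx->ldt);

        if (ldt)
                set_ldt(ldt->entries, ldt->size);
        else
                clear_LDT();
}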
Signed-off-by: Juergen Gross Reviewed-by: Andy Lutomirski Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: billm@melbpc.org.au Link: http://lkml.kernel.org/r/1438883674-1240-1-git-send-email-jgross@suse.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/math-emu/fpu_entry.c | 3 +-- arch/x86/math-emu/fpu_system.h | 21 ++++++++++++++++++--- arch/x86/math-emu/get_address.c | 3 +-- 3 files changed, 20 insertions(+), 7 deletions(-) diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c index 9b868124128d7..274a52b1183ea 100644 --- a/arch/x86/math-emu/fpu_entry.c +++ b/arch/x86/math-emu/fpu_entry.c @@ -29,7 +29,6 @@ #include #include -#include #include #include @@ -185,7 +184,7 @@ void math_emulate(struct math_emu_info *info) math_abort(FPU_info, SIGILL); } - code_descriptor = LDT_DESCRIPTOR(FPU_CS); + code_descriptor = FPU_get_ldt_descriptor(FPU_CS); if (SEG_D_SIZE(code_descriptor)) { /* The above test may be wrong, the book is not clear */ /* Segmented 32 bit protected mode */ diff --git a/arch/x86/math-emu/fpu_system.h b/arch/x86/math-emu/fpu_system.h index 2c614410a5f39..d342fce49447c 100644 --- a/arch/x86/math-emu/fpu_system.h +++ b/arch/x86/math-emu/fpu_system.h @@ -16,9 +16,24 @@ #include #include -/* s is always from a cpu register, and the cpu does bounds checking - * during register load --> no further bounds checks needed */ -#define LDT_DESCRIPTOR(s) (((struct desc_struct *)current->mm->context.ldt)[(s) >> 3]) +#include +#include + +static inline struct desc_struct FPU_get_ldt_descriptor(unsigned seg) +{ + static struct desc_struct zero_desc; + struct desc_struct ret = zero_desc; + +#ifdef CONFIG_MODIFY_LDT_SYSCALL + seg >>= 3; + mutex_lock(¤t->mm->context.lock); + if (current->mm->context.ldt && seg < current->mm->context.ldt->size) + ret = current->mm->context.ldt->entries[seg]; + mutex_unlock(¤t->mm->context.lock); +#endif + return ret; +} + #define SEG_D_SIZE(x) ((x).b & (3 << 21)) #define SEG_G_BIT(x) ((x).b & (1 << 23)) #define SEG_GRANULARITY(x) (((x).b & (1 << 23)) ? 4096 : 1) diff --git a/arch/x86/math-emu/get_address.c b/arch/x86/math-emu/get_address.c index 6ef5e99380f92..d13cab2aec454 100644 --- a/arch/x86/math-emu/get_address.c +++ b/arch/x86/math-emu/get_address.c @@ -20,7 +20,6 @@ #include #include -#include #include "fpu_system.h" #include "exception.h" @@ -158,7 +157,7 @@ static long pm_address(u_char FPU_modrm, u_char segment, addr->selector = PM_REG_(segment); } - descriptor = LDT_DESCRIPTOR(PM_REG_(segment)); + descriptor = FPU_get_ldt_descriptor(segment); base_address = SEG_BASE_ADDR(descriptor); address = base_address + offset; limit = base_address From 7b638e8e6d2cca69e5756cf4eb66fc41a661a037 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 14 Aug 2015 15:02:55 -0700 Subject: [PATCH 0701/2091] x86/ldt: Further fix FPU emulation commit 12e244f4b550498bbaf654a52f93633f7dde2dc7 upstream. The previous fix confused a selector with a segment prefix. Fix it. Compile-tested only. 
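For illustration only: the confusion fixed here is between a segment selector and the segment-prefix identifier used inside the emulator. A selector packs a descriptor-table index in its upper bits, which is what FPU_get_ldt_descriptor() strips off; the 'segment' argument of pm_address() only says which segment register the instruction referenced, while the selector value itself is what was loaded into addr->selector. A tiny sketch of the layout assumption:

/*
 * Protected-mode selector layout the accessor relies on:
 *   bits 15..3  descriptor table index  (hence the ">>= 3")
 *   bit  2      table indicator (0 = GDT, 1 = LDT)
 *   bits 1..0   requested privilege level
 */
static inline unsigned int example_selector_index(unsigned int selector)
{
        return selector >> 3;
}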
Cc: Juergen Gross Reported-by: Linus Torvalds Fixes: 4809146b86c3 ("x86/ldt: Correct FPU emulation access to LDT") Signed-off-by: Andy Lutomirski Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/x86/math-emu/get_address.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/math-emu/get_address.c b/arch/x86/math-emu/get_address.c index d13cab2aec454..8300db71c2a62 100644 --- a/arch/x86/math-emu/get_address.c +++ b/arch/x86/math-emu/get_address.c @@ -157,7 +157,7 @@ static long pm_address(u_char FPU_modrm, u_char segment, addr->selector = PM_REG_(segment); } - descriptor = FPU_get_ldt_descriptor(segment); + descriptor = FPU_get_ldt_descriptor(addr->selector); base_address = SEG_BASE_ADDR(descriptor); address = base_address + offset; limit = base_address From 5a206fe2675e696828be0df1c48a7e9f30e03003 Mon Sep 17 00:00:00 2001 From: Stephen Chandler Paul Date: Fri, 21 Aug 2015 14:16:12 -0400 Subject: [PATCH 0702/2091] DRM - radeon: Don't link train DisplayPort on HPD until we get the dpcd commit 924f92bf12bfbef3662619e3ed24a1cea7c1cbcd upstream. Most of the time this isn't an issue since hotplugging an adaptor will trigger a crtc mode change which in turn, causes the driver to probe every DisplayPort for a dpcd. However, in cases where hotplugging doesn't cause a mode change (specifically when one unplugs a monitor from a DisplayPort connector, then plugs that same monitor back in seconds later on the same port without any other monitors connected), we never probe for the dpcd before starting the initial link training. What happens from there looks like this: - GPU has only one monitor connected. It's connected via DisplayPort, and does not go through an adaptor of any sort. - User unplugs DisplayPort connector from GPU. - Change in HPD is detected by the driver, we probe every DisplayPort for a possible connection. - Probe the port the user originally had the monitor connected on for it's dpcd. This fails, and we clear the first (and only the first) byte of the dpcd to indicate we no longer have a dpcd for this port. - User plugs the previously disconnected monitor back into the same DisplayPort. - radeon_connector_hotplug() is called before everyone else, and tries to handle the link training. Since only the first byte of the dpcd is zeroed, the driver is able to complete link training but does so against the wrong dpcd, causing it to initialize the link with the wrong settings. - Display stays blank (usually), dpcd is probed after the initial link training, and the driver prints no obvious messages to the log. In theory, since only one byte of the dpcd is chopped off (specifically, the byte that contains the revision information for DisplayPort), it's not entirely impossible that this bug may not show on certain monitors. For instance, the only reason this bug was visible on my ASUS PB238 monitor was due to the fact that this monitor using the enhanced framing symbol sequence, the flag for which is ignored if the radeon driver thinks that the DisplayPort version is below 1.1. 
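For illustration only: the stale-capability hazard described above comes from zeroing only the first byte of the cached DPCD on unplug. A sketch of that assumption using the generic DP helper indices (the actual guard added by the patch is in the hunk below):

/*
 * Sketch only. On unplug the driver cleared just dpcd[DP_DPCD_REV];
 * link rate, lane count and the enhanced-framing bit kept the previous
 * monitor's values, so training against this cache used "pre-DP 1.1"
 * rules with otherwise stale capabilities.
 */
#include <drm/drm_dp_helper.h>

static bool example_have_dpcd(const u8 dpcd[DP_RECEIVER_CAP_SIZE])
{
        return dpcd[DP_DPCD_REV] != 0;
}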
Signed-off-by: Stephen Chandler Paul Reviewed-by: Jerome Glisse Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/radeon_connectors.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index 94b21ae70ef72..5a2cafb4f1bc5 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -95,6 +95,11 @@ void radeon_connector_hotplug(struct drm_connector *connector) if (!radeon_hpd_sense(rdev, radeon_connector->hpd.hpd)) { drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); } else if (radeon_dp_needs_link_train(radeon_connector)) { + /* Don't try to start link training before we + * have the dpcd */ + if (!radeon_dp_getdpcd(radeon_connector)) + return; + /* set it to OFF so that drm_helper_connector_dpms() * won't return immediately since the current state * is ON at this point. From 8aece0b0ccfda469bbe589cc178fe832f15a69be Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 30 Jun 2015 17:06:47 +0300 Subject: [PATCH 0703/2091] drm/i915: apply the PCI_D0/D3 hibernation workaround everywhere on pre GEN6 commit 54875571bbfde00fc63741715c531cbb5246c3b2 upstream. commit da2bc1b9db3351addd293e5b82757efe1f77ed1d Author: Imre Deak Date: Thu Oct 23 19:23:26 2014 +0300 drm/i915: add poweroff_late handler introduced a regression on old platforms during hibernation. A workaround was added in commit ab3be73fa7b43f4c3648ce29b5fd649ea54d3adb Author: Imre Deak Date: Mon Mar 2 13:04:41 2015 +0200 drm/i915: gen4: work around hang during hibernation using an explicit blacklist for the GENs/BIOS vendors where the issue was reported. Later there we had reports of the same failure on platforms not on this list. To my best knowledge the correct thing to do is still to put the device to PCI D3 state during hibernation, see [1] and [2] for the reasons. This also aligns with our future plans to unify more the runtime and system suspend/resume paths. Since an exact blacklist seems to be impractical (multiple GENs and BIOS vendors are affected) apply the workaround on everything pre GEN6. [1] http://lists.freedesktop.org/archives/intel-gfx/2015-February/060710.html [2] https://lkml.org/lkml/2015/6/22/274 Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=95061 Reported-by: Ilya Tumaykin Reported-by: Dirk Griesbach Reported-by: Pavel Machek Reported-by: Mikko Rapeli Tested-by: Mikko Rapeli Reported-by: Paul Bolle Signed-off-by: Imre Deak Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_drv.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index a19d2c71e2050..fb91df1631d9e 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -647,15 +647,18 @@ static int i915_drm_suspend_late(struct drm_device *drm_dev, bool hibernation) pci_disable_device(drm_dev->pdev); /* - * During hibernation on some GEN4 platforms the BIOS may try to access + * During hibernation on some platforms the BIOS may try to access * the device even though it's already in D3 and hang the machine. So * leave the device in D0 on those platforms and hope the BIOS will - * power down the device properly. Platforms where this was seen: - * Lenovo Thinkpad X301, X61s + * power down the device properly. 
The issue was seen on multiple old + * GENs with different BIOS vendors, so having an explicit blacklist + * is inpractical; apply the workaround on everything pre GEN6. The + * platforms where the issue was seen: + * Lenovo Thinkpad X301, X61s, X60, T60, X41 + * Fujitsu FSC S7110 + * Acer Aspire 1830T */ - if (!(hibernation && - drm_dev->pdev->subsystem_vendor == PCI_VENDOR_ID_LENOVO && - INTEL_INFO(dev_priv)->gen == 4)) + if (!(hibernation && INTEL_INFO(dev_priv)->gen < 6)) pci_set_power_state(drm_dev->pdev, PCI_D3hot); return 0; From 16c5921e6028fb684b7082eca718e2189fc3ae2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 20 Aug 2015 19:37:29 +0300 Subject: [PATCH 0704/2091] drm/i915: Check DP link status on long hpd too MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d14e7b6d1d8747826cb900db852351c550e00fdd upstream. We are no longer checkling the DP link status on long hpd. We used to do that from the .hot_plug() handler, but it was removed when MST got introduced. If there's no userspace we now fail to retrain the link if the sink power is toggled (or cable yanked and replugged), meaning the user is left staring at a blank screen. With the retraining put back that should be fixed. Also remove the leftover comment that referred to the old retraining from .hot_plug(). Fixes a regression introduced in: commit 0e32b39ceed665bfa4a77a4bc307b6652b991632 Author: Dave Airlie Date: Fri May 2 14:02:48 2014 +1000 drm/i915: add DP 1.2 MST support (v0.7) Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89453 Tested-by: Palmer Dabbelt Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91407 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89461 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89594 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=85641 Cc: Dave Airlie Signed-off-by: Ville Syrjälä Reviewed-by: Daniel Vetter Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_dp.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index b1fe32b119ef8..fb2983f77141b 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -4691,9 +4691,12 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd) intel_dp_probe_oui(intel_dp); - if (!intel_dp_probe_mst(intel_dp)) + if (!intel_dp_probe_mst(intel_dp)) { + drm_modeset_lock(&dev->mode_config.connection_mutex, NULL); + intel_dp_check_link_status(intel_dp); + drm_modeset_unlock(&dev->mode_config.connection_mutex); goto mst_fail; - + } } else { if (intel_dp->is_mst) { if (intel_dp_check_mst_status(intel_dp) == -EINVAL) @@ -4701,10 +4704,6 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd) } if (!intel_dp->is_mst) { - /* - * we'll check the link status via the normal hot plug path later - - * but for short hpds we should check it now - */ drm_modeset_lock(&dev->mode_config.connection_mutex, NULL); intel_dp_check_link_status(intel_dp); drm_modeset_unlock(&dev->mode_config.connection_mutex); From b394195ca54af86c6ebb44396c29c24875870de4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 27 Aug 2015 17:23:31 +0300 Subject: [PATCH 0705/2091] drm/radeon/atom: Send out the full AUX address MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 
3f8340cc72c9a1a4b49bce7802afd7f248400ef5 upstream. AUX addresses are 20 bits long. Send out the entire address instead of just the low 16 bits. Cc: Alex Deucher Cc: "Christian König" Signed-off-by: Ville Syrjälä Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/atombios_dp.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c index b435c859dcbc3..447dbfa6c793e 100644 --- a/drivers/gpu/drm/radeon/atombios_dp.c +++ b/drivers/gpu/drm/radeon/atombios_dp.c @@ -171,8 +171,9 @@ radeon_dp_aux_transfer_atom(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg) return -E2BIG; tx_buf[0] = msg->address & 0xff; - tx_buf[1] = msg->address >> 8; - tx_buf[2] = msg->request << 4; + tx_buf[1] = (msg->address >> 8) & 0xff; + tx_buf[2] = (msg->request << 4) | + ((msg->address >> 16) & 0xf); tx_buf[3] = msg->size ? (msg->size - 1) : 0; switch (msg->request & ~DP_AUX_I2C_MOT) { From 00f6fa71ff02e572f660f7971232e9f3a95c9838 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 31 Aug 2015 11:15:05 -0400 Subject: [PATCH 0706/2091] drm/radeon/native: Send out the full AUX address MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 7040c399aea2b0213a9aefd73e507369a6d641d6 upstream. AUX addresses are 20 bits long. Send out the entire address instead of just the low 16 bits. Port of: drm/radeon/atom: Send out the full AUX address to radeon non-atom aux path Reviewed-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/radeon_dp_auxch.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_dp_auxch.c b/drivers/gpu/drm/radeon/radeon_dp_auxch.c index fcbd60bb03495..3b0c229d7dcd2 100644 --- a/drivers/gpu/drm/radeon/radeon_dp_auxch.c +++ b/drivers/gpu/drm/radeon/radeon_dp_auxch.c @@ -116,8 +116,8 @@ radeon_dp_aux_transfer_native(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg AUX_SW_WR_BYTES(bytes)); /* write the data header into the registers */ - /* request, addres, msg size */ - byte = (msg->request << 4); + /* request, address, msg size */ + byte = (msg->request << 4) | ((msg->address >> 16) & 0xf); WREG32(AUX_SW_DATA + aux_offset[instance], AUX_SW_DATA_MASK(byte) | AUX_SW_AUTOINCREMENT_DISABLE); From 6002d1973933c76033c08d531e3f29d2da68000b Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 27 Aug 2015 09:52:22 -0400 Subject: [PATCH 0707/2091] drm/radeon: fix HDMI quantization_range for pre-DCE5 asics commit 86b7709d48f0df8796bddd7e1ce45c6fb7a7c6ec upstream. Support for output_csc is only available on DCE5 and newer so don't mess with the HDMI quantization_range on pre-DCE5 asics. 
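For illustration only: the header layout both AUX fixes above implement, taken from the atombios hunk, splits the 20-bit address across the first bytes with the request nibble on top (the native path in the second patch packs the same request/addr[19:16] byte):

/* Sketch of the AUX header packing used by the two fixes above. */
static void example_pack_aux_header(u8 buf[4], u32 address, u8 request, u8 size)
{
        buf[0] = address & 0xff;                /* address bits  7..0  */
        buf[1] = (address >> 8) & 0xff;         /* address bits 15..8  */
        buf[2] = (request << 4) |               /* request nibble      */
                 ((address >> 16) & 0xf);       /* address bits 19..16 */
        buf[3] = size ? (size - 1) : 0;         /* transfer length - 1 */
}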
bug: https://bugs.freedesktop.org/show_bug.cgi?id=83226 Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/radeon_audio.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_audio.c b/drivers/gpu/drm/radeon/radeon_audio.c index 59b3d3221294c..d77dd1430d58b 100644 --- a/drivers/gpu/drm/radeon/radeon_audio.c +++ b/drivers/gpu/drm/radeon/radeon_audio.c @@ -522,13 +522,15 @@ static int radeon_audio_set_avi_packet(struct drm_encoder *encoder, return err; } - if (drm_rgb_quant_range_selectable(radeon_connector_edid(connector))) { - if (radeon_encoder->output_csc == RADEON_OUTPUT_CSC_TVRGB) - frame.quantization_range = HDMI_QUANTIZATION_RANGE_LIMITED; - else - frame.quantization_range = HDMI_QUANTIZATION_RANGE_FULL; - } else { - frame.quantization_range = HDMI_QUANTIZATION_RANGE_DEFAULT; + if (radeon_encoder->output_csc != RADEON_OUTPUT_CSC_BYPASS) { + if (drm_rgb_quant_range_selectable(radeon_connector_edid(connector))) { + if (radeon_encoder->output_csc == RADEON_OUTPUT_CSC_TVRGB) + frame.quantization_range = HDMI_QUANTIZATION_RANGE_LIMITED; + else + frame.quantization_range = HDMI_QUANTIZATION_RANGE_FULL; + } else { + frame.quantization_range = HDMI_QUANTIZATION_RANGE_DEFAULT; + } } err = hdmi_avi_infoframe_pack(&frame, buffer, sizeof(buffer)); From cdfbbe252c3fa94a4ce83fb8d8331cc58de2fc35 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Wed, 15 Jul 2015 13:57:35 +0200 Subject: [PATCH 0708/2091] drm/i915: Preserve SSC earlier commit 69f92f67b68ab7028ffe15f0eea76b59f8859383 upstream. Commit 92122789b2d6 ("drm/i915: preserve SSC if previously set v3") added code to intel_modeset_gem_init to override the SSC status read from VBT with the SSC status set by BIOS. However, intel_modeset_gem_init is invoked *after* intel_modeset_init, which calls intel_setup_outputs, which *modifies* SSC status by way of intel_init_pch_refclk. So unlike advertised, intel_modeset_gem_init doesn't preserve the SSC status set by BIOS but whatever intel_init_pch_refclk decided on. This is a problem on dual gpu laptops such as the MacBook Pro which require either a handler to switch DDC lines, or the discrete gpu to proxy DDC/AUX communication: Both the handler and the discrete gpu may initialize after the i915 driver, and consequently, an LVDS connector may initially seem disconnected and the SSC therefore is disabled by intel_init_pch_refclk, but on reprobe the connector may turn out to be connected and the SSC must then be enabled. Due to 92122789b2d6 however, the SSC is not enabled on reprobe since it is assumed BIOS disabled it while in fact it was disabled by intel_init_pch_refclk. Also, because the SSC status is preserved so late, the preserved value only ever gets used on resume but not on panel initialization: intel_modeset_init calls intel_init_display which indirectly calls intel_panel_use_ssc via multiple subroutines, *before* the BIOS value overrides the VBT value in intel_modeset_gem_init (intel_panel_use_ssc is the sole user of dev_priv->vbt.lvds_use_ssc). Fix this by moving the code introduced by 92122789b2d6 from intel_modeset_gem_init to intel_modeset_init before the invocation of intel_setup_outputs and intel_init_display. 
Add a DRM_DEBUG_KMS as suggested way back by Jani: http://lists.freedesktop.org/archives/intel-gfx/2014-June/046666.html Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=88861 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=61115 Tested-by: Paul Hordiienko [MBP 6,2 2010 intel ILK + nvidia GT216 pre-retina] Tested-by: William Brown [MBP 8,2 2011 intel SNB + amd turks pre-retina] Tested-by: Lukas Wunner [MBP 9,1 2012 intel IVB + nvidia GK107 pre-retina] Tested-by: Bruno Bierbaumer [MBP 11,3 2013 intel HSW + nvidia GK107 retina -- work in progress] Fixes: 92122789b2d6 ("drm/i915: preserve SSC if previously set v3") Signed-off-by: Lukas Wunner Reviewed-by: Jesse Barnes Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_display.c | 29 +++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 57c887843dc3f..f208bbc6d58e2 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -13781,6 +13781,24 @@ void intel_modeset_init(struct drm_device *dev) if (INTEL_INFO(dev)->num_pipes == 0) return; + /* + * There may be no VBT; and if the BIOS enabled SSC we can + * just keep using it to avoid unnecessary flicker. Whereas if the + * BIOS isn't using it, don't assume it will work even if the VBT + * indicates as much. + */ + if (HAS_PCH_IBX(dev) || HAS_PCH_CPT(dev)) { + bool bios_lvds_use_ssc = !!(I915_READ(PCH_DREF_CONTROL) & + DREF_SSC1_ENABLE); + + if (dev_priv->vbt.lvds_use_ssc != bios_lvds_use_ssc) { + DRM_DEBUG_KMS("SSC %sabled by BIOS, overriding VBT which says %sabled\n", + bios_lvds_use_ssc ? "en" : "dis", + dev_priv->vbt.lvds_use_ssc ? "en" : "dis"); + dev_priv->vbt.lvds_use_ssc = bios_lvds_use_ssc; + } + } + intel_init_display(dev); intel_init_audio(dev); @@ -14266,7 +14284,6 @@ void intel_modeset_setup_hw_state(struct drm_device *dev, void intel_modeset_gem_init(struct drm_device *dev) { - struct drm_i915_private *dev_priv = dev->dev_private; struct drm_crtc *c; struct drm_i915_gem_object *obj; int ret; @@ -14275,16 +14292,6 @@ void intel_modeset_gem_init(struct drm_device *dev) intel_init_gt_powersave(dev); mutex_unlock(&dev->struct_mutex); - /* - * There may be no VBT; and if the BIOS enabled SSC we can - * just keep using it to avoid unnecessary flicker. Whereas if the - * BIOS isn't using it, don't assume it will work even if the VBT - * indicates as much. - */ - if (HAS_PCH_IBX(dev) || HAS_PCH_CPT(dev)) - dev_priv->vbt.lvds_use_ssc = !!(I915_READ(PCH_DREF_CONTROL) & - DREF_SSC1_ENABLE); - intel_modeset_init_hw(dev); intel_setup_overlay(dev); From 2c176780c2cd8e66d6fd1041debc9bdd909fa479 Mon Sep 17 00:00:00 2001 From: Jonathon Jongsma Date: Thu, 20 Aug 2015 14:04:32 -0500 Subject: [PATCH 0709/2091] drm/qxl: validate monitors config modes commit bd3e1c7c6de9f5f70d97cdb6c817151c0477c5e3 upstream. Due to some recent changes in drm_helper_probe_single_connector_modes_merge_bits(), old custom modes were not being pruned properly. In current kernels, drm_mode_validate_basic() is called to sanity-check each mode in the list. If the sanity-check passes, the mode's status gets set to to MODE_OK. In older kernels this check was not done, so old custom modes would still have a status of MODE_UNVERIFIED at this point, and would therefore be pruned later in the function. 
As a result of this new behavior, the list of modes for a device always includes every custom mode ever configured for the device, with the largest one listed first. Since desktop environments usually choose the first preferred mode when a hotplug event is emitted, this had the result of making it very difficult for the user to reduce the size of the display. The qxl driver did implement the mode_valid connector function, but it was empty. In order to restore the old behavior where old custom modes are pruned, we implement a proper mode_valid function for the qxl driver. This function now checks each mode against the last configured custom mode and the list of standard modes. If the mode doesn't match any of these, its status is set to MODE_BAD so that it will be pruned as expected. Signed-off-by: Jonathon Jongsma Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/qxl/qxl_display.c | 66 +++++++++++++++++++------------ drivers/gpu/drm/qxl/qxl_drv.h | 2 + 2 files changed, 42 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c index 4a0a8b29b0a1e..32248791bc4b0 100644 --- a/drivers/gpu/drm/qxl/qxl_display.c +++ b/drivers/gpu/drm/qxl/qxl_display.c @@ -160,9 +160,35 @@ static int qxl_add_monitors_config_modes(struct drm_connector *connector, *pwidth = head->width; *pheight = head->height; drm_mode_probed_add(connector, mode); + /* remember the last custom size for mode validation */ + qdev->monitors_config_width = mode->hdisplay; + qdev->monitors_config_height = mode->vdisplay; return 1; } +static struct mode_size { + int w; + int h; +} common_modes[] = { + { 640, 480}, + { 720, 480}, + { 800, 600}, + { 848, 480}, + {1024, 768}, + {1152, 768}, + {1280, 720}, + {1280, 800}, + {1280, 854}, + {1280, 960}, + {1280, 1024}, + {1440, 900}, + {1400, 1050}, + {1680, 1050}, + {1600, 1200}, + {1920, 1080}, + {1920, 1200} +}; + static int qxl_add_common_modes(struct drm_connector *connector, unsigned pwidth, unsigned pheight) @@ -170,29 +196,6 @@ static int qxl_add_common_modes(struct drm_connector *connector, struct drm_device *dev = connector->dev; struct drm_display_mode *mode = NULL; int i; - struct mode_size { - int w; - int h; - } common_modes[] = { - { 640, 480}, - { 720, 480}, - { 800, 600}, - { 848, 480}, - {1024, 768}, - {1152, 768}, - {1280, 720}, - {1280, 800}, - {1280, 854}, - {1280, 960}, - {1280, 1024}, - {1440, 900}, - {1400, 1050}, - {1680, 1050}, - {1600, 1200}, - {1920, 1080}, - {1920, 1200} - }; - for (i = 0; i < ARRAY_SIZE(common_modes); i++) { mode = drm_cvt_mode(dev, common_modes[i].w, common_modes[i].h, 60, false, false, false); @@ -823,11 +826,22 @@ static int qxl_conn_get_modes(struct drm_connector *connector) static int qxl_conn_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { + struct drm_device *ddev = connector->dev; + struct qxl_device *qdev = ddev->dev_private; + int i; + /* TODO: is this called for user defined modes? 
(xrandr --add-mode) * TODO: check that the mode fits in the framebuffer */ - DRM_DEBUG("%s: %dx%d status=%d\n", mode->name, mode->hdisplay, - mode->vdisplay, mode->status); - return MODE_OK; + + if(qdev->monitors_config_width == mode->hdisplay && + qdev->monitors_config_height == mode->vdisplay) + return MODE_OK; + + for (i = 0; i < ARRAY_SIZE(common_modes); i++) { + if (common_modes[i].w == mode->hdisplay && common_modes[i].h == mode->vdisplay) + return MODE_OK; + } + return MODE_BAD; } static struct drm_encoder *qxl_best_encoder(struct drm_connector *connector) diff --git a/drivers/gpu/drm/qxl/qxl_drv.h b/drivers/gpu/drm/qxl/qxl_drv.h index 7c6cafe21f5f5..e66143cc1a7a9 100644 --- a/drivers/gpu/drm/qxl/qxl_drv.h +++ b/drivers/gpu/drm/qxl/qxl_drv.h @@ -325,6 +325,8 @@ struct qxl_device { struct work_struct fb_work; struct drm_property *hotplug_mode_update_property; + int monitors_config_width; + int monitors_config_height; }; /* forward declaration for QXL_INFO_IO */ From a822e861b7457e17d83ab7cb3f1c78c64e346cb1 Mon Sep 17 00:00:00 2001 From: Gaurav K Singh Date: Mon, 3 Aug 2015 15:45:32 +0530 Subject: [PATCH 0710/2091] drm/i915: Allow DSI dual link to be configured on any pipe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 824257857fd81f5e749831ff9cd63566b5a86abe upstream. Just like single link MIPI panels, similarly for dual link panels, pipe to be configured is based on the DVO port from VBT Block 2. In hardware, Port A is mapped with Pipe A and Port C is mapped with Pipe B. This issue got introduced in - commit 7e9804fdcffc650515c60f524b8b2076ee59e710 Author: Jani Nikula Date: Fri Jan 16 14:27:23 2015 +0200 drm/i915/dsi: add drm mipi dsi host support Signed-off-by: Gaurav K Singh Reviewed-by: Ville Syrjälä Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_dsi.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index 51966426addfb..c7a0b8d8fac9d 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -1036,11 +1036,7 @@ void intel_dsi_init(struct drm_device *dev) intel_connector->unregister = intel_connector_unregister; /* Pipe A maps to MIPI DSI port A, pipe B maps to MIPI DSI port C */ - if (dev_priv->vbt.dsi.config->dual_link) { - /* XXX: does dual link work on either pipe? */ - intel_encoder->crtc_mask = (1 << PIPE_A); - intel_dsi->ports = ((1 << PORT_A) | (1 << PORT_C)); - } else if (dev_priv->vbt.dsi.port == DVO_PORT_MIPIA) { + if (dev_priv->vbt.dsi.port == DVO_PORT_MIPIA) { intel_encoder->crtc_mask = (1 << PIPE_A); intel_dsi->ports = (1 << PORT_A); } else if (dev_priv->vbt.dsi.port == DVO_PORT_MIPIC) { @@ -1048,6 +1044,9 @@ void intel_dsi_init(struct drm_device *dev) intel_dsi->ports = (1 << PORT_C); } + if (dev_priv->vbt.dsi.config->dual_link) + intel_dsi->ports = ((1 << PORT_A) | (1 << PORT_C)); + /* Create a DSI host (and a device) for each port. */ for_each_dsi_port(port, intel_dsi->ports) { struct intel_dsi_host *host; From 5b2c006168304879990b33f696219eff47157d8d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 31 Aug 2015 15:10:39 +0100 Subject: [PATCH 0711/2091] drm/i915: Always mark the object as dirty when used by the GPU MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 51bc140431e233284660b1d22c47dec9ecdb521e upstream. 
There have been many hard to track down bugs whereby userspace forgot to flag a write buffer and then cause graphics corruption or a hung GPU when that buffer was later purged under memory pressure (as the buffer appeared clean, its pages would have been evicted rather than preserved and any changes more recent than in the backing storage would be lost). In retrospect this is a rare optimisation against memory pressure, already the slow path. If we always mark the buffer as dirty when accessed by the GPU, anything not used can still be evicted cheaply (ideal behaviour for mark-and-sweep eviction) but we do not run the risk of corruption. For correct read serialisation, userspace still has to notify when the GPU writes to an object. However, there are certain situations under which userspace may wish to tell white lies to the kernel... Signed-off-by: Chris Wilson Cc: Daniel Vetter Cc: Kristian Høgsberg Cc: Jesse Barnes Cc: "Goel, Akash" Cc: Michał Winiarski Reviewed-by: Daniel Vetter Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index a3190e793ed43..479024a4caadd 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1025,6 +1025,7 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas, u32 old_read = obj->base.read_domains; u32 old_write = obj->base.write_domain; + obj->dirty = 1; /* be paranoid */ obj->base.write_domain = obj->base.pending_write_domain; if (obj->base.write_domain == 0) obj->base.pending_read_domains |= obj->base.read_domains; @@ -1032,7 +1033,6 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas, i915_vma_move_to_active(vma, ring); if (obj->base.write_domain) { - obj->dirty = 1; i915_gem_request_assign(&obj->last_write_req, req); intel_fb_obj_invalidate(obj, ring, ORIGIN_CS); From f22f4cf524e66f20c86570023027504288f9bef2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 8 Sep 2015 14:17:13 +0100 Subject: [PATCH 0712/2091] drm/i915: Limit the number of loops for reading a split 64bit register MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit acd29f7b22262d9e848393b9b6ae13eb42d22514 upstream. In I915_READ64_2x32 we attempt to read a 64bit register using 2 32bit reads. Due to the nature of the registers we try to read in this manner, they may increment between the two instruction (e.g. a timestamp counter). To keep the result accurate, we repeat the read if we detect an overflow (i.e. the upper value varies). However, some hardware is just plain flaky and may endless loop as the the upper 32bits are not stable. Just give up after a couple of tries and report whatever we read last. v2: Use the most recent values when erring out on an unstable register. 
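The macro change in the diff below is easier to see in isolation. Here is a standalone sketch of the same bounded-retry idea (all names invented): read the upper half, then the lower, then the upper again; retry if the upper half moved, but give up after a couple of attempts and return the most recent pair instead of spinning forever on hardware whose upper half never settles.

#include <stdint.h>
#include <stdio.h>

static uint64_t fake_counter;

/* Simulated flaky hardware: the upper 32 bits change on every read. */
static uint32_t read_upper(void)
{
        fake_counter += 0x180000000ull;
        return fake_counter >> 32;
}

static uint32_t read_lower(void)
{
        return (uint32_t)fake_counter;
}

static uint64_t read_split64(void)
{
        uint32_t upper, old_upper, lower;
        int loop = 0;

        upper = read_upper();
        do {
                old_upper = upper;
                lower = read_lower();
                upper = read_upper();
        } while (upper != old_upper && loop++ < 2); /* bounded, as in the macro */

        return (uint64_t)upper << 32 | lower;
}

int main(void)
{
        printf("0x%llx\n", (unsigned long long)read_split64());
        return 0;
}

The unbounded version would never leave that loop on such a device; the bounded one returns after three passes with the latest upper/lower pair.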
Reported-by: russianneuromancer@ya.ru Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91906 Signed-off-by: Chris Wilson Cc: Michał Winiarski Cc: Daniel Vetter Cc: Jani Nikula Reviewed-by: Daniel Vetter Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_drv.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 683a9b004c117..7d53d7e154556 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3190,13 +3190,13 @@ int intel_freq_opcode(struct drm_i915_private *dev_priv, int val); #define I915_READ64(reg) dev_priv->uncore.funcs.mmio_readq(dev_priv, (reg), true) #define I915_READ64_2x32(lower_reg, upper_reg) ({ \ - u32 upper, lower, tmp; \ - tmp = I915_READ(upper_reg); \ + u32 upper, lower, old_upper, loop = 0; \ + upper = I915_READ(upper_reg); \ do { \ - upper = tmp; \ + old_upper = upper; \ lower = I915_READ(lower_reg); \ - tmp = I915_READ(upper_reg); \ - } while (upper != tmp); \ + upper = I915_READ(upper_reg); \ + } while (upper != old_upper && loop++ < 2); \ (u64)upper << 32 | lower; }) #define POSTING_READ(reg) (void)I915_READ_NOTRACE(reg) From 8613898412bb80464d201bc9af7a44e599e547e1 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Thu, 25 Jun 2015 09:32:22 +0200 Subject: [PATCH 0713/2091] s390/sclp: fix compile error commit a313bdc5310dd807655d3ca3eb2219cd65dfe45a upstream. Fix this error when compiling with CONFIG_SMP=n and CONFIG_DYNAMIC_DEBUG=y: drivers/s390/char/sclp_early.c: In function 'sclp_read_info_early': drivers/s390/char/sclp_early.c:87:19: error: 'EBUSY' undeclared (first use in this function) } while (rc == -EBUSY); ^ Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- drivers/s390/char/sclp_early.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c index 1efa4fdb7fe21..f45cd0cb1b32f 100644 --- a/drivers/s390/char/sclp_early.c +++ b/drivers/s390/char/sclp_early.c @@ -7,6 +7,7 @@ #define KMSG_COMPONENT "sclp_early" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#include #include #include #include From 93f3257cb2a4f75e6d7ed3eb3d608639c708dd19 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 14 Aug 2015 13:20:28 +0200 Subject: [PATCH 0714/2091] s390/setup: fix novx parameter commit 89b1145e93771d727645c96e323539c029b63f1c upstream. The novx parameter disables the vector facility but the HWCAP_S390_VXRS bit in the ELf hardware capabilies is always set if the machine has the vector facility. If the user space program uses the "vx" string in the features field of /proc/cpuinfo to utilize vector instruction it will crash if the novx kernel paramter is set. Convert setup_hwcaps to an arch_initcall and use MACHINE_HAS_VX to decide if the HWCAPS_S390_VXRS bit needs to be set. Reported-by: Ulrich Weigand Reviewed-by: Heiko Carstens Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/kernel/setup.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 7262fe438c990..1942f22e66940 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -683,7 +683,7 @@ static void __init setup_memory(void) /* * Setup hardware capabilities. 
*/ -static void __init setup_hwcaps(void) +static int __init setup_hwcaps(void) { static const int stfl_bits[6] = { 0, 2, 7, 17, 19, 21 }; struct cpuid cpu_id; @@ -749,9 +749,11 @@ static void __init setup_hwcaps(void) elf_hwcap |= HWCAP_S390_TE; /* - * Vector extension HWCAP_S390_VXRS is bit 11. + * Vector extension HWCAP_S390_VXRS is bit 11. The Vector extension + * can be disabled with the "novx" parameter. Use MACHINE_HAS_VX + * instead of facility bit 129. */ - if (test_facility(129)) + if (MACHINE_HAS_VX) elf_hwcap |= HWCAP_S390_VXRS; get_cpu_id(&cpu_id); add_device_randomness(&cpu_id, sizeof(cpu_id)); @@ -788,7 +790,9 @@ static void __init setup_hwcaps(void) strcpy(elf_platform, "z13"); break; } + return 0; } +arch_initcall(setup_hwcaps); /* * Add system information as device randomness @@ -870,11 +874,6 @@ void __init setup_arch(char **cmdline_p) smp_fill_possible_mask(); cpu_init(); - /* - * Setup capabilities (ELF_HWCAP & ELF_PLATFORM). - */ - setup_hwcaps(); - /* * Create kernel page tables and switch to virtual addressing. */ From b78f553c856992967005427faf3cdc403578f863 Mon Sep 17 00:00:00 2001 From: Markus Pargmann Date: Wed, 29 Jul 2015 15:46:03 +0200 Subject: [PATCH 0715/2091] iio: bmg160: IIO_BUFFER and IIO_TRIGGERED_BUFFER are required commit 06d2f6ca5a38abe92f1f3a132b331eee773868c3 upstream. This patch adds selects for IIO_BUFFER and IIO_TRIGGERED_BUFFER. Without IIO_BUFFER, the driver does not compile. Signed-off-by: Markus Pargmann Reviewed-by: Srinivas Pandruvada Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/gyro/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iio/gyro/Kconfig b/drivers/iio/gyro/Kconfig index b3d0e94f72eb7..8d24393456739 100644 --- a/drivers/iio/gyro/Kconfig +++ b/drivers/iio/gyro/Kconfig @@ -53,7 +53,8 @@ config ADXRS450 config BMG160 tristate "BOSCH BMG160 Gyro Sensor" depends on I2C - select IIO_TRIGGERED_BUFFER if IIO_BUFFER + select IIO_BUFFER + select IIO_TRIGGERED_BUFFER help Say yes here to build support for Bosch BMG160 Tri-axis Gyro Sensor driver. This driver also supports BMI055 gyroscope. From ddf196ae809800281b5494b3cda7cbff0f89e4a3 Mon Sep 17 00:00:00 2001 From: Cristina Opriceana Date: Mon, 3 Aug 2015 13:00:47 +0300 Subject: [PATCH 0716/2091] iio: event: Remove negative error code from iio_event_poll commit 41d903c00051d8f31c98a8136edbac67e6f8688f upstream. Negative return values are not supported by iio_event_poll since its return type is unsigned int. Fixes: f18e7a068a0a3 ("iio: Return -ENODEV for file operations if the device has been unregistered") Signed-off-by: Cristina Opriceana Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/industrialio-event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/industrialio-event.c b/drivers/iio/industrialio-event.c index a99692ba91bc7..69b8c338fa894 100644 --- a/drivers/iio/industrialio-event.c +++ b/drivers/iio/industrialio-event.c @@ -84,7 +84,7 @@ static unsigned int iio_event_poll(struct file *filep, unsigned int events = 0; if (!indio_dev->info) - return -ENODEV; + return events; poll_wait(filep, &ev_int->wait, wait); From 3f0ade6354d14d0efb7d344ea388a4d154fba03f Mon Sep 17 00:00:00 2001 From: Cristina Opriceana Date: Mon, 3 Aug 2015 13:37:40 +0300 Subject: [PATCH 0717/2091] iio: industrialio-buffer: Fix iio_buffer_poll return value commit 1bdc0293901cbea23c6dc29432e81919d4719844 upstream. 
Change return value to 0 if no device is bound since unsigned int cannot support negative error codes. Fixes: f18e7a068 ("iio: Return -ENODEV for file operations if the device has been unregistered") Signed-off-by: Cristina Opriceana Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/industrialio-buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/industrialio-buffer.c b/drivers/iio/industrialio-buffer.c index df919f44d513f..7fa280b28ecb7 100644 --- a/drivers/iio/industrialio-buffer.c +++ b/drivers/iio/industrialio-buffer.c @@ -151,7 +151,7 @@ unsigned int iio_buffer_poll(struct file *filp, struct iio_buffer *rb = indio_dev->buffer; if (!indio_dev->info) - return -ENODEV; + return 0; poll_wait(filp, &rb->pollq, wait); if (iio_buffer_ready(indio_dev, rb, rb->watermark, 0)) From dfc28f84d3d1d45bf2720362ee03420b8637d4ba Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Wed, 5 Aug 2015 15:38:13 +0200 Subject: [PATCH 0718/2091] iio: adis16400: Fix adis16448 gyroscope scale commit 8166537283b31d7abaae9e56bd48fbbc30cdc579 upstream. Use the correct scale for the adis16448 gyroscope output. Signed-off-by: Lars-Peter Clausen Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/imu/adis16400_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/imu/adis16400_core.c b/drivers/iio/imu/adis16400_core.c index 2fd68f2219a7d..d42e4fe2c7ed3 100644 --- a/drivers/iio/imu/adis16400_core.c +++ b/drivers/iio/imu/adis16400_core.c @@ -780,7 +780,7 @@ static struct adis16400_chip_info adis16400_chips[] = { .flags = ADIS16400_HAS_PROD_ID | ADIS16400_HAS_SERIAL_NUMBER | ADIS16400_BURST_DIAG_STAT, - .gyro_scale_micro = IIO_DEGREE_TO_RAD(10000), /* 0.01 deg/s */ + .gyro_scale_micro = IIO_DEGREE_TO_RAD(40000), /* 0.04 deg/s */ .accel_scale_micro = IIO_G_TO_M_S_2(833), /* 1/1200 g */ .temp_scale_nano = 73860000, /* 0.07386 C */ .temp_offset = 31000000 / 73860, /* 31 C = 0x00 */ From db783fa06defd4f908861a83d602c33f29af86e9 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Wed, 5 Aug 2015 15:38:14 +0200 Subject: [PATCH 0719/2091] iio: Add inverse unit conversion macros commit c689a923c867eac40ed3826c1d9328edea8b6bc7 upstream. Add inverse unit conversion macro to convert from standard IIO units to units that might be used by some devices. Those are useful in combination with scale factors that are specified as IIO_VAL_FRACTIONAL. Typically the denominator for those specifications will contain the maximum raw value the sensor will generate and the numerator the value it maps to in a specific unit. Sometimes datasheets specify those in different units than the standard IIO units (e.g. degree/s instead of rad/s) and so we need to do a unit conversion. From a mathematical point of view it does not make a difference whether we apply the unit conversion to the numerator or the inverse unit conversion to the denominator since (x / y) / z = x / (y * z). But as the denominator is typically a larger value and we are rounding both the numerator and denominator to integer values using the later method gives us a better precision (E.g. the relative error is smaller if we round 8000.3 to 8000 rather than rounding 8.3 to 8). This is where in inverse unit conversion macros will be used. Marked for stable as used by some upcoming fixes. 
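The precision argument above can be made concrete with a small userspace program. It uses the two conversion macros from the diff below together with the adis16480 numbers from the next patch in this series (full scale 450 deg/s at a raw reading of 22500) and compares converting the numerator against converting the denominator; the exact figures are illustrative only.

#include <stdio.h>

#define IIO_DEGREE_TO_RAD(deg)  (((deg) * 314159ULL + 9000000ULL) / 18000000ULL)
#define IIO_RAD_TO_DEGREE(rad)  (((rad) * 18000000ULL + 314159ULL / 2) / 314159ULL)

int main(void)
{
        /* True scale: 450/22500 deg/s per LSB, expressed in rad/s per LSB. */
        double true_scale = (450.0 / 22500.0) * 3.141592653589793 / 180.0;

        /* Convert the (small) numerator: 450 deg/s rounds down to 8 "rad/s". */
        double via_numerator = (double)IIO_DEGREE_TO_RAD(450) / 22500.0;

        /* Convert the (large) denominator instead. */
        double via_denominator = 450.0 / (double)IIO_RAD_TO_DEGREE(22500);

        printf("true:            %.9f rad/s\n", true_scale);
        printf("via numerator:   %.9f (%+.4f%%)\n", via_numerator,
               100.0 * (via_numerator - true_scale) / true_scale);
        printf("via denominator: %.9f (%+.6f%%)\n", via_denominator,
               100.0 * (via_denominator - true_scale) / true_scale);
        return 0;
}

Rounding 8.35 down to 8 costs almost two percent; rounding a value around 1.29 million to an integer costs on the order of a part per million, which is exactly the 8.3-versus-8000.3 argument made above.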
Signed-off-by: Lars-Peter Clausen Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- include/linux/iio/iio.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index d86b753e9b301..5ed7771ad3863 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -641,6 +641,15 @@ int iio_str_to_fixpoint(const char *str, int fract_mult, int *integer, */ #define IIO_DEGREE_TO_RAD(deg) (((deg) * 314159ULL + 9000000ULL) / 18000000ULL) +/** + * IIO_RAD_TO_DEGREE() - Convert rad to degree + * @rad: A value in rad + * + * Returns the given value converted from rad to degree + */ +#define IIO_RAD_TO_DEGREE(rad) \ + (((rad) * 18000000ULL + 314159ULL / 2) / 314159ULL) + /** * IIO_G_TO_M_S_2() - Convert g to meter / second**2 * @g: A value in g @@ -649,4 +658,12 @@ int iio_str_to_fixpoint(const char *str, int fract_mult, int *integer, */ #define IIO_G_TO_M_S_2(g) ((g) * 980665ULL / 100000ULL) +/** + * IIO_M_S_2_TO_G() - Convert meter / second**2 to g + * @ms2: A value in meter / second**2 + * + * Returns the given value converted from meter / second**2 to g + */ +#define IIO_M_S_2_TO_G(ms2) (((ms2) * 100000ULL + 980665ULL / 2) / 980665ULL) + #endif /* _INDUSTRIAL_IO_H_ */ From da4ecf32643639585af69bf5f0e5143198e43c39 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Wed, 5 Aug 2015 15:38:15 +0200 Subject: [PATCH 0720/2091] iio: adis16480: Fix scale factors commit 7abad1063deb0f77d275c61f58863ec319c58c5c upstream. The different devices support by the adis16480 driver have slightly different scales for the gyroscope and accelerometer channels. Signed-off-by: Lars-Peter Clausen Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/imu/adis16480.c | 39 +++++++++++++++++++++++++++++++------ 1 file changed, 33 insertions(+), 6 deletions(-) diff --git a/drivers/iio/imu/adis16480.c b/drivers/iio/imu/adis16480.c index 989605dd6f781..b94bfd3f595bf 100644 --- a/drivers/iio/imu/adis16480.c +++ b/drivers/iio/imu/adis16480.c @@ -110,6 +110,10 @@ struct adis16480_chip_info { unsigned int num_channels; const struct iio_chan_spec *channels; + unsigned int gyro_max_val; + unsigned int gyro_max_scale; + unsigned int accel_max_val; + unsigned int accel_max_scale; }; struct adis16480 { @@ -497,19 +501,21 @@ static int adis16480_set_filter_freq(struct iio_dev *indio_dev, static int adis16480_read_raw(struct iio_dev *indio_dev, const struct iio_chan_spec *chan, int *val, int *val2, long info) { + struct adis16480 *st = iio_priv(indio_dev); + switch (info) { case IIO_CHAN_INFO_RAW: return adis_single_conversion(indio_dev, chan, 0, val); case IIO_CHAN_INFO_SCALE: switch (chan->type) { case IIO_ANGL_VEL: - *val = 0; - *val2 = IIO_DEGREE_TO_RAD(20000); /* 0.02 degree/sec */ - return IIO_VAL_INT_PLUS_MICRO; + *val = st->chip_info->gyro_max_scale; + *val2 = st->chip_info->gyro_max_val; + return IIO_VAL_FRACTIONAL; case IIO_ACCEL: - *val = 0; - *val2 = IIO_G_TO_M_S_2(800); /* 0.8 mg */ - return IIO_VAL_INT_PLUS_MICRO; + *val = st->chip_info->accel_max_scale; + *val2 = st->chip_info->accel_max_val; + return IIO_VAL_FRACTIONAL; case IIO_MAGN: *val = 0; *val2 = 100; /* 0.0001 gauss */ @@ -674,18 +680,39 @@ static const struct adis16480_chip_info adis16480_chip_info[] = { [ADIS16375] = { .channels = adis16485_channels, .num_channels = ARRAY_SIZE(adis16485_channels), + /* + * storing the value in rad/degree and the scale in degree + * gives us the result in rad and better precession than + * storing the scale 
directly in rad. + */ + .gyro_max_val = IIO_RAD_TO_DEGREE(22887), + .gyro_max_scale = 300, + .accel_max_val = IIO_M_S_2_TO_G(21973), + .accel_max_scale = 18, }, [ADIS16480] = { .channels = adis16480_channels, .num_channels = ARRAY_SIZE(adis16480_channels), + .gyro_max_val = IIO_RAD_TO_DEGREE(22500), + .gyro_max_scale = 450, + .accel_max_val = IIO_M_S_2_TO_G(12500), + .accel_max_scale = 5, }, [ADIS16485] = { .channels = adis16485_channels, .num_channels = ARRAY_SIZE(adis16485_channels), + .gyro_max_val = IIO_RAD_TO_DEGREE(22500), + .gyro_max_scale = 450, + .accel_max_val = IIO_M_S_2_TO_G(20000), + .accel_max_scale = 5, }, [ADIS16488] = { .channels = adis16480_channels, .num_channels = ARRAY_SIZE(adis16480_channels), + .gyro_max_val = IIO_RAD_TO_DEGREE(22500), + .gyro_max_scale = 450, + .accel_max_val = IIO_M_S_2_TO_G(22500), + .accel_max_scale = 18, }, }; From 668327e49d2aa4782da771c4b1c6c152bf5084a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20H=2E=20Sch=C3=B6nherr?= Date: Wed, 12 Aug 2015 21:35:56 +0200 Subject: [PATCH 0721/2091] sched: Fix cpu_active_mask/cpu_online_mask race MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit dd9d3843755da95f63dd3a376f62b3e45c011210 upstream. There is a race condition in SMP bootup code, which may result in WARNING: CPU: 0 PID: 1 at kernel/workqueue.c:4418 workqueue_cpu_up_callback() or kernel BUG at kernel/smpboot.c:135! It can be triggered with a bit of luck in Linux guests running on busy hosts. CPU0 CPUn ==== ==== _cpu_up() __cpu_up() start_secondary() set_cpu_online() cpumask_set_cpu(cpu, to_cpumask(cpu_online_bits)); cpu_notify(CPU_ONLINE) cpumask_set_cpu(cpu, to_cpumask(cpu_active_bits)); During the various CPU_ONLINE callbacks CPUn is online but not active. Several things can go wrong at that point, depending on the scheduling of tasks on CPU0. Variant 1: cpu_notify(CPU_ONLINE) workqueue_cpu_up_callback() rebind_workers() set_cpus_allowed_ptr() This call fails because it requires an active CPU; rebind_workers() ends with a warning: WARNING: CPU: 0 PID: 1 at kernel/workqueue.c:4418 workqueue_cpu_up_callback() Variant 2: cpu_notify(CPU_ONLINE) smpboot_thread_call() smpboot_unpark_threads() .. __kthread_unpark() __kthread_bind() wake_up_state() .. select_task_rq() select_fallback_rq() The ->wake_cpu of the unparked thread is not allowed, making a call to select_fallback_rq() necessary. Then, select_fallback_rq() cannot find an allowed, active CPU and promptly resets the allowed CPUs, so that the task in question ends up on CPU0. When those unparked tasks are eventually executed, they run immediately into a BUG: kernel BUG at kernel/smpboot.c:135! Just changing the order in which the online/active bits are set (and adding some memory barriers), would solve the two issues above. However, it would change the order of operations back to the one before commit 6acbfb96976f ("sched: Fix hotplug vs. set_cpus_allowed_ptr()"), thus, reintroducing that particular problem. 
Going further back into history, we have at least the following commits touching this topic: - commit 2baab4e90495 ("sched: Fix select_fallback_rq() vs cpu_active/cpu_online") - commit 5fbd036b552f ("sched: Cleanup cpu_active madness") Together, these give us the following non-working solutions: - secondary CPU sets active before online, because active is assumed to be a subset of online; - secondary CPU sets online before active, because the primary CPU assumes that an online CPU is also active; - secondary CPU sets online and waits for primary CPU to set active, because it might deadlock. Commit 875ebe940d77 ("powerpc/smp: Wait until secondaries are active & online") introduces an arch-specific solution to this arch-independent problem. Now, go for a more general solution without explicit waiting and simply set active twice: once on the secondary CPU after online was set and once on the primary CPU after online was seen. set_cpus_allowed_ptr()") Signed-off-by: Jan H. Schönherr Acked-by: Peter Zijlstra Cc: Anton Blanchard Cc: Borislav Petkov Cc: Joerg Roedel Cc: Linus Torvalds Cc: Matt Wilson Cc: Michael Ellerman Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 6acbfb96976f ("sched: Fix hotplug vs. set_cpus_allowed_ptr()") Link: http://lkml.kernel.org/r/1439408156-18840-1-git-send-email-jschoenh@amazon.de Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/sched/core.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 123673291ffbb..e6910526c84b8 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5328,6 +5328,14 @@ static int sched_cpu_active(struct notifier_block *nfb, case CPU_STARTING: set_cpu_rq_start_time(); return NOTIFY_OK; + case CPU_ONLINE: + /* + * At this point a starting CPU has marked itself as online via + * set_cpu_online(). But it might not yet have marked itself + * as active, which is essential from here on. + * + * Thus, fall-through and help the starting CPU along. + */ case CPU_DOWN_FAILED: set_cpu_active((long)hcpu, true); return NOTIFY_OK; From 75882550ea3ec52c783f440f97ac4c6999916ede Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Tue, 11 Aug 2015 13:05:10 +0100 Subject: [PATCH 0722/2091] staging: comedi: adl_pci7x3x: fix digital output on PCI-7230 commit ad83dbd974feb2e2a8cc071a1d28782bd4d2c70e upstream. The "adl_pci7x3x" driver replaced the "adl_pci7230" and "adl_pci7432" drivers in commits 8f567c373c4b ("staging: comedi: new adl_pci7x3x driver") and 657f77d173d3 ("staging: comedi: remove adl_pci7230 and adl_pci7432 drivers"). Although the new driver code agrees with the user manuals for the respective boards, digital outputs stopped working on the PCI-7230. This has 16 digital output channels and the previous adl_pci7230 driver shifted the 16 bit output state left by 16 bits before writing to the hardware register. The new adl_pci7x3x driver doesn't do that. Fix it in `adl_pci7x3x_do_insn_bits()` by checking for the special case of the subdevice having only 16 channels and duplicating the 16 bit output state into both halves of the 32-bit register. That should work both for what the board actually does and for what the user manual says it should do. 
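The PCI-7230 write described above (its diff follows below) boils down to mirroring a 16-bit output state into both halves of a 32-bit register. A trivial standalone illustration, with an invented channel state:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint32_t state = 0x00ff;        /* 16 DO channels, low 8 driven high */
        uint32_t reg = state;

        /* Mirror the state into the upper half too, so the register image is
         * right whichever half this board variant actually latches. */
        reg |= reg << 16;

        printf("0x%08x\n", reg);        /* prints 0x00ff00ff */
        return 0;
}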
Fixes: 8f567c373c4b ("staging: comedi: new adl_pci7x3x driver") Signed-off-by: Ian Abbott Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/adl_pci7x3x.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/staging/comedi/drivers/adl_pci7x3x.c b/drivers/staging/comedi/drivers/adl_pci7x3x.c index 934af3ff78973..b0fc027cf485d 100644 --- a/drivers/staging/comedi/drivers/adl_pci7x3x.c +++ b/drivers/staging/comedi/drivers/adl_pci7x3x.c @@ -120,8 +120,20 @@ static int adl_pci7x3x_do_insn_bits(struct comedi_device *dev, { unsigned long reg = (unsigned long)s->private; - if (comedi_dio_update_state(s, data)) - outl(s->state, dev->iobase + reg); + if (comedi_dio_update_state(s, data)) { + unsigned int val = s->state; + + if (s->n_chan == 16) { + /* + * It seems the PCI-7230 needs the 16-bit DO state + * to be shifted left by 16 bits before being written + * to the 32-bit register. Set the value in both + * halves of the register to be sure. + */ + val |= val << 16; + } + outl(val, dev->iobase + reg); + } data[1] = s->state; From 815efa5df8a5fc651289a3302cd89a6dbf30ac28 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Thu, 23 Jul 2015 16:46:57 +0100 Subject: [PATCH 0723/2091] staging: comedi: usbduxsigma: don't clobber ai_timer in command test commit 423b24c37dd5794a674c74b0ed56392003a69891 upstream. `devpriv->ai_timer` is used while an asynchronous command is running on the AI subdevice. It also gets modified by the subdevice's `cmdtest` handler for checking new asynchronous commands (`usbduxsigma_ai_cmdtest()`), which is not correct as it's allowed to check new commands while an old command is still running. Fix it by moving the code which sets up `devpriv->ai_timer` and `devpriv->ai_interval` into the subdevice's `cmd` handler, `usbduxsigma_ai_cmd()`. Note that the removed code in `usbduxsigma_ai_cmdtest()` checked that `devpriv->ai_timer` did not end up less than than 1, but that could not happen because `cmd->scan_begin_arg` had already been checked to be at least the minimum required value (at least when `cmd->scan_begin_src == TRIG_TIMER`, which had also been checked to be the case). Fixes: b986be8527c7 ("staging: comedi: usbduxsigma: tidy up analog input command support) Signed-off-by: Ian Abbott Reviewed-by: Bernd Porr Reviewed-by: H Hartley Sweeten Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/usbduxsigma.c | 37 +++++++++----------- 1 file changed, 16 insertions(+), 21 deletions(-) diff --git a/drivers/staging/comedi/drivers/usbduxsigma.c b/drivers/staging/comedi/drivers/usbduxsigma.c index eaa9add491df7..22517deed2fd0 100644 --- a/drivers/staging/comedi/drivers/usbduxsigma.c +++ b/drivers/staging/comedi/drivers/usbduxsigma.c @@ -550,27 +550,6 @@ static int usbduxsigma_ai_cmdtest(struct comedi_device *dev, if (err) return 3; - /* Step 4: fix up any arguments */ - - if (high_speed) { - /* - * every 2 channels get a time window of 125us. Thus, if we - * sample all 16 channels we need 1ms. 
If we sample only one - * channel we need only 125us - */ - devpriv->ai_interval = interval; - devpriv->ai_timer = cmd->scan_begin_arg / (125000 * interval); - } else { - /* interval always 1ms */ - devpriv->ai_interval = 1; - devpriv->ai_timer = cmd->scan_begin_arg / 1000000; - } - if (devpriv->ai_timer < 1) - err |= -EINVAL; - - if (err) - return 4; - return 0; } @@ -668,6 +647,22 @@ static int usbduxsigma_ai_cmd(struct comedi_device *dev, down(&devpriv->sem); + if (devpriv->high_speed) { + /* + * every 2 channels get a time window of 125us. Thus, if we + * sample all 16 channels we need 1ms. If we sample only one + * channel we need only 125us + */ + unsigned int interval = usbduxsigma_chans_to_interval(len); + + devpriv->ai_interval = interval; + devpriv->ai_timer = cmd->scan_begin_arg / (125000 * interval); + } else { + /* interval always 1ms */ + devpriv->ai_interval = 1; + devpriv->ai_timer = cmd->scan_begin_arg / 1000000; + } + for (i = 0; i < len; i++) { unsigned int chan = CR_CHAN(cmd->chanlist[i]); From b248fcf1cedcb427ccfd7a09a1b56b3c50190f81 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Thu, 23 Jul 2015 16:46:58 +0100 Subject: [PATCH 0724/2091] staging: comedi: usbduxsigma: don't clobber ao_timer in command test commit c04a1f17803e0d3eeada586ca34a6b436959bc20 upstream. `devpriv->ao_timer` is used while an asynchronous command is running on the AO subdevice. It also gets modified by the subdevice's `cmdtest` handler for checking new asynchronous commands, `usbduxsigma_ao_cmdtest()`, which is not correct as it's allowed to check new commands while an old command is still running. Fix it by moving the code which sets up `devpriv->ao_timer` into the subdevice's `cmd` handler, `usbduxsigma_ao_cmd()`. Note that the removed code in `usbduxsigma_ao_cmdtest()` checked that `devpriv->ao_timer` did not end up less that 1, but that could not happen due because `cmd->scan_begin_arg` or `cmd->convert_arg` had already been range-checked. Also note that we tested the `high_speed` variable in the old code, but that is currently always 0 and means that we always use "scan" timing (`cmd->scan_begin_src == TRIG_TIMER` and `cmd->convert_src == TRIG_NOW`) and never "convert" (individual sample) timing (`cmd->scan_begin_src == TRIG_FOLLOW` and `cmd->convert_src == TRIG_TIMER`). The moved code tests `cmd->convert_src` instead to decide whether "scan" or "convert" timing is being used, although currently only "scan" timing is supported. 
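For a feel of the numbers involved (comedi command timing arguments are given in nanoseconds), here is a standalone sketch of the timer arithmetic that now lives in the cmd handlers of both usbduxsigma fixes; the 5 ms period is an arbitrary example.

#include <stdio.h>

int main(void)
{
        unsigned int scan_begin_arg = 5 * 1000 * 1000;  /* 5 ms scan period, in ns */
        unsigned int timer = scan_begin_arg / 1000000;  /* URBs complete every 1 ms */

        /* The running command counts URB completions and emits a new scan
         * every `timer` completions -- here, every 5th one. */
        printf("timer = %u\n", timer);
        return 0;
}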
Fixes: fb1ef622e7a3 ("staging: comedi: usbduxsigma: tidy up analog output command support") Signed-off-by: Ian Abbott Reviewed-by: Bernd Porr Reviewed-by: H Hartley Sweeten Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/usbduxsigma.c | 33 +++++++++----------- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/drivers/staging/comedi/drivers/usbduxsigma.c b/drivers/staging/comedi/drivers/usbduxsigma.c index 22517deed2fd0..dc0b25a540886 100644 --- a/drivers/staging/comedi/drivers/usbduxsigma.c +++ b/drivers/staging/comedi/drivers/usbduxsigma.c @@ -912,25 +912,6 @@ static int usbduxsigma_ao_cmdtest(struct comedi_device *dev, if (err) return 3; - /* Step 4: fix up any arguments */ - - /* we count in timer steps */ - if (high_speed) { - /* timing of the conversion itself: every 125 us */ - devpriv->ao_timer = cmd->convert_arg / 125000; - } else { - /* - * timing of the scan: every 1ms - * we get all channels at once - */ - devpriv->ao_timer = cmd->scan_begin_arg / 1000000; - } - if (devpriv->ao_timer < 1) - err |= -EINVAL; - - if (err) - return 4; - return 0; } @@ -943,6 +924,20 @@ static int usbduxsigma_ao_cmd(struct comedi_device *dev, down(&devpriv->sem); + if (cmd->convert_src == TRIG_TIMER) { + /* + * timing of the conversion itself: every 125 us + * at high speed (not used yet) + */ + devpriv->ao_timer = cmd->convert_arg / 125000; + } else { + /* + * timing of the scan: every 1ms + * we get all channels at once + */ + devpriv->ao_timer = cmd->scan_begin_arg / 1000000; + } + devpriv->ao_counter = devpriv->ao_timer; if (cmd->start_src == TRIG_NOW) { From 2c0a3f6ee3f768a4cc3462b532ce38ae5cbbf22c Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 29 Jun 2015 22:13:38 +0100 Subject: [PATCH 0725/2091] PM / clk: don't return int on __pm_clk_enable() commit f4745a92781b872455f32feb01d1dce92aefcb6c upstream. Static analysis by cppcheck found an issue that was recently introduced by commit 471f7707b6f0b1 ("PM / clock_ops: make __pm_clk_enable more generic") where a return status in ret was not being initialised and garbage being returned when ce->status >= PCE_STATUS_ERROR. The fact that ret is not being checked by the caller and that ret is only used internally __pm_clk_enable() to check if clk_enable() was OK means we can ignore returning it instead turn __pm_clk_enable() into function with a void return. Fixes: 471f7707b6f0b1 ("PM / clock_ops: make __pm_clk_enable more generic") Signed-off-by: Colin Ian King Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/base/power/clock_ops.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/base/power/clock_ops.c b/drivers/base/power/clock_ops.c index c7b0fcebf168c..ac3c07db92f10 100644 --- a/drivers/base/power/clock_ops.c +++ b/drivers/base/power/clock_ops.c @@ -37,7 +37,7 @@ struct pm_clock_entry { * @dev: The device for the given clock * @ce: PM clock entry corresponding to the clock. 
*/ -static inline int __pm_clk_enable(struct device *dev, struct pm_clock_entry *ce) +static inline void __pm_clk_enable(struct device *dev, struct pm_clock_entry *ce) { int ret; @@ -49,8 +49,6 @@ static inline int __pm_clk_enable(struct device *dev, struct pm_clock_entry *ce) dev_err(dev, "%s: failed to enable clk %p, error %d\n", __func__, ce->clk, ret); } - - return ret; } /** From befadb239291808c185c9029d8b418beca05882a Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Thu, 18 Jun 2015 16:18:28 +0200 Subject: [PATCH 0726/2091] clk: rockchip: rk3288: add CLK_SET_RATE_PARENT to sclk_mac commit 4791eb61dbe8100ccac59fecfac9d93a15db1447 upstream. The dwmac ethernet controller on the rk3288 supports phys connected via rgmii and rmii. With rgmii phys it is expected that the mac clock is provided externally while with rmii phys the clock can be external but also generated from the plls. In the later case it of course needs be at 50MHz, which gets set from the dwmac_rk driver. As most devices use a rgmii phy it never surfaced so far that the mac clk mux, doesn't go up one lever to the pll clock in the rmii case with internal clock generation, as it is missing the CLK_SET_RATE_PARENT flag, and thus will not set the correct frequency in most cases. Fixes: b9e4ba541607 ("clk: rockchip: add clock controller for rk3288") Signed-off-by: Heiko Stuebner Signed-off-by: Stephen Boyd Signed-off-by: Greg Kroah-Hartman --- drivers/clk/rockchip/clk-rk3288.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/rockchip/clk-rk3288.c b/drivers/clk/rockchip/clk-rk3288.c index d17eb4528a283..37f96117fd3d0 100644 --- a/drivers/clk/rockchip/clk-rk3288.c +++ b/drivers/clk/rockchip/clk-rk3288.c @@ -578,7 +578,7 @@ static struct rockchip_clk_branch rk3288_clk_branches[] __initdata = { COMPOSITE(0, "mac_pll_src", mux_pll_src_npll_cpll_gpll_p, 0, RK3288_CLKSEL_CON(21), 0, 2, MFLAGS, 8, 5, DFLAGS, RK3288_CLKGATE_CON(2), 5, GFLAGS), - MUX(SCLK_MAC, "mac_clk", mux_mac_p, 0, + MUX(SCLK_MAC, "mac_clk", mux_mac_p, CLK_SET_RATE_PARENT, RK3288_CLKSEL_CON(21), 4, 1, MFLAGS), GATE(SCLK_MACREF_OUT, "sclk_macref_out", "mac_clk", 0, RK3288_CLKGATE_CON(5), 3, GFLAGS), From dcce0e4e44ed93c0438d258232b15be481641cfa Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 12 Jun 2015 10:53:25 +0900 Subject: [PATCH 0727/2091] clk: exynos4: Fix wrong clock for Exynos4x12 ADC commit e323d56eb06b266b77c2b430cb5f1977ba549e03 upstream. The TSADC gate clock was used in Exynos4x12 DTSI for exynos-adc driver. However TSADC is present only on Exynos4210 so on Trats2 board (with Exynos4412 SoC) the exynos-adc driver could not be probed: ERROR: could not get clock /adc@126C0000:adc(0) exynos-adc 126c0000.adc: failed getting clock, err = -2 exynos-adc: probe of 126c0000.adc failed with error -2 Instead on Exynos4x12 SoCs the main clock used by Analog to Digital Converter is located in different register and it is named in datasheet as PCLK_ADC. Regardless of the name the purpose of this PCLK_ADC clock is the same as purpose of TSADC from Exynos4210. The patch adds gate clock for Exynos4x12 using the proper register so backward compatibility is preserved. This fixes the probe of exynos-adc driver on Exynos4x12 boards and allows accessing sensors connected to it on Trats2 board (ntc,ncp15wb473 AP and battery thermistors). 
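For context on why the missing gate broke probing, a minimal sketch of the consumer side is shown below. It is not the exynos-adc driver itself, just the usual clk API pattern; the clock name "adc" follows the error message quoted above.

#include <linux/clk.h>
#include <linux/device.h>
#include <linux/err.h>
#include <linux/platform_device.h>

static int example_adc_probe(struct platform_device *pdev)
{
        struct clk *clk = devm_clk_get(&pdev->dev, "adc");

        if (IS_ERR(clk)) {
                /* With no "adc" clock in the provider this is -ENOENT (-2). */
                dev_err(&pdev->dev, "failed getting clock, err = %ld\n",
                        PTR_ERR(clk));
                return PTR_ERR(clk);
        }

        return clk_prepare_enable(clk);
}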
Signed-off-by: Krzysztof Kozlowski Fixes: c63c57433003 ("ARM: dts: Add ADC's dt data to read raw data for exynos4x12") Reviewed-by: Javier Martinez Canillas Acked-by: Tomasz Figa Signed-off-by: Stephen Boyd Signed-off-by: Greg Kroah-Hartman --- drivers/clk/samsung/clk-exynos4.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/clk/samsung/clk-exynos4.c b/drivers/clk/samsung/clk-exynos4.c index 714d6ba782c81..f7890bf652e69 100644 --- a/drivers/clk/samsung/clk-exynos4.c +++ b/drivers/clk/samsung/clk-exynos4.c @@ -85,6 +85,7 @@ #define DIV_PERIL4 0xc560 #define DIV_PERIL5 0xc564 #define E4X12_DIV_CAM1 0xc568 +#define E4X12_GATE_BUS_FSYS1 0xc744 #define GATE_SCLK_CAM 0xc820 #define GATE_IP_CAM 0xc920 #define GATE_IP_TV 0xc924 @@ -1095,6 +1096,7 @@ static struct samsung_gate_clock exynos4x12_gate_clks[] __initdata = { 0), GATE(CLK_PPMUIMAGE, "ppmuimage", "aclk200", E4X12_GATE_IP_IMAGE, 9, 0, 0), + GATE(CLK_TSADC, "tsadc", "aclk133", E4X12_GATE_BUS_FSYS1, 16, 0, 0), GATE(CLK_MIPI_HSI, "mipi_hsi", "aclk133", GATE_IP_FSYS, 10, 0, 0), GATE(CLK_CHIPID, "chipid", "aclk100", E4X12_GATE_IP_PERIR, 0, 0, 0), GATE(CLK_SYSREG, "sysreg", "aclk100", E4X12_GATE_IP_PERIR, 1, From 0c34e6c8de0e7fcd7321217aeedfa797894c2324 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Wed, 12 Aug 2015 10:58:22 +0200 Subject: [PATCH 0728/2091] clk: s5pv210: add missing call to samsung_clk_of_add_provider() commit ba30011577330b7e29ecb5916d89c6db9fbc5b3d upstream. Commit d5e136a21b2028fb1f45143ea7112d5869bfc6c7 ("clk: samsung: Register clk provider only after registering its all clocks", merged to v3.17-rc1) modified a way that driver registers registers to core framework. This change has not been applied to s5pv210 clocks driver, which has been merged in parallel to that commit. This patch adds a missing call to samsung_clk_of_add_provider(), so the driver is operational again. Signed-off-by: Marek Szyprowski Acked-by: Tomasz Figa Signed-off-by: Michael Turquette Signed-off-by: Greg Kroah-Hartman --- drivers/clk/samsung/clk-s5pv210.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/clk/samsung/clk-s5pv210.c b/drivers/clk/samsung/clk-s5pv210.c index e668e479a6970..bdd284249cc3c 100644 --- a/drivers/clk/samsung/clk-s5pv210.c +++ b/drivers/clk/samsung/clk-s5pv210.c @@ -828,6 +828,8 @@ static void __init __s5pv210_clk_init(struct device_node *np, s5pv210_clk_sleep_init(); + samsung_clk_of_add_provider(np, ctx); + pr_info("%s clocks: mout_apll = %ld, mout_mpll = %ld\n" "\tmout_epll = %ld, mout_vpll = %ld\n", is_s5p6442 ? "S5P6442" : "S5PV210", From d865d79dba304627da19806d454838ee80b6eae4 Mon Sep 17 00:00:00 2001 From: Zdenko Pulitika Date: Wed, 26 Aug 2015 17:11:38 +0100 Subject: [PATCH 0729/2091] clk: pistachio: Fix override of clk-pll settings from boot loader commit e53f21c761d141bbcbce06e9ddab3b4e0a828f2c upstream. PLL enable callbacks are overriding PLL mode (int/frac) and Noise reduction (on/off) settings set by the boot loader which results in the incorrect clock rate. PLL mode and noise reduction are defined by the DSMPD and DACPD bits of the PLL control register. PLL .enable() callbacks enable PLL by deasserting all power-down bits of the PLL control register, including DSMPD and DACPD bits, which is not necessary since these bits don't actually enable/disable PLL. This commit fixes the problem by removing DSMPD and DACPD bits from the "PLL enable" mask. 
Fixes: 43049b0c83f17("CLK: Pistachio: Add PLL driver") Reviewed-by: Andrew Bresitcker Signed-off-by: Zdenko Pulitika Signed-off-by: Govindraj Raja Signed-off-by: Stephen Boyd Signed-off-by: Greg Kroah-Hartman --- drivers/clk/pistachio/clk-pll.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/clk/pistachio/clk-pll.c b/drivers/clk/pistachio/clk-pll.c index de537560bf709..ebd0d2a3b5dad 100644 --- a/drivers/clk/pistachio/clk-pll.c +++ b/drivers/clk/pistachio/clk-pll.c @@ -115,8 +115,7 @@ static int pll_gf40lp_frac_enable(struct clk_hw *hw) u32 val; val = pll_readl(pll, PLL_CTRL3); - val &= ~(PLL_FRAC_CTRL3_PD | PLL_FRAC_CTRL3_DACPD | - PLL_FRAC_CTRL3_DSMPD | PLL_FRAC_CTRL3_FOUTPOSTDIVPD | + val &= ~(PLL_FRAC_CTRL3_PD | PLL_FRAC_CTRL3_FOUTPOSTDIVPD | PLL_FRAC_CTRL3_FOUT4PHASEPD | PLL_FRAC_CTRL3_FOUTVCOPD); pll_writel(pll, val, PLL_CTRL3); @@ -233,7 +232,7 @@ static int pll_gf40lp_laint_enable(struct clk_hw *hw) u32 val; val = pll_readl(pll, PLL_CTRL1); - val &= ~(PLL_INT_CTRL1_PD | PLL_INT_CTRL1_DSMPD | + val &= ~(PLL_INT_CTRL1_PD | PLL_INT_CTRL1_FOUTPOSTDIVPD | PLL_INT_CTRL1_FOUTVCOPD); pll_writel(pll, val, PLL_CTRL1); From 30840a6068fca6d56208b48dd83f448e509753ef Mon Sep 17 00:00:00 2001 From: "Damien.Horsley" Date: Wed, 26 Aug 2015 17:11:40 +0100 Subject: [PATCH 0730/2091] clk: pistachio: correct critical clock list commit d31ff5f7f3b142b8d1ebb3da89187c54cdf2bc71 upstream. Current critical clock list for pistachio enables only mips and sys clocks by default but there are also other clocks that are not claimed by anyone and needs to be enabled by default. This patch updates the critical clocks that need to be enabled by default. Add a separate struct to distinguish the critical clocks as listed: 1.) core clocks: a.) mips clock 2.) peripheral system clocks: a.) sys clock b.) sys_bus clock c.) DDR clock d.) 
ROM clock Fixes: b35d7c33419c("CLK: Pistachio: Register core clocks") Reviewed-by: Andrew Bresticker Signed-off-by: Ezequiel Garcia Signed-off-by: Damien.Horsley Signed-off-by: Govindraj Raja Signed-off-by: Stephen Boyd Signed-off-by: Greg Kroah-Hartman --- drivers/clk/pistachio/clk-pistachio.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/clk/pistachio/clk-pistachio.c b/drivers/clk/pistachio/clk-pistachio.c index 8c0fe8828f993..c4ceb5eaf46c1 100644 --- a/drivers/clk/pistachio/clk-pistachio.c +++ b/drivers/clk/pistachio/clk-pistachio.c @@ -159,9 +159,15 @@ PNAME(mux_debug) = { "mips_pll_mux", "rpu_v_pll_mux", "wifi_pll_mux", "bt_pll_mux" }; static u32 mux_debug_idx[] = { 0x0, 0x1, 0x2, 0x4, 0x8, 0x10 }; -static unsigned int pistachio_critical_clks[] __initdata = { - CLK_MIPS, - CLK_PERIPH_SYS, +static unsigned int pistachio_critical_clks_core[] __initdata = { + CLK_MIPS +}; + +static unsigned int pistachio_critical_clks_sys[] __initdata = { + PERIPH_CLK_SYS, + PERIPH_CLK_SYS_BUS, + PERIPH_CLK_DDR, + PERIPH_CLK_ROM, }; static void __init pistachio_clk_init(struct device_node *np) @@ -193,8 +199,8 @@ static void __init pistachio_clk_init(struct device_node *np) pistachio_clk_register_provider(p); - pistachio_clk_force_enable(p, pistachio_critical_clks, - ARRAY_SIZE(pistachio_critical_clks)); + pistachio_clk_force_enable(p, pistachio_critical_clks_core, + ARRAY_SIZE(pistachio_critical_clks_core)); } CLK_OF_DECLARE(pistachio_clk, "img,pistachio-clk", pistachio_clk_init); @@ -261,6 +267,9 @@ static void __init pistachio_clk_periph_init(struct device_node *np) ARRAY_SIZE(pistachio_periph_gates)); pistachio_clk_register_provider(p); + + pistachio_clk_force_enable(p, pistachio_critical_clks_sys, + ARRAY_SIZE(pistachio_critical_clks_sys)); } CLK_OF_DECLARE(pistachio_clk_periph, "img,pistachio-clk-periph", pistachio_clk_periph_init); From 77ec29edce35e4d969b9c8ced633d02da2365e34 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 29 Jul 2015 13:17:06 +0300 Subject: [PATCH 0731/2091] clk: versatile: off by one in clk_sp810_timerclken_of_get() commit 3294bee87091be5f179474f6c39d1d87769635e2 upstream. The ">" should be ">=" or we end up reading beyond the end of the array. Fixes: 6e973d2c4385 ('clk: vexpress: Add separate SP810 driver') Signed-off-by: Dan Carpenter Acked-by: Pawel Moll Signed-off-by: Stephen Boyd Signed-off-by: Greg Kroah-Hartman --- drivers/clk/versatile/clk-sp810.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/versatile/clk-sp810.c b/drivers/clk/versatile/clk-sp810.c index c6e86a9a2aa3d..5122ef25f5952 100644 --- a/drivers/clk/versatile/clk-sp810.c +++ b/drivers/clk/versatile/clk-sp810.c @@ -128,8 +128,8 @@ static struct clk *clk_sp810_timerclken_of_get(struct of_phandle_args *clkspec, { struct clk_sp810 *sp810 = data; - if (WARN_ON(clkspec->args_count != 1 || clkspec->args[0] > - ARRAY_SIZE(sp810->timerclken))) + if (WARN_ON(clkspec->args_count != 1 || + clkspec->args[0] >= ARRAY_SIZE(sp810->timerclken))) return NULL; return sp810->timerclken[clkspec->args[0]].clk; From 95a275367ba98a04f790a9b2dd320a866ed5f153 Mon Sep 17 00:00:00 2001 From: Robert Jarzmik Date: Sun, 12 Jul 2015 22:49:53 +0200 Subject: [PATCH 0732/2091] clk: pxa: fix core frequency reporting unit commit 4b5fb7dc9096d949a22651370bb6bf11f21edb30 upstream. Legacy drivers which are not yet ported, such as cpufreq-pxa[23]xx, rely on pxaXXx_get_clk_frequency_khz() to find the CPU core frequency. 
This reporting was broken because the expected unit is kHz and not Hz. Fix the reporting for pxa25x, pxa27x and pxa3xx. Fixes: fe7710fae477 ("clk: add pxa25x clock drivers") Fixes: d40670dc6169 ("clk: add pxa27x clock drivers") Fixes: 9bbb8a338fb2 ("clk: pxa: add pxa3xx clock driver") Signed-off-by: Robert Jarzmik Signed-off-by: Stephen Boyd Signed-off-by: Greg Kroah-Hartman --- drivers/clk/pxa/clk-pxa25x.c | 2 +- drivers/clk/pxa/clk-pxa27x.c | 2 +- drivers/clk/pxa/clk-pxa3xx.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/clk/pxa/clk-pxa25x.c b/drivers/clk/pxa/clk-pxa25x.c index 6cd88d963a7ff..542e45ef5087b 100644 --- a/drivers/clk/pxa/clk-pxa25x.c +++ b/drivers/clk/pxa/clk-pxa25x.c @@ -79,7 +79,7 @@ unsigned int pxa25x_get_clk_frequency_khz(int info) clks[3] / 1000000, (clks[3] % 1000000) / 10000); } - return (unsigned int)clks[0]; + return (unsigned int)clks[0] / KHz; } static unsigned long clk_pxa25x_memory_get_rate(struct clk_hw *hw, diff --git a/drivers/clk/pxa/clk-pxa27x.c b/drivers/clk/pxa/clk-pxa27x.c index 5f9b54b024b9e..267511df1e59f 100644 --- a/drivers/clk/pxa/clk-pxa27x.c +++ b/drivers/clk/pxa/clk-pxa27x.c @@ -80,7 +80,7 @@ unsigned int pxa27x_get_clk_frequency_khz(int info) pr_info("System bus clock: %ld.%02ldMHz\n", clks[4] / 1000000, (clks[4] % 1000000) / 10000); } - return (unsigned int)clks[0]; + return (unsigned int)clks[0] / KHz; } bool pxa27x_is_ppll_disabled(void) diff --git a/drivers/clk/pxa/clk-pxa3xx.c b/drivers/clk/pxa/clk-pxa3xx.c index ac03ba49e9d19..4af4eed5f89f0 100644 --- a/drivers/clk/pxa/clk-pxa3xx.c +++ b/drivers/clk/pxa/clk-pxa3xx.c @@ -78,7 +78,7 @@ unsigned int pxa3xx_get_clk_frequency_khz(int info) pr_info("System bus clock: %ld.%02ldMHz\n", clks[4] / 1000000, (clks[4] % 1000000) / 10000); } - return (unsigned int)clks[0]; + return (unsigned int)clks[0] / KHz; } static unsigned long clk_pxa3xx_ac97_get_rate(struct clk_hw *hw, From 448696655e6d9a8e94a6e7af5ec5d2834840edd9 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Tue, 14 Jul 2015 16:57:29 -0700 Subject: [PATCH 0733/2091] clk: qcom: Set CLK_SET_RATE_PARENT on ce1 clocks commit d7a304e9d018c99dda80f4c16ec0fe817b5be4a1 upstream. The other ce clocks have the flag set, but ce1 doesn't, so clk_set_rate() doesn't propagate up the tree to the ce1_src_clk. Set the flag as this is supported. 
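To illustrate what the flag changes from a consumer's point of view, here is a hedged sketch; the device pointer, clock lookup name and rate are placeholders, not taken from a real driver.

#include <linux/clk.h>
#include <linux/device.h>
#include <linux/err.h>

static int example_set_ce1_rate(struct device *dev)
{
        struct clk *ce1 = devm_clk_get(dev, "core");    /* hypothetical lookup */

        if (IS_ERR(ce1))
                return PTR_ERR(ce1);

        /*
         * gcc_ce1_clk is a plain branch/gate with no rate operation of its
         * own.  Without CLK_SET_RATE_PARENT the request stops at the gate
         * and ce1_clk_src is never reprogrammed; with the flag set the
         * request propagates up and the parent picks the rate.
         */
        clk_set_rate(ce1, 100000000);   /* example rate only */

        return clk_prepare_enable(ce1);
}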
Reported-by: Bjorn Andersson Tested-by: Bjorn Andersson Fixes: 02824653200b ("clk: qcom: Add APQ8084 Global Clock Controller support") Fixes: d33faa9ead8d ("clk: qcom: Add support for MSM8974's global clock controller (GCC)") Signed-off-by: Stephen Boyd Signed-off-by: Greg Kroah-Hartman --- drivers/clk/qcom/gcc-apq8084.c | 1 + drivers/clk/qcom/gcc-msm8974.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/clk/qcom/gcc-apq8084.c b/drivers/clk/qcom/gcc-apq8084.c index 54a756b90a374..457c540585f92 100644 --- a/drivers/clk/qcom/gcc-apq8084.c +++ b/drivers/clk/qcom/gcc-apq8084.c @@ -2105,6 +2105,7 @@ static struct clk_branch gcc_ce1_clk = { "ce1_clk_src", }, .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, .ops = &clk_branch2_ops, }, }, diff --git a/drivers/clk/qcom/gcc-msm8974.c b/drivers/clk/qcom/gcc-msm8974.c index c39d09874e74d..f06a082e3e870 100644 --- a/drivers/clk/qcom/gcc-msm8974.c +++ b/drivers/clk/qcom/gcc-msm8974.c @@ -1783,6 +1783,7 @@ static struct clk_branch gcc_ce1_clk = { "ce1_clk_src", }, .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, .ops = &clk_branch2_ops, }, }, From 88ce676adff806066f2ec27ff8ad3aa366a2dd20 Mon Sep 17 00:00:00 2001 From: Georgi Djakov Date: Tue, 25 Aug 2015 15:27:43 +0300 Subject: [PATCH 0734/2091] clk: qcom: Fix MSM8916 prng clock enable bit commit 1c4b4b0eb1909010b8ebda1ef208bf3ed62e7487 upstream. Fix the enable bit of the pseudorandom number generator clock. Reported-by: Stanimir Varbanov Fixes: 3966fab8b6ab "clk: qcom: Add MSM8916 Global Clock Controller support" Signed-off-by: Georgi Djakov Signed-off-by: Stephen Boyd Signed-off-by: Greg Kroah-Hartman --- drivers/clk/qcom/gcc-msm8916.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/qcom/gcc-msm8916.c b/drivers/clk/qcom/gcc-msm8916.c index c66f7bc2ae87c..5d75bffab141e 100644 --- a/drivers/clk/qcom/gcc-msm8916.c +++ b/drivers/clk/qcom/gcc-msm8916.c @@ -2278,7 +2278,7 @@ static struct clk_branch gcc_prng_ahb_clk = { .halt_check = BRANCH_HALT_VOTED, .clkr = { .enable_reg = 0x45004, - .enable_mask = BIT(0), + .enable_mask = BIT(8), .hw.init = &(struct clk_init_data){ .name = "gcc_prng_ahb_clk", .parent_names = (const char *[]){ From e756cabae72cb32d339a8584b73fb8f67f5d5211 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 19 Jun 2015 15:58:24 -0500 Subject: [PATCH 0735/2091] PCI: Fix TI816X class code quirk commit d1541dc977d376406f4584d8eb055488655c98ec upstream. In fixup_ti816x_class(), we assigned "class = PCI_CLASS_MULTIMEDIA_VIDEO". But PCI_CLASS_MULTIMEDIA_VIDEO is only the two-byte base class/sub-class and needs to be shifted to make space for the low-order interface byte. Shift PCI_CLASS_MULTIMEDIA_VIDEO to set the correct class code. 
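For reference (not part of the patch), the standalone snippet below shows why the shift matters, assuming the usual pci_ids.h value PCI_CLASS_MULTIMEDIA_VIDEO = 0x0400 and the 24-bit dev->class layout of base class, sub-class and programming interface.

#include <stdio.h>

#define PCI_CLASS_MULTIMEDIA_VIDEO 0x0400	/* base 0x04, sub-class 0x00 */

int main(void)
{
	unsigned int unshifted = PCI_CLASS_MULTIMEDIA_VIDEO;		/* 0x000400 */
	unsigned int shifted   = PCI_CLASS_MULTIMEDIA_VIDEO << 8;	/* 0x040000 */

	/* dev->class is 24 bits: base class, sub-class, prog-if */
	printf("unshifted: %#08x  base=%02x sub=%02x prog-if=%02x\n",
	       unshifted, unshifted >> 16, (unshifted >> 8) & 0xff,
	       unshifted & 0xff);
	printf("shifted:   %#08x  base=%02x sub=%02x prog-if=%02x\n",
	       shifted, shifted >> 16, (shifted >> 8) & 0xff,
	       shifted & 0xff);
	return 0;
}

Without the shift, the 0x04 base class lands in the sub-class byte and the device ends up with base class 0x00, so the quirk never actually set a video class.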
Fixes: 63c4408074cb ("PCI: Add quirk for setting valid class for TI816X Endpoint") Signed-off-by: Bjorn Helgaas CC: Hemant Pedanekar Signed-off-by: Greg Kroah-Hartman --- drivers/pci/quirks.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index c6dc1dfd25d55..cb640023a4ddb 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -2838,12 +2838,15 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x3c28, vtd_mask_spec_errors); static void fixup_ti816x_class(struct pci_dev *dev) { + u32 class = dev->class; + /* TI 816x devices do not have class code set when in PCIe boot mode */ - dev_info(&dev->dev, "Setting PCI class for 816x PCIe device\n"); - dev->class = PCI_CLASS_MULTIMEDIA_VIDEO; + dev->class = PCI_CLASS_MULTIMEDIA_VIDEO << 8; + dev_info(&dev->dev, "PCI class overridden (%#08x -> %#08x)\n", + class, dev->class); } DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_TI, 0xb800, - PCI_CLASS_NOT_DEFINED, 0, fixup_ti816x_class); + PCI_CLASS_NOT_DEFINED, 0, fixup_ti816x_class); /* Some PCIe devices do not work reliably with the claimed maximum * payload size supported. From 91a37c794cf2cf7ec1f9afc4cbf4a118df7e085e Mon Sep 17 00:00:00 2001 From: Mark Rustad Date: Mon, 13 Jul 2015 11:40:02 -0700 Subject: [PATCH 0736/2091] PCI: Add dev_flags bit to access VPD through function 0 commit 932c435caba8a2ce473a91753bad0173269ef334 upstream. Add a dev_flags bit, PCI_DEV_FLAGS_VPD_REF_F0, to access VPD through function 0 to provide VPD access on other functions. This is for hardware devices that provide copies of the same VPD capability registers in multiple functions. Because the kernel expects that each function has its own registers, both the locking and the state tracking are affected by VPD accesses to different functions. On such devices for example, if a VPD write is performed on function 0, *any* later attempt to read VPD from any other function of that device will hang. This has to do with how the kernel tracks the expected value of the F bit per function. Concurrent accesses to different functions of the same device can not only hang but also corrupt both read and write VPD data. When hangs occur, typically the error message: vpd r/w failed. This is likely a firmware bug on this device. will be seen. Never set this bit on function 0 or there will be an infinite recursion. 
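For background (not part of the patch), the snippet below uses the standard PCI devfn encoding macros to show how the function-0 sibling of a device is derived: the slot bits are kept and the function bits are cleared. The example devfn value is made up.

#include <stdio.h>

/* devfn encoding used by the PCI core (include/uapi/linux/pci.h) */
#define PCI_DEVFN(slot, func)	((((slot) & 0x1f) << 3) | ((func) & 0x07))
#define PCI_SLOT(devfn)		(((devfn) >> 3) & 0x1f)
#define PCI_FUNC(devfn)		((devfn) & 0x07)

int main(void)
{
	unsigned int devfn = PCI_DEVFN(2, 3);	/* e.g. port 3 of a multi-port NIC */

	/* Function 0 of the same slot is the one carrying the shared VPD */
	unsigned int f0 = PCI_DEVFN(PCI_SLOT(devfn), 0);

	printf("devfn %#04x -> slot %u func %u, function-0 sibling %#04x\n",
	       devfn, PCI_SLOT(devfn), PCI_FUNC(devfn), f0);
	return 0;
}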
Signed-off-by: Mark Rustad Signed-off-by: Bjorn Helgaas Acked-by: Alexander Duyck Signed-off-by: Greg Kroah-Hartman --- drivers/pci/access.c | 61 +++++++++++++++++++++++++++++++++++++++++++- include/linux/pci.h | 2 ++ 2 files changed, 62 insertions(+), 1 deletion(-) diff --git a/drivers/pci/access.c b/drivers/pci/access.c index d9b64a175990c..b965c12168b72 100644 --- a/drivers/pci/access.c +++ b/drivers/pci/access.c @@ -439,6 +439,56 @@ static const struct pci_vpd_ops pci_vpd_pci22_ops = { .release = pci_vpd_pci22_release, }; +static ssize_t pci_vpd_f0_read(struct pci_dev *dev, loff_t pos, size_t count, + void *arg) +{ + struct pci_dev *tdev = pci_get_slot(dev->bus, PCI_SLOT(dev->devfn)); + ssize_t ret; + + if (!tdev) + return -ENODEV; + + ret = pci_read_vpd(tdev, pos, count, arg); + pci_dev_put(tdev); + return ret; +} + +static ssize_t pci_vpd_f0_write(struct pci_dev *dev, loff_t pos, size_t count, + const void *arg) +{ + struct pci_dev *tdev = pci_get_slot(dev->bus, PCI_SLOT(dev->devfn)); + ssize_t ret; + + if (!tdev) + return -ENODEV; + + ret = pci_write_vpd(tdev, pos, count, arg); + pci_dev_put(tdev); + return ret; +} + +static const struct pci_vpd_ops pci_vpd_f0_ops = { + .read = pci_vpd_f0_read, + .write = pci_vpd_f0_write, + .release = pci_vpd_pci22_release, +}; + +static int pci_vpd_f0_dev_check(struct pci_dev *dev) +{ + struct pci_dev *tdev = pci_get_slot(dev->bus, PCI_SLOT(dev->devfn)); + int ret = 0; + + if (!tdev) + return -ENODEV; + if (!tdev->vpd || !tdev->multifunction || + dev->class != tdev->class || dev->vendor != tdev->vendor || + dev->device != tdev->device) + ret = -ENODEV; + + pci_dev_put(tdev); + return ret; +} + int pci_vpd_pci22_init(struct pci_dev *dev) { struct pci_vpd_pci22 *vpd; @@ -447,12 +497,21 @@ int pci_vpd_pci22_init(struct pci_dev *dev) cap = pci_find_capability(dev, PCI_CAP_ID_VPD); if (!cap) return -ENODEV; + if (dev->dev_flags & PCI_DEV_FLAGS_VPD_REF_F0) { + int ret = pci_vpd_f0_dev_check(dev); + + if (ret) + return ret; + } vpd = kzalloc(sizeof(*vpd), GFP_ATOMIC); if (!vpd) return -ENOMEM; vpd->base.len = PCI_VPD_PCI22_SIZE; - vpd->base.ops = &pci_vpd_pci22_ops; + if (dev->dev_flags & PCI_DEV_FLAGS_VPD_REF_F0) + vpd->base.ops = &pci_vpd_f0_ops; + else + vpd->base.ops = &pci_vpd_pci22_ops; mutex_init(&vpd->lock); vpd->cap = cap; vpd->busy = false; diff --git a/include/linux/pci.h b/include/linux/pci.h index 3ef3a52068df4..6e935e5eab565 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -180,6 +180,8 @@ enum pci_dev_flags { PCI_DEV_FLAGS_NO_BUS_RESET = (__force pci_dev_flags_t) (1 << 6), /* Do not use PM reset even if device advertises NoSoftRst- */ PCI_DEV_FLAGS_NO_PM_RESET = (__force pci_dev_flags_t) (1 << 7), + /* Get VPD from function 0 VPD */ + PCI_DEV_FLAGS_VPD_REF_F0 = (__force pci_dev_flags_t) (1 << 8), }; enum pci_irq_reroute_variant { From 884372a6b09964f236fa8148bc831c84f68d8a09 Mon Sep 17 00:00:00 2001 From: Mark Rustad Date: Mon, 13 Jul 2015 11:40:07 -0700 Subject: [PATCH 0737/2091] PCI: Add VPD function 0 quirk for Intel Ethernet devices commit 7aa6ca4d39edf01f997b9e02cf6d2fdeb224f351 upstream. Set the PCI_DEV_FLAGS_VPD_REF_F0 flag on all Intel Ethernet device functions other than function 0, so that on multi-function devices, we will always read VPD from function 0 instead of from the other functions. 
[bhelgaas: changelog] Signed-off-by: Mark Rustad Signed-off-by: Bjorn Helgaas Acked-by: Alexander Duyck Signed-off-by: Greg Kroah-Hartman --- drivers/pci/quirks.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index cb640023a4ddb..f260f36c39140 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -1903,6 +1903,15 @@ static void quirk_netmos(struct pci_dev *dev) DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_VENDOR_ID_NETMOS, PCI_ANY_ID, PCI_CLASS_COMMUNICATION_SERIAL, 8, quirk_netmos); +static void quirk_f0_vpd_link(struct pci_dev *dev) +{ + if (!dev->multifunction || !PCI_FUNC(dev->devfn)) + return; + dev->dev_flags |= PCI_DEV_FLAGS_VPD_REF_F0; +} +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, + PCI_CLASS_NETWORK_ETHERNET, 8, quirk_f0_vpd_link); + static void quirk_e100_interrupt(struct pci_dev *dev) { u16 command, pmcsr; From a1ff8fe3a5387ac0c33b2a4677690e322bec8c04 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Mon, 24 Aug 2015 15:27:11 -0500 Subject: [PATCH 0738/2091] PCI: Disable async suspend/resume for JMicron multi-function SATA/AHCI commit 91f15fb30c77d4a0d0d9b97e5cec647650853145 upstream. On multi-function JMicron SATA/PATA/AHCI devices, the PATA controller at function 1 doesn't work if it is powered on before the SATA controller at function 0. The result is that PATA doesn't work after resume, and we print messages like this: pata_jmicron 0000:02:00.1: Refused to change power state, currently in D3 irq 17: nobody cared (try booting with the "irqpoll" option) Async resume was introduced in v3.15 by 76569faa62c4 ("PM / sleep: Asynchronous threads for resume_noirq"). Prior to that, we powered on the functions in order, so this problem shouldn't happen. e6b7e41cdd8c ("ata: Disabling the async PM for JMicron chip 363/361") solved the problem for JMicron 361 and 363 devices. With async suspend disabled, we always power on function 0 before function 1. Barto then reported the same problem with a JMicron 368 (see comment #57 in the bugzilla). Rather than extending the blacklist piecemeal, disable async suspend for all JMicron multi-function SATA/PATA/AHCI devices. This quirk could stay in the ahci and pata_jmicron drivers, but it's likely the problem will occur even if pata_jmicron isn't loaded until after the suspend/resume. Making it a PCI quirk ensures that we'll preserve the power-on order even if the drivers aren't loaded. 
[bhelgaas: changelog, limit to multi-function, limit to IDE/ATA] Link: https://bugzilla.kernel.org/show_bug.cgi?id=81551 Reported-and-tested-by: Barto Signed-off-by: Zhang Rui Signed-off-by: Bjorn Helgaas Signed-off-by: Greg Kroah-Hartman --- drivers/ata/ahci.c | 13 +------------ drivers/ata/pata_jmicron.c | 12 ------------ drivers/pci/quirks.c | 12 ++++++++++++ 3 files changed, 13 insertions(+), 24 deletions(-) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 65ee94454bbd2..e6ea912aee31a 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -349,6 +349,7 @@ static const struct pci_device_id ahci_pci_tbl[] = { /* JMicron 362B and 362C have an AHCI function with IDE class code */ { PCI_VDEVICE(JMICRON, 0x2362), board_ahci_ign_iferr }, { PCI_VDEVICE(JMICRON, 0x236f), board_ahci_ign_iferr }, + /* May need to update quirk_jmicron_async_suspend() for additions */ /* ATI */ { PCI_VDEVICE(ATI, 0x4380), board_ahci_sb600 }, /* ATI SB600 */ @@ -1377,18 +1378,6 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) else if (pdev->vendor == 0x1c44 && pdev->device == 0x8000) ahci_pci_bar = AHCI_PCI_BAR_ENMOTUS; - /* - * The JMicron chip 361/363 contains one SATA controller and one - * PATA controller,for powering on these both controllers, we must - * follow the sequence one by one, otherwise one of them can not be - * powered on successfully, so here we disable the async suspend - * method for these chips. - */ - if (pdev->vendor == PCI_VENDOR_ID_JMICRON && - (pdev->device == PCI_DEVICE_ID_JMICRON_JMB363 || - pdev->device == PCI_DEVICE_ID_JMICRON_JMB361)) - device_disable_async_suspend(&pdev->dev); - /* acquire resources */ rc = pcim_enable_device(pdev); if (rc) diff --git a/drivers/ata/pata_jmicron.c b/drivers/ata/pata_jmicron.c index 47e418b8c8baa..4d1a5d2c4287f 100644 --- a/drivers/ata/pata_jmicron.c +++ b/drivers/ata/pata_jmicron.c @@ -143,18 +143,6 @@ static int jmicron_init_one (struct pci_dev *pdev, const struct pci_device_id *i }; const struct ata_port_info *ppi[] = { &info, NULL }; - /* - * The JMicron chip 361/363 contains one SATA controller and one - * PATA controller,for powering on these both controllers, we must - * follow the sequence one by one, otherwise one of them can not be - * powered on successfully, so here we disable the async suspend - * method for these chips. 
- */ - if (pdev->vendor == PCI_VENDOR_ID_JMICRON && - (pdev->device == PCI_DEVICE_ID_JMICRON_JMB363 || - pdev->device == PCI_DEVICE_ID_JMICRON_JMB361)) - device_disable_async_suspend(&pdev->dev); - return ata_pci_bmdma_init_one(pdev, ppi, &jmicron_sht, NULL, 0); } diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index f260f36c39140..804cd3b02c66b 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -1576,6 +1576,18 @@ DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB3 #endif +static void quirk_jmicron_async_suspend(struct pci_dev *dev) +{ + if (dev->multifunction) { + device_disable_async_suspend(&dev->dev); + dev_info(&dev->dev, "async suspend disabled to avoid multi-function power-on ordering issue\n"); + } +} +DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_JMICRON, PCI_ANY_ID, PCI_CLASS_STORAGE_IDE, 8, quirk_jmicron_async_suspend); +DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_JMICRON, PCI_ANY_ID, PCI_CLASS_STORAGE_SATA_AHCI, 0, quirk_jmicron_async_suspend); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_JMICRON, 0x2362, quirk_jmicron_async_suspend); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_JMICRON, 0x236f, quirk_jmicron_async_suspend); + #ifdef CONFIG_X86_IO_APIC static void quirk_alder_ioapic(struct pci_dev *pdev) { From 7f3f69a695fe2d10a9afad4419c68d731f775b94 Mon Sep 17 00:00:00 2001 From: Martin Sperl Date: Tue, 28 Jul 2015 14:03:12 +0000 Subject: [PATCH 0739/2091] spi: bcm2835: set up spi-mode before asserting cs-gpio commit acace73df2c1913a526c1b41e4741a4a6704c863 upstream. When using reverse polarity for clock (spi-cpol) on a device the clock line gets altered after chip-select has been asserted resulting in an additional clock beat, which confuses hardware. This did not show when using native-CS, as the same register is used to control cs as well as polarity, so the changes came into effect at the same time. Unfortunately this is not true with gpio-cs. To avoid this situation this patch moves the setup of polarity (spi-cpol and spi-cpha) outside of the chip-select into prepare_message, which is run prior to asserting chip-select. Also fixes resetting 3-wire mode after use of rx-mode, so that a 3-Wire sequence TX, RX, TX works as well (right now it runs TX, RX, RX instead) Reported-by: Noralf Tronnes Signed-off-by: Martin Sperl Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-bcm2835.c | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/drivers/spi/spi-bcm2835.c b/drivers/spi/spi-bcm2835.c index 37875cf942f7b..a5067739ee938 100644 --- a/drivers/spi/spi-bcm2835.c +++ b/drivers/spi/spi-bcm2835.c @@ -257,13 +257,11 @@ static int bcm2835_spi_transfer_one(struct spi_master *master, spi_used_hz = cdiv ? 
(clk_hz / cdiv) : (clk_hz / 65536); bcm2835_wr(bs, BCM2835_SPI_CLK, cdiv); - /* handle all the modes */ + /* handle all the 3-wire mode */ if ((spi->mode & SPI_3WIRE) && (tfr->rx_buf)) cs |= BCM2835_SPI_CS_REN; - if (spi->mode & SPI_CPOL) - cs |= BCM2835_SPI_CS_CPOL; - if (spi->mode & SPI_CPHA) - cs |= BCM2835_SPI_CS_CPHA; + else + cs &= ~BCM2835_SPI_CS_REN; /* for gpio_cs set dummy CS so that no HW-CS get changed * we can not run this in bcm2835_spi_set_cs, as it does @@ -291,6 +289,25 @@ static int bcm2835_spi_transfer_one(struct spi_master *master, return bcm2835_spi_transfer_one_irq(master, spi, tfr, cs); } +static int bcm2835_spi_prepare_message(struct spi_master *master, + struct spi_message *msg) +{ + struct spi_device *spi = msg->spi; + struct bcm2835_spi *bs = spi_master_get_devdata(master); + u32 cs = bcm2835_rd(bs, BCM2835_SPI_CS); + + cs &= ~(BCM2835_SPI_CS_CPOL | BCM2835_SPI_CS_CPHA); + + if (spi->mode & SPI_CPOL) + cs |= BCM2835_SPI_CS_CPOL; + if (spi->mode & SPI_CPHA) + cs |= BCM2835_SPI_CS_CPHA; + + bcm2835_wr(bs, BCM2835_SPI_CS, cs); + + return 0; +} + static void bcm2835_spi_handle_err(struct spi_master *master, struct spi_message *msg) { @@ -429,6 +446,7 @@ static int bcm2835_spi_probe(struct platform_device *pdev) master->set_cs = bcm2835_spi_set_cs; master->transfer_one = bcm2835_spi_transfer_one; master->handle_err = bcm2835_spi_handle_err; + master->prepare_message = bcm2835_spi_prepare_message; master->dev.of_node = pdev->dev.of_node; bs = spi_master_get_devdata(master); From 1583eaece6faf7b389085771c0eb4bf1af5960f2 Mon Sep 17 00:00:00 2001 From: Lars Persson Date: Wed, 29 Jul 2015 09:32:02 +0200 Subject: [PATCH 0740/2091] spi: Fix regression in spi-bitbang-txrx.h commit 26a67ec47a4c58fe79c6421c3dc3d697d322d2d6 upstream. This patch fixes a regression introduced by commit 232a5adc5199 ("spi: bitbang: only toggle bitchanges"). The attempt to optimize writes of consecutive bit patterns broke most of the combinations of word size and SPI modes due to selecting the wrong bit as the MSB value. Fixes: 232a5adc5199 (spi: bitbang: only toggle bitchanges) Signed-off-by: Lars Persson Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-bitbang-txrx.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-bitbang-txrx.h b/drivers/spi/spi-bitbang-txrx.h index 06b34e5bcfa37..47bb9b898dfdc 100644 --- a/drivers/spi/spi-bitbang-txrx.h +++ b/drivers/spi/spi-bitbang-txrx.h @@ -49,7 +49,7 @@ bitbang_txrx_be_cpha0(struct spi_device *spi, { /* if (cpol == 0) this is SPI_MODE_0; else this is SPI_MODE_2 */ - bool oldbit = !(word & 1); + u32 oldbit = (!(word & (1<<(bits-1)))) << 31; /* clock starts at inactive polarity */ for (word <<= (32 - bits); likely(bits); bits--) { @@ -81,7 +81,7 @@ bitbang_txrx_be_cpha1(struct spi_device *spi, { /* if (cpol == 0) this is SPI_MODE_1; else this is SPI_MODE_3 */ - bool oldbit = !(word & (1 << 31)); + u32 oldbit = (!(word & (1<<(bits-1)))) << 31; /* clock starts at inactive polarity */ for (word <<= (32 - bits); likely(bits); bits--) { From 49b85054a83dd407879206f43fd6944287848189 Mon Sep 17 00:00:00 2001 From: Koji Matsuoka Date: Mon, 15 Jun 2015 02:25:05 +0900 Subject: [PATCH 0741/2091] spi: sh-msiof: Fix FIFO size to 64 word from 256 word commit fe78d0b7691c02744004b15f6979b3f106464bc4 upstream. The upper limit of Tx/Rx FIFO size is 64 word by the specification of H/W. This patch corrects to 64 word from 256 word. 
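As a quick illustration (not part of the patch), the DMA path packs data into 32-bit words, so the per-transfer cap is fifo_words * 4 bytes; with the old 256-word assumption the driver could queue up to 1024 bytes against a 64-word (256-byte) receive FIFO. The transfer length below is an arbitrary example.

#include <stdio.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))

int main(void)
{
	unsigned int len = 4096;	/* bytes requested for one transfer */
	unsigned int old_words = 256;	/* what the driver used to assume for RX */
	unsigned int new_words = 64;	/* actual hardware FIFO depth */

	printf("old cap: %u bytes (overruns a 64-word FIFO)\n",
	       MIN(len, old_words * 4));
	printf("new cap: %u bytes\n", MIN(len, new_words * 4));
	return 0;
}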
Signed-off-by: Koji Matsuoka Signed-off-by: Yoshihiro Kaneko Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-sh-msiof.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/spi/spi-sh-msiof.c b/drivers/spi/spi-sh-msiof.c index bcc7c635d8e7d..7872f3c78b515 100644 --- a/drivers/spi/spi-sh-msiof.c +++ b/drivers/spi/spi-sh-msiof.c @@ -48,8 +48,8 @@ struct sh_msiof_spi_priv { const struct sh_msiof_chipdata *chipdata; struct sh_msiof_spi_info *info; struct completion done; - int tx_fifo_size; - int rx_fifo_size; + unsigned int tx_fifo_size; + unsigned int rx_fifo_size; void *tx_dma_page; void *rx_dma_page; dma_addr_t tx_dma_addr; @@ -95,8 +95,6 @@ struct sh_msiof_spi_priv { #define MDR2_WDLEN1(i) (((i) - 1) << 16) /* Word Count (1-64/256 (SH, A1))) */ #define MDR2_GRPMASK1 0x00000001 /* Group Output Mask 1 (SH, A1) */ -#define MAX_WDLEN 256U - /* TSCR and RSCR */ #define SCR_BRPS_MASK 0x1f00 /* Prescaler Setting (1-32) */ #define SCR_BRPS(i) (((i) - 1) << 8) @@ -850,7 +848,12 @@ static int sh_msiof_transfer_one(struct spi_master *master, * DMA supports 32-bit words only, hence pack 8-bit and 16-bit * words, with byte resp. word swapping. */ - unsigned int l = min(len, MAX_WDLEN * 4); + unsigned int l = 0; + + if (tx_buf) + l = min(len, p->tx_fifo_size * 4); + if (rx_buf) + l = min(len, p->rx_fifo_size * 4); if (bits <= 8) { if (l & 3) @@ -963,7 +966,7 @@ static const struct sh_msiof_chipdata sh_data = { static const struct sh_msiof_chipdata r8a779x_data = { .tx_fifo_size = 64, - .rx_fifo_size = 256, + .rx_fifo_size = 64, .master_flags = SPI_MASTER_MUST_TX, }; From b82ed8c90e5f1ad73d1c355c58ef38a789a16c7d Mon Sep 17 00:00:00 2001 From: Sifan Naeem Date: Mon, 27 Jul 2015 13:11:15 +0100 Subject: [PATCH 0742/2091] spi: img-spfi: check for timeout error before proceeding commit 011710e2ab659c7ad6e5e554806414bd7a9508be upstream. Calling spfi_wait_all_done is not required if the transfer has timed out before all data is transferred. spfi_wait_all_done polls for Alldone interrupt which is triggered to mark the transfer as complete and to indicate it is now safe to issue a new transfer. Fixes: 8c2c8c0 ("spi: img-spfi: Control CS lines with GPIO") Signed-off-by: Sifan Naeem Reviewed-by: Andrew Bresticker Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-img-spfi.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/spi/spi-img-spfi.c b/drivers/spi/spi-img-spfi.c index acce90ac7371d..9fed299603321 100644 --- a/drivers/spi/spi-img-spfi.c +++ b/drivers/spi/spi-img-spfi.c @@ -267,15 +267,15 @@ static int img_spfi_start_pio(struct spi_master *master, cpu_relax(); } - ret = spfi_wait_all_done(spfi); - if (ret < 0) - return ret; - if (rx_bytes > 0 || tx_bytes > 0) { dev_err(spfi->dev, "PIO transfer timed out\n"); return -ETIMEDOUT; } + ret = spfi_wait_all_done(spfi); + if (ret < 0) + return ret; + return 0; } From 1d43421636933f74c6f642a61951dbfa3732edec Mon Sep 17 00:00:00 2001 From: Sifan Naeem Date: Wed, 29 Jul 2015 11:55:26 +0100 Subject: [PATCH 0743/2091] spi: img-spfi: fix multiple calls to request gpio commit b03ba9e314c12b2127243145b5c1f41b2408de62 upstream. spfi_setup may be called many times by the spi framework, but gpio_request_one can only be called once without freeing, repeatedly calling gpio_request_one will cause an error to be thrown, which causes the request to spi_setup to be marked as failed. 
We can have a per-spi_device flag that indicates whether or not the gpio has been requested. If the gpio has already been requested use gpio_direction_output to set the direction of the gpio. Fixes: 8c2c8c03cdcb ("spi: img-spfi: Control CS lines with GPIO") Signed-off-by: Sifan Naeem Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-img-spfi.c | 49 +++++++++++++++++++++++++++++++------- 1 file changed, 41 insertions(+), 8 deletions(-) diff --git a/drivers/spi/spi-img-spfi.c b/drivers/spi/spi-img-spfi.c index 9fed299603321..557a398a4b8b4 100644 --- a/drivers/spi/spi-img-spfi.c +++ b/drivers/spi/spi-img-spfi.c @@ -105,6 +105,10 @@ struct img_spfi { bool rx_dma_busy; }; +struct img_spfi_device_data { + bool gpio_requested; +}; + static inline u32 spfi_readl(struct img_spfi *spfi, u32 reg) { return readl(spfi->regs + reg); @@ -441,20 +445,49 @@ static int img_spfi_unprepare(struct spi_master *master, static int img_spfi_setup(struct spi_device *spi) { int ret; - - ret = gpio_request_one(spi->cs_gpio, (spi->mode & SPI_CS_HIGH) ? - GPIOF_OUT_INIT_LOW : GPIOF_OUT_INIT_HIGH, - dev_name(&spi->dev)); - if (ret) - dev_err(&spi->dev, "can't request chipselect gpio %d\n", + struct img_spfi_device_data *spfi_data = spi_get_ctldata(spi); + + if (!spfi_data) { + spfi_data = kzalloc(sizeof(*spfi_data), GFP_KERNEL); + if (!spfi_data) + return -ENOMEM; + spfi_data->gpio_requested = false; + spi_set_ctldata(spi, spfi_data); + } + if (!spfi_data->gpio_requested) { + ret = gpio_request_one(spi->cs_gpio, + (spi->mode & SPI_CS_HIGH) ? + GPIOF_OUT_INIT_LOW : GPIOF_OUT_INIT_HIGH, + dev_name(&spi->dev)); + if (ret) + dev_err(&spi->dev, "can't request chipselect gpio %d\n", spi->cs_gpio); - + else + spfi_data->gpio_requested = true; + } else { + if (gpio_is_valid(spi->cs_gpio)) { + int mode = ((spi->mode & SPI_CS_HIGH) ? + GPIOF_OUT_INIT_LOW : GPIOF_OUT_INIT_HIGH); + + ret = gpio_direction_output(spi->cs_gpio, mode); + if (ret) + dev_err(&spi->dev, "chipselect gpio %d setup failed (%d)\n", + spi->cs_gpio, ret); + } + } return ret; } static void img_spfi_cleanup(struct spi_device *spi) { - gpio_free(spi->cs_gpio); + struct img_spfi_device_data *spfi_data = spi_get_ctldata(spi); + + if (spfi_data) { + if (spfi_data->gpio_requested) + gpio_free(spi->cs_gpio); + kfree(spfi_data); + spi_set_ctldata(spi, NULL); + } } static void img_spfi_config(struct spi_master *master, struct spi_device *spi, From e712a3824ba05f5edddab8066a148dfd548f0119 Mon Sep 17 00:00:00 2001 From: Sifan Naeem Date: Thu, 6 Aug 2015 10:33:01 +0100 Subject: [PATCH 0744/2091] spi: img-spfi: fix kbuild test robot warning commit 9176c6657b5c313cf504d157e6d91496ee5c8708 upstream. drivers/spi/spi-img-spfi.c: In function 'img_spfi_setup': drivers/spi/spi-img-spfi.c:446: warning: 'ret' may be used uninitialized in this function. 
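As a simplified illustration (not part of the patch, control flow condensed and helper names invented), the toy functions below reproduce the warning pattern: 'ret' is only assigned on some branches of the setup path, so the path where the per-device data already exists and the GPIO is invalid returns an indeterminate value; initialising it to -EINVAL, as the fix does, gives that path a defined result.

#include <errno.h>
#include <stdio.h>

static int setup_before(int first_call, int gpio_valid)
{
	int ret;			/* not initialized */

	if (first_call)
		ret = 0;		/* pretend the GPIO request succeeded */
	else if (gpio_valid)
		ret = 0;		/* pretend reconfiguration succeeded */
	/* !first_call && !gpio_valid: ret is never written */
	return ret;
}

static int setup_after(int first_call, int gpio_valid)
{
	int ret = -EINVAL;		/* safe default, as in the fix */

	if (first_call)
		ret = 0;
	else if (gpio_valid)
		ret = 0;
	return ret;
}

int main(void)
{
	/* Only exercise paths that are defined in both versions. */
	printf("before, defined path: %d\n", setup_before(1, 0));
	printf("after,  problem path: %d\n", setup_after(0, 0));
	return 0;
}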
Fixes: commit b03ba9e314c1 ("spi: img-spfi: fix multiple calls to request gpio") Signed-off-by: Sifan Naeem Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-img-spfi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-img-spfi.c b/drivers/spi/spi-img-spfi.c index 557a398a4b8b4..bb916c8d40db8 100644 --- a/drivers/spi/spi-img-spfi.c +++ b/drivers/spi/spi-img-spfi.c @@ -444,7 +444,7 @@ static int img_spfi_unprepare(struct spi_master *master, static int img_spfi_setup(struct spi_device *spi) { - int ret; + int ret = -EINVAL; struct img_spfi_device_data *spfi_data = spi_get_ctldata(spi); if (!spfi_data) { From 574c2c05ce61098130a06bf6cd16c7215b19be58 Mon Sep 17 00:00:00 2001 From: Michael van der Westhuizen Date: Tue, 18 Aug 2015 22:21:53 +0200 Subject: [PATCH 0745/2091] spi: dw: Allow interface drivers to limit data I/O to word sizes commit c4fe57f76269dbb2af135071513f260ca40229a3 upstream. The commit dd11444327ce ("spi: dw-spi: Convert 16bit accesses to 32bit accesses") changed all 16bit accesses in the DW_apb_ssi driver to 32bit. This, unfortunately, breaks data register access on picoXcell, where the DW IP needs data register accesses to be word accesses (all other accesses appear to be OK). This change introduces a new master variable to allow interface drivers to specify that 16bit data transfer I/O is required. This change also introduces the ability to set this variable via device tree bindings in the MMIO interface driver. Both the core and the MMIO interface driver default to the current 32bit behaviour. Before this change, on a picoXcell pc3x3: spi_master spi32766: interrupt_transfer: fifo overrun/underrun m25p80 spi32766.0: error -5 reading 9f m25p80: probe of spi32766.0 failed with error -5 After this change: m25p80 spi32766.0: m25p40 (512 Kbytes) Fixes: dd11444327ce ("spi: dw-spi: Convert 16bit accesses to 32bit accesses") Signed-off-by: Michael van der Westhuizen Reviewed-by: Andy Shevchenko Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-dw-mmio.c | 3 +++ drivers/spi/spi-dw.c | 4 ++-- drivers/spi/spi-dw.h | 35 +++++++++++++++++++++++++++++++++++ 3 files changed, 40 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-dw-mmio.c b/drivers/spi/spi-dw-mmio.c index eb03e1215195b..7edede6e024ba 100644 --- a/drivers/spi/spi-dw-mmio.c +++ b/drivers/spi/spi-dw-mmio.c @@ -74,6 +74,9 @@ static int dw_spi_mmio_probe(struct platform_device *pdev) dws->max_freq = clk_get_rate(dwsmmio->clk); + of_property_read_u32(pdev->dev.of_node, "reg-io-width", + &dws->reg_io_width); + num_cs = 4; if (pdev->dev.of_node) diff --git a/drivers/spi/spi-dw.c b/drivers/spi/spi-dw.c index 8d67d03c71ebc..4fbfcdc5cb244 100644 --- a/drivers/spi/spi-dw.c +++ b/drivers/spi/spi-dw.c @@ -194,7 +194,7 @@ static void dw_writer(struct dw_spi *dws) else txw = *(u16 *)(dws->tx); } - dw_writel(dws, DW_SPI_DR, txw); + dw_write_io_reg(dws, DW_SPI_DR, txw); dws->tx += dws->n_bytes; } } @@ -205,7 +205,7 @@ static void dw_reader(struct dw_spi *dws) u16 rxw; while (max--) { - rxw = dw_readl(dws, DW_SPI_DR); + rxw = dw_read_io_reg(dws, DW_SPI_DR); /* Care rx only if the transfer's original "rx" is not null */ if (dws->rx_end - dws->len) { if (dws->n_bytes == 1) diff --git a/drivers/spi/spi-dw.h b/drivers/spi/spi-dw.h index 6c91391c1a4f8..b75ed327d5a29 100644 --- a/drivers/spi/spi-dw.h +++ b/drivers/spi/spi-dw.h @@ -109,6 +109,7 @@ struct dw_spi { u32 fifo_len; /* depth of the FIFO buffer */ u32 max_freq; /* max bus freq supported */ 
+ u32 reg_io_width; /* DR I/O width in bytes */ u16 bus_num; u16 num_cs; /* supported slave numbers */ @@ -145,11 +146,45 @@ static inline u32 dw_readl(struct dw_spi *dws, u32 offset) return __raw_readl(dws->regs + offset); } +static inline u16 dw_readw(struct dw_spi *dws, u32 offset) +{ + return __raw_readw(dws->regs + offset); +} + static inline void dw_writel(struct dw_spi *dws, u32 offset, u32 val) { __raw_writel(val, dws->regs + offset); } +static inline void dw_writew(struct dw_spi *dws, u32 offset, u16 val) +{ + __raw_writew(val, dws->regs + offset); +} + +static inline u32 dw_read_io_reg(struct dw_spi *dws, u32 offset) +{ + switch (dws->reg_io_width) { + case 2: + return dw_readw(dws, offset); + case 4: + default: + return dw_readl(dws, offset); + } +} + +static inline void dw_write_io_reg(struct dw_spi *dws, u32 offset, u32 val) +{ + switch (dws->reg_io_width) { + case 2: + dw_writew(dws, offset, val); + break; + case 4: + default: + dw_writel(dws, offset, val); + break; + } +} + static inline void spi_enable_chip(struct dw_spi *dws, int enable) { dw_writel(dws, DW_SPI_SSIENR, (enable ? 1 : 0)); From 0d57ef0f108b7a956f6cd246eda2777391b134e5 Mon Sep 17 00:00:00 2001 From: Philipp Hachtmann Date: Mon, 17 Aug 2015 17:31:46 +0200 Subject: [PATCH 0746/2091] USB: symbolserial: Use usb_get_serial_port_data commit 951d3793bbfc0a441d791d820183aa3085c83ea9 upstream. The driver used usb_get_serial_data(port->serial) which compiled but resulted in a NULL pointer being returned (and subsequently used). I did not go deeper into this but I guess this is a regression. Signed-off-by: Philipp Hachtmann Fixes: a85796ee5149 ("USB: symbolserial: move private-data allocation to port_probe") Acked-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/symbolserial.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/serial/symbolserial.c b/drivers/usb/serial/symbolserial.c index 8fceec7298e00..6ed804450a5a6 100644 --- a/drivers/usb/serial/symbolserial.c +++ b/drivers/usb/serial/symbolserial.c @@ -94,7 +94,7 @@ static void symbol_int_callback(struct urb *urb) static int symbol_open(struct tty_struct *tty, struct usb_serial_port *port) { - struct symbol_private *priv = usb_get_serial_data(port->serial); + struct symbol_private *priv = usb_get_serial_port_data(port); unsigned long flags; int result = 0; @@ -120,7 +120,7 @@ static void symbol_close(struct usb_serial_port *port) static void symbol_throttle(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; - struct symbol_private *priv = usb_get_serial_data(port->serial); + struct symbol_private *priv = usb_get_serial_port_data(port); spin_lock_irq(&priv->lock); priv->throttled = true; @@ -130,7 +130,7 @@ static void symbol_throttle(struct tty_struct *tty) static void symbol_unthrottle(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; - struct symbol_private *priv = usb_get_serial_data(port->serial); + struct symbol_private *priv = usb_get_serial_port_data(port); int result; bool was_throttled; From 16b58e65e405a1b671caad2204fce9d46df26dd0 Mon Sep 17 00:00:00 2001 From: David Ward Date: Tue, 18 Aug 2015 10:36:23 +0200 Subject: [PATCH 0747/2091] USB: qcserial: add HP lt4111 LTE/EV-DO/HSPA+ Gobi 4G Module commit 44840dec6127e4d7c5074f75d2dd96bc4ab85fe3 upstream. 
This is an HP-branded Sierra Wireless EM7355: https://bugzilla.redhat.com/show_bug.cgi?id=1223646#c2 Signed-off-by: David Ward Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/qcserial.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c index d156545728c2a..ebcec8cda8584 100644 --- a/drivers/usb/serial/qcserial.c +++ b/drivers/usb/serial/qcserial.c @@ -139,6 +139,7 @@ static const struct usb_device_id id_table[] = { {USB_DEVICE(0x0AF0, 0x8120)}, /* Option GTM681W */ /* non-Gobi Sierra Wireless devices */ + {DEVICE_SWI(0x03f0, 0x4e1d)}, /* HP lt4111 LTE/EV-DO/HSPA+ Gobi 4G Module */ {DEVICE_SWI(0x0f3d, 0x68a2)}, /* Sierra Wireless MC7700 */ {DEVICE_SWI(0x114f, 0x68a2)}, /* Sierra Wireless MC7750 */ {DEVICE_SWI(0x1199, 0x68a2)}, /* Sierra Wireless MC7710 */ From 62e9fa636be500d83cae0b2de1ad91a98d6b986b Mon Sep 17 00:00:00 2001 From: Matthijs Kooijman Date: Tue, 18 Aug 2015 10:33:56 +0200 Subject: [PATCH 0748/2091] USB: ftdi_sio: Added custom PID for CustomWare products commit 1fb8dc36384ae1140ee6ccc470de74397606a9d5 upstream. CustomWare uses the FTDI VID with custom PIDs for their ShipModul MiniPlex products. Signed-off-by: Matthijs Kooijman Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/ftdi_sio.c | 4 ++++ drivers/usb/serial/ftdi_sio_ids.h | 8 ++++++++ 2 files changed, 12 insertions(+) diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 4c8b3b82103d6..a5a0376bbd48c 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -605,6 +605,10 @@ static const struct usb_device_id id_table_combined[] = { { USB_DEVICE(FTDI_VID, FTDI_NT_ORIONLXM_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(FTDI_VID, FTDI_SYNAPSE_SS200_PID) }, + { USB_DEVICE(FTDI_VID, FTDI_CUSTOMWARE_MINIPLEX_PID) }, + { USB_DEVICE(FTDI_VID, FTDI_CUSTOMWARE_MINIPLEX2_PID) }, + { USB_DEVICE(FTDI_VID, FTDI_CUSTOMWARE_MINIPLEX2WI_PID) }, + { USB_DEVICE(FTDI_VID, FTDI_CUSTOMWARE_MINIPLEX3_PID) }, /* * ELV devices: */ diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index 792e054126de5..2943b97b2a836 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -568,6 +568,14 @@ */ #define FTDI_SYNAPSE_SS200_PID 0x9090 /* SS200 - SNAP Stick 200 */ +/* + * CustomWare / ShipModul NMEA multiplexers product ids (FTDI_VID) + */ +#define FTDI_CUSTOMWARE_MINIPLEX_PID 0xfd48 /* MiniPlex first generation NMEA Multiplexer */ +#define FTDI_CUSTOMWARE_MINIPLEX2_PID 0xfd49 /* MiniPlex-USB and MiniPlex-2 series */ +#define FTDI_CUSTOMWARE_MINIPLEX2WI_PID 0xfd4a /* MiniPlex-2Wi */ +#define FTDI_CUSTOMWARE_MINIPLEX3_PID 0xfd4b /* MiniPlex-3 series */ + /********************************/ /** third-party VID/PID combos **/ From 3a6e0a041c7eea1455def429f97ca3ec3844b004 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pecio?= Date: Sun, 26 Jul 2015 11:14:34 +0200 Subject: [PATCH 0749/2091] USB: pl2303: fix baud-rate divisor calculations commit 49bda21266fdf195142e8b5dea057f09e96ada9f upstream. This commit fixes the following issues: 1. The 9th bit of buf was believed to be the LSB of divisor's exponent, but the hardware interprets it as MSB (9th bit) of the mantissa. The exponent is actually one bit shorter and applies to base 4, not 2 as previously believed. 2. Loop iterations doubled the exponent instead of incrementing. 3. The exponent wasn't checked for overflow. 4. 
The function returned requested rate instead of actual rate. Due to issue #2, the old code deviated from the wrong formula described in #1 and actually yielded correct rates when divisor was lower than 4096 by using exponents of 0, 2 or 4 base-2, interpreted as 0, 1, 2 base-4 with the 9th mantissa bit clear. However, at 93.75 kbaud or less the rate turned out too slow due to #2 or too fast due to #2 and #3. I tested this patch by sending and validating 0x00,0x01,..,0xff to an FTDI dongle at 234, 987, 2401, 9601, 31415, 115199, 250k, 500k, 750k, 1M, 1.5M, 3M+1 baud. All rates passed. I also used pv to check speed at some rates unsupported by FTDI: 45 (the lowest possible), 2M, 4M, 5M and 6M-1. Looked sane. Signed-off-by: Michal Pecio Fixes: 399aa9a75ad3 ("USB: pl2303: use divisors for unsupported baud rates") [johan: update summary ] Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/pl2303.c | 35 ++++++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index f5257af33ecfb..ae682e4eeaef5 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -362,21 +362,38 @@ static speed_t pl2303_encode_baud_rate_direct(unsigned char buf[4], static speed_t pl2303_encode_baud_rate_divisor(unsigned char buf[4], speed_t baud) { - unsigned int tmp; + unsigned int baseline, mantissa, exponent; /* * Apparently the formula is: - * baudrate = 12M * 32 / (2^buf[1]) / buf[0] + * baudrate = 12M * 32 / (mantissa * 4^exponent) + * where + * mantissa = buf[8:0] + * exponent = buf[11:9] */ - tmp = 12000000 * 32 / baud; + baseline = 12000000 * 32; + mantissa = baseline / baud; + if (mantissa == 0) + mantissa = 1; /* Avoid dividing by zero if baud > 32*12M. */ + exponent = 0; + while (mantissa >= 512) { + if (exponent < 7) { + mantissa >>= 2; /* divide by 4 */ + exponent++; + } else { + /* Exponent is maxed. Trim mantissa and leave. */ + mantissa = 511; + break; + } + } + buf[3] = 0x80; buf[2] = 0; - buf[1] = (tmp >= 256); - while (tmp >= 256) { - tmp >>= 2; - buf[1] <<= 1; - } - buf[0] = tmp; + buf[1] = exponent << 1 | mantissa >> 8; + buf[0] = mantissa & 0xff; + + /* Calculate and return the exact baud rate. */ + baud = (baseline / mantissa) >> (exponent << 1); return baud; } From 16f47b6ab239970067885a9384c456c6b530bb12 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 19 Aug 2015 10:33:58 +1000 Subject: [PATCH 0750/2091] libxfs: readahead of dir3 data blocks should use the read verifier commit 2f123bce18943fff819bc10f8868ffb9149fc622 upstream. In the dir3 data block readahead function, use the regular read verifier to check the block's CRC and spot-check the block contents instead of directly calling only the spot-checking routine. This prevents corrupted directory data blocks from being read into the kernel, which can lead to garbage ls output and directory loops (if say one of the entries contains slashes and other junk). Signed-off-by: Darrick J. 
Wong Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_dir2_data.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c index de1ea16f57485..534bbf283d6bb 100644 --- a/fs/xfs/libxfs/xfs_dir2_data.c +++ b/fs/xfs/libxfs/xfs_dir2_data.c @@ -252,7 +252,8 @@ xfs_dir3_data_reada_verify( return; case cpu_to_be32(XFS_DIR2_DATA_MAGIC): case cpu_to_be32(XFS_DIR3_DATA_MAGIC): - xfs_dir3_data_verify(bp); + bp->b_ops = &xfs_dir3_data_buf_ops; + bp->b_ops->verify_read(bp); return; default: xfs_buf_ioerror(bp, -EFSCORRUPTED); From 39516016709f4bf72d6e4c12089bbae7b1432ba7 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 19 Aug 2015 10:34:32 +1000 Subject: [PATCH 0751/2091] xfs: Fix xfs_attr_leafblock definition commit ffeecc5213024ae663377b442eedcfbacf6d0c5d upstream. struct xfs_attr_leafblock contains 'entries' array which is declared with size 1 altough it can in fact contain much more entries. Since this array is followed by further struct members, gcc (at least in version 4.8.3) thinks that the array has the fixed size of 1 element and thus may optimize away all accesses beyond the end of array resulting in non-working code. This problem was only observed with userspace code in xfsprogs, however it's better to be safe in kernel as well and have matching kernel and xfsprogs definitions. Signed-off-by: Jan Kara Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_da_format.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h index 74bcbabfa5232..b14bbd6bb05fa 100644 --- a/fs/xfs/libxfs/xfs_da_format.h +++ b/fs/xfs/libxfs/xfs_da_format.h @@ -680,8 +680,15 @@ typedef struct xfs_attr_leaf_name_remote { typedef struct xfs_attr_leafblock { xfs_attr_leaf_hdr_t hdr; /* constant-structure header block */ xfs_attr_leaf_entry_t entries[1]; /* sorted on key, not name */ - xfs_attr_leaf_name_local_t namelist; /* grows from bottom of buf */ - xfs_attr_leaf_name_remote_t valuelist; /* grows from bottom of buf */ + /* + * The rest of the block contains the following structures after the + * leaf entries, growing from the bottom up. The variables are never + * referenced and definining them can actually make gcc optimize away + * accesses to the 'entries' array above index 0 so don't do that. + * + * xfs_attr_leaf_name_local_t namelist; + * xfs_attr_leaf_name_remote_t valuelist; + */ } xfs_attr_leafblock_t; /* From 533f267763456f75d57bd590830629bf28fe3d8c Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 25 Aug 2015 10:05:13 +1000 Subject: [PATCH 0752/2091] xfs: Fix file type directory corruption for btree directories commit 037542345a82aaaa228ec280fe6ddff1568d169f upstream. Users have occasionally reported that file type for some directory entries is wrong. This mostly happened after updating libraries some libraries. After some debugging the problem was traced down to xfs_dir2_node_replace(). The function uses args->filetype as a file type to store in the replaced directory entry however it also calls xfs_da3_node_lookup_int() which will store file type of the current directory entry in args->filetype. Thus we fail to change file type of a directory entry to a proper type. Fix the problem by storing new file type in a local variable before calling xfs_da3_node_lookup_int(). 
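As a simplified illustration (not part of the patch, types and values are invented stand-ins for the XFS structures), the toy program below shows the save-before-lookup pattern: the args structure is both input and output, so the caller-supplied inode number and file type must be copied to locals before the lookup helper overwrites them.

#include <stdio.h>

struct toy_args {
	unsigned long long inumber;	/* in: new inode; lookup() overwrites it */
	int filetype;			/* in: new type;  lookup() overwrites it */
};

/* Stand-in for xfs_da3_node_lookup_int(): it reports what the existing
 * directory entry contains by writing into the same args fields. */
static void lookup(struct toy_args *args)
{
	args->inumber = 1111;		/* inode currently stored on disk */
	args->filetype = 1;		/* type currently stored on disk */
}

int main(void)
{
	struct toy_args args = { .inumber = 2222, .filetype = 2 };

	/* Save the caller-supplied values before lookup() clobbers them. */
	unsigned long long new_ino = args.inumber;
	int new_ftype = args.filetype;

	lookup(&args);

	/* The buggy replace used args.filetype here, i.e. the OLD type. */
	printf("replace entry: inode %llu, type %d (stale value was %d)\n",
	       new_ino, new_ftype, args.filetype);
	return 0;
}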
Reported-by: Giacomo Comes Signed-off-by: Jan Kara Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_dir2_node.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c index 41b80d3d38772..06bb4218b3625 100644 --- a/fs/xfs/libxfs/xfs_dir2_node.c +++ b/fs/xfs/libxfs/xfs_dir2_node.c @@ -2132,6 +2132,7 @@ xfs_dir2_node_replace( int error; /* error return value */ int i; /* btree level */ xfs_ino_t inum; /* new inode number */ + int ftype; /* new file type */ xfs_dir2_leaf_t *leaf; /* leaf structure */ xfs_dir2_leaf_entry_t *lep; /* leaf entry being changed */ int rval; /* internal return value */ @@ -2145,7 +2146,14 @@ xfs_dir2_node_replace( state = xfs_da_state_alloc(); state->args = args; state->mp = args->dp->i_mount; + + /* + * We have to save new inode number and ftype since + * xfs_da3_node_lookup_int() is going to overwrite them + */ inum = args->inumber; + ftype = args->filetype; + /* * Lookup the entry to change in the btree. */ @@ -2183,7 +2191,7 @@ xfs_dir2_node_replace( * Fill in the new inode number and log the entry. */ dep->inumber = cpu_to_be64(inum); - args->dp->d_ops->data_put_ftype(dep, args->filetype); + args->dp->d_ops->data_put_ftype(dep, ftype); xfs_dir2_data_log_entry(args, state->extrablk.bp, dep); rval = 0; } From 6e9660ca6fbf805c26aaef8856eb4680c689a5d2 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 30 Jul 2015 00:30:58 +0300 Subject: [PATCH 0753/2091] usb: gadget: m66592-udc: forever loop in set_feature() commit 5feb5d2003499b1094d898c010a7604d7afddc4c upstream. There is an "&&" vs "||" typo here so this loops 3000 times or if we get unlucky it could loop forever. Fixes: ceaa0a6eeadf ('usb: gadget: m66592-udc: add support for TEST_MODE') Signed-off-by: Dan Carpenter Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/m66592-udc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/udc/m66592-udc.c b/drivers/usb/gadget/udc/m66592-udc.c index 309706fe4bf0a..9704053dfe053 100644 --- a/drivers/usb/gadget/udc/m66592-udc.c +++ b/drivers/usb/gadget/udc/m66592-udc.c @@ -1052,7 +1052,7 @@ static void set_feature(struct m66592 *m66592, struct usb_ctrlrequest *ctrl) tmp = m66592_read(m66592, M66592_INTSTS0) & M66592_CTSQ; udelay(1); - } while (tmp != M66592_CS_IDST || timeout-- > 0); + } while (tmp != M66592_CS_IDST && timeout-- > 0); if (tmp == M66592_CS_IDST) m66592_bset(m66592, From bd89ac2a5d7760cd739eb228214295862bfb64f9 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Fri, 31 Jul 2015 16:36:30 +0800 Subject: [PATCH 0754/2091] doc: usb: gadget-testing: using the updated testusb.c commit f811a38300be3cdb603171aea5ad3fb42b71ca53 upstream. testusb.c at http://www.linux-usb.org/usbtest/ is out of date, using the one at the kernel source folder. Signed-off-by: Peter Chen Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- Documentation/usb/gadget-testing.txt | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/Documentation/usb/gadget-testing.txt b/Documentation/usb/gadget-testing.txt index f45b2bf4b41de..820664af8f6a4 100644 --- a/Documentation/usb/gadget-testing.txt +++ b/Documentation/usb/gadget-testing.txt @@ -237,9 +237,7 @@ Testing the LOOPBACK function ----------------------------- device: run the gadget -host: test-usb - -http://www.linux-usb.org/usbtest/testusb.c +host: test-usb (tools/usb/testusb.c) 8. 
MASS STORAGE function ======================== @@ -588,9 +586,8 @@ Testing the SOURCESINK function ------------------------------- device: run the gadget -host: test-usb +host: test-usb (tools/usb/testusb.c) -http://www.linux-usb.org/usbtest/testusb.c 16. UAC1 function ================= From d408e5eccd26f1efd97fdbd568f3fa6b6f95e2a0 Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Mon, 27 Jul 2015 12:25:27 +0530 Subject: [PATCH 0755/2091] usb: dwc3: ep0: Fix mem corruption on OUT transfers of more than 512 bytes commit b2fb5b1a0f50d3ebc12342c8d8dead245e9c9d4e upstream. DWC3 uses bounce buffer to handle non max packet aligned OUT transfers and the size of bounce buffer is 512 bytes. However if the host initiates OUT transfers of size more than 512 bytes (and non max packet aligned), the driver throws a WARN dump but still programs the TRB to receive more than 512 bytes. This will cause bounce buffer to overflow and corrupt the adjacent memory locations which can be fatal. Fix it by programming the TRB to receive a maximum of DWC3_EP0_BOUNCE_SIZE (512) bytes. Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/ep0.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c index 69e769c35cf5d..06ecd1e6871ce 100644 --- a/drivers/usb/dwc3/ep0.c +++ b/drivers/usb/dwc3/ep0.c @@ -820,6 +820,11 @@ static void dwc3_ep0_complete_data(struct dwc3 *dwc, unsigned maxp = ep0->endpoint.maxpacket; transfer_size += (maxp - (transfer_size % maxp)); + + /* Maximum of DWC3_EP0_BOUNCE_SIZE can only be received */ + if (transfer_size > DWC3_EP0_BOUNCE_SIZE) + transfer_size = DWC3_EP0_BOUNCE_SIZE; + transferred = min_t(u32, ur->length, transfer_size - length); memcpy(ur->buf, dwc->ep0_bounce, transferred); @@ -941,11 +946,14 @@ static void __dwc3_ep0_do_control_data(struct dwc3 *dwc, return; } - WARN_ON(req->request.length > DWC3_EP0_BOUNCE_SIZE); - maxpacket = dep->endpoint.maxpacket; transfer_size = roundup(req->request.length, maxpacket); + if (transfer_size > DWC3_EP0_BOUNCE_SIZE) { + dev_WARN(dwc->dev, "bounce buf can't handle req len\n"); + transfer_size = DWC3_EP0_BOUNCE_SIZE; + } + dwc->ep0_bounced = true; /* From 08ccc8bb7d2a9cf6543e686b43b59f888f4782e9 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Thu, 30 Jul 2015 13:13:03 +0800 Subject: [PATCH 0756/2091] usb: gadget: f_uac2: finalize wMaxPacketSize according to bandwidth commit 913e4a90b6f9687ac0f543e7b632753e4f51c441 upstream. According to USB Audio Device 2.0 Spec, Ch4.10.1.1: wMaxPacketSize is defined as follows: Maximum packet size this endpoint is capable of sending or receiving when this configuration is selected. This is determined by the audio bandwidth constraints of the endpoint. In current code, the wMaxPacketSize is defined as the maximum packet size for ISO endpoint, and it will let the host reserve much more space than it really needs, so that we can't let more endpoints work together at one frame. We find this issue when we try to let 4 f_uac2 gadgets work together [1] at FS connection. 
[1]http://www.spinics.net/lists/linux-usb/msg123478.html Acked-by: Daniel Mack Cc: andrzej.p@samsung.com Cc: Daniel Mack Cc: tiwai@suse.de Cc: Alan Stern Signed-off-by: Peter Chen Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_uac2.c | 31 ++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/function/f_uac2.c b/drivers/usb/gadget/function/f_uac2.c index 5318615472533..96d935b00504c 100644 --- a/drivers/usb/gadget/function/f_uac2.c +++ b/drivers/usb/gadget/function/f_uac2.c @@ -975,6 +975,29 @@ free_ep(struct uac2_rtd_params *prm, struct usb_ep *ep) "%s:%d Error!\n", __func__, __LINE__); } +static void set_ep_max_packet_size(const struct f_uac2_opts *uac2_opts, + struct usb_endpoint_descriptor *ep_desc, + unsigned int factor, bool is_playback) +{ + int chmask, srate, ssize; + u16 max_packet_size; + + if (is_playback) { + chmask = uac2_opts->p_chmask; + srate = uac2_opts->p_srate; + ssize = uac2_opts->p_ssize; + } else { + chmask = uac2_opts->c_chmask; + srate = uac2_opts->c_srate; + ssize = uac2_opts->c_ssize; + } + + max_packet_size = num_channels(chmask) * ssize * + DIV_ROUND_UP(srate, factor / (1 << (ep_desc->bInterval - 1))); + ep_desc->wMaxPacketSize = cpu_to_le16(min(max_packet_size, + le16_to_cpu(ep_desc->wMaxPacketSize))); +} + static int afunc_bind(struct usb_configuration *cfg, struct usb_function *fn) { @@ -1070,10 +1093,14 @@ afunc_bind(struct usb_configuration *cfg, struct usb_function *fn) uac2->p_prm.uac2 = uac2; uac2->c_prm.uac2 = uac2; + /* Calculate wMaxPacketSize according to audio bandwidth */ + set_ep_max_packet_size(uac2_opts, &fs_epin_desc, 1000, true); + set_ep_max_packet_size(uac2_opts, &fs_epout_desc, 1000, false); + set_ep_max_packet_size(uac2_opts, &hs_epin_desc, 8000, true); + set_ep_max_packet_size(uac2_opts, &hs_epout_desc, 8000, false); + hs_epout_desc.bEndpointAddress = fs_epout_desc.bEndpointAddress; - hs_epout_desc.wMaxPacketSize = fs_epout_desc.wMaxPacketSize; hs_epin_desc.bEndpointAddress = fs_epin_desc.bEndpointAddress; - hs_epin_desc.wMaxPacketSize = fs_epin_desc.wMaxPacketSize; ret = usb_assign_descriptors(fn, fs_audio_desc, hs_audio_desc, NULL); if (ret) From bf14cc44d1adfe4d2d3420b51f6f44765e0219ff Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Mon, 17 Aug 2015 10:23:03 +0800 Subject: [PATCH 0757/2091] usb: host: ehci-sys: delete useless bus_to_hcd conversion commit 0521cfd06e1ebcd575e7ae36aab068b38df23850 upstream. The ehci platform device's drvdata is the pointer of struct usb_hcd already, so we doesn't need to call bus_to_hcd conversion again. 
Signed-off-by: Peter Chen Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ehci-sysfs.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/usb/host/ehci-sysfs.c b/drivers/usb/host/ehci-sysfs.c index 5e44407aa0997..5216f2b09d633 100644 --- a/drivers/usb/host/ehci-sysfs.c +++ b/drivers/usb/host/ehci-sysfs.c @@ -29,7 +29,7 @@ static ssize_t show_companion(struct device *dev, int count = PAGE_SIZE; char *ptr = buf; - ehci = hcd_to_ehci(bus_to_hcd(dev_get_drvdata(dev))); + ehci = hcd_to_ehci(dev_get_drvdata(dev)); nports = HCS_N_PORTS(ehci->hcs_params); for (index = 0; index < nports; ++index) { @@ -54,7 +54,7 @@ static ssize_t store_companion(struct device *dev, struct ehci_hcd *ehci; int portnum, new_owner; - ehci = hcd_to_ehci(bus_to_hcd(dev_get_drvdata(dev))); + ehci = hcd_to_ehci(dev_get_drvdata(dev)); new_owner = PORT_OWNER; /* Owned by companion */ if (sscanf(buf, "%d", &portnum) != 1) return -EINVAL; @@ -85,7 +85,7 @@ static ssize_t show_uframe_periodic_max(struct device *dev, struct ehci_hcd *ehci; int n; - ehci = hcd_to_ehci(bus_to_hcd(dev_get_drvdata(dev))); + ehci = hcd_to_ehci(dev_get_drvdata(dev)); n = scnprintf(buf, PAGE_SIZE, "%d\n", ehci->uframe_periodic_max); return n; } @@ -101,7 +101,7 @@ static ssize_t store_uframe_periodic_max(struct device *dev, unsigned long flags; ssize_t ret; - ehci = hcd_to_ehci(bus_to_hcd(dev_get_drvdata(dev))); + ehci = hcd_to_ehci(dev_get_drvdata(dev)); if (kstrtouint(buf, 0, &uframe_periodic_max) < 0) return -EINVAL; From c7d416e290ded6f563c0d316787037c79e268a07 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Thu, 6 Aug 2015 09:16:37 +0200 Subject: [PATCH 0758/2091] tty: serial: men_z135_uart.c: Fix race between IRQ and set_termios() commit 8117e347406278fd399b077add4e638cd017ae2d upstream. Fix panic caused by a race between men_z135_intr() and men_z135_set_termios(). men_z135_intr() and men_z135_set_termios() both hold the struct uart_port::lock spinlock, but men_z135_intr() does a spin_lock_irqsave() and men_z135_set_termios() does a normal spin_lock(), which can lead to a deadlock when an interrupt is called while the lock is being helt by men_z135_set_termios(). This was discovered using a insmod, hardware looppback send/receive, rmmod stress test. 
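As a sketch of the hazard (not buildable code, lock names taken from the driver), the fragment below shows why a plain spin_lock() in process context can deadlock against an interrupt handler that takes the same lock on the same CPU, and how disabling local interrupts around the critical section avoids it.

/*
 * With a plain spin_lock() the local CPU can still take the UART
 * interrupt while set_termios() holds the lock, and the handler then
 * spins on the same lock on the same CPU:
 *
 *   men_z135_set_termios()              men_z135_intr() (same CPU)
 *   spin_lock(&port->lock);
 *        <-- hardware interrupt -->
 *                                       spin_lock_irqsave(&port->lock, ...);
 *                                       ... never acquires it: deadlock
 *
 * Disabling local interrupts for the process-context critical section
 * closes that window:
 */
spin_lock_irq(&port->lock);	/* IRQs off on this CPU while held */
/* ... write MEN_Z135_BAUD_REG, update the timeout ... */
spin_unlock_irq(&port->lock);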
Signed-off-by: Johannes Thumshirn Reviewed-by: Peter Hurley Cc: Andreas Werner Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/men_z135_uart.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/tty/serial/men_z135_uart.c b/drivers/tty/serial/men_z135_uart.c index 35c55505b3eb3..5a41b8fbb10a8 100644 --- a/drivers/tty/serial/men_z135_uart.c +++ b/drivers/tty/serial/men_z135_uart.c @@ -392,7 +392,6 @@ static irqreturn_t men_z135_intr(int irq, void *data) struct men_z135_port *uart = (struct men_z135_port *)data; struct uart_port *port = &uart->port; bool handled = false; - unsigned long flags; int irq_id; uart->stat_reg = ioread32(port->membase + MEN_Z135_STAT_REG); @@ -401,7 +400,7 @@ static irqreturn_t men_z135_intr(int irq, void *data) if (!irq_id) goto out; - spin_lock_irqsave(&port->lock, flags); + spin_lock(&port->lock); /* It's save to write to IIR[7:6] RXC[9:8] */ iowrite8(irq_id, port->membase + MEN_Z135_STAT_REG); @@ -427,7 +426,7 @@ static irqreturn_t men_z135_intr(int irq, void *data) handled = true; } - spin_unlock_irqrestore(&port->lock, flags); + spin_unlock(&port->lock); out: return IRQ_RETVAL(handled); } @@ -717,7 +716,7 @@ static void men_z135_set_termios(struct uart_port *port, baud = uart_get_baud_rate(port, termios, old, 0, uart_freq / 16); - spin_lock(&port->lock); + spin_lock_irq(&port->lock); if (tty_termios_baud_rate(termios)) tty_termios_encode_baud_rate(termios, baud, baud); @@ -725,7 +724,7 @@ static void men_z135_set_termios(struct uart_port *port, iowrite32(bd_reg, port->membase + MEN_Z135_BAUD_REG); uart_update_timeout(port, termios->c_cflag, baud); - spin_unlock(&port->lock); + spin_unlock_irq(&port->lock); } static const char *men_z135_type(struct uart_port *port) From 59358f278d5c3ba3b2bd14d83ded72a2faafb51b Mon Sep 17 00:00:00 2001 From: John Lin Date: Tue, 11 Aug 2015 14:27:25 +0800 Subject: [PATCH 0759/2091] ASoC: rt5640: fix line out no sound issue commit 9b850ca4f1c5acd7fcbbd4b38a2d27132801a8d5 upstream. The power for line out was not turned on when line out is enabled. So we add "LOUT amp" widget to turn on the power for line out. 
Signed-off-by: John Lin Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/rt5640.c | 40 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/rt5640.c b/sound/soc/codecs/rt5640.c index 178e55d4d4814..06317f7d945f0 100644 --- a/sound/soc/codecs/rt5640.c +++ b/sound/soc/codecs/rt5640.c @@ -985,6 +985,35 @@ static int rt5640_hp_event(struct snd_soc_dapm_widget *w, return 0; } +static int rt5640_lout_event(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *kcontrol, int event) +{ + struct snd_soc_codec *codec = snd_soc_dapm_to_codec(w->dapm); + + switch (event) { + case SND_SOC_DAPM_POST_PMU: + hp_amp_power_on(codec); + snd_soc_update_bits(codec, RT5640_PWR_ANLG1, + RT5640_PWR_LM, RT5640_PWR_LM); + snd_soc_update_bits(codec, RT5640_OUTPUT, + RT5640_L_MUTE | RT5640_R_MUTE, 0); + break; + + case SND_SOC_DAPM_PRE_PMD: + snd_soc_update_bits(codec, RT5640_OUTPUT, + RT5640_L_MUTE | RT5640_R_MUTE, + RT5640_L_MUTE | RT5640_R_MUTE); + snd_soc_update_bits(codec, RT5640_PWR_ANLG1, + RT5640_PWR_LM, 0); + break; + + default: + return 0; + } + + return 0; +} + static int rt5640_hp_power_event(struct snd_soc_dapm_widget *w, struct snd_kcontrol *kcontrol, int event) { @@ -1180,13 +1209,16 @@ static const struct snd_soc_dapm_widget rt5640_dapm_widgets[] = { 0, rt5640_spo_l_mix, ARRAY_SIZE(rt5640_spo_l_mix)), SND_SOC_DAPM_MIXER("SPOR MIX", SND_SOC_NOPM, 0, 0, rt5640_spo_r_mix, ARRAY_SIZE(rt5640_spo_r_mix)), - SND_SOC_DAPM_MIXER("LOUT MIX", RT5640_PWR_ANLG1, RT5640_PWR_LM_BIT, 0, + SND_SOC_DAPM_MIXER("LOUT MIX", SND_SOC_NOPM, 0, 0, rt5640_lout_mix, ARRAY_SIZE(rt5640_lout_mix)), SND_SOC_DAPM_SUPPLY_S("Improve HP Amp Drv", 1, SND_SOC_NOPM, 0, 0, rt5640_hp_power_event, SND_SOC_DAPM_POST_PMU), SND_SOC_DAPM_PGA_S("HP Amp", 1, SND_SOC_NOPM, 0, 0, rt5640_hp_event, SND_SOC_DAPM_PRE_PMD | SND_SOC_DAPM_POST_PMU), + SND_SOC_DAPM_PGA_S("LOUT amp", 1, SND_SOC_NOPM, 0, 0, + rt5640_lout_event, + SND_SOC_DAPM_PRE_PMD | SND_SOC_DAPM_POST_PMU), SND_SOC_DAPM_SUPPLY("HP L Amp", RT5640_PWR_ANLG1, RT5640_PWR_HP_L_BIT, 0, NULL, 0), SND_SOC_DAPM_SUPPLY("HP R Amp", RT5640_PWR_ANLG1, @@ -1501,8 +1533,10 @@ static const struct snd_soc_dapm_route rt5640_dapm_routes[] = { {"HP R Playback", "Switch", "HP Amp"}, {"HPOL", NULL, "HP L Playback"}, {"HPOR", NULL, "HP R Playback"}, - {"LOUTL", NULL, "LOUT MIX"}, - {"LOUTR", NULL, "LOUT MIX"}, + + {"LOUT amp", NULL, "LOUT MIX"}, + {"LOUTL", NULL, "LOUT amp"}, + {"LOUTR", NULL, "LOUT amp"}, }; static const struct snd_soc_dapm_route rt5640_specific_dapm_routes[] = { From b167a60c024decd22fd48459f24225cb6f92ab05 Mon Sep 17 00:00:00 2001 From: Vaishali Thakkar Date: Thu, 20 Aug 2015 22:11:15 +0530 Subject: [PATCH 0760/2091] ASoC: samsung: Remove redundant arndale_audio_remove commit 14a500fe1396934c6b3ed8f009459a4723da7862 upstream. There is no use of snd_soc_unregister_card in remove function as devm_snd_soc_register_card in probe function automatically handles it. So, remove use of snd_soc_unregister_card and with this change remove arndale_audio_remove as it is now redundant. 
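As a minimal sketch (not part of the patch; card and DAI-link setup omitted and all names invented), this is the shape of a devres-managed card registration where no .remove callback is needed, since the core unregisters the card automatically when the device unbinds.

#include <linux/module.h>
#include <linux/platform_device.h>
#include <sound/soc.h>

static struct snd_soc_card example_card = {
	.name = "example-card",
	.owner = THIS_MODULE,
};

static int example_audio_probe(struct platform_device *pdev)
{
	example_card.dev = &pdev->dev;
	return devm_snd_soc_register_card(&pdev->dev, &example_card);
}

static struct platform_driver example_audio_driver = {
	.driver = {
		.name = "example-audio",
	},
	.probe = example_audio_probe,
	/* no .remove: devres handles snd_soc_unregister_card() */
};
module_platform_driver(example_audio_driver);

MODULE_LICENSE("GPL");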
Signed-off-by: Vaishali Thakkar Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/samsung/arndale_rt5631.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/sound/soc/samsung/arndale_rt5631.c b/sound/soc/samsung/arndale_rt5631.c index 8bf2e2c4bafb9..9e371eb3e4faf 100644 --- a/sound/soc/samsung/arndale_rt5631.c +++ b/sound/soc/samsung/arndale_rt5631.c @@ -116,15 +116,6 @@ static int arndale_audio_probe(struct platform_device *pdev) return ret; } -static int arndale_audio_remove(struct platform_device *pdev) -{ - struct snd_soc_card *card = platform_get_drvdata(pdev); - - snd_soc_unregister_card(card); - - return 0; -} - static const struct of_device_id samsung_arndale_rt5631_of_match[] __maybe_unused = { { .compatible = "samsung,arndale-rt5631", }, { .compatible = "samsung,arndale-alc5631", }, @@ -139,7 +130,6 @@ static struct platform_driver arndale_audio_driver = { .of_match_table = of_match_ptr(samsung_arndale_rt5631_of_match), }, .probe = arndale_audio_probe, - .remove = arndale_audio_remove, }; module_platform_driver(arndale_audio_driver); From 3f3c08dd0dc598f0cfe6efb8de17139c03309ea7 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Fri, 14 Aug 2015 17:54:07 +0800 Subject: [PATCH 0761/2091] ASoC: adav80x: Remove .read_flag_mask setting from adav80x_regmap_config commit 9d8352864907f0ad76124c5b28f65b5a382d7d7c upstream. Don't set .read_flag_mask for adav803, it's for adav801 only. Fixes: 0c2d69645628 ("ASoC: adav80x: Split SPI and I2C code into different modules") Signed-off-by: Axel Lin Acked-by: Lars-Peter Clausen Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/adav80x.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/soc/codecs/adav80x.c b/sound/soc/codecs/adav80x.c index 4373ada95648e..3a91a00fb9736 100644 --- a/sound/soc/codecs/adav80x.c +++ b/sound/soc/codecs/adav80x.c @@ -864,7 +864,6 @@ const struct regmap_config adav80x_regmap_config = { .val_bits = 8, .pad_bits = 1, .reg_bits = 7, - .read_flag_mask = 0x01, .max_register = ADAV80X_PLL_OUTE, From d23f2688b991310d4f27d4abaf7769ea301c9a91 Mon Sep 17 00:00:00 2001 From: Nikesh Oswal Date: Wed, 19 Aug 2015 16:02:24 +0100 Subject: [PATCH 0762/2091] ASoC: arizona: Fix gain settings of FLL in free-run mode commit 1cf5a330c05ae37a0a98ac7c9800a6f50d5579ec upstream. The wrong register was used to set the gain of ref loop, when changing the FLL output on an active FLL. This patch corrects the offset of the gain register. Signed-off-by: Nikesh Oswal Signed-off-by: Charles Keepax Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/arizona.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/arizona.c b/sound/soc/codecs/arizona.c index eff4b4d512b7b..92b813b74014b 100644 --- a/sound/soc/codecs/arizona.c +++ b/sound/soc/codecs/arizona.c @@ -1912,7 +1912,7 @@ static int arizona_enable_fll(struct arizona_fll *fll) if (already_enabled) { /* Facilitate smooth refclk across the transition */ - regmap_update_bits_async(fll->arizona->regmap, fll->base + 0x7, + regmap_update_bits_async(fll->arizona->regmap, fll->base + 0x9, ARIZONA_FLL1_GAIN_MASK, 0); regmap_update_bits_async(fll->arizona->regmap, fll->base + 1, ARIZONA_FLL1_FREERUN, From b3100c898181590cabc652a925d268c78a69b77a Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 25 Aug 2015 12:43:48 +0100 Subject: [PATCH 0763/2091] ASoC: arizona: Poll for FLL clock OK rather than use interrupts commit 0e7659712836ca59b4735bc5cc94de38698a5e01 upstream. 
The extcon driver takes the DAPM mutex from within the interrupt thread in several places, which makes it possible to get into a situation where the interrupt thread is blocked waiting on the DAPM mutex whilst a DAPM sequence is running which is attempting to configure the FLL. In this case the FLL completion can't be completed as as the IRQ handler is ONE_SHOT, which cause the FLL lock to use the full time out (250mS) and report that the process timed out. It is not really practical to make the extcon driver not take the DAPM mutex from within the interrupt thread, at least not without extensive modification. So this patch fixes the issue by switching the wait for the FLL lock to polling. A few fast polls are done first as the FLL should lock quickly for a good quality reference clock, (indeed it hits on the first poll on my system) and it will poll every 20mS after that until it times out. Signed-off-by: Charles Keepax Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/arizona.c | 47 +++++++++++++++----------------------- sound/soc/codecs/arizona.h | 1 - 2 files changed, 19 insertions(+), 29 deletions(-) diff --git a/sound/soc/codecs/arizona.c b/sound/soc/codecs/arizona.c index 92b813b74014b..ee91edcf3cb0d 100644 --- a/sound/soc/codecs/arizona.c +++ b/sound/soc/codecs/arizona.c @@ -1610,17 +1610,6 @@ int arizona_init_dai(struct arizona_priv *priv, int id) } EXPORT_SYMBOL_GPL(arizona_init_dai); -static irqreturn_t arizona_fll_clock_ok(int irq, void *data) -{ - struct arizona_fll *fll = data; - - arizona_fll_dbg(fll, "clock OK\n"); - - complete(&fll->ok); - - return IRQ_HANDLED; -} - static struct { unsigned int min; unsigned int max; @@ -1902,10 +1891,11 @@ static int arizona_is_enabled_fll(struct arizona_fll *fll) static int arizona_enable_fll(struct arizona_fll *fll) { struct arizona *arizona = fll->arizona; - unsigned long time_left; bool use_sync = false; int already_enabled = arizona_is_enabled_fll(fll); struct arizona_fll_cfg cfg; + int i; + unsigned int val; if (already_enabled < 0) return already_enabled; @@ -1964,9 +1954,6 @@ static int arizona_enable_fll(struct arizona_fll *fll) if (!already_enabled) pm_runtime_get(arizona->dev); - /* Clear any pending completions */ - try_wait_for_completion(&fll->ok); - regmap_update_bits_async(arizona->regmap, fll->base + 1, ARIZONA_FLL1_ENA, ARIZONA_FLL1_ENA); if (use_sync) @@ -1978,10 +1965,24 @@ static int arizona_enable_fll(struct arizona_fll *fll) regmap_update_bits_async(arizona->regmap, fll->base + 1, ARIZONA_FLL1_FREERUN, 0); - time_left = wait_for_completion_timeout(&fll->ok, - msecs_to_jiffies(250)); - if (time_left == 0) + arizona_fll_dbg(fll, "Waiting for FLL lock...\n"); + val = 0; + for (i = 0; i < 15; i++) { + if (i < 5) + usleep_range(200, 400); + else + msleep(20); + + regmap_read(arizona->regmap, + ARIZONA_INTERRUPT_RAW_STATUS_5, + &val); + if (val & (ARIZONA_FLL1_CLOCK_OK_STS << (fll->id - 1))) + break; + } + if (i == 15) arizona_fll_warn(fll, "Timed out waiting for lock\n"); + else + arizona_fll_dbg(fll, "FLL locked (%d polls)\n", i); return 0; } @@ -2066,11 +2067,8 @@ EXPORT_SYMBOL_GPL(arizona_set_fll); int arizona_init_fll(struct arizona *arizona, int id, int base, int lock_irq, int ok_irq, struct arizona_fll *fll) { - int ret; unsigned int val; - init_completion(&fll->ok); - fll->id = id; fll->base = base; fll->arizona = arizona; @@ -2092,13 +2090,6 @@ int arizona_init_fll(struct arizona *arizona, int id, int base, int lock_irq, snprintf(fll->clock_ok_name, sizeof(fll->clock_ok_name), 
"FLL%d clock OK", id); - ret = arizona_request_irq(arizona, ok_irq, fll->clock_ok_name, - arizona_fll_clock_ok, fll); - if (ret != 0) { - dev_err(arizona->dev, "Failed to get FLL%d clock OK IRQ: %d\n", - id, ret); - } - regmap_update_bits(arizona->regmap, fll->base + 1, ARIZONA_FLL1_FREERUN, 0); diff --git a/sound/soc/codecs/arizona.h b/sound/soc/codecs/arizona.h index 11ff899b02724..14e8485b55858 100644 --- a/sound/soc/codecs/arizona.h +++ b/sound/soc/codecs/arizona.h @@ -233,7 +233,6 @@ struct arizona_fll { int id; unsigned int base; unsigned int vco_mult; - struct completion ok; unsigned int fout; int sync_src; From 6c5f039655a41ba80e5e64ebb2b7032369f16ee1 Mon Sep 17 00:00:00 2001 From: "Maciej S. Szmigiero" Date: Sun, 2 Aug 2015 23:11:52 +0200 Subject: [PATCH 0764/2091] serial: 8250: don't bind to SMSC IrCC IR port commit ffa34de03bcfbfa88d8352942bc238bb48e94e2d upstream. SMSC IrCC SIR/FIR port should not be bound to by (legacy) serial driver so its own driver (smsc-ircc2) can bind to it. Signed-off-by: Maciej Szmigiero Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_pnp.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/tty/serial/8250/8250_pnp.c b/drivers/tty/serial/8250/8250_pnp.c index 50a09cd76d50a..61fd3d3db676c 100644 --- a/drivers/tty/serial/8250/8250_pnp.c +++ b/drivers/tty/serial/8250/8250_pnp.c @@ -364,6 +364,11 @@ static const struct pnp_device_id pnp_dev_table[] = { /* Winbond CIR port, should not be probed. We should keep track of it to prevent the legacy serial driver from probing it */ { "WEC1022", CIR_PORT }, + /* + * SMSC IrCC SIR/FIR port, should not be probed by serial driver + * as well so its own driver can bind to it. + */ + { "SMCF010", CIR_PORT }, { "", 0 } }; From 72b188506229b25c075a6573b0a6b54b5a14947d Mon Sep 17 00:00:00 2001 From: "Maciej S. Szmigiero" Date: Sun, 2 Aug 2015 23:15:05 +0200 Subject: [PATCH 0765/2091] serial: 8250: bind to ALi Fast Infrared Controller (ALI5123) commit 1d7002777a8fe8188caaa98d4a8eb4ed298fcdae upstream. This way this device can be used with irtty-sir - at least on Toshiba Satellite A20-S103 it is not configured by default and needs PNP activation before it starts to respond on I/O ports. This device has actually its own driver (ali-ircc), but this driver seems to be non-functional for a very long time (see http://permalink.gmane.org/gmane.linux.irda.general/484 http://permalink.gmane.org/gmane.network.protocols.obex.openobex.user/943 https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=535070 ). Signed-off-by: Maciej Szmigiero Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/acpi_pnp.c | 1 + drivers/tty/serial/8250/8250_pnp.c | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/drivers/acpi/acpi_pnp.c b/drivers/acpi/acpi_pnp.c index ff6d8adc9cda6..fb765524cc3d5 100644 --- a/drivers/acpi/acpi_pnp.c +++ b/drivers/acpi/acpi_pnp.c @@ -153,6 +153,7 @@ static const struct acpi_device_id acpi_pnp_device_ids[] = { {"AEI0250"}, /* PROLiNK 1456VH ISA PnP K56flex Fax Modem */ {"AEI1240"}, /* Actiontec ISA PNP 56K X2 Fax Modem */ {"AKY1021"}, /* Rockwell 56K ACF II Fax+Data+Voice Modem */ + {"ALI5123"}, /* ALi Fast Infrared Controller */ {"AZT4001"}, /* AZT3005 PnP SOUND DEVICE */ {"BDP3336"}, /* Best Data Products Inc. 
Smart One 336F PnP Modem */ {"BRI0A49"}, /* Boca Complete Ofc Communicator 14.4 Data-FAX */ diff --git a/drivers/tty/serial/8250/8250_pnp.c b/drivers/tty/serial/8250/8250_pnp.c index 61fd3d3db676c..658b392d1170d 100644 --- a/drivers/tty/serial/8250/8250_pnp.c +++ b/drivers/tty/serial/8250/8250_pnp.c @@ -41,6 +41,12 @@ static const struct pnp_device_id pnp_dev_table[] = { { "AEI1240", 0 }, /* Rockwell 56K ACF II Fax+Data+Voice Modem */ { "AKY1021", 0 /*SPCI_FL_NO_SHIRQ*/ }, + /* + * ALi Fast Infrared Controller + * Native driver (ali-ircc) is broken so at least + * it can be used with irtty-sir. + */ + { "ALI5123", 0 }, /* AZT3005 PnP SOUND DEVICE */ { "AZT4001", 0 }, /* Best Data Products Inc. Smart One 336F PnP Modem */ From 5fe45c8dbe3f1927db62fbbbc37b64cbe41967b4 Mon Sep 17 00:00:00 2001 From: Adam Lee Date: Mon, 3 Aug 2015 13:28:13 +0800 Subject: [PATCH 0766/2091] serial: 8250_pci: Add support for Pericom PI7C9X795[1248] commit 89c043a6cb2d4525d48a38ed78d5f0f5672338b3 upstream. Pericom PI7C9X795[1248] are Uno/Dual/Quad/Octal UART devices, this patch enables them, also defines PCI_VENDOR_ID_PERICOM here. Signed-off-by: Adam Lee Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_pci.c | 82 ++++++++++++++++++++++-------- 1 file changed, 61 insertions(+), 21 deletions(-) diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index 46bcebba54b2f..9373cca121d33 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -2000,6 +2000,12 @@ pci_wch_ch38x_setup(struct serial_private *priv, #define PCI_DEVICE_ID_EXAR_XR17V8358 0x8358 +#define PCI_VENDOR_ID_PERICOM 0x12D8 +#define PCI_DEVICE_ID_PERICOM_PI7C9X7951 0x7951 +#define PCI_DEVICE_ID_PERICOM_PI7C9X7952 0x7952 +#define PCI_DEVICE_ID_PERICOM_PI7C9X7954 0x7954 +#define PCI_DEVICE_ID_PERICOM_PI7C9X7958 0x7958 + /* Unknown vendors/cards - this should not be in linux/pci_ids.h */ #define PCI_SUBDEVICE_ID_UNKNOWN_0x1584 0x1584 #define PCI_SUBDEVICE_ID_UNKNOWN_0x1588 0x1588 @@ -2314,27 +2320,12 @@ static struct pci_serial_quirk pci_serial_quirks[] __refdata = { * Pericom */ { - .vendor = 0x12d8, - .device = 0x7952, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .setup = pci_pericom_setup, - }, - { - .vendor = 0x12d8, - .device = 0x7954, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .setup = pci_pericom_setup, - }, - { - .vendor = 0x12d8, - .device = 0x7958, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .setup = pci_pericom_setup, + .vendor = PCI_VENDOR_ID_PERICOM, + .device = PCI_ANY_ID, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .setup = pci_pericom_setup, }, - /* * PLX */ @@ -3031,6 +3022,10 @@ enum pci_board_num_t { pbn_fintek_8, pbn_fintek_12, pbn_wch384_4, + pbn_pericom_PI7C9X7951, + pbn_pericom_PI7C9X7952, + pbn_pericom_PI7C9X7954, + pbn_pericom_PI7C9X7958, }; /* @@ -3848,7 +3843,6 @@ static struct pciserial_board pci_boards[] = { .base_baud = 115200, .first_offset = 0x40, }, - [pbn_wch384_4] = { .flags = FL_BASE0, .num_ports = 4, @@ -3856,6 +3850,33 @@ static struct pciserial_board pci_boards[] = { .uart_offset = 8, .first_offset = 0xC0, }, + /* + * Pericom PI7C9X795[1248] Uno/Dual/Quad/Octal UART + */ + [pbn_pericom_PI7C9X7951] = { + .flags = FL_BASE0, + .num_ports = 1, + .base_baud = 921600, + .uart_offset = 0x8, + }, + [pbn_pericom_PI7C9X7952] = { + .flags = FL_BASE0, + .num_ports = 2, + .base_baud = 921600, + .uart_offset = 0x8, + }, + [pbn_pericom_PI7C9X7954] = { + .flags = FL_BASE0, + .num_ports = 4, + .base_baud 
= 921600, + .uart_offset = 0x8, + }, + [pbn_pericom_PI7C9X7958] = { + .flags = FL_BASE0, + .num_ports = 8, + .base_baud = 921600, + .uart_offset = 0x8, + }, }; static const struct pci_device_id blacklist[] = { @@ -5116,6 +5137,25 @@ static struct pci_device_id serial_pci_tbl[] = { PCI_ANY_ID, PCI_ANY_ID, 0, 0, pbn_exar_XR17V8358 }, + /* + * Pericom PI7C9X795[1248] Uno/Dual/Quad/Octal UART + */ + { PCI_VENDOR_ID_PERICOM, PCI_DEVICE_ID_PERICOM_PI7C9X7951, + PCI_ANY_ID, PCI_ANY_ID, + 0, + 0, pbn_pericom_PI7C9X7951 }, + { PCI_VENDOR_ID_PERICOM, PCI_DEVICE_ID_PERICOM_PI7C9X7952, + PCI_ANY_ID, PCI_ANY_ID, + 0, + 0, pbn_pericom_PI7C9X7952 }, + { PCI_VENDOR_ID_PERICOM, PCI_DEVICE_ID_PERICOM_PI7C9X7954, + PCI_ANY_ID, PCI_ANY_ID, + 0, + 0, pbn_pericom_PI7C9X7954 }, + { PCI_VENDOR_ID_PERICOM, PCI_DEVICE_ID_PERICOM_PI7C9X7958, + PCI_ANY_ID, PCI_ANY_ID, + 0, + 0, pbn_pericom_PI7C9X7958 }, /* * Topic TP560 Data/Fax/Voice 56k modem (reported by Evan Clarke) */ From da5b2f0abeaa4816e8dbc8dffef33704f53db1b3 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Fri, 31 Jul 2015 10:58:27 +0200 Subject: [PATCH 0767/2091] serial: samsung: fix DMA mode enter condition for small FIFO sizes commit 81ccb2a69f76b88295a1da9fc9484df715fe3bfa upstream. Due to some of serial ports can have FIFO size smaller than cache line size, and because of need to align DMA buffer address to cache line size, it's necessary to calculate minimum number of bytes for which we want to start DMA transaction to be at least cache line size. The simplest way to meet this requirement is to get maximum of cache line size and FIFO size. Reported-by: Krzysztof Kozlowski Signed-off-by: Marek Szyprowski Signed-off-by: Robert Baldyga Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/samsung.c | 13 +++++++++++-- drivers/tty/serial/samsung.h | 1 + 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/samsung.c b/drivers/tty/serial/samsung.c index a0ae942d9562d..a8aeab3532e71 100644 --- a/drivers/tty/serial/samsung.c +++ b/drivers/tty/serial/samsung.c @@ -342,7 +342,8 @@ static void s3c24xx_serial_start_next_tx(struct s3c24xx_uart_port *ourport) return; } - if (!ourport->dma || !ourport->dma->tx_chan || count < port->fifosize) + if (!ourport->dma || !ourport->dma->tx_chan || + count < ourport->min_dma_size) s3c24xx_serial_start_tx_pio(ourport); else s3c24xx_serial_start_tx_dma(ourport, count); @@ -742,7 +743,8 @@ static irqreturn_t s3c24xx_serial_tx_chars(int irq, void *id) count = CIRC_CNT_TO_END(xmit->head, xmit->tail, UART_XMIT_SIZE); - if (ourport->dma && ourport->dma->tx_chan && count >= port->fifosize) { + if (ourport->dma && ourport->dma->tx_chan && + count >= ourport->min_dma_size) { s3c24xx_serial_start_tx_dma(ourport, count); goto out; } @@ -1838,6 +1840,13 @@ static int s3c24xx_serial_probe(struct platform_device *pdev) else if (ourport->info->fifosize) ourport->port.fifosize = ourport->info->fifosize; + /* + * DMA transfers must be aligned at least to cache line size, + * so find minimal transfer size suitable for DMA mode + */ + ourport->min_dma_size = max_t(int, ourport->port.fifosize, + dma_get_cache_alignment()); + probe_index++; dbg("%s: initialising port %p...\n", __func__, ourport); diff --git a/drivers/tty/serial/samsung.h b/drivers/tty/serial/samsung.h index d275032aa68d4..fc5deaa4f382d 100644 --- a/drivers/tty/serial/samsung.h +++ b/drivers/tty/serial/samsung.h @@ -82,6 +82,7 @@ struct s3c24xx_uart_port { unsigned char tx_claimed; unsigned int pm_level; unsigned long baudclk_rate; + unsigned 
int min_dma_size; unsigned int rx_irq; unsigned int tx_irq; From 9feb2d70d3bc561c900e0d976d7700306f4806a4 Mon Sep 17 00:00:00 2001 From: Robert Baldyga Date: Fri, 31 Jul 2015 10:58:28 +0200 Subject: [PATCH 0768/2091] serial: samsung: fix DMA for FIFO smaller than cache line size commit 736cd79f483fd7a1e0b71e6eaddf01d8d87fbbbb upstream. So far DMA mode were activated when only number of bytes to send was equal or greater than min_dma_size. Due to requirement that DMA transaction buffer should be aligned to cache line size, the excessive bytes were written to FIFO before starting DMA transaction. The problem occurred when FIFO size were smaller than cache alignment, because writing all excessive bytes to FIFO would fail. It happened in DMA mode when PIO interrupts disabled, which caused driver hung. The solution is to test if buffer is alligned to cache line size before activating DMA mode, and if it's not, running PIO mode to align buffer and then starting DMA transaction. In PIO mode, when interrupts are enabled, lack of space in FIFO isn't the problem, so buffer aligning will always finish with success. Reported-by: Krzysztof Kozlowski Signed-off-by: Robert Baldyga Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/samsung.c | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/drivers/tty/serial/samsung.c b/drivers/tty/serial/samsung.c index a8aeab3532e71..1e0d9b8c48c93 100644 --- a/drivers/tty/serial/samsung.c +++ b/drivers/tty/serial/samsung.c @@ -295,15 +295,6 @@ static int s3c24xx_serial_start_tx_dma(struct s3c24xx_uart_port *ourport, if (ourport->tx_mode != S3C24XX_TX_DMA) enable_tx_dma(ourport); - while (xmit->tail & (dma_get_cache_alignment() - 1)) { - if (rd_regl(port, S3C2410_UFSTAT) & ourport->info->tx_fifofull) - return 0; - wr_regb(port, S3C2410_UTXH, xmit->buf[xmit->tail]); - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - port->icount.tx++; - count--; - } - dma->tx_size = count & ~(dma_get_cache_alignment() - 1); dma->tx_transfer_addr = dma->tx_addr + xmit->tail; @@ -343,7 +334,8 @@ static void s3c24xx_serial_start_next_tx(struct s3c24xx_uart_port *ourport) } if (!ourport->dma || !ourport->dma->tx_chan || - count < ourport->min_dma_size) + count < ourport->min_dma_size || + xmit->tail & (dma_get_cache_alignment() - 1)) s3c24xx_serial_start_tx_pio(ourport); else s3c24xx_serial_start_tx_dma(ourport, count); @@ -737,7 +729,7 @@ static irqreturn_t s3c24xx_serial_tx_chars(int irq, void *id) struct uart_port *port = &ourport->port; struct circ_buf *xmit = &port->state->xmit; unsigned long flags; - int count; + int count, dma_count = 0; spin_lock_irqsave(&port->lock, flags); @@ -745,8 +737,12 @@ static irqreturn_t s3c24xx_serial_tx_chars(int irq, void *id) if (ourport->dma && ourport->dma->tx_chan && count >= ourport->min_dma_size) { - s3c24xx_serial_start_tx_dma(ourport, count); - goto out; + int align = dma_get_cache_alignment() - + (xmit->tail & (dma_get_cache_alignment() - 1)); + if (count-align >= ourport->min_dma_size) { + dma_count = count-align; + count = align; + } } if (port->x_char) { @@ -767,14 +763,24 @@ static irqreturn_t s3c24xx_serial_tx_chars(int irq, void *id) /* try and drain the buffer... 
*/ - count = port->fifosize; - while (!uart_circ_empty(xmit) && count-- > 0) { + if (count > port->fifosize) { + count = port->fifosize; + dma_count = 0; + } + + while (!uart_circ_empty(xmit) && count > 0) { if (rd_regl(port, S3C2410_UFSTAT) & ourport->info->tx_fifofull) break; wr_regb(port, S3C2410_UTXH, xmit->buf[xmit->tail]); xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); port->icount.tx++; + count--; + } + + if (!count && dma_count) { + s3c24xx_serial_start_tx_dma(ourport, dma_count); + goto out; } if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) { From 176688bba97e4829627a08c852ff8bf07348bffa Mon Sep 17 00:00:00 2001 From: Leonidas Da Silva Barbosa Date: Fri, 14 Aug 2015 10:14:16 -0300 Subject: [PATCH 0769/2091] crypto: vmx - Fixing GHASH Key issue on little endian commit 3c5f0ed78e976be705218cad62acf6a68e9d121e upstream. GHASH table algorithm is using a big endian key. In little endian machines key will be LE ordered. After a lxvd2x instruction key is loaded as it is, LE/BE order, in first case it'll generate a wrong table resulting in wrong hashes from the algorithm. Bug affects only LE machines. In order to fix it we do a swap for loaded key. Signed-off-by: Leonidas S Barbosa Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/vmx/ghashp8-ppc.pl | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/crypto/vmx/ghashp8-ppc.pl b/drivers/crypto/vmx/ghashp8-ppc.pl index 0a6f899839ddb..d8429cb71f027 100644 --- a/drivers/crypto/vmx/ghashp8-ppc.pl +++ b/drivers/crypto/vmx/ghashp8-ppc.pl @@ -61,6 +61,12 @@ mtspr 256,r0 li r10,0x30 lvx_u $H,0,r4 # load H + le?xor r7,r7,r7 + le?addi r7,r7,0x8 # need a vperm start with 08 + le?lvsr 5,0,r7 + le?vspltisb 6,0x0f + le?vxor 5,5,6 # set a b-endian mask + le?vperm $H,$H,$H,5 vspltisb $xC2,-16 # 0xf0 vspltisb $t0,1 # one From 16d2c7bc05ebfa25feecaacb74d43ec4b3dbc28e Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Thu, 3 Sep 2015 14:32:01 +0300 Subject: [PATCH 0770/2091] crypto: ghash-clmulni: specify context size for ghash async algorithm commit 71c6da846be478a61556717ef1ee1cea91f5d6a8 upstream. Currently context size (cra_ctxsize) doesn't specified for ghash_async_alg. Which means it's zero. Thus crypto_create_tfm() doesn't allocate needed space for ghash_async_ctx, so any read/write to ctx (e.g. in ghash_async_init_tfm()) is not valid. Signed-off-by: Andrey Ryabinin Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- arch/x86/crypto/ghash-clmulni-intel_glue.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c index 2079baf06bdd3..daf8d2b9a2173 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_glue.c +++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c @@ -294,6 +294,7 @@ static struct ahash_alg ghash_async_alg = { .cra_name = "ghash", .cra_driver_name = "ghash-clmulni", .cra_priority = 400, + .cra_ctxsize = sizeof(struct ghash_async_ctx), .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC, .cra_blocksize = GHASH_BLOCK_SIZE, .cra_type = &crypto_ahash_type, From 3e1ed32743b9a2eb37c90d7e66c8b416bd60ffae Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Mon, 10 Aug 2015 12:06:53 -0400 Subject: [PATCH 0771/2091] HID: usbhid: Fix the check for HID_RESET_PENDING in hid_io_error commit 3af4e5a95184d6d3c1c6a065f163faa174a96a1d upstream. It was reported that after 10-20 reboots, a usb keyboard plugged into a docking station would not work unless it was replugged in. 
Using usbmon, it turns out the interrupt URBs were streaming with callback errors of -71 for some reason. The hid-core.c::hid_io_error was supposed to retry and then reset, but the reset wasn't really happening. The check for HID_NO_BANDWIDTH was inverted. Fix was simple. Tested by reporter and locally by me by unplugging a keyboard halfway until I could recreate a stream of errors but no disconnect. Signed-off-by: Don Zickus Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/usbhid/hid-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c index bfbe1bedda7f3..eab5bd6a24426 100644 --- a/drivers/hid/usbhid/hid-core.c +++ b/drivers/hid/usbhid/hid-core.c @@ -164,7 +164,7 @@ static void hid_io_error(struct hid_device *hid) if (time_after(jiffies, usbhid->stop_retry)) { /* Retries failed, so do a port reset unless we lack bandwidth*/ - if (test_bit(HID_NO_BANDWIDTH, &usbhid->iofl) + if (!test_bit(HID_NO_BANDWIDTH, &usbhid->iofl) && !test_and_set_bit(HID_RESET_PENDING, &usbhid->iofl)) { schedule_work(&usbhid->reset_work); From d60fc16235b744ac10a82f03aec26110eeec7b51 Mon Sep 17 00:00:00 2001 From: Ellen Wang Date: Thu, 9 Jul 2015 22:04:31 -0700 Subject: [PATCH 0772/2091] HID: cp2112: fix byte order in SMBUS operations commit 29e2d6d1f6f61ba2b5cc9d9867e01d8c31a6c4f7 upstream. Change all occurrences of be16 to le16 in cp2112_xfer(), because SMBUS words are little endian, not big endian. Signed-off-by: Ellen Wang Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-cp2112.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/hid/hid-cp2112.c b/drivers/hid/hid-cp2112.c index a2dbbbe0d8d7e..45198baf3523b 100644 --- a/drivers/hid/hid-cp2112.c +++ b/drivers/hid/hid-cp2112.c @@ -537,7 +537,7 @@ static int cp2112_xfer(struct i2c_adapter *adap, u16 addr, struct cp2112_device *dev = (struct cp2112_device *)adap->algo_data; struct hid_device *hdev = dev->hdev; u8 buf[64]; - __be16 word; + __le16 word; ssize_t count; size_t read_length = 0; unsigned int retries; @@ -569,7 +569,7 @@ static int cp2112_xfer(struct i2c_adapter *adap, u16 addr, break; case I2C_SMBUS_WORD_DATA: read_length = 2; - word = cpu_to_be16(data->word); + word = cpu_to_le16(data->word); if (I2C_SMBUS_READ == read_write) count = cp2112_write_read_req(buf, addr, read_length, @@ -582,7 +582,7 @@ static int cp2112_xfer(struct i2c_adapter *adap, u16 addr, size = I2C_SMBUS_WORD_DATA; read_write = I2C_SMBUS_READ; read_length = 2; - word = cpu_to_be16(data->word); + word = cpu_to_le16(data->word); count = cp2112_write_read_req(buf, addr, read_length, command, (u8 *)&word, 2); @@ -675,7 +675,7 @@ static int cp2112_xfer(struct i2c_adapter *adap, u16 addr, data->byte = buf[0]; break; case I2C_SMBUS_WORD_DATA: - data->word = be16_to_cpup((__be16 *)buf); + data->word = le16_to_cpup((__le16 *)buf); break; case I2C_SMBUS_BLOCK_DATA: if (read_length > I2C_SMBUS_BLOCK_MAX) { From fd54f2ee1ac27477ebf78d082e6e411d82fe3766 Mon Sep 17 00:00:00 2001 From: Ellen Wang Date: Mon, 13 Jul 2015 15:23:54 -0700 Subject: [PATCH 0773/2091] HID: cp2112: fix I2C_SMBUS_BYTE write commit 6d00d153f00097d259f86304e11858a50a1b8ad1 upstream. When doing an I2C_SMBUS_BYTE write (one byte write, no address), the data to be written is in "command" not "data->byte". 
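Both cp2112 fixes boil down to where the payload sits for each SMBus write size: a plain byte write has no separate data byte (the command byte is the data), and word transfers travel low byte first. A stand-alone sketch of that layout (hypothetical helper and enum, not the cp2112 report format):

#include <stdint.h>
#include <stdio.h>

enum xfer_size { XFER_BYTE, XFER_BYTE_DATA, XFER_WORD_DATA };

/* build the data bytes that follow the slave address on the wire */
static int smbus_write_bytes(uint8_t *out, enum xfer_size size,
                             uint8_t command, uint16_t word)
{
        int n = 0;

        switch (size) {
        case XFER_BYTE:         /* one byte total: the command IS the data */
                out[n++] = command;
                break;
        case XFER_BYTE_DATA:    /* command, then one data byte */
                out[n++] = command;
                out[n++] = (uint8_t)word;
                break;
        case XFER_WORD_DATA:    /* command, then the word, low byte first */
                out[n++] = command;
                out[n++] = word & 0xff;
                out[n++] = word >> 8;
                break;
        }
        return n;
}

int main(void)
{
        uint8_t buf[4];
        int i, n = smbus_write_bytes(buf, XFER_WORD_DATA, 0x2a, 0x1234);

        for (i = 0; i < n; i++)
                printf("%02x ", buf[i]);        /* prints: 2a 34 12 */
        printf("\n");
        return 0;
}
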
Signed-off-by: Ellen Wang Acked-by: Wolfram Sang Reviewed-by: Antonio Borneo Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-cp2112.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-cp2112.c b/drivers/hid/hid-cp2112.c index 45198baf3523b..39bf74793b8b0 100644 --- a/drivers/hid/hid-cp2112.c +++ b/drivers/hid/hid-cp2112.c @@ -554,7 +554,7 @@ static int cp2112_xfer(struct i2c_adapter *adap, u16 addr, if (I2C_SMBUS_READ == read_write) count = cp2112_read_req(buf, addr, read_length); else - count = cp2112_write_req(buf, addr, data->byte, NULL, + count = cp2112_write_req(buf, addr, command, NULL, 0); break; case I2C_SMBUS_BYTE_DATA: From 8609dd0dc11cf342adc727d5f19e471ed291cf4e Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Wed, 5 Aug 2015 12:04:19 +0800 Subject: [PATCH 0774/2091] KVM: MMU: fix validation of mmio page fault commit 6f691251c0350ac52a007c54bf3ef62e9d8cdc5e upstream. We got the bug that qemu complained with "KVM: unknown exit, hardware reason 31" and KVM shown these info: [84245.284948] EPT: Misconfiguration. [84245.285056] EPT: GPA: 0xfeda848 [84245.285154] ept_misconfig_inspect_spte: spte 0x5eaef50107 level 4 [84245.285344] ept_misconfig_inspect_spte: spte 0x5f5fadc107 level 3 [84245.285532] ept_misconfig_inspect_spte: spte 0x5141d18107 level 2 [84245.285723] ept_misconfig_inspect_spte: spte 0x52e40dad77 level 1 This is because we got a mmio #PF and the handler see the mmio spte becomes normal (points to the ram page) However, this is valid after introducing fast mmio spte invalidation which increases the generation-number instead of zapping mmio sptes, a example is as follows: 1. QEMU drops mmio region by adding a new memslot 2. invalidate all mmio sptes 3. VCPU 0 VCPU 1 access the invalid mmio spte access the region originally was MMIO before set the spte to the normal ram map mmio #PF check the spte and see it becomes normal ram mapping !!! This patch fixes the bug just by dropping the check in mmio handler, it's good for backport. Full check will be introduced in later patches Reported-by: Pavel Shirshov Tested-by: Pavel Shirshov Signed-off-by: Xiao Guangrong Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/mmu.c | 45 --------------------------------------------- 1 file changed, 45 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index b73337634214c..554e877e0bc4a 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -357,12 +357,6 @@ static u64 __get_spte_lockless(u64 *sptep) { return ACCESS_ONCE(*sptep); } - -static bool __check_direct_spte_mmio_pf(u64 spte) -{ - /* It is valid if the spte is zapped. */ - return spte == 0ull; -} #else union split_spte { struct { @@ -478,23 +472,6 @@ static u64 __get_spte_lockless(u64 *sptep) return spte.spte; } - -static bool __check_direct_spte_mmio_pf(u64 spte) -{ - union split_spte sspte = (union split_spte)spte; - u32 high_mmio_mask = shadow_mmio_mask >> 32; - - /* It is valid if the spte is zapped. */ - if (spte == 0ull) - return true; - - /* It is valid if the spte is being zapped. 
*/ - if (sspte.spte_low == 0ull && - (sspte.spte_high & high_mmio_mask) == high_mmio_mask) - return true; - - return false; -} #endif static bool spte_is_locklessly_modifiable(u64 spte) @@ -3343,21 +3320,6 @@ static bool quickly_check_mmio_pf(struct kvm_vcpu *vcpu, u64 addr, bool direct) return vcpu_match_mmio_gva(vcpu, addr); } - -/* - * On direct hosts, the last spte is only allows two states - * for mmio page fault: - * - It is the mmio spte - * - It is zapped or it is being zapped. - * - * This function completely checks the spte when the last spte - * is not the mmio spte. - */ -static bool check_direct_spte_mmio_pf(u64 spte) -{ - return __check_direct_spte_mmio_pf(spte); -} - static u64 walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr) { struct kvm_shadow_walk_iterator iterator; @@ -3399,13 +3361,6 @@ int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct) return RET_MMIO_PF_EMULATE; } - /* - * It's ok if the gva is remapped by other cpus on shadow guest, - * it's a BUG if the gfn is not a mmio page. - */ - if (direct && !check_direct_spte_mmio_pf(spte)) - return RET_MMIO_PF_BUG; - /* * If the page table is zapped by other cpus, let CPU fault again on * the address. From 76c77a45a74a51dc7bdf7773d8ec37ba7cffe361 Mon Sep 17 00:00:00 2001 From: "Gautham R. Shenoy" Date: Fri, 7 Aug 2015 17:41:20 +0530 Subject: [PATCH 0775/2091] KVM: PPC: Book3S HV: Exit on H_DOORBELL if HOST_IPI is set commit 06554d9f6cc8f0b5ec903db19726a15dfc7b09d6 upstream. The code that handles the case when we receive a H_DOORBELL interrupt has a comment which says "Hypervisor doorbell - exit only if host IPI flag set". However, the current code does not actually check if the host IPI flag is set. This is due to a comparison instruction that got missed. As a result, the current code performs the exit to host only if some sibling thread or a sibling sub-core is exiting to the host. This implies that, an IPI sent to a sibling core in (subcores-per-core != 1) mode will be missed by the host unless the sibling core is on the exit path to the host. This patch adds the missing comparison operation which will ensure that when HOST_IPI flag is set, we unconditionally exit to the host. Fixes: 66feed61cdf6 Signed-off-by: Gautham R. Shenoy Reviewed-by: David Gibson Signed-off-by: Paul Mackerras Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 4d70df26c402c..3b2d2c5b6376c 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1127,6 +1127,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) cmpwi r12, BOOK3S_INTERRUPT_H_DOORBELL bne 3f lbz r0, HSTATE_HOST_IPI(r13) + cmpwi r0, 0 beq 4f b guest_exit_cont 3: From 73e56fdc3699efea574812d1116dfebd347ce88e Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 24 Jun 2015 21:18:05 +1000 Subject: [PATCH 0776/2091] KVM: PPC: Book3S HV: Fix race in reading change bit when removing HPTE commit 1e5bf454f58731e360e504253e85bae7aaa2d298 upstream. The reference (R) and change (C) bits in a HPT entry can be set by hardware at any time up until the HPTE is invalidated and the TLB invalidation sequence has completed. This means that when removing a HPTE, we need to read the HPTE after the invalidation sequence has completed in order to obtain reliable values of R and C. The code in kvmppc_do_h_remove() used to do this. 
However, commit 6f22bd3265fb ("KVM: PPC: Book3S HV: Make HTAB code LE host aware") removed the read after invalidation as a side effect of other changes. This restores the read of the HPTE after invalidation. The user-visible effect of this bug would be that when migrating a guest, there is a small probability that a page modified by the guest and then unmapped by the guest might not get re-transmitted and thus the destination might end up with a stale copy of the page. Fixes: 6f22bd3265fb Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kvm/book3s_hv_rm_mmu.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index b027a89737b62..c6d601cc97640 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -421,14 +421,20 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags, rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]); v = pte & ~HPTE_V_HVLOCK; if (v & HPTE_V_VALID) { - u64 pte1; - - pte1 = be64_to_cpu(hpte[1]); hpte[0] &= ~cpu_to_be64(HPTE_V_VALID); - rb = compute_tlbie_rb(v, pte1, pte_index); + rb = compute_tlbie_rb(v, be64_to_cpu(hpte[1]), pte_index); do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true); - /* Read PTE low word after tlbie to get final R/C values */ - remove_revmap_chain(kvm, pte_index, rev, v, pte1); + /* + * The reference (R) and change (C) bits in a HPT + * entry can be set by hardware at any time up until + * the HPTE is invalidated and the TLB invalidation + * sequence has completed. This means that when + * removing a HPTE, we need to re-read the HPTE after + * the invalidation sequence has completed in order to + * obtain reliable values of R and C. + */ + remove_revmap_chain(kvm, pte_index, rev, v, + be64_to_cpu(hpte[1])); } r = rev->guest_rpte & ~HPTE_GR_RESERVED; note_hpte_modification(kvm, rev); From 49102e956fec3806fb0c7fb7b68b0900feab8ece Mon Sep 17 00:00:00 2001 From: Haozhong Zhang Date: Fri, 7 Aug 2015 11:24:32 +0800 Subject: [PATCH 0777/2091] KVM: x86: Use adjustment in guest cycles when handling MSR_IA32_TSC_ADJUST commit d7add05458084a5e3d65925764a02ca9c8202c1e upstream. When kvm_set_msr_common() handles a guest's write to MSR_IA32_TSC_ADJUST, it will calcuate an adjustment based on the data written by guest and then use it to adjust TSC offset by calling a call-back adjust_tsc_offset(). The 3rd parameter of adjust_tsc_offset() indicates whether the adjustment is in host TSC cycles or in guest TSC cycles. If SVM TSC scaling is enabled, adjust_tsc_offset() [i.e. svm_adjust_tsc_offset()] will first scale the adjustment; otherwise, it will just use the unscaled one. As the MSR write here comes from the guest, the adjustment is in guest TSC cycles. However, the current kvm_set_msr_common() uses it as a value in host TSC cycles (by using true as the 3rd parameter of adjust_tsc_offset()), which can result in an incorrect adjustment of TSC offset if SVM TSC scaling is enabled. This patch fixes this problem. 
Signed-off-by: Haozhong Zhang Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ea306adbbc136..47a32f743a912 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2192,7 +2192,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (guest_cpuid_has_tsc_adjust(vcpu)) { if (!msr_info->host_initiated) { s64 adj = data - vcpu->arch.ia32_tsc_adjust_msr; - kvm_x86_ops->adjust_tsc_offset(vcpu, adj, true); + adjust_tsc_offset_guest(vcpu, adj); } vcpu->arch.ia32_tsc_adjust_msr = data; } From 36c2895c6b132f8ff19074f6d226bcedf2614e89 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Sat, 4 Jul 2015 15:27:39 +0300 Subject: [PATCH 0778/2091] xtensa: fix threadptr reload on return to userspace commit 4229fb12a03e5da5882b420b0aa4a02e77447b86 upstream. Userspace return code may skip restoring THREADPTR register if there are no registers that need to be zeroed. This leads to spurious failures in libc NPTL tests. Always restore THREADPTR on return to userspace. Signed-off-by: Max Filippov Signed-off-by: Greg Kroah-Hartman --- arch/xtensa/kernel/entry.S | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S index 82bbfa5a05b34..51e9877ced06a 100644 --- a/arch/xtensa/kernel/entry.S +++ b/arch/xtensa/kernel/entry.S @@ -568,12 +568,13 @@ user_exception_exit: * (if we have restored WSBITS-1 frames). */ +2: #if XCHAL_HAVE_THREADPTR l32i a3, a1, PT_THREADPTR wur a3, threadptr #endif -2: j common_exception_exit + j common_exception_exit /* This is the kernel exception exit. * We avoided to do a MOVSP when we entered the exception, but we From 3799248733cbdb14ca12b0680a2e776cb35e19fe Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Thu, 16 Jul 2015 10:41:02 +0300 Subject: [PATCH 0779/2091] xtensa: fix kernel register spilling commit 77d6273e79e3a86552fcf10cdd31a69b46ed2ce6 upstream. call12 can't be safely used as the first call in the inline function, because the compiler does not extend the stack frame of the bounding function accordingly, which may result in corruption of local variables. If a call needs to be done, do call8 first followed by call12. For pure assembly code in _switch_to increase stack frame size of the bounding function. 
Signed-off-by: Max Filippov Signed-off-by: Greg Kroah-Hartman --- arch/xtensa/include/asm/traps.h | 29 +++++++++++++++++++---------- arch/xtensa/kernel/entry.S | 4 ++-- 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/arch/xtensa/include/asm/traps.h b/arch/xtensa/include/asm/traps.h index 677bfcf4ee5dd..28f33a8b7f5f5 100644 --- a/arch/xtensa/include/asm/traps.h +++ b/arch/xtensa/include/asm/traps.h @@ -25,30 +25,39 @@ static inline void spill_registers(void) { #if XCHAL_NUM_AREGS > 16 __asm__ __volatile__ ( - " call12 1f\n" + " call8 1f\n" " _j 2f\n" " retw\n" " .align 4\n" "1:\n" +#if XCHAL_NUM_AREGS == 32 + " _entry a1, 32\n" + " addi a8, a0, 3\n" + " _entry a1, 16\n" + " mov a12, a12\n" + " retw\n" +#else " _entry a1, 48\n" - " addi a12, a0, 3\n" -#if XCHAL_NUM_AREGS > 32 - " .rept (" __stringify(XCHAL_NUM_AREGS) " - 32) / 12\n" + " call12 1f\n" + " retw\n" + " .align 4\n" + "1:\n" + " .rept (" __stringify(XCHAL_NUM_AREGS) " - 16) / 12\n" " _entry a1, 48\n" " mov a12, a0\n" " .endr\n" -#endif - " _entry a1, 48\n" + " _entry a1, 16\n" #if XCHAL_NUM_AREGS % 12 == 0 - " mov a8, a8\n" -#elif XCHAL_NUM_AREGS % 12 == 4 " mov a12, a12\n" -#elif XCHAL_NUM_AREGS % 12 == 8 +#elif XCHAL_NUM_AREGS % 12 == 4 " mov a4, a4\n" +#elif XCHAL_NUM_AREGS % 12 == 8 + " mov a8, a8\n" #endif " retw\n" +#endif "2:\n" - : : : "a12", "a13", "memory"); + : : : "a8", "a9", "memory"); #else __asm__ __volatile__ ( " mov a12, a12\n" diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S index 51e9877ced06a..a2a902140c4e5 100644 --- a/arch/xtensa/kernel/entry.S +++ b/arch/xtensa/kernel/entry.S @@ -1821,7 +1821,7 @@ ENDPROC(system_call) mov a12, a0 .endr #endif - _entry a1, 48 + _entry a1, 16 #if XCHAL_NUM_AREGS % 12 == 0 mov a8, a8 #elif XCHAL_NUM_AREGS % 12 == 4 @@ -1845,7 +1845,7 @@ ENDPROC(system_call) ENTRY(_switch_to) - entry a1, 16 + entry a1, 48 mov a11, a3 # and 'next' (a3) From f1d35b3d905cc564fa3fb89a3d802dd90b8ef8a8 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 15 Jul 2015 10:29:00 +0900 Subject: [PATCH 0780/2091] devres: fix devres_get() commit 64526370d11ce8868ca495723d595b61e8697fbf upstream. Currently, devres_get() passes devres_free() the pointer to devres, but devres_free() should be given with the pointer to resource data. Fixes: 9ac7849e35f7 ("devres: device resource management") Signed-off-by: Masahiro Yamada Acked-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- drivers/base/devres.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/base/devres.c b/drivers/base/devres.c index c8a53d1e019fb..8754646901174 100644 --- a/drivers/base/devres.c +++ b/drivers/base/devres.c @@ -297,10 +297,10 @@ void * devres_get(struct device *dev, void *new_res, if (!dr) { add_dr(dev, &new_dr->node); dr = new_dr; - new_dr = NULL; + new_res = NULL; } spin_unlock_irqrestore(&dev->devres_lock, flags); - devres_free(new_dr); + devres_free(new_res); return dr->data; } From d92f2a2a9b0afc5dd75679b63d0f682d75865c07 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Fri, 31 Jul 2015 16:36:28 +0800 Subject: [PATCH 0781/2091] Doc: ABI: testing: configfs-usb-gadget-loopback commit 8cd50626823c00ca7472b2f61cb8c0eb9798ddc0 upstream. 
Fix the name of attribute Signed-off-by: Peter Chen Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- Documentation/ABI/testing/configfs-usb-gadget-loopback | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/ABI/testing/configfs-usb-gadget-loopback b/Documentation/ABI/testing/configfs-usb-gadget-loopback index 9aae5bfb99088..06beefbcf061a 100644 --- a/Documentation/ABI/testing/configfs-usb-gadget-loopback +++ b/Documentation/ABI/testing/configfs-usb-gadget-loopback @@ -5,4 +5,4 @@ Description: The attributes: qlen - depth of loopback queue - bulk_buflen - buffer length + buflen - buffer length From 7d72c3751d555482a6729f99d4a41cc71660db1f Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Fri, 31 Jul 2015 16:36:29 +0800 Subject: [PATCH 0782/2091] Doc: ABI: testing: configfs-usb-gadget-sourcesink commit 4bc58eb16bb2352854b9c664cc36c1c68d2bfbb7 upstream. Fix the name of attribute Signed-off-by: Peter Chen Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- Documentation/ABI/testing/configfs-usb-gadget-sourcesink | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/ABI/testing/configfs-usb-gadget-sourcesink b/Documentation/ABI/testing/configfs-usb-gadget-sourcesink index 29477c319f61b..bc7ff731aa0cf 100644 --- a/Documentation/ABI/testing/configfs-usb-gadget-sourcesink +++ b/Documentation/ABI/testing/configfs-usb-gadget-sourcesink @@ -9,4 +9,4 @@ Description: isoc_maxpacket - 0 - 1023 (fs), 0 - 1024 (hs/ss) isoc_mult - 0..2 (hs/ss only) isoc_maxburst - 0..15 (ss only) - qlen - buffer length + buflen - buffer length From 7d3cb0eecda8d19007f0004a3af071ed1bc997b7 Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Delgado Date: Thu, 13 Aug 2015 16:09:28 +0200 Subject: [PATCH 0783/2091] spi/spi-xilinx: Fix spurious IRQ ACK on irq mode commit 74346841e6f5df5f7b83d5904435d273c507dba6 upstream. The ACK of an inexistent IRQ can trigger an spurious IRQ that breaks the txrx logic. This has been observed on axi_quad_spi:3.2 core. This patch only ACKs IRQs that have not been Acknowledge jet. 
Reported-by: Edward Kigwana Tested-by: Edward Kigwana Signed-off-by: Ricardo Ribalda Delgado Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-xilinx.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/spi/spi-xilinx.c b/drivers/spi/spi-xilinx.c index 133f53a9c1d4e..8a81a692f764d 100644 --- a/drivers/spi/spi-xilinx.c +++ b/drivers/spi/spi-xilinx.c @@ -249,19 +249,23 @@ static int xilinx_spi_txrx_bufs(struct spi_device *spi, struct spi_transfer *t) xspi->tx_ptr = t->tx_buf; xspi->rx_ptr = t->rx_buf; remaining_words = t->len / xspi->bytes_per_word; - reinit_completion(&xspi->done); if (xspi->irq >= 0 && remaining_words > xspi->buffer_size) { + u32 isr; use_irq = true; - xspi->write_fn(XSPI_INTR_TX_EMPTY, - xspi->regs + XIPIF_V123B_IISR_OFFSET); - /* Enable the global IPIF interrupt */ - xspi->write_fn(XIPIF_V123B_GINTR_ENABLE, - xspi->regs + XIPIF_V123B_DGIER_OFFSET); /* Inhibit irq to avoid spurious irqs on tx_empty*/ cr = xspi->read_fn(xspi->regs + XSPI_CR_OFFSET); xspi->write_fn(cr | XSPI_CR_TRANS_INHIBIT, xspi->regs + XSPI_CR_OFFSET); + /* ACK old irqs (if any) */ + isr = xspi->read_fn(xspi->regs + XIPIF_V123B_IISR_OFFSET); + if (isr) + xspi->write_fn(isr, + xspi->regs + XIPIF_V123B_IISR_OFFSET); + /* Enable the global IPIF interrupt */ + xspi->write_fn(XIPIF_V123B_GINTR_ENABLE, + xspi->regs + XIPIF_V123B_DGIER_OFFSET); + reinit_completion(&xspi->done); } while (remaining_words) { From 7aa3b3b1d7838a75ad91fce91f187d26aacaa682 Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Delgado Date: Wed, 12 Aug 2015 18:04:04 +0200 Subject: [PATCH 0784/2091] spi/spi-xilinx: Fix mixed poll/irq mode commit 16ea9b8ac45bf11d48af6013283e141e8ed86348 upstream. Once the module process a transfer in irq mode, the next poll transfer will not work because the transmitter is left in inhibited state. Fixes: 22417352f6b7f623 (Use polling mode on small transfers) Reported-by: Edward Kigwana Signed-off-by: Ricardo Ribalda Delgado Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-xilinx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-xilinx.c b/drivers/spi/spi-xilinx.c index 8a81a692f764d..a339c1e9997a9 100644 --- a/drivers/spi/spi-xilinx.c +++ b/drivers/spi/spi-xilinx.c @@ -306,8 +306,10 @@ static int xilinx_spi_txrx_bufs(struct spi_device *spi, struct spi_transfer *t) remaining_words -= n_words; } - if (use_irq) + if (use_irq) { xspi->write_fn(0, xspi->regs + XIPIF_V123B_DGIER_OFFSET); + xspi->write_fn(cr, xspi->regs + XSPI_CR_OFFSET); + } return t->len; } From 1042a41280224603cc6e6c282d4811bcfd79a251 Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Mon, 20 Jul 2015 17:27:21 +0530 Subject: [PATCH 0785/2091] auxdisplay: ks0108: fix refcount commit bab383de3b84e584b0f09227151020b2a43dc34c upstream. parport_find_base() will implicitly do parport_get_port() which increases the refcount. Then parport_register_device() will again increment the refcount. But while unloading the module we are only doing parport_unregister_device() decrementing the refcount only once. We add an parport_put_port() to neutralize the effect of parport_get_port(). 
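The arithmetic of the leak is simple: two implicit gets against a single put leaves the port pinned forever. A toy refcount sketch of the imbalance and of the balancing put (not the parport API):

#include <stdio.h>

struct port { int refcount; };

static void get_port(struct port *p) { p->refcount++; }
static void put_port(struct port *p) { p->refcount--; }

int main(void)
{
        struct port port = { .refcount = 0 };

        get_port(&port);        /* find_base()-style lookup takes a reference */
        get_port(&port);        /* register_device() takes another            */
        put_port(&port);        /* the fix: drop the lookup's reference now   */

        /* module unload: unregister_device() drops its own reference */
        put_port(&port);

        printf("refcount at exit: %d\n", port.refcount);        /* 0, balanced */
        return 0;
}
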
Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/auxdisplay/ks0108.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/auxdisplay/ks0108.c b/drivers/auxdisplay/ks0108.c index 5b93852392b8c..0d752851a1eef 100644 --- a/drivers/auxdisplay/ks0108.c +++ b/drivers/auxdisplay/ks0108.c @@ -139,6 +139,7 @@ static int __init ks0108_init(void) ks0108_pardevice = parport_register_device(ks0108_parport, KS0108_NAME, NULL, NULL, NULL, PARPORT_DEV_EXCL, NULL); + parport_put_port(ks0108_parport); if (ks0108_pardevice == NULL) { printk(KERN_ERR KS0108_NAME ": ERROR: " "parport didn't register new device\n"); From 0fd96604b85be283a3bef4939fdd70d30a232bbc Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Mon, 27 Jul 2015 16:54:10 +0530 Subject: [PATCH 0786/2091] regulator: pbias: Fix broken pbias disable functionality commit c329061be51bef655f28c9296093984c977aff85 upstream. regulator_disable of pbias always writes '0' to the enable_reg. However actual disable value of pbias regulator is not always '0'. Fix it by populating the disable_val in pbias_reg_info for the various platforms and assign it to the disable_val of pbias regulator descriptor. This will be used by regulator_disable_regmap while disabling pbias regulator. Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/regulator/pbias-regulator.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/regulator/pbias-regulator.c b/drivers/regulator/pbias-regulator.c index bd2b75c0d1d12..4fa7bcaf454e8 100644 --- a/drivers/regulator/pbias-regulator.c +++ b/drivers/regulator/pbias-regulator.c @@ -30,6 +30,7 @@ struct pbias_reg_info { u32 enable; u32 enable_mask; + u32 disable_val; u32 vmode; unsigned int enable_time; char *name; @@ -62,6 +63,7 @@ static const struct pbias_reg_info pbias_mmc_omap2430 = { .enable = BIT(1), .enable_mask = BIT(1), .vmode = BIT(0), + .disable_val = 0, .enable_time = 100, .name = "pbias_mmc_omap2430" }; @@ -77,6 +79,7 @@ static const struct pbias_reg_info pbias_sim_omap3 = { static const struct pbias_reg_info pbias_mmc_omap4 = { .enable = BIT(26) | BIT(22), .enable_mask = BIT(26) | BIT(25) | BIT(22), + .disable_val = BIT(25), .vmode = BIT(21), .enable_time = 100, .name = "pbias_mmc_omap4" @@ -85,6 +88,7 @@ static const struct pbias_reg_info pbias_mmc_omap4 = { static const struct pbias_reg_info pbias_mmc_omap5 = { .enable = BIT(27) | BIT(26), .enable_mask = BIT(27) | BIT(25) | BIT(26), + .disable_val = BIT(25), .vmode = BIT(21), .enable_time = 100, .name = "pbias_mmc_omap5" @@ -159,6 +163,7 @@ static int pbias_regulator_probe(struct platform_device *pdev) drvdata[data_idx].desc.enable_reg = res->start; drvdata[data_idx].desc.enable_mask = info->enable_mask; drvdata[data_idx].desc.enable_val = info->enable; + drvdata[data_idx].desc.disable_val = info->disable_val; cfg.init_data = pbias_matches[idx].init_data; cfg.driver_data = &drvdata[data_idx]; From e4b09a61e9b1eb200f300cdc21d146dbdc4014ca Mon Sep 17 00:00:00 2001 From: Xie XiuQi Date: Wed, 12 Aug 2015 18:29:41 +0200 Subject: [PATCH 0787/2091] x86/mce: Reenable CMCI banks when swiching back to interrupt mode commit 1b48465500611a2dc5e75800c61ac352e22d41c3 upstream. Zhang Liguang reported the following issue: 1) System detects a CMCI storm on the current CPU. 
2) Kernel disables the CMCI interrupt on banks owned by the current CPU and switches to poll mode 3) After the CMCI storm subsides, kernel switches back to interrupt mode 4) We expect the system to reenable the CMCI interrupt on banks owned by the current CPU mce_intel_adjust_timer |-> cmci_reenable |-> cmci_discover # owned banks are ignored here static void cmci_discover(int banks) ... for (i = 0; i < banks; i++) { ... if (test_bit(i, owned)) # ownd banks is ignore here continue; So convert cmci_storm_disable_banks() to cmci_toggle_interrupt_mode() which controls whether to enable or disable CMCI interrupts with its argument. NB: We cannot clear the owned bit because the banks won't be polled, otherwise. See: 27f6c573e0f7 ("x86, CMCI: Add proper detection of end of CMCI storms") for more info. Reported-by: Zhang Liguang Signed-off-by: Xie XiuQi Signed-off-by: Borislav Petkov Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Cc: huawei.libin@huawei.com Cc: linux-edac Cc: rui.xiang@huawei.com Link: http://lkml.kernel.org/r/1439396985-12812-10-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/mcheck/mce_intel.c | 41 +++++++++++++++----------- 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index b4a41cf030eda..e166d833cf63b 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c @@ -116,6 +116,27 @@ void mce_intel_hcpu_update(unsigned long cpu) per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE; } +static void cmci_toggle_interrupt_mode(bool on) +{ + unsigned long flags, *owned; + int bank; + u64 val; + + raw_spin_lock_irqsave(&cmci_discover_lock, flags); + owned = this_cpu_ptr(mce_banks_owned); + for_each_set_bit(bank, owned, MAX_NR_BANKS) { + rdmsrl(MSR_IA32_MCx_CTL2(bank), val); + + if (on) + val |= MCI_CTL2_CMCI_EN; + else + val &= ~MCI_CTL2_CMCI_EN; + + wrmsrl(MSR_IA32_MCx_CTL2(bank), val); + } + raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); +} + unsigned long cmci_intel_adjust_timer(unsigned long interval) { if ((this_cpu_read(cmci_backoff_cnt) > 0) && @@ -145,7 +166,7 @@ unsigned long cmci_intel_adjust_timer(unsigned long interval) */ if (!atomic_read(&cmci_storm_on_cpus)) { __this_cpu_write(cmci_storm_state, CMCI_STORM_NONE); - cmci_reenable(); + cmci_toggle_interrupt_mode(true); cmci_recheck(); } return CMCI_POLL_INTERVAL; @@ -156,22 +177,6 @@ unsigned long cmci_intel_adjust_timer(unsigned long interval) } } -static void cmci_storm_disable_banks(void) -{ - unsigned long flags, *owned; - int bank; - u64 val; - - raw_spin_lock_irqsave(&cmci_discover_lock, flags); - owned = this_cpu_ptr(mce_banks_owned); - for_each_set_bit(bank, owned, MAX_NR_BANKS) { - rdmsrl(MSR_IA32_MCx_CTL2(bank), val); - val &= ~MCI_CTL2_CMCI_EN; - wrmsrl(MSR_IA32_MCx_CTL2(bank), val); - } - raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); -} - static bool cmci_storm_detect(void) { unsigned int cnt = __this_cpu_read(cmci_storm_cnt); @@ -193,7 +198,7 @@ static bool cmci_storm_detect(void) if (cnt <= CMCI_STORM_THRESHOLD) return false; - cmci_storm_disable_banks(); + cmci_toggle_interrupt_mode(false); __this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE); r = atomic_add_return(1, &cmci_storm_on_cpus); mce_timer_kick(CMCI_STORM_INTERVAL); From 72cdf324731e08faebd16a6a281872f6f7d8fcb2 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Thu, 9 Jul 2015 09:59:55 +0200 
Subject: [PATCH 0788/2091] soc/tegra: pmc: Avoid usage of uninitialized variable commit 95169cd23bfa88003f8be06234dbd65f5737add0 upstream. Make sure to only drop the reference to the OF node after it's been successfully obtained. Fixes: 3568df3d31d6 ("soc: tegra: Add thermal reset (thermtrip) support to PMC") Reviewed-by: Mikko Perttunen Signed-off-by: Thierry Reding Signed-off-by: Greg Kroah-Hartman --- drivers/soc/tegra/pmc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/soc/tegra/pmc.c b/drivers/soc/tegra/pmc.c index c956395cf46f9..c89bada875f85 100644 --- a/drivers/soc/tegra/pmc.c +++ b/drivers/soc/tegra/pmc.c @@ -732,12 +732,12 @@ void tegra_pmc_init_tsense_reset(struct tegra_pmc *pmc) u32 value, checksum; if (!pmc->soc->has_tsense_reset) - goto out; + return; np = of_find_node_by_name(pmc->dev->of_node, "i2c-thermtrip"); if (!np) { dev_warn(dev, "i2c-thermtrip node not found, %s.\n", disabled); - goto out; + return; } if (of_property_read_u32(np, "nvidia,i2c-controller-id", &ctrl_id)) { From 86bd6436f2d4df75952820010af611dcf85d17f9 Mon Sep 17 00:00:00 2001 From: David Daney Date: Wed, 19 Aug 2015 13:17:47 -0700 Subject: [PATCH 0789/2091] of/address: Don't loop forever in of_find_matching_node_by_address(). commit 3a496b00b6f90c41bd21a410871dfc97d4f3c7ab upstream. If the internal call to of_address_to_resource() fails, we end up looping forever in of_find_matching_node_by_address(). This can be caused by a defective device tree, or calling with an incorrect matches argument. Fix by calling of_find_matching_node() unconditionally at the end of the loop. Signed-off-by: David Daney Signed-off-by: Rob Herring Signed-off-by: Greg Kroah-Hartman --- drivers/of/address.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/of/address.c b/drivers/of/address.c index 8bfda6ade2c02..384574c3987c3 100644 --- a/drivers/of/address.c +++ b/drivers/of/address.c @@ -845,10 +845,10 @@ struct device_node *of_find_matching_node_by_address(struct device_node *from, struct resource res; while (dn) { - if (of_address_to_resource(dn, 0, &res)) - continue; - if (res.start == base_address) + if (!of_address_to_resource(dn, 0, &res) && + res.start == base_address) return dn; + dn = of_find_matching_node(dn, matches); } From 25f5225e7c06316ed7d5866bb95ea0acd3de7921 Mon Sep 17 00:00:00 2001 From: Benjamin Cama Date: Tue, 14 Jul 2015 16:25:58 +0200 Subject: [PATCH 0790/2091] ARM: orion5x: fix legacy orion5x IRQ numbers commit 5be9fc23cdb42e1d383ecc8eae8a8ff70a752708 upstream. Since v3.18, attempts to deliver IRQ0 are rejected, breaking orion5x. Fix this by increasing all interrupts by one, as did 5d6bed2a9c8b for dove. Also, force MULTI_IRQ_HANDLER for all orion platforms (including dove) as the specific handler is needed to shift back IRQ numbers by one. [gregory.clement@free-electrons.com]: moved the select MULTI_IRQ_HANDLER from PLAT_ORION_LEGACY to ARCH_ORION5X as it broke the build for dove. 
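The scheme is a plain +1 shift: hardware cause bits 0..31 now map to legacy IRQ numbers 1..32 so that IRQ 0 is never handed to the core IRQ layer, and the platform's low-level handler applies the same shift when it demuxes the cause register. A simplified sketch of that mapping (not the actual orion5x demux code; the interrupt names refer to the new numbering in the diff below):

#include <stdint.h>
#include <stdio.h>

#define LEGACY_IRQ_BASE 1       /* hardware source n -> legacy IRQ (n + 1) */

static void handle_irq(unsigned int irq)
{
        printf("dispatching legacy IRQ %u\n", irq);
}

/* walk the main cause register and dispatch each pending source */
static void demux_main_cause(uint32_t cause)
{
        unsigned int bit;

        for (bit = 0; bit < 32; bit++)
                if (cause & (1u << bit))
                        handle_irq(bit + LEGACY_IRQ_BASE);
}

int main(void)
{
        /* cause bits 3 and 29 pending: UART0 and SATA in the new numbering */
        demux_main_cause((1u << 3) | (1u << 29));
        return 0;
}
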
Fixes: a71b092a9c68 ("ARM: Convert handle_IRQ to use __handle_domain_irq") Signed-off-by: Benjamin Cama Signed-off-by: Gregory CLEMENT Tested-by: Detlef Vollmann Signed-off-by: Greg Kroah-Hartman --- arch/arm/Kconfig | 1 + arch/arm/mach-orion5x/include/mach/irqs.h | 64 +++++++++++------------ arch/arm/mach-orion5x/irq.c | 4 +- 3 files changed, 35 insertions(+), 34 deletions(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 45df48ba0b128..19f4cc634b0e8 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -538,6 +538,7 @@ config ARCH_ORION5X select MVEBU_MBUS select PCI select PLAT_ORION_LEGACY + select MULTI_IRQ_HANDLER help Support for the following Marvell Orion 5x series SoCs: Orion-1 (5181), Orion-VoIP (5181L), Orion-NAS (5182), diff --git a/arch/arm/mach-orion5x/include/mach/irqs.h b/arch/arm/mach-orion5x/include/mach/irqs.h index a6fa9d8f12d88..2431d99234273 100644 --- a/arch/arm/mach-orion5x/include/mach/irqs.h +++ b/arch/arm/mach-orion5x/include/mach/irqs.h @@ -16,42 +16,42 @@ /* * Orion Main Interrupt Controller */ -#define IRQ_ORION5X_BRIDGE 0 -#define IRQ_ORION5X_DOORBELL_H2C 1 -#define IRQ_ORION5X_DOORBELL_C2H 2 -#define IRQ_ORION5X_UART0 3 -#define IRQ_ORION5X_UART1 4 -#define IRQ_ORION5X_I2C 5 -#define IRQ_ORION5X_GPIO_0_7 6 -#define IRQ_ORION5X_GPIO_8_15 7 -#define IRQ_ORION5X_GPIO_16_23 8 -#define IRQ_ORION5X_GPIO_24_31 9 -#define IRQ_ORION5X_PCIE0_ERR 10 -#define IRQ_ORION5X_PCIE0_INT 11 -#define IRQ_ORION5X_USB1_CTRL 12 -#define IRQ_ORION5X_DEV_BUS_ERR 14 -#define IRQ_ORION5X_PCI_ERR 15 -#define IRQ_ORION5X_USB_BR_ERR 16 -#define IRQ_ORION5X_USB0_CTRL 17 -#define IRQ_ORION5X_ETH_RX 18 -#define IRQ_ORION5X_ETH_TX 19 -#define IRQ_ORION5X_ETH_MISC 20 -#define IRQ_ORION5X_ETH_SUM 21 -#define IRQ_ORION5X_ETH_ERR 22 -#define IRQ_ORION5X_IDMA_ERR 23 -#define IRQ_ORION5X_IDMA_0 24 -#define IRQ_ORION5X_IDMA_1 25 -#define IRQ_ORION5X_IDMA_2 26 -#define IRQ_ORION5X_IDMA_3 27 -#define IRQ_ORION5X_CESA 28 -#define IRQ_ORION5X_SATA 29 -#define IRQ_ORION5X_XOR0 30 -#define IRQ_ORION5X_XOR1 31 +#define IRQ_ORION5X_BRIDGE (1 + 0) +#define IRQ_ORION5X_DOORBELL_H2C (1 + 1) +#define IRQ_ORION5X_DOORBELL_C2H (1 + 2) +#define IRQ_ORION5X_UART0 (1 + 3) +#define IRQ_ORION5X_UART1 (1 + 4) +#define IRQ_ORION5X_I2C (1 + 5) +#define IRQ_ORION5X_GPIO_0_7 (1 + 6) +#define IRQ_ORION5X_GPIO_8_15 (1 + 7) +#define IRQ_ORION5X_GPIO_16_23 (1 + 8) +#define IRQ_ORION5X_GPIO_24_31 (1 + 9) +#define IRQ_ORION5X_PCIE0_ERR (1 + 10) +#define IRQ_ORION5X_PCIE0_INT (1 + 11) +#define IRQ_ORION5X_USB1_CTRL (1 + 12) +#define IRQ_ORION5X_DEV_BUS_ERR (1 + 14) +#define IRQ_ORION5X_PCI_ERR (1 + 15) +#define IRQ_ORION5X_USB_BR_ERR (1 + 16) +#define IRQ_ORION5X_USB0_CTRL (1 + 17) +#define IRQ_ORION5X_ETH_RX (1 + 18) +#define IRQ_ORION5X_ETH_TX (1 + 19) +#define IRQ_ORION5X_ETH_MISC (1 + 20) +#define IRQ_ORION5X_ETH_SUM (1 + 21) +#define IRQ_ORION5X_ETH_ERR (1 + 22) +#define IRQ_ORION5X_IDMA_ERR (1 + 23) +#define IRQ_ORION5X_IDMA_0 (1 + 24) +#define IRQ_ORION5X_IDMA_1 (1 + 25) +#define IRQ_ORION5X_IDMA_2 (1 + 26) +#define IRQ_ORION5X_IDMA_3 (1 + 27) +#define IRQ_ORION5X_CESA (1 + 28) +#define IRQ_ORION5X_SATA (1 + 29) +#define IRQ_ORION5X_XOR0 (1 + 30) +#define IRQ_ORION5X_XOR1 (1 + 31) /* * Orion General Purpose Pins */ -#define IRQ_ORION5X_GPIO_START 32 +#define IRQ_ORION5X_GPIO_START 33 #define NR_GPIO_IRQS 32 #define NR_IRQS (IRQ_ORION5X_GPIO_START + NR_GPIO_IRQS) diff --git a/arch/arm/mach-orion5x/irq.c b/arch/arm/mach-orion5x/irq.c index cd4bac4d7e43f..086ecb87d8858 100644 --- a/arch/arm/mach-orion5x/irq.c +++ 
b/arch/arm/mach-orion5x/irq.c @@ -42,7 +42,7 @@ __exception_irq_entry orion5x_legacy_handle_irq(struct pt_regs *regs) stat = readl_relaxed(MAIN_IRQ_CAUSE); stat &= readl_relaxed(MAIN_IRQ_MASK); if (stat) { - unsigned int hwirq = __fls(stat); + unsigned int hwirq = 1 + __fls(stat); handle_IRQ(hwirq, regs); return; } @@ -51,7 +51,7 @@ __exception_irq_entry orion5x_legacy_handle_irq(struct pt_regs *regs) void __init orion5x_init_irq(void) { - orion_irq_init(0, MAIN_IRQ_MASK); + orion_irq_init(1, MAIN_IRQ_MASK); #ifdef CONFIG_MULTI_IRQ_HANDLER set_handle_irq(orion5x_legacy_handle_irq); From f7da4ad880129c40624b22e009ef94891ac8d32e Mon Sep 17 00:00:00 2001 From: Hyungwon Hwang Date: Mon, 15 Jun 2015 13:03:17 +0900 Subject: [PATCH 0791/2091] ARM: dts: fix clock-frequency of display timing0 for exynos3250-rinato commit 65e3293381e1cf1abcfe1aa22b914650a40e3af4 upstream. After the commit abc0b1447d49 ("drm: Perform basic sanity checks on probed modes"), proper clock-frequency becomes mandatory for validating the mode of panel. The display does not work if there is no mode validated. Also, this clock-frequency must be set appropriately for getting required frame rate. Fixes: abc0b1447d49 ("drm: Perform basic sanity checks on probed modes") Signed-off-by: Hyungwon Hwang Signed-off-by: Krzysztof Kozlowski Sigend-off-by: Kukjin Kim Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/exynos3250-rinato.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/exynos3250-rinato.dts b/arch/arm/boot/dts/exynos3250-rinato.dts index 0b9906880c0c7..75aba40c69e13 100644 --- a/arch/arm/boot/dts/exynos3250-rinato.dts +++ b/arch/arm/boot/dts/exynos3250-rinato.dts @@ -181,7 +181,7 @@ display-timings { timing-0 { - clock-frequency = <0>; + clock-frequency = <4600000>; hactive = <320>; vactive = <320>; hfront-porch = <1>; From 23df80191134ee02b5575436c7f1d366b2795a15 Mon Sep 17 00:00:00 2001 From: Vignesh R Date: Wed, 3 Jun 2015 17:21:20 +0530 Subject: [PATCH 0792/2091] ARM: OMAP2+: DRA7: clockdomain: change l4per2_7xx_clkdm to SW_WKUP commit b9e23f321940d2db2c9def8ff723b8464fb86343 upstream. Legacy IPs like PWMSS, present under l4per2_7xx_clkdm, cannot support smart-idle when its clock domain is in HW_AUTO on DRA7 SoCs. Hence, program clock domain to SW_WKUP. Signed-off-by: Vignesh R Acked-by: Tero Kristo Reviewed-by: Paul Walmsley Signed-off-by: Paul Walmsley Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-omap2/clockdomains7xx_data.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-omap2/clockdomains7xx_data.c b/arch/arm/mach-omap2/clockdomains7xx_data.c index 57d5df0c1fbd0..7581e036bda62 100644 --- a/arch/arm/mach-omap2/clockdomains7xx_data.c +++ b/arch/arm/mach-omap2/clockdomains7xx_data.c @@ -331,7 +331,7 @@ static struct clockdomain l4per2_7xx_clkdm = { .dep_bit = DRA7XX_L4PER2_STATDEP_SHIFT, .wkdep_srcs = l4per2_wkup_sleep_deps, .sleepdep_srcs = l4per2_wkup_sleep_deps, - .flags = CLKDM_CAN_HWSUP_SWSUP, + .flags = CLKDM_CAN_SWSUP, }; static struct clockdomain mpu0_7xx_clkdm = { From 322ce3c54460446faa3db9db50f0e801c631625e Mon Sep 17 00:00:00 2001 From: Caesar Wang Date: Tue, 9 Jun 2015 17:49:57 +0800 Subject: [PATCH 0793/2091] ARM: rockchip: fix the CPU soft reset commit fe4407c0dc58215a7abfb7532740d79ddabe7a7a upstream. We need different orderings when turning a core on and turning a core off. In one case we need to assert reset before turning power off. In ther other case we need to turn power on and the deassert reset. 
In general, the correct flow is: CPU off: reset_control_assert regmap_update_bits(pmu, PMU_PWRDN_CON, BIT(pd), BIT(pd)) wait_for_power_domain_to_turn_off CPU on: regmap_update_bits(pmu, PMU_PWRDN_CON, BIT(pd), 0) wait_for_power_domain_to_turn_on reset_control_deassert This is needed for stressing CPU up/down, as per: cd /sys/devices/system/cpu/ for i in $(seq 10000); do echo "================= $i ============" for j in $(seq 100); do while [[ "$(cat cpu1/online)$(cat cpu2/online)$(cat cpu3/online)" != "000"" ]] echo 0 > cpu1/online echo 0 > cpu2/online echo 0 > cpu3/online done while [[ "$(cat cpu1/online)$(cat cpu2/online)$(cat cpu3/online)" != "111" ]]; do echo 1 > cpu1/online echo 1 > cpu2/online echo 1 > cpu3/online done done done The following is reproducable log: [34466.186812] PM: noirq suspend of devices complete after 0.669 msecs [34466.186824] Disabling non-boot CPUs ... [34466.187509] CPU1: shutdown [34466.188672] CPU2: shutdown [34473.736627] Kernel panic - not syncing:Watchdog detected hard LOCKUP on cpu 0 ....... or others similar log: ....... [ 4072.454453] CPU1: shutdown [ 4072.504436] CPU2: shutdown [ 4072.554426] CPU3: shutdown [ 4072.577827] CPU1: Booted secondary processor [ 4072.582611] CPU2: Booted secondary processor Tested by cpu up/down scripts, the results told us need delay more time before write the sram. The wait time is affected by many aspects (e.g: cpu frequency, bootrom frequency, sram frequency, bus speed, ...). Although the cpus other than cpu0 will write the sram, the speedy is no the same as cpu0, if the cpu0 early wake up, perhaps the other cpus can't startup. As we know, the cpu0 can wake up when the cpu1/2/3 write the 'sram+4/8' and send the sev. Anyway..... At the moment, 1ms delay will be happy work for cpu up/down scripts test. Signed-off-by: Caesar Wang Reviewed-by: Doug Anderson Reviewed-by: Kever Yang Fixes: 3ee851e212d0 ("ARM: rockchip: add basic smp support for rk3288") Signed-off-by: Heiko Stuebner Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-rockchip/platsmp.c | 40 ++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/arch/arm/mach-rockchip/platsmp.c b/arch/arm/mach-rockchip/platsmp.c index 2e6ab67e22844..58ca50104a14e 100644 --- a/arch/arm/mach-rockchip/platsmp.c +++ b/arch/arm/mach-rockchip/platsmp.c @@ -72,29 +72,22 @@ static struct reset_control *rockchip_get_core_reset(int cpu) static int pmu_set_power_domain(int pd, bool on) { u32 val = (on) ? 0 : BIT(pd); + struct reset_control *rstc = rockchip_get_core_reset(pd); int ret; + if (IS_ERR(rstc) && read_cpuid_part() != ARM_CPU_PART_CORTEX_A9) { + pr_err("%s: could not get reset control for core %d\n", + __func__, pd); + return PTR_ERR(rstc); + } + /* * We need to soft reset the cpu when we turn off the cpu power domain, * or else the active processors might be stalled when the individual * processor is powered down. 
*/ - if (read_cpuid_part() != ARM_CPU_PART_CORTEX_A9) { - struct reset_control *rstc = rockchip_get_core_reset(pd); - - if (IS_ERR(rstc)) { - pr_err("%s: could not get reset control for core %d\n", - __func__, pd); - return PTR_ERR(rstc); - } - - if (on) - reset_control_deassert(rstc); - else - reset_control_assert(rstc); - - reset_control_put(rstc); - } + if (!IS_ERR(rstc) && !on) + reset_control_assert(rstc); ret = regmap_update_bits(pmu, PMU_PWRDN_CON, BIT(pd), val); if (ret < 0) { @@ -112,6 +105,12 @@ static int pmu_set_power_domain(int pd, bool on) } } + if (!IS_ERR(rstc)) { + if (on) + reset_control_deassert(rstc); + reset_control_put(rstc); + } + return 0; } @@ -147,9 +146,14 @@ static int __cpuinit rockchip_boot_secondary(unsigned int cpu, * the mailbox: * sram_base_addr + 4: 0xdeadbeaf * sram_base_addr + 8: start address for pc + * The cpu0 need to wait the other cpus other than cpu0 entering + * the wfe state.The wait time is affected by many aspects. + * (e.g: cpu frequency, bootrom frequency, sram frequency, ...) * */ - udelay(10); - writel(virt_to_phys(secondary_startup), sram_base_addr + 8); + mdelay(1); /* ensure the cpus other than cpu0 to startup */ + + writel(virt_to_phys(rockchip_secondary_startup), + sram_base_addr + 8); writel(0xDEADBEAF, sram_base_addr + 4); dsb_sev(); } From 33a0064c659d8fbbe6811aaddd0274e0e68b5e92 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Fri, 19 Jun 2015 16:31:14 +0200 Subject: [PATCH 0794/2091] ARM: dts: rockchip: fix rk3288 watchdog irq commit 1a1b698b115467242303daf5fe1d3c9886c2fa17 upstream. The watchdog irq is actually SPI 79, which translates to the original 111 in the manual where the SPI irqs start at 32. The current dw_wdt driver does not use the irq at all, so this issue never surfaced. Nevertheless fix this for a time we want to use the irq. Fixes: 2ab557b72d46 ("ARM: dts: rockchip: add core rk3288 dtsi") Signed-off-by: Heiko Stuebner Reviewed-by: Douglas Anderson Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/rk3288.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi index 165968d51d8fd..8eca5878a8774 100644 --- a/arch/arm/boot/dts/rk3288.dtsi +++ b/arch/arm/boot/dts/rk3288.dtsi @@ -584,7 +584,7 @@ compatible = "rockchip,rk3288-wdt", "snps,dw-wdt"; reg = <0xff800000 0x100>; clocks = <&cru PCLK_WDT>; - interrupts = ; + interrupts = ; status = "disabled"; }; From f4e4aaa5b1c9ad0ce9370444f566b972fb988349 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Fri, 21 Aug 2015 15:36:23 +0800 Subject: [PATCH 0795/2091] ACPI, PCI: Penalize legacy IRQ used by ACPI SCI commit 5d0ddfebb93069061880fc57ee4ba7246bd1e1ee upstream. Nick Meier reported a regression with HyperV that " After rebooting the VM, the following messages are logged in syslog when trying to load the tulip driver: tulip: Linux Tulip drivers version 1.1.15 (Feb 27, 2007) tulip: 0000:00:0a.0: PCI INT A: failed to register GSI tulip: Cannot enable tulip board #0, aborting tulip: probe of 0000:00:0a.0 failed with error -16 Errors occur in 3.19.0 kernel Works in 3.17 kernel. 
" According to the ACPI dump file posted by Nick at https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1440072 The ACPI MADT table includes an interrupt source overridden entry for ACPI SCI: [236h 0566 1] Subtable Type : 02 [237h 0567 1] Length : 0A [238h 0568 1] Bus : 00 [239h 0569 1] Source : 09 [23Ah 0570 4] Interrupt : 00000009 [23Eh 0574 2] Flags (decoded below) : 000D Polarity : 1 Trigger Mode : 3 And in DSDT table, we have _PRT method to define PCI interrupts, which eventually goes to: Name (PRSA, ResourceTemplate () { IRQ (Level, ActiveLow, Shared, ) {3,4,5,7,9,10,11,12,14,15} }) Name (PRSB, ResourceTemplate () { IRQ (Level, ActiveLow, Shared, ) {3,4,5,7,9,10,11,12,14,15} }) Name (PRSC, ResourceTemplate () { IRQ (Level, ActiveLow, Shared, ) {3,4,5,7,9,10,11,12,14,15} }) Name (PRSD, ResourceTemplate () { IRQ (Level, ActiveLow, Shared, ) {3,4,5,7,9,10,11,12,14,15} }) According to the MADT and DSDT tables, IRQ 9 may be used for: 1) ACPI SCI in level, high mode 2) PCI legacy IRQ in level, low mode So there's a conflict in polarity setting for IRQ 9. Prior to commit cd68f6bd53cf ("x86, irq, acpi: Get rid of special handling of GSI for ACPI SCI"), ACPI SCI is handled specially and there's no check for conflicts between ACPI SCI and PCI legagy IRQ. And it seems that the HyperV hypervisor doesn't make use of the polarity configuration in IOAPIC entry, so it just works. Commit cd68f6bd53cf gets rid of the specially handling of ACPI SCI, and then the pin attribute checking code discloses the conflicts between ACPI SCI and PCI legacy IRQ on HyperV virtual machine, and rejects the request to assign IRQ9 to PCI devices. So penalize legacy IRQ used by ACPI SCI and mark it unusable if ACPI SCI attributes conflict with PCI IRQ attributes. Please refer to following links for more information: https://bugzilla.kernel.org/show_bug.cgi?id=101301 https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1440072 Fixes: cd68f6bd53cf ("x86, irq, acpi: Get rid of special handling of GSI for ACPI SCI") Reported-and-tested-by: Nick Meier Acked-by: Thomas Gleixner Signed-off-by: Jiang Liu Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/acpi/boot.c | 1 + drivers/acpi/pci_link.c | 16 ++++++++++++++++ include/linux/acpi.h | 2 +- 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index dbe76a14c3c99..07bea80223f69 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -489,6 +489,7 @@ static void __init acpi_sci_ioapic_setup(u8 bus_irq, u16 polarity, u16 trigger, polarity = acpi_sci_flags & ACPI_MADT_POLARITY_MASK; mp_override_legacy_irq(bus_irq, polarity, trigger, gsi); + acpi_penalize_sci_irq(bus_irq, trigger, polarity); /* * stash over-ride to indicate we've been here diff --git a/drivers/acpi/pci_link.c b/drivers/acpi/pci_link.c index cfd7581cc19fa..b09ad554430ac 100644 --- a/drivers/acpi/pci_link.c +++ b/drivers/acpi/pci_link.c @@ -825,6 +825,22 @@ void acpi_penalize_isa_irq(int irq, int active) } } +/* + * Penalize IRQ used by ACPI SCI. If ACPI SCI pin attributes conflict with + * PCI IRQ attributes, mark ACPI SCI as ISA_ALWAYS so it won't be use for + * PCI IRQs. 
+ */ +void acpi_penalize_sci_irq(int irq, int trigger, int polarity) +{ + if (irq >= 0 && irq < ARRAY_SIZE(acpi_irq_penalty)) { + if (trigger != ACPI_MADT_TRIGGER_LEVEL || + polarity != ACPI_MADT_POLARITY_ACTIVE_LOW) + acpi_irq_penalty[irq] += PIRQ_PENALTY_ISA_ALWAYS; + else + acpi_irq_penalty[irq] += PIRQ_PENALTY_PCI_USING; + } +} + /* * Over-ride default table to reserve additional IRQs for use by ISA * e.g. acpi_irq_isa=5 diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 4550be3bb63b5..808c43afa8ac9 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -198,7 +198,7 @@ struct pci_dev; int acpi_pci_irq_enable (struct pci_dev *dev); void acpi_penalize_isa_irq(int irq, int active); - +void acpi_penalize_sci_irq(int irq, int trigger, int polarity); void acpi_pci_irq_disable (struct pci_dev *dev); extern int ec_read(u8 addr, u8 *val); From ad2fee661f32e3f5399c116453d50867df3a6042 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Sun, 7 Jun 2015 15:20:11 +0100 Subject: [PATCH 0796/2091] drivercore: Fix unregistration path of platform devices commit 7f5dcaf1fdf289767a126a0a5cc3ef39b5254b06 upstream. The unregister path of platform_device is broken. On registration, it will register all resources with either a parent already set, or type==IORESOURCE_{IO,MEM}. However, on unregister it will release everything with type==IORESOURCE_{IO,MEM}, but ignore the others. There are also cases where resources don't get registered in the first place, like with devices created by of_platform_populate()*. Fix the unregister path to be symmetrical with the register path by checking the parent pointer instead of the type field to decide which resources to unregister. This is safe because the upshot of the registration path algorithm is that registered resources have a parent pointer, and non-registered resources do not. * It can be argued that of_platform_populate() should be registering it's resources, and they argument has some merit. However, there are quite a few platforms that end up broken if we try to do that due to overlapping resources in the device tree. Until that is fixed, we need to solve the immediate problem. Cc: Pantelis Antoniou Cc: Wolfram Sang Cc: Rob Herring Cc: Greg Kroah-Hartman Cc: Ricardo Ribalda Delgado Signed-off-by: Grant Likely Tested-by: Ricardo Ribalda Delgado Tested-by: Wolfram Sang Signed-off-by: Rob Herring Signed-off-by: Greg Kroah-Hartman --- drivers/base/platform.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/base/platform.c b/drivers/base/platform.c index ebf034b972785..7403de94832c0 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -375,9 +375,7 @@ int platform_device_add(struct platform_device *pdev) while (--i >= 0) { struct resource *r = &pdev->resource[i]; - unsigned long type = resource_type(r); - - if (type == IORESOURCE_MEM || type == IORESOURCE_IO) + if (r->parent) release_resource(r); } @@ -408,9 +406,7 @@ void platform_device_del(struct platform_device *pdev) for (i = 0; i < pdev->num_resources; i++) { struct resource *r = &pdev->resource[i]; - unsigned long type = resource_type(r); - - if (type == IORESOURCE_MEM || type == IORESOURCE_IO) + if (r->parent) release_resource(r); } } From 16e327d319bf3cbdd507693ed18792cda3af3324 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 12 Aug 2015 15:00:12 -0500 Subject: [PATCH 0797/2091] fs: Set the size of empty dirs to 0. commit 4b75de8615050c1b0dd8d7794838c42f74ed36ba upstream. 
Before the make_empty_dir_inode calls were introduce into proc, sysfs, and sysctl those directories when stated reported an i_size of 0. make_empty_dir_inode started reporting an i_size of 2. At least one userspace application depended on stat returning i_size of 0. So modify make_empty_dir_inode to cause an i_size of 0 to be reported for these directories. Reported-by: Tejun Heo Acked-by: Tejun Heo Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- fs/libfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/libfs.c b/fs/libfs.c index 02813592e1216..f4641fd27bda8 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -1176,7 +1176,7 @@ void make_empty_dir_inode(struct inode *inode) inode->i_uid = GLOBAL_ROOT_UID; inode->i_gid = GLOBAL_ROOT_GID; inode->i_rdev = 0; - inode->i_size = 2; + inode->i_size = 0; inode->i_blkbits = PAGE_SHIFT; inode->i_blocks = 0; From 213abaf9525398c7dd34012b29274261dd8cd797 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 2 Sep 2015 22:51:53 +0200 Subject: [PATCH 0798/2091] hpfs: update ctime and mtime on directory modification commit f49a26e7718dd30b49e3541e3e25aecf5e7294e2 upstream. Update ctime and mtime when a directory is modified. (though OS/2 doesn't update them anyway) Signed-off-by: Mikulas Patocka Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/hpfs/namei.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c index a0872f239f04f..9e92c9c2d3195 100644 --- a/fs/hpfs/namei.c +++ b/fs/hpfs/namei.c @@ -8,6 +8,17 @@ #include #include "hpfs_fn.h" +static void hpfs_update_directory_times(struct inode *dir) +{ + time_t t = get_seconds(); + if (t == dir->i_mtime.tv_sec && + t == dir->i_ctime.tv_sec) + return; + dir->i_mtime.tv_sec = dir->i_ctime.tv_sec = t; + dir->i_mtime.tv_nsec = dir->i_ctime.tv_nsec = 0; + hpfs_write_inode_nolock(dir); +} + static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) { const unsigned char *name = dentry->d_name.name; @@ -99,6 +110,7 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) result->i_mode = mode | S_IFDIR; hpfs_write_inode_nolock(result); } + hpfs_update_directory_times(dir); d_instantiate(dentry, result); hpfs_unlock(dir->i_sb); return 0; @@ -187,6 +199,7 @@ static int hpfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, b result->i_mode = mode | S_IFREG; hpfs_write_inode_nolock(result); } + hpfs_update_directory_times(dir); d_instantiate(dentry, result); hpfs_unlock(dir->i_sb); return 0; @@ -262,6 +275,7 @@ static int hpfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, de insert_inode_hash(result); hpfs_write_inode_nolock(result); + hpfs_update_directory_times(dir); d_instantiate(dentry, result); brelse(bh); hpfs_unlock(dir->i_sb); @@ -340,6 +354,7 @@ static int hpfs_symlink(struct inode *dir, struct dentry *dentry, const char *sy insert_inode_hash(result); hpfs_write_inode_nolock(result); + hpfs_update_directory_times(dir); d_instantiate(dentry, result); hpfs_unlock(dir->i_sb); return 0; @@ -423,6 +438,8 @@ static int hpfs_unlink(struct inode *dir, struct dentry *dentry) out1: hpfs_brelse4(&qbh); out: + if (!err) + hpfs_update_directory_times(dir); hpfs_unlock(dir->i_sb); return err; } @@ -477,6 +494,8 @@ static int hpfs_rmdir(struct inode *dir, struct dentry *dentry) out1: hpfs_brelse4(&qbh); out: + if (!err) + hpfs_update_directory_times(dir); hpfs_unlock(dir->i_sb); return err; } @@ -595,7 +614,7 @@ 
static int hpfs_rename(struct inode *old_dir, struct dentry *old_dentry, goto end1; } - end: +end: hpfs_i(i)->i_parent_dir = new_dir->i_ino; if (S_ISDIR(i->i_mode)) { inc_nlink(new_dir); @@ -610,6 +629,10 @@ static int hpfs_rename(struct inode *old_dir, struct dentry *old_dentry, brelse(bh); } end1: + if (!err) { + hpfs_update_directory_times(old_dir); + hpfs_update_directory_times(new_dir); + } hpfs_unlock(i->i_sb); return err; } From d3b428f0361d6dcbe7c6665ae0a824517a0b1ca9 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 4 Sep 2015 15:44:57 -0700 Subject: [PATCH 0799/2091] fs: create and use seq_show_option for escaping commit a068acf2ee77693e0bf39d6e07139ba704f461c3 upstream. Many file systems that implement the show_options hook fail to correctly escape their output which could lead to unescaped characters (e.g. new lines) leaking into /proc/mounts and /proc/[pid]/mountinfo files. This could lead to confusion, spoofed entries (resulting in things like systemd issuing false d-bus "mount" notifications), and who knows what else. This looks like it would only be the root user stepping on themselves, but it's possible weird things could happen in containers or in other situations with delegated mount privileges. Here's an example using overlay with setuid fusermount trusting the contents of /proc/mounts (via the /etc/mtab symlink). Imagine the use of "sudo" is something more sneaky: $ BASE="ovl" $ MNT="$BASE/mnt" $ LOW="$BASE/lower" $ UP="$BASE/upper" $ WORK="$BASE/work/ 0 0 none /proc fuse.pwn user_id=1000" $ mkdir -p "$LOW" "$UP" "$WORK" $ sudo mount -t overlay -o "lowerdir=$LOW,upperdir=$UP,workdir=$WORK" none /mnt $ cat /proc/mounts none /root/ovl/mnt overlay rw,relatime,lowerdir=ovl/lower,upperdir=ovl/upper,workdir=ovl/work/ 0 0 none /proc fuse.pwn user_id=1000 0 0 $ fusermount -u /proc $ cat /proc/mounts cat: /proc/mounts: No such file or directory This fixes the problem by adding new seq_show_option and seq_show_option_n helpers, and updating the vulnerable show_option handlers to use them as needed. Some, like SELinux, need to be open coded due to unusual existing escape mechanisms. [akpm@linux-foundation.org: add lost chunk, per Kees] [keescook@chromium.org: seq_show_option should be using const parameters] Signed-off-by: Kees Cook Acked-by: Serge Hallyn Acked-by: Jan Kara Acked-by: Paul Moore Cc: J. R. 
Okajima Signed-off-by: Kees Cook Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/ceph/super.c | 2 +- fs/cifs/cifsfs.c | 6 +++--- fs/ext4/super.c | 4 ++-- fs/gfs2/super.c | 6 +++--- fs/hfs/super.c | 4 ++-- fs/hfsplus/options.c | 4 ++-- fs/hostfs/hostfs_kern.c | 2 +- fs/ocfs2/super.c | 4 ++-- fs/overlayfs/super.c | 6 +++--- fs/reiserfs/super.c | 8 +++++--- fs/xfs/xfs_super.c | 4 ++-- include/linux/seq_file.h | 35 +++++++++++++++++++++++++++++++++++ kernel/cgroup.c | 7 ++++--- net/ceph/ceph_common.c | 7 +++++-- security/selinux/hooks.c | 2 +- 15 files changed, 71 insertions(+), 30 deletions(-) diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 4e9905374078a..0d47422e35481 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -466,7 +466,7 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root) if (fsopt->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT) seq_printf(m, ",readdir_max_bytes=%d", fsopt->max_readdir_bytes); if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT)) - seq_printf(m, ",snapdirname=%s", fsopt->snapdir_name); + seq_show_option(m, "snapdirname", fsopt->snapdir_name); return 0; } diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 0a9fb6b53126a..6a1119e87fbb6 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -394,17 +394,17 @@ cifs_show_options(struct seq_file *s, struct dentry *root) struct sockaddr *srcaddr; srcaddr = (struct sockaddr *)&tcon->ses->server->srcaddr; - seq_printf(s, ",vers=%s", tcon->ses->server->vals->version_string); + seq_show_option(s, "vers", tcon->ses->server->vals->version_string); cifs_show_security(s, tcon->ses); cifs_show_cache_flavor(s, cifs_sb); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER) seq_puts(s, ",multiuser"); else if (tcon->ses->user_name) - seq_printf(s, ",username=%s", tcon->ses->user_name); + seq_show_option(s, "username", tcon->ses->user_name); if (tcon->ses->domainName) - seq_printf(s, ",domain=%s", tcon->ses->domainName); + seq_show_option(s, "domain", tcon->ses->domainName); if (srcaddr->sa_family != AF_UNSPEC) { struct sockaddr_in *saddr4; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index ca12affdba961..6b4eb94b04a59 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1738,10 +1738,10 @@ static inline void ext4_show_quota_options(struct seq_file *seq, } if (sbi->s_qf_names[USRQUOTA]) - seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); + seq_show_option(seq, "usrjquota", sbi->s_qf_names[USRQUOTA]); if (sbi->s_qf_names[GRPQUOTA]) - seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]); + seq_show_option(seq, "grpjquota", sbi->s_qf_names[GRPQUOTA]); #endif } diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 859c6edbf81a0..c18b49dc5d4f6 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1334,11 +1334,11 @@ static int gfs2_show_options(struct seq_file *s, struct dentry *root) if (is_ancestor(root, sdp->sd_master_dir)) seq_puts(s, ",meta"); if (args->ar_lockproto[0]) - seq_printf(s, ",lockproto=%s", args->ar_lockproto); + seq_show_option(s, "lockproto", args->ar_lockproto); if (args->ar_locktable[0]) - seq_printf(s, ",locktable=%s", args->ar_locktable); + seq_show_option(s, "locktable", args->ar_locktable); if (args->ar_hostdata[0]) - seq_printf(s, ",hostdata=%s", args->ar_hostdata); + seq_show_option(s, "hostdata", args->ar_hostdata); if (args->ar_spectator) seq_puts(s, ",spectator"); if (args->ar_localflocks) diff --git a/fs/hfs/super.c b/fs/hfs/super.c index eee7206c38d18..410b65eea683d 100644 --- a/fs/hfs/super.c 
+++ b/fs/hfs/super.c @@ -135,9 +135,9 @@ static int hfs_show_options(struct seq_file *seq, struct dentry *root) struct hfs_sb_info *sbi = HFS_SB(root->d_sb); if (sbi->s_creator != cpu_to_be32(0x3f3f3f3f)) - seq_printf(seq, ",creator=%.4s", (char *)&sbi->s_creator); + seq_show_option_n(seq, "creator", (char *)&sbi->s_creator, 4); if (sbi->s_type != cpu_to_be32(0x3f3f3f3f)) - seq_printf(seq, ",type=%.4s", (char *)&sbi->s_type); + seq_show_option_n(seq, "type", (char *)&sbi->s_type, 4); seq_printf(seq, ",uid=%u,gid=%u", from_kuid_munged(&init_user_ns, sbi->s_uid), from_kgid_munged(&init_user_ns, sbi->s_gid)); diff --git a/fs/hfsplus/options.c b/fs/hfsplus/options.c index c90b72ee676d8..bb806e58c9770 100644 --- a/fs/hfsplus/options.c +++ b/fs/hfsplus/options.c @@ -218,9 +218,9 @@ int hfsplus_show_options(struct seq_file *seq, struct dentry *root) struct hfsplus_sb_info *sbi = HFSPLUS_SB(root->d_sb); if (sbi->creator != HFSPLUS_DEF_CR_TYPE) - seq_printf(seq, ",creator=%.4s", (char *)&sbi->creator); + seq_show_option_n(seq, "creator", (char *)&sbi->creator, 4); if (sbi->type != HFSPLUS_DEF_CR_TYPE) - seq_printf(seq, ",type=%.4s", (char *)&sbi->type); + seq_show_option_n(seq, "type", (char *)&sbi->type, 4); seq_printf(seq, ",umask=%o,uid=%u,gid=%u", sbi->umask, from_kuid_munged(&init_user_ns, sbi->uid), from_kgid_munged(&init_user_ns, sbi->gid)); diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 07d8d8f52faf5..de2d6245e9fad 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -260,7 +260,7 @@ static int hostfs_show_options(struct seq_file *seq, struct dentry *root) size_t offset = strlen(root_ino) + 1; if (strlen(root_path) > offset) - seq_printf(seq, ",%s", root_path + offset); + seq_show_option(seq, root_path + offset, NULL); if (append) seq_puts(seq, ",append"); diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 403c5660b3064..a482e312c7b28 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1550,8 +1550,8 @@ static int ocfs2_show_options(struct seq_file *s, struct dentry *root) seq_printf(s, ",localflocks,"); if (osb->osb_cluster_stack[0]) - seq_printf(s, ",cluster_stack=%.*s", OCFS2_STACK_LABEL_LEN, - osb->osb_cluster_stack); + seq_show_option_n(s, "cluster_stack", osb->osb_cluster_stack, + OCFS2_STACK_LABEL_LEN); if (opts & OCFS2_MOUNT_USRQUOTA) seq_printf(s, ",usrquota"); if (opts & OCFS2_MOUNT_GRPQUOTA) diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index bf8537c7f4552..155989455a721 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -517,10 +517,10 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry) struct super_block *sb = dentry->d_sb; struct ovl_fs *ufs = sb->s_fs_info; - seq_printf(m, ",lowerdir=%s", ufs->config.lowerdir); + seq_show_option(m, "lowerdir", ufs->config.lowerdir); if (ufs->config.upperdir) { - seq_printf(m, ",upperdir=%s", ufs->config.upperdir); - seq_printf(m, ",workdir=%s", ufs->config.workdir); + seq_show_option(m, "upperdir", ufs->config.upperdir); + seq_show_option(m, "workdir", ufs->config.workdir); } return 0; } diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 0111ad0466ed4..cf6fa25f884b4 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -714,18 +714,20 @@ static int reiserfs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",acl"); if (REISERFS_SB(s)->s_jdev) - seq_printf(seq, ",jdev=%s", REISERFS_SB(s)->s_jdev); + seq_show_option(seq, "jdev", REISERFS_SB(s)->s_jdev); if (journal->j_max_commit_age != 
journal->j_default_max_commit_age) seq_printf(seq, ",commit=%d", journal->j_max_commit_age); #ifdef CONFIG_QUOTA if (REISERFS_SB(s)->s_qf_names[USRQUOTA]) - seq_printf(seq, ",usrjquota=%s", REISERFS_SB(s)->s_qf_names[USRQUOTA]); + seq_show_option(seq, "usrjquota", + REISERFS_SB(s)->s_qf_names[USRQUOTA]); else if (opts & (1 << REISERFS_USRQUOTA)) seq_puts(seq, ",usrquota"); if (REISERFS_SB(s)->s_qf_names[GRPQUOTA]) - seq_printf(seq, ",grpjquota=%s", REISERFS_SB(s)->s_qf_names[GRPQUOTA]); + seq_show_option(seq, "grpjquota", + REISERFS_SB(s)->s_qf_names[GRPQUOTA]); else if (opts & (1 << REISERFS_GRPQUOTA)) seq_puts(seq, ",grpquota"); if (REISERFS_SB(s)->s_jquota_fmt) { diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 858e1e62bbaa3..65a45372fb1f3 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -504,9 +504,9 @@ xfs_showargs( seq_printf(m, "," MNTOPT_LOGBSIZE "=%dk", mp->m_logbsize >> 10); if (mp->m_logname) - seq_printf(m, "," MNTOPT_LOGDEV "=%s", mp->m_logname); + seq_show_option(m, MNTOPT_LOGDEV, mp->m_logname); if (mp->m_rtname) - seq_printf(m, "," MNTOPT_RTDEV "=%s", mp->m_rtname); + seq_show_option(m, MNTOPT_RTDEV, mp->m_rtname); if (mp->m_dalign > 0) seq_printf(m, "," MNTOPT_SUNIT "=%d", diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index afbb1fd77c772..7848473a5bc8b 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -148,6 +148,41 @@ static inline struct user_namespace *seq_user_ns(struct seq_file *seq) #endif } +/** + * seq_show_options - display mount options with appropriate escapes. + * @m: the seq_file handle + * @name: the mount option name + * @value: the mount option name's value, can be NULL + */ +static inline void seq_show_option(struct seq_file *m, const char *name, + const char *value) +{ + seq_putc(m, ','); + seq_escape(m, name, ",= \t\n\\"); + if (value) { + seq_putc(m, '='); + seq_escape(m, value, ", \t\n\\"); + } +} + +/** + * seq_show_option_n - display mount options with appropriate escapes + * where @value must be a specific length. + * @m: the seq_file handle + * @name: the mount option name + * @value: the mount option name's value, cannot be NULL + * @length: the length of @value to display + * + * This is a macro since this uses "length" to define the size of the + * stack buffer. 
+ */ +#define seq_show_option_n(m, name, value, length) { \ + char val_buf[length + 1]; \ + strncpy(val_buf, value, length); \ + val_buf[length] = '\0'; \ + seq_show_option(m, name, val_buf); \ +} + #define SEQ_START_TOKEN ((void *)1) /* * Helpers for iteration over list_head-s in seq_files diff --git a/kernel/cgroup.c b/kernel/cgroup.c index e8a5491be756a..4d65b66ae60d4 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1319,7 +1319,7 @@ static int cgroup_show_options(struct seq_file *seq, for_each_subsys(ss, ssid) if (root->subsys_mask & (1 << ssid)) - seq_printf(seq, ",%s", ss->name); + seq_show_option(seq, ss->name, NULL); if (root->flags & CGRP_ROOT_NOPREFIX) seq_puts(seq, ",noprefix"); if (root->flags & CGRP_ROOT_XATTR) @@ -1327,13 +1327,14 @@ static int cgroup_show_options(struct seq_file *seq, spin_lock(&release_agent_path_lock); if (strlen(root->release_agent_path)) - seq_printf(seq, ",release_agent=%s", root->release_agent_path); + seq_show_option(seq, "release_agent", + root->release_agent_path); spin_unlock(&release_agent_path_lock); if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags)) seq_puts(seq, ",clone_children"); if (strlen(root->name)) - seq_printf(seq, ",name=%s", root->name); + seq_show_option(seq, "name", root->name); return 0; } diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 79e8f71aef5be..3f76eb84b395f 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -495,8 +495,11 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client) struct ceph_options *opt = client->options; size_t pos = m->count; - if (opt->name) - seq_printf(m, "name=%s,", opt->name); + if (opt->name) { + seq_puts(m, "name="); + seq_escape(m, opt->name, ", \t\n\\"); + seq_putc(m, ','); + } if (opt->key) seq_puts(m, "secret=,"); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 7f8d7f19e0446..280235cc3a980 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -1095,7 +1095,7 @@ static void selinux_write_opts(struct seq_file *m, seq_puts(m, prefix); if (has_comma) seq_putc(m, '\"'); - seq_puts(m, opts->mnt_opts[i]); + seq_escape(m, opts->mnt_opts[i], "\"\n\\"); if (has_comma) seq_putc(m, '\"'); } From 77798915750db46f10bb449e1625d6368ea42e25 Mon Sep 17 00:00:00 2001 From: Caesar Wang Date: Mon, 6 Jul 2015 11:37:23 +0800 Subject: [PATCH 0800/2091] ARM: rockchip: fix broken build commit cb8cc37f4d38d96552f2c52deb15e511cdacf906 upstream. The following was seen in branch[0] build. arch/arm/mach-rockchip/platsmp.c:154:23: error: 'rockchip_secondary_startup' undeclared (first use in this function) branch[0]: git://git.kernel.org/pub/scm/linux/kernel/git/mmind/linux-rockchip.git v4.3-armsoc/soc The broken build is caused by the commit fe4407c0dc58 ("ARM: rockchip: fix the CPU soft reset"). Signed-off-by: Caesar Wang The breakage was a result of it being wrongly merged in my branch with the cache invalidation rework from Russell 02b4e2756e01c ("ARM: v7 setup function should invalidate L1 cache"). 
Signed-off-by: Heiko Stuebner Cc: Willy Tarreau Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-rockchip/platsmp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm/mach-rockchip/platsmp.c b/arch/arm/mach-rockchip/platsmp.c index 58ca50104a14e..611a5f96d3ca0 100644 --- a/arch/arm/mach-rockchip/platsmp.c +++ b/arch/arm/mach-rockchip/platsmp.c @@ -152,8 +152,7 @@ static int __cpuinit rockchip_boot_secondary(unsigned int cpu, * */ mdelay(1); /* ensure the cpus other than cpu0 to startup */ - writel(virt_to_phys(rockchip_secondary_startup), - sram_base_addr + 8); + writel(virt_to_phys(secondary_startup), sram_base_addr + 8); writel(0xDEADBEAF, sram_base_addr + 4); dsb_sev(); } From 36311a9ec4904c080bbdfcefc0f3d609ed508224 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 21 Sep 2015 10:06:02 -0700 Subject: [PATCH 0801/2091] Linux 4.1.8 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b8591e5f79b8b..dbf3baa5fabbb 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 1 -SUBLEVEL = 7 +SUBLEVEL = 8 EXTRAVERSION = NAME = Series 4800 From f84e11df678f2e3c47264f9f42523d822b3fe426 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Fri, 14 Aug 2015 22:33:33 +0200 Subject: [PATCH 0802/2091] NFC: st21nfca: fix use of uninitialized variables in error path commit 5a3570061a131309143a49e4bbdbce7e23f261e7 upstream. st21nfca_hci_load_session() calls kfree_skb() on unitialized variables skb_pipe_info and skb_pipe_list if the call to nfc_hci_connect_gate() failed. Reword the error path to not use these variables when they are not initialized. While at it, there seemed to be a memory leak because skb_pipe_info was only freed once, after the for-loop, even though several ones were created by nfc_hci_send_cmd. 
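The ownership rule being fixed can be reduced to a plain C sketch (illustrative names only, no NFC or skb types): a buffer handed back on every loop iteration has to be released on every iteration, and an early error return must not free a pointer that was never assigned.

  #include <stdlib.h>

  struct info { int data; };

  /* stand-in for a command that hands back a freshly allocated buffer */
  static int get_info(struct info **out)
  {
          *out = malloc(sizeof(**out));
          return *out ? 0 : -1;
  }

  static int load_session(int entries)
  {
          struct info *info;
          int i, r;

          for (i = 0; i < entries; i++) {
                  r = get_info(&info);
                  if (r < 0)
                          return r;   /* nothing to free on this path */

                  /* ... consume info ... */

                  free(info);         /* released once per iteration, not after the loop */
          }
          return 0;
  }

  int main(void)
  {
          return load_session(4) ? 1 : 0;
  }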
Fixes: ec03ff1a8f9a ("NFC: st21nfca: Remove skb_pipe_list and skb_pipe_info useless allocation") Acked-by: Christophe Ricard Signed-off-by: Nicolas Iooss Signed-off-by: Samuel Ortiz Signed-off-by: Greg Kroah-Hartman --- drivers/nfc/st21nfca/st21nfca.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/nfc/st21nfca/st21nfca.c b/drivers/nfc/st21nfca/st21nfca.c index d251f7229c4e8..051286562fab3 100644 --- a/drivers/nfc/st21nfca/st21nfca.c +++ b/drivers/nfc/st21nfca/st21nfca.c @@ -148,14 +148,14 @@ static int st21nfca_hci_load_session(struct nfc_hci_dev *hdev) ST21NFCA_DEVICE_MGNT_GATE, ST21NFCA_DEVICE_MGNT_PIPE); if (r < 0) - goto free_info; + return r; /* Get pipe list */ r = nfc_hci_send_cmd(hdev, ST21NFCA_DEVICE_MGNT_GATE, ST21NFCA_DM_GETINFO, pipe_list, sizeof(pipe_list), &skb_pipe_list); if (r < 0) - goto free_info; + return r; /* Complete the existing gate_pipe table */ for (i = 0; i < skb_pipe_list->len; i++) { @@ -181,6 +181,7 @@ static int st21nfca_hci_load_session(struct nfc_hci_dev *hdev) info->src_host_id != ST21NFCA_ESE_HOST_ID) { pr_err("Unexpected apdu_reader pipe on host %x\n", info->src_host_id); + kfree_skb(skb_pipe_info); continue; } @@ -200,6 +201,7 @@ static int st21nfca_hci_load_session(struct nfc_hci_dev *hdev) hdev->pipes[st21nfca_gates[j].pipe].dest_host = info->src_host_id; } + kfree_skb(skb_pipe_info); } /* @@ -214,13 +216,12 @@ static int st21nfca_hci_load_session(struct nfc_hci_dev *hdev) st21nfca_gates[i].gate, st21nfca_gates[i].pipe); if (r < 0) - goto free_info; + goto free_list; } } memcpy(hdev->init_data.gates, st21nfca_gates, sizeof(st21nfca_gates)); -free_info: - kfree_skb(skb_pipe_info); +free_list: kfree_skb(skb_pipe_list); return r; } From 4fbdb442390e3eeb755d299c3dcbc777ea83f193 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Wed, 19 Aug 2015 21:26:42 +0200 Subject: [PATCH 0803/2091] nfc: nci: hci: Add check on skb nci_hci_send_cmd parameter commit 5a9e0ffc0f128ecdf7c770f76c268e4f9f3c9118 upstream. skb can be NULL and may lead to a NULL pointer error. Add a check condition before setting HCI rx buffer. Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz Signed-off-by: Greg Kroah-Hartman --- net/nfc/nci/hci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c index ed54ec5338363..b33fed6d15841 100644 --- a/net/nfc/nci/hci.c +++ b/net/nfc/nci/hci.c @@ -233,7 +233,7 @@ int nci_hci_send_cmd(struct nci_dev *ndev, u8 gate, u8 cmd, r = nci_request(ndev, nci_hci_send_data_req, (unsigned long)&data, msecs_to_jiffies(NCI_DATA_TIMEOUT)); - if (r == NCI_STATUS_OK) + if (r == NCI_STATUS_OK && skb) *skb = conn_info->rx_skb; return r; From 1af97771b397a9ca2ee984c3277bcada3eec95cd Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sun, 9 Aug 2015 03:41:50 -0400 Subject: [PATCH 0804/2091] blk-mq: fix buffer overflow when reading sysfs file of 'pending' commit 596f5aad2a704b72934e5abec1b1b4114c16f45b upstream. There may be lots of pending requests so that the buffer of PAGE_SIZE can't hold them at all. One typical example is scsi-mq, the queue depth(.can_queue) of scsi_host and blk-mq is quite big but scsi_device's queue_depth is a bit small(.cmd_per_lun), then it is quite easy to have lots of pending requests in hw queue. This patch fixes the following warning and the related memory destruction. 
[ 359.025101] fill_read_buffer: blk_mq_hw_sysfs_show+0x0/0x7d returned bad count^M [ 359.055595] irq event stamp: 15537^M [ 359.055606] general protection fault: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC ^M [ 359.055614] Dumping ftrace buffer:^M [ 359.055660] (ftrace buffer empty)^M [ 359.055672] Modules linked in: nbd ipv6 kvm_intel kvm serio_raw^M [ 359.055678] CPU: 4 PID: 21631 Comm: stress-ng-sysfs Not tainted 4.2.0-rc5-next-20150805 #434^M [ 359.055679] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011^M [ 359.055682] task: ffff8802161cc000 ti: ffff88021b4a8000 task.ti: ffff88021b4a8000^M [ 359.055693] RIP: 0010:[] [] __kmalloc+0xe8/0x152^M Signed-off-by: Ming Lei Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- block/blk-mq-sysfs.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index b79685e06b70e..279c5d674edf3 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -141,15 +141,26 @@ static ssize_t blk_mq_sysfs_completed_show(struct blk_mq_ctx *ctx, char *page) static ssize_t sysfs_list_show(char *page, struct list_head *list, char *msg) { - char *start_page = page; struct request *rq; + int len = snprintf(page, PAGE_SIZE - 1, "%s:\n", msg); + + list_for_each_entry(rq, list, queuelist) { + const int rq_len = 2 * sizeof(rq) + 2; + + /* if the output will be truncated */ + if (PAGE_SIZE - 1 < len + rq_len) { + /* backspacing if it can't hold '\t...\n' */ + if (PAGE_SIZE - 1 < len + 5) + len -= rq_len; + len += snprintf(page + len, PAGE_SIZE - 1 - len, + "\t...\n"); + break; + } + len += snprintf(page + len, PAGE_SIZE - 1 - len, + "\t%p\n", rq); + } - page += sprintf(page, "%s:\n", msg); - - list_for_each_entry(rq, list, queuelist) - page += sprintf(page, "\t%p\n", rq); - - return page - start_page; + return len; } static ssize_t blk_mq_sysfs_rq_list_show(struct blk_mq_ctx *ctx, char *page) From 11e43d237c8ca2d39047c527b105366823c5f8bc Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 10 Aug 2015 17:35:07 -0500 Subject: [PATCH 0805/2091] unshare: Unsharing a thread does not require unsharing a vm commit 12c641ab8270f787dfcce08b5f20ce8b65008096 upstream. In the logic in the initial commit of unshare made creating a new thread group for a process, contingent upon creating a new memory address space for that process. That is wrong. Two separate processes in different thread groups can share a memory address space and clone allows creation of such proceses. This is significant because it was observed that mm_users > 1 does not mean that a process is multi-threaded, as reading /proc/PID/maps temporarily increments mm_users, which allows other processes to (accidentally) interfere with unshare() calls. Correct the check in check_unshare_flags() to test for !thread_group_empty() for CLONE_THREAD, CLONE_SIGHAND, and CLONE_VM. For sighand->count > 1 for CLONE_SIGHAND and CLONE_VM. For !current_is_single_threaded instead of mm_users > 1 for CLONE_VM. By using the correct checks in unshare this removes the possibility of an accidental denial of service attack. Additionally using the correct checks in unshare ensures that only an explicit unshare(CLONE_VM) can possibly trigger the slow path of current_is_single_threaded(). As an explict unshare(CLONE_VM) is pointless it is not expected there are many applications that make that call. 
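The resulting user-visible contract can be probed with a minimal userspace test (behaviour depends on kernel version; on a kernel with this change a single-threaded, unprivileged caller should see success, while a task that shares its mm or signal handlers with another task gets EINVAL):

  #define _GNU_SOURCE
  #include <sched.h>
  #include <errno.h>
  #include <stdio.h>
  #include <string.h>

  int main(void)
  {
          if (unshare(CLONE_VM) == 0)
                  printf("unshare(CLONE_VM): ok (nothing to unshare)\n");
          else
                  printf("unshare(CLONE_VM): %s\n", strerror(errno));
          return 0;
  }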
Fixes: b2e0d98705e60e45bbb3c0032c48824ad7ae0704 userns: Implement unshare of the user namespace Reported-by: Ricky Zhou Reported-by: Kees Cook Reviewed-by: Kees Cook Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- kernel/fork.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/kernel/fork.c b/kernel/fork.c index 03c1eaaa6ef56..8209fa2d36ef8 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1854,13 +1854,21 @@ static int check_unshare_flags(unsigned long unshare_flags) CLONE_NEWUSER|CLONE_NEWPID)) return -EINVAL; /* - * Not implemented, but pretend it works if there is nothing to - * unshare. Note that unsharing CLONE_THREAD or CLONE_SIGHAND - * needs to unshare vm. + * Not implemented, but pretend it works if there is nothing + * to unshare. Note that unsharing the address space or the + * signal handlers also need to unshare the signal queues (aka + * CLONE_THREAD). */ if (unshare_flags & (CLONE_THREAD | CLONE_SIGHAND | CLONE_VM)) { - /* FIXME: get_task_mm() increments ->mm_users */ - if (atomic_read(¤t->mm->mm_users) > 1) + if (!thread_group_empty(current)) + return -EINVAL; + } + if (unshare_flags & (CLONE_SIGHAND | CLONE_VM)) { + if (atomic_read(¤t->sighand->count) > 1) + return -EINVAL; + } + if (unshare_flags & CLONE_VM) { + if (!current_is_single_threaded()) return -EINVAL; } @@ -1928,16 +1936,16 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) */ if (unshare_flags & CLONE_NEWUSER) unshare_flags |= CLONE_THREAD | CLONE_FS; - /* - * If unsharing a thread from a thread group, must also unshare vm. - */ - if (unshare_flags & CLONE_THREAD) - unshare_flags |= CLONE_VM; /* * If unsharing vm, must also unshare signal handlers. */ if (unshare_flags & CLONE_VM) unshare_flags |= CLONE_SIGHAND; + /* + * If unsharing a signal handlers, must also unshare the signal queues. + */ + if (unshare_flags & CLONE_SIGHAND) + unshare_flags |= CLONE_THREAD; /* * If unsharing namespace, must also unshare filesystem information. */ From e8ad44583d5cffa93063a2c866c6f7806dd40374 Mon Sep 17 00:00:00 2001 From: Adrien Schildknecht Date: Wed, 19 Aug 2015 17:33:12 +0200 Subject: [PATCH 0806/2091] rtlwifi: rtl8192cu: Add new device ID commit 1642d09fb9b128e8e538b2a4179962a34f38dff9 upstream. 
The v2 of NetGear WNA1000M uses a different idProduct: USB ID 0846:9043 Signed-off-by: Adrien Schildknecht Acked-by: Larry Finger Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/rtlwifi/rtl8192cu/sw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/rtlwifi/rtl8192cu/sw.c b/drivers/net/wireless/rtlwifi/rtl8192cu/sw.c index 23806c243a531..fd4a5353d2169 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192cu/sw.c +++ b/drivers/net/wireless/rtlwifi/rtl8192cu/sw.c @@ -321,6 +321,7 @@ static struct usb_device_id rtl8192c_usb_ids[] = { {RTL_USB_DEVICE(0x07b8, 0x8188, rtl92cu_hal_cfg)}, /*Abocom - Abocom*/ {RTL_USB_DEVICE(0x07b8, 0x8189, rtl92cu_hal_cfg)}, /*Funai - Abocom*/ {RTL_USB_DEVICE(0x0846, 0x9041, rtl92cu_hal_cfg)}, /*NetGear WNA1000M*/ + {RTL_USB_DEVICE(0x0846, 0x9043, rtl92cu_hal_cfg)}, /*NG WNA1000Mv2*/ {RTL_USB_DEVICE(0x0b05, 0x17ba, rtl92cu_hal_cfg)}, /*ASUS-Edimax*/ {RTL_USB_DEVICE(0x0bda, 0x5088, rtl92cu_hal_cfg)}, /*Thinkware-CC&C*/ {RTL_USB_DEVICE(0x0df6, 0x0052, rtl92cu_hal_cfg)}, /*Sitecom - Edimax*/ From d7378111c7f7845a14792564d5168505f5309b74 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Wed, 8 Jul 2015 10:18:50 -0500 Subject: [PATCH 0807/2091] rtlwifi: rtl8821ae: Fix an expression that is always false commit 251086f588720277a6f5782020a648ce32c4e00b upstream. In routine _rtl8821ae_set_media_status(), an incorrect mask results in a test for AP status to always be false. Similar bugs were fixed in rtl8192cu and rtl8192de, but this instance was missed at that time. Reported-by: David Binderman Signed-off-by: Larry Finger Cc: David Binderman Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/rtlwifi/rtl8821ae/hw.c | 2 +- drivers/net/wireless/rtlwifi/rtl8821ae/reg.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/rtlwifi/rtl8821ae/hw.c b/drivers/net/wireless/rtlwifi/rtl8821ae/hw.c index 57966e3c8e8d4..3fa2fb7c8e4e9 100644 --- a/drivers/net/wireless/rtlwifi/rtl8821ae/hw.c +++ b/drivers/net/wireless/rtlwifi/rtl8821ae/hw.c @@ -2180,7 +2180,7 @@ static int _rtl8821ae_set_media_status(struct ieee80211_hw *hw, rtl_write_byte(rtlpriv, MSR, bt_msr); rtlpriv->cfg->ops->led_control(hw, ledaction); - if ((bt_msr & 0xfc) == MSR_AP) + if ((bt_msr & MSR_MASK) == MSR_AP) rtl_write_byte(rtlpriv, REG_BCNTCFG + 1, 0x00); else rtl_write_byte(rtlpriv, REG_BCNTCFG + 1, 0x66); diff --git a/drivers/net/wireless/rtlwifi/rtl8821ae/reg.h b/drivers/net/wireless/rtlwifi/rtl8821ae/reg.h index 53668fc8f23e2..1d6110f9c1fb6 100644 --- a/drivers/net/wireless/rtlwifi/rtl8821ae/reg.h +++ b/drivers/net/wireless/rtlwifi/rtl8821ae/reg.h @@ -429,6 +429,7 @@ #define MSR_ADHOC 0x01 #define MSR_INFRA 0x02 #define MSR_AP 0x03 +#define MSR_MASK 0x03 #define RRSR_RSC_OFFSET 21 #define RRSR_SHORT_OFFSET 23 From a0e26ed623a1e1460c1a191fbc0f37bddab7851a Mon Sep 17 00:00:00 2001 From: Shota Suzuki Date: Wed, 1 Jul 2015 09:25:52 +0900 Subject: [PATCH 0808/2091] igb: Fix oops caused by missing queue pairing commit 72ddef0506da852dc82f078f37ced8ef4d74a2bf upstream. When initializing igb driver (e.g. 82576, I350), IGB_FLAG_QUEUE_PAIRS is set if adapter->rss_queues exceeds half of max_rss_queues in igb_init_queue_configuration(). On the other hand, IGB_FLAG_QUEUE_PAIRS is not set even if the number of queues exceeds half of max_combined in igb_set_channels() when changing the number of queues by "ethtool -L". 
In this case, if numvecs is larger than MAX_MSIX_ENTRIES (10), the size of adapter->msix_entries[], an overflow can occur in igb_set_interrupt_capability(), which in turn leads to an oops. Fix this problem as follows: - When changing the number of queues by "ethtool -L", set IGB_FLAG_QUEUE_PAIRS in the same way as initializing igb driver. - When increasing the size of q_vector, reallocate it appropriately. (With IGB_FLAG_QUEUE_PAIRS set, the size of q_vector gets larger.) Another possible way to fix this problem is to cap the queues at its initial number, which is the number of the initial online cpus. But this is not the optimal way because we cannot increase queues when another cpu becomes online. Note that before commit cd14ef54d25b ("igb: Change to use statically allocated array for MSIx entries"), this problem did not cause oops but just made the number of queues become 1 because of entering msi_only mode in igb_set_interrupt_capability(). Fixes: 907b7835799f ("igb: Add ethtool support to configure number of channels") Signed-off-by: Shota Suzuki Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/intel/igb/igb.h | 1 + drivers/net/ethernet/intel/igb/igb_ethtool.c | 5 ++++- drivers/net/ethernet/intel/igb/igb_main.c | 16 ++++++++++++++-- 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h index c2bd4f98a8376..212d668dabb38 100644 --- a/drivers/net/ethernet/intel/igb/igb.h +++ b/drivers/net/ethernet/intel/igb/igb.h @@ -540,6 +540,7 @@ void igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, unsigned char *va, struct sk_buff *skb); int igb_ptp_set_ts_config(struct net_device *netdev, struct ifreq *ifr); int igb_ptp_get_ts_config(struct net_device *netdev, struct ifreq *ifr); +void igb_set_flag_queue_pairs(struct igb_adapter *, const u32); #ifdef CONFIG_IGB_HWMON void igb_sysfs_exit(struct igb_adapter *adapter); int igb_sysfs_init(struct igb_adapter *adapter); diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index d5673eb90c542..0afc0913e5b91 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -2991,6 +2991,7 @@ static int igb_set_channels(struct net_device *netdev, { struct igb_adapter *adapter = netdev_priv(netdev); unsigned int count = ch->combined_count; + unsigned int max_combined = 0; /* Verify they are not requesting separate vectors */ if (!count || ch->rx_count || ch->tx_count) @@ -3001,11 +3002,13 @@ static int igb_set_channels(struct net_device *netdev, return -EINVAL; /* Verify the number of channels doesn't exceed hw limits */ - if (count > igb_max_channels(adapter)) + max_combined = igb_max_channels(adapter); + if (count > max_combined) return -EINVAL; if (count != adapter->rss_queues) { adapter->rss_queues = count; + igb_set_flag_queue_pairs(adapter, max_combined); /* Hardware has to reinitialize queues and interrupts to * match the new configuration. 
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index a0a9b1fcb5e8e..ec79e6461f568 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -1205,10 +1205,14 @@ static int igb_alloc_q_vector(struct igb_adapter *adapter, /* allocate q_vector and rings */ q_vector = adapter->q_vector[v_idx]; - if (!q_vector) + if (!q_vector) { q_vector = kzalloc(size, GFP_KERNEL); - else + } else if (size > ksize(q_vector)) { + kfree_rcu(q_vector, rcu); + q_vector = kzalloc(size, GFP_KERNEL); + } else { memset(q_vector, 0, size); + } if (!q_vector) return -ENOMEM; @@ -2901,6 +2905,14 @@ static void igb_init_queue_configuration(struct igb_adapter *adapter) adapter->rss_queues = min_t(u32, max_rss_queues, num_online_cpus()); + igb_set_flag_queue_pairs(adapter, max_rss_queues); +} + +void igb_set_flag_queue_pairs(struct igb_adapter *adapter, + const u32 max_rss_queues) +{ + struct e1000_hw *hw = &adapter->hw; + /* Determine if we need to pair queues. */ switch (hw->mac.type) { case e1000_82575: From 4e00f05deff2b5828c0bf2a747f8b024c0b25718 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Tue, 1 Sep 2015 18:07:41 +0200 Subject: [PATCH 0809/2091] tg3: Fix temperature reporting commit d3d11fe08ccc9bff174fc958722b5661f0932486 upstream. The temperature registers appear to report values in degrees Celsius while the hwmon API mandates values to be exposed in millidegrees Celsius. Do the conversion so that the values reported by "sensors" are correct. Fixes: aed93e0bf493 ("tg3: Add hwmon support for temperature") Signed-off-by: Jean Delvare Cc: Prashant Sreedharan Cc: Michael Chan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/tg3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 069952fa5d644..0d8af5bb59077 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -10757,7 +10757,7 @@ static ssize_t tg3_show_temp(struct device *dev, tg3_ape_scratchpad_read(tp, &temperature, attr->index, sizeof(temperature)); spin_unlock_bh(&tp->lock); - return sprintf(buf, "%u\n", temperature); + return sprintf(buf, "%u\n", temperature * 1000); } From 2297868856a2208eed7598446edfcc71ffb717a1 Mon Sep 17 00:00:00 2001 From: Markos Chandras Date: Fri, 17 Jul 2015 10:36:03 +0100 Subject: [PATCH 0810/2091] MIPS: math-emu: Allow m{f,t}hc emulation on MIPS R6 commit e8f80cc1a6d80587136b015e989a12827e1fcfe5 upstream. The mfhc/mthc instructions are supported on MIPS R6 so emulate them if needed. 
Signed-off-by: Markos Chandras Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/10737/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/math-emu/cp1emu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c index 6983fcd481319..7a47b359622e5 100644 --- a/arch/mips/math-emu/cp1emu.c +++ b/arch/mips/math-emu/cp1emu.c @@ -1137,7 +1137,7 @@ static int cop1Emulate(struct pt_regs *xcp, struct mips_fpu_struct *ctx, break; case mfhc_op: - if (!cpu_has_mips_r2) + if (!cpu_has_mips_r2_r6) goto sigill; /* copregister rd -> gpr[rt] */ @@ -1148,7 +1148,7 @@ static int cop1Emulate(struct pt_regs *xcp, struct mips_fpu_struct *ctx, break; case mthc_op: - if (!cpu_has_mips_r2) + if (!cpu_has_mips_r2_r6) goto sigill; /* copregister rd <- gpr[rt] */ From 5242720923a0a6d3a59b62e51c053f0948b9d2a3 Mon Sep 17 00:00:00 2001 From: Markos Chandras Date: Fri, 17 Jul 2015 10:38:32 +0100 Subject: [PATCH 0811/2091] MIPS: math-emu: Emulate missing BC1{EQ,NE}Z instructions commit c909ca718e8f50cf484ef06a8dd935e738e8e53d upstream. Commit c8a34581ec09 ("MIPS: Emulate the BC1{EQ,NE}Z FPU instructions") added support for emulating the new R6 BC1{EQ,NE}Z branches but it missed the case where the instruction that caused the exception was not on a DS. Signed-off-by: Markos Chandras Fixes: c8a34581ec09 ("MIPS: Emulate the BC1{EQ,NE}Z FPU instructions") Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/10738/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/math-emu/cp1emu.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c index 7a47b359622e5..2b95e34fa9e89 100644 --- a/arch/mips/math-emu/cp1emu.c +++ b/arch/mips/math-emu/cp1emu.c @@ -1181,6 +1181,24 @@ static int cop1Emulate(struct pt_regs *xcp, struct mips_fpu_struct *ctx, } break; + case bc1eqz_op: + case bc1nez_op: + if (!cpu_has_mips_r6 || delay_slot(xcp)) + return SIGILL; + + cond = likely = 0; + switch (MIPSInst_RS(ir)) { + case bc1eqz_op: + if (get_fpr32(¤t->thread.fpu.fpr[MIPSInst_RT(ir)], 0) & 0x1) + cond = 1; + break; + case bc1nez_op: + if (!(get_fpr32(¤t->thread.fpu.fpr[MIPSInst_RT(ir)], 0) & 0x1)) + cond = 1; + break; + } + goto branch_common; + case bc_op: if (delay_slot(xcp)) return SIGILL; @@ -1207,7 +1225,7 @@ static int cop1Emulate(struct pt_regs *xcp, struct mips_fpu_struct *ctx, case bct_op: break; } - +branch_common: set_delay_slot(xcp); if (cond) { /* From 1a7af1d9c5256dab6c33caf0b855d38102e79bb5 Mon Sep 17 00:00:00 2001 From: Bob Copeland Date: Sat, 13 Jun 2015 10:16:31 -0400 Subject: [PATCH 0812/2091] mac80211: enable assoc check for mesh interfaces commit 3633ebebab2bbe88124388b7620442315c968e8f upstream. We already set a station to be associated when peering completes, both in user space and in the kernel. Thus we should always have an associated sta before sending data frames to that station. Failure to check assoc state can cause crashes in the lower-level driver due to transmitting unicast data frames before driver sta structures (e.g. ampdu state in ath9k) are initialized. This occurred when forwarding in the presence of fixed mesh paths: frames were transmitted to stations with whom we hadn't yet completed peering. 
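The rule the change enforces is simple: peering/association must complete before any unicast data frame is handed to the driver, while the management frames used for peering itself may still go out. A small, self-contained C sketch of that gate, with simplified types that are not the mac80211 structures:

#include <errno.h>
#include <stdio.h>

enum peer_state { PEER_IDLE, PEER_PEERING, PEER_ESTAB };

struct peer {
	enum peer_state state;
};

/* Management/peering frames may always be sent; unicast data only once
 * the peer has reached the established (associated) state. */
static int tx_allowed(const struct peer *p, int is_mgmt)
{
	if (is_mgmt)
		return 0;
	return p->state == PEER_ESTAB ? 0 : -EPERM;
}

int main(void)
{
	struct peer p = { .state = PEER_PEERING };

	printf("data during peering: %d\n", tx_allowed(&p, 0));	/* dropped */
	p.state = PEER_ESTAB;
	printf("data after peering:  %d\n", tx_allowed(&p, 0));	/* forwarded */
	return 0;
}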
Reported-by: Alexis Green Tested-by: Jesse Jones Signed-off-by: Bob Copeland Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/tx.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 667111ee6a20f..5787f15a3a120 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -301,9 +301,6 @@ ieee80211_tx_h_check_assoc(struct ieee80211_tx_data *tx) if (tx->sdata->vif.type == NL80211_IFTYPE_WDS) return TX_CONTINUE; - if (tx->sdata->vif.type == NL80211_IFTYPE_MESH_POINT) - return TX_CONTINUE; - if (tx->flags & IEEE80211_TX_PS_BUFFERED) return TX_CONTINUE; From 348b074897b6ce7004a9b4024b2c641a1262a961 Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Fri, 21 Aug 2015 17:25:15 +1000 Subject: [PATCH 0813/2091] cxl: Remove racy attempt to force EEH invocation in reset commit 9d8e27673c45927fee9e7d8992ffb325a6b0b0e4 upstream. cxl_reset currently PERSTs the slot, and then repeatedly tries to read MMIO space in order to kick off EEH. There are 2 problems with this: it's unnecessary, and it's racy. It's unnecessary because the PERST will bring down the PHB link. That will be picked up by the CAPP, which will send out an HMI. Skiboot, noticing an HMI from the CAPP, will send an OPAL notification to the kernel, which will trigger EEH recovery. It's also racy: the EEH recovery triggered by the CAPP will eventually cause the MMIO space to have its mapping invalidated and the pointer NULLed out. This races with our attempt to read the MMIO space. This is causing OOPSes in testing. Simply drop all the attempts to force EEH detection, and trust that Skiboot will send the notification and that we'll act on it. The Skiboot code to send the EEH notification has been in Skiboot for as long as CAPP recovery has been supported, so we don't need to worry about breaking obscure setups with ancient firmware. Cc: Ryan Grimm Fixes: 62fa19d4b4fd ("cxl: Add ability to reset the card") Signed-off-by: Daniel Axtens Acked-by: Ian Munsie Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- drivers/misc/cxl/pci.c | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index 1ef01647265f9..34edf15018788 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -778,8 +778,6 @@ int cxl_reset(struct cxl *adapter) { struct pci_dev *dev = to_pci_dev(adapter->dev.parent); int rc; - int i; - u32 val; dev_info(&dev->dev, "CXL reset\n"); @@ -794,20 +792,6 @@ int cxl_reset(struct cxl *adapter) return rc; } - /* the PERST done above fences the PHB. So, reset depends on EEH - * to unbind the driver, tell Sapphire to reinit the PHB, and rebind - * the driver. Do an mmio read explictly to ensure EEH notices the - * fenced PHB. Retry for a few seconds before giving up. */ - i = 0; - while (((val = mmio_read32be(adapter->p1_mmio)) != 0xffffffff) && - (i < 5)) { - msleep(500); - i++; - } - - if (val != 0xffffffff) - dev_err(&dev->dev, "cxl: PERST failed to trigger EEH\n"); - return rc; } From a847e529661790162905169d28e9fe6e819345b7 Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Tue, 15 Sep 2015 15:04:07 +1000 Subject: [PATCH 0814/2091] cxl: Fix unbalanced pci_dev_get in cxl_probe commit 2925c2fdf1e0eb642482f5b30577e9435aaa8edb upstream. Currently the first thing we do in cxl_probe is to grab a reference on the pci device. Later on, we call device_register on our adapter. In our remove path, we call device_unregister, but we never call pci_dev_put. 
We therefore leak the device every time we do a reflash. device_register/unregister is sufficient to hold the reference. Therefore, drop the call to pci_dev_get. Here's why this is safe. The proposed cxl_probe(pdev) calls cxl_adapter_init: a) init calls cxl_adapter_alloc, which creates a struct cxl, conventionally called adapter. This struct contains a device entry, adapter->dev. b) init calls cxl_configure_adapter, where we set adapter->dev.parent = &dev->dev (here dev is the pci dev) So at this point, the cxl adapter's device's parent is the PCI device that I want to be refcounted properly. c) init calls cxl_register_adapter *) cxl_register_adapter calls device_register(&adapter->dev) So now we're in device_register, where dev is the adapter device, and we want to know if the PCI device is safe after we return. device_register(&adapter->dev) calls device_initialize() and then device_add(). device_add() does a get_device(). device_add() also explicitly grabs the device's parent, and calls get_device() on it: parent = get_device(dev->parent); So therefore, device_register() takes a lock on the parent PCI dev, which is what pci_dev_get() was guarding. pci_dev_get() can therefore be safely removed. Fixes: f204e0b8cedd ("cxl: Driver code for powernv PCIe based cards for userspace access") Signed-off-by: Daniel Axtens Acked-by: Ian Munsie Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- drivers/misc/cxl/pci.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index 34edf15018788..9eab76c48e590 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -1046,8 +1046,6 @@ static int cxl_probe(struct pci_dev *dev, const struct pci_device_id *id) int slice; int rc; - pci_dev_get(dev); - if (cxl_verbose) dump_cxl_config_space(dev); From c4f568f41c5ba3a16f70743240b7cad77ea250ec Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Sat, 15 Aug 2015 10:45:06 -0400 Subject: [PATCH 0815/2091] ext4: don't manipulate recovery flag when freezing no-journal fs commit c642dc9e1aaed953597e7092d7df329e6234096e upstream. At some point along this sequence of changes: f6e63f9 ext4: fold ext4_nojournal_sops into ext4_sops bb04457 ext4: support freezing ext2 (nojournal) file systems 9ca9238 ext4: Use separate super_operations structure for no_journal filesystems ext4 started setting needs_recovery on filesystems without journals when they are unfrozen. This makes no sense, and in fact confuses blkid to the point where it doesn't recognize the filesystem at all. (freeze ext2; unfreeze ext2; run blkid; see no output; run dumpe2fs, see needs_recovery set on fs w/ no journal). To fix this, don't manipulate the INCOMPAT_RECOVER feature on filesystems without journals. Reported-by: Stu Mark Reviewed-by: Jan Kara Signed-off-by: Eric Sandeen Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 6b4eb94b04a59..4d6cad65cd317 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4807,10 +4807,11 @@ static int ext4_freeze(struct super_block *sb) error = jbd2_journal_flush(journal); if (error < 0) goto out; + + /* Journal blocked and flushed, clear needs_recovery flag. */ + EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); } - /* Journal blocked and flushed, clear needs_recovery flag. 
*/ - EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); error = ext4_commit_super(sb, 1); out: if (journal) @@ -4828,8 +4829,11 @@ static int ext4_unfreeze(struct super_block *sb) if (sb->s_flags & MS_RDONLY) return 0; - /* Reset the needs_recovery flag before the fs is unlocked. */ - EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); + if (EXT4_SB(sb)->s_journal) { + /* Reset the needs_recovery flag before the fs is unlocked. */ + EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); + } + ext4_commit_super(sb, 1); return 0; } From 2a6f41747737303b2241f0e30c6960a30102ac26 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 16 Aug 2015 10:03:57 -0400 Subject: [PATCH 0816/2091] Revert "ext4: remove block_device_ejected" commit bdfe0cbd746aa9b2509c2f6d6be17193cf7facd7 upstream. This reverts commit 08439fec266c3cc5702953b4f54bdf5649357de0. Unfortunately we still need to test for bdi->dev to avoid a crash when a USB stick is yanked out while a file system is mounted: usb 2-2: USB disconnect, device number 2 Buffer I/O error on dev sdb1, logical block 15237120, lost sync page write JBD2: Error -5 detected when updating journal superblock for sdb1-8. BUG: unable to handle kernel paging request at 34beb000 IP: [] __percpu_counter_add+0x18/0xc0 *pdpt = 0000000023db9001 *pde = 0000000000000000 Oops: 0000 [#1] SMP CPU: 0 PID: 4083 Comm: umount Tainted: G U OE 4.1.1-040101-generic #201507011435 Hardware name: LENOVO 7675CTO/7675CTO, BIOS 7NETC2WW (2.22 ) 03/22/2011 task: ebf06b50 ti: ebebc000 task.ti: ebebc000 EIP: 0060:[] EFLAGS: 00010082 CPU: 0 EIP is at __percpu_counter_add+0x18/0xc0 EAX: f21c8e88 EBX: f21c8e88 ECX: 00000000 EDX: 00000001 ESI: 00000001 EDI: 00000000 EBP: ebebde60 ESP: ebebde40 DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 CR0: 8005003b CR2: 34beb000 CR3: 33354200 CR4: 000007f0 Stack: c1abe100 edcb0098 edcb00ec ffffffff f21c8e68 ffffffff f21c8e68 f286d160 ebebde84 c1160454 00000010 00000282 f72a77f8 00000984 f72a77f8 f286d160 f286d170 ebebdea0 c11e613f 00000000 00000282 f72a77f8 edd7f4d0 00000000 Call Trace: [] account_page_dirtied+0x74/0x110 [] __set_page_dirty+0x3f/0xb0 [] mark_buffer_dirty+0x53/0xc0 [] ext4_commit_super+0x17b/0x250 [] ext4_put_super+0xc1/0x320 [] ? fsnotify_unmount_inodes+0x1aa/0x1c0 [] ? evict_inodes+0xca/0xe0 [] generic_shutdown_super+0x6a/0xe0 [] ? prepare_to_wait_event+0xd0/0xd0 [] ? 
unregister_shrinker+0x40/0x50 [] kill_block_super+0x26/0x70 [] deactivate_locked_super+0x45/0x80 [] deactivate_super+0x47/0x60 [] cleanup_mnt+0x39/0x80 [] __cleanup_mnt+0x10/0x20 [] task_work_run+0x91/0xd0 [] do_notify_resume+0x7c/0x90 [] work_notify Code: 8b 55 e8 e9 f4 fe ff ff 90 90 90 90 90 90 90 90 90 90 90 55 89 e5 83 ec 20 89 5d f4 89 c3 89 75 f8 89 d6 89 7d fc 89 cf 8b 48 14 <64> 8b 01 89 45 ec 89 c2 8b 45 08 c1 fa 1f 01 75 ec 89 55 f0 89 EIP: [] __percpu_counter_add+0x18/0xc0 SS:ESP 0068:ebebde40 CR2: 0000000034beb000 ---[ end trace dd564a7bea834ecd ]--- Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=101011 Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 4d6cad65cd317..ff89971e3ee0f 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -324,6 +324,22 @@ static void save_error_info(struct super_block *sb, const char *func, ext4_commit_super(sb, 1); } +/* + * The del_gendisk() function uninitializes the disk-specific data + * structures, including the bdi structure, without telling anyone + * else. Once this happens, any attempt to call mark_buffer_dirty() + * (for example, by ext4_commit_super), will cause a kernel OOPS. + * This is a kludge to prevent these oops until we can put in a proper + * hook in del_gendisk() to inform the VFS and file system layers. + */ +static int block_device_ejected(struct super_block *sb) +{ + struct inode *bd_inode = sb->s_bdev->bd_inode; + struct backing_dev_info *bdi = inode_to_bdi(bd_inode); + + return bdi->dev == NULL; +} + static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn) { struct super_block *sb = journal->j_private; @@ -4591,7 +4607,7 @@ static int ext4_commit_super(struct super_block *sb, int sync) struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; int error = 0; - if (!sbh) + if (!sbh || block_device_ejected(sb)) return error; if (buffer_write_io_error(sbh)) { /* From 1bcc586eae97f5dfb9249c2d8c9406f0f8da57ae Mon Sep 17 00:00:00 2001 From: Jeff Vander Stoep Date: Tue, 18 Aug 2015 20:50:10 +0100 Subject: [PATCH 0817/2091] arm64: kconfig: Move LIST_POISON to a safe value commit bf0c4e04732479f650ff59d1ee82de761c0071f0 upstream. Move the poison pointer offset to 0xdead000000000000, a recognized value that is not mappable by user-space exploits. Acked-by: Catalin Marinas Signed-off-by: Thierry Strudel Signed-off-by: Jeff Vander Stoep Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/Kconfig | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 7796af4b1d6f6..99fe93a5021ff 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -101,6 +101,10 @@ config NO_IOPORT_MAP config STACKTRACE_SUPPORT def_bool y +config ILLEGAL_POINTER_VALUE + hex + default 0xdead000000000000 + config LOCKDEP_SUPPORT def_bool y From 380cd983b40fb5a58d7c06a11c736ce9d531d408 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 27 Aug 2015 07:12:33 +0100 Subject: [PATCH 0818/2091] arm64: flush FP/SIMD state correctly after execve() commit 674c242c9323d3c293fc4f9a3a3a619fe3063290 upstream. When a task calls execve(), its FP/SIMD state is flushed so that none of the original program state is observeable by the incoming program. 
However, since this flushing consists of setting the in-memory copy of the FP/SIMD state to all zeroes, the CPU field is set to CPU 0 as well, which indicates to the lazy FP/SIMD preserve/restore code that the FP/SIMD state does not need to be reread from memory if the task is scheduled again on CPU 0 without any other tasks having entered userland (or used the FP/SIMD in kernel mode) on the same CPU in the mean time. If this happens, the FP/SIMD state of the old program will still be present in the registers when the new program starts. So set the CPU field to the invalid value of NR_CPUS when performing the flush, by calling fpsimd_flush_task_state(). Reported-by: Chunyan Zhang Reported-by: Janet Liu Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/fpsimd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 3dca15634e69c..c31e59fe2cb88 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -157,6 +157,7 @@ void fpsimd_thread_switch(struct task_struct *next) void fpsimd_flush_thread(void) { memset(¤t->thread.fpsimd_state, 0, sizeof(struct fpsimd_state)); + fpsimd_flush_task_state(current); set_thread_flag(TIF_FOREIGN_FPSTATE); } From 01cb08b46fbcb7572db3cfa41a46b054ec8c2fc9 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 18 Aug 2015 10:34:41 +0100 Subject: [PATCH 0819/2091] of/fdt: make memblock maximum physical address arch configurable commit 8eafeb48022816513abc4f440bdad4c350fe81a3 upstream. When parsing the memory nodes to populate the memblock memory table, we check against high and low limits and clip any memory that exceeds either one of them. However, for arm64, the high limit of (phys_addr_t)~0 is not very meaningful, since phys_addr_t is 64 bits (i.e., no limit) but there may be other constraints that limit the memory ranges that we can support. So rename MAX_PHYS_ADDR to MAX_MEMBLOCK_ADDR (for clarity) and only define it if the arch does not supply a definition of its own. 
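Taken together with the arm64 patch that follows, the mechanism is: the architecture may pre-define MAX_MEMBLOCK_ADDR, the generic code falls back to "no limit" otherwise, and every memory node is clipped against whichever limit is in effect. A compilable sketch of both pieces, using simplified types and a made-up 4 GiB limit purely for illustration (the real logic is in the diff below):

#include <stdint.h>
#include <stdio.h>

typedef uint64_t phys_addr_t;

/* "Arch" override, as arm64 does in the next patch; remove it to get the default. */
#define MAX_MEMBLOCK_ADDR ((phys_addr_t)0xffffffff)	/* pretend 4 GiB limit */

#ifndef MAX_MEMBLOCK_ADDR				/* generic fallback: no limit */
#define MAX_MEMBLOCK_ADDR ((phys_addr_t)~0)
#endif

/* Clip [base, base + size) against MAX_MEMBLOCK_ADDR, mirroring the fdt logic. */
static void add_memory(phys_addr_t base, phys_addr_t size)
{
	if (base > MAX_MEMBLOCK_ADDR) {
		printf("ignoring block 0x%llx - 0x%llx\n",
		       (unsigned long long)base,
		       (unsigned long long)(base + size));
		return;
	}
	if (base + size - 1 > MAX_MEMBLOCK_ADDR)
		size = MAX_MEMBLOCK_ADDR - base + 1;
	printf("adding base 0x%llx size 0x%llx\n",
	       (unsigned long long)base, (unsigned long long)size);
}

int main(void)
{
	add_memory(0x80000000ULL, 0x100000000ULL);	/* straddles the limit: clipped */
	add_memory(0x200000000ULL, 0x40000000ULL);	/* entirely above it: ignored */
	return 0;
}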
Acked-by: Rob Herring Reviewed-by: Catalin Marinas Tested-by: Stuart Yoder Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- drivers/of/fdt.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index cde35c5d0191b..d91f721a05b66 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -955,7 +955,9 @@ int __init early_init_dt_scan_chosen(unsigned long node, const char *uname, } #ifdef CONFIG_HAVE_MEMBLOCK -#define MAX_PHYS_ADDR ((phys_addr_t)~0) +#ifndef MAX_MEMBLOCK_ADDR +#define MAX_MEMBLOCK_ADDR ((phys_addr_t)~0) +#endif void __init __weak early_init_dt_add_memory_arch(u64 base, u64 size) { @@ -972,16 +974,16 @@ void __init __weak early_init_dt_add_memory_arch(u64 base, u64 size) } size &= PAGE_MASK; - if (base > MAX_PHYS_ADDR) { + if (base > MAX_MEMBLOCK_ADDR) { pr_warning("Ignoring memory block 0x%llx - 0x%llx\n", base, base + size); return; } - if (base + size - 1 > MAX_PHYS_ADDR) { + if (base + size - 1 > MAX_MEMBLOCK_ADDR) { pr_warning("Ignoring memory range 0x%llx - 0x%llx\n", - ((u64)MAX_PHYS_ADDR) + 1, base + size); - size = MAX_PHYS_ADDR - base + 1; + ((u64)MAX_MEMBLOCK_ADDR) + 1, base + size); + size = MAX_MEMBLOCK_ADDR - base + 1; } if (base + size < phys_offset) { From fe6b3cafd6a0ee59a6bd5b5a0e2fee18ea257328 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 18 Aug 2015 10:34:42 +0100 Subject: [PATCH 0820/2091] arm64: set MAX_MEMBLOCK_ADDR according to linear region size commit 34ba2c4247e5c4b1542b1106e156af324660c4f0 upstream. The linear region size of a 39-bit VA kernel is only 256 GB, which may be insufficient to cover all of system RAM, even on platforms that have much less than 256 GB of memory but which is laid out very sparsely. So make sure we clip the memory we will not be able to map before installing it into the memblock memory table, by setting MAX_MEMBLOCK_ADDR accordingly. Reviewed-by: Catalin Marinas Tested-by: Stuart Yoder Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/memory.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index f800d45ea2265..44a59c20e7735 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -113,6 +113,14 @@ extern phys_addr_t memstart_addr; /* PHYS_OFFSET - the physical address of the start of memory. */ #define PHYS_OFFSET ({ memstart_addr; }) +/* + * The maximum physical address that the linear direct mapping + * of system RAM can cover. (PAGE_OFFSET can be interpreted as + * a 2's complement signed quantity and negated to derive the + * maximum size of the linear mapping.) + */ +#define MAX_MEMBLOCK_ADDR ({ memstart_addr - PAGE_OFFSET - 1; }) + /* * PFNs are used to describe any physical page; this means * PFN 0 == physical address 0. From a70957a9cab26b99de59514598eaf201312f8956 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 15 Sep 2015 12:07:06 +0100 Subject: [PATCH 0821/2091] arm64: compat: fix vfp save/restore across signal handlers in big-endian commit bdec97a855ef1e239f130f7a11584721c9a1bf04 upstream. When saving/restoring the VFP registers from a compat (AArch32) signal frame, we rely on the compat registers forming a prefix of the native register file and therefore make use of copy_{to,from}_user to transfer between the native fpsimd_state and the compat_vfp_sigframe. 
Unfortunately, this doesn't work so well in a big-endian environment. Our fpsimd save/restore code operates directly on 128-bit quantities (Q registers) whereas the compat_vfp_sigframe represents the registers as an array of 64-bit (D) registers. The architecture packs the compat D registers into the Q registers, with the least significant bytes holding the lower register. Consequently, we need to swap the 64-bit halves when converting between these two representations on a big-endian machine. This patch replaces the __copy_{to,from}_user invocations in our compat VFP signal handling code with explicit __put_user loops that operate on 64-bit values and swap them accordingly. Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/signal32.c | 47 +++++++++++++++++++++++++++--------- 1 file changed, 36 insertions(+), 11 deletions(-) diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index c0cff3410166e..c58aee062590c 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -212,14 +212,32 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) /* * VFP save/restore code. + * + * We have to be careful with endianness, since the fpsimd context-switch + * code operates on 128-bit (Q) register values whereas the compat ABI + * uses an array of 64-bit (D) registers. Consequently, we need to swap + * the two halves of each Q register when running on a big-endian CPU. */ +union __fpsimd_vreg { + __uint128_t raw; + struct { +#ifdef __AARCH64EB__ + u64 hi; + u64 lo; +#else + u64 lo; + u64 hi; +#endif + }; +}; + static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame) { struct fpsimd_state *fpsimd = ¤t->thread.fpsimd_state; compat_ulong_t magic = VFP_MAGIC; compat_ulong_t size = VFP_STORAGE_SIZE; compat_ulong_t fpscr, fpexc; - int err = 0; + int i, err = 0; /* * Save the hardware registers to the fpsimd_state structure. @@ -235,10 +253,15 @@ static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame) /* * Now copy the FP registers. Since the registers are packed, * we can copy the prefix we want (V0-V15) as it is. - * FIXME: Won't work if big endian. */ - err |= __copy_to_user(&frame->ufp.fpregs, fpsimd->vregs, - sizeof(frame->ufp.fpregs)); + for (i = 0; i < ARRAY_SIZE(frame->ufp.fpregs); i += 2) { + union __fpsimd_vreg vreg = { + .raw = fpsimd->vregs[i >> 1], + }; + + __put_user_error(vreg.lo, &frame->ufp.fpregs[i], err); + __put_user_error(vreg.hi, &frame->ufp.fpregs[i + 1], err); + } /* Create an AArch32 fpscr from the fpsr and the fpcr. */ fpscr = (fpsimd->fpsr & VFP_FPSCR_STAT_MASK) | @@ -263,7 +286,7 @@ static int compat_restore_vfp_context(struct compat_vfp_sigframe __user *frame) compat_ulong_t magic = VFP_MAGIC; compat_ulong_t size = VFP_STORAGE_SIZE; compat_ulong_t fpscr; - int err = 0; + int i, err = 0; __get_user_error(magic, &frame->magic, err); __get_user_error(size, &frame->size, err); @@ -273,12 +296,14 @@ static int compat_restore_vfp_context(struct compat_vfp_sigframe __user *frame) if (magic != VFP_MAGIC || size != VFP_STORAGE_SIZE) return -EINVAL; - /* - * Copy the FP registers into the start of the fpsimd_state. - * FIXME: Won't work if big endian. - */ - err |= __copy_from_user(fpsimd.vregs, frame->ufp.fpregs, - sizeof(frame->ufp.fpregs)); + /* Copy the FP registers into the start of the fpsimd_state. 
*/ + for (i = 0; i < ARRAY_SIZE(frame->ufp.fpregs); i += 2) { + union __fpsimd_vreg vreg; + + __get_user_error(vreg.lo, &frame->ufp.fpregs[i], err); + __get_user_error(vreg.hi, &frame->ufp.fpregs[i + 1], err); + fpsimd.vregs[i >> 1] = vreg.raw; + } /* Extract the fpsr and the fpcr from the fpscr */ __get_user_error(fpscr, &frame->ufp.fpscr, err); From ad09182eee001c592b7a442fad5e7b2ca61b34aa Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 2 Sep 2015 18:49:28 +0100 Subject: [PATCH 0822/2091] arm64: head.S: initialise mdcr_el2 in el2_setup commit d10bcd473301888f957ec4b6b12aa3621be78d59 upstream. When entering the kernel at EL2, we fail to initialise the MDCR_EL2 register which controls debug access and PMU capabilities at EL1. This patch ensures that the register is initialised so that all traps are disabled and all the PMU counters are available to the host. When a guest is scheduled, KVM takes care to configure trapping appropriately. Acked-by: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/head.S | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 19f915e8f6e0f..36aa31ff2c060 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -565,6 +565,11 @@ CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems msr hstr_el2, xzr // Disable CP15 traps to EL2 #endif + /* EL2 debug */ + mrs x0, pmcr_el0 // Disable debug access traps + ubfx x0, x0, #11, #5 // to EL2 and allow access to + msr mdcr_el2, x0 // all PMU counters from EL1 + /* Stage-2 translation */ msr vttbr_el2, xzr From 1feed3799f32961ccfade28cd755f93c023fa21c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 17 Mar 2015 12:15:02 +0000 Subject: [PATCH 0823/2091] arm64: errata: add module build workaround for erratum #843419 commit df057cc7b4fa59e9b55f07ffdb6c62bf02e99a00 upstream. Cortex-A53 processors <= r0p4 are affected by erratum #843419 which can lead to a memory access using an incorrect address in certain sequences headed by an ADRP instruction. There is a linker fix to generate veneers for ADRP instructions, but this doesn't work for kernel modules which are built as unlinked ELF objects. This patch adds a new config option for the erratum which, when enabled, builds kernel modules with the mcmodel=large flag. This uses absolute addressing for all kernel symbols, thereby removing the use of ADRP as a PC-relative form of addressing. The ADRP relocs are removed from the module loader so that we fail to load any potentially affected modules. Acked-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/Kconfig | 16 ++++++++++++++++ arch/arm64/Makefile | 4 ++++ arch/arm64/kernel/module.c | 2 ++ 3 files changed, 22 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 99fe93a5021ff..6f0a3b41b0090 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -413,6 +413,22 @@ config ARM64_ERRATUM_845719 If unsure, say Y. +config ARM64_ERRATUM_843419 + bool "Cortex-A53: 843419: A load or store might access an incorrect address" + depends on MODULES + default y + help + This option builds kernel modules using the large memory model in + order to avoid the use of the ADRP instruction, which can cause + a subsequent memory access to use an incorrect address on Cortex-A53 + parts up to r0p4. 
+ + Note that the kernel itself must be linked with a version of ld + which fixes potentially affected ADRP instructions through the + use of veneers. + + If unsure, say Y. + endmenu diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 4d2a925998f92..81151663ef385 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -30,6 +30,10 @@ endif CHECKFLAGS += -D__aarch64__ +ifeq ($(CONFIG_ARM64_ERRATUM_843419), y) +CFLAGS_MODULE += -mcmodel=large +endif + # Default value head-y := arch/arm64/kernel/head.o diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 67bf4107f6efe..876eb8df50bf3 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -332,12 +332,14 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 0, 21, AARCH64_INSN_IMM_ADR); break; +#ifndef CONFIG_ARM64_ERRATUM_843419 case R_AARCH64_ADR_PREL_PG_HI21_NC: overflow_check = false; case R_AARCH64_ADR_PREL_PG_HI21: ovf = reloc_insn_imm(RELOC_OP_PAGE, loc, val, 12, 21, AARCH64_INSN_IMM_ADR); break; +#endif case R_AARCH64_ADD_ABS_LO12_NC: case R_AARCH64_LDST8_ABS_LO12_NC: overflow_check = false; From 26550ff8f2ef668a526ca8fedbcf86bbd018169b Mon Sep 17 00:00:00 2001 From: Pavel Fedin Date: Wed, 5 Aug 2015 11:53:57 +0100 Subject: [PATCH 0824/2091] arm/arm64: KVM: vgic: Check for !irqchip_in_kernel() when mapping resources commit c2f58514cfb374d5368c9da945f1765cd48eb0da upstream. Until b26e5fdac43c ("arm/arm64: KVM: introduce per-VM ops"), kvm_vgic_map_resources() used to include a check on irqchip_in_kernel(), and vgic_v2_map_resources() still has it. But now vm_ops are not initialized until we call kvm_vgic_create(). Therefore kvm_vgic_map_resources() can being called without a VGIC, and we die because vm_ops.map_resources is NULL. Fixing this restores QEMU's kernel-irqchip=off option to a working state, allowing to use GIC emulation in userspace. Fixes: b26e5fdac43c ("arm/arm64: KVM: introduce per-VM ops") Signed-off-by: Pavel Fedin [maz: reworked commit message] Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- arch/arm/kvm/arm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index d9631ecddd56e..d6223cbcb6618 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -450,7 +450,7 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) * Map the VGIC hardware resources before running a vcpu the first * time on this VM. */ - if (unlikely(!vgic_ready(kvm))) { + if (unlikely(irqchip_in_kernel(kvm) && !vgic_ready(kvm))) { ret = kvm_vgic_map_resources(kvm); if (ret) return ret; From 7dd1e0b3f23eafbbbac8b018b591c613f8468368 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 14 Sep 2015 16:06:03 +0100 Subject: [PATCH 0825/2091] KVM: arm64: add workaround for Cortex-A57 erratum #852523 commit 43297dda0a51e4ffed0888ce727c218cfb7474b6 upstream. When restoring the system register state for an AArch32 guest at EL2, writes to DACR32_EL2 may not be correctly synchronised by Cortex-A57, which can lead to the guest effectively running with junk in the DACR and running into unexpected domain faults. This patch works around the issue by re-ordering our restoration of the AArch32 register aliases so that they happen before the AArch64 system registers. Ensuring that the registers are restored in this order guarantees that they will be correctly synchronised by the core. 
Reviewed-by: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kvm/hyp.S | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 5befd010e2325..4dad27ba7d3d2 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -947,13 +947,15 @@ ENTRY(__kvm_vcpu_run) // Guest context add x2, x0, #VCPU_CONTEXT + // We must restore the 32-bit state before the sysregs, thanks + // to Cortex-A57 erratum #852523. + restore_guest_32bit_state bl __restore_sysregs bl __restore_fpsimd skip_debug_state x3, 1f bl __restore_debug 1: - restore_guest_32bit_state restore_guest_regs // That's it, no more messing around. From f283a20ab79c44af3e493cfbcc2550404d75c3b8 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 16 Sep 2015 16:18:59 +0100 Subject: [PATCH 0826/2091] arm64: KVM: Disable virtual timer even if the guest is not using it commit c4cbba9fa078f55d9f6d081dbb4aec7cf969e7c7 upstream. When running a guest with the architected timer disabled (with QEMU and the kernel_irqchip=off option, for example), it is important to make sure the timer gets turned off. Otherwise, the guest may try to enable it anyway, leading to a screaming HW interrupt. The fix is to unconditionally turn off the virtual timer on guest exit. Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kvm/hyp.S | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 4dad27ba7d3d2..64f9e60b31da1 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -844,8 +844,6 @@ mrs x3, cntv_ctl_el0 and x3, x3, #3 str w3, [x0, #VCPU_TIMER_CNTV_CTL] - bic x3, x3, #1 // Clear Enable - msr cntv_ctl_el0, x3 isb @@ -853,6 +851,9 @@ str x3, [x0, #VCPU_TIMER_CNTV_CVAL] 1: + // Disable the virtual timer + msr cntv_ctl_el0, xzr + // Allow physical timer/counter access for the host mrs x2, cnthctl_el2 orr x2, x2, #3 From 37e7e6bf19f75255444a6a53d90a16d8c5e5a471 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 3 Sep 2015 22:20:00 -0700 Subject: [PATCH 0827/2091] Input: evdev - do not report errors form flush() commit eb38f3a4f6e86f8bb10a3217ebd85ecc5d763aae upstream. We've got bug reports showing the old systemd-logind (at least system-210) aborting unexpectedly, and this turned out to be because of an invalid error code from close() call to evdev devices. close() is supposed to return only either EINTR or EBADFD, while the device returned ENODEV. logind was overreacting to it and decided to kill itself when an unexpected error code was received. What a tragedy. The bad error code comes from flush fops, and actually evdev_flush() returns ENODEV when device is disconnected or client's access to it is revoked. But in these cases the fact that flush did not actually happen is not an error, but rather normal behavior. For non-disconnected devices result of flush is also not that interesting as there is no potential of data loss and even if it fails application has no way of handling the error. Because of that we are better off always returning success from evdev_flush(). Also returning EINTR from flush()/close() is discouraged (as it is not clear how application should handle this error), so let's stop taking evdev->mutex interruptibly. 
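The caller side explains why the stray -ENODEV was harmful: an application can do nothing useful with an error returned from close() on an input device, so a robust caller treats it as best effort instead of aborting. A hedged user-space sketch of such handling (illustrative only, not the systemd-logind code):

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* Close a device fd without treating driver-specific errors as fatal. */
static void close_device(int fd)
{
	if (close(fd) == 0)
		return;

	switch (errno) {
	case EINTR:	/* fd state is unspecified; do not retry close() */
	case EIO:
		fprintf(stderr, "close: %s (ignored)\n", strerror(errno));
		break;
	default:	/* e.g. an unexpected ENODEV leaking out of a flush handler */
		fprintf(stderr, "close: unexpected errno %d (ignored)\n", errno);
		break;
	}
}

int main(void)
{
	int fd = open("/dev/null", O_RDONLY);	/* stand-in for an evdev node */

	if (fd >= 0)
		close_device(fd);
	return 0;
}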
Bugzilla: http://bugzilla.suse.com/show_bug.cgi?id=939834 Signed-off-by: Takashi Iwai Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/evdev.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c index a18f41b89b6a9..2ae522f0d2b2f 100644 --- a/drivers/input/evdev.c +++ b/drivers/input/evdev.c @@ -290,19 +290,14 @@ static int evdev_flush(struct file *file, fl_owner_t id) { struct evdev_client *client = file->private_data; struct evdev *evdev = client->evdev; - int retval; - retval = mutex_lock_interruptible(&evdev->mutex); - if (retval) - return retval; + mutex_lock(&evdev->mutex); - if (!evdev->exist || client->revoked) - retval = -ENODEV; - else - retval = input_flush_device(&evdev->handle, file); + if (evdev->exist && !client->revoked) + input_flush_device(&evdev->handle, file); mutex_unlock(&evdev->mutex); - return retval; + return 0; } static void evdev_free(struct device *dev) From ae7d175f7d461643dd6eface450704c4076cfc0d Mon Sep 17 00:00:00 2001 From: Yao-Wen Mao Date: Fri, 28 Aug 2015 16:33:25 +0800 Subject: [PATCH 0828/2091] ALSA: usb-audio: correct the value cache check. commit 6aa6925cad06159dc6e25857991bbc4960821242 upstream. The check of cval->cached should be zero-based (including master channel). Signed-off-by: Yao-Wen Mao Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/mixer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index 8b7e391dd0b80..cd8ed2e393a2d 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -2522,7 +2522,7 @@ static int restore_mixer_value(struct usb_mixer_elem_list *list) for (c = 0; c < MAX_CHANNELS; c++) { if (!(cval->cmask & (1 << c))) continue; - if (cval->cached & (1 << c)) { + if (cval->cached & (1 << (c + 1))) { err = snd_usb_set_cur_mix_value(cval, c + 1, idx, cval->cache_val[idx]); if (err < 0) From cd7885f07b7cf2e44b29c64d3f79e6cb104e1b19 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 13 Aug 2015 18:02:39 +0200 Subject: [PATCH 0829/2091] ALSA: hda - Enable headphone jack detect on old Fujitsu laptops commit bb148bdeb0ab16fc0ae8009799471e4d7180073b upstream. According to the bug report, FSC Amilo laptops with ALC880 can detect the headphone jack but currently the driver disables it. It's partly intentionally, as non-working jack detect was reported in the past. Let's enable now. 
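The quirk change below flips a single bit in the pin default configuration: clearing the "no presence detect" flag in the misc field is what re-enables jack detection. A small decoder sketch, assuming the standard HDA default-configuration field layout (illustration only, not the driver's parsing code):

#include <stdint.h>
#include <stdio.h>

/* Subset of the standard HDA pin default-configuration layout. */
#define DEFCFG_MISC(cfg)		(((cfg) >> 8) & 0xf)
#define DEFCFG_MISC_NO_PRESENCE	0x1	/* jack-detect override: detection unusable */

static void show(uint32_t cfg)
{
	printf("0x%08x: misc=0x%x, jack detect %s\n",
	       (unsigned)cfg, (unsigned)DEFCFG_MISC(cfg),
	       (DEFCFG_MISC(cfg) & DEFCFG_MISC_NO_PRESENCE) ? "disabled" : "enabled");
}

int main(void)
{
	show(0x0121411f);	/* old quirk value: presence detection overridden */
	show(0x0121401f);	/* new quirk value: jack detection usable */
	return 0;
}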
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=102501 Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 91f6928560e18..5f75e1c4cc93f 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -1134,7 +1134,7 @@ static const struct hda_fixup alc880_fixups[] = { /* override all pins as BIOS on old Amilo is broken */ .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { - { 0x14, 0x0121411f }, /* HP */ + { 0x14, 0x0121401f }, /* HP */ { 0x15, 0x99030120 }, /* speaker */ { 0x16, 0x99030130 }, /* bass speaker */ { 0x17, 0x411111f0 }, /* N/A */ @@ -1154,7 +1154,7 @@ static const struct hda_fixup alc880_fixups[] = { /* almost compatible with FUJITSU, but no bass and SPDIF */ .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { - { 0x14, 0x0121411f }, /* HP */ + { 0x14, 0x0121401f }, /* HP */ { 0x15, 0x99030120 }, /* speaker */ { 0x16, 0x411111f0 }, /* N/A */ { 0x17, 0x411111f0 }, /* N/A */ From 68e17e9c950fcde261e286491b24d2015ea0edcb Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 13 Aug 2015 18:05:06 +0200 Subject: [PATCH 0830/2091] ALSA: hda - Use ALC880_FIXUP_FUJITSU for FSC Amilo M1437 commit a161574e200ae63a5042120e0d8c36830e81bde3 upstream. It turned out that the machine has a bass speaker, so take a correct fixup entry. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=102501 Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 5f75e1c4cc93f..d82949cc32d8d 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -1363,7 +1363,7 @@ static const struct snd_pci_quirk alc880_fixup_tbl[] = { SND_PCI_QUIRK(0x161f, 0x203d, "W810", ALC880_FIXUP_W810), SND_PCI_QUIRK(0x161f, 0x205d, "Medion Rim 2150", ALC880_FIXUP_MEDION_RIM), SND_PCI_QUIRK(0x1631, 0xe011, "PB 13201056", ALC880_FIXUP_6ST_AUTOMUTE), - SND_PCI_QUIRK(0x1734, 0x107c, "FSC F1734", ALC880_FIXUP_F1734), + SND_PCI_QUIRK(0x1734, 0x107c, "FSC Amilo M1437", ALC880_FIXUP_FUJITSU), SND_PCI_QUIRK(0x1734, 0x1094, "FSC Amilo M1451G", ALC880_FIXUP_FUJITSU), SND_PCI_QUIRK(0x1734, 0x10ac, "FSC AMILO Xi 1526", ALC880_FIXUP_F1734), SND_PCI_QUIRK(0x1734, 0x10b0, "FSC Amilo Pi1556", ALC880_FIXUP_FUJITSU), From 0ea955bb6792090d06e93a4822407d6d6238bf74 Mon Sep 17 00:00:00 2001 From: Woodrow Shen Date: Fri, 4 Sep 2015 15:08:12 +0800 Subject: [PATCH 0831/2091] ALSA: hda - Add some FIXUP quirks for white noise on Dell laptop. commit 1adecc6755e1e4193b5618ddb2e107f6d6e88f4b upstream. Dell laptop has a series model to use the same codec but different subsystem ID. At the same time they happens the white noise by login screen and headphone; for fixing them together, I only can add these IDs to FIXUP function ALC292_FIXUP_DISABLE_AAMIX, then try to solve such the similar issues. 
Codec: Realtek ALC3235 Vendor Id: 0x10ec0293 Subsystem Id: 0x102806dd Subsystem Id: 0x102806df Subsystem Id: 0x102806e0 BugLink: https://bugs.launchpad.net/bugs/1492132 Signed-off-by: Woodrow Shen Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index d82949cc32d8d..6ac5487a7e6d3 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5118,8 +5118,11 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x06c7, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x06d9, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x06da, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), - SND_PCI_QUIRK(0x1028, 0x06de, "Dell", ALC292_FIXUP_DISABLE_AAMIX), SND_PCI_QUIRK(0x1028, 0x06db, "Dell", ALC292_FIXUP_DISABLE_AAMIX), + SND_PCI_QUIRK(0x1028, 0x06dd, "Dell", ALC292_FIXUP_DISABLE_AAMIX), + SND_PCI_QUIRK(0x1028, 0x06de, "Dell", ALC292_FIXUP_DISABLE_AAMIX), + SND_PCI_QUIRK(0x1028, 0x06df, "Dell", ALC292_FIXUP_DISABLE_AAMIX), + SND_PCI_QUIRK(0x1028, 0x06e0, "Dell", ALC292_FIXUP_DISABLE_AAMIX), SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2), From d521521ee7c630dfe5f0036946b820f2ace80cb1 Mon Sep 17 00:00:00 2001 From: Niranjan Sivakumar Date: Sat, 5 Sep 2015 18:20:35 +0200 Subject: [PATCH 0832/2091] ALSA: hda - Fix white noise on Dell M3800 commit 467e1436ba85f78b8c4610c4549eb255a8211c42 upstream. The M3800 is very minor workstation variant of the XPS 15 which has already been patched for this issue. I figured it's probably more important for this version of the laptop to be patched than the regular XPS as Dell sells is pre-configured with Ubuntu to be used as a Linux workstation. I have tested the patch on my the hardware on Linux 4.2.0. Signed-off-by: Niranjan Sivakumar Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 6ac5487a7e6d3..6fe862594e9b9 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6457,6 +6457,7 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x05db, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x05fe, "Dell XPS 15", ALC668_FIXUP_DELL_XPS13), SND_PCI_QUIRK(0x1028, 0x060a, "Dell XPS 13", ALC668_FIXUP_DELL_XPS13), + SND_PCI_QUIRK(0x1028, 0x060d, "Dell M3800", ALC668_FIXUP_DELL_XPS13), SND_PCI_QUIRK(0x1028, 0x0625, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0626, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0696, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE), From 56347e799e690aebf26037fb2e11325078cedeb3 Mon Sep 17 00:00:00 2001 From: David Dueck Date: Tue, 28 Jul 2015 09:48:16 +0200 Subject: [PATCH 0833/2091] pinctrl: at91: fix null pointer dereference commit 1ab36387ea4face01aac3560b396b1e2ce07c4ff upstream. Not all gpio banks are necessarily enabled, in the current code this can lead to null pointer dereferences. 
[ 51.130000] Unable to handle kernel NULL pointer dereference at virtual address 00000058 [ 51.130000] pgd = dee04000 [ 51.130000] [00000058] *pgd=3f66d831, *pte=00000000, *ppte=00000000 [ 51.140000] Internal error: Oops: 17 [#1] ARM [ 51.140000] Modules linked in: [ 51.140000] CPU: 0 PID: 1664 Comm: cat Not tainted 4.1.1+ #6 [ 51.140000] Hardware name: Atmel SAMA5 [ 51.140000] task: df6dd880 ti: dec60000 task.ti: dec60000 [ 51.140000] PC is at at91_pinconf_get+0xb4/0x200 [ 51.140000] LR is at at91_pinconf_get+0xb4/0x200 [ 51.140000] pc : [] lr : [] psr: 600f0013 sp : dec61e48 ip : 600f0013 fp : df522538 [ 51.140000] r10: df52250c r9 : 00000058 r8 : 00000068 [ 51.140000] r7 : 00000000 r6 : df53c910 r5 : 00000000 r4 : dec61e7c [ 51.140000] r3 : 00000000 r2 : c06746d4 r1 : 00000000 r0 : 00000003 [ 51.140000] Flags: nZCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment user [ 51.140000] Control: 10c53c7d Table: 3ee04059 DAC: 00000015 [ 51.140000] Process cat (pid: 1664, stack limit = 0xdec60208) [ 51.140000] Stack: (0xdec61e48 to 0xdec62000) [ 51.140000] 1e40: 00000358 00000000 df522500 ded15f80 c05a9d08 ded15f80 [ 51.140000] 1e60: 0000048c 00000061 df522500 ded15f80 c05a9d08 c01e7304 ded15f80 00000000 [ 51.140000] 1e80: c01e6008 00000060 0000048c c01e6034 c01e5f6c ded15f80 dec61ec0 00000000 [ 51.140000] 1ea0: 00020000 ded6f280 dec61f80 00000001 00000001 c00ae0b8 b6e80000 ded15fb0 [ 51.140000] 1ec0: 00000000 00000000 df4bc974 00000055 00000800 ded6f280 b6e80000 ded6f280 [ 51.140000] 1ee0: ded6f280 00020000 b6e80000 00000000 00020000 c0090dec c0671e1c dec61fb0 [ 51.140000] 1f00: b6f8b510 00000001 00004201 c000924c 00000000 00000003 00000003 00000000 [ 51.140000] 1f20: df4bc940 00022000 00000022 c066e188 b6e7f000 c00836f4 000b6e7f ded6f280 [ 51.140000] 1f40: ded6f280 b6e80000 dec61f80 ded6f280 00020000 c0091508 00000000 00000003 [ 51.140000] 1f60: 00022000 00000000 00000000 ded6f280 ded6f280 00020000 b6e80000 c0091d9c [ 51.140000] 1f80: 00000000 00000000 ffffffff 00020000 00020000 b6e80000 00000003 c000f124 [ 51.140000] 1fa0: dec60000 c000efa0 00020000 00020000 00000003 b6e80000 00020000 000271c4 [ 51.140000] 1fc0: 00020000 00020000 b6e80000 00000003 7fffe000 00000000 00000000 00020000 [ 51.140000] 1fe0: 00000000 bef50b64 00013835 b6f29c76 400f0030 00000003 00000000 00000000 [ 51.140000] [] (at91_pinconf_get) from [] (at91_pinconf_dbg_show+0x18/0x2c0) [ 51.140000] [] (at91_pinconf_dbg_show) from [] (pinconf_pins_show+0xc8/0xf8) [ 51.140000] [] (pinconf_pins_show) from [] (seq_read+0x1a0/0x464) [ 51.140000] [] (seq_read) from [] (__vfs_read+0x20/0xd0) [ 51.140000] [] (__vfs_read) from [] (vfs_read+0x7c/0x108) [ 51.140000] [] (vfs_read) from [] (SyS_read+0x40/0x94) [ 51.140000] [] (SyS_read) from [] (ret_fast_syscall+0x0/0x3c) [ 51.140000] Code: eb010ec2 e30a0d08 e34c005a eb0ae5a7 (e5993000) [ 51.150000] ---[ end trace fb3c370da3ea4794 ]--- Fixes: a0b957f306fa ("pinctrl: at91: allow to have disabled gpio bank") Signed-off-by: David Dueck Acked-by: Ludovic Desroches Acked-by: Alexandre Belloni Acked-by: Nicolas Ferre Cc: Boris Brezillon Cc: Jean-Christophe PLAGNIOL-VILLARD Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/pinctrl-at91.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/pinctrl/pinctrl-at91.c b/drivers/pinctrl/pinctrl-at91.c index 2f797cb7e2050..7747814508854 100644 --- a/drivers/pinctrl/pinctrl-at91.c +++ b/drivers/pinctrl/pinctrl-at91.c @@ -320,6 +320,9 
@@ static const struct pinctrl_ops at91_pctrl_ops = { static void __iomem *pin_to_controller(struct at91_pinctrl *info, unsigned int bank) { + if (!gpio_chips[bank]) + return NULL; + return gpio_chips[bank]->regbase; } @@ -729,6 +732,10 @@ static int at91_pmx_set(struct pinctrl_dev *pctldev, unsigned selector, pin = &pins_conf[i]; at91_pin_dbg(info->dev, pin); pio = pin_to_controller(info, pin->bank); + + if (!pio) + continue; + mask = pin_to_mask(pin->pin); at91_mux_disable_interrupt(pio, mask); switch (pin->mux) { @@ -848,6 +855,10 @@ static int at91_pinconf_get(struct pinctrl_dev *pctldev, *config = 0; dev_dbg(info->dev, "%s:%d, pin_id=%d", __func__, __LINE__, pin_id); pio = pin_to_controller(info, pin_to_bank(pin_id)); + + if (!pio) + return -EINVAL; + pin = pin_id % MAX_NB_GPIO_PER_BANK; if (at91_mux_get_multidrive(pio, pin)) @@ -889,6 +900,10 @@ static int at91_pinconf_set(struct pinctrl_dev *pctldev, "%s:%d, pin_id=%d, config=0x%lx", __func__, __LINE__, pin_id, config); pio = pin_to_controller(info, pin_to_bank(pin_id)); + + if (!pio) + return -EINVAL; + pin = pin_id % MAX_NB_GPIO_PER_BANK; mask = pin_to_mask(pin); From 7584b2d8dbe6f49a76d756b83682a298f33cbfb6 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 27 Aug 2015 14:12:36 +1000 Subject: [PATCH 0834/2091] powerpc/pseries: Fix corrupted pdn list commit 590c7567a2895f939525ead57b0334c6d47986f0 upstream. Commit cca87d30 ("powerpc/pci: Refactor pci_dn") introduced pdn list for SRIOV VFs. It means the pdn is be put into the child list of its parent pdn when the pdn is created. When doing PCI hot unplugging on pSeries, the PCI device node as well as its pdn are released through procfs entry "powerpc/ofdt". Some one else grabs the memory chunk of the pdn and update it accordingly. At the same time, the pdn is still tracked in the child list of parent pdn. It leads to corrupted child list in the parent pdn. This fixes above issue by removing the pdn from the child list of its parent pdn when the device node is detached from the system. Note the pdn is free'd when the device node is released if the device node is dynamic one. Otherwise, the device node as well as the pdn won't be released. Fixes: cca87d30 ("powerpc/pci: Refactor pci_dn") Reported-by: Santwana Samantray Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/pseries/setup.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index df6a7041922b6..e6e8b241d7173 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -268,6 +268,11 @@ static int pci_dn_reconfig_notifier(struct notifier_block *nb, unsigned long act eeh_dev_init(PCI_DN(np), pci->phb); } break; + case OF_RECONFIG_DETACH_NODE: + pci = PCI_DN(np); + if (pci) + list_del(&pci->list); + break; default: err = NOTIFY_DONE; break; From 91552f87853c6e50b3968f6514ddb9084acb9f10 Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Fri, 14 Aug 2015 16:03:19 +1000 Subject: [PATCH 0835/2091] powerpc/eeh: Probe after unbalanced kref check commit e642d11bdbfe8eb10116ab3959a2b5d75efda832 upstream. In the complete hotplug case, EEH PEs are supposed to be released and set to NULL. Normally, this is done by eeh_remove_device(), which is called from pcibios_release_device(). However, if something is holding a kref to the device, it will not be released, and the PE will remain. 
eeh_add_device_late() has a check for this which will explictly destroy the PE in this case. This check in eeh_add_device_late() occurs after a call to eeh_ops->probe(). On PowerNV, probe is a pointer to pnv_eeh_probe(), which will exit without probing if there is an existing PE. This means that on PowerNV, devices with outstanding krefs will not be rediscovered by EEH correctly after a complete hotplug. This is affecting CXL (CAPI) devices in the field. Put the probe after the kref check so that the PE is destroyed and affected devices are correctly rediscovered by EEH. Fixes: d91dafc02f42 ("powerpc/eeh: Delay probing EEH device during hotplug") Cc: Gavin Shan Signed-off-by: Daniel Axtens Acked-by: Gavin Shan Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/eeh.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 9ee61d15653d6..1d94bc2a10476 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -1118,9 +1118,6 @@ void eeh_add_device_late(struct pci_dev *dev) return; } - if (eeh_has_flag(EEH_PROBE_MODE_DEV)) - eeh_ops->probe(pdn, NULL); - /* * The EEH cache might not be removed correctly because of * unbalanced kref to the device during unplug time, which @@ -1144,6 +1141,9 @@ void eeh_add_device_late(struct pci_dev *dev) dev->dev.archdata.edev = NULL; } + if (eeh_has_flag(EEH_PROBE_MODE_DEV)) + eeh_ops->probe(pdn, NULL); + edev->pdev = dev; dev->dev.archdata.edev = edev; From f1ab3c0449ca09b72e6eea7eb077244bfffb7478 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 28 Aug 2015 11:57:00 +1000 Subject: [PATCH 0836/2091] powerpc/eeh: Fix fenced PHB caused by eeh_slot_error_detail() commit 259800135c654a098d9f0adfdd3d1f20eef1f231 upstream. The config space of some PCI devices can't be accessed when their PEs are in frozen state. Otherwise, fenced PHB might be seen. Those PEs are identified with flag EEH_PE_CFG_RESTRICTED, meaing EEH_PE_CFG_BLOCKED is set automatically when the PE is put to frozen state (EEH_PE_ISOLATED). eeh_slot_error_detail() restores PCI device BARs with eeh_pe_restore_bars(), which then calls eeh_ops->restore_config() to reinitialize the PCI device in (OPAL) firmware. eeh_ops->restore_config() produces PCI config access that causes fenced PHB. The problem was reported on below adapter: 0001:01:00.0 0200: 14e4:168e (rev 10) 0001:01:00.0 Ethernet controller: Broadcom Corporation \ NetXtreme II BCM57810 10 Gigabit Ethernet (rev 10) This fixes the issue by skipping eeh_pe_restore_bars() in eeh_slot_error_detail() when EEH_PE_CFG_BLOCKED is set for the PE. Fixes: b6541db1 ("powerpc/eeh: Block PCI config access upon frozen PE") Reported-by: Manvanthara B. Puttashankar Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/eeh.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 1d94bc2a10476..cb565ad0a5b63 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -310,11 +310,26 @@ void eeh_slot_error_detail(struct eeh_pe *pe, int severity) if (!(pe->type & EEH_PE_PHB)) { if (eeh_has_flag(EEH_ENABLE_IO_FOR_LOG)) eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); + + /* + * The config space of some PCI devices can't be accessed + * when their PEs are in frozen state. Otherwise, fenced + * PHB might be seen. 
Those PEs are identified with flag + * EEH_PE_CFG_RESTRICTED, indicating EEH_PE_CFG_BLOCKED + * is set automatically when the PE is put to EEH_PE_ISOLATED. + * + * Restoring BARs possibly triggers PCI config access in + * (OPAL) firmware and then causes fenced PHB. If the + * PCI config is blocked with flag EEH_PE_CFG_BLOCKED, it's + * pointless to restore BARs and dump config space. + */ eeh_ops->configure_bridge(pe); - eeh_pe_restore_bars(pe); + if (!(pe->state & EEH_PE_CFG_BLOCKED)) { + eeh_pe_restore_bars(pe); - pci_regs_buf[0] = 0; - eeh_pe_traverse(pe, eeh_dump_pe_log, &loglen); + pci_regs_buf[0] = 0; + eeh_pe_traverse(pe, eeh_dump_pe_log, &loglen); + } } eeh_ops->get_log(pe, severity, pci_regs_buf, loglen); From e4b334216318a348011304a8f83e8c33d9664f38 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 7 Aug 2015 16:19:43 +1000 Subject: [PATCH 0837/2091] powerpc/mm: Fix pte_pagesize_index() crash on 4K w/64K hash commit 74b5037baa2011a2799e2c43adde7d171b072f9e upstream. The powerpc kernel can be built to have either a 4K PAGE_SIZE or a 64K PAGE_SIZE. However when built with a 4K PAGE_SIZE there is an additional config option which can be enabled, PPC_HAS_HASH_64K, which means the kernel also knows how to hash a 64K page even though the base PAGE_SIZE is 4K. This is used in one obscure configuration, to support 64K pages for SPU local store on the Cell processor when the rest of the kernel is using 4K pages. In this configuration, pte_pagesize_index() is defined to just pass through its arguments to get_slice_psize(). However pte_pagesize_index() is called for both user and kernel addresses, whereas get_slice_psize() only knows how to handle user addresses. This has been broken forever, however until recently it happened to work. That was because in get_slice_psize() the large kernel address would cause the right shift of the slice mask to return zero. However in commit 7aa0727f3302 ("powerpc/mm: Increase the slice range to 64TB"), the get_slice_psize() code was changed so that instead of a right shift we do an array lookup based on the address. When passed a kernel address this means we index way off the end of the slice array and return random junk. That is only fatal if we happen to hit something non-zero, but when we do return a non-zero value we confuse the MMU code and eventually cause a check stop. This fix is ugly, but simple. When we're called for a kernel address we return 4K, which is always correct in this configuration, otherwise we use the slice mask. Fixes: 7aa0727f3302 ("powerpc/mm: Increase the slice range to 64TB") Reported-by: Cyril Bur Signed-off-by: Michael Ellerman Reviewed-by: Aneesh Kumar K.V Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/pgtable-ppc64.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h index 43e6ad424c7fc..88d27e3258d2b 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64.h +++ b/arch/powerpc/include/asm/pgtable-ppc64.h @@ -135,7 +135,19 @@ #define pte_iterate_hashed_end() } while(0) #ifdef CONFIG_PPC_HAS_HASH_64K -#define pte_pagesize_index(mm, addr, pte) get_slice_psize(mm, addr) +/* + * We expect this to be called only for user addresses or kernel virtual + * addresses other than the linear mapping. 
+ */ +#define pte_pagesize_index(mm, addr, pte) \ + ({ \ + unsigned int psize; \ + if (is_kernel_addr(addr)) \ + psize = MMU_PAGE_4K; \ + else \ + psize = get_slice_psize(mm, addr); \ + psize; \ + }) #else #define pte_pagesize_index(mm, addr, pte) MMU_PAGE_4K #endif From ce813f1fef5077fb318c0802aa0be148a580e632 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Fri, 17 Jul 2015 12:46:58 +0200 Subject: [PATCH 0838/2091] powerpc/rtas: Introduce rtas_get_sensor_fast() for IRQ handlers commit 1c2cb594441d02815d304cccec9742ff5c707495 upstream. The EPOW interrupt handler uses rtas_get_sensor(), which in turn uses rtas_busy_delay() to wait for RTAS becoming ready in case it is necessary. But rtas_busy_delay() is annotated with might_sleep() and thus may not be used by interrupts handlers like the EPOW handler! This leads to the following BUG when CONFIG_DEBUG_ATOMIC_SLEEP is enabled: BUG: sleeping function called from invalid context at arch/powerpc/kernel/rtas.c:496 in_atomic(): 1, irqs_disabled(): 1, pid: 0, name: swapper/1 CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.2.0-rc2-thuth #6 Call Trace: [c00000007ffe7b90] [c000000000807670] dump_stack+0xa0/0xdc (unreliable) [c00000007ffe7bc0] [c0000000000e1f14] ___might_sleep+0x134/0x180 [c00000007ffe7c20] [c00000000002aec0] rtas_busy_delay+0x30/0xd0 [c00000007ffe7c50] [c00000000002bde4] rtas_get_sensor+0x74/0xe0 [c00000007ffe7ce0] [c000000000083264] ras_epow_interrupt+0x44/0x450 [c00000007ffe7d90] [c000000000120260] handle_irq_event_percpu+0xa0/0x300 [c00000007ffe7e70] [c000000000120524] handle_irq_event+0x64/0xc0 [c00000007ffe7eb0] [c000000000124dbc] handle_fasteoi_irq+0xec/0x260 [c00000007ffe7ef0] [c00000000011f4f0] generic_handle_irq+0x50/0x80 [c00000007ffe7f20] [c000000000010f3c] __do_irq+0x8c/0x200 [c00000007ffe7f90] [c0000000000236cc] call_do_irq+0x14/0x24 [c00000007e6f39e0] [c000000000011144] do_IRQ+0x94/0x110 [c00000007e6f3a30] [c000000000002594] hardware_interrupt_common+0x114/0x180 Fix this issue by introducing a new rtas_get_sensor_fast() function that does not use rtas_busy_delay() - and thus can only be used for sensors that do not cause a BUSY condition - known as "fast" sensors. The EPOW sensor is defined to be "fast" in sPAPR - mpe. 
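The distinction the patch relies on is between a helper that may sleep while firmware keeps reporting busy and a variant that never sleeps and is therefore safe in interrupt context, but only for sensors known not to return a busy status. A minimal userspace sketch of that split; the busy simulation and all names are invented for illustration and are not the RTAS code:

#include <stdio.h>
#include <unistd.h>

#define SENSOR_BUSY  (-2)   /* firmware asks the caller to retry */

/* Pretend firmware call: reports busy a few times, then the value. */
static int fw_read_sensor(int sensor, int *state)
{
    static int busy_left = 3;

    (void)sensor;
    if (busy_left-- > 0)
        return SENSOR_BUSY;
    *state = 1;
    return 0;
}

/* May sleep: fine in process context, forbidden in IRQ handlers. */
static int get_sensor(int sensor, int *state)
{
    int rc;

    while ((rc = fw_read_sensor(sensor, state)) == SENSOR_BUSY)
        usleep(1000);   /* the sleeping part */
    return rc;
}

/* Never sleeps: usable from an interrupt handler, but only for
 * "fast" sensors that are known not to report busy. */
static int get_sensor_fast(int sensor, int *state)
{
    int rc = fw_read_sensor(sensor, state);

    if (rc == SENSOR_BUSY)
        fprintf(stderr, "unexpected busy from a 'fast' sensor\n");
    return rc;
}

int main(void)
{
    int state;

    if (!get_sensor(0, &state))
        printf("slow path read state %d\n", state);
    if (!get_sensor_fast(0, &state))
        printf("fast path read state %d\n", state);
    return 0;
}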
Fixes: 587f83e8dd50 ("powerpc/pseries: Use rtas_get_sensor in RAS code") Signed-off-by: Thomas Huth Reviewed-by: Nathan Fontenot Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/rtas.h | 1 + arch/powerpc/kernel/rtas.c | 17 +++++++++++++++++ arch/powerpc/platforms/pseries/ras.c | 3 ++- 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index 7a4ede16b2836..b77ef369c0f0e 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -343,6 +343,7 @@ extern void rtas_power_off(void); extern void rtas_halt(void); extern void rtas_os_term(char *str); extern int rtas_get_sensor(int sensor, int index, int *state); +extern int rtas_get_sensor_fast(int sensor, int index, int *state); extern int rtas_get_power_level(int powerdomain, int *level); extern int rtas_set_power_level(int powerdomain, int level, int *setlevel); extern bool rtas_indicator_present(int token, int *maxindex); diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 7a488c108410b..caffb10e7aa3e 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -584,6 +584,23 @@ int rtas_get_sensor(int sensor, int index, int *state) } EXPORT_SYMBOL(rtas_get_sensor); +int rtas_get_sensor_fast(int sensor, int index, int *state) +{ + int token = rtas_token("get-sensor-state"); + int rc; + + if (token == RTAS_UNKNOWN_SERVICE) + return -ENOENT; + + rc = rtas_call(token, 2, 2, state, sensor, index); + WARN_ON(rc == RTAS_BUSY || (rc >= RTAS_EXTENDED_DELAY_MIN && + rc <= RTAS_EXTENDED_DELAY_MAX)); + + if (rc < 0) + return rtas_error_rc(rc); + return rc; +} + bool rtas_indicator_present(int token, int *maxindex) { int proplen, count, i; diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 02e4a17455164..3b6647e574b6d 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -189,7 +189,8 @@ static irqreturn_t ras_epow_interrupt(int irq, void *dev_id) int state; int critical; - status = rtas_get_sensor(EPOW_SENSOR_TOKEN, EPOW_SENSOR_INDEX, &state); + status = rtas_get_sensor_fast(EPOW_SENSOR_TOKEN, EPOW_SENSOR_INDEX, + &state); if (state > 3) critical = 1; /* Time Critical */ From b40924367be91f86c882a6a023a50fbfd7f9c430 Mon Sep 17 00:00:00 2001 From: Leonidas Da Silva Barbosa Date: Mon, 13 Jul 2015 13:51:01 -0300 Subject: [PATCH 0839/2091] powerpc: Uncomment and make enable_kernel_vsx() routine available commit 72cd7b44bc99376b3f3c93cedcd052663fcdf705 upstream. enable_kernel_vsx() function was commented since anything was using it. However, vmx-crypto driver uses VSX instructions which are only available if VSX is enable. Otherwise it rises an exception oops. This patch uncomment enable_kernel_vsx() routine and makes it available. Signed-off-by: Leonidas S. 
Barbosa Signed-off-by: Herbert Xu Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/switch_to.h | 1 + arch/powerpc/kernel/process.c | 3 --- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h index 58abeda64cb7a..15cca17cba4b9 100644 --- a/arch/powerpc/include/asm/switch_to.h +++ b/arch/powerpc/include/asm/switch_to.h @@ -29,6 +29,7 @@ static inline void save_early_sprs(struct thread_struct *prev) {} extern void enable_kernel_fp(void); extern void enable_kernel_altivec(void); +extern void enable_kernel_vsx(void); extern int emulate_altivec(struct pt_regs *); extern void __giveup_vsx(struct task_struct *); extern void giveup_vsx(struct task_struct *); diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index febb50dd53285..0596373cd1c31 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -204,8 +204,6 @@ EXPORT_SYMBOL_GPL(flush_altivec_to_thread); #endif /* CONFIG_ALTIVEC */ #ifdef CONFIG_VSX -#if 0 -/* not currently used, but some crazy RAID module might want to later */ void enable_kernel_vsx(void) { WARN_ON(preemptible()); @@ -220,7 +218,6 @@ void enable_kernel_vsx(void) #endif /* CONFIG_SMP */ } EXPORT_SYMBOL(enable_kernel_vsx); -#endif void giveup_vsx(struct task_struct *tsk) { From 98747a5651a63b7e9246880909013753c7b633eb Mon Sep 17 00:00:00 2001 From: Leonidas Da Silva Barbosa Date: Mon, 13 Jul 2015 13:51:39 -0300 Subject: [PATCH 0840/2091] crypto: vmx - Adding enable_kernel_vsx() to access VSX instructions commit 2d6f0600b2cd755959527230ef5a6fba97bb762a upstream. vmx-crypto driver make use of some VSX instructions which are only available if VSX is enabled. Running in cases where VSX are not enabled vmx-crypto fails in a VSX exception. In order to fix this enable_kernel_vsx() was added to turn on VSX instructions for vmx-crypto. Signed-off-by: Leonidas S. 
Barbosa Signed-off-by: Herbert Xu Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/vmx/aes.c | 3 +++ drivers/crypto/vmx/aes_cbc.c | 3 +++ drivers/crypto/vmx/aes_ctr.c | 3 +++ drivers/crypto/vmx/ghash.c | 4 ++++ 4 files changed, 13 insertions(+) diff --git a/drivers/crypto/vmx/aes.c b/drivers/crypto/vmx/aes.c index ab300ea19434e..41f93334cc441 100644 --- a/drivers/crypto/vmx/aes.c +++ b/drivers/crypto/vmx/aes.c @@ -80,6 +80,7 @@ static int p8_aes_setkey(struct crypto_tfm *tfm, const u8 *key, pagefault_disable(); enable_kernel_altivec(); + enable_kernel_vsx(); ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key); ret += aes_p8_set_decrypt_key(key, keylen * 8, &ctx->dec_key); pagefault_enable(); @@ -97,6 +98,7 @@ static void p8_aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) } else { pagefault_disable(); enable_kernel_altivec(); + enable_kernel_vsx(); aes_p8_encrypt(src, dst, &ctx->enc_key); pagefault_enable(); } @@ -111,6 +113,7 @@ static void p8_aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) } else { pagefault_disable(); enable_kernel_altivec(); + enable_kernel_vsx(); aes_p8_decrypt(src, dst, &ctx->dec_key); pagefault_enable(); } diff --git a/drivers/crypto/vmx/aes_cbc.c b/drivers/crypto/vmx/aes_cbc.c index 1a559b7dddb5f..c8e7f653e5d36 100644 --- a/drivers/crypto/vmx/aes_cbc.c +++ b/drivers/crypto/vmx/aes_cbc.c @@ -81,6 +81,7 @@ static int p8_aes_cbc_setkey(struct crypto_tfm *tfm, const u8 *key, pagefault_disable(); enable_kernel_altivec(); + enable_kernel_vsx(); ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key); ret += aes_p8_set_decrypt_key(key, keylen * 8, &ctx->dec_key); pagefault_enable(); @@ -108,6 +109,7 @@ static int p8_aes_cbc_encrypt(struct blkcipher_desc *desc, } else { pagefault_disable(); enable_kernel_altivec(); + enable_kernel_vsx(); blkcipher_walk_init(&walk, dst, src, nbytes); ret = blkcipher_walk_virt(desc, &walk); @@ -143,6 +145,7 @@ static int p8_aes_cbc_decrypt(struct blkcipher_desc *desc, } else { pagefault_disable(); enable_kernel_altivec(); + enable_kernel_vsx(); blkcipher_walk_init(&walk, dst, src, nbytes); ret = blkcipher_walk_virt(desc, &walk); diff --git a/drivers/crypto/vmx/aes_ctr.c b/drivers/crypto/vmx/aes_ctr.c index 96dbee4bf4a6d..266e708d63df9 100644 --- a/drivers/crypto/vmx/aes_ctr.c +++ b/drivers/crypto/vmx/aes_ctr.c @@ -79,6 +79,7 @@ static int p8_aes_ctr_setkey(struct crypto_tfm *tfm, const u8 *key, pagefault_disable(); enable_kernel_altivec(); + enable_kernel_vsx(); ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key); pagefault_enable(); @@ -97,6 +98,7 @@ static void p8_aes_ctr_final(struct p8_aes_ctr_ctx *ctx, pagefault_disable(); enable_kernel_altivec(); + enable_kernel_vsx(); aes_p8_encrypt(ctrblk, keystream, &ctx->enc_key); pagefault_enable(); @@ -127,6 +129,7 @@ static int p8_aes_ctr_crypt(struct blkcipher_desc *desc, while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) { pagefault_disable(); enable_kernel_altivec(); + enable_kernel_vsx(); aes_p8_ctr32_encrypt_blocks(walk.src.virt.addr, walk.dst.virt.addr, (nbytes & AES_BLOCK_MASK)/AES_BLOCK_SIZE, &ctx->enc_key, walk.iv); pagefault_enable(); diff --git a/drivers/crypto/vmx/ghash.c b/drivers/crypto/vmx/ghash.c index d0ffe277af5ca..917b3f09e724e 100644 --- a/drivers/crypto/vmx/ghash.c +++ b/drivers/crypto/vmx/ghash.c @@ -116,6 +116,7 @@ static int p8_ghash_setkey(struct crypto_shash *tfm, const u8 *key, pagefault_disable(); enable_kernel_altivec(); + enable_kernel_vsx(); enable_kernel_fp(); gcm_init_p8(ctx->htable, 
(const u64 *) key); pagefault_enable(); @@ -142,6 +143,7 @@ static int p8_ghash_update(struct shash_desc *desc, GHASH_DIGEST_SIZE - dctx->bytes); pagefault_disable(); enable_kernel_altivec(); + enable_kernel_vsx(); enable_kernel_fp(); gcm_ghash_p8(dctx->shash, ctx->htable, dctx->buffer, GHASH_DIGEST_SIZE); @@ -154,6 +156,7 @@ static int p8_ghash_update(struct shash_desc *desc, if (len) { pagefault_disable(); enable_kernel_altivec(); + enable_kernel_vsx(); enable_kernel_fp(); gcm_ghash_p8(dctx->shash, ctx->htable, src, len); pagefault_enable(); @@ -182,6 +185,7 @@ static int p8_ghash_final(struct shash_desc *desc, u8 *out) dctx->buffer[i] = 0; pagefault_disable(); enable_kernel_altivec(); + enable_kernel_vsx(); enable_kernel_fp(); gcm_ghash_p8(dctx->shash, ctx->htable, dctx->buffer, GHASH_DIGEST_SIZE); From b46f51da057f2a4728f8cc4e30e64e297151db76 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 15 Sep 2015 11:24:17 +1000 Subject: [PATCH 0841/2091] powerpc/boot: Specify ABI v2 when building an LE boot wrapper commit 655471f54c2e395ba29ae4156ba0f49928177cc1 upstream. The kernel does it, not the boot wrapper, which breaks with some cross compilers that still default to ABI v1. Fixes: 147c05168fc8 ("powerpc/boot: Add support for 64bit little endian wrapper") Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/boot/Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index 73eddda53b8ed..4eec430d8fa86 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -28,6 +28,9 @@ BOOTCFLAGS += -m64 endif ifdef CONFIG_CPU_BIG_ENDIAN BOOTCFLAGS += -mbig-endian +else +BOOTCFLAGS += -mlittle-endian +BOOTCFLAGS += $(call cc-option,-mabi=elfv2) endif BOOTAFLAGS := -D__ASSEMBLY__ $(BOOTCFLAGS) -traditional -nostdinc From f5a73e9c4a1c5dc9f1d90abf782c5f9ac694e443 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 15 Sep 2015 12:30:08 +0530 Subject: [PATCH 0842/2091] powerpc/mm: Recompute hash value after a failed update commit 36b35d5d807b7e57aff7d08e63de8b17731ee211 upstream. If we had secondary hash flag set, we ended up modifying hash value in the updatepp code path. Hence with a failed updatepp we will be using a wrong hash value for the following hash insert. Fix this by recomputing hash before insert. Without this patch we can end up with using wrong slot number in linux pte. That can result in us missing an hash pte update or invalidate which can cause memory corruption or even machine check. 
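The hazard fixed here is generic: a value derived from the inputs is mutated during a failed first attempt and then silently reused on the fallback path. A small standalone sketch of the bug pattern and of the fix (recompute from the inputs before each use); the hash function and names are invented for illustration:

#include <stdio.h>

/* Toy stand-ins for hpt_hash() and the secondary-hash convention. */
static unsigned long toy_hash(unsigned long vpn)
{
    return vpn * 2654435761UL;
}

/* Pretend the update of the existing entry always fails, forcing
 * the insert path. */
static int try_update(unsigned long hash)
{
    (void)hash;
    return -1;
}

int main(void)
{
    unsigned long vpn = 0x1234;
    unsigned long hash = toy_hash(vpn);
    int secondary = 1;

    if (secondary)
        hash = ~hash;       /* the update path mutates 'hash' */

    if (try_update(hash) < 0) {
        /* Bug pattern: inserting with the mutated 'hash' here would
         * use the wrong (inverted) value.  Recompute it from the
         * original inputs before the insert, as the fix does. */
        hash = toy_hash(vpn);
        printf("insert with recomputed hash %#lx\n", hash);
    }
    return 0;
}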
Fixes: 6d492ecc6489 ("powerpc/THP: Add code to handle HPTE faults for hugepages") Signed-off-by: Aneesh Kumar K.V Reviewed-by: Paul Mackerras Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/mm/hugepage-hash64.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c index 43dafb9d6a46f..4d87122cf6a72 100644 --- a/arch/powerpc/mm/hugepage-hash64.c +++ b/arch/powerpc/mm/hugepage-hash64.c @@ -85,7 +85,6 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, BUG_ON(index >= 4096); vpn = hpt_vpn(ea, vsid, ssize); - hash = hpt_hash(vpn, shift, ssize); hpte_slot_array = get_hpte_slot_array(pmdp); if (psize == MMU_PAGE_4K) { /* @@ -101,6 +100,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, valid = hpte_valid(hpte_slot_array, index); if (valid) { /* update the hpte bits */ + hash = hpt_hash(vpn, shift, ssize); hidx = hpte_hash_index(hpte_slot_array, index); if (hidx & _PTEIDX_SECONDARY) hash = ~hash; @@ -126,6 +126,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, if (!valid) { unsigned long hpte_group; + hash = hpt_hash(vpn, shift, ssize); /* insert new entry */ pa = pmd_pfn(__pmd(old_pmd)) << PAGE_SHIFT; new_pmd |= _PAGE_HASHPTE; From c1589311c9e46c84aea04546b65be12481db8d81 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 11 Sep 2015 16:27:27 +0200 Subject: [PATCH 0843/2091] CIFS: fix type confusion in copy offload ioctl commit 4c17a6d56bb0cad3066a714e94f7185a24b40f49 upstream. This might lead to local privilege escalation (code execution as kernel) for systems where the following conditions are met: - CONFIG_CIFS_SMB2 and CONFIG_CIFS_POSIX are enabled - a cifs filesystem is mounted where: - the mount option "vers" was used and set to a value >=2.0 - the attacker has write access to at least one file on the filesystem To attack this, an attacker would have to guess the target_tcon pointer (but guessing wrong doesn't cause a crash, it just returns an error code) and win a narrow race. Signed-off-by: Jann Horn Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/ioctl.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c index 8b7898b7670f8..64a9bca976d05 100644 --- a/fs/cifs/ioctl.c +++ b/fs/cifs/ioctl.c @@ -67,6 +67,12 @@ static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file, goto out_drop_write; } + if (src_file.file->f_op->unlocked_ioctl != cifs_ioctl) { + rc = -EBADF; + cifs_dbg(VFS, "src file seems to be from a different filesystem type\n"); + goto out_fput; + } + if ((!src_file.file->private_data) || (!dst_file->private_data)) { rc = -EBADF; cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n"); From a75c632bfbd637a80832511b91ab4f46cbc88175 Mon Sep 17 00:00:00 2001 From: Jeffery Miller Date: Tue, 1 Sep 2015 11:23:02 -0400 Subject: [PATCH 0844/2091] Add radeon suspend/resume quirk for HP Compaq dc5750. commit 09bfda10e6efd7b65bcc29237bee1765ed779657 upstream. With the radeon driver loaded the HP Compaq dc5750 Small Form Factor machine fails to resume from suspend. Adding a quirk similar to other devices avoids the problem and the system resumes properly. 
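The quirk follows the usual pattern of matching on PCI subsystem vendor/device IDs before skipping the problematic table. A standalone sketch of such a table-driven check; the 0x103c/0x280a pair is the one the quirk below matches, while the table layout and names are invented:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct quirk {
    unsigned short subsystem_vendor;
    unsigned short subsystem_device;
    const char *reason;
};

static const struct quirk skip_dynclk_quirks[] = {
    { 0x103c, 0x280a, "HP Compaq dc5750: hangs in dynclk table on resume" },
};

static bool needs_quirk(unsigned short sv, unsigned short sd)
{
    for (size_t i = 0;
         i < sizeof(skip_dynclk_quirks) / sizeof(skip_dynclk_quirks[0]); i++)
        if (skip_dynclk_quirks[i].subsystem_vendor == sv &&
            skip_dynclk_quirks[i].subsystem_device == sd)
            return true;
    return false;
}

int main(void)
{
    printf("dc5750 quirk: %d\n", needs_quirk(0x103c, 0x280a));
    printf("other board : %d\n", needs_quirk(0x1028, 0x0001));
    return 0;
}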
Signed-off-by: Jeffery Miller Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/radeon_combios.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c index c097d3a82bda7..a9b01bcf7d0a2 100644 --- a/drivers/gpu/drm/radeon/radeon_combios.c +++ b/drivers/gpu/drm/radeon/radeon_combios.c @@ -3387,6 +3387,14 @@ void radeon_combios_asic_init(struct drm_device *dev) rdev->pdev->subsystem_device == 0x30ae) return; + /* quirk for rs4xx HP Compaq dc5750 Small Form Factor to make it resume + * - it hangs on resume inside the dynclk 1 table. + */ + if (rdev->family == CHIP_RS480 && + rdev->pdev->subsystem_vendor == 0x103c && + rdev->pdev->subsystem_device == 0x280a) + return; + /* DYN CLK 1 */ table = combios_get_table_offset(dev, COMBIOS_DYN_CLK_1_TABLE); if (table) From 397995fe3a36cf86913f724ee1d1933443e86d06 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 4 Sep 2015 15:42:39 -0700 Subject: [PATCH 0845/2091] mm: check if section present during memory block registering commit 04697858d89e4bf2650364f8d6956e2554e8ef88 upstream. Tony Luck found on his setup, if memory block size 512M will cause crash during booting. BUG: unable to handle kernel paging request at ffffea0074000020 IP: get_nid_for_pfn+0x17/0x40 PGD 128ffcb067 PUD 128ffc9067 PMD 0 Oops: 0000 [#1] SMP Modules linked in: CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.2.0-rc8 #1 ... Call Trace: ? register_mem_sect_under_node+0x66/0xe0 register_one_node+0x17b/0x240 ? pci_iommu_alloc+0x6e/0x6e topology_init+0x3c/0x95 do_one_initcall+0xcd/0x1f0 The system has non continuous RAM address: BIOS-e820: [mem 0x0000001300000000-0x0000001cffffffff] usable BIOS-e820: [mem 0x0000001d70000000-0x0000001ec7ffefff] usable BIOS-e820: [mem 0x0000001f00000000-0x0000002bffffffff] usable BIOS-e820: [mem 0x0000002c18000000-0x0000002d6fffefff] usable BIOS-e820: [mem 0x0000002e00000000-0x00000039ffffffff] usable So there are start sections in memory block not present. For example: memory block : [0x2c18000000, 0x2c20000000) 512M first three sections are not present. The current register_mem_sect_under_node() assume first section is present, but memory block section number range [start_section_nr, end_section_nr] would include not present section. For arch that support vmemmap, we don't setup memmap for struct page area within not present sections area. So skip the pfn range that belong to absent section. [akpm@linux-foundation.org: simplification] [rientjes@google.com: more simplification] Fixes: bdee237c0343 ("x86: mm: Use 2GB memory block size on large memory x86-64 systems") Fixes: 982792c782ef ("x86, mm: probe memory block size for generic x86 64bit") Signed-off-by: Yinghai Lu Signed-off-by: David Rientjes Reported-by: Tony Luck Tested-by: Tony Luck Cc: Greg KH Cc: Ingo Molnar Tested-by: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/base/node.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/base/node.c b/drivers/base/node.c index a2aa65b4215d3..b10479c873576 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -388,6 +388,16 @@ int register_mem_sect_under_node(struct memory_block *mem_blk, int nid) for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) { int page_nid; + /* + * memory block could have several absent sections from start. 
+ * skip pfn range from absent section + */ + if (!pfn_present(pfn)) { + pfn = round_down(pfn + PAGES_PER_SECTION, + PAGES_PER_SECTION) - 1; + continue; + } + page_nid = get_nid_for_pfn(pfn); if (page_nid < 0) continue; From f48b9555a631b2363feb0be735f4d608b4c72e3c Mon Sep 17 00:00:00 2001 From: Minfei Huang Date: Sun, 12 Jul 2015 20:18:42 +0800 Subject: [PATCH 0846/2091] x86/mm: Initialize pmd_idx in page_table_range_init_count() commit 9962eea9e55f797f05f20ba6448929cab2a9f018 upstream. The variable pmd_idx is not initialized for the first iteration of the for loop. Assign the proper value which indexes the start address. Fixes: 719272c45b82 'x86, mm: only call early_ioremap_page_table_range_init() once' Signed-off-by: Minfei Huang Cc: tony.luck@intel.com Cc: wangnan0@huawei.com Cc: david.vrabel@citrix.com Reviewed-by: yinghai@kernel.org Link: http://lkml.kernel.org/r/1436703522-29552-1-git-send-email-mhuang@redhat.com Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/init_32.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index c8140e12816a5..c23ab1ee3a9a9 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -137,6 +137,7 @@ page_table_range_init_count(unsigned long start, unsigned long end) vaddr = start; pgd_idx = pgd_index(vaddr); + pmd_idx = pmd_index(vaddr); for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd_idx++) { for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); From c85ea6919f1cf092ad6418323e49faac6ac7cf3c Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 21 Aug 2015 14:11:51 -0700 Subject: [PATCH 0847/2091] mm: make page pfmemalloc check more robust commit 2f064f3485cd29633ad1b3cfb00cc519509a3d72 upstream. Commit c48a11c7ad26 ("netvm: propagate page->pfmemalloc to skb") added checks for page->pfmemalloc to __skb_fill_page_desc(): if (page->pfmemalloc && !page->mapping) skb->pfmemalloc = true; It assumes page->mapping == NULL implies that page->pfmemalloc can be trusted. However, __delete_from_page_cache() can set set page->mapping to NULL and leave page->index value alone. Due to being in union, a non-zero page->index will be interpreted as true page->pfmemalloc. So the assumption is invalid if the networking code can see such a page. And it seems it can. We have encountered this with a NFS over loopback setup when such a page is attached to a new skbuf. There is no copying going on in this case so the page confuses __skb_fill_page_desc which interprets the index as pfmemalloc flag and the network stack drops packets that have been allocated using the reserves unless they are to be queued on sockets handling the swapping which is the case here and that leads to hangs when the nfs client waits for a response from the server which has been dropped and thus never arrive. The struct page is already heavily packed so rather than finding another hole to put it in, let's do a trick instead. We can reuse the index again but define it to an impossible value (-1UL). This is the page index so it should never see the value that large. Replace all direct users of page->pfmemalloc by page_is_pfmemalloc which will hide this nastiness from unspoiled eyes. The information will get lost if somebody wants to use page->index obviously but that was the case before and the original code expected that the information should be persisted somewhere else if that is really needed (e.g. what SLAB and SLUB do). 
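The trick, in isolation: reuse a union'd index field and reserve a value the field can never legitimately hold (-1UL) as the flag, hidden behind accessors so callers never look at the raw field. A simplified standalone model of that pattern; this is not the real struct page, only an illustration:

#include <stdbool.h>
#include <stdio.h>

/* Simplified stand-in for struct page: 'index' and 'freelist' share
 * storage, like the real union in mm_types.h. */
struct toy_page {
    union {
        unsigned long index;
        void *freelist;
    };
};

/* -1UL can never be a real page index, so it can double as a flag. */
static void set_page_pfmemalloc(struct toy_page *p)   { p->index = -1UL; }
static void clear_page_pfmemalloc(struct toy_page *p) { p->index = 0; }
static bool page_is_pfmemalloc(const struct toy_page *p)
{
    return p->index == -1UL;
}

int main(void)
{
    struct toy_page p;

    set_page_pfmemalloc(&p);
    printf("pfmemalloc? %d\n", page_is_pfmemalloc(&p));
    clear_page_pfmemalloc(&p);
    printf("pfmemalloc? %d\n", page_is_pfmemalloc(&p));
    return 0;
}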
[akpm@linux-foundation.org: fix blooper in slub] Fixes: c48a11c7ad26 ("netvm: propagate page->pfmemalloc to skb") Signed-off-by: Michal Hocko Debugged-by: Vlastimil Babka Debugged-by: Jiri Bohac Cc: Eric Dumazet Cc: David Miller Acked-by: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/intel/fm10k/fm10k_main.c | 2 +- drivers/net/ethernet/intel/igb/igb_main.c | 2 +- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 2 +- .../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 2 +- include/linux/mm.h | 28 +++++++++++++++++++ include/linux/mm_types.h | 9 ------ include/linux/skbuff.h | 14 ++++------ mm/page_alloc.c | 7 +++-- mm/slab.c | 4 +-- mm/slub.c | 2 +- net/core/skbuff.c | 2 +- 11 files changed, 46 insertions(+), 28 deletions(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c index c754b2027281f..c9da1b5d4804d 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c @@ -216,7 +216,7 @@ static void fm10k_reuse_rx_page(struct fm10k_ring *rx_ring, static inline bool fm10k_page_is_reserved(struct page *page) { - return (page_to_nid(page) != numa_mem_id()) || page->pfmemalloc; + return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page); } static bool fm10k_can_reuse_rx_page(struct fm10k_rx_buffer *rx_buffer, diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index ec79e6461f568..4f6bf996851ec 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -6596,7 +6596,7 @@ static void igb_reuse_rx_page(struct igb_ring *rx_ring, static inline bool igb_page_is_reserved(struct page *page) { - return (page_to_nid(page) != numa_mem_id()) || page->pfmemalloc; + return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page); } static bool igb_can_reuse_rx_page(struct igb_rx_buffer *rx_buffer, diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 5be12a00e1f44..463ff47200f1a 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -1829,7 +1829,7 @@ static void ixgbe_reuse_rx_page(struct ixgbe_ring *rx_ring, static inline bool ixgbe_page_is_reserved(struct page *page) { - return (page_to_nid(page) != numa_mem_id()) || page->pfmemalloc; + return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page); } /** diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index e71cdde9cb017..1d7b00b038a2e 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -765,7 +765,7 @@ static void ixgbevf_reuse_rx_page(struct ixgbevf_ring *rx_ring, static inline bool ixgbevf_page_is_reserved(struct page *page) { - return (page_to_nid(page) != numa_mem_id()) || page->pfmemalloc; + return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page); } /** diff --git a/include/linux/mm.h b/include/linux/mm.h index 0755b9fd03a7d..b2085582d44e3 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1001,6 +1001,34 @@ static inline int page_mapped(struct page *page) return atomic_read(&(page)->_mapcount) >= 0; } +/* + * Return true only if the page has been allocated with + * ALLOC_NO_WATERMARKS and the low watermark was not + * met implying that the system is under some 
pressure. + */ +static inline bool page_is_pfmemalloc(struct page *page) +{ + /* + * Page index cannot be this large so this must be + * a pfmemalloc page. + */ + return page->index == -1UL; +} + +/* + * Only to be called by the page allocator on a freshly allocated + * page. + */ +static inline void set_page_pfmemalloc(struct page *page) +{ + page->index = -1UL; +} + +static inline void clear_page_pfmemalloc(struct page *page) +{ + page->index = 0; +} + /* * Different kinds of faults, as returned by handle_mm_fault(). * Used to decide whether a process gets delivered SIGBUS or diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 8d37e26a1007c..c0c6b33535fb1 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -63,15 +63,6 @@ struct page { union { pgoff_t index; /* Our offset within mapping. */ void *freelist; /* sl[aou]b first free object */ - bool pfmemalloc; /* If set by the page allocator, - * ALLOC_NO_WATERMARKS was set - * and the low watermark was not - * met implying that the system - * is under some pressure. The - * caller should try ensure - * this page is only used to - * free other pages. - */ }; union { diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f15154a879c71..eb1c55b8255a1 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1590,20 +1590,16 @@ static inline void __skb_fill_page_desc(struct sk_buff *skb, int i, skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; /* - * Propagate page->pfmemalloc to the skb if we can. The problem is - * that not all callers have unique ownership of the page. If - * pfmemalloc is set, we check the mapping as a mapping implies - * page->index is set (index and pfmemalloc share space). - * If it's a valid mapping, we cannot use page->pfmemalloc but we - * do not lose pfmemalloc information as the pages would not be - * allocated using __GFP_MEMALLOC. + * Propagate page pfmemalloc to the skb if we can. The problem is + * that not all callers have unique ownership of the page but rely + * on page_is_pfmemalloc doing the right thing(tm). */ frag->page.p = page; frag->page_offset = off; skb_frag_size_set(frag, size); page = compound_head(page); - if (page->pfmemalloc && !page->mapping) + if (page_is_pfmemalloc(page)) skb->pfmemalloc = true; } @@ -2250,7 +2246,7 @@ static inline struct page *dev_alloc_page(void) static inline void skb_propagate_pfmemalloc(struct page *page, struct sk_buff *skb) { - if (page && page->pfmemalloc) + if (page_is_pfmemalloc(page)) skb->pfmemalloc = true; } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index ebffa0e4a9c04..18490f3bd7f1f 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -983,12 +983,15 @@ static int prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags, set_page_owner(page, order, gfp_flags); /* - * page->pfmemalloc is set when ALLOC_NO_WATERMARKS was necessary to + * page is set pfmemalloc when ALLOC_NO_WATERMARKS was necessary to * allocate the page. The expectation is that the caller is taking * steps that will free more memory. The caller should avoid the page * being used for !PFMEMALLOC purposes. 
*/ - page->pfmemalloc = !!(alloc_flags & ALLOC_NO_WATERMARKS); + if (alloc_flags & ALLOC_NO_WATERMARKS) + set_page_pfmemalloc(page); + else + clear_page_pfmemalloc(page); return 0; } diff --git a/mm/slab.c b/mm/slab.c index 7eb38dd1cefa2..3dd2d1ff9d5d9 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -1602,7 +1602,7 @@ static struct page *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, } /* Record if ALLOC_NO_WATERMARKS was set when allocating the slab */ - if (unlikely(page->pfmemalloc)) + if (page_is_pfmemalloc(page)) pfmemalloc_active = true; nr_pages = (1 << cachep->gfporder); @@ -1613,7 +1613,7 @@ static struct page *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, add_zone_page_state(page_zone(page), NR_SLAB_UNRECLAIMABLE, nr_pages); __SetPageSlab(page); - if (page->pfmemalloc) + if (page_is_pfmemalloc(page)) SetPageSlabPfmemalloc(page); if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK)) { diff --git a/mm/slub.c b/mm/slub.c index 54c0876b43d55..08342c523a85b 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1427,7 +1427,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) inc_slabs_node(s, page_to_nid(page), page->objects); page->slab_cache = s; __SetPageSlab(page); - if (page->pfmemalloc) + if (page_is_pfmemalloc(page)) SetPageSlabPfmemalloc(page); start = page_address(page); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 41ec02242ea7c..a2e4e47b28391 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -340,7 +340,7 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size) if (skb && frag_size) { skb->head_frag = 1; - if (virt_to_head_page(data)->pfmemalloc) + if (page_is_pfmemalloc(virt_to_head_page(data))) skb->pfmemalloc = 1; } return skb; From 78cc6a0aa1812329d31c3633bef364e9b15f0051 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20H=C3=A4rdeman?= Date: Tue, 19 May 2015 19:03:12 -0300 Subject: [PATCH 0848/2091] rc-core: fix remove uevent generation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit a66b0c41ad277ae62a3ae6ac430a71882f899557 upstream. The input_dev is already gone when the rc device is being unregistered so checking for its presence only means that no remove uevent will be generated. Signed-off-by: David Härdeman Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/rc/rc-main.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/media/rc/rc-main.c b/drivers/media/rc/rc-main.c index f8c5e47a30aa7..0aba9ff921026 100644 --- a/drivers/media/rc/rc-main.c +++ b/drivers/media/rc/rc-main.c @@ -1191,9 +1191,6 @@ static int rc_dev_uevent(struct device *device, struct kobj_uevent_env *env) { struct rc_dev *dev = to_rc_dev(device); - if (!dev || !dev->input_dev) - return -ENODEV; - if (dev->rc_map.name) ADD_HOTPLUG_VAR("NAME=%s", dev->rc_map.name); if (dev->driver_name) From f82aa0e2a52daa65a7c6648c5d5d331c93d61774 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Fri, 12 Jun 2015 20:06:23 -0300 Subject: [PATCH 0849/2091] v4l: omap3isp: Fix sub-device power management code commit 9d39f05490115bf145e5ea03c0b7ec9d3d015b01 upstream. Commit 813f5c0ac5cc ("media: Change media device link_notify behaviour") modified the media controller link setup notification API and updated the OMAP3 ISP driver accordingly. As a side effect it introduced a bug by turning power on after setting the link instead of before. This results in sub-devices not being powered down in some cases when they should be. Fix it. 
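The ordering the fix restores can be stated independently of the driver: power is raised on the pre-link-change notification when a link is being enabled, and dropped on the post-link-change notification once it has been disabled. A toy sketch of that callback ordering, with all names invented:

#include <stdio.h>

enum notify { PRE_LINK_CHANGE, POST_LINK_CHANGE };

static int power_use;   /* pretend use count for the pipeline */

static void link_notify(enum notify when, int enabling)
{
    if (when == PRE_LINK_CHANGE && enabling) {
        power_use++;            /* power up *before* enabling */
        printf("powered up (use=%d)\n", power_use);
    } else if (when == POST_LINK_CHANGE && !enabling) {
        power_use--;            /* power down *after* disabling */
        printf("powered down (use=%d)\n", power_use);
    }
}

int main(void)
{
    /* Enable a link: power must already be on when it goes live. */
    link_notify(PRE_LINK_CHANGE, 1);
    link_notify(POST_LINK_CHANGE, 1);

    /* Disable it: power is released only once the link is down. */
    link_notify(PRE_LINK_CHANGE, 0);
    link_notify(POST_LINK_CHANGE, 0);
    return 0;
}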
Fixes: 813f5c0ac5cc [media] media: Change media device link_notify behaviour Signed-off-by: Sakari Ailus Signed-off-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/omap3isp/isp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/media/platform/omap3isp/isp.c b/drivers/media/platform/omap3isp/isp.c index 18d0a871747fe..947d8be7b2451 100644 --- a/drivers/media/platform/omap3isp/isp.c +++ b/drivers/media/platform/omap3isp/isp.c @@ -829,14 +829,14 @@ static int isp_pipeline_link_notify(struct media_link *link, u32 flags, int ret; if (notification == MEDIA_DEV_NOTIFY_POST_LINK_CH && - !(link->flags & MEDIA_LNK_FL_ENABLED)) { + !(flags & MEDIA_LNK_FL_ENABLED)) { /* Powering off entities is assumed to never fail. */ isp_pipeline_pm_power(source, -sink_use); isp_pipeline_pm_power(sink, -source_use); return 0; } - if (notification == MEDIA_DEV_NOTIFY_POST_LINK_CH && + if (notification == MEDIA_DEV_NOTIFY_PRE_LINK_CH && (flags & MEDIA_LNK_FL_ENABLED)) { ret = isp_pipeline_pm_power(source, sink_use); From 3eaf132eca3a276733faafcdd09fbf3e05091849 Mon Sep 17 00:00:00 2001 From: Benoit Parrot Date: Mon, 29 Jun 2015 18:19:06 -0300 Subject: [PATCH 0850/2091] media: am437x-vpfe: Requested frame size and fmt overwritten by current sensor setting commit f47c9045643f91e76d8a9030828b9fe1cf4a6bcf upstream. Upon a S_FMT the input/requested frame size and pixel format is overwritten by the current sub-device settings. Fix this so application can actually set the frame size and format. Fixes: 417d2e507edc ("[media] media: platform: add VPFE capture driver support for AM437X") Signed-off-by: Benoit Parrot Acked-by: Lad, Prabhakar Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/am437x/am437x-vpfe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/platform/am437x/am437x-vpfe.c b/drivers/media/platform/am437x/am437x-vpfe.c index a30cc2f7e4f12..4e86f8d8ca26f 100644 --- a/drivers/media/platform/am437x/am437x-vpfe.c +++ b/drivers/media/platform/am437x/am437x-vpfe.c @@ -1577,7 +1577,7 @@ static int vpfe_s_fmt(struct file *file, void *priv, return -EBUSY; } - ret = vpfe_try_fmt(file, priv, fmt); + ret = vpfe_try_fmt(file, priv, &format); if (ret) return ret; From c7f2670397e5a1ecfeb3fe5995306d44ba911404 Mon Sep 17 00:00:00 2001 From: Benoit Parrot Date: Wed, 15 Jul 2015 18:00:06 -0300 Subject: [PATCH 0851/2091] media: am437x-vpfe: Fix a race condition during release commit c99235fa3ef833c3c23926085f2bb68851c8460a upstream. There was a race condition where during cleanup/release operation on-going streaming would cause a kernel panic because the hardware module was disabled prematurely with IRQ still pending. 
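The shape of the fix is an ordering change in the release path: note whether this is the last user before the generic cleanup runs, let that cleanup stop any streaming, and only then disable the hardware. A toy model of that ordering under those assumptions, not the driver code:

#include <stdbool.h>
#include <stdio.h>

static int open_files = 1;      /* pretend file-handle count */
static bool streaming = true;

static void generic_release(void)
{
    /* The common helper stops streaming and drains pending work. */
    streaming = false;
    open_files--;
    printf("streaming stopped, %d file(s) left\n", open_files);
}

static void hw_close(void)
{
    /* Safe only once nothing is streaming and no IRQ is pending. */
    printf("hardware module disabled\n");
}

int main(void)
{
    /* Record "last user" status *before* the cleanup helper runs... */
    bool last_user = (open_files == 1);

    generic_release();

    /* ...and disable the hardware only afterwards, and only if we
     * really were the last user. */
    if (last_user)
        hw_close();
    return 0;
}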
Fixes: 417d2e507edc ("[media] media: platform: add VPFE capture driver support for AM437X") Signed-off-by: Benoit Parrot Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/am437x/am437x-vpfe.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/media/platform/am437x/am437x-vpfe.c b/drivers/media/platform/am437x/am437x-vpfe.c index 4e86f8d8ca26f..ddf59ee5ca400 100644 --- a/drivers/media/platform/am437x/am437x-vpfe.c +++ b/drivers/media/platform/am437x/am437x-vpfe.c @@ -1185,14 +1185,24 @@ static int vpfe_initialize_device(struct vpfe_device *vpfe) static int vpfe_release(struct file *file) { struct vpfe_device *vpfe = video_drvdata(file); + bool fh_singular; int ret; mutex_lock(&vpfe->lock); - if (v4l2_fh_is_singular_file(file)) - vpfe_ccdc_close(&vpfe->ccdc, vpfe->pdev); + /* Save the singular status before we call the clean-up helper */ + fh_singular = v4l2_fh_is_singular_file(file); + + /* the release helper will cleanup any on-going streaming */ ret = _vb2_fop_release(file, NULL); + /* + * If this was the last open file. + * Then de-initialize hw module. + */ + if (fh_singular) + vpfe_ccdc_close(&vpfe->ccdc, vpfe->pdev); + mutex_unlock(&vpfe->lock); return ret; From fe995635458e8967dedc75ed8d47fe208e77a9be Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 12 Aug 2015 11:54:35 +0100 Subject: [PATCH 0852/2091] Btrfs: check if previous transaction aborted to avoid fs corruption commit 1f9b8c8fbc9a4d029760b16f477b9d15500e3a34 upstream. While we are committing a transaction, it's possible the previous one is still finishing its commit and therefore we wait for it to finish first. However we were not checking if that previous transaction ended up getting aborted after we waited for it to commit, so we ended up committing the current transaction which can lead to fs corruption because the new superblock can point to trees that have had one or more nodes/leafs that were never durably persisted. The following sequence diagram exemplifies how this is possible: CPU 0 CPU 1 transaction N starts (...) btrfs_commit_transaction(N) cur_trans->state = TRANS_STATE_COMMIT_START; (...) cur_trans->state = TRANS_STATE_COMMIT_DOING; (...) cur_trans->state = TRANS_STATE_UNBLOCKED; root->fs_info->running_transaction = NULL; btrfs_start_transaction() --> starts transaction N + 1 btrfs_write_and_wait_transaction(trans, root); --> starts writing all new or COWed ebs created at transaction N creates some new ebs, COWs some existing ebs but doesn't COW or deletes eb X btrfs_commit_transaction(N + 1) (...) cur_trans->state = TRANS_STATE_COMMIT_START; (...) 
wait_for_commit(root, prev_trans); --> prev_trans == transaction N btrfs_write_and_wait_transaction() continues writing ebs --> fails writing eb X, we abort transaction N and set bit BTRFS_FS_STATE_ERROR on fs_info->fs_state, so no new transactions can start after setting that bit cleanup_transaction() btrfs_cleanup_one_transaction() wakes up task at CPU 1 continues, doesn't abort because cur_trans->aborted (transaction N + 1) is zero, and no checks for bit BTRFS_FS_STATE_ERROR in fs_info->fs_state are made btrfs_write_and_wait_transaction(trans, root); --> succeeds, no errors during writeback write_ctree_super(trans, root, 0); --> succeeds --> we have now a superblock that points us to some root that uses eb X, which was never written to disk In this scenario future attempts to read eb X from disk results in an error message like "parent transid verify failed on X wanted Y found Z". So fix this by aborting the current transaction if after waiting for the previous transaction we verify that it was aborted. Signed-off-by: Filipe Manana Reviewed-by: Josef Bacik Reviewed-by: Liu Bo Signed-off-by: Chris Mason Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/transaction.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 94e909c5a5034..00d18c2bdb0f2 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1875,8 +1875,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, spin_unlock(&root->fs_info->trans_lock); wait_for_commit(root, prev_trans); + ret = prev_trans->aborted; btrfs_put_transaction(prev_trans); + if (ret) + goto cleanup_transaction; } else { spin_unlock(&root->fs_info->trans_lock); } From 9b6d61edd38cdedf81c1bbe79fc637c533df984a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 5 Jul 2015 20:06:38 -0400 Subject: [PATCH 0853/2091] NFSv4/pnfs: Ensure we don't miss a file extension commit 2b83d3de4c18af49800e0b26ae013db4fcf43a4a upstream. pNFS writes don't return attributes, however that doesn't mean that we should ignore the fact that they may be extending the file. This patch ensures that if a write is seen to extend the file, then we always set an attribute barrier, and update the cached file size. 
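The check itself is simple: if offset plus count reaches past the cached size, the write extended the file and the cached size should move forward even though no attributes came back. A minimal sketch of that comparison under the same assumption, with invented structure names:

#include <stdio.h>

struct cached_inode {
    unsigned long long size;    /* client's cached file size */
};

/* Called after a write completes with no post-op attributes. */
static void write_check_extend(struct cached_inode *ino,
                               unsigned long long offset,
                               unsigned long long count)
{
    unsigned long long end = offset + count;

    if (end > ino->size) {
        ino->size = end;        /* remember the extension locally */
        printf("file extended to %llu bytes\n", ino->size);
    }
}

int main(void)
{
    struct cached_inode ino = { .size = 4096 };

    write_check_extend(&ino, 4096, 1024);   /* extends */
    write_check_extend(&ino, 0, 512);       /* does not */
    return 0;
}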
Signed-off-by: Trond Myklebust Cc: Peng Tao Signed-off-by: Greg Kroah-Hartman --- fs/nfs/write.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index daf3556428458..07115b9b1ad28 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1383,24 +1383,27 @@ static void nfs_writeback_check_extend(struct nfs_pgio_header *hdr, { struct nfs_pgio_args *argp = &hdr->args; struct nfs_pgio_res *resp = &hdr->res; + u64 size = argp->offset + resp->count; if (!(fattr->valid & NFS_ATTR_FATTR_SIZE)) + fattr->size = size; + if (nfs_size_to_loff_t(fattr->size) < i_size_read(hdr->inode)) { + fattr->valid &= ~NFS_ATTR_FATTR_SIZE; return; - if (argp->offset + resp->count != fattr->size) - return; - if (nfs_size_to_loff_t(fattr->size) < i_size_read(hdr->inode)) + } + if (size != fattr->size) return; /* Set attribute barrier */ nfs_fattr_set_barrier(fattr); + /* ...and update size */ + fattr->valid |= NFS_ATTR_FATTR_SIZE; } void nfs_writeback_update_inode(struct nfs_pgio_header *hdr) { - struct nfs_fattr *fattr = hdr->res.fattr; + struct nfs_fattr *fattr = &hdr->fattr; struct inode *inode = hdr->inode; - if (fattr == NULL) - return; spin_lock(&inode->i_lock); nfs_writeback_check_extend(hdr, fattr); nfs_post_op_update_inode_force_wcc_locked(inode, fattr); From 8751006ab36573c2314b3ef99164eb7079a86175 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Thu, 30 Jul 2015 21:52:44 +0800 Subject: [PATCH 0854/2091] nfsd: Fix an FS_LAYOUT_TYPES/LAYOUT_TYPES encode bug commit 6896f15aabde505b35888039af93d1d182a0108a upstream. Currently we'll respond correctly to a request for either FS_LAYOUT_TYPES or LAYOUT_TYPES, but not to a request for both attributes simultaneously. Signed-off-by: Kinglong Mee Reviewed-by: Christoph Hellwig Signed-off-by: J. 
Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4xdr.c | 45 +++++++++++++++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 14 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index d4d84451e0e63..3dd1b616b92b3 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2139,6 +2139,27 @@ nfsd4_encode_aclname(struct xdr_stream *xdr, struct svc_rqst *rqstp, return nfsd4_encode_user(xdr, rqstp, ace->who_uid); } +static inline __be32 +nfsd4_encode_layout_type(struct xdr_stream *xdr, enum pnfs_layouttype layout_type) +{ + __be32 *p; + + if (layout_type) { + p = xdr_reserve_space(xdr, 8); + if (!p) + return nfserr_resource; + *p++ = cpu_to_be32(1); + *p++ = cpu_to_be32(layout_type); + } else { + p = xdr_reserve_space(xdr, 4); + if (!p) + return nfserr_resource; + *p++ = cpu_to_be32(0); + } + + return 0; +} + #define WORD0_ABSENT_FS_ATTRS (FATTR4_WORD0_FS_LOCATIONS | FATTR4_WORD0_FSID | \ FATTR4_WORD0_RDATTR_ERROR) #define WORD1_ABSENT_FS_ATTRS FATTR4_WORD1_MOUNTED_ON_FILEID @@ -2692,20 +2713,16 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, p = xdr_encode_hyper(p, stat.ino); } #ifdef CONFIG_NFSD_PNFS - if ((bmval1 & FATTR4_WORD1_FS_LAYOUT_TYPES) || - (bmval2 & FATTR4_WORD2_LAYOUT_TYPES)) { - if (exp->ex_layout_type) { - p = xdr_reserve_space(xdr, 8); - if (!p) - goto out_resource; - *p++ = cpu_to_be32(1); - *p++ = cpu_to_be32(exp->ex_layout_type); - } else { - p = xdr_reserve_space(xdr, 4); - if (!p) - goto out_resource; - *p++ = cpu_to_be32(0); - } + if (bmval1 & FATTR4_WORD1_FS_LAYOUT_TYPES) { + status = nfsd4_encode_layout_type(xdr, exp->ex_layout_type); + if (status) + goto out; + } + + if (bmval2 & FATTR4_WORD2_LAYOUT_TYPES) { + status = nfsd4_encode_layout_type(xdr, exp->ex_layout_type); + if (status) + goto out; } if (bmval2 & FATTR4_WORD2_LAYOUT_BLKSIZE) { From 0940ed480a9e75bddcfa7c2d7e42411b08514534 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 24 Aug 2015 12:41:47 -0400 Subject: [PATCH 0855/2091] nfsd: ensure that the ol stateid hash reference is only put once commit e85687393f3ee0a77ccca016f903d1558bb69258 upstream. When an open or lock stateid is hashed, we take an extra reference to it. When we unhash it, we drop that reference. The code however does not properly account for the case where we have two callers concurrently trying to unhash the stateid. This can lead to list corruption and the hash reference being put more than once. Fix this by having unhash_ol_stateid use list_del_init on the st_perfile list_head, and then testing to see if that list_head is empty before releasing the hash reference. This means that some of the unhashing wrappers now become bool return functions so we can test to see whether the stateid was unhashed before we put the reference. Reported-by: Andrew W Elble Tested-by: Andrew W Elble Reported-by: Anna Schumaker Tested-by: Anna Schumaker Signed-off-by: Jeff Layton Signed-off-by: J. 
Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 58 ++++++++++++++++++++++++++++----------------- 1 file changed, 36 insertions(+), 22 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 6e13504f736e2..68e2e9dce647f 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1004,16 +1004,20 @@ static void nfs4_put_stateowner(struct nfs4_stateowner *sop) sop->so_ops->so_free(sop); } -static void unhash_ol_stateid(struct nfs4_ol_stateid *stp) +static bool unhash_ol_stateid(struct nfs4_ol_stateid *stp) { struct nfs4_file *fp = stp->st_stid.sc_file; lockdep_assert_held(&stp->st_stateowner->so_client->cl_lock); + if (list_empty(&stp->st_perfile)) + return false; + spin_lock(&fp->fi_lock); - list_del(&stp->st_perfile); + list_del_init(&stp->st_perfile); spin_unlock(&fp->fi_lock); list_del(&stp->st_perstateowner); + return true; } static void nfs4_free_ol_stateid(struct nfs4_stid *stid) @@ -1063,25 +1067,27 @@ static void put_ol_stateid_locked(struct nfs4_ol_stateid *stp, list_add(&stp->st_locks, reaplist); } -static void unhash_lock_stateid(struct nfs4_ol_stateid *stp) +static bool unhash_lock_stateid(struct nfs4_ol_stateid *stp) { struct nfs4_openowner *oo = openowner(stp->st_openstp->st_stateowner); lockdep_assert_held(&oo->oo_owner.so_client->cl_lock); list_del_init(&stp->st_locks); - unhash_ol_stateid(stp); nfs4_unhash_stid(&stp->st_stid); + return unhash_ol_stateid(stp); } static void release_lock_stateid(struct nfs4_ol_stateid *stp) { struct nfs4_openowner *oo = openowner(stp->st_openstp->st_stateowner); + bool unhashed; spin_lock(&oo->oo_owner.so_client->cl_lock); - unhash_lock_stateid(stp); + unhashed = unhash_lock_stateid(stp); spin_unlock(&oo->oo_owner.so_client->cl_lock); - nfs4_put_stid(&stp->st_stid); + if (unhashed) + nfs4_put_stid(&stp->st_stid); } static void unhash_lockowner_locked(struct nfs4_lockowner *lo) @@ -1129,7 +1135,7 @@ static void release_lockowner(struct nfs4_lockowner *lo) while (!list_empty(&lo->lo_owner.so_stateids)) { stp = list_first_entry(&lo->lo_owner.so_stateids, struct nfs4_ol_stateid, st_perstateowner); - unhash_lock_stateid(stp); + WARN_ON(!unhash_lock_stateid(stp)); put_ol_stateid_locked(stp, &reaplist); } spin_unlock(&clp->cl_lock); @@ -1142,21 +1148,26 @@ static void release_open_stateid_locks(struct nfs4_ol_stateid *open_stp, { struct nfs4_ol_stateid *stp; + lockdep_assert_held(&open_stp->st_stid.sc_client->cl_lock); + while (!list_empty(&open_stp->st_locks)) { stp = list_entry(open_stp->st_locks.next, struct nfs4_ol_stateid, st_locks); - unhash_lock_stateid(stp); + WARN_ON(!unhash_lock_stateid(stp)); put_ol_stateid_locked(stp, reaplist); } } -static void unhash_open_stateid(struct nfs4_ol_stateid *stp, +static bool unhash_open_stateid(struct nfs4_ol_stateid *stp, struct list_head *reaplist) { + bool unhashed; + lockdep_assert_held(&stp->st_stid.sc_client->cl_lock); - unhash_ol_stateid(stp); + unhashed = unhash_ol_stateid(stp); release_open_stateid_locks(stp, reaplist); + return unhashed; } static void release_open_stateid(struct nfs4_ol_stateid *stp) @@ -1164,8 +1175,8 @@ static void release_open_stateid(struct nfs4_ol_stateid *stp) LIST_HEAD(reaplist); spin_lock(&stp->st_stid.sc_client->cl_lock); - unhash_open_stateid(stp, &reaplist); - put_ol_stateid_locked(stp, &reaplist); + if (unhash_open_stateid(stp, &reaplist)) + put_ol_stateid_locked(stp, &reaplist); spin_unlock(&stp->st_stid.sc_client->cl_lock); free_ol_stateid_reaplist(&reaplist); } @@ -1210,8 +1221,8 @@ static void release_openowner(struct 
nfs4_openowner *oo) while (!list_empty(&oo->oo_owner.so_stateids)) { stp = list_first_entry(&oo->oo_owner.so_stateids, struct nfs4_ol_stateid, st_perstateowner); - unhash_open_stateid(stp, &reaplist); - put_ol_stateid_locked(stp, &reaplist); + if (unhash_open_stateid(stp, &reaplist)) + put_ol_stateid_locked(stp, &reaplist); } spin_unlock(&clp->cl_lock); free_ol_stateid_reaplist(&reaplist); @@ -4714,7 +4725,7 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (check_for_locks(stp->st_stid.sc_file, lockowner(stp->st_stateowner))) break; - unhash_lock_stateid(stp); + WARN_ON(!unhash_lock_stateid(stp)); spin_unlock(&cl->cl_lock); nfs4_put_stid(s); ret = nfs_ok; @@ -4930,20 +4941,23 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp, static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s) { struct nfs4_client *clp = s->st_stid.sc_client; + bool unhashed; LIST_HEAD(reaplist); s->st_stid.sc_type = NFS4_CLOSED_STID; spin_lock(&clp->cl_lock); - unhash_open_stateid(s, &reaplist); + unhashed = unhash_open_stateid(s, &reaplist); if (clp->cl_minorversion) { - put_ol_stateid_locked(s, &reaplist); + if (unhashed) + put_ol_stateid_locked(s, &reaplist); spin_unlock(&clp->cl_lock); free_ol_stateid_reaplist(&reaplist); } else { spin_unlock(&clp->cl_lock); free_ol_stateid_reaplist(&reaplist); - move_to_close_lru(s, clp->net); + if (unhashed) + move_to_close_lru(s, clp->net); } } @@ -5982,7 +5996,7 @@ nfsd_inject_add_lock_to_list(struct nfs4_ol_stateid *lst, static u64 nfsd_foreach_client_lock(struct nfs4_client *clp, u64 max, struct list_head *collect, - void (*func)(struct nfs4_ol_stateid *)) + bool (*func)(struct nfs4_ol_stateid *)) { struct nfs4_openowner *oop; struct nfs4_ol_stateid *stp, *st_next; @@ -5996,9 +6010,9 @@ static u64 nfsd_foreach_client_lock(struct nfs4_client *clp, u64 max, list_for_each_entry_safe(lst, lst_next, &stp->st_locks, st_locks) { if (func) { - func(lst); - nfsd_inject_add_lock_to_list(lst, - collect); + if (func(lst)) + nfsd_inject_add_lock_to_list(lst, + collect); } ++count; /* From ea056d3d54709b92498af8f8b7175ca2e9e02980 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 24 Aug 2015 12:41:48 -0400 Subject: [PATCH 0856/2091] nfsd: ensure that delegation stateid hash references are only put once commit 3fcbbd244ed1d20dc0eb7d48d729503992fa9b7d upstream. It's possible that a DELEGRETURN could race with (e.g.) client expiry, in which case we could end up putting the delegation hash reference more than once. Have unhash_delegation_locked return a bool that indicates whether it was already unhashed. In the case of destroy_delegation we only conditionally put the hash reference if that returns true. The other callers of unhash_delegation_locked call it while walking list_heads that shouldn't yet be detached. If we find that it doesn't return true in those cases, then throw a WARN_ON as that indicates that we have a partially hashed delegation, and that something is likely very wrong. Tested-by: Andrew W Elble Tested-by: Anna Schumaker Signed-off-by: Jeff Layton Signed-off-by: J. 
Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 68e2e9dce647f..397798368b1ae 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -777,13 +777,16 @@ hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp) list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations); } -static void +static bool unhash_delegation_locked(struct nfs4_delegation *dp) { struct nfs4_file *fp = dp->dl_stid.sc_file; lockdep_assert_held(&state_lock); + if (list_empty(&dp->dl_perfile)) + return false; + dp->dl_stid.sc_type = NFS4_CLOSED_DELEG_STID; /* Ensure that deleg break won't try to requeue it */ ++dp->dl_time; @@ -792,16 +795,21 @@ unhash_delegation_locked(struct nfs4_delegation *dp) list_del_init(&dp->dl_recall_lru); list_del_init(&dp->dl_perfile); spin_unlock(&fp->fi_lock); + return true; } static void destroy_delegation(struct nfs4_delegation *dp) { + bool unhashed; + spin_lock(&state_lock); - unhash_delegation_locked(dp); + unhashed = unhash_delegation_locked(dp); spin_unlock(&state_lock); - put_clnt_odstate(dp->dl_clnt_odstate); - nfs4_put_deleg_lease(dp->dl_stid.sc_file); - nfs4_put_stid(&dp->dl_stid); + if (unhashed) { + put_clnt_odstate(dp->dl_clnt_odstate); + nfs4_put_deleg_lease(dp->dl_stid.sc_file); + nfs4_put_stid(&dp->dl_stid); + } } static void revoke_delegation(struct nfs4_delegation *dp) @@ -1725,7 +1733,7 @@ __destroy_client(struct nfs4_client *clp) spin_lock(&state_lock); while (!list_empty(&clp->cl_delegations)) { dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt); - unhash_delegation_locked(dp); + WARN_ON(!unhash_delegation_locked(dp)); list_add(&dp->dl_recall_lru, &reaplist); } spin_unlock(&state_lock); @@ -4357,7 +4365,7 @@ nfs4_laundromat(struct nfsd_net *nn) new_timeo = min(new_timeo, t); break; } - unhash_delegation_locked(dp); + WARN_ON(!unhash_delegation_locked(dp)); list_add(&dp->dl_recall_lru, &reaplist); } spin_unlock(&state_lock); @@ -6282,7 +6290,7 @@ static u64 nfsd_find_all_delegations(struct nfs4_client *clp, u64 max, continue; atomic_inc(&clp->cl_refcount); - unhash_delegation_locked(dp); + WARN_ON(!unhash_delegation_locked(dp)); list_add(&dp->dl_recall_lru, victims); } ++count; @@ -6612,7 +6620,7 @@ nfs4_state_shutdown_net(struct net *net) spin_lock(&state_lock); list_for_each_safe(pos, next, &nn->del_recall_lru) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); - unhash_delegation_locked(dp); + WARN_ON(!unhash_delegation_locked(dp)); list_add(&dp->dl_recall_lru, &reaplist); } spin_unlock(&state_lock); From 7bc97ee9c3ffd89d6226ec82398f5fbe843164e8 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 30 Jul 2015 13:00:56 +1000 Subject: [PATCH 0857/2091] NFSv4: don't set SETATTR for O_RDONLY|O_EXCL commit efcbc04e16dfa95fef76309f89710dd1d99a5453 upstream. It is unusual to combine the open flags O_RDONLY and O_EXCL, but it appears that libre-office does just that. [pid 3250] stat("/home/USER/.config", {st_mode=S_IFDIR|0700, st_size=8192, ...}) = 0 [pid 3250] open("/home/USER/.config/libreoffice/4-suse/user/extensions/buildid", O_RDONLY|O_EXCL NFSv4 takes O_EXCL as a sign that a setattr command should be sent, probably to reset the timestamps. When it was an O_RDONLY open, the SETATTR command does not identify any actual attributes to change. 
If no delegation was provided to the open, the SETATTR uses the all-zeros stateid and the request is accepted (at least by the Linux NFS server - no harm, no foul). If a read-delegation was provided, this is used in the SETATTR request, and a Netapp filer will justifiably claim NFS4ERR_BAD_STATEID, which the Linux client takes as a sign to retry - indefinitely. So only treat O_EXCL specially if O_CREAT was also given. Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d3f2051266093..58392d35f17a8 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2413,7 +2413,7 @@ static int _nfs4_do_open(struct inode *dir, goto err_free_label; state = ctx->state; - if ((opendata->o_arg.open_flags & O_EXCL) && + if ((opendata->o_arg.open_flags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL) && (opendata->o_arg.createmode != NFS4_CREATE_GUARDED)) { nfs4_exclusive_attrset(opendata, sattr); From e204275a010138f27bca6bdc24ae6b042cd568e8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 6 Aug 2015 12:06:30 -0400 Subject: [PATCH 0858/2091] NFS: Don't let the ctime override attribute barriers. commit 7c2dad99d60c86ec686b3bfdcb787c450a7ea89f upstream. Chuck reports seeing cases where a GETATTR that happens to race with an asynchronous WRITE is overriding the file size, despite the attribute barrier being set by the writeback code. The culprit turns out to be the check in nfs_ctime_need_update(), which sees that the ctime is newer than the cached ctime, and assumes that it is safe to override the attribute barrier. This patch removes that override, and ensures that attribute barriers are always respected. Reported-by: Chuck Lever Fixes: a08a8cd375db9 ("NFS: Add attribute update barriers to NFS writebacks") Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/inode.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 5d25b9d97c29c..f1478544aa0cb 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1270,13 +1270,6 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat return 0; } -static int nfs_ctime_need_update(const struct inode *inode, const struct nfs_fattr *fattr) -{ - if (!(fattr->valid & NFS_ATTR_FATTR_CTIME)) - return 0; - return timespec_compare(&fattr->ctime, &inode->i_ctime) > 0; -} - static atomic_long_t nfs_attr_generation_counter; static unsigned long nfs_read_attr_generation_counter(void) @@ -1425,7 +1418,6 @@ static int nfs_inode_attrs_need_update(const struct inode *inode, const struct n const struct nfs_inode *nfsi = NFS_I(inode); return ((long)fattr->gencount - (long)nfsi->attr_gencount) > 0 || - nfs_ctime_need_update(inode, fattr) || ((long)nfsi->attr_gencount - (long)nfs_read_attr_generation_counter() > 0); } From 0bdce6a850fe41e414700193996165fd787352f4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 13 Aug 2015 10:59:07 -0400 Subject: [PATCH 0859/2091] NFSv4.1/pNFS: Fix borken function _same_data_server_addrs_locked() commit 6f536936b79bd4b5cea8fb0e5b8b0bce8cd1ea4a upstream. - Switch back to using list_for_each_entry(). Fixes an incorrect test for list NULL termination. - Do not assume that lists are sorted. - Finally, consider an existing entry to match if it consists of a subset of the addresses in the new entry. 
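The comparison described above reduces to an unordered subset test: every address in one list must be found somewhere in the other, with no sorting assumed. A standalone sketch of that check over plain arrays (the list_head walking and sockaddr comparison are omitted):

#include <stdbool.h>
#include <stdio.h>

static bool contains(const int *list, int n, int value)
{
    for (int i = 0; i < n; i++)
        if (list[i] == value)
            return true;
    return false;
}

/* True if every element of a[] also occurs in b[] (a is a subset of b);
 * neither list needs to be sorted. */
static bool is_subset(const int *a, int na, const int *b, int nb)
{
    for (int i = 0; i < na; i++)
        if (!contains(b, nb, a[i]))
            return false;
    return true;
}

int main(void)
{
    int existing[] = { 10, 20 };
    int incoming[] = { 20, 30, 10 };

    printf("existing subset of incoming: %d\n",
           is_subset(existing, 2, incoming, 3));
    printf("incoming subset of existing: %d\n",
           is_subset(incoming, 3, existing, 2));
    return 0;
}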
Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/pnfs_nfs.c | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index f37e25b6311c8..1705c78ee2d8a 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -359,26 +359,31 @@ same_sockaddr(struct sockaddr *addr1, struct sockaddr *addr2) return false; } +/* + * Checks if 'dsaddrs1' contains a subset of 'dsaddrs2'. If it does, + * declare a match. + */ static bool _same_data_server_addrs_locked(const struct list_head *dsaddrs1, const struct list_head *dsaddrs2) { struct nfs4_pnfs_ds_addr *da1, *da2; - - /* step through both lists, comparing as we go */ - for (da1 = list_first_entry(dsaddrs1, typeof(*da1), da_node), - da2 = list_first_entry(dsaddrs2, typeof(*da2), da_node); - da1 != NULL && da2 != NULL; - da1 = list_entry(da1->da_node.next, typeof(*da1), da_node), - da2 = list_entry(da2->da_node.next, typeof(*da2), da_node)) { - if (!same_sockaddr((struct sockaddr *)&da1->da_addr, - (struct sockaddr *)&da2->da_addr)) - return false; + struct sockaddr *sa1, *sa2; + bool match = false; + + list_for_each_entry(da1, dsaddrs1, da_node) { + sa1 = (struct sockaddr *)&da1->da_addr; + match = false; + list_for_each_entry(da2, dsaddrs2, da_node) { + sa2 = (struct sockaddr *)&da2->da_addr; + match = same_sockaddr(sa1, sa2); + if (match) + break; + } + if (!match) + break; } - if (da1 == NULL && da2 == NULL) - return true; - - return false; + return match; } /* From 87fbed4145998fbb3960c243c70cf78fbd7e5f42 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Sat, 15 Aug 2015 21:52:10 +0800 Subject: [PATCH 0860/2091] NFS: Fix a NULL pointer dereference of migration recovery ops for v4.2 client commit 18e3b739fdc826481c6a1335ce0c5b19b3d415da upstream. ---Steps to Reproduce-- # cat /etc/exports /nfs/referal *(rw,insecure,no_subtree_check,no_root_squash,crossmnt) /nfs/old *(ro,insecure,subtree_check,root_squash,crossmnt) # mount -t nfs nfs-server:/nfs/ /mnt/ # ll /mnt/*/ # cat /etc/exports /nfs/referal *(rw,insecure,no_subtree_check,no_root_squash,crossmnt,refer=/nfs/old/@nfs-server) /nfs/old *(ro,insecure,subtree_check,root_squash,crossmnt) # service nfs restart # ll /mnt/*/ --->>>>> oops here [ 5123.102925] BUG: unable to handle kernel NULL pointer dereference at (null) [ 5123.103363] IP: [] nfs4_proc_get_locations+0x9b/0x120 [nfsv4] [ 5123.103752] PGD 587b9067 PUD 3cbf5067 PMD 0 [ 5123.104131] Oops: 0000 [#1] [ 5123.104529] Modules linked in: nfsv4(OE) nfs(OE) fscache(E) nfsd(OE) xfs libcrc32c iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi coretemp crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel ppdev vmw_balloon parport_pc parport i2c_piix4 shpchp auth_rpcgss nfs_acl vmw_vmci lockd grace sunrpc vmwgfx drm_kms_helper ttm drm mptspi serio_raw scsi_transport_spi e1000 mptscsih mptbase ata_generic pata_acpi [last unloaded: nfsd] [ 5123.105887] CPU: 0 PID: 15853 Comm: ::1-manager Tainted: G OE 4.2.0-rc6+ #214 [ 5123.106358] Hardware name: VMware, Inc. 
VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 05/20/2014 [ 5123.106860] task: ffff88007620f300 ti: ffff88005877c000 task.ti: ffff88005877c000 [ 5123.107363] RIP: 0010:[] [] nfs4_proc_get_locations+0x9b/0x120 [nfsv4] [ 5123.107909] RSP: 0018:ffff88005877fdb8 EFLAGS: 00010246 [ 5123.108435] RAX: ffff880053f3bc00 RBX: ffff88006ce6c908 RCX: ffff880053a0d240 [ 5123.108968] RDX: ffffea0000e6d940 RSI: ffff8800399a0000 RDI: ffff88006ce6c908 [ 5123.109503] RBP: ffff88005877fe28 R08: ffffffff81c708a0 R09: 0000000000000000 [ 5123.110045] R10: 00000000000001a2 R11: ffff88003ba7f5c8 R12: ffff880054c55800 [ 5123.110618] R13: 0000000000000000 R14: ffff880053a0d240 R15: ffff880053a0d240 [ 5123.111169] FS: 0000000000000000(0000) GS:ffffffff81c27000(0000) knlGS:0000000000000000 [ 5123.111726] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 5123.112286] CR2: 0000000000000000 CR3: 0000000054cac000 CR4: 00000000001406f0 [ 5123.112888] Stack: [ 5123.113458] ffffea0000e6d940 ffff8800399a0000 00000000000167d0 0000000000000000 [ 5123.114049] 0000000000000000 0000000000000000 0000000000000000 00000000a7ec82c6 [ 5123.114662] ffff88005877fe18 ffffea0000e6d940 ffff8800399a0000 ffff880054c55800 [ 5123.115264] Call Trace: [ 5123.115868] [] nfs4_try_migration+0xbb/0x220 [nfsv4] [ 5123.116487] [] nfs4_run_state_manager+0x4ab/0x7b0 [nfsv4] [ 5123.117104] [] ? nfs4_do_reclaim+0x510/0x510 [nfsv4] [ 5123.117813] [] kthread+0xd7/0xf0 [ 5123.118456] [] ? kthread_worker_fn+0x160/0x160 [ 5123.119108] [] ret_from_fork+0x3f/0x70 [ 5123.119723] [] ? kthread_worker_fn+0x160/0x160 [ 5123.120329] Code: 4c 8b 6a 58 74 17 eb 52 48 8d 55 a8 89 c6 4c 89 e7 e8 4a b5 ff ff 8b 45 b0 85 c0 74 1c 4c 89 f9 48 8b 55 90 48 8b 75 98 48 89 df <41> ff 55 00 3d e8 d8 ff ff 41 89 c6 74 cf 48 8b 4d c8 65 48 33 [ 5123.121643] RIP [] nfs4_proc_get_locations+0x9b/0x120 [nfsv4] [ 5123.122308] RSP [ 5123.122942] CR2: 0000000000000000 Fixes: ec011fe847 ("NFS: Introduce a vector of migration recovery ops") Signed-off-by: Kinglong Mee Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/nfs4proc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 58392d35f17a8..c468ef8eb4afa 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -8571,6 +8571,7 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = { .reboot_recovery_ops = &nfs41_reboot_recovery_ops, .nograce_recovery_ops = &nfs41_nograce_recovery_ops, .state_renewal_ops = &nfs41_state_renewal_ops, + .mig_recovery_ops = &nfs41_mig_recovery_ops, }; #endif From 8d1920be7e762a77468c5ea82aab1ac4358d8fc9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 17 Aug 2015 12:57:07 -0500 Subject: [PATCH 0861/2091] NFS: nfs_set_pgio_error sometimes misses errors commit e9ae58aeee8842a50f7e199d602a5ccb2e41a95f upstream. We should ensure that we always set the pgio_header's error field if a READ or WRITE RPC call returns an error. The current code depends on 'hdr->good_bytes' always being initialised to a large value, which is not always done correctly by callers. When this happens, applications may end up missing important errors. 
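The hunk below changes nfs_set_pgio_error() to always latch the first error and to shrink the range of bytes known to be good, instead of trusting 'good_bytes' to have been pre-initialised to a large value. As a rough illustration of that "first error wins, good range only shrinks" pattern, here is a minimal user-space sketch; the type and function names are made up for the example and this is not the NFS code:

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdio.h>

    struct io_header {
            bool   error_set;    /* analogous to the NFS_IOHDR_ERROR flag */
            int    error;        /* first error reported */
            size_t io_start;
            size_t good_bytes;   /* bytes known good starting at io_start */
    };

    /* Record an error at offset 'pos': latch the first error, and only
     * let later errors through if they shrink the known-good range. */
    static void set_io_error(struct io_header *hdr, int error, size_t pos)
    {
            if (!hdr->error_set || pos < hdr->io_start + hdr->good_bytes) {
                    hdr->error_set  = true;
                    hdr->good_bytes = pos - hdr->io_start;
                    hdr->error      = error;
            }
    }

    int main(void)
    {
            struct io_header hdr = { .io_start = 0, .good_bytes = 4096 };

            set_io_error(&hdr, -5, 1024);   /* recorded */
            set_io_error(&hdr, -13, 2048);  /* beyond the good range: ignored */
            printf("error=%d good_bytes=%zu\n", hdr.error, hdr.good_bytes);
            return 0;
    }

In the real patch the "error already set" test is the atomic test_and_set_bit() on the header flags, as the diff shows.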
Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/pagelist.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 7b45526785360..069914ce76410 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -77,8 +77,8 @@ EXPORT_SYMBOL_GPL(nfs_pgheader_init); void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos) { spin_lock(&hdr->lock); - if (pos < hdr->io_start + hdr->good_bytes) { - set_bit(NFS_IOHDR_ERROR, &hdr->flags); + if (!test_and_set_bit(NFS_IOHDR_ERROR, &hdr->flags) + || pos < hdr->io_start + hdr->good_bytes) { clear_bit(NFS_IOHDR_EOF, &hdr->flags); hdr->good_bytes = pos - hdr->io_start; hdr->error = error; From 73e8e7b2bbf68ddc590a0e725c2099fe0fa75b6a Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Thu, 20 Aug 2015 01:52:59 +0800 Subject: [PATCH 0862/2091] NFS41/flexfiles: update inode after write finishes commit 69f230d907e8c1ca3f9bd528993eeb98f712b0dd upstream. Otherwise we break fstest case tests/read_write/mctime.t Does files layout need the same fix as well? Cc: Anna Schumaker Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/flexfilelayout/flexfilelayout.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 6f5f0f425e86b..64604cf3af317 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1039,6 +1039,9 @@ static int ff_layout_write_done_cb(struct rpc_task *task, hdr->res.verf->committed == NFS_DATA_SYNC) ff_layout_set_layoutcommit(hdr); + if (task->tk_status >= 0) + nfs_writeback_update_inode(hdr); + return 0; } From 66bfdda4b2042bca88b429a55ca010d5ab94f991 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 20 Aug 2015 18:56:07 -0500 Subject: [PATCH 0863/2091] NFSv4: Force a post-op attribute update when holding a delegation commit aaae3f00d3f67f681a1f3cb7af999e976e8a24ce upstream. If the ctime or mtime or change attribute have changed because of an operation we initiated, we should make sure that we force an attribute update. However we do not want to mark the page cache for revalidation. Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/inode.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index f1478544aa0cb..976ba792fbc69 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1480,6 +1480,13 @@ static int nfs_post_op_update_inode_locked(struct inode *inode, struct nfs_fattr { unsigned long invalid = NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; + /* + * Don't revalidate the pagecache if we hold a delegation, but do + * force an attribute update + */ + if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) + invalid = NFS_INO_INVALID_ATTR|NFS_INO_REVAL_FORCED; + if (S_ISDIR(inode->i_mode)) invalid |= NFS_INO_INVALID_DATA; nfs_set_cache_invalid(inode, invalid); From b5a6dec79b2764584e5d5105f3f49bc58bf99513 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Sat, 22 Aug 2015 06:40:00 +0800 Subject: [PATCH 0864/2091] NFS41/flexfiles: zero out DS write wcc commit 5420401079e152ff68a8024f6a375804b1c21505 upstream. We do not want to update inode attributes with DS values. 
Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/flexfilelayout/flexfilelayout.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 64604cf3af317..fecd9201dbadf 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1039,6 +1039,8 @@ static int ff_layout_write_done_cb(struct rpc_task *task, hdr->res.verf->committed == NFS_DATA_SYNC) ff_layout_set_layoutcommit(hdr); + /* zero out fattr since we don't care DS attr at all */ + hdr->fattr.valid = 0; if (task->tk_status >= 0) nfs_writeback_update_inode(hdr); From f6384199b2592674c7e6cfcfb4c0c54eacf992d7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 27 Aug 2015 20:37:39 -0400 Subject: [PATCH 0865/2091] NFSv4.1/flexfiles: Fix a protocol error in layoutreturn commit d13549074cf066d6d5bb29903d044beffea342d3 upstream. According to the flexfiles protocol, the layoutreturn should specify an array of errors in the following format: struct ff_ioerr4 { offset4 ffie_offset; length4 ffie_length; stateid4 ffie_stateid; device_error4 ffie_errors<>; }; This patch fixes up the code to ensure that our ffie_errors is indeed encoded as an array (albeit with only a single entry). Reported-by: Tom Haynes Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/flexfilelayout/flexfilelayoutdev.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index f13e1969eedd9..b28fa4cbea526 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -500,16 +500,19 @@ int ff_layout_encode_ds_ioerr(struct nfs4_flexfile_layout *flo, range->offset, range->length)) continue; /* offset(8) + length(8) + stateid(NFS4_STATEID_SIZE) - * + deviceid(NFS4_DEVICEID4_SIZE) + status(4) + opnum(4) + * + array length + deviceid(NFS4_DEVICEID4_SIZE) + * + status(4) + opnum(4) */ p = xdr_reserve_space(xdr, - 24 + NFS4_STATEID_SIZE + NFS4_DEVICEID4_SIZE); + 28 + NFS4_STATEID_SIZE + NFS4_DEVICEID4_SIZE); if (unlikely(!p)) return -ENOBUFS; p = xdr_encode_hyper(p, err->offset); p = xdr_encode_hyper(p, err->length); p = xdr_encode_opaque_fixed(p, &err->stateid, NFS4_STATEID_SIZE); + /* Encode 1 error */ + *p++ = cpu_to_be32(1); p = xdr_encode_opaque_fixed(p, &err->deviceid, NFS4_DEVICEID4_SIZE); *p++ = cpu_to_be32(err->status); From 3d5c6b90ed90eadcd9c66951877697d848e63ac1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 30 Aug 2015 18:37:59 -0700 Subject: [PATCH 0866/2091] NFSv4.1: Fix a protocol issue with CLOSE stateids commit 4a1e2feb9d246775dee0f78ed5b18826bae2b1c5 upstream. According to RFC5661 Section 18.2.4, CLOSE is supposed to return the zero stateid. This means that nfs_clear_open_stateid_locked() cannot assume that the result stateid will always match the 'other' field of the existing open stateid when trying to determine a race with a parallel OPEN. Instead, we look at the argument, and check for matches. 
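Because the CLOSE result carries the zero stateid, only the argument stateid identifies which open state the reply belongs to, and the sequence-number comparison is done in a wraparound-safe way. A small user-space sketch of those two checks follows; the struct and helper names are invented for the example and are not the kernel's nfs4_stateid code:

    #include <stdint.h>
    #include <string.h>
    #include <stdbool.h>
    #include <stdio.h>

    #define OTHER_SIZE 12

    struct stateid {
            uint32_t seqid;
            uint8_t  other[OTHER_SIZE];
    };

    /* Do two stateids name the same underlying state (sequence ignored)? */
    static bool stateid_match_other(const struct stateid *a,
                                    const struct stateid *b)
    {
            return memcmp(a->other, b->other, OTHER_SIZE) == 0;
    }

    /* Wraparound-safe "is a's sequence newer than b's". */
    static bool stateid_is_newer(const struct stateid *a,
                                 const struct stateid *b)
    {
            return (int32_t)(a->seqid - b->seqid) > 0;
    }

    int main(void)
    {
            struct stateid open_st   = { .seqid = 5, .other = { 1, 2, 3 } };
            struct stateid close_res = { 0 };        /* all-zero CLOSE result */
            struct stateid close_arg = open_st;      /* stateid we sent */

            /* Comparing the result against the open stateid tells us nothing;
             * the argument is what identifies the state that was closed. */
            printf("result matches open:   %d\n",
                   stateid_match_other(&close_res, &open_st));
            printf("argument matches open: %d\n",
                   stateid_match_other(&close_arg, &open_st));
            printf("result is newer:       %d\n",
                   stateid_is_newer(&close_res, &open_st));
            return 0;
    }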
Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/nfs4proc.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c468ef8eb4afa..d69bd6ae0ff01 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1216,6 +1216,7 @@ static void nfs_resync_open_stateid_locked(struct nfs4_state *state) } static void nfs_clear_open_stateid_locked(struct nfs4_state *state, + nfs4_stateid *arg_stateid, nfs4_stateid *stateid, fmode_t fmode) { clear_bit(NFS_O_RDWR_STATE, &state->flags); @@ -1234,8 +1235,9 @@ static void nfs_clear_open_stateid_locked(struct nfs4_state *state, if (stateid == NULL) return; /* Handle races with OPEN */ - if (!nfs4_stateid_match_other(stateid, &state->open_stateid) || - !nfs4_stateid_is_newer(stateid, &state->open_stateid)) { + if (!nfs4_stateid_match_other(arg_stateid, &state->open_stateid) || + (nfs4_stateid_match_other(stateid, &state->open_stateid) && + !nfs4_stateid_is_newer(stateid, &state->open_stateid))) { nfs_resync_open_stateid_locked(state); return; } @@ -1244,10 +1246,12 @@ static void nfs_clear_open_stateid_locked(struct nfs4_state *state, nfs4_stateid_copy(&state->open_stateid, stateid); } -static void nfs_clear_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode) +static void nfs_clear_open_stateid(struct nfs4_state *state, + nfs4_stateid *arg_stateid, + nfs4_stateid *stateid, fmode_t fmode) { write_seqlock(&state->seqlock); - nfs_clear_open_stateid_locked(state, stateid, fmode); + nfs_clear_open_stateid_locked(state, arg_stateid, stateid, fmode); write_sequnlock(&state->seqlock); if (test_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags)) nfs4_schedule_state_manager(state->owner->so_server->nfs_client); @@ -2672,7 +2676,8 @@ static void nfs4_close_done(struct rpc_task *task, void *data) goto out_release; } } - nfs_clear_open_stateid(state, res_stateid, calldata->arg.fmode); + nfs_clear_open_stateid(state, &calldata->arg.stateid, + res_stateid, calldata->arg.fmode); out_release: nfs_release_seqid(calldata->arg.seqid); nfs_refresh_inode(calldata->inode, calldata->res.fattr); From d40d9de9d3533ef6607342ec153b426b9d2b892e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 19 Aug 2015 00:14:20 -0500 Subject: [PATCH 0867/2091] Revert "NFSv4: Remove incorrect check in can_open_delegated()" commit 36319608e28701c07cad80ae3be8b0fdfb1ab40f upstream. This reverts commit 4e379d36c050b0117b5d10048be63a44f5036115. This commit opens up a race between the recovery code and the open code. Reported-by: Olga Kornievskaia Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/nfs4proc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d69bd6ae0ff01..c245874d7e9d0 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1152,6 +1152,8 @@ static int can_open_delegated(struct nfs_delegation *delegation, fmode_t fmode) return 0; if ((delegation->type & fmode) != fmode) return 0; + if (test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags)) + return 0; if (test_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) return 0; nfs_mark_delegation_referenced(delegation); From 0e592fde0adf6047581a43ca057f17e919cb082d Mon Sep 17 00:00:00 2001 From: Pratyush Anand Date: Thu, 27 Aug 2015 10:01:33 +0530 Subject: [PATCH 0868/2091] net: sunrpc: fix tracepoint Warning: unknown op '->' commit 051ac3848a94f21cfdec899cc9c65ce7f9f116fa upstream. 
`perf stat -e sunrpc:svc_xprt_do_enqueue true` results in Warning: unknown op '->' Warning: [sunrpc:svc_xprt_do_enqueue] unknown op '->' Similar warning for svc_handle_xprt as well. Actually TP_printk() should never dereference an address saved in the ring buffer that points somewhere in the kernel. There's no guarantee that that object still exists (with the exception of static strings). Therefore change all the arguments for TP_printk(), so that it references values existing in the ring buffer only. While doing that, also fix another possible bug when argument xprt could be NULL and TP_fast_assign() tries to access it's elements. Signed-off-by: Pratyush Anand Reviewed-by: Jeff Layton Acked-by: Steven Rostedt Fixes: 83a712e0afef "sunrpc: add some tracepoints around ..." Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- include/trace/events/sunrpc.h | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index fd1a02cb3c823..003dca9338039 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -529,18 +529,21 @@ TRACE_EVENT(svc_xprt_do_enqueue, TP_STRUCT__entry( __field(struct svc_xprt *, xprt) - __field(struct svc_rqst *, rqst) + __field_struct(struct sockaddr_storage, ss) + __field(int, pid) + __field(unsigned long, flags) ), TP_fast_assign( __entry->xprt = xprt; - __entry->rqst = rqst; + xprt ? memcpy(&__entry->ss, &xprt->xpt_remote, sizeof(__entry->ss)) : memset(&__entry->ss, 0, sizeof(__entry->ss)); + __entry->pid = rqst? rqst->rq_task->pid : 0; + __entry->flags = xprt ? xprt->xpt_flags : 0; ), TP_printk("xprt=0x%p addr=%pIScp pid=%d flags=%s", __entry->xprt, - (struct sockaddr *)&__entry->xprt->xpt_remote, - __entry->rqst ? __entry->rqst->rq_task->pid : 0, - show_svc_xprt_flags(__entry->xprt->xpt_flags)) + (struct sockaddr *)&__entry->ss, + __entry->pid, show_svc_xprt_flags(__entry->flags)) ); TRACE_EVENT(svc_xprt_dequeue, @@ -589,16 +592,20 @@ TRACE_EVENT(svc_handle_xprt, TP_STRUCT__entry( __field(struct svc_xprt *, xprt) __field(int, len) + __field_struct(struct sockaddr_storage, ss) + __field(unsigned long, flags) ), TP_fast_assign( __entry->xprt = xprt; + xprt ? memcpy(&__entry->ss, &xprt->xpt_remote, sizeof(__entry->ss)) : memset(&__entry->ss, 0, sizeof(__entry->ss)); __entry->len = len; + __entry->flags = xprt ? xprt->xpt_flags : 0; ), TP_printk("xprt=0x%p addr=%pIScp len=%d flags=%s", __entry->xprt, - (struct sockaddr *)&__entry->xprt->xpt_remote, __entry->len, - show_svc_xprt_flags(__entry->xprt->xpt_flags)) + (struct sockaddr *)&__entry->ss, + __entry->len, show_svc_xprt_flags(__entry->flags)) ); #endif /* _TRACE_SUNRPC_H */ From fc56e1157e720c07a5ef553f492349af547eb60c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 13 Aug 2015 15:33:51 -0400 Subject: [PATCH 0869/2091] SUNRPC: Fix a thinko in xs_connect() commit 99b1a4c32ad22024ac6198a4337aaec5ea23168f upstream. It is rather pointless to test the value of transport->inet after calling xs_reset_transport(), since it will always be zero, and so we will never see any exponential back off behaviour. Also don't force early connections for SOFTCONN tasks. If the server disconnects us, we should respect the exponential backoff. 
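The point of the change below is that the back-off delay should only apply when a socket has existed before, and that the transport must not be reset before that decision is made. As a stand-alone sketch of the exponential back-off itself (the constants and names here are invented for the example and are not the ones sunrpc uses):

    #include <stdio.h>

    #define MIN_TIMEOUT_MS    3000UL
    #define MAX_TIMEOUT_MS  300000UL

    struct transport {
            int have_connected_before;          /* stand-in for sock != NULL */
            unsigned long reestablish_timeout_ms;
    };

    /* How long to wait before the next connect attempt; doubles the
     * delay for subsequent attempts up to a fixed cap. */
    static unsigned long next_connect_delay(struct transport *t)
    {
            unsigned long delay = 0;

            if (t->have_connected_before) {
                    delay = t->reestablish_timeout_ms;
                    t->reestablish_timeout_ms <<= 1;
                    if (t->reestablish_timeout_ms > MAX_TIMEOUT_MS)
                            t->reestablish_timeout_ms = MAX_TIMEOUT_MS;
            }
            if (t->reestablish_timeout_ms < MIN_TIMEOUT_MS)
                    t->reestablish_timeout_ms = MIN_TIMEOUT_MS;
            return delay;
    }

    int main(void)
    {
            struct transport t = { .have_connected_before = 1,
                                   .reestablish_timeout_ms = MIN_TIMEOUT_MS };

            for (int i = 0; i < 5; i++)
                    printf("attempt %d: wait %lums\n", i, next_connect_delay(&t));
            return 0;
    }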
Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/xprtsock.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 66891e32c5e31..f191b3e7c3861 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2216,13 +2216,14 @@ static void xs_connect(struct rpc_xprt *xprt, struct rpc_task *task) WARN_ON_ONCE(!xprt_lock_connect(xprt, task, transport)); - /* Start by resetting any existing state */ - xs_reset_transport(transport); - - if (transport->sock != NULL && !RPC_IS_SOFTCONN(task)) { + if (transport->sock != NULL) { dprintk("RPC: xs_connect delayed xprt %p for %lu " "seconds\n", xprt, xprt->reestablish_timeout / HZ); + + /* Start by resetting any existing state */ + xs_reset_transport(transport); + queue_delayed_work(rpciod_workqueue, &transport->connect_worker, xprt->reestablish_timeout); From f160db25e9c4baa65871f4f7972064f1d7ecb160 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 29 Aug 2015 13:36:30 -0700 Subject: [PATCH 0870/2091] SUNRPC: xs_reset_transport must mark the connection as disconnected commit 0c78789e3a030615c6650fde89546cadf40ec2cc upstream. In case the reconnection attempt fails. Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/xprtsock.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index f191b3e7c3861..3f7ba7051ec0e 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -834,6 +834,7 @@ static void xs_reset_transport(struct sock_xprt *transport) sk->sk_user_data = NULL; xs_restore_old_callbacks(transport, sk); + xprt_clear_connected(xprt); write_unlock_bh(&sk->sk_callback_lock); xs_sock_reset_connection_flags(xprt); From 77bb3c931d577185760c59428a87e8966a126b0b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 16 Sep 2015 23:43:17 -0400 Subject: [PATCH 0871/2091] SUNRPC: Ensure that we wait for connections to complete before retrying commit 0fdea1e8a2853f79d39b8555cc9de16a7e0ab26f upstream. Commit 718ba5b87343, moved the responsibility for unlocking the socket to xs_tcp_setup_socket, meaning that the socket will be unlocked before we know that it has finished trying to connect. The following patch is based on an initial patch by Russell King to ensure that we delay clearing the XPRT_CONNECTING flag until we either know that we failed to initiate a connection attempt, or the connection attempt itself failed. 
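The idea is that the path which initiates the connect sets a "connecting" bit, and only the completion paths may clear it, exactly once, via an atomic test-and-clear. A minimal user-space sketch of that ownership pattern using C11 atomics (names invented for the example, not the sunrpc code):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    enum { FLAG_CONNECTING = 1u << 0 };

    struct xprt_state {
            atomic_uint flags;
    };

    /* Path that initiates the (asynchronous) connect. */
    static void start_connect(struct xprt_state *st)
    {
            atomic_fetch_or(&st->flags, FLAG_CONNECTING);
            /* ... kick off the non-blocking connect here ... */
    }

    /* Completion path: only the caller that observes the bit set
     * "owns" the transition and may declare the attempt finished. */
    static bool finish_connect(struct xprt_state *st)
    {
            unsigned int old = atomic_fetch_and(&st->flags, ~FLAG_CONNECTING);

            return old & FLAG_CONNECTING;
    }

    int main(void)
    {
            struct xprt_state st = { .flags = 0 };

            start_connect(&st);
            printf("cleared by first caller:  %d\n", finish_connect(&st));
            printf("cleared by second caller: %d\n", finish_connect(&st));
            return 0;
    }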
Fixes: 718ba5b87343 ("SUNRPC: Add helpers to prevent socket create from racing") Reported-by: Russell King Reported-by: Russell King Tested-by: Russell King Tested-by: Benjamin Coddington Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- include/linux/sunrpc/xprtsock.h | 3 +++ net/sunrpc/xprtsock.c | 11 ++++++++--- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h index 7591788e9fbff..357e44c1a46b1 100644 --- a/include/linux/sunrpc/xprtsock.h +++ b/include/linux/sunrpc/xprtsock.h @@ -42,6 +42,7 @@ struct sock_xprt { /* * Connection of transports */ + unsigned long sock_state; struct delayed_work connect_worker; struct sockaddr_storage srcaddr; unsigned short srcport; @@ -76,6 +77,8 @@ struct sock_xprt { */ #define TCP_RPC_REPLY (1UL << 6) +#define XPRT_SOCK_CONNECTING 1U + #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_XPRTSOCK_H */ diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 3f7ba7051ec0e..5e3ad598d3f55 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1434,6 +1434,7 @@ static void xs_tcp_data_ready(struct sock *sk) static void xs_tcp_state_change(struct sock *sk) { struct rpc_xprt *xprt; + struct sock_xprt *transport; read_lock_bh(&sk->sk_callback_lock); if (!(xprt = xprt_from_sock(sk))) @@ -1445,13 +1446,12 @@ static void xs_tcp_state_change(struct sock *sk) sock_flag(sk, SOCK_ZAPPED), sk->sk_shutdown); + transport = container_of(xprt, struct sock_xprt, xprt); trace_rpc_socket_state_change(xprt, sk->sk_socket); switch (sk->sk_state) { case TCP_ESTABLISHED: spin_lock(&xprt->transport_lock); if (!xprt_test_and_set_connected(xprt)) { - struct sock_xprt *transport = container_of(xprt, - struct sock_xprt, xprt); /* Reset TCP record info */ transport->tcp_offset = 0; @@ -1460,6 +1460,8 @@ static void xs_tcp_state_change(struct sock *sk) transport->tcp_flags = TCP_RCV_COPY_FRAGHDR | TCP_RCV_COPY_XID; xprt->connect_cookie++; + clear_bit(XPRT_SOCK_CONNECTING, &transport->sock_state); + xprt_clear_connecting(xprt); xprt_wake_pending_tasks(xprt, -EAGAIN); } @@ -1495,6 +1497,9 @@ static void xs_tcp_state_change(struct sock *sk) smp_mb__after_atomic(); break; case TCP_CLOSE: + if (test_and_clear_bit(XPRT_SOCK_CONNECTING, + &transport->sock_state)) + xprt_clear_connecting(xprt); xs_sock_mark_closed(xprt); } out: @@ -2111,6 +2116,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) /* Tell the socket layer to start connecting... */ xprt->stat.connect_count++; xprt->stat.connect_start = jiffies; + set_bit(XPRT_SOCK_CONNECTING, &transport->sock_state); ret = kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK); switch (ret) { case 0: @@ -2175,7 +2181,6 @@ static void xs_tcp_setup_socket(struct work_struct *work) case -EINPROGRESS: case -EALREADY: xprt_unlock_connect(xprt, transport); - xprt_clear_connecting(xprt); return; case -EINVAL: /* Happens, for instance, if the user specified a link From 85d1ba73e42e8317b21ba71ab277fb762a45e6b0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 18 Sep 2015 15:53:24 -0400 Subject: [PATCH 0872/2091] SUNRPC: Lock the transport layer on shutdown commit 79234c3db6842a3de03817211d891e0c2878f756 upstream. Avoid all races with the connect/disconnect handlers by taking the transport lock. Reported-by:"Suzuki K. 
Poulose" Acked-by: Jeff Layton Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/xprt.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 1d4fe24af06a1..d109d308ec3a5 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -611,6 +611,7 @@ static void xprt_autoclose(struct work_struct *work) xprt->ops->close(xprt); clear_bit(XPRT_CLOSE_WAIT, &xprt->state); xprt_release_write(xprt, NULL); + wake_up_bit(&xprt->state, XPRT_LOCKED); } /** @@ -720,6 +721,7 @@ void xprt_unlock_connect(struct rpc_xprt *xprt, void *cookie) xprt->ops->release_xprt(xprt, NULL); out: spin_unlock_bh(&xprt->transport_lock); + wake_up_bit(&xprt->state, XPRT_LOCKED); } /** @@ -1389,6 +1391,10 @@ struct rpc_xprt *xprt_create_transport(struct xprt_create *args) static void xprt_destroy(struct rpc_xprt *xprt) { dprintk("RPC: destroying transport %p\n", xprt); + + /* Exclude transport connect/disconnect handlers */ + wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_UNINTERRUPTIBLE); + del_timer_sync(&xprt->timer); rpc_xprt_debugfs_unregister(xprt); From 68912df901b9807a708cd1599b30fa56548d15e1 Mon Sep 17 00:00:00 2001 From: Joonyoung Shim Date: Wed, 12 Aug 2015 19:21:46 +0900 Subject: [PATCH 0873/2091] rtc: s3c: fix disabled clocks for alarm commit 1fb1c35f56bb6ab4a65920c648154b0f78f634a5 upstream. The clock enable/disable codes for alarm have been removed from commit 24e1455493da ("drivers/rtc/rtc-s3c.c: delete duplicate clock control") and the clocks are disabled even if alarm is set, so alarm interrupt can't happen. The s3c_rtc_setaie function can be called several times with 'enabled' argument having same value, so it needs to check whether clocks are enabled or not. Signed-off-by: Joonyoung Shim Reviewed-by: Krzysztof Kozlowski Signed-off-by: Alexandre Belloni Signed-off-by: Greg Kroah-Hartman --- drivers/rtc/rtc-s3c.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c index 76cbad7a99d34..c5a2523b0185c 100644 --- a/drivers/rtc/rtc-s3c.c +++ b/drivers/rtc/rtc-s3c.c @@ -39,6 +39,7 @@ struct s3c_rtc { void __iomem *base; struct clk *rtc_clk; struct clk *rtc_src_clk; + bool clk_disabled; struct s3c_rtc_data *data; @@ -71,9 +72,12 @@ static void s3c_rtc_enable_clk(struct s3c_rtc *info) unsigned long irq_flags; spin_lock_irqsave(&info->alarm_clk_lock, irq_flags); - clk_enable(info->rtc_clk); - if (info->data->needs_src_clk) - clk_enable(info->rtc_src_clk); + if (info->clk_disabled) { + clk_enable(info->rtc_clk); + if (info->data->needs_src_clk) + clk_enable(info->rtc_src_clk); + info->clk_disabled = false; + } spin_unlock_irqrestore(&info->alarm_clk_lock, irq_flags); } @@ -82,9 +86,12 @@ static void s3c_rtc_disable_clk(struct s3c_rtc *info) unsigned long irq_flags; spin_lock_irqsave(&info->alarm_clk_lock, irq_flags); - if (info->data->needs_src_clk) - clk_disable(info->rtc_src_clk); - clk_disable(info->rtc_clk); + if (!info->clk_disabled) { + if (info->data->needs_src_clk) + clk_disable(info->rtc_src_clk); + clk_disable(info->rtc_clk); + info->clk_disabled = true; + } spin_unlock_irqrestore(&info->alarm_clk_lock, irq_flags); } @@ -128,6 +135,11 @@ static int s3c_rtc_setaie(struct device *dev, unsigned int enabled) s3c_rtc_disable_clk(info); + if (enabled) + s3c_rtc_enable_clk(info); + else + s3c_rtc_disable_clk(info); + return 0; } From 4530473fda002bbcb567419d55f5bc9f7f6541fc Mon Sep 17 00:00:00 2001 From: Joonyoung Shim Date: Fri, 21 Aug 2015 18:43:41 +0900 
Subject: [PATCH 0874/2091] rtc: s5m: fix to update ctrl register commit ff02c0444b83201ff76cc49deccac8cf2bffc7bc upstream. According to datasheet, the S2MPS13X and S2MPS14X should update write buffer via setting WUDR bit to high after ctrl register is written. If not, ALARM interrupt of rtc-s5m doesn't happen first time when i use tools/testing/selftests/timers/rtctest.c test program and hour format is used to 12 hour mode in Odroid-XU3 board. One more issue is the RTC doesn't keep time on Odroid-XU3 board when i turn on board after power off even if RTC battery is connected. It can be solved as setting WUDR & RUDR bits to high at the same time after RTC_CTRL register is written. It's same with condition of only writing ALARM registers, so this is for only S2MPS14 and we should set WUDR & A_UDR bits to high on S2MPS13. I can't find any reasonable description about this like fix from datasheet, but can find similar codes from rtc driver source of hardkernel kernel and vendor kernel. Signed-off-by: Joonyoung Shim Reviewed-by: Krzysztof Kozlowski Tested-by: Krzysztof Kozlowski Signed-off-by: Alexandre Belloni Signed-off-by: Greg Kroah-Hartman --- drivers/rtc/rtc-s5m.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/rtc/rtc-s5m.c b/drivers/rtc/rtc-s5m.c index 8c70d785ba739..ab60287ee72d6 100644 --- a/drivers/rtc/rtc-s5m.c +++ b/drivers/rtc/rtc-s5m.c @@ -635,6 +635,16 @@ static int s5m8767_rtc_init_reg(struct s5m_rtc_info *info) case S2MPS13X: data[0] = (0 << BCD_EN_SHIFT) | (1 << MODEL24_SHIFT); ret = regmap_write(info->regmap, info->regs->ctrl, data[0]); + if (ret < 0) + break; + + /* + * Should set WUDR & (RUDR or AUDR) bits to high after writing + * RTC_CTRL register like writing Alarm registers. We can't find + * the description from datasheet but vendor code does that + * really. + */ + ret = s5m8767_rtc_set_alarm_reg(info); break; default: From 1a64393e4a48bb45c6b50a557bf719d0c4d01045 Mon Sep 17 00:00:00 2001 From: Mitja Spes Date: Wed, 2 Sep 2015 10:02:29 +0200 Subject: [PATCH 0875/2091] rtc: abx80x: fix RTC write bit commit 5f1b2f77646fc0ef2f36fc554f5722a1381d0892 upstream. Fix RTC write bit as per application manual Signed-off-by: Mitja Spes Signed-off-by: Alexandre Belloni Signed-off-by: Greg Kroah-Hartman --- drivers/rtc/rtc-abx80x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-abx80x.c b/drivers/rtc/rtc-abx80x.c index 4337c3bc6acef..afea84c7a155c 100644 --- a/drivers/rtc/rtc-abx80x.c +++ b/drivers/rtc/rtc-abx80x.c @@ -28,7 +28,7 @@ #define ABX8XX_REG_WD 0x07 #define ABX8XX_REG_CTRL1 0x10 -#define ABX8XX_CTRL_WRITE BIT(1) +#define ABX8XX_CTRL_WRITE BIT(0) #define ABX8XX_CTRL_12_24 BIT(6) #define ABX8XX_REG_CFG_KEY 0x1f From 645305df79c555470ab6728561314ea852e2612f Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Wed, 2 Sep 2015 18:17:29 +0200 Subject: [PATCH 0876/2091] PCI,parisc: Enable 64-bit bus addresses on PA-RISC commit e02a653e15d8d32e9e768fd99a3271aafe5c5d77 upstream. Commit 3a9ad0b ("PCI: Add pci_bus_addr_t") unconditionally introduced usage of 64-bit PCI bus addresses on all 64-bit platforms which broke PA-RISC. It turned out that due to enabling the 64-bit addresses, the PCI logic decided to use the GMMIO instead of the LMMIO region. This commit simply disables registering the GMMIO and thus we fall back to use the LMMIO region as before. 
Reverts commit 45ea2a5fed6dacb9bb0558d8b21eacc1c45d5bb4 ("PCI: Don't use 64-bit bus addresses on PA-RISC") To: linux-parisc@vger.kernel.org Cc: linux-pci@vger.kernel.org Cc: Bjorn Helgaas Cc: Meelis Roos Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- drivers/parisc/lba_pci.c | 7 +++++-- drivers/pci/Kconfig | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/parisc/lba_pci.c b/drivers/parisc/lba_pci.c index dceb9ddfd99af..a32c1f6c252cd 100644 --- a/drivers/parisc/lba_pci.c +++ b/drivers/parisc/lba_pci.c @@ -1556,8 +1556,11 @@ lba_driver_probe(struct parisc_device *dev) if (lba_dev->hba.lmmio_space.flags) pci_add_resource_offset(&resources, &lba_dev->hba.lmmio_space, lba_dev->hba.lmmio_space_offset); - if (lba_dev->hba.gmmio_space.flags) - pci_add_resource(&resources, &lba_dev->hba.gmmio_space); + if (lba_dev->hba.gmmio_space.flags) { + /* pci_add_resource(&resources, &lba_dev->hba.gmmio_space); */ + pr_warn("LBA: Not registering GMMIO space %pR\n", + &lba_dev->hba.gmmio_space); + } pci_add_resource(&resources, &lba_dev->hba.bus_num); diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig index 944f50015ed07..73de4efcbe6ed 100644 --- a/drivers/pci/Kconfig +++ b/drivers/pci/Kconfig @@ -2,7 +2,7 @@ # PCI configuration # config PCI_BUS_ADDR_T_64BIT - def_bool y if (ARCH_DMA_ADDR_T_64BIT || (64BIT && !PARISC)) + def_bool y if (ARCH_DMA_ADDR_T_64BIT || 64BIT) depends on PCI config PCI_MSI From f39b5f920977852886e4491d354e72b95935ec0b Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Mon, 7 Sep 2015 20:13:28 -0400 Subject: [PATCH 0877/2091] parisc: Use double word condition in 64bit CAS operation commit 1b59ddfcf1678de38a1f8ca9fb8ea5eebeff1843 upstream. The attached change fixes the condition used in the "sub" instruction. A double word comparison is needed. This fixes the 64-bit LWS CAS operation on 64-bit kernels. I can now enable 64-bit atomic support in GCC. Signed-off-by: John David Anglin Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- arch/parisc/kernel/syscall.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S index 7ef22e3387e09..0b8d26d3ba43b 100644 --- a/arch/parisc/kernel/syscall.S +++ b/arch/parisc/kernel/syscall.S @@ -821,7 +821,7 @@ cas2_action: /* 64bit CAS */ #ifdef CONFIG_64BIT 19: ldd,ma 0(%sr3,%r26), %r29 - sub,= %r29, %r25, %r0 + sub,*= %r29, %r25, %r0 b,n cas2_end 20: std,ma %r24, 0(%sr3,%r26) copy %r0, %r28 From 804a6f7fb449d20d7fa560d040c75e95d8faf3e3 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Thu, 3 Sep 2015 22:45:21 +0200 Subject: [PATCH 0878/2091] parisc: Filter out spurious interrupts in PA-RISC irq handler commit b1b4e435e4ef7de77f07bf2a42c8380b960c2d44 upstream. When detecting a serial port on newer PA-RISC machines (with iosapic) we have a long way to go to find the right IRQ line, registering it, then registering the serial port and the irq handler for the serial port. During this phase spurious interrupts for the serial port may happen which then crashes the kernel because the action handler might not have been set up yet. So, basically it's a race condition between the serial port hardware and the CPU which sets up the necessary fields in the irq sructs. The main reason for this race is, that we unmask the serial port irqs too early without having set up everything properly before (which isn't easily possible because we need the IRQ number to register the serial ports). This patch is a work-around for this problem. 
It adds checks to the CPU irq handler to verify if the IRQ action field has been initialized already. If not, we just skip this interrupt (which isn't critical for a serial port at bootup). The real fix would probably involve rewriting all PA-RISC specific IRQ code (for CPU, IOSAPIC, GSC and EISA) to use IRQ domains with proper parenting of the irq chips and proper irq enabling along this line. This bug has been in the PA-RISC port since the beginning, but the crashes happened very rarely with currently used hardware. But on the latest machine which I bought (a C8000 workstation), which uses the fastest CPUs (4 x PA8900, 1GHz) and which has the largest possible L1 cache size (64MB each), the kernel crashed at every boot because of this race. So, without this patch the machine would currently be unuseable. For the record, here is the flow logic: 1. serial_init_chip() in 8250_gsc.c calls iosapic_serial_irq(). 2. iosapic_serial_irq() calls txn_alloc_irq() to find the irq. 3. iosapic_serial_irq() calls cpu_claim_irq() to register the CPU irq 4. cpu_claim_irq() unmasks the CPU irq (which it shouldn't!) 5. serial_init_chip() then registers the 8250 port. Problems: - In step 4 the CPU irq shouldn't have been registered yet, but after step 5 - If serial irq happens between 4 and 5 have finished, the kernel will crash Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- arch/parisc/kernel/irq.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/parisc/kernel/irq.c b/arch/parisc/kernel/irq.c index f3191db6e2e94..c0eab24f6a9e3 100644 --- a/arch/parisc/kernel/irq.c +++ b/arch/parisc/kernel/irq.c @@ -507,8 +507,8 @@ void do_cpu_irq_mask(struct pt_regs *regs) struct pt_regs *old_regs; unsigned long eirr_val; int irq, cpu = smp_processor_id(); -#ifdef CONFIG_SMP struct irq_desc *desc; +#ifdef CONFIG_SMP cpumask_t dest; #endif @@ -521,8 +521,12 @@ void do_cpu_irq_mask(struct pt_regs *regs) goto set_out; irq = eirr_to_irq(eirr_val); -#ifdef CONFIG_SMP + /* Filter out spurious interrupts, mostly from serial port at bootup */ desc = irq_to_desc(irq); + if (unlikely(!desc->action)) + goto set_out; + +#ifdef CONFIG_SMP cpumask_copy(&dest, desc->irq_data.affinity); if (irqd_is_per_cpu(&desc->irq_data) && !cpumask_test_cpu(smp_processor_id(), &dest)) { From 154dff393c2a7b554469f626e442901ce8acc905 Mon Sep 17 00:00:00 2001 From: Jaewon Kim Date: Tue, 8 Sep 2015 15:02:21 -0700 Subject: [PATCH 0879/2091] vmscan: fix increasing nr_isolated incurred by putback unevictable pages commit c54839a722a02818677bcabe57e957f0ce4f841d upstream. reclaim_clean_pages_from_list() assumes that shrink_page_list() returns number of pages removed from the candidate list. But shrink_page_list() puts back mlocked pages without passing it to caller and without counting as nr_reclaimed. This increases nr_isolated. To fix this, this patch changes shrink_page_list() to pass unevictable pages back to caller. Caller will take care those pages. Minchan said: It fixes two issues. 1. With unevictable page, cma_alloc will be successful. Exactly speaking, cma_alloc of current kernel will fail due to unevictable pages. 2. fix leaking of NR_ISOLATED counter of vmstat With it, too_many_isolated works. Otherwise, it could make hang until the process get SIGKILL. 
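In other words, a routine that consumes an isolated list must hand any page it declines to handle back to its caller, so the caller can both put it back on the LRU and fix up the isolated count. A toy user-space sketch of that "return what you did not handle" pattern (made-up types, not the mm code):

    #include <stdio.h>
    #include <stddef.h>

    struct page { int unevictable; struct page *next; };

    /* Walk 'list'; reclaim what we can, and hand everything we skipped
     * back to the caller on 'ret' so its isolated count stays balanced. */
    static int shrink_list(struct page *list, struct page **ret)
    {
            int reclaimed = 0;

            while (list) {
                    struct page *page = list;
                    list = page->next;

                    if (page->unevictable) {
                            /* Do not put it back ourselves: return it. */
                            page->next = *ret;
                            *ret = page;
                            continue;
                    }
                    reclaimed++;            /* pretend we freed it */
            }
            return reclaimed;
    }

    int main(void)
    {
            struct page p[3] = { { 0, &p[1] }, { 1, &p[2] }, { 0, NULL } };
            struct page *ret = NULL;
            int isolated = 3;

            isolated -= shrink_list(&p[0], &ret);
            for (struct page *q = ret; q; q = q->next)
                    isolated--;             /* caller puts skipped pages back */
            printf("isolated now %d\n", isolated);  /* prints 0 */
            return 0;
    }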
Signed-off-by: Jaewon Kim Acked-by: Minchan Kim Cc: Mel Gorman Acked-by: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/vmscan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index 0d024fc8aa8ef..1a17bd7c0ce58 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1153,7 +1153,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, if (PageSwapCache(page)) try_to_free_swap(page); unlock_page(page); - putback_lru_page(page); + list_add(&page->lru, &ret_pages); continue; activate_locked: From 244d3c13db7a94ceecfb591fba716a525d53aa38 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Wed, 9 Sep 2015 15:38:28 -0700 Subject: [PATCH 0880/2091] fs: if a coredump already exists, unlink and recreate with O_EXCL commit fbb1816942c04429e85dbf4c1a080accc534299e upstream. It was possible for an attacking user to trick root (or another user) into writing his coredumps into an attacker-readable, pre-existing file using rename() or link(), causing the disclosure of secret data from the victim process' virtual memory. Depending on the configuration, it was also possible to trick root into overwriting system files with coredumps. Fix that issue by never writing coredumps into existing files. Requirements for the attack: - The attack only applies if the victim's process has a nonzero RLIMIT_CORE and is dumpable. - The attacker can trick the victim into coredumping into an attacker-writable directory D, either because the core_pattern is relative and the victim's cwd is attacker-writable or because an absolute core_pattern pointing to a world-writable directory is used. - The attacker has one of these: A: on a system with protected_hardlinks=0: execute access to a folder containing a victim-owned, attacker-readable file on the same partition as D, and the victim-owned file will be deleted before the main part of the attack takes place. (In practice, there are lots of files that fulfill this condition, e.g. entries in Debian's /var/lib/dpkg/info/.) This does not apply to most Linux systems because most distros set protected_hardlinks=1. B: on a system with protected_hardlinks=1: execute access to a folder containing a victim-owned, attacker-readable and attacker-writable file on the same partition as D, and the victim-owned file will be deleted before the main part of the attack takes place. (This seems to be uncommon.) C: on any system, independent of protected_hardlinks: write access to a non-sticky folder containing a victim-owned, attacker-readable file on the same partition as D (This seems to be uncommon.) The basic idea is that the attacker moves the victim-owned file to where he expects the victim process to dump its core. The victim process dumps its core into the existing file, and the attacker reads the coredump from it. If the attacker can't move the file because he does not have write access to the containing directory, he can instead link the file to a directory he controls, then wait for the original link to the file to be deleted (because the kernel checks that the link count of the corefile is 1). A less reliable variant that requires D to be non-sticky works with link() and does not require deletion of the original link: link() the file into D, but then unlink() it directly before the kernel performs the link count check. 
On systems with protected_hardlinks=0, this variant allows an attacker to not only gain information from coredumps, but also clobber existing, victim-writable files with coredumps. (This could theoretically lead to a privilege escalation.) Signed-off-by: Jann Horn Cc: Kees Cook Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/coredump.c | 38 ++++++++++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/fs/coredump.c b/fs/coredump.c index bbbe139ab2802..ad81b3dd961ec 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -506,10 +506,10 @@ void do_coredump(const siginfo_t *siginfo) const struct cred *old_cred; struct cred *cred; int retval = 0; - int flag = 0; int ispipe; struct files_struct *displaced; - bool need_nonrelative = false; + /* require nonrelative corefile path and be extra careful */ + bool need_suid_safe = false; bool core_dumped = false; static atomic_t core_dump_count = ATOMIC_INIT(0); struct coredump_params cprm = { @@ -543,9 +543,8 @@ void do_coredump(const siginfo_t *siginfo) */ if (__get_dumpable(cprm.mm_flags) == SUID_DUMP_ROOT) { /* Setuid core dump mode */ - flag = O_EXCL; /* Stop rewrite attacks */ cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */ - need_nonrelative = true; + need_suid_safe = true; } retval = coredump_wait(siginfo->si_signo, &core_state); @@ -626,7 +625,7 @@ void do_coredump(const siginfo_t *siginfo) if (cprm.limit < binfmt->min_coredump) goto fail_unlock; - if (need_nonrelative && cn.corename[0] != '/') { + if (need_suid_safe && cn.corename[0] != '/') { printk(KERN_WARNING "Pid %d(%s) can only dump core "\ "to fully qualified path!\n", task_tgid_vnr(current), current->comm); @@ -634,8 +633,35 @@ void do_coredump(const siginfo_t *siginfo) goto fail_unlock; } + /* + * Unlink the file if it exists unless this is a SUID + * binary - in that case, we're running around with root + * privs and don't want to unlink another user's coredump. + */ + if (!need_suid_safe) { + mm_segment_t old_fs; + + old_fs = get_fs(); + set_fs(KERNEL_DS); + /* + * If it doesn't exist, that's fine. If there's some + * other problem, we'll catch it at the filp_open(). + */ + (void) sys_unlink((const char __user *)cn.corename); + set_fs(old_fs); + } + + /* + * There is a race between unlinking and creating the + * file, but if that causes an EEXIST here, that's + * fine - another process raced with us while creating + * the corefile, and the other process won. To userspace, + * what matters is that at least one of the two processes + * writes its coredump successfully, not which one. + */ cprm.file = filp_open(cn.corename, - O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, + O_CREAT | 2 | O_NOFOLLOW | + O_LARGEFILE | O_EXCL, 0600); if (IS_ERR(cprm.file)) goto fail_unlock; From 2be9c8262419a2db45e7461b1eb26ead770a4438 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Wed, 9 Sep 2015 15:38:30 -0700 Subject: [PATCH 0881/2091] fs: Don't dump core if the corefile would become world-readable. commit 40f705a736eac10e7dca7ab5dd5ed675a6df031d upstream. On a filesystem like vfat, all files are created with the same owner and mode independent of who created the file. When a vfat filesystem is mounted with root as owner of all files and read access for everyone, root's processes left world-readable coredumps on it (but other users' processes only left empty corefiles when given write access because of the uid mismatch). 
Given that the old behavior was inconsistent and insecure, I don't see a problem with changing it. Now, all processes refuse to dump core unless the resulting corefile will only be readable by their owner. Signed-off-by: Jann Horn Acked-by: Kees Cook Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/coredump.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/coredump.c b/fs/coredump.c index ad81b3dd961ec..8dd099dc5f9b2 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -678,11 +678,15 @@ void do_coredump(const siginfo_t *siginfo) if (!S_ISREG(inode->i_mode)) goto close_fail; /* - * Dont allow local users get cute and trick others to coredump - * into their pre-created files. + * Don't dump core if the filesystem changed owner or mode + * of the file during file creation. This is an issue when + * a process dumps core while its cwd is e.g. on a vfat + * filesystem. */ if (!uid_eq(inode->i_uid, current_fsuid())) goto close_fail; + if ((inode->i_mode & 0677) != 0600) + goto close_fail; if (!(cprm.file->f_mode & FMODE_CAN_WRITE)) goto close_fail; if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file)) From 2b1e7d58a82a911fae3dcb762590a68e78806434 Mon Sep 17 00:00:00 2001 From: Adam Lee Date: Mon, 3 Aug 2015 14:33:28 +0800 Subject: [PATCH 0882/2091] mmc: sdhci-pci: set the clear transfer mode register quirk for O2Micro commit 143b648ddf1583905fa15d32be27a31442fc7933 upstream. This patch fixes MMC not working issue on O2Micro/BayHub Host, which requires transfer mode register to be cleared when sending no DMA command. Signed-off-by: Peter Guo Signed-off-by: Adam Lee Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mmc/host/sdhci-pci.c b/drivers/mmc/host/sdhci-pci.c index 7a3fc16d0a6c6..53cfc7cedefec 100644 --- a/drivers/mmc/host/sdhci-pci.c +++ b/drivers/mmc/host/sdhci-pci.c @@ -549,6 +549,7 @@ static int jmicron_resume(struct sdhci_pci_chip *chip) static const struct sdhci_pci_fixes sdhci_o2 = { .probe = sdhci_pci_o2_probe, .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC, + .quirks2 = SDHCI_QUIRK2_CLEAR_TRANSFERMODE_REG_BEFORE_CMD, .probe_slot = sdhci_pci_o2_probe_slot, .resume = sdhci_pci_o2_resume, }; From cea49b29549b4eec81dad4ade3acc7fc175cfccd Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Tue, 18 Aug 2015 16:21:39 +0800 Subject: [PATCH 0883/2091] mmc: sdhci: also get preset value and driver type for MMC_DDR52 commit 0dafa60eb2506617e6968b97cc5a44914a7fb1a6 upstream. commit bb8175a8aa42 ("mmc: sdhci: clarify DDR timing mode between SD-UHS and eMMC") added MMC_DDR52 as eMMC's DDR mode to be distinguished from SD-UHS, but it missed setting driver type for MMC_DDR52 timing mode. So sometimes we get the following error on Marvell BG2Q DMP board: [ 1.559598] mmcblk0: error -84 transferring data, sector 0, nr 8, cmd response 0x900, card status 0xb00 [ 1.569314] mmcblk0: retrying using single block read [ 1.575676] mmcblk0: error -84 transferring data, sector 2, nr 6, cmd response 0x900, card status 0x0 [ 1.585202] blk_update_request: I/O error, dev mmcblk0, sector 2 [ 1.591818] mmcblk0: error -84 transferring data, sector 3, nr 5, cmd response 0x900, card status 0x0 [ 1.601341] blk_update_request: I/O error, dev mmcblk0, sector 3 This patches fixes this by adding the missing driver type setting. 
Fixes: bb8175a8aa42 ("mmc: sdhci: clarify DDR timing mode ...") Signed-off-by: Jisheng Zhang Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index bec8a307f8cd3..fd41b91436ec2 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -1146,6 +1146,7 @@ static u16 sdhci_get_preset_value(struct sdhci_host *host) preset = sdhci_readw(host, SDHCI_PRESET_FOR_SDR104); break; case MMC_TIMING_UHS_DDR50: + case MMC_TIMING_MMC_DDR52: preset = sdhci_readw(host, SDHCI_PRESET_FOR_DDR50); break; case MMC_TIMING_MMC_HS400: @@ -1598,7 +1599,8 @@ static void sdhci_do_set_ios(struct sdhci_host *host, struct mmc_ios *ios) (ios->timing == MMC_TIMING_UHS_SDR25) || (ios->timing == MMC_TIMING_UHS_SDR50) || (ios->timing == MMC_TIMING_UHS_SDR104) || - (ios->timing == MMC_TIMING_UHS_DDR50))) { + (ios->timing == MMC_TIMING_UHS_DDR50) || + (ios->timing == MMC_TIMING_MMC_DDR52))) { u16 preset; sdhci_enable_preset_value(host, true); From cd6a4dd89b4cdcc220c255ba5fe29b677d431cbd Mon Sep 17 00:00:00 2001 From: Jialing Fu Date: Fri, 28 Aug 2015 11:13:09 +0800 Subject: [PATCH 0884/2091] mmc: core: fix race condition in mmc_wait_data_done commit 71f8a4b81d040b3d094424197ca2f1bf811b1245 upstream. The following panic is captured in ker3.14, but the issue still exists in latest kernel. --------------------------------------------------------------------- [ 20.738217] c0 3136 (Compiler) Unable to handle kernel NULL pointer dereference at virtual address 00000578 ...... [ 20.738499] c0 3136 (Compiler) PC is at _raw_spin_lock_irqsave+0x24/0x60 [ 20.738527] c0 3136 (Compiler) LR is at _raw_spin_lock_irqsave+0x20/0x60 [ 20.740134] c0 3136 (Compiler) Call trace: [ 20.740165] c0 3136 (Compiler) [] _raw_spin_lock_irqsave+0x24/0x60 [ 20.740200] c0 3136 (Compiler) [] __wake_up+0x1c/0x54 [ 20.740230] c0 3136 (Compiler) [] mmc_wait_data_done+0x28/0x34 [ 20.740262] c0 3136 (Compiler) [] mmc_request_done+0xa4/0x220 [ 20.740314] c0 3136 (Compiler) [] sdhci_tasklet_finish+0xac/0x264 [ 20.740352] c0 3136 (Compiler) [] tasklet_action+0xa0/0x158 [ 20.740382] c0 3136 (Compiler) [] __do_softirq+0x10c/0x2e4 [ 20.740411] c0 3136 (Compiler) [] irq_exit+0x8c/0xc0 [ 20.740439] c0 3136 (Compiler) [] handle_IRQ+0x48/0xac [ 20.740469] c0 3136 (Compiler) [] gic_handle_irq+0x38/0x7c ---------------------------------------------------------------------- Because in SMP, "mrq" has race condition between below two paths: path1: CPU0: static void mmc_wait_data_done(struct mmc_request *mrq) { mrq->host->context_info.is_done_rcv = true; // // If CPU0 has just finished "is_done_rcv = true" in path1, and at // this moment, IRQ or ICache line missing happens in CPU0. // What happens in CPU1 (path2)? // // If the mmcqd thread in CPU1(path2) hasn't entered to sleep mode: // path2 would have chance to break from wait_event_interruptible // in mmc_wait_for_data_req_done and continue to run for next // mmc_request (mmc_blk_rw_rq_prep). // // Within mmc_blk_rq_prep, mrq is cleared to 0. // If below line still gets host from "mrq" as the result of // compiler, the panic happens as we traced. wake_up_interruptible(&mrq->host->context_info.wait); } path2: CPU1: static int mmc_wait_for_data_req_done(... { ... while (1) { wait_event_interruptible(context_info->wait, (context_info->is_done_rcv || context_info->is_new_req)); static void mmc_blk_rw_rq_prep(... { ... 
memset(brq, 0, sizeof(struct mmc_blk_request)); This issue happens very coincidentally; however adding mdelay(1) in mmc_wait_data_done as below could duplicate it easily. static void mmc_wait_data_done(struct mmc_request *mrq) { mrq->host->context_info.is_done_rcv = true; + mdelay(1); wake_up_interruptible(&mrq->host->context_info.wait); } At runtime, IRQ or ICache line missing may just happen at the same place of the mdelay(1). This patch gets the mmc_context_info at the beginning of function, it can avoid this race condition. Signed-off-by: Jialing Fu Tested-by: Shawn Lin Fixes: 2220eedfd7ae ("mmc: fix async request mechanism ....") Signed-off-by: Shawn Lin Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/core/core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 92e7671426ebc..588fb7908642a 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -330,8 +330,10 @@ EXPORT_SYMBOL(mmc_start_bkops); */ static void mmc_wait_data_done(struct mmc_request *mrq) { - mrq->host->context_info.is_done_rcv = true; - wake_up_interruptible(&mrq->host->context_info.wait); + struct mmc_context_info *context_info = &mrq->host->context_info; + + context_info->is_done_rcv = true; + wake_up_interruptible(&context_info->wait); } static void mmc_wait_done(struct mmc_request *mrq) From ed8b312450dae5e5a9e5bc022e1261a2e6414984 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 6 Jul 2015 12:49:23 +1000 Subject: [PATCH 0885/2091] md/raid5: avoid races when changing cache size. commit 2d5b569b665ea6d0b15c52529ff06300de81a7ce upstream. Cache size can grow or shrink due to various pressures at any time. So when we resize the cache as part of a 'grow' operation (i.e. change the size to allow more devices) we need to blocks that automatic growing/shrinking. So introduce a mutex. auto grow/shrink uses mutex_trylock() and just doesn't bother if there is a blockage. Resizing the whole cache holds the mutex to ensure that the correct number of new stripes is allocated. This bug can result in some stripes not being freed when an array is stopped. This leads to the kmem_cache not being freed and a subsequent array can try to use the same kmem_cache and get confused. Fixes: edbe83ab4c27 ("md/raid5: allow the stripe_cache to grow and shrink.") Signed-off-by: NeilBrown Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid5.c | 31 +++++++++++++++++++++++++------ drivers/md/raid5.h | 3 ++- 2 files changed, 27 insertions(+), 7 deletions(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index b6793d2e051f3..b369cbea4e54b 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2151,6 +2151,9 @@ static int resize_stripes(struct r5conf *conf, int newsize) if (!sc) return -ENOMEM; + /* Need to ensure auto-resizing doesn't interfere */ + mutex_lock(&conf->cache_size_mutex); + for (i = conf->max_nr_stripes; i; i--) { nsh = alloc_stripe(sc, GFP_KERNEL); if (!nsh) @@ -2167,6 +2170,7 @@ static int resize_stripes(struct r5conf *conf, int newsize) kmem_cache_free(sc, nsh); } kmem_cache_destroy(sc); + mutex_unlock(&conf->cache_size_mutex); return -ENOMEM; } /* Step 2 - Must use GFP_NOIO now. 
@@ -2213,6 +2217,7 @@ static int resize_stripes(struct r5conf *conf, int newsize) } else err = -ENOMEM; + mutex_unlock(&conf->cache_size_mutex); /* Step 4, return new stripes to service */ while(!list_empty(&newstripes)) { nsh = list_entry(newstripes.next, struct stripe_head, lru); @@ -5846,12 +5851,14 @@ static void raid5d(struct md_thread *thread) pr_debug("%d stripes handled\n", handled); spin_unlock_irq(&conf->device_lock); - if (test_and_clear_bit(R5_ALLOC_MORE, &conf->cache_state)) { + if (test_and_clear_bit(R5_ALLOC_MORE, &conf->cache_state) && + mutex_trylock(&conf->cache_size_mutex)) { grow_one_stripe(conf, __GFP_NOWARN); /* Set flag even if allocation failed. This helps * slow down allocation requests when mem is short */ set_bit(R5_DID_ALLOC, &conf->cache_state); + mutex_unlock(&conf->cache_size_mutex); } async_tx_issue_pending_all(); @@ -5883,18 +5890,22 @@ raid5_set_cache_size(struct mddev *mddev, int size) return -EINVAL; conf->min_nr_stripes = size; + mutex_lock(&conf->cache_size_mutex); while (size < conf->max_nr_stripes && drop_one_stripe(conf)) ; + mutex_unlock(&conf->cache_size_mutex); err = md_allow_write(mddev); if (err) return err; + mutex_lock(&conf->cache_size_mutex); while (size > conf->max_nr_stripes) if (!grow_one_stripe(conf, GFP_KERNEL)) break; + mutex_unlock(&conf->cache_size_mutex); return 0; } @@ -6360,11 +6371,18 @@ static unsigned long raid5_cache_scan(struct shrinker *shrink, struct shrink_control *sc) { struct r5conf *conf = container_of(shrink, struct r5conf, shrinker); - int ret = 0; - while (ret < sc->nr_to_scan) { - if (drop_one_stripe(conf) == 0) - return SHRINK_STOP; - ret++; + unsigned long ret = SHRINK_STOP; + + if (mutex_trylock(&conf->cache_size_mutex)) { + ret= 0; + while (ret < sc->nr_to_scan) { + if (drop_one_stripe(conf) == 0) { + ret = SHRINK_STOP; + break; + } + ret++; + } + mutex_unlock(&conf->cache_size_mutex); } return ret; } @@ -6433,6 +6451,7 @@ static struct r5conf *setup_conf(struct mddev *mddev) goto abort; spin_lock_init(&conf->device_lock); seqcount_init(&conf->gen_lock); + mutex_init(&conf->cache_size_mutex); init_waitqueue_head(&conf->wait_for_stripe); init_waitqueue_head(&conf->wait_for_overlap); INIT_LIST_HEAD(&conf->handle_list); diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index 896d603ad0da9..03472fbbd8823 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h @@ -482,7 +482,8 @@ struct r5conf { */ int active_name; char cache_name[2][32]; - struct kmem_cache *slab_cache; /* for allocating stripes */ + struct kmem_cache *slab_cache; /* for allocating stripes */ + struct mutex cache_size_mutex; /* Protect changes to cache size */ int seq_flush, seq_write; int quiesce; From d7edf5fe979269233c9d20e5101ae004df05070e Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 3 Aug 2015 17:09:57 +1000 Subject: [PATCH 0886/2091] md/raid5: don't let shrink_slab shrink too far. commit 49895bcc7e566ba455eb2996607d6fbd3447ce16 upstream. I have a report of drop_one_stripe() called from raid5_cache_scan() apparently finding ->max_nr_stripes == 0. This should not be allowed. So add a test to keep max_nr_stripes above min_nr_stripes. Also use a 'mask' rather than a 'mod' in drop_one_stripe to ensure 'hash' is valid even if max_nr_stripes does reach zero. 
Fixes: edbe83ab4c27 ("md/raid5: allow the stripe_cache to grow and shrink.") Reported-by: Tomas Papan Signed-off-by: NeilBrown Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid5.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index b369cbea4e54b..23af6772f1469 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2245,7 +2245,7 @@ static int resize_stripes(struct r5conf *conf, int newsize) static int drop_one_stripe(struct r5conf *conf) { struct stripe_head *sh; - int hash = (conf->max_nr_stripes - 1) % NR_STRIPE_HASH_LOCKS; + int hash = (conf->max_nr_stripes - 1) & STRIPE_HASH_LOCKS_MASK; spin_lock_irq(conf->hash_locks + hash); sh = get_free_stripe(conf, hash); @@ -6375,7 +6375,8 @@ static unsigned long raid5_cache_scan(struct shrinker *shrink, if (mutex_trylock(&conf->cache_size_mutex)) { ret= 0; - while (ret < sc->nr_to_scan) { + while (ret < sc->nr_to_scan && + conf->max_nr_stripes > conf->min_nr_stripes) { if (drop_one_stripe(conf) == 0) { ret = SHRINK_STOP; break; From ae286448cf64d37128b74c72b9b5435da7e4ba17 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 6 Jul 2015 17:37:49 +1000 Subject: [PATCH 0887/2091] md/raid10: always set reshape_safe when initializing reshape_position. commit 299b0685e31c9f3dcc2d58ee3beca761a40b44b3 upstream. 'reshape_position' tracks where in the reshape we have reached. 'reshape_safe' tracks where in the reshape we have safely recorded in the metadata. These are compared to determine when to update the metadata. So it is important that reshape_safe is initialised properly. Currently it isn't. When starting a reshape from the beginning it usually has the correct value by luck. But when reducing the number of devices in a RAID10, it has the wrong value and this leads to the metadata not being updated correctly. This can lead to corruption if the reshape is not allowed to complete. This patch is suitable for any -stable kernel which supports RAID10 reshape, which is 3.5 and later. 
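In rough terms the two fields form a progress/checkpoint pair. A minimal sketch of that idiom only (the real logic in the reshape path is more involved, and the metadata update itself is elided):

	if (conf->reshape_progress != conf->reshape_safe) {
		/* progress has moved beyond the last checkpoint recorded in
		 * the metadata: write the superblock out, then advance the
		 * checkpoint so a restart cannot trust unrecorded progress */
		mddev->reshape_position = conf->reshape_progress;
		/* ... request a metadata update and wait for it ... */
		conf->reshape_safe = conf->reshape_progress;
	}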
Fixes: 3ea7daa5d7fd ("md/raid10: add reshape support") Signed-off-by: NeilBrown Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid10.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index f55c3f35b7463..fe0122771642b 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -3566,6 +3566,7 @@ static struct r10conf *setup_conf(struct mddev *mddev) /* far_copies must be 1 */ conf->prev.stride = conf->dev_sectors; } + conf->reshape_safe = conf->reshape_progress; spin_lock_init(&conf->device_lock); INIT_LIST_HEAD(&conf->retry_list); @@ -3770,7 +3771,6 @@ static int run(struct mddev *mddev) } conf->offset_diff = min_offset_diff; - conf->reshape_safe = conf->reshape_progress; clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); @@ -4113,6 +4113,7 @@ static int raid10_start_reshape(struct mddev *mddev) conf->reshape_progress = size; } else conf->reshape_progress = 0; + conf->reshape_safe = conf->reshape_progress; spin_unlock_irq(&conf->device_lock); if (mddev->delta_disks && mddev->bitmap) { @@ -4180,6 +4181,7 @@ static int raid10_start_reshape(struct mddev *mddev) rdev->new_data_offset = rdev->data_offset; smp_wmb(); conf->reshape_progress = MaxSector; + conf->reshape_safe = MaxSector; mddev->reshape_position = MaxSector; spin_unlock_irq(&conf->device_lock); return ret; @@ -4534,6 +4536,7 @@ static void end_reshape(struct r10conf *conf) md_finish_reshape(conf->mddev); smp_wmb(); conf->reshape_progress = MaxSector; + conf->reshape_safe = MaxSector; spin_unlock_irq(&conf->device_lock); /* read-ahead size must cover two whole stripes, which is From 18c45d9c8e9a8449f73b333eb1dd354d88ceb186 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 22 Jul 2015 10:20:07 +1000 Subject: [PATCH 0888/2091] md: flush ->event_work before stopping array. commit ee5d004fd0591536a061451eba2b187092e9127c upstream. The 'event_work' worker used by dm-raid may still be running when the array is stopped. This can result in an oops. So flush the workqueue on which it is run after detaching and before destroying the device. Reported-by: Heinz Mauelshagen Signed-off-by: NeilBrown Fixes: 9d09e663d550 ("dm: raid456 basic support") Signed-off-by: Greg Kroah-Hartman --- drivers/md/md.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/md/md.c b/drivers/md/md.c index e4621511d118b..e8c44fcb1ad14 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -5365,6 +5365,8 @@ static void __md_stop(struct mddev *mddev) { struct md_personality *pers = mddev->pers; mddev_detach(mddev); + /* Ensure ->event_work is done */ + flush_workqueue(md_misc_wq); spin_lock(&mddev->lock); mddev->ready = 0; mddev->pers = NULL; From 70ebd071ea1a363f859a2b2480bfe12cb4cf93b3 Mon Sep 17 00:00:00 2001 From: Emil Medve Date: Wed, 25 Mar 2015 00:28:48 -0500 Subject: [PATCH 0889/2091] iommu/fsl: Really fix init section(s) content commit 57fb907da89977640ef183556a621336c1348fa0 upstream. '0f1fb99 iommu/fsl: Fix section mismatch' was intended to address the modpost warning and the potential crash. Crash which is actually easy to trigger with a 'unbind' followed by a 'bind' sequence. The fix is wrong as fsl_of_pamu_driver.driver gets added by bus_add_driver() to a couple of klist(s) which become invalid/corrupted as soon as the init sections are freed. 
Depending on when/how the init sections storage is reused various/random errors and crashes will happen 'cd70d46 iommu/fsl: Various cleanups' contains annotations that go further down the wrong path laid by '0f1fb99 iommu/fsl: Fix section mismatch' Now remove all the incorrect annotations from the above mentioned patches (not exactly a revert) and those previously existing in the code, This fixes the modpost warning(s), the unbind/bind sequence crashes and the random errors/crashes Fixes: 0f1fb99b62ce ("iommu/fsl: Fix section mismatch") Fixes: cd70d4659ff3 ("iommu/fsl: Various cleanups") Signed-off-by: Emil Medve Acked-by: Varun Sethi Tested-by: Madalin Bucur Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/fsl_pamu.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/iommu/fsl_pamu.c b/drivers/iommu/fsl_pamu.c index abeedc9a78c27..2570f2a25dc43 100644 --- a/drivers/iommu/fsl_pamu.c +++ b/drivers/iommu/fsl_pamu.c @@ -41,7 +41,6 @@ struct pamu_isr_data { static struct paace *ppaact; static struct paace *spaact; -static struct ome *omt __initdata; /* * Table for matching compatible strings, for device tree @@ -50,7 +49,7 @@ static struct ome *omt __initdata; * SOCs. For the older SOCs "fsl,qoriq-device-config-1.0" * string would be used. */ -static const struct of_device_id guts_device_ids[] __initconst = { +static const struct of_device_id guts_device_ids[] = { { .compatible = "fsl,qoriq-device-config-1.0", }, { .compatible = "fsl,qoriq-device-config-2.0", }, {} @@ -599,7 +598,7 @@ u32 get_stash_id(u32 stash_dest_hint, u32 vcpu) * Memory accesses to QMAN and BMAN private memory need not be coherent, so * clear the PAACE entry coherency attribute for them. */ -static void __init setup_qbman_paace(struct paace *ppaace, int paace_type) +static void setup_qbman_paace(struct paace *ppaace, int paace_type) { switch (paace_type) { case QMAN_PAACE: @@ -629,7 +628,7 @@ static void __init setup_qbman_paace(struct paace *ppaace, int paace_type) * this table to translate device transaction to appropriate corenet * transaction. */ -static void __init setup_omt(struct ome *omt) +static void setup_omt(struct ome *omt) { struct ome *ome; @@ -666,7 +665,7 @@ static void __init setup_omt(struct ome *omt) * Get the maximum number of PAACT table entries * and subwindows supported by PAMU */ -static void __init get_pamu_cap_values(unsigned long pamu_reg_base) +static void get_pamu_cap_values(unsigned long pamu_reg_base) { u32 pc_val; @@ -676,9 +675,9 @@ static void __init get_pamu_cap_values(unsigned long pamu_reg_base) } /* Setup PAMU registers pointing to PAACT, SPAACT and OMT */ -static int __init setup_one_pamu(unsigned long pamu_reg_base, unsigned long pamu_reg_size, - phys_addr_t ppaact_phys, phys_addr_t spaact_phys, - phys_addr_t omt_phys) +static int setup_one_pamu(unsigned long pamu_reg_base, unsigned long pamu_reg_size, + phys_addr_t ppaact_phys, phys_addr_t spaact_phys, + phys_addr_t omt_phys) { u32 *pc; struct pamu_mmap_regs *pamu_regs; @@ -720,7 +719,7 @@ static int __init setup_one_pamu(unsigned long pamu_reg_base, unsigned long pamu } /* Enable all device LIODNS */ -static void __init setup_liodns(void) +static void setup_liodns(void) { int i, len; struct paace *ppaace; @@ -846,7 +845,7 @@ struct ccsr_law { /* * Create a coherence subdomain for a given memory block. 
*/ -static int __init create_csd(phys_addr_t phys, size_t size, u32 csd_port_id) +static int create_csd(phys_addr_t phys, size_t size, u32 csd_port_id) { struct device_node *np; const __be32 *iprop; @@ -988,7 +987,7 @@ static int __init create_csd(phys_addr_t phys, size_t size, u32 csd_port_id) static const struct { u32 svr; u32 port_id; -} port_id_map[] __initconst = { +} port_id_map[] = { {(SVR_P2040 << 8) | 0x10, 0xFF000000}, /* P2040 1.0 */ {(SVR_P2040 << 8) | 0x11, 0xFF000000}, /* P2040 1.1 */ {(SVR_P2041 << 8) | 0x10, 0xFF000000}, /* P2041 1.0 */ @@ -1006,7 +1005,7 @@ static const struct { #define SVR_SECURITY 0x80000 /* The Security (E) bit */ -static int __init fsl_pamu_probe(struct platform_device *pdev) +static int fsl_pamu_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; void __iomem *pamu_regs = NULL; @@ -1022,6 +1021,7 @@ static int __init fsl_pamu_probe(struct platform_device *pdev) int irq; phys_addr_t ppaact_phys; phys_addr_t spaact_phys; + struct ome *omt; phys_addr_t omt_phys; size_t mem_size = 0; unsigned int order = 0; @@ -1200,7 +1200,7 @@ static int __init fsl_pamu_probe(struct platform_device *pdev) return ret; } -static struct platform_driver fsl_of_pamu_driver __initdata = { +static struct platform_driver fsl_of_pamu_driver = { .driver = { .name = "fsl-of-pamu", }, From fb2908b509749657cab5f7dbda2f24add8668969 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 11 Aug 2015 16:48:32 +0100 Subject: [PATCH 0890/2091] iommu/io-pgtable-arm: Unmap and free table when overwriting with block commit cf27ec930be906e142c752f9161197d69ca534d7 upstream. When installing a block mapping, we unconditionally overwrite a non-leaf PTE if we find one. However, this can cause a problem if the following sequence of events occur: (1) iommu_map called for a 4k (i.e. PAGE_SIZE) mapping at some address - We initialise the page table all the way down to a leaf entry - No TLB maintenance is required, because we're going from invalid to valid. (2) iommu_unmap is called on the mapping installed in (1) - We walk the page table to the final (leaf) entry and zero it - We only changed a valid leaf entry, so we invalidate leaf-only (3) iommu_map is called on the same address as (1), but this time for a 2MB (i.e. BLOCK_SIZE) mapping) - We walk the page table down to the penultimate level, where we find a table entry - We overwrite the table entry with a block mapping and return without any TLB maintenance and without freeing the memory used by the now-orphaned table. This last step can lead to a walk-cache caching the overwritten table entry, causing unexpected faults when the new mapping is accessed by a device. One way to fix this would be to collapse the page table when freeing the last page at a given level, but this would require expensive iteration on every map call. Instead, this patch detects the case when we are overwriting a table entry and explicitly unmaps the table first, which takes care of both freeing and TLB invalidation. 
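A hypothetical call sequence that hits the problem, assuming an already-attached iommu_domain *domain and made-up physical addresses paddr1/paddr2 (the IOVA must be block-aligned for step 3):

	/* (1) 4 KiB leaf mapping: tables are built, no TLB maintenance needed */
	iommu_map(domain, 0x200000, paddr1, SZ_4K, IOMMU_READ | IOMMU_WRITE);

	/* (2) unmap of a leaf entry: leaf-only TLB invalidation */
	iommu_unmap(domain, 0x200000, SZ_4K);

	/* (3) 2 MiB block mapping overwrites the now-stale table entry; the
	 *     walk cache may still hold that entry, hence the unexpected faults */
	iommu_map(domain, 0x200000, paddr2, SZ_2M, IOMMU_READ | IOMMU_WRITE);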
Reported-by: Brian Starkey Tested-by: Brian Starkey Signed-off-by: Will Deacon Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/io-pgtable-arm.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 4e460216bd164..e29d5d7fe2206 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -200,6 +200,10 @@ typedef u64 arm_lpae_iopte; static bool selftest_running = false; +static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data, + unsigned long iova, size_t size, int lvl, + arm_lpae_iopte *ptep); + static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data, unsigned long iova, phys_addr_t paddr, arm_lpae_iopte prot, int lvl, @@ -207,10 +211,21 @@ static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data, { arm_lpae_iopte pte = prot; - /* We require an unmap first */ if (iopte_leaf(*ptep, lvl)) { + /* We require an unmap first */ WARN_ON(!selftest_running); return -EEXIST; + } else if (iopte_type(*ptep, lvl) == ARM_LPAE_PTE_TYPE_TABLE) { + /* + * We need to unmap and free the old table before + * overwriting it with a block entry. + */ + arm_lpae_iopte *tblp; + size_t sz = ARM_LPAE_BLOCK_SIZE(lvl, data); + + tblp = ptep - ARM_LPAE_LVL_IDX(iova, lvl, data); + if (WARN_ON(__arm_lpae_unmap(data, iova, sz, lvl, tblp) != sz)) + return -EINVAL; } if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_NS) From 1f398b31044845397b5e2fefdb7ac247180e6a7c Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Thu, 6 Aug 2015 14:20:31 +0200 Subject: [PATCH 0891/2091] iommu/tegra-smmu: Parameterize number of TLB lines commit 11cec15bf3fb498206ef63b1fa26c27689e02d0e upstream. The number of TLB lines was increased from 16 on Tegra30 to 32 on Tegra114 and later. Parameterize the value so that the initial default can be set accordingly. On Tegra30, initializing the value to 32 would effectively disable the TLB and hence cause massive latencies for memory accesses translated through the SMMU. This is especially noticeable for isochronuous clients such as display, whose FIFOs would continuously underrun. 
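The effect of the new mask can be checked with plain arithmetic; a small userspace sketch using the line counts added by this patch (16 for Tegra30, 32 for Tegra114/124) against the old hard-coded 0x20:

#include <stdio.h>

int main(void)
{
	unsigned int lines[] = { 16, 32 };	/* Tegra30, Tegra114/124 */

	for (int i = 0; i < 2; i++) {
		unsigned long mask = (lines[i] << 1) - 1;

		/* value that ends up in SMMU_TLB_CONFIG's active-lines field */
		printf("lines=%u mask=%#lx programmed=%lu (old hard-coded: %d)\n",
		       lines[i], mask, lines[i] & mask, 0x20 & 0x3f);
	}
	return 0;
}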
Fixes: 891846516317 ("memory: Add NVIDIA Tegra memory controller support") Signed-off-by: Thierry Reding Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/tegra-smmu.c | 9 +++++++-- drivers/memory/tegra/tegra114.c | 1 + drivers/memory/tegra/tegra124.c | 1 + drivers/memory/tegra/tegra30.c | 1 + include/soc/tegra/mc.h | 1 + 5 files changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index c845d99ecf6b8..e0ff5f4d7fed5 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -26,6 +26,7 @@ struct tegra_smmu { const struct tegra_smmu_soc *soc; unsigned long pfn_mask; + unsigned long tlb_mask; unsigned long *asids; struct mutex lock; @@ -65,7 +66,8 @@ static inline u32 smmu_readl(struct tegra_smmu *smmu, unsigned long offset) #define SMMU_TLB_CONFIG 0x14 #define SMMU_TLB_CONFIG_HIT_UNDER_MISS (1 << 29) #define SMMU_TLB_CONFIG_ROUND_ROBIN_ARBITRATION (1 << 28) -#define SMMU_TLB_CONFIG_ACTIVE_LINES(x) ((x) & 0x3f) +#define SMMU_TLB_CONFIG_ACTIVE_LINES(smmu) \ + ((smmu)->soc->num_tlb_lines & (smmu)->tlb_mask) #define SMMU_PTC_CONFIG 0x18 #define SMMU_PTC_CONFIG_ENABLE (1 << 29) @@ -716,6 +718,9 @@ struct tegra_smmu *tegra_smmu_probe(struct device *dev, smmu->pfn_mask = BIT_MASK(mc->soc->num_address_bits - PAGE_SHIFT) - 1; dev_dbg(dev, "address bits: %u, PFN mask: %#lx\n", mc->soc->num_address_bits, smmu->pfn_mask); + smmu->tlb_mask = (smmu->soc->num_tlb_lines << 1) - 1; + dev_dbg(dev, "TLB lines: %u, mask: %#lx\n", smmu->soc->num_tlb_lines, + smmu->tlb_mask); value = SMMU_PTC_CONFIG_ENABLE | SMMU_PTC_CONFIG_INDEX_MAP(0x3f); @@ -725,7 +730,7 @@ struct tegra_smmu *tegra_smmu_probe(struct device *dev, smmu_writel(smmu, value, SMMU_PTC_CONFIG); value = SMMU_TLB_CONFIG_HIT_UNDER_MISS | - SMMU_TLB_CONFIG_ACTIVE_LINES(0x20); + SMMU_TLB_CONFIG_ACTIVE_LINES(smmu); if (soc->supports_round_robin_arbitration) value |= SMMU_TLB_CONFIG_ROUND_ROBIN_ARBITRATION; diff --git a/drivers/memory/tegra/tegra114.c b/drivers/memory/tegra/tegra114.c index 511e9a25c151c..16c4d26f51e72 100644 --- a/drivers/memory/tegra/tegra114.c +++ b/drivers/memory/tegra/tegra114.c @@ -935,6 +935,7 @@ static const struct tegra_smmu_soc tegra114_smmu_soc = { .num_swgroups = ARRAY_SIZE(tegra114_swgroups), .supports_round_robin_arbitration = false, .supports_request_limit = false, + .num_tlb_lines = 32, .num_asids = 4, .ops = &tegra114_smmu_ops, }; diff --git a/drivers/memory/tegra/tegra124.c b/drivers/memory/tegra/tegra124.c index 278d40b854c15..b153d0b732cf6 100644 --- a/drivers/memory/tegra/tegra124.c +++ b/drivers/memory/tegra/tegra124.c @@ -981,6 +981,7 @@ static const struct tegra_smmu_soc tegra124_smmu_soc = { .num_swgroups = ARRAY_SIZE(tegra124_swgroups), .supports_round_robin_arbitration = true, .supports_request_limit = true, + .num_tlb_lines = 32, .num_asids = 128, .ops = &tegra124_smmu_ops, }; diff --git a/drivers/memory/tegra/tegra30.c b/drivers/memory/tegra/tegra30.c index 71fe9376fe533..f422b18f45f30 100644 --- a/drivers/memory/tegra/tegra30.c +++ b/drivers/memory/tegra/tegra30.c @@ -957,6 +957,7 @@ static const struct tegra_smmu_soc tegra30_smmu_soc = { .num_swgroups = ARRAY_SIZE(tegra30_swgroups), .supports_round_robin_arbitration = false, .supports_request_limit = false, + .num_tlb_lines = 16, .num_asids = 4, .ops = &tegra30_smmu_ops, }; diff --git a/include/soc/tegra/mc.h b/include/soc/tegra/mc.h index 63deb8d9f82af..d298857cd8459 100644 --- a/include/soc/tegra/mc.h +++ b/include/soc/tegra/mc.h @@ -59,6 +59,7 @@ struct tegra_smmu_soc { bool 
supports_round_robin_arbitration; bool supports_request_limit; + unsigned int num_tlb_lines; unsigned int num_asids; const struct tegra_smmu_ops *ops; From feab9b2a1002c23d415f7a2db4f4777c60d3eb52 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 25 Aug 2015 10:54:28 +0200 Subject: [PATCH 0892/2091] iommu/vt-d: Really use upper context table when necessary commit 4df4eab168c1c4058603be55a3169d4a45779cc0 upstream. There is a bug in iommu_context_addr() which will always use the lower context table, even when the upper context table needs to be used. Fix this issue. Fixes: 03ecc32c5274 ("iommu/vt-d: support extended root and context entries") Reported-by: Xiao, Nan Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/intel-iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index c87c4b1bfc005..c23427951ec1c 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -681,6 +681,7 @@ static inline struct context_entry *iommu_context_addr(struct intel_iommu *iommu struct context_entry *context; u64 *entry; + entry = &root->lo; if (ecs_enabled(iommu)) { if (devfn >= 0x80) { devfn -= 0x80; @@ -688,7 +689,6 @@ static inline struct context_entry *iommu_context_addr(struct intel_iommu *iommu } devfn *= 2; } - entry = &root->lo; if (*entry & 1) context = phys_to_virt(*entry & VTD_PAGE_MASK); else { From 22fda415e1316c6077277341cd5d8679f6f630fa Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Wed, 5 Aug 2015 11:26:36 -0500 Subject: [PATCH 0893/2091] eCryptfs: Invalidate dcache entries when lower i_nlink is zero commit 5556e7e6d30e8e9b5ee51b0e5edd526ee80e5e36 upstream. Consider eCryptfs dcache entries to be stale when the corresponding lower inode's i_nlink count is zero. This solves a problem caused by the lower inode being directly modified, without going through the eCryptfs mount, leaving stale eCryptfs dentries cached and the eCryptfs inode's i_nlink count not being cleared. Signed-off-by: Tyler Hicks Reported-by: Richard Weinberger Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ecryptfs/dentry.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c index 8db0b464483f9..63cd2c147221a 100644 --- a/fs/ecryptfs/dentry.c +++ b/fs/ecryptfs/dentry.c @@ -45,20 +45,20 @@ static int ecryptfs_d_revalidate(struct dentry *dentry, unsigned int flags) { struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); - int rc; - - if (!(lower_dentry->d_flags & DCACHE_OP_REVALIDATE)) - return 1; + int rc = 1; if (flags & LOOKUP_RCU) return -ECHILD; - rc = lower_dentry->d_op->d_revalidate(lower_dentry, flags); + if (lower_dentry->d_flags & DCACHE_OP_REVALIDATE) + rc = lower_dentry->d_op->d_revalidate(lower_dentry, flags); + if (d_really_is_positive(dentry)) { - struct inode *lower_inode = - ecryptfs_inode_to_lower(d_inode(dentry)); + struct inode *inode = d_inode(dentry); - fsstack_copy_attr_all(d_inode(dentry), lower_inode); + fsstack_copy_attr_all(inode, ecryptfs_inode_to_lower(inode)); + if (!inode->i_nlink) + return 0; } return rc; } From 919a78e3e4df2c28f8b6c1aff5d6d9d9beff0d52 Mon Sep 17 00:00:00 2001 From: Hin-Tak Leung Date: Wed, 9 Sep 2015 15:38:07 -0700 Subject: [PATCH 0894/2091] hfs: fix B-tree corruption after insertion at position 0 commit b4cc0efea4f0bfa2477c56af406cfcf3d3e58680 upstream. 
Fix B-tree corruption when a new record is inserted at position 0 in the node in hfs_brec_insert(). This is an identical change to the corresponding hfs b-tree code to Sergei Antonov's "hfsplus: fix B-tree corruption after insertion at position 0", to keep similar code paths in the hfs and hfsplus drivers in sync, where appropriate. Signed-off-by: Hin-Tak Leung Cc: Sergei Antonov Cc: Joe Perches Reviewed-by: Vyacheslav Dubeyko Cc: Anton Altaparmakov Cc: Al Viro Cc: Christoph Hellwig Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/hfs/brec.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/fs/hfs/brec.c b/fs/hfs/brec.c index 9f4ee7f520261..6fc766df04617 100644 --- a/fs/hfs/brec.c +++ b/fs/hfs/brec.c @@ -131,13 +131,16 @@ int hfs_brec_insert(struct hfs_find_data *fd, void *entry, int entry_len) hfs_bnode_write(node, entry, data_off + key_len, entry_len); hfs_bnode_dump(node); - if (new_node) { - /* update parent key if we inserted a key - * at the start of the first node - */ - if (!rec && new_node != node) - hfs_brec_update_parent(fd); + /* + * update parent key if we inserted a key + * at the start of the node and it is not the new node + */ + if (!rec && new_node != node) { + hfs_bnode_read_key(node, fd->search_key, data_off + size); + hfs_brec_update_parent(fd); + } + if (new_node) { hfs_bnode_put(fd->bnode); if (!new_node->parent) { hfs_btree_inc_height(tree); @@ -166,9 +169,6 @@ int hfs_brec_insert(struct hfs_find_data *fd, void *entry, int entry_len) goto again; } - if (!rec) - hfs_brec_update_parent(fd); - return 0; } @@ -366,6 +366,8 @@ static int hfs_brec_update_parent(struct hfs_find_data *fd) if (IS_ERR(parent)) return PTR_ERR(parent); __hfs_brec_find(parent, fd); + if (fd->record < 0) + return -ENOENT; hfs_bnode_dump(parent); rec = fd->record; From 0796f55ba4db268f3a6a6c6bb9e481ce9eaf5d48 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 10 Aug 2015 10:45:45 +0200 Subject: [PATCH 0895/2091] ideapad-laptop: Add Lenovo Yoga 3 14 to no_hw_rfkill dmi list commit fa92a31b3335478c545cdc8e79e1e9b788184e6b upstream. Like some of the other Yoga models the Lenovo Yoga 3 14 does not have a hw rfkill switch, and trying to read the hw rfkill switch through the ideapad module causes it to always reported blocking breaking wifi. This commit adds the Lenovo Yoga 3 14 to the no_hw_rfkill dmi list, fixing the wifi breakage. BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1239050 Signed-off-by: Hans de Goede Signed-off-by: Darren Hart Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/ideapad-laptop.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index cb7cd8d79329a..cd78f1166b33d 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -851,6 +851,13 @@ static const struct dmi_system_id no_hw_rfkill_list[] = { DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo Yoga 2"), }, }, + { + .ident = "Lenovo Yoga 3 14", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo Yoga 3 14"), + }, + }, { .ident = "Lenovo Yoga 3 Pro 1370", .matches = { From bff7d8e55354c86893324b85eed532cb0c94f965 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 31 Jul 2015 14:13:22 -0700 Subject: [PATCH 0896/2091] IB/srp: Handle partial connection success correctly commit c257ea6f9f9aed0b173e0c2932bb8dac5612cdc6 upstream. 
Avoid that the following kernel warning is reported if the SRP target system accepts fewer channels per connection than what was requested by the initiator system: WARNING: at drivers/infiniband/ulp/srp/ib_srp.c:617 srp_destroy_qp+0xb1/0x120 [ib_srp]() Call Trace: [] warn_slowpath_common+0x7f/0xc0 [] warn_slowpath_null+0x1a/0x20 [] srp_destroy_qp+0xb1/0x120 [ib_srp] [] srp_create_ch_ib+0x19b/0x420 [ib_srp] [] srp_create_target+0x7d7/0xa94 [ib_srp] [] dev_attr_store+0x20/0x30 [] sysfs_write_file+0xef/0x170 [] vfs_write+0xb4/0x130 [] sys_write+0x5f/0xa0 [] system_call_fastpath+0x16/0x1b Signed-off-by: Bart Van Assche Cc: Sagi Grimberg Cc: Sebastian Parschauer Cc: Christoph Hellwig Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/srp/ib_srp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 75c01b27bd0bd..a120d5c119c61 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -3266,7 +3266,7 @@ static ssize_t srp_create_target(struct device *dev, srp_free_ch_ib(target, ch); srp_free_req_data(target, ch); target->ch_count = ch - target->ch; - break; + goto connected; } } @@ -3276,6 +3276,7 @@ static ssize_t srp_create_target(struct device *dev, node_idx++; } +connected: target->scsi_host->nr_hw_queues = target->ch_count; ret = srp_add_target(host, target); From b25b17c50502368c2306a449ce217d34f8e0b287 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 14 Aug 2015 11:01:09 -0700 Subject: [PATCH 0897/2091] IB/srp: Stop the scsi_eh_ and scsi_tmf_ threads if login fails commit bc44bd1d864664f3658352c6aaaa02557d49165d upstream. scsi_host_alloc() not only allocates memory for a SCSI host but also creates the scsi_eh_ kernel thread and the scsi_tmf_ workqueue. Stop these threads if login fails by calling scsi_host_put(). Reported-by: Konstantin Krotov Fixes: fb49c8bbaae7 ("Remove an extraneous scsi_host_put() from an error path") Signed-off-by: Bart Van Assche Cc: Sagi Grimberg Cc: Sebastian Parschauer Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/srp/ib_srp.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index a120d5c119c61..025f931054447 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -2761,6 +2761,13 @@ static int srp_sdev_count(struct Scsi_Host *host) return c; } +/* + * Return values: + * < 0 upon failure. Caller is responsible for SRP target port cleanup. + * 0 and target->state == SRP_TARGET_REMOVED if asynchronous target port + * removal has been scheduled. + * 0 and target->state != SRP_TARGET_REMOVED upon success. + */ static int srp_add_target(struct srp_host *host, struct srp_target_port *target) { struct srp_rport_identifiers ids; @@ -3299,6 +3306,8 @@ static ssize_t srp_create_target(struct device *dev, mutex_unlock(&host->add_target_mutex); scsi_host_put(target->scsi_host); + if (ret < 0) + scsi_host_put(target->scsi_host); return ret; From 780dbff3f4b9613a36879f6defe69572a3f1c934 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Tue, 21 Jul 2015 08:36:07 -0400 Subject: [PATCH 0898/2091] IB/qib: Change lkey table allocation to support more MRs commit d6f1c17e162b2a11e708f28fa93f2f79c164b442 upstream. The lkey table is allocated with with a get_user_pages() with an order based on a number of index bits from a module parameter. 
The underlying kernel code cannot allocate that many contiguous pages. There is no reason the underlying memory needs to be physically contiguous. This patch: - switches the allocation/deallocation to vmalloc/vfree - caps the number of bits to 23 to insure at least 1 generation bit o this matches the module parameter description Reviewed-by: Vinit Agnihotri Signed-off-by: Mike Marciniszyn Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/qib/qib_keys.c | 4 ++++ drivers/infiniband/hw/qib/qib_verbs.c | 14 ++++++++++---- drivers/infiniband/hw/qib/qib_verbs.h | 2 ++ 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib_keys.c b/drivers/infiniband/hw/qib/qib_keys.c index ad843c786e721..5afaa218508d2 100644 --- a/drivers/infiniband/hw/qib/qib_keys.c +++ b/drivers/infiniband/hw/qib/qib_keys.c @@ -86,6 +86,10 @@ int qib_alloc_lkey(struct qib_mregion *mr, int dma_region) * unrestricted LKEY. */ rkt->gen++; + /* + * bits are capped in qib_verbs.c to insure enough bits + * for generation number + */ mr->lkey = (r << (32 - ib_qib_lkey_table_size)) | ((((1 << (24 - ib_qib_lkey_table_size)) - 1) & rkt->gen) << 8); diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 4a3599890ea5f..9dd5d9a0556b3 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -40,6 +40,7 @@ #include #include #include +#include #include "qib.h" #include "qib_common.h" @@ -2089,10 +2090,16 @@ int qib_register_ib_device(struct qib_devdata *dd) * the LKEY). The remaining bits act as a generation number or tag. */ spin_lock_init(&dev->lk_table.lock); + /* insure generation is at least 4 bits see keys.c */ + if (ib_qib_lkey_table_size > MAX_LKEY_TABLE_BITS) { + qib_dev_warn(dd, "lkey bits %u too large, reduced to %u\n", + ib_qib_lkey_table_size, MAX_LKEY_TABLE_BITS); + ib_qib_lkey_table_size = MAX_LKEY_TABLE_BITS; + } dev->lk_table.max = 1 << ib_qib_lkey_table_size; lk_tab_size = dev->lk_table.max * sizeof(*dev->lk_table.table); dev->lk_table.table = (struct qib_mregion __rcu **) - __get_free_pages(GFP_KERNEL, get_order(lk_tab_size)); + vmalloc(lk_tab_size); if (dev->lk_table.table == NULL) { ret = -ENOMEM; goto err_lk; @@ -2265,7 +2272,7 @@ int qib_register_ib_device(struct qib_devdata *dd) sizeof(struct qib_pio_header), dev->pio_hdrs, dev->pio_hdrs_phys); err_hdrs: - free_pages((unsigned long) dev->lk_table.table, get_order(lk_tab_size)); + vfree(dev->lk_table.table); err_lk: kfree(dev->qp_table); err_qpt: @@ -2319,8 +2326,7 @@ void qib_unregister_ib_device(struct qib_devdata *dd) sizeof(struct qib_pio_header), dev->pio_hdrs, dev->pio_hdrs_phys); lk_tab_size = dev->lk_table.max * sizeof(*dev->lk_table.table); - free_pages((unsigned long) dev->lk_table.table, - get_order(lk_tab_size)); + vfree(dev->lk_table.table); kfree(dev->qp_table); } diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index bfc8948fdd359..44ca28c83fe60 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -647,6 +647,8 @@ struct qib_qpn_table { struct qpn_map map[QPNMAP_ENTRIES]; }; +#define MAX_LKEY_TABLE_BITS 23 + struct qib_lkey_table { spinlock_t lock; /* protect changes in this struct */ u32 next; /* next unused index (speeds search) */ From 909246d01e085df6d6808fea35146910c0f30b09 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 26 Aug 2015 11:00:37 +0200 Subject: [PATCH 0899/2091] IB/uverbs: reject 
invalid or unknown opcodes commit b632ffa7cee439ba5dce3b3bc4a5cbe2b3e20133 upstream. We have many WR opcodes that are only supported in kernel space and/or require optional information to be copied into the WR structure. Reject all those not explicitly handled so that we can't pass invalid information to drivers. Signed-off-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/uverbs_cmd.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index a9f048990dfcd..ccc2494b4ea73 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -2244,6 +2244,12 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, next->send_flags = user_wr->send_flags; if (is_ud) { + if (next->opcode != IB_WR_SEND && + next->opcode != IB_WR_SEND_WITH_IMM) { + ret = -EINVAL; + goto out_put; + } + next->wr.ud.ah = idr_read_ah(user_wr->wr.ud.ah, file->ucontext); if (!next->wr.ud.ah) { @@ -2283,9 +2289,11 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, user_wr->wr.atomic.compare_add; next->wr.atomic.swap = user_wr->wr.atomic.swap; next->wr.atomic.rkey = user_wr->wr.atomic.rkey; + case IB_WR_SEND: break; default: - break; + ret = -EINVAL; + goto out_put; } } From ce3e4e26260af851fb0188306c8bb47748955ef8 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 13 Aug 2015 18:32:03 +0300 Subject: [PATCH 0900/2091] IB/uverbs: Fix race between ib_uverbs_open and remove_one commit 35d4a0b63dc0c6d1177d4f532a9deae958f0662c upstream. Fixes: 2a72f212263701b927559f6850446421d5906c41 ("IB/uverbs: Remove dev_table") Before this commit there was a device look-up table that was protected by a spin_lock used by ib_uverbs_open and by ib_uverbs_remove_one. When it was dropped and container_of was used instead, it enabled the race with remove_one as dev might be freed just after: dev = container_of(inode->i_cdev, struct ib_uverbs_device, cdev) but before the kref_get. In addition, this buggy patch added some dead code as container_of(x,y,z) can never be NULL and so dev can never be NULL. As a result the comment above ib_uverbs_open saying "the open method will either immediately run -ENXIO" is wrong as it can never happen. The solution follows Jason Gunthorpe suggestion from below URL: https://www.mail-archive.com/linux-rdma@vger.kernel.org/msg25692.html cdev will hold a kref on the parent (the containing structure, ib_uverbs_device) and only when that kref is released it is guaranteed that open will never be called again. In addition, fixes the active count scheme to use an atomic not a kref to prevent WARN_ON as pointed by above comment from Jason. 
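The core of the fix is the standard "take a reference only if the object is still alive" idiom; a condensed sketch of the pattern as used here, with error handling trimmed:

	/* open path: fail if the last reference is already gone */
	if (!atomic_inc_not_zero(&dev->refcount))
		return -ENXIO;

	/* release path: whoever drops the last reference wakes up
	 * remove_one(), which is blocked in wait_for_completion() */
	if (atomic_dec_and_test(&dev->refcount))
		complete(&dev->comp);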
Signed-off-by: Yishai Hadas Signed-off-by: Shachar Raindel Reviewed-by: Jason Gunthorpe Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/uverbs.h | 3 +- drivers/infiniband/core/uverbs_main.c | 43 +++++++++++++++++++-------- 2 files changed, 32 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index b716b08156446..bebf11a6622a7 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -85,7 +85,7 @@ */ struct ib_uverbs_device { - struct kref ref; + atomic_t refcount; int num_comp_vectors; struct completion comp; struct device *dev; @@ -94,6 +94,7 @@ struct ib_uverbs_device { struct cdev cdev; struct rb_root xrcd_tree; struct mutex xrcd_tree_mutex; + struct kobject kobj; }; struct ib_uverbs_event_file { diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 88cce9bb72fea..09686d49d4c14 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -129,14 +129,18 @@ static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file, static void ib_uverbs_add_one(struct ib_device *device); static void ib_uverbs_remove_one(struct ib_device *device); -static void ib_uverbs_release_dev(struct kref *ref) +static void ib_uverbs_release_dev(struct kobject *kobj) { struct ib_uverbs_device *dev = - container_of(ref, struct ib_uverbs_device, ref); + container_of(kobj, struct ib_uverbs_device, kobj); - complete(&dev->comp); + kfree(dev); } +static struct kobj_type ib_uverbs_dev_ktype = { + .release = ib_uverbs_release_dev, +}; + static void ib_uverbs_release_event_file(struct kref *ref) { struct ib_uverbs_event_file *file = @@ -302,13 +306,19 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, return context->device->dealloc_ucontext(context); } +static void ib_uverbs_comp_dev(struct ib_uverbs_device *dev) +{ + complete(&dev->comp); +} + static void ib_uverbs_release_file(struct kref *ref) { struct ib_uverbs_file *file = container_of(ref, struct ib_uverbs_file, ref); module_put(file->device->ib_dev->owner); - kref_put(&file->device->ref, ib_uverbs_release_dev); + if (atomic_dec_and_test(&file->device->refcount)) + ib_uverbs_comp_dev(file->device); kfree(file); } @@ -742,9 +752,7 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp) int ret; dev = container_of(inode->i_cdev, struct ib_uverbs_device, cdev); - if (dev) - kref_get(&dev->ref); - else + if (!atomic_inc_not_zero(&dev->refcount)) return -ENXIO; if (!try_module_get(dev->ib_dev->owner)) { @@ -765,6 +773,7 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp) mutex_init(&file->mutex); filp->private_data = file; + kobject_get(&dev->kobj); return nonseekable_open(inode, filp); @@ -772,13 +781,16 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp) module_put(dev->ib_dev->owner); err: - kref_put(&dev->ref, ib_uverbs_release_dev); + if (atomic_dec_and_test(&dev->refcount)) + ib_uverbs_comp_dev(dev); + return ret; } static int ib_uverbs_close(struct inode *inode, struct file *filp) { struct ib_uverbs_file *file = filp->private_data; + struct ib_uverbs_device *dev = file->device; ib_uverbs_cleanup_ucontext(file, file->ucontext); @@ -786,6 +798,7 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp) kref_put(&file->async_file->ref, ib_uverbs_release_event_file); kref_put(&file->ref, ib_uverbs_release_file); + kobject_put(&dev->kobj); return 0; } @@ -881,10 +894,11 @@ 
static void ib_uverbs_add_one(struct ib_device *device) if (!uverbs_dev) return; - kref_init(&uverbs_dev->ref); + atomic_set(&uverbs_dev->refcount, 1); init_completion(&uverbs_dev->comp); uverbs_dev->xrcd_tree = RB_ROOT; mutex_init(&uverbs_dev->xrcd_tree_mutex); + kobject_init(&uverbs_dev->kobj, &ib_uverbs_dev_ktype); spin_lock(&map_lock); devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES); @@ -911,6 +925,7 @@ static void ib_uverbs_add_one(struct ib_device *device) cdev_init(&uverbs_dev->cdev, NULL); uverbs_dev->cdev.owner = THIS_MODULE; uverbs_dev->cdev.ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops; + uverbs_dev->cdev.kobj.parent = &uverbs_dev->kobj; kobject_set_name(&uverbs_dev->cdev.kobj, "uverbs%d", uverbs_dev->devnum); if (cdev_add(&uverbs_dev->cdev, base, 1)) goto err_cdev; @@ -941,9 +956,10 @@ static void ib_uverbs_add_one(struct ib_device *device) clear_bit(devnum, overflow_map); err: - kref_put(&uverbs_dev->ref, ib_uverbs_release_dev); + if (atomic_dec_and_test(&uverbs_dev->refcount)) + ib_uverbs_comp_dev(uverbs_dev); wait_for_completion(&uverbs_dev->comp); - kfree(uverbs_dev); + kobject_put(&uverbs_dev->kobj); return; } @@ -963,9 +979,10 @@ static void ib_uverbs_remove_one(struct ib_device *device) else clear_bit(uverbs_dev->devnum - IB_UVERBS_MAX_DEVICES, overflow_map); - kref_put(&uverbs_dev->ref, ib_uverbs_release_dev); + if (atomic_dec_and_test(&uverbs_dev->refcount)) + ib_uverbs_comp_dev(uverbs_dev); wait_for_completion(&uverbs_dev->comp); - kfree(uverbs_dev); + kobject_put(&uverbs_dev->kobj); } static char *uverbs_devnode(struct device *dev, umode_t *mode) From cdb6f383011c73248db3974bc5175ba45053005d Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 6 Aug 2015 18:32:48 +0300 Subject: [PATCH 0901/2091] IB/iser: Fix missing return status check in iser_send_data_out commit d16739055bd1f562ae4d83e69f7f7f1cefcfbe16 upstream. Since commit "IB/iser: Fix race between iser connection teardown..." iser_initialize_task_headers() might fail, so we need to check that. Fixes: 7414dde0a6c3a958e (IB/iser: Fix race between iser connection ...) 
Signed-off-by: Sagi Grimberg Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/iser/iser_initiator.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index 3e2118e8ed879..0a47f42fec24e 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -454,7 +454,7 @@ int iser_send_data_out(struct iscsi_conn *conn, unsigned long buf_offset; unsigned long data_seg_len; uint32_t itt; - int err = 0; + int err; struct ib_sge *tx_dsg; itt = (__force uint32_t)hdr->itt; @@ -475,7 +475,9 @@ int iser_send_data_out(struct iscsi_conn *conn, memcpy(&tx_desc->iscsi_header, hdr, sizeof(struct iscsi_hdr)); /* build the tx desc */ - iser_initialize_task_headers(task, tx_desc); + err = iser_initialize_task_headers(task, tx_desc); + if (err) + goto send_data_out_error; mem_reg = &iser_task->rdma_reg[ISER_DIR_OUT]; tx_dsg = &tx_desc->tx_sg[1]; @@ -502,7 +504,7 @@ int iser_send_data_out(struct iscsi_conn *conn, send_data_out_error: kmem_cache_free(ig.desc_cache, tx_desc); - iser_err("conn %p failed err %d\n",conn, err); + iser_err("conn %p failed err %d\n", conn, err); return err; } From 6465d5d793ecc69c78c1ad7da2e4bbd13de9f063 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 6 Aug 2015 18:32:50 +0300 Subject: [PATCH 0902/2091] IB/iser: Fix possible bogus DMA unmapping commit 8d5944d80359e645feb2ebd069a6f4caf7825e40 upstream. If iser_initialize_task_headers() routine failed before dma mapping, we should not attempt to unmap in cleanup_task(). Fixes: 7414dde0a6c3a958e (IB/iser: Fix race between iser connection ...) Signed-off-by: Sagi Grimberg Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/iser/iscsi_iser.c | 12 ++++++++---- drivers/infiniband/ulp/iser/iscsi_iser.h | 2 ++ 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 6a594aac22900..c933d882c35c4 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -201,6 +201,7 @@ iser_initialize_task_headers(struct iscsi_task *task, goto out; } + tx_desc->mapped = true; tx_desc->dma_addr = dma_addr; tx_desc->tx_sg[0].addr = tx_desc->dma_addr; tx_desc->tx_sg[0].length = ISER_HEADERS_LEN; @@ -360,16 +361,19 @@ iscsi_iser_task_xmit(struct iscsi_task *task) static void iscsi_iser_cleanup_task(struct iscsi_task *task) { struct iscsi_iser_task *iser_task = task->dd_data; - struct iser_tx_desc *tx_desc = &iser_task->desc; - struct iser_conn *iser_conn = task->conn->dd_data; + struct iser_tx_desc *tx_desc = &iser_task->desc; + struct iser_conn *iser_conn = task->conn->dd_data; struct iser_device *device = iser_conn->ib_conn.device; /* DEVICE_REMOVAL event might have already released the device */ if (!device) return; - ib_dma_unmap_single(device->ib_device, - tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE); + if (likely(tx_desc->mapped)) { + ib_dma_unmap_single(device->ib_device, tx_desc->dma_addr, + ISER_HEADERS_LEN, DMA_TO_DEVICE); + tx_desc->mapped = false; + } /* mgmt tasks do not need special cleanup */ if (!task->sc) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 262ba1f8ee507..d2b6caf7694d7 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -270,6 +270,7 @@ enum 
iser_desc_type { * sg[1] optionally points to either of immediate data * unsolicited data-out or control * @num_sge: number sges used on this TX task + * @mapped: Is the task header mapped */ struct iser_tx_desc { struct iser_hdr iser_header; @@ -278,6 +279,7 @@ struct iser_tx_desc { u64 dma_addr; struct ib_sge tx_sg[2]; int num_sge; + bool mapped; }; #define ISER_RX_PAD_SIZE (256 - (ISER_RX_PAYLOAD_SIZE + \ From 26783066148258ea62996c30d9c063dd193cb7a2 Mon Sep 17 00:00:00 2001 From: Haggai Eran Date: Tue, 1 Sep 2015 09:56:56 +0300 Subject: [PATCH 0903/2091] IB/mlx5: avoid destroying a NULL mr in reg_user_mr error flow commit 11d748045c6dadb279d1acdb6d2ea8f3f2ede85b upstream. The mlx5_ib_reg_user_mr() function will attempt to call clean_mr() in its error flow even though there is never a case where the error flow occurs with a valid MR pointer to destroy. Remove the clean_mr() call and the incorrect comment above it. Fixes: b4cfe447d47b ("IB/mlx5: Implement on demand paging by adding support for MMU notifiers") Cc: Eli Cohen Signed-off-by: Haggai Eran Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/mlx5/mr.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 71c5935838649..0c52f078759c0 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1119,19 +1119,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, return &mr->ibmr; error: - /* - * Destroy the umem *before* destroying the MR, to ensure we - * will not have any in-flight notifiers when destroying the - * MR. - * - * As the MR is completely invalid to begin with, and this - * error path is only taken if we can't push the mr entry into - * the pagefault tree, this is safe. - */ - ib_umem_release(umem); - /* Kill the MR, and return an error code. */ - clean_mr(mr); return ERR_PTR(err); } From fe5e52bf0b5b8a2ae1fefdb359fd8616ab7c9815 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Thu, 30 Jul 2015 17:34:21 +0300 Subject: [PATCH 0904/2091] IB/mlx4: Fix potential deadlock when sending mad to wire commit 90c1d8b6350cca9d8a234f03c77a317a7613bcee upstream. send_mad_to_wire takes the same spinlock that is taken in the interrupt context. Therefore, it needs irqsave/restore. 
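The reasoning is the usual one for locks shared with interrupt context: if process context takes the lock without disabling local interrupts, the handler can interrupt it on the same CPU and spin forever on the lock that is already held. A generic sketch of the pattern, not the mlx4 code itself:

	unsigned long flags;

	spin_lock_irqsave(&dev->sm_lock, flags);	/* also disables local IRQs */
	/* ... access state that the interrupt path also touches ... */
	spin_unlock_irqrestore(&dev->sm_lock, flags);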
Fixes: b9c5d6a64358 ('IB/mlx4: Add multicast group (MCG) paravirtualization for SR-IOV') Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/mlx4/mcg.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c index ed327e6c8fdca..a0559a8af4f4d 100644 --- a/drivers/infiniband/hw/mlx4/mcg.c +++ b/drivers/infiniband/hw/mlx4/mcg.c @@ -206,15 +206,16 @@ static int send_mad_to_wire(struct mlx4_ib_demux_ctx *ctx, struct ib_mad *mad) { struct mlx4_ib_dev *dev = ctx->dev; struct ib_ah_attr ah_attr; + unsigned long flags; - spin_lock(&dev->sm_lock); + spin_lock_irqsave(&dev->sm_lock, flags); if (!dev->sm_ah[ctx->port - 1]) { /* port is not yet Active, sm_ah not ready */ - spin_unlock(&dev->sm_lock); + spin_unlock_irqrestore(&dev->sm_lock, flags); return -EAGAIN; } mlx4_ib_query_ah(dev->sm_ah[ctx->port - 1], &ah_attr); - spin_unlock(&dev->sm_lock); + spin_unlock_irqrestore(&dev->sm_lock, flags); return mlx4_ib_send_to_wire(dev, mlx4_master_func_num(dev->dev), ctx->port, IB_QPT_GSI, 0, 1, IB_QP1_QKEY, &ah_attr, NULL, mad); From 6c634f2d7447b3056f69c4c6442e5eeef839a441 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Thu, 30 Jul 2015 17:34:23 +0300 Subject: [PATCH 0905/2091] IB/mlx4: Forbid using sysfs to change RoCE pkeys commit 2b135db3e81301d0452e6aa107349abe67b097d6 upstream. The pkey mapping for RoCE must remain the default mapping: VFs: virtual index 0 = mapped to real index 0 (0xFFFF) All others indices: mapped to a real pkey index containing an invalid pkey. PF: virtual index i = real index i. Don't allow users to change these mappings using files found in sysfs. Fixes: c1e7e466120b ('IB/mlx4: Add iov directory in sysfs under the ib device') Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/mlx4/sysfs.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c index 6797108ce8735..69fb5ba94d0f2 100644 --- a/drivers/infiniband/hw/mlx4/sysfs.c +++ b/drivers/infiniband/hw/mlx4/sysfs.c @@ -640,6 +640,8 @@ static int add_port(struct mlx4_ib_dev *dev, int port_num, int slave) struct mlx4_port *p; int i; int ret; + int is_eth = rdma_port_get_link_layer(&dev->ib_dev, port_num) == + IB_LINK_LAYER_ETHERNET; p = kzalloc(sizeof *p, GFP_KERNEL); if (!p) @@ -657,7 +659,8 @@ static int add_port(struct mlx4_ib_dev *dev, int port_num, int slave) p->pkey_group.name = "pkey_idx"; p->pkey_group.attrs = - alloc_group_attrs(show_port_pkey, store_port_pkey, + alloc_group_attrs(show_port_pkey, + is_eth ? NULL : store_port_pkey, dev->dev->caps.pkey_table_len[port_num]); if (!p->pkey_group.attrs) { ret = -ENOMEM; From c6632eb0bba08cb911f19ef3a96ff7df346c80ae Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Thu, 30 Jul 2015 17:34:24 +0300 Subject: [PATCH 0906/2091] IB/mlx4: Use correct SL on AH query under RoCE commit 5e99b139f1b68acd65e36515ca347b03856dfb5a upstream. The mlx4 IB driver implementation for ib_query_ah used a wrong offset (28 instead of 29) when link type is Ethernet. Fixed to use the correct one. 
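The offsets correspond to the width of the SL field in the sl_tclass_flowlabel word as implied by the fix: a 4-bit SL in bits 31:28 for IB versus a 3-bit SL in bits 31:29 for RoCE. A small worked illustration with a hypothetical value:

	/* hypothetical word with SL == 5 stored in the top three bits */
	u32 w = 5u << 29;		/* 0xa0000000 */

	u8 sl_roce = w >> 29;		/* 5  - correct for RoCE (3-bit SL) */
	u8 sl_ib   = w >> 28;		/* 10 - what the old ">> 28" reported */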
Fixes: fa417f7b520e ('IB/mlx4: Add support for IBoE') Signed-off-by: Shani Michaeli Signed-off-by: Noa Osherovich Signed-off-by: Or Gerlitz Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/mlx4/ah.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c index f50a546224adf..33fdd50123f73 100644 --- a/drivers/infiniband/hw/mlx4/ah.c +++ b/drivers/infiniband/hw/mlx4/ah.c @@ -148,9 +148,13 @@ int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr) enum rdma_link_layer ll; memset(ah_attr, 0, sizeof *ah_attr); - ah_attr->sl = be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28; ah_attr->port_num = be32_to_cpu(ah->av.ib.port_pd) >> 24; ll = rdma_port_get_link_layer(ibah->device, ah_attr->port_num); + if (ll == IB_LINK_LAYER_ETHERNET) + ah_attr->sl = be32_to_cpu(ah->av.eth.sl_tclass_flowlabel) >> 29; + else + ah_attr->sl = be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28; + ah_attr->dlid = ll == IB_LINK_LAYER_INFINIBAND ? be16_to_cpu(ah->av.ib.dlid) : 0; if (ah->av.ib.stat_rate) ah_attr->static_rate = ah->av.ib.stat_rate - MLX4_STAT_RATE_OFFSET; From 6a85820d32b8f908e2dd41b75977fb77768118c2 Mon Sep 17 00:00:00 2001 From: Ariel Nahum Date: Sun, 9 Aug 2015 11:16:27 +0300 Subject: [PATCH 0907/2091] IB/mlx4: Fix incorrect cq flushing in error state commit 799cdaf8a98f13d4fba3162e21e1e63f21045010 upstream. When handling a device internal error, the driver is responsible to drain the completion queue with flush errors. In case a completion queue was assigned to multiple send queues, the driver iterates over the send queues and generates flush errors of inflight wqes. The driver must correctly pass the wc array with an offset as a result of the previous send queue iteration. Not doing so will overwrite previously set completions and return a wrong number of polled completions which includes ones which were not correctly set. Fixes: 35f05dabf95a (IB/mlx4: Reset flow support for IB kernel ULPs) Signed-off-by: Ariel Nahum Signed-off-by: Sagi Grimberg Cc: Yishai Hadas Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/mlx4/cq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 0176caa5792c4..2857ed89725e6 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -629,7 +629,7 @@ static void mlx4_ib_poll_sw_comp(struct mlx4_ib_cq *cq, int num_entries, * simulated FLUSH_ERR completions */ list_for_each_entry(qp, &cq->send_qp_list, cq_send_list) { - mlx4_ib_qp_sw_comp(qp, num_entries, wc, npolled, 1); + mlx4_ib_qp_sw_comp(qp, num_entries, wc + *npolled, npolled, 1); if (*npolled >= num_entries) goto out; } From 79866f313766dd2e8e118053e1eb8bd5abd608ac Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Wed, 9 Sep 2015 18:01:08 +0300 Subject: [PATCH 0908/2091] stmmac: fix check for phydev being open commit dfc50fcaad574e5c8c85cbc83eca1426b2413fa4 upstream. Current check of phydev with IS_ERR(phydev) may make not much sense because of_phy_connect() returns NULL on failure instead of error value. Still for checking result of phy_connect() IS_ERR() makes perfect sense. So let's use combined check IS_ERR_OR_NULL() that covers both cases. Cc: Sergei Shtylyov Cc: Giuseppe Cavallaro Cc: linux-kernel@vger.kernel.org Cc: David Miller Signed-off-by: Alexey Brodkin Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 2c5ce2baca871..57ab4629ea2fa 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -829,8 +829,11 @@ static int stmmac_init_phy(struct net_device *dev) phydev = phy_connect(dev, phy_id_fmt, &stmmac_adjust_link, interface); - if (IS_ERR(phydev)) { + if (IS_ERR_OR_NULL(phydev)) { pr_err("%s: Could not attach to PHY\n", dev->name); + if (!phydev) + return -ENODEV; + return PTR_ERR(phydev); } From 4d95fbcd38406f6747aeb86560a15a95e4527cc4 Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Wed, 24 Jun 2015 11:47:41 +0300 Subject: [PATCH 0909/2091] stmmac: troubleshoot unexpected bits in des0 & des1 commit f1590670ce069eefeb93916391a67643e6ad1630 upstream. Current implementation of descriptor init procedure only takes care about setting/clearing ownership flag in "des0"/"des1" fields while it is perfectly possible to get unexpected bits set because of the following factors: [1] On driver probe underlying memory allocated with dma_alloc_coherent() might not be zeroed and so it will be filled with garbage. [2] During driver operation some bits could be set by SD/MMC controller (for example error flags etc). And unexpected and/or randomly set flags in "des0"/"des1" fields may lead to unpredictable behavior of GMAC DMA block. This change addresses both items above with: [1] Use of dma_zalloc_coherent() instead of simple dma_alloc_coherent() to make sure allocated memory is zeroed. That shouldn't affect performance because this allocation only happens once on driver probe. [2] Do explicit zeroing of both "des0" and "des1" fields of all buffer descriptors during initialization of DMA transfer. And while at it fixed identation of dma_free_coherent() counterpart as well. Signed-off-by: Alexey Brodkin Cc: Giuseppe Cavallaro Cc: arc-linux-dev@synopsys.com Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org Cc: David Miller Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/stmicro/stmmac/descs.h | 2 + .../net/ethernet/stmicro/stmmac/enh_desc.c | 3 +- .../net/ethernet/stmicro/stmmac/norm_desc.c | 3 +- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 44 +++++++++---------- 4 files changed, 28 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/descs.h b/drivers/net/ethernet/stmicro/stmmac/descs.h index ad39960380180..799c2929c5365 100644 --- a/drivers/net/ethernet/stmicro/stmmac/descs.h +++ b/drivers/net/ethernet/stmicro/stmmac/descs.h @@ -158,6 +158,8 @@ struct dma_desc { u32 buffer2_size:13; u32 reserved4:3; } etx; /* -- enhanced -- */ + + u64 all_flags; } des01; unsigned int des2; unsigned int des3; diff --git a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c index 1e2bcf5f89e13..7d944449f5eff 100644 --- a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c +++ b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c @@ -240,6 +240,7 @@ static int enh_desc_get_rx_status(void *data, struct stmmac_extra_stats *x, static void enh_desc_init_rx_desc(struct dma_desc *p, int disable_rx_ic, int mode, int end) { + p->des01.all_flags = 0; p->des01.erx.own = 1; p->des01.erx.buffer1_size = BUF_SIZE_8KiB - 1; @@ -254,7 +255,7 @@ static void enh_desc_init_rx_desc(struct dma_desc *p, int disable_rx_ic, static void enh_desc_init_tx_desc(struct dma_desc *p, int mode, int end) { - p->des01.etx.own = 0; + p->des01.all_flags = 0; if (mode == STMMAC_CHAIN_MODE) ehn_desc_tx_set_on_chain(p, end); else diff --git a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c index 35ad4f427ae2b..48c3456445b28 100644 --- a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c +++ b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c @@ -123,6 +123,7 @@ static int ndesc_get_rx_status(void *data, struct stmmac_extra_stats *x, static void ndesc_init_rx_desc(struct dma_desc *p, int disable_rx_ic, int mode, int end) { + p->des01.all_flags = 0; p->des01.rx.own = 1; p->des01.rx.buffer1_size = BUF_SIZE_2KiB - 1; @@ -137,7 +138,7 @@ static void ndesc_init_rx_desc(struct dma_desc *p, int disable_rx_ic, int mode, static void ndesc_init_tx_desc(struct dma_desc *p, int mode, int end) { - p->des01.tx.own = 0; + p->des01.all_flags = 0; if (mode == STMMAC_CHAIN_MODE) ndesc_tx_set_on_chain(p, end); else diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 57ab4629ea2fa..c274cdc5df1ef 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1192,41 +1192,41 @@ static int alloc_dma_desc_resources(struct stmmac_priv *priv) goto err_tx_skbuff; if (priv->extend_desc) { - priv->dma_erx = dma_alloc_coherent(priv->device, rxsize * - sizeof(struct - dma_extended_desc), - &priv->dma_rx_phy, - GFP_KERNEL); + priv->dma_erx = dma_zalloc_coherent(priv->device, rxsize * + sizeof(struct + dma_extended_desc), + &priv->dma_rx_phy, + GFP_KERNEL); if (!priv->dma_erx) goto err_dma; - priv->dma_etx = dma_alloc_coherent(priv->device, txsize * - sizeof(struct - dma_extended_desc), - &priv->dma_tx_phy, - GFP_KERNEL); + priv->dma_etx = dma_zalloc_coherent(priv->device, txsize * + sizeof(struct + dma_extended_desc), + &priv->dma_tx_phy, + GFP_KERNEL); if (!priv->dma_etx) { dma_free_coherent(priv->device, priv->dma_rx_size * - sizeof(struct dma_extended_desc), - priv->dma_erx, priv->dma_rx_phy); + sizeof(struct dma_extended_desc), + 
priv->dma_erx, priv->dma_rx_phy); goto err_dma; } } else { - priv->dma_rx = dma_alloc_coherent(priv->device, rxsize * - sizeof(struct dma_desc), - &priv->dma_rx_phy, - GFP_KERNEL); + priv->dma_rx = dma_zalloc_coherent(priv->device, rxsize * + sizeof(struct dma_desc), + &priv->dma_rx_phy, + GFP_KERNEL); if (!priv->dma_rx) goto err_dma; - priv->dma_tx = dma_alloc_coherent(priv->device, txsize * - sizeof(struct dma_desc), - &priv->dma_tx_phy, - GFP_KERNEL); + priv->dma_tx = dma_zalloc_coherent(priv->device, txsize * + sizeof(struct dma_desc), + &priv->dma_tx_phy, + GFP_KERNEL); if (!priv->dma_tx) { dma_free_coherent(priv->device, priv->dma_rx_size * - sizeof(struct dma_desc), - priv->dma_rx, priv->dma_rx_phy); + sizeof(struct dma_desc), + priv->dma_rx, priv->dma_rx_phy); goto err_dma; } } From 2a0538e207c245fa649a090eaa3585302eb950e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Heiko=20St=C3=BCbner?= Date: Sun, 21 Jun 2015 21:52:52 +0200 Subject: [PATCH 0910/2091] net: stmmac: dwmac-rk: Fix clk rate when provided by soc commit c48fa33c1fb2ccdb4bcc863a7b841f11efe0f8b0 upstream. The first iteration of the dwmac-rk support did access an intermediate clock directly below the pll selector. This was removed in a subsequent revision, but the clock and one invocation remained. This results in the driver trying to set the rate of a non-existent clock when the soc and not some external source provides the phy clock for RMII phys. So set the rate of the correct clock and remove the remaining now completely unused definition. Fixes: 436f5ae08f9d ("GMAC: add driver for Rockchip RK3288 SoCs integrated GMAC") Signed-off-by: Heiko Stuebner Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c index 6249a4ec08f05..573708123338f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c @@ -38,7 +38,6 @@ struct rk_priv_data { bool clock_input; struct clk *clk_mac; - struct clk *clk_mac_pll; struct clk *gmac_clkin; struct clk *mac_clk_rx; struct clk *mac_clk_tx; @@ -208,7 +207,7 @@ static int gmac_clk_init(struct rk_priv_data *bsp_priv) dev_info(dev, "%s: clock input from PHY\n", __func__); } else { if (bsp_priv->phy_iface == PHY_INTERFACE_MODE_RMII) - clk_set_rate(bsp_priv->clk_mac_pll, 50000000); + clk_set_rate(bsp_priv->clk_mac, 50000000); } return 0; From d9c410f96c332b82e4b3de03f6e5f45902f1dbfb Mon Sep 17 00:00:00 2001 From: Hin-Tak Leung Date: Wed, 9 Sep 2015 15:38:04 -0700 Subject: [PATCH 0911/2091] hfs,hfsplus: cache pages correctly between bnode_create and bnode_free commit 7cb74be6fd827e314f81df3c5889b87e4c87c569 upstream. Pages looked up by __hfs_bnode_create() (called by hfs_bnode_create() and hfs_bnode_find() for finding or creating pages corresponding to an inode) are immediately kmap()'ed and used (both read and write) and kunmap()'ed, and should not be page_cache_release()'ed until hfs_bnode_free(). This patch fixes a problem I first saw in July 2012: merely running "du" on a large hfsplus-mounted directory a few times on a reasonably loaded system would get the hfsplus driver all confused and complaining about B-tree inconsistencies, and generates a "BUG: Bad page state". 
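In essence, the bug is a page reference dropped at node-creation time while the node goes on using that page. A minimal sketch of the two lifetimes, simplified from the bnode code below (variable names abbreviated, error handling omitted, not the verbatim driver source):

	/* buggy pattern: reference released as soon as the page is found */
	page = read_mapping_page(mapping, block, NULL);
	page_cache_release(page);	/* reference gone ...          */
	node->page[i] = page;		/* ... but the pointer is kept */
	/* later accesses kmap() node->page[i], a page the kernel may
	 * already have handed out to someone else */

	/* fixed pattern: hold the reference for the node's lifetime */
	page = read_mapping_page(mapping, block, NULL);
	node->page[i] = page;
	/* ... and only in hfs_bnode_free(): */
	for (i = 0; i < node->tree->pages_per_bnode; i++)
		if (node->page[i])
			page_cache_release(node->page[i]);
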
Most recently, I can generate this problem on up-to-date Fedora 22 with shipped kernel 4.0.5, by running "du /" (="/" + "/home" + "/mnt" + other smaller mounts) and "du /mnt" simultaneously on two windows, where /mnt is a lightly-used QEMU VM image of the full Mac OS X 10.9: $ df -i / /home /mnt Filesystem Inodes IUsed IFree IUse% Mounted on /dev/mapper/fedora-root 3276800 551665 2725135 17% / /dev/mapper/fedora-home 52879360 716221 52163139 2% /home /dev/nbd0p2 4294967295 1387818 4293579477 1% /mnt After applying the patch, I was able to run "du /" (60+ times) and "du /mnt" (150+ times) continuously and simultaneously for 6+ hours. There are many reports of the hfsplus driver getting confused under load and generating "BUG: Bad page state" or other similar issues over the years. [1] The unpatched code [2] has always been wrong since it entered the kernel tree. The only reason why it gets away with it is that the kmap/memcpy/kunmap follow very quickly after the page_cache_release() so the kernel has not had a chance to reuse the memory for something else, most of the time. The current RW driver appears to have followed the design and development of the earlier read-only hfsplus driver [3], where-by version 0.1 (Dec 2001) had a B-tree node-centric approach to read_cache_page()/page_cache_release() per bnode_get()/bnode_put(), migrating towards version 0.2 (June 2002) of caching and releasing pages per inode extents. When the current RW code first entered the kernel [2] in 2005, there was an REF_PAGES conditional (and "//" commented out code) to switch between B-node centric paging to inode-centric paging. There was a mistake with the direction of one of the REF_PAGES conditionals in __hfs_bnode_create(). In a subsequent "remove debug code" commit [4], the read_cache_page()/page_cache_release() per bnode_get()/bnode_put() were removed, but a page_cache_release() was mistakenly left in (propagating the "REF_PAGES <-> !REF_PAGE" mistake), and the commented-out page_cache_release() in bnode_release() (which should be spanned by !REF_PAGES) was never enabled. References: [1]: Michael Fox, Apr 2013 http://www.spinics.net/lists/linux-fsdevel/msg63807.html ("hfsplus volume suddenly inaccessable after 'hfs: recoff %d too large'") Sasha Levin, Feb 2015 http://lkml.org/lkml/2015/2/20/85 ("use after free") https://bugs.launchpad.net/ubuntu/+source/linux/+bug/740814 https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1027887 https://bugzilla.kernel.org/show_bug.cgi?id=42342 https://bugzilla.kernel.org/show_bug.cgi?id=63841 https://bugzilla.kernel.org/show_bug.cgi?id=78761 [2]: http://git.kernel.org/cgit/linux/kernel/git/tglx/history.git/commit/\ fs/hfs/bnode.c?id=d1081202f1d0ee35ab0beb490da4b65d4bc763db commit d1081202f1d0ee35ab0beb490da4b65d4bc763db Author: Andrew Morton Date: Wed Feb 25 16:17:36 2004 -0800 [PATCH] HFS rewrite http://git.kernel.org/cgit/linux/kernel/git/tglx/history.git/commit/\ fs/hfsplus/bnode.c?id=91556682e0bf004d98a529bf829d339abb98bbbd commit 91556682e0bf004d98a529bf829d339abb98bbbd Author: Andrew Morton Date: Wed Feb 25 16:17:48 2004 -0800 [PATCH] HFS+ support [3]: http://sourceforge.net/projects/linux-hfsplus/ http://sourceforge.net/projects/linux-hfsplus/files/Linux%202.4.x%20patch/hfsplus%200.1/ http://sourceforge.net/projects/linux-hfsplus/files/Linux%202.4.x%20patch/hfsplus%200.2/ http://linux-hfsplus.cvs.sourceforge.net/viewvc/linux-hfsplus/linux/\ fs/hfsplus/bnode.c?r1=1.4&r2=1.5 Date: Thu Jun 6 09:45:14 2002 +0000 Use buffer cache instead of page cache in bnode.c. 
Cache inode extents. [4]: http://git.kernel.org/cgit/linux/kernel/git/\ stable/linux-stable.git/commit/?id=a5e3985fa014029eb6795664c704953720cc7f7d commit a5e3985fa014029eb6795664c704953720cc7f7d Author: Roman Zippel Date: Tue Sep 6 15:18:47 2005 -0700 [PATCH] hfs: remove debug code Signed-off-by: Hin-Tak Leung Signed-off-by: Sergei Antonov Reviewed-by: Anton Altaparmakov Reported-by: Sasha Levin Cc: Al Viro Cc: Christoph Hellwig Cc: Vyacheslav Dubeyko Cc: Sougata Santra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/hfs/bnode.c | 9 ++++----- fs/hfsplus/bnode.c | 3 --- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/fs/hfs/bnode.c b/fs/hfs/bnode.c index d3fa6bd9503e7..221719eac5de6 100644 --- a/fs/hfs/bnode.c +++ b/fs/hfs/bnode.c @@ -288,7 +288,6 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid) page_cache_release(page); goto fail; } - page_cache_release(page); node->page[i] = page; } @@ -398,11 +397,11 @@ struct hfs_bnode *hfs_bnode_find(struct hfs_btree *tree, u32 num) void hfs_bnode_free(struct hfs_bnode *node) { - //int i; + int i; - //for (i = 0; i < node->tree->pages_per_bnode; i++) - // if (node->page[i]) - // page_cache_release(node->page[i]); + for (i = 0; i < node->tree->pages_per_bnode; i++) + if (node->page[i]) + page_cache_release(node->page[i]); kfree(node); } diff --git a/fs/hfsplus/bnode.c b/fs/hfsplus/bnode.c index 759708fd9331c..63924662aaf3e 100644 --- a/fs/hfsplus/bnode.c +++ b/fs/hfsplus/bnode.c @@ -454,7 +454,6 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid) page_cache_release(page); goto fail; } - page_cache_release(page); node->page[i] = page; } @@ -566,13 +565,11 @@ struct hfs_bnode *hfs_bnode_find(struct hfs_btree *tree, u32 num) void hfs_bnode_free(struct hfs_bnode *node) { -#if 0 int i; for (i = 0; i < node->tree->pages_per_bnode; i++) if (node->page[i]) page_cache_release(node->page[i]); -#endif kfree(node); } From 76763f58c0f7d27d1aaee039a89969964d73bf7d Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 9 Sep 2015 15:39:12 -0700 Subject: [PATCH 0912/2091] lib/decompressors: use real out buf size for gunzip with kernel commit 2d3862d26e67a59340ba1cf1748196c76c5787de upstream. When loading x86 64bit kernel above 4GiB with patched grub2, got kernel gunzip error. | early console in decompress_kernel | decompress_kernel: | input: [0x807f2143b4-0x807ff61aee] | output: [0x807cc00000-0x807f3ea29b] 0x027ea29c: output_len | boot via startup_64 | KASLR using RDTSC... | new output: [0x46fe000000-0x470138cfff] 0x0338d000: output_run_size | decompress: [0x46fe000000-0x47007ea29b] <=== [0x807f2143b4-0x807ff61aee] | | Decompressing Linux... gz... | | uncompression error | | -- System halted the new buffer is at 0x46fe000000ULL, decompressor_gzip is using 0xffffffb901ffffff as out_len. gunzip in lib/zlib_inflate/inflate.c cap that len to 0x01ffffff and decompress fails later. We could hit this problem with crashkernel booting that uses kexec loading kernel above 4GiB. We have decompress_* support: 1. inbuf[]/outbuf[] for kernel preboot. 2. inbuf[]/flush() for initramfs 3. fill()/flush() for initrd. This bug only affect kernel preboot path that use outbuf[]. Add __decompress and take real out_buf_len for gunzip instead of guessing wrong buf size. Fixes: 1431574a1c4 (lib/decompressors: fix "no limit" output buffer length) Signed-off-by: Yinghai Lu Cc: Alexandre Courbot Cc: Jon Medhurst Cc: Stephen Warren Cc: "H. 
Peter Anvin" Cc: Thomas Gleixner Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/compressed/decompress.c | 2 +- arch/m32r/boot/compressed/misc.c | 3 ++- arch/mips/boot/compressed/decompress.c | 4 ++-- arch/s390/boot/compressed/misc.c | 2 +- arch/sh/boot/compressed/misc.c | 2 +- arch/unicore32/boot/compressed/misc.c | 4 ++-- arch/x86/boot/compressed/misc.c | 3 ++- lib/decompress_bunzip2.c | 6 ++--- lib/decompress_inflate.c | 31 +++++++++++++++++++++----- lib/decompress_unlz4.c | 6 ++--- lib/decompress_unlzma.c | 7 +++--- lib/decompress_unlzo.c | 13 ++++++++++- lib/decompress_unxz.c | 12 +++++++++- 13 files changed, 69 insertions(+), 26 deletions(-) diff --git a/arch/arm/boot/compressed/decompress.c b/arch/arm/boot/compressed/decompress.c index bd245d34952d2..a0765e7ed6c7d 100644 --- a/arch/arm/boot/compressed/decompress.c +++ b/arch/arm/boot/compressed/decompress.c @@ -57,5 +57,5 @@ extern char * strstr(const char * s1, const char *s2); int do_decompress(u8 *input, int len, u8 *output, void (*error)(char *x)) { - return decompress(input, len, NULL, NULL, output, NULL, error); + return __decompress(input, len, NULL, NULL, output, 0, NULL, error); } diff --git a/arch/m32r/boot/compressed/misc.c b/arch/m32r/boot/compressed/misc.c index 28a09529f2069..3a76927458681 100644 --- a/arch/m32r/boot/compressed/misc.c +++ b/arch/m32r/boot/compressed/misc.c @@ -86,6 +86,7 @@ decompress_kernel(int mmu_on, unsigned char *zimage_data, free_mem_end_ptr = free_mem_ptr + BOOT_HEAP_SIZE; puts("\nDecompressing Linux... "); - decompress(input_data, input_len, NULL, NULL, output_data, NULL, error); + __decompress(input_data, input_len, NULL, NULL, output_data, 0, + NULL, error); puts("done.\nBooting the kernel.\n"); } diff --git a/arch/mips/boot/compressed/decompress.c b/arch/mips/boot/compressed/decompress.c index 54831069a2062..080cd53bac369 100644 --- a/arch/mips/boot/compressed/decompress.c +++ b/arch/mips/boot/compressed/decompress.c @@ -111,8 +111,8 @@ void decompress_kernel(unsigned long boot_heap_start) puts("\n"); /* Decompress the kernel with according algorithm */ - decompress((char *)zimage_start, zimage_size, 0, 0, - (void *)VMLINUX_LOAD_ADDRESS_ULL, 0, error); + __decompress((char *)zimage_start, zimage_size, 0, 0, + (void *)VMLINUX_LOAD_ADDRESS_ULL, 0, 0, error); /* FIXME: should we flush cache here? */ puts("Now, booting the kernel...\n"); diff --git a/arch/s390/boot/compressed/misc.c b/arch/s390/boot/compressed/misc.c index 42506b371b741..4da604ebf6fd8 100644 --- a/arch/s390/boot/compressed/misc.c +++ b/arch/s390/boot/compressed/misc.c @@ -167,7 +167,7 @@ unsigned long decompress_kernel(void) #endif puts("Uncompressing Linux... "); - decompress(input_data, input_len, NULL, NULL, output, NULL, error); + __decompress(input_data, input_len, NULL, NULL, output, 0, NULL, error); puts("Ok, booting the kernel.\n"); return (unsigned long) output; } diff --git a/arch/sh/boot/compressed/misc.c b/arch/sh/boot/compressed/misc.c index 95470a472d2cf..208a9753ab38c 100644 --- a/arch/sh/boot/compressed/misc.c +++ b/arch/sh/boot/compressed/misc.c @@ -132,7 +132,7 @@ void decompress_kernel(void) puts("Uncompressing Linux... 
"); cache_control(CACHE_ENABLE); - decompress(input_data, input_len, NULL, NULL, output, NULL, error); + __decompress(input_data, input_len, NULL, NULL, output, 0, NULL, error); cache_control(CACHE_DISABLE); puts("Ok, booting the kernel.\n"); } diff --git a/arch/unicore32/boot/compressed/misc.c b/arch/unicore32/boot/compressed/misc.c index 176d5bda3559d..5c65dfee278c0 100644 --- a/arch/unicore32/boot/compressed/misc.c +++ b/arch/unicore32/boot/compressed/misc.c @@ -119,8 +119,8 @@ unsigned long decompress_kernel(unsigned long output_start, output_ptr = get_unaligned_le32(tmp); arch_decomp_puts("Uncompressing Linux..."); - decompress(input_data, input_data_end - input_data, NULL, NULL, - output_data, NULL, error); + __decompress(input_data, input_data_end - input_data, NULL, NULL, + output_data, 0, NULL, error); arch_decomp_puts(" done, booting the kernel.\n"); return output_ptr; } diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index a107b935e22fb..e28437e0f7088 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -424,7 +424,8 @@ asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap, #endif debug_putstr("\nDecompressing Linux... "); - decompress(input_data, input_len, NULL, NULL, output, NULL, error); + __decompress(input_data, input_len, NULL, NULL, output, output_len, + NULL, error); parse_elf(output); /* * 32-bit always performs relocations. 64-bit relocations are only diff --git a/lib/decompress_bunzip2.c b/lib/decompress_bunzip2.c index 6dd0335ea61b2..0234361b24b89 100644 --- a/lib/decompress_bunzip2.c +++ b/lib/decompress_bunzip2.c @@ -743,12 +743,12 @@ STATIC int INIT bunzip2(unsigned char *buf, long len, } #ifdef PREBOOT -STATIC int INIT decompress(unsigned char *buf, long len, +STATIC int INIT __decompress(unsigned char *buf, long len, long (*fill)(void*, unsigned long), long (*flush)(void*, unsigned long), - unsigned char *outbuf, + unsigned char *outbuf, long olen, long *pos, - void(*error)(char *x)) + void (*error)(char *x)) { return bunzip2(buf, len - 4, fill, flush, outbuf, pos, error); } diff --git a/lib/decompress_inflate.c b/lib/decompress_inflate.c index d4c7891635ecc..555c06bf20daa 100644 --- a/lib/decompress_inflate.c +++ b/lib/decompress_inflate.c @@ -1,4 +1,5 @@ #ifdef STATIC +#define PREBOOT /* Pre-boot environment: included */ /* prevent inclusion of _LINUX_KERNEL_H in pre-boot environment: lots @@ -33,23 +34,23 @@ static long INIT nofill(void *buffer, unsigned long len) } /* Included from initramfs et al code */ -STATIC int INIT gunzip(unsigned char *buf, long len, +STATIC int INIT __gunzip(unsigned char *buf, long len, long (*fill)(void*, unsigned long), long (*flush)(void*, unsigned long), - unsigned char *out_buf, + unsigned char *out_buf, long out_len, long *pos, void(*error)(char *x)) { u8 *zbuf; struct z_stream_s *strm; int rc; - size_t out_len; rc = -1; if (flush) { out_len = 0x8000; /* 32 K */ out_buf = malloc(out_len); } else { - out_len = ((size_t)~0) - (size_t)out_buf; /* no limit */ + if (!out_len) + out_len = ((size_t)~0) - (size_t)out_buf; /* no limit */ } if (!out_buf) { error("Out of memory while allocating output buffer"); @@ -181,4 +182,24 @@ STATIC int INIT gunzip(unsigned char *buf, long len, return rc; /* returns Z_OK (0) if successful */ } -#define decompress gunzip +#ifndef PREBOOT +STATIC int INIT gunzip(unsigned char *buf, long len, + long (*fill)(void*, unsigned long), + long (*flush)(void*, unsigned long), + unsigned char *out_buf, + long *pos, + void 
(*error)(char *x)) +{ + return __gunzip(buf, len, fill, flush, out_buf, 0, pos, error); +} +#else +STATIC int INIT __decompress(unsigned char *buf, long len, + long (*fill)(void*, unsigned long), + long (*flush)(void*, unsigned long), + unsigned char *out_buf, long out_len, + long *pos, + void (*error)(char *x)) +{ + return __gunzip(buf, len, fill, flush, out_buf, out_len, pos, error); +} +#endif diff --git a/lib/decompress_unlz4.c b/lib/decompress_unlz4.c index 40f66ebe57b77..036fc882cd725 100644 --- a/lib/decompress_unlz4.c +++ b/lib/decompress_unlz4.c @@ -196,12 +196,12 @@ STATIC inline int INIT unlz4(u8 *input, long in_len, } #ifdef PREBOOT -STATIC int INIT decompress(unsigned char *buf, long in_len, +STATIC int INIT __decompress(unsigned char *buf, long in_len, long (*fill)(void*, unsigned long), long (*flush)(void*, unsigned long), - unsigned char *output, + unsigned char *output, long out_len, long *posp, - void(*error)(char *x) + void (*error)(char *x) ) { return unlz4(buf, in_len - 4, fill, flush, output, posp, error); diff --git a/lib/decompress_unlzma.c b/lib/decompress_unlzma.c index 0be83af62b884..decb64629c146 100644 --- a/lib/decompress_unlzma.c +++ b/lib/decompress_unlzma.c @@ -667,13 +667,12 @@ STATIC inline int INIT unlzma(unsigned char *buf, long in_len, } #ifdef PREBOOT -STATIC int INIT decompress(unsigned char *buf, long in_len, +STATIC int INIT __decompress(unsigned char *buf, long in_len, long (*fill)(void*, unsigned long), long (*flush)(void*, unsigned long), - unsigned char *output, + unsigned char *output, long out_len, long *posp, - void(*error)(char *x) - ) + void (*error)(char *x)) { return unlzma(buf, in_len - 4, fill, flush, output, posp, error); } diff --git a/lib/decompress_unlzo.c b/lib/decompress_unlzo.c index b94a31bdd87d1..f4c158e3a022a 100644 --- a/lib/decompress_unlzo.c +++ b/lib/decompress_unlzo.c @@ -31,6 +31,7 @@ */ #ifdef STATIC +#define PREBOOT #include "lzo/lzo1x_decompress_safe.c" #else #include @@ -287,4 +288,14 @@ STATIC int INIT unlzo(u8 *input, long in_len, return ret; } -#define decompress unlzo +#ifdef PREBOOT +STATIC int INIT __decompress(unsigned char *buf, long len, + long (*fill)(void*, unsigned long), + long (*flush)(void*, unsigned long), + unsigned char *out_buf, long olen, + long *pos, + void (*error)(char *x)) +{ + return unlzo(buf, len, fill, flush, out_buf, pos, error); +} +#endif diff --git a/lib/decompress_unxz.c b/lib/decompress_unxz.c index b07a78340e9d3..25d59a95bd668 100644 --- a/lib/decompress_unxz.c +++ b/lib/decompress_unxz.c @@ -394,4 +394,14 @@ STATIC int INIT unxz(unsigned char *in, long in_size, * This macro is used by architecture-specific files to decompress * the kernel image. */ -#define decompress unxz +#ifdef XZ_PREBOOT +STATIC int INIT __decompress(unsigned char *buf, long len, + long (*fill)(void*, unsigned long), + long (*flush)(void*, unsigned long), + unsigned char *out_buf, long olen, + long *pos, + void (*error)(char *x)) +{ + return unxz(buf, len, fill, flush, out_buf, pos, error); +} +#endif From 164802e408904270d95f74de66404ba54df3fc60 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 28 Jul 2015 14:57:14 -0400 Subject: [PATCH 0913/2091] jbd2: avoid infinite loop when destroying aborted journal commit 841df7df196237ea63233f0f9eaa41db53afd70f upstream. Commit 6f6a6fda2945 "jbd2: fix ocfs2 corrupt when updating journal superblock fails" changed jbd2_cleanup_journal_tail() to return EIO when the journal is aborted. 
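For context, the loop in jbd2_journal_destroy() whose progress assumption that new EIO return undermines looks, before this patch, roughly as follows (condensed; only the locking needed for orientation is kept):

	spin_lock(&journal->j_list_lock);
	while (journal->j_checkpoint_transactions != NULL) {
		spin_unlock(&journal->j_list_lock);
		mutex_lock(&journal->j_checkpoint_mutex);
		jbd2_log_do_checkpoint(journal);	/* return value ignored */
		mutex_unlock(&journal->j_checkpoint_mutex);
		spin_lock(&journal->j_list_lock);
	}
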
That makes logic in jbd2_log_do_checkpoint() bail out which is fine, except that jbd2_journal_destroy() expects jbd2_log_do_checkpoint() to always make a progress in cleaning the journal. Without it jbd2_journal_destroy() just loops in an infinite loop. Fix jbd2_journal_destroy() to cleanup journal checkpoint lists of jbd2_log_do_checkpoint() fails with error. Reported-by: Eryu Guan Tested-by: Eryu Guan Fixes: 6f6a6fda294506dfe0e3e0a253bb2d2923f28f0a Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/jbd2/checkpoint.c | 39 +++++++++++++++++++++++++++++++++------ fs/jbd2/commit.c | 2 +- fs/jbd2/journal.c | 11 ++++++++++- include/linux/jbd2.h | 3 ++- 4 files changed, 46 insertions(+), 9 deletions(-) diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 4227dc4f7437d..8c44654ce2748 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -417,12 +417,12 @@ int jbd2_cleanup_journal_tail(journal_t *journal) * journal_clean_one_cp_list * * Find all the written-back checkpoint buffers in the given list and - * release them. + * release them. If 'destroy' is set, clean all buffers unconditionally. * * Called with j_list_lock held. * Returns 1 if we freed the transaction, 0 otherwise. */ -static int journal_clean_one_cp_list(struct journal_head *jh) +static int journal_clean_one_cp_list(struct journal_head *jh, bool destroy) { struct journal_head *last_jh; struct journal_head *next_jh = jh; @@ -436,7 +436,10 @@ static int journal_clean_one_cp_list(struct journal_head *jh) do { jh = next_jh; next_jh = jh->b_cpnext; - ret = __try_to_free_cp_buf(jh); + if (!destroy) + ret = __try_to_free_cp_buf(jh); + else + ret = __jbd2_journal_remove_checkpoint(jh) + 1; if (!ret) return freed; if (ret == 2) @@ -459,10 +462,11 @@ static int journal_clean_one_cp_list(struct journal_head *jh) * journal_clean_checkpoint_list * * Find all the written-back checkpoint buffers in the journal and release them. + * If 'destroy' is set, release all buffers unconditionally. * * Called with j_list_lock held. */ -void __jbd2_journal_clean_checkpoint_list(journal_t *journal) +void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy) { transaction_t *transaction, *last_transaction, *next_transaction; int ret; @@ -476,7 +480,8 @@ void __jbd2_journal_clean_checkpoint_list(journal_t *journal) do { transaction = next_transaction; next_transaction = transaction->t_cpnext; - ret = journal_clean_one_cp_list(transaction->t_checkpoint_list); + ret = journal_clean_one_cp_list(transaction->t_checkpoint_list, + destroy); /* * This function only frees up some memory if possible so we * dont have an obligation to finish processing. Bail out if @@ -492,7 +497,7 @@ void __jbd2_journal_clean_checkpoint_list(journal_t *journal) * we can possibly see not yet submitted buffers on io_list */ ret = journal_clean_one_cp_list(transaction-> - t_checkpoint_io_list); + t_checkpoint_io_list, destroy); if (need_resched()) return; /* @@ -505,6 +510,28 @@ void __jbd2_journal_clean_checkpoint_list(journal_t *journal) } while (transaction != last_transaction); } +/* + * Remove buffers from all checkpoint lists as journal is aborted and we just + * need to free memory + */ +void jbd2_journal_destroy_checkpoint(journal_t *journal) +{ + /* + * We loop because __jbd2_journal_clean_checkpoint_list() may abort + * early due to a need of rescheduling. 
+ */ + while (1) { + spin_lock(&journal->j_list_lock); + if (!journal->j_checkpoint_transactions) { + spin_unlock(&journal->j_list_lock); + break; + } + __jbd2_journal_clean_checkpoint_list(journal, true); + spin_unlock(&journal->j_list_lock); + cond_resched(); + } +} + /* * journal_remove_checkpoint: called after a buffer has been committed * to disk (either by being write-back flushed to disk, or being diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index b73e0215baa7c..362e5f614450e 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -510,7 +510,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) * frees some memory */ spin_lock(&journal->j_list_lock); - __jbd2_journal_clean_checkpoint_list(journal); + __jbd2_journal_clean_checkpoint_list(journal, false); spin_unlock(&journal->j_list_lock); jbd_debug(3, "JBD2: commit phase 1\n"); diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 112fad9e1e20a..7003c09257601 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1708,8 +1708,17 @@ int jbd2_journal_destroy(journal_t *journal) while (journal->j_checkpoint_transactions != NULL) { spin_unlock(&journal->j_list_lock); mutex_lock(&journal->j_checkpoint_mutex); - jbd2_log_do_checkpoint(journal); + err = jbd2_log_do_checkpoint(journal); mutex_unlock(&journal->j_checkpoint_mutex); + /* + * If checkpointing failed, just free the buffers to avoid + * looping forever + */ + if (err) { + jbd2_journal_destroy_checkpoint(journal); + spin_lock(&journal->j_list_lock); + break; + } spin_lock(&journal->j_list_lock); } diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index edb640ae9a948..eb1cebed3f36e 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1042,8 +1042,9 @@ void jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block); extern void jbd2_journal_commit_transaction(journal_t *); /* Checkpoint list management */ -void __jbd2_journal_clean_checkpoint_list(journal_t *journal); +void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy); int __jbd2_journal_remove_checkpoint(struct journal_head *); +void jbd2_journal_destroy_checkpoint(journal_t *journal); void __jbd2_journal_insert_checkpoint(struct journal_head *, transaction_t *); From ba6143d6e411fb94a9d73fffbaf309c126f3c16a Mon Sep 17 00:00:00 2001 From: Angga Date: Fri, 3 Jul 2015 14:40:52 +1200 Subject: [PATCH 0914/2091] ipv6: Make MLD packets to only be processed locally [ Upstream commit 4c938d22c88a9ddccc8c55a85e0430e9c62b1ac5 ] Before commit daad151263cf ("ipv6: Make ipv6_is_mld() inline and use it from ip6_mc_input().") MLD packets were only processed locally. After the change, a copy of MLD packet goes through ip6_mr_input, causing MRT6MSG_NOCACHE message to be generated to user space. Make MLD packet only processed locally. Fixes: daad151263cf ("ipv6: Make ipv6_is_mld() inline and use it from ip6_mc_input().") Signed-off-by: Hermin Anggawijaya Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_input.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index f2e464eba5efd..57990c929cd81 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -331,10 +331,10 @@ int ip6_mc_input(struct sk_buff *skb) if (offset < 0) goto out; - if (!ipv6_is_mld(skb, nexthdr, offset)) - goto out; + if (ipv6_is_mld(skb, nexthdr, offset)) + deliver = true; - deliver = true; + goto out; } /* unknown RA - process it normally */ } From 56fd491a29b829fac4ffb30f1fcec2b386455ad0 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Mon, 6 Jul 2015 15:51:20 +0200 Subject: [PATCH 0915/2091] rhashtable: fix for resize events during table walk [ Upstream commit 142b942a75cb10ede1b42bf85368d41449ab4e3b ] If rhashtable_walk_next detects a resize operation in progress, it jumps to the new table and continues walking that one. But it misses to drop the reference to it's current item, leading it to continue traversing the new table's bucket in which the current item is sorted into, and after reaching that bucket's end continues traversing the new table's second bucket instead of the first one, thereby potentially missing items. This fixes the rhashtable runtime test for me. Bug probably introduced by Herbert Xu's patch eddee5ba ("rhashtable: Fix walker behaviour during rehash") although not explicitly tested. Fixes: eddee5ba ("rhashtable: Fix walker behaviour during rehash") Signed-off-by: Phil Sutter Acked-by: Herbert Xu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- lib/rhashtable.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 8609378e65051..cf910e48f8f21 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -612,6 +612,8 @@ void *rhashtable_walk_next(struct rhashtable_iter *iter) iter->skip = 0; } + iter->p = NULL; + /* Ensure we see any new tables. */ smp_rmb(); @@ -622,8 +624,6 @@ void *rhashtable_walk_next(struct rhashtable_iter *iter) return ERR_PTR(-EAGAIN); } - iter->p = NULL; - out: return obj; From bd60ae48f727ef6cb9b0524fb49b7336cbb0ca66 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 6 Jul 2015 17:13:26 +0200 Subject: [PATCH 0916/2091] net: graceful exit from netif_alloc_netdev_queues() [ Upstream commit d339727c2b1a10f25e6636670ab6e1841170e328 ] User space can crash kernel with ip link add ifb10 numtxqueues 100000 type ifb We must replace a BUG_ON() by proper test and return -EINVAL for crazy values. Fixes: 60877a32bce00 ("net: allow large number of tx queues") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index aa82f9ab6a36d..47239143b48e5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6297,7 +6297,8 @@ static int netif_alloc_netdev_queues(struct net_device *dev) struct netdev_queue *tx; size_t sz = count * sizeof(*tx); - BUG_ON(count < 1 || count > 0xffff); + if (count < 1 || count > 0xffff) + return -EINVAL; tx = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); if (!tx) { From c559b9ff0bcbfce18608d1e89cdc2358e234f803 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 6 Jul 2015 17:25:10 +0200 Subject: [PATCH 0917/2091] Revert "dev: set iflink to 0 for virtual interfaces" [ Upstream commit 95ec655bc465ccb2a3329d4aff9a45e3c8188db5 ] This reverts commit e1622baf54df8cc958bf29d71de5ad545ea7d93c. 
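For reference, the reverted change made dev_get_iflink() return 0 for any device with rtnl_link_ops set; with the revert applied the helper goes back to roughly the shape below (condensed from the hunk further down):

	int dev_get_iflink(const struct net_device *dev)
	{
		if (dev->netdev_ops && dev->netdev_ops->ndo_get_iflink)
			return dev->netdev_ops->ndo_get_iflink(dev);

		return dev->ifindex;
	}
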
The side effect of this commit is to add a '@NONE' after each virtual interface name with a 'ip link'. It may break existing scripts. Reported-by: Olivier Hartkopp Signed-off-by: Nicolas Dichtel Tested-by: Oliver Hartkopp Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/dev.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 47239143b48e5..877ec57f6d7e6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -672,10 +672,6 @@ int dev_get_iflink(const struct net_device *dev) if (dev->netdev_ops && dev->netdev_ops->ndo_get_iflink) return dev->netdev_ops->ndo_get_iflink(dev); - /* If dev->rtnl_link_ops is set, it's a virtual interface. */ - if (dev->rtnl_link_ops) - return 0; - return dev->ifindex; } EXPORT_SYMBOL(dev_get_iflink); From 61d0de723e87ffcbe2350a582ecc426dbe78054c Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 7 Jul 2015 00:07:52 +0200 Subject: [PATCH 0918/2091] rtnetlink: verify IFLA_VF_INFO attributes before passing them to driver [ Upstream commit 4f7d2cdfdde71ffe962399b7020c674050329423 ] Jason Gunthorpe reported that since commit c02db8c6290b ("rtnetlink: make SR-IOV VF interface symmetric"), we don't verify IFLA_VF_INFO attributes anymore with respect to their policy, that is, ifla_vfinfo_policy[]. Before, they were part of ifla_policy[], but they have been nested since placed under IFLA_VFINFO_LIST, that contains the attribute IFLA_VF_INFO, which is another nested attribute for the actual VF attributes such as IFLA_VF_MAC, IFLA_VF_VLAN, etc. Despite the policy being split out from ifla_policy[] in this commit, it's never applied anywhere. nla_for_each_nested() only does basic nla_ok() testing for struct nlattr, but it doesn't know about the data context and their requirements. Fix, on top of Jason's initial work, does 1) parsing of the attributes with the right policy, and 2) using the resulting parsed attribute table from 1) instead of the nla_for_each_nested() loop (just like we used to do when still part of ifla_policy[]). Reference: http://thread.gmane.org/gmane.linux.network/368913 Fixes: c02db8c6290b ("rtnetlink: make SR-IOV VF interface symmetric") Reported-by: Jason Gunthorpe Cc: Chris Wright Cc: Sucheta Chakraborty Cc: Greg Rose Cc: Jeff Kirsher Cc: Rony Efraim Cc: Vlad Zolotarov Cc: Nicolas Dichtel Cc: Thomas Graf Signed-off-by: Jason Gunthorpe Signed-off-by: Daniel Borkmann Acked-by: Vlad Zolotarov Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/core/rtnetlink.c | 187 ++++++++++++++++++++++--------------------- 1 file changed, 96 insertions(+), 91 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 8de36824018de..fe95cb704aaa0 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1287,10 +1287,6 @@ static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { [IFLA_INFO_SLAVE_DATA] = { .type = NLA_NESTED }, }; -static const struct nla_policy ifla_vfinfo_policy[IFLA_VF_INFO_MAX+1] = { - [IFLA_VF_INFO] = { .type = NLA_NESTED }, -}; - static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = { [IFLA_VF_MAC] = { .len = sizeof(struct ifla_vf_mac) }, [IFLA_VF_VLAN] = { .len = sizeof(struct ifla_vf_vlan) }, @@ -1437,96 +1433,98 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[]) return 0; } -static int do_setvfinfo(struct net_device *dev, struct nlattr *attr) +static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) { - int rem, err = -EINVAL; - struct nlattr *vf; const struct net_device_ops *ops = dev->netdev_ops; + int err = -EINVAL; - nla_for_each_nested(vf, attr, rem) { - switch (nla_type(vf)) { - case IFLA_VF_MAC: { - struct ifla_vf_mac *ivm; - ivm = nla_data(vf); - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_mac) - err = ops->ndo_set_vf_mac(dev, ivm->vf, - ivm->mac); - break; - } - case IFLA_VF_VLAN: { - struct ifla_vf_vlan *ivv; - ivv = nla_data(vf); - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_vlan) - err = ops->ndo_set_vf_vlan(dev, ivv->vf, - ivv->vlan, - ivv->qos); - break; - } - case IFLA_VF_TX_RATE: { - struct ifla_vf_tx_rate *ivt; - struct ifla_vf_info ivf; - ivt = nla_data(vf); - err = -EOPNOTSUPP; - if (ops->ndo_get_vf_config) - err = ops->ndo_get_vf_config(dev, ivt->vf, - &ivf); - if (err) - break; - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_rate) - err = ops->ndo_set_vf_rate(dev, ivt->vf, - ivf.min_tx_rate, - ivt->rate); - break; - } - case IFLA_VF_RATE: { - struct ifla_vf_rate *ivt; - ivt = nla_data(vf); - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_rate) - err = ops->ndo_set_vf_rate(dev, ivt->vf, - ivt->min_tx_rate, - ivt->max_tx_rate); - break; - } - case IFLA_VF_SPOOFCHK: { - struct ifla_vf_spoofchk *ivs; - ivs = nla_data(vf); - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_spoofchk) - err = ops->ndo_set_vf_spoofchk(dev, ivs->vf, - ivs->setting); - break; - } - case IFLA_VF_LINK_STATE: { - struct ifla_vf_link_state *ivl; - ivl = nla_data(vf); - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_link_state) - err = ops->ndo_set_vf_link_state(dev, ivl->vf, - ivl->link_state); - break; - } - case IFLA_VF_RSS_QUERY_EN: { - struct ifla_vf_rss_query_en *ivrssq_en; + if (tb[IFLA_VF_MAC]) { + struct ifla_vf_mac *ivm = nla_data(tb[IFLA_VF_MAC]); - ivrssq_en = nla_data(vf); - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_rss_query_en) - err = ops->ndo_set_vf_rss_query_en(dev, - ivrssq_en->vf, - ivrssq_en->setting); - break; - } - default: - err = -EINVAL; - break; - } - if (err) - break; + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_mac) + err = ops->ndo_set_vf_mac(dev, ivm->vf, + ivm->mac); + if (err < 0) + return err; + } + + if (tb[IFLA_VF_VLAN]) { + struct ifla_vf_vlan *ivv = nla_data(tb[IFLA_VF_VLAN]); + + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_vlan) + err = ops->ndo_set_vf_vlan(dev, ivv->vf, ivv->vlan, + ivv->qos); + if (err < 0) + return err; + } + + if (tb[IFLA_VF_TX_RATE]) { + struct ifla_vf_tx_rate *ivt = nla_data(tb[IFLA_VF_TX_RATE]); + struct ifla_vf_info ivf; + + err = -EOPNOTSUPP; + if (ops->ndo_get_vf_config) + 
err = ops->ndo_get_vf_config(dev, ivt->vf, &ivf); + if (err < 0) + return err; + + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_rate) + err = ops->ndo_set_vf_rate(dev, ivt->vf, + ivf.min_tx_rate, + ivt->rate); + if (err < 0) + return err; + } + + if (tb[IFLA_VF_RATE]) { + struct ifla_vf_rate *ivt = nla_data(tb[IFLA_VF_RATE]); + + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_rate) + err = ops->ndo_set_vf_rate(dev, ivt->vf, + ivt->min_tx_rate, + ivt->max_tx_rate); + if (err < 0) + return err; } + + if (tb[IFLA_VF_SPOOFCHK]) { + struct ifla_vf_spoofchk *ivs = nla_data(tb[IFLA_VF_SPOOFCHK]); + + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_spoofchk) + err = ops->ndo_set_vf_spoofchk(dev, ivs->vf, + ivs->setting); + if (err < 0) + return err; + } + + if (tb[IFLA_VF_LINK_STATE]) { + struct ifla_vf_link_state *ivl = nla_data(tb[IFLA_VF_LINK_STATE]); + + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_link_state) + err = ops->ndo_set_vf_link_state(dev, ivl->vf, + ivl->link_state); + if (err < 0) + return err; + } + + if (tb[IFLA_VF_RSS_QUERY_EN]) { + struct ifla_vf_rss_query_en *ivrssq_en; + + err = -EOPNOTSUPP; + ivrssq_en = nla_data(tb[IFLA_VF_RSS_QUERY_EN]); + if (ops->ndo_set_vf_rss_query_en) + err = ops->ndo_set_vf_rss_query_en(dev, ivrssq_en->vf, + ivrssq_en->setting); + if (err < 0) + return err; + } + return err; } @@ -1722,14 +1720,21 @@ static int do_setlink(const struct sk_buff *skb, } if (tb[IFLA_VFINFO_LIST]) { + struct nlattr *vfinfo[IFLA_VF_MAX + 1]; struct nlattr *attr; int rem; + nla_for_each_nested(attr, tb[IFLA_VFINFO_LIST], rem) { - if (nla_type(attr) != IFLA_VF_INFO) { + if (nla_type(attr) != IFLA_VF_INFO || + nla_len(attr) < NLA_HDRLEN) { err = -EINVAL; goto errout; } - err = do_setvfinfo(dev, attr); + err = nla_parse_nested(vfinfo, IFLA_VF_MAX, attr, + ifla_vf_policy); + if (err < 0) + goto errout; + err = do_setvfinfo(dev, vfinfo); if (err < 0) goto errout; status |= DO_SETLINK_NOTIFY; From 952f21eb0cd26e07b9a9e0639c0bd9453caee441 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timo=20Ter=C3=A4s?= Date: Tue, 7 Jul 2015 08:34:13 +0300 Subject: [PATCH 0919/2091] ip_tunnel: fix ipv4 pmtu check to honor inner ip header df MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit fc24f2b2094366da8786f59f2606307e934cea17 ] Frag needed should be sent only if the inner header asked to not fragment. Currently fragmentation is broken if the tunnel has df set, but df was not asked in the original packet. The tunnel's df needs to be still checked to update internally the pmtu cache. Commit 23a3647bc4f93bac broke it, and this commit fixes the ipv4 df check back to the way it was. Fixes: 23a3647bc4f93bac ("ip_tunnels: Use skb-len to PMTU check.") Cc: Pravin B Shelar Signed-off-by: Timo Teräs Acked-by: Pravin B Shelar Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_tunnel.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 4c2c3ba4ba659..626d9e56a6bd2 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -586,7 +586,8 @@ int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t, EXPORT_SYMBOL(ip_tunnel_encap); static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, - struct rtable *rt, __be16 df) + struct rtable *rt, __be16 df, + const struct iphdr *inner_iph) { struct ip_tunnel *tunnel = netdev_priv(dev); int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len; @@ -603,7 +604,8 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, if (skb->protocol == htons(ETH_P_IP)) { if (!skb_is_gso(skb) && - (df & htons(IP_DF)) && mtu < pkt_size) { + (inner_iph->frag_off & htons(IP_DF)) && + mtu < pkt_size) { memset(IPCB(skb), 0, sizeof(*IPCB(skb))); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); return -E2BIG; @@ -737,7 +739,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, goto tx_error; } - if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) { + if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph)) { ip_rt_put(rt); goto tx_error; } From bb18cdc8b40e14a36fff015e794724e989af426c Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Tue, 7 Jul 2015 09:43:45 -0400 Subject: [PATCH 0920/2091] net/tipc: initialize security state for new connection socket [ Upstream commit fdd75ea8df370f206a8163786e7470c1277a5064 ] Calling connect() with an AF_TIPC socket would trigger a series of error messages from SELinux along the lines of: SELinux: Invalid class 0 type=AVC msg=audit(1434126658.487:34500): avc: denied { } for pid=292 comm="kworker/u16:5" scontext=system_u:system_r:kernel_t:s0 tcontext=system_u:object_r:unlabeled_t:s0 tclass= permissive=0 This was due to a failure to initialize the security state of the new connection sock by the tipc code, leaving it with junk in the security class field and an unlabeled secid. Add a call to security_sk_clone() to inherit the security state from the parent socket. Reported-by: Tim Shearer Signed-off-by: Stephen Smalley Acked-by: Paul Moore Acked-by: Ying Xue Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/tipc/socket.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index f485600c4507b..20cc6df071576 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2009,6 +2009,7 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags) res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, 1); if (res) goto exit; + security_sk_clone(sock->sk, new_sock->sk); new_sk = new_sock->sk; new_tsock = tipc_sk(new_sk); From 27463fc0ab7a97bb0f311623f846f5f7c8457be8 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 7 Jul 2015 15:55:56 +0200 Subject: [PATCH 0921/2091] bridge: mdb: zero out the local br_ip variable before use [ Upstream commit f1158b74e54f2e2462ba5e2f45a118246d9d5b43 ] Since commit b0e9a30dd669 ("bridge: Add vlan id to multicast groups") there's a check in br_ip_equal() for a matching vlan id, but the mdb functions were not modified to use (or at least zero it) so when an entry was added it would have a garbage vlan id (from the local br_ip variable in __br_mdb_add/del) and this would prevent it from being matched and also deleted. 
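Concretely, the on-stack key was built along these lines before the fix (a simplified excerpt of the __br_mdb_add() path, IPv4 case only):

	struct br_ip ip;			/* on-stack, never cleared */

	ip.proto = entry->addr.proto;		/* only proto and address are set */
	if (ip.proto == htons(ETH_P_IP))
		ip.u.ip4 = entry->addr.u.ip4;
	/* the vlan id member keeps whatever happened to be on the stack,
	 * so br_ip_equal() can never match this entry again */
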
So zero out the whole local ip var to protect ourselves from future changes and also to fix the current bug, since there's no vlan id support in the mdb uapi - use always vlan id 0. Example before patch: root@debian:~# bridge mdb add dev br0 port eth1 grp 239.0.0.1 permanent root@debian:~# bridge mdb dev br0 port eth1 grp 239.0.0.1 permanent root@debian:~# bridge mdb del dev br0 port eth1 grp 239.0.0.1 permanent RTNETLINK answers: Invalid argument After patch: root@debian:~# bridge mdb add dev br0 port eth1 grp 239.0.0.1 permanent root@debian:~# bridge mdb dev br0 port eth1 grp 239.0.0.1 permanent root@debian:~# bridge mdb del dev br0 port eth1 grp 239.0.0.1 permanent root@debian:~# bridge mdb Signed-off-by: Nikolay Aleksandrov Fixes: b0e9a30dd669 ("bridge: Add vlan id to multicast groups") Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_mdb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index e29ad70b3000b..cc00066c0622b 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -371,6 +371,7 @@ static int __br_mdb_add(struct net *net, struct net_bridge *br, if (!p || p->br != br || p->state == BR_STATE_DISABLED) return -EINVAL; + memset(&ip, 0, sizeof(ip)); ip.proto = entry->addr.proto; if (ip.proto == htons(ETH_P_IP)) ip.u.ip4 = entry->addr.u.ip4; @@ -417,6 +418,7 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry) if (!netif_running(br->dev) || br->multicast_disabled) return -EINVAL; + memset(&ip, 0, sizeof(ip)); ip.proto = entry->addr.proto; if (ip.proto == htons(ETH_P_IP)) { if (timer_pending(&br->ip4_other_query.timer)) From fd08e0ccaa649064ca0df99eaadb58199be41970 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 8 Jul 2015 21:42:11 +0200 Subject: [PATCH 0922/2091] net: pktgen: fix race between pktgen_thread_worker() and kthread_stop() [ Upstream commit fecdf8be2d91e04b0a9a4f79ff06499a36f5d14f ] pktgen_thread_worker() is obviously racy, kthread_stop() can come between the kthread_should_stop() check and set_current_state(). Signed-off-by: Oleg Nesterov Reported-by: Jan Stancek Reported-by: Marcelo Leitner Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/pktgen.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 508155b283ddc..043ea1867d0f0 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3490,8 +3490,10 @@ static int pktgen_thread_worker(void *arg) pktgen_rem_thread(t); /* Wait for kthread_stop */ - while (!kthread_should_stop()) { + for (;;) { set_current_state(TASK_INTERRUPTIBLE); + if (kthread_should_stop()) + break; schedule(); } __set_current_state(TASK_RUNNING); From af9f8e1f82574c2eb91377aa93db0a1297e92c0b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 9 Jul 2015 18:56:07 +0200 Subject: [PATCH 0923/2091] bridge: fix potential crash in __netdev_pick_tx() [ Upstream commit a7d35f9d73e9ffa74a02304b817e579eec632f67 ] Commit c29390c6dfee ("xps: must clear sender_cpu before forwarding") fixed an issue in normal forward path, caused by sender_cpu & napi_id skb fields being an union. Bridge is another point where skb can be forwarded, so we need the same cure. Bug triggers if packet was received on a NIC using skb_mark_napi_id() Fixes: 2bd82484bb4c ("xps: fix xps for stacked devices") Signed-off-by: Eric Dumazet Reported-by: Bob Liu Tested-by: Bob Liu Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_forward.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index e97572b5d2ccf..0ff6e1bbca910 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -42,6 +42,7 @@ int br_dev_queue_push_xmit(struct sock *sk, struct sk_buff *skb) } else { skb_push(skb, ETH_HLEN); br_drop_fake_rtable(skb); + skb_sender_cpu_clear(skb); dev_queue_xmit(skb); } From f75c8a3015422128ca150e81c730bc2a471b5f4a Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Thu, 9 Jul 2015 09:59:09 +0300 Subject: [PATCH 0924/2091] net: do not process device backlog during unregistration [ Upstream commit e9e4dd3267d0c5234c5c0f47440456b10875dec9 ] commit 381c759d9916 ("ipv4: Avoid crashing in ip_error") fixes a problem where processed packet comes from device with destroyed inetdev (dev->ip_ptr). This is not expected because inetdev_destroy is called in NETDEV_UNREGISTER phase and packets should not be processed after dev_close_many() and synchronize_net(). Above fix is still required because inetdev_destroy can be called for other reasons. But it shows the real problem: backlog can keep packets for long time and they do not hold reference to device. Such packets are then delivered to upper levels at the same time when device is unregistered. Calling flush_backlog after NETDEV_UNREGISTER_FINAL still accounts all packets from backlog but before that some packets continue to be delivered to upper levels long after the synchronize_net call which is supposed to wait the last ones. Also, as Eric pointed out, processed packets, mostly from other devices, can continue to add new packets to backlog. Fix the problem by moving flush_backlog early, after the device driver is stopped and before the synchronize_net() call. Then use netif_running check to make sure we do not add more packets to backlog. We have to do it in enqueue_to_backlog context when the local IRQ is disabled. As result, after the flush_backlog and synchronize_net sequence all packets should be accounted. Thanks to Eric W. Biederman for the test script and his valuable feedback! Reported-by: Vittorio Gambaletta Fixes: 6e583ce5242f ("net: eliminate refcounting in backlog queue") Cc: Eric W. Biederman Cc: Stephen Hemminger Signed-off-by: Julian Anastasov Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/core/dev.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 877ec57f6d7e6..cb49e014e959a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3337,6 +3337,8 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu, local_irq_save(flags); rps_lock(sd); + if (!netif_running(skb->dev)) + goto drop; qlen = skb_queue_len(&sd->input_pkt_queue); if (qlen <= netdev_max_backlog && !skb_flow_limit(skb, qlen)) { if (qlen) { @@ -3358,6 +3360,7 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu, goto enqueue; } +drop: sd->dropped++; rps_unlock(sd); @@ -6023,6 +6026,7 @@ static void rollback_registered_many(struct list_head *head) unlist_netdevice(dev); dev->reg_state = NETREG_UNREGISTERING; + on_each_cpu(flush_backlog, dev, 1); } synchronize_net(); @@ -6647,8 +6651,6 @@ void netdev_run_todo(void) dev->reg_state = NETREG_UNREGISTERED; - on_each_cpu(flush_backlog, dev, 1); - netdev_wait_allrefs(dev); /* paranoia */ From 5568552ac83161a71b02f15e1d9c5b388d3da0a1 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Thu, 9 Jul 2015 09:59:10 +0300 Subject: [PATCH 0925/2091] net: call rcu_read_lock early in process_backlog [ Upstream commit 2c17d27c36dcce2b6bf689f41a46b9e909877c21 ] Incoming packet should be either in backlog queue or in RCU read-side section. Otherwise, the final sequence of flush_backlog() and synchronize_net() may miss packets that can run without device reference: CPU 1 CPU 2 skb->dev: no reference process_backlog:__skb_dequeue process_backlog:local_irq_enable on_each_cpu for flush_backlog => IPI(hardirq): flush_backlog - packet not found in backlog CPU delayed ... synchronize_net - no ongoing RCU read-side sections netdev_run_todo, rcu_barrier: no ongoing callbacks __netif_receive_skb_core:rcu_read_lock - too late free dev process packet for freed dev Fixes: 6e583ce5242f ("net: eliminate refcounting in backlog queue") Cc: Eric W. Biederman Cc: Stephen Hemminger Signed-off-by: Julian Anastasov Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/core/dev.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index cb49e014e959a..a42b232805a5c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3666,8 +3666,6 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc) pt_prev = NULL; - rcu_read_lock(); - another_round: skb->skb_iif = skb->dev->ifindex; @@ -3677,7 +3675,7 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc) skb->protocol == cpu_to_be16(ETH_P_8021AD)) { skb = skb_vlan_untag(skb); if (unlikely(!skb)) - goto unlock; + goto out; } #ifdef CONFIG_NET_CLS_ACT @@ -3707,7 +3705,7 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc) if (static_key_false(&ingress_needed)) { skb = handle_ing(skb, &pt_prev, &ret, orig_dev); if (!skb) - goto unlock; + goto out; } skb->tc_verd = 0; @@ -3724,7 +3722,7 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc) if (vlan_do_receive(&skb)) goto another_round; else if (unlikely(!skb)) - goto unlock; + goto out; } rx_handler = rcu_dereference(skb->dev->rx_handler); @@ -3736,7 +3734,7 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc) switch (rx_handler(&skb)) { case RX_HANDLER_CONSUMED: ret = NET_RX_SUCCESS; - goto unlock; + goto out; case RX_HANDLER_ANOTHER: goto another_round; case RX_HANDLER_EXACT: @@ -3790,8 +3788,7 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc) ret = NET_RX_DROP; } -unlock: - rcu_read_unlock(); +out: return ret; } @@ -3822,29 +3819,30 @@ static int __netif_receive_skb(struct sk_buff *skb) static int netif_receive_skb_internal(struct sk_buff *skb) { + int ret; + net_timestamp_check(netdev_tstamp_prequeue, skb); if (skb_defer_rx_timestamp(skb)) return NET_RX_SUCCESS; + rcu_read_lock(); + #ifdef CONFIG_RPS if (static_key_false(&rps_needed)) { struct rps_dev_flow voidflow, *rflow = &voidflow; - int cpu, ret; - - rcu_read_lock(); - - cpu = get_rps_cpu(skb->dev, skb, &rflow); + int cpu = get_rps_cpu(skb->dev, skb, &rflow); if (cpu >= 0) { ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail); rcu_read_unlock(); return ret; } - rcu_read_unlock(); } #endif - return __netif_receive_skb(skb); + ret = __netif_receive_skb(skb); + rcu_read_unlock(); + return ret; } /** @@ -4389,8 +4387,10 @@ static int process_backlog(struct napi_struct *napi, int quota) struct sk_buff *skb; while ((skb = __skb_dequeue(&sd->process_queue))) { + rcu_read_lock(); local_irq_enable(); __netif_receive_skb(skb); + rcu_read_unlock(); local_irq_disable(); input_queue_head_incr(sd); if (++work >= quota) { From 316590d25ea05263eb4869fa668bf8a78fa04c1c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sun, 12 Jul 2015 01:20:55 +0300 Subject: [PATCH 0926/2091] net/xen-netback: off by one in BUG_ON() condition [ Upstream commit 50c2e4dd6749725338621fff456b26d3a592259f ] The > should be >=. I also added spaces around the '-' operations so the code is a little more consistent and matches the condition better. Fixes: f53c3fe8dad7 ('xen-netback: Introduce TX grant mapping') Signed-off-by: Dan Carpenter Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/xen-netback/netback.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 0d2594395ffbc..0866c5dfdf87b 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -1571,13 +1571,13 @@ static inline void xenvif_tx_dealloc_action(struct xenvif_queue *queue) smp_rmb(); while (dc != dp) { - BUG_ON(gop - queue->tx_unmap_ops > MAX_PENDING_REQS); + BUG_ON(gop - queue->tx_unmap_ops >= MAX_PENDING_REQS); pending_idx = queue->dealloc_ring[pending_index(dc++)]; - pending_idx_release[gop-queue->tx_unmap_ops] = + pending_idx_release[gop - queue->tx_unmap_ops] = pending_idx; - queue->pages_to_unmap[gop-queue->tx_unmap_ops] = + queue->pages_to_unmap[gop - queue->tx_unmap_ops] = queue->mmap_pages[pending_idx]; gnttab_set_unmap_op(gop, idx_to_kaddr(queue, pending_idx), From f21407a98826002e0274c09f1d77ab0f69b80a3d Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 13 Jul 2015 16:04:13 +0800 Subject: [PATCH 0927/2091] net: Clone skb before setting peeked flag [ Upstream commit 738ac1ebb96d02e0d23bc320302a6ea94c612dec ] Shared skbs must not be modified and this is crucial for broadcast and/or multicast paths where we use it as an optimisation to avoid unnecessary cloning. The function skb_recv_datagram breaks this rule by setting peeked without cloning the skb first. This causes funky races which leads to double-free. This patch fixes this by cloning the skb and replacing the skb in the list when setting skb->peeked. Fixes: a59322be07c9 ("[UDP]: Only increment counter on first peek/recv") Reported-by: Konstantin Khlebnikov Signed-off-by: Herbert Xu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/datagram.c | 41 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 38 insertions(+), 3 deletions(-) diff --git a/net/core/datagram.c b/net/core/datagram.c index b80fb91bb3f7e..4e9a3f690b7e6 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -131,6 +131,35 @@ static int wait_for_more_packets(struct sock *sk, int *err, long *timeo_p, goto out; } +static int skb_set_peeked(struct sk_buff *skb) +{ + struct sk_buff *nskb; + + if (skb->peeked) + return 0; + + /* We have to unshare an skb before modifying it. */ + if (!skb_shared(skb)) + goto done; + + nskb = skb_clone(skb, GFP_ATOMIC); + if (!nskb) + return -ENOMEM; + + skb->prev->next = nskb; + skb->next->prev = nskb; + nskb->prev = skb->prev; + nskb->next = skb->next; + + consume_skb(skb); + skb = nskb; + +done: + skb->peeked = 1; + + return 0; +} + /** * __skb_recv_datagram - Receive a datagram skbuff * @sk: socket @@ -165,7 +194,9 @@ static int wait_for_more_packets(struct sock *sk, int *err, long *timeo_p, struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, int *peeked, int *off, int *err) { + struct sk_buff_head *queue = &sk->sk_receive_queue; struct sk_buff *skb, *last; + unsigned long cpu_flags; long timeo; /* * Caller is allowed not to check sk->sk_err before skb_recv_datagram() @@ -184,8 +215,6 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, * Look at current nfs client by the way... * However, this function was correct in any case. 
8) */ - unsigned long cpu_flags; - struct sk_buff_head *queue = &sk->sk_receive_queue; int _off = *off; last = (struct sk_buff *)queue; @@ -199,7 +228,11 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, _off -= skb->len; continue; } - skb->peeked = 1; + + error = skb_set_peeked(skb); + if (error) + goto unlock_err; + atomic_inc(&skb->users); } else __skb_unlink(skb, queue); @@ -223,6 +256,8 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, return NULL; +unlock_err: + spin_unlock_irqrestore(&queue->lock, cpu_flags); no_packet: *err = error; return NULL; From 8b843198d812c885a8a9aeedf0135b47e3e500f5 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 13 Jul 2015 20:01:42 +0800 Subject: [PATCH 0928/2091] net: Fix skb csum races when peeking [ Upstream commit 89c22d8c3b278212eef6a8cc66b570bc840a6f5a ] When we calculate the checksum on the recv path, we store the result in the skb as an optimisation in case we need the checksum again down the line. This is in fact bogus for the MSG_PEEK case as this is done without any locking. So multiple threads can peek and then store the result to the same skb, potentially resulting in bogus skb states. This patch fixes this by only storing the result if the skb is not shared. This preserves the optimisations for the few cases where it can be done safely due to locking or other reasons, e.g., SIOCINQ. Signed-off-by: Herbert Xu Acked-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/datagram.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/net/core/datagram.c b/net/core/datagram.c index 4e9a3f690b7e6..4967262b27076 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -657,7 +657,8 @@ __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len) !skb->csum_complete_sw) netdev_rx_csum_fault(skb->dev); } - skb->csum_valid = !sum; + if (!skb_shared(skb)) + skb->csum_valid = !sum; return sum; } EXPORT_SYMBOL(__skb_checksum_complete_head); @@ -677,11 +678,13 @@ __sum16 __skb_checksum_complete(struct sk_buff *skb) netdev_rx_csum_fault(skb->dev); } - /* Save full packet checksum */ - skb->csum = csum; - skb->ip_summed = CHECKSUM_COMPLETE; - skb->csum_complete_sw = 1; - skb->csum_valid = !sum; + if (!skb_shared(skb)) { + /* Save full packet checksum */ + skb->csum = csum; + skb->ip_summed = CHECKSUM_COMPLETE; + skb->csum_complete_sw = 1; + skb->csum_valid = !sum; + } return sum; } From e308dd584ec309635129444e41dd4c90abf61842 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 4 Aug 2015 15:42:47 +0800 Subject: [PATCH 0929/2091] net: Fix skb_set_peeked use-after-free bug [ Upstream commit a0a2a6602496a45ae838a96db8b8173794b5d398 ] The commit 738ac1ebb96d02e0d23bc320302a6ea94c612dec ("net: Clone skb before setting peeked flag") introduced a use-after-free bug in skb_recv_datagram. This is because skb_set_peeked may create a new skb and free the existing one. As it stands the caller will continue to use the old freed skb. This patch fixes it by making skb_set_peeked return the new skb (or the old one if unchanged). Fixes: 738ac1ebb96d ("net: Clone skb before setting peeked flag") Reported-by: Brenden Blanco Signed-off-by: Herbert Xu Tested-by: Brenden Blanco Reviewed-by: Konstantin Khlebnikov Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/core/datagram.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/net/core/datagram.c b/net/core/datagram.c index 4967262b27076..617088aee21d4 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -131,12 +131,12 @@ static int wait_for_more_packets(struct sock *sk, int *err, long *timeo_p, goto out; } -static int skb_set_peeked(struct sk_buff *skb) +static struct sk_buff *skb_set_peeked(struct sk_buff *skb) { struct sk_buff *nskb; if (skb->peeked) - return 0; + return skb; /* We have to unshare an skb before modifying it. */ if (!skb_shared(skb)) @@ -144,7 +144,7 @@ static int skb_set_peeked(struct sk_buff *skb) nskb = skb_clone(skb, GFP_ATOMIC); if (!nskb) - return -ENOMEM; + return ERR_PTR(-ENOMEM); skb->prev->next = nskb; skb->next->prev = nskb; @@ -157,7 +157,7 @@ static int skb_set_peeked(struct sk_buff *skb) done: skb->peeked = 1; - return 0; + return skb; } /** @@ -229,8 +229,9 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, continue; } - error = skb_set_peeked(skb); - if (error) + skb = skb_set_peeked(skb); + error = PTR_ERR(skb); + if (IS_ERR(skb)) goto unlock_err; atomic_inc(&skb->users); From 1b8976fedaf00e6e06ebe850aa4c8602d55282c3 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Mon, 13 Jul 2015 06:36:19 -0700 Subject: [PATCH 0930/2091] bridge: mdb: fix double add notification [ Upstream commit 5ebc784625ea68a9570d1f70557e7932988cd1b4 ] Since the mdb add/del code was introduced there have been 2 br_mdb_notify calls when doing br_mdb_add() resulting in 2 notifications on each add. Example: Command: bridge mdb add dev br0 port eth1 grp 239.0.0.1 permanent Before patch: root@debian:~# bridge monitor all [MDB]dev br0 port eth1 grp 239.0.0.1 permanent [MDB]dev br0 port eth1 grp 239.0.0.1 permanent After patch: root@debian:~# bridge monitor all [MDB]dev br0 port eth1 grp 239.0.0.1 permanent Signed-off-by: Nikolay Aleksandrov Fixes: cfd567543590 ("bridge: add support of adding and deleting mdb entries") Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_mdb.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index cc00066c0622b..d1f910c0d586a 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -348,7 +348,6 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, return -ENOMEM; rcu_assign_pointer(*pp, p); - br_mdb_notify(br->dev, port, group, RTM_NEWMDB); return 0; } From 3e71447b891943cfaf8e5be7bd638c0414b3fea8 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Mon, 13 Jul 2015 12:30:07 -0700 Subject: [PATCH 0931/2091] fq_codel: fix a use-after-free [ Upstream commit 052cbda41fdc243a8d40cce7ab3a6327b4b2887e ] Fixes: 25331d6ce42b ("net: sched: implement qstat helper routines") Cc: John Fastabend Signed-off-by: Cong Wang Signed-off-by: Cong Wang Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_fq_codel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index c244c45b78d7f..9291598b5aad4 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -162,10 +162,10 @@ static unsigned int fq_codel_drop(struct Qdisc *sch) skb = dequeue_head(flow); len = qdisc_pkt_len(skb); q->backlogs[idx] -= len; - kfree_skb(skb); sch->q.qlen--; qdisc_qstats_drop(sch); qdisc_qstats_backlog_dec(sch, skb); + kfree_skb(skb); flow->dropped++; return idx; } From ef84567e9484f1a4755b184f6ad888c371504245 Mon Sep 17 00:00:00 2001 From: Tilman Schmidt Date: Tue, 14 Jul 2015 00:37:13 +0200 Subject: [PATCH 0932/2091] isdn/gigaset: reset tty->receive_room when attaching ser_gigaset [ Upstream commit fd98e9419d8d622a4de91f76b306af6aa627aa9c ] Commit 79901317ce80 ("n_tty: Don't flush buffer when closing ldisc"), first merged in kernel release 3.10, caused the following regression in the Gigaset M101 driver: Before that commit, when closing the N_TTY line discipline in preparation to switching to N_GIGASET_M101, receive_room would be reset to a non-zero value by the call to n_tty_flush_buffer() in n_tty's close method. With the removal of that call, receive_room might be left at zero, blocking data reception on the serial line. The present patch fixes that regression by setting receive_room to an appropriate value in the ldisc open method. Fixes: 79901317ce80 ("n_tty: Don't flush buffer when closing ldisc") Signed-off-by: Tilman Schmidt Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/isdn/gigaset/ser-gigaset.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/isdn/gigaset/ser-gigaset.c b/drivers/isdn/gigaset/ser-gigaset.c index 8c91fd5eb6fdd..3ac9c4194814c 100644 --- a/drivers/isdn/gigaset/ser-gigaset.c +++ b/drivers/isdn/gigaset/ser-gigaset.c @@ -524,9 +524,18 @@ gigaset_tty_open(struct tty_struct *tty) cs->hw.ser->tty = tty; atomic_set(&cs->hw.ser->refcnt, 1); init_completion(&cs->hw.ser->dead_cmp); - tty->disc_data = cs; + /* Set the amount of data we're willing to receive per call + * from the hardware driver to half of the input buffer size + * to leave some reserve. + * Note: We don't do flow control towards the hardware driver. + * If more data is received than will fit into the input buffer, + * it will be dropped and an error will be logged. This should + * never happen as the device is slow and the buffer size ample. + */ + tty->receive_room = RBUFSIZE/2; + /* OK.. Initialization of the datastructures and the HW is done.. Now * startup system and notify the LL that we are ready to run */ From 7921f568ebdebd8862de6146d04137d0c8176ce0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 14 Jul 2015 08:10:22 +0200 Subject: [PATCH 0933/2091] ipv6: lock socket in ip6_datagram_connect() [ Upstream commit 03645a11a570d52e70631838cb786eb4253eb463 ] ip6_datagram_connect() is doing a lot of socket changes without socket being locked. This looks wrong, at least for udp_lib_rehash() which could corrupt lists because of concurrent udp_sk(sk)->udp_portaddr_hash accesses. Signed-off-by: Eric Dumazet Acked-by: Herbert Xu Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- include/net/ip.h | 1 + net/ipv4/datagram.c | 16 ++++++++++++---- net/ipv6/datagram.c | 20 +++++++++++++++----- 3 files changed, 28 insertions(+), 9 deletions(-) diff --git a/include/net/ip.h b/include/net/ip.h index d14af7edd197c..f41fc497b21b2 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -161,6 +161,7 @@ static inline __u8 get_rtconn_flags(struct ipcm_cookie* ipc, struct sock* sk) } /* datagram.c */ +int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); void ip4_datagram_release_cb(struct sock *sk); diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index 90c0e83861161..574fad9cca052 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -20,7 +20,7 @@ #include #include -int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) +int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct inet_sock *inet = inet_sk(sk); struct sockaddr_in *usin = (struct sockaddr_in *) uaddr; @@ -39,8 +39,6 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) sk_dst_reset(sk); - lock_sock(sk); - oif = sk->sk_bound_dev_if; saddr = inet->inet_saddr; if (ipv4_is_multicast(usin->sin_addr.s_addr)) { @@ -82,9 +80,19 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) sk_dst_set(sk, &rt->dst); err = 0; out: - release_sock(sk); return err; } +EXPORT_SYMBOL(__ip4_datagram_connect); + +int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) +{ + int res; + + lock_sock(sk); + res = __ip4_datagram_connect(sk, uaddr, addr_len); + release_sock(sk); + return res; +} EXPORT_SYMBOL(ip4_datagram_connect); /* Because UDP xmit path can manipulate sk_dst_cache without holding diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 62d908e64eeb5..b10a88986a989 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -40,7 +40,7 @@ static bool ipv6_mapped_addr_any(const struct in6_addr *a) return ipv6_addr_v4mapped(a) && (a->s6_addr32[3] == 0); } -int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) +static int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; struct inet_sock *inet = inet_sk(sk); @@ -56,7 +56,7 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (usin->sin6_family == AF_INET) { if (__ipv6_only_sock(sk)) return -EAFNOSUPPORT; - err = ip4_datagram_connect(sk, uaddr, addr_len); + err = __ip4_datagram_connect(sk, uaddr, addr_len); goto ipv4_connected; } @@ -98,9 +98,9 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) sin.sin_addr.s_addr = daddr->s6_addr32[3]; sin.sin_port = usin->sin6_port; - err = ip4_datagram_connect(sk, - (struct sockaddr *) &sin, - sizeof(sin)); + err = __ip4_datagram_connect(sk, + (struct sockaddr *) &sin, + sizeof(sin)); ipv4_connected: if (err) @@ -204,6 +204,16 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) fl6_sock_release(flowlabel); return err; } + +int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) +{ + int res; + + lock_sock(sk); + res = __ip6_datagram_connect(sk, uaddr, addr_len); + release_sock(sk); + return res; +} EXPORT_SYMBOL_GPL(ip6_datagram_connect); int ip6_datagram_connect_v6_only(struct sock *sk, struct sockaddr 
*uaddr, From f6eda61aba0ca9e6388fcb2453541281d4f8cb14 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 15 Jul 2015 21:52:51 +0200 Subject: [PATCH 0934/2091] bonding: fix destruction of bond with devices different from arphrd_ether [ Upstream commit 06f6d1094aa0992432b1e2a0920b0ee86ccd83bf ] When the bonding is being unloaded and the netdevice notifier is unregistered it executes NETDEV_UNREGISTER for each device which should remove the bond's proc entry but if the device enslaved is not of ARPHRD_ETHER type and is in front of the bonding, it may execute bond_release_and_destroy() first which would release the last slave and destroy the bond device leaving the proc entry and thus we will get the following error (with dynamic debug on for bond_netdev_event to see the events order): [ 908.963051] eql: event: 9 [ 908.963052] eql: IFF_SLAVE [ 908.963054] eql: event: 2 [ 908.963056] eql: IFF_SLAVE [ 908.963058] eql: event: 6 [ 908.963059] eql: IFF_SLAVE [ 908.963110] bond0: Releasing active interface eql [ 908.976168] bond0: Destroying bond bond0 [ 908.976266] bond0 (unregistering): Released all slaves [ 908.984097] ------------[ cut here ]------------ [ 908.984107] WARNING: CPU: 0 PID: 1787 at fs/proc/generic.c:575 remove_proc_entry+0x112/0x160() [ 908.984110] remove_proc_entry: removing non-empty directory 'net/bonding', leaking at least 'bond0' [ 908.984111] Modules linked in: bonding(-) eql(O) 9p nfsd auth_rpcgss oid_registry nfs_acl nfs lockd grace fscache sunrpc crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel ppdev qxl drm_kms_helper snd_hda_codec_generic aesni_intel ttm aes_x86_64 glue_helper pcspkr lrw gf128mul ablk_helper cryptd snd_hda_intel virtio_console snd_hda_codec psmouse serio_raw snd_hwdep snd_hda_core 9pnet_virtio 9pnet evdev joydev drm virtio_balloon snd_pcm snd_timer snd soundcore i2c_piix4 i2c_core pvpanic acpi_cpufreq parport_pc parport processor thermal_sys button autofs4 ext4 crc16 mbcache jbd2 hid_generic usbhid hid sg sr_mod cdrom ata_generic virtio_blk virtio_net floppy ata_piix e1000 libata ehci_pci virtio_pci scsi_mod uhci_hcd ehci_hcd virtio_ring virtio usbcore usb_common [last unloaded: bonding] [ 908.984168] CPU: 0 PID: 1787 Comm: rmmod Tainted: G W O 4.2.0-rc2+ #8 [ 908.984170] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 [ 908.984172] 0000000000000000 ffffffff81732d41 ffffffff81525b34 ffff8800358dfda8 [ 908.984175] ffffffff8106c521 ffff88003595af78 ffff88003595af40 ffff88003e3a4280 [ 908.984178] ffffffffa058d040 0000000000000000 ffffffff8106c59a ffffffff8172ebd0 [ 908.984181] Call Trace: [ 908.984188] [] ? dump_stack+0x40/0x50 [ 908.984193] [] ? warn_slowpath_common+0x81/0xb0 [ 908.984196] [] ? warn_slowpath_fmt+0x4a/0x50 [ 908.984199] [] ? remove_proc_entry+0x112/0x160 [ 908.984205] [] ? bond_destroy_proc_dir+0x26/0x30 [bonding] [ 908.984208] [] ? bond_net_exit+0x8e/0xa0 [bonding] [ 908.984217] [] ? ops_exit_list.isra.4+0x37/0x70 [ 908.984225] [] ? unregister_pernet_operations+0x8d/0xd0 [ 908.984228] [] ? unregister_pernet_subsys+0x1d/0x30 [ 908.984232] [] ? bonding_exit+0x23/0xdba [bonding] [ 908.984236] [] ? SyS_delete_module+0x18a/0x250 [ 908.984241] [] ? task_work_run+0x89/0xc0 [ 908.984244] [] ? entry_SYSCALL_64_fastpath+0x16/0x75 [ 908.984247] ---[ end trace 7c006ed4abbef24b ]--- Thus remove the proc entry manually if bond_release_and_destroy() is used. 
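The underlying rule is the usual teardown ordering for drivers that keep per-device procfs entries: the proc entry must be gone before the owning netdevice is unregistered, otherwise the parent directory is still populated when the module's cleanup path tries to remove it. A rough generic sketch of that ordering, using plain procfs and netdevice calls rather than the bonding helpers from the actual patch, with the driver structure invented purely for illustration (caller assumed to hold RTNL):

#include <linux/proc_fs.h>
#include <linux/netdevice.h>

/* Hypothetical driver state: one proc entry per net_device. */
struct demo_priv {
	struct proc_dir_entry *proc;
};

static struct proc_dir_entry *demo_proc_dir;	/* e.g. /proc/net/demo */

static void demo_destroy_dev(struct net_device *dev)
{
	struct demo_priv *priv = netdev_priv(dev);

	/* Drop the per-device proc entry first... */
	if (priv->proc) {
		remove_proc_entry(dev->name, demo_proc_dir);
		priv->proc = NULL;
	}

	/* ...then let the netdevice go away.  In the opposite order the
	 * entry is leaked and a later remove_proc_entry() on the parent
	 * directory warns about it being non-empty, as in the trace above.
	 */
	unregister_netdevice(dev);
}
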
Because of the checks in bond_remove_proc_entry() it's not a problem for a bond device to change namespaces (the bug fixed by the Fixes commit) but since commit f9399814927ad ("bonding: Don't allow bond devices to change network namespaces.") that can't happen anyway. Reported-by: Carol Soto Signed-off-by: Nikolay Aleksandrov Fixes: a64d49c3dd50 ("bonding: Manage /proc/net/bonding/ entries from the netdev events") Tested-by: Carol L Soto Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/bonding/bond_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index d5fe5d5f490f3..6a4e5232f40d9 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1902,6 +1902,7 @@ static int bond_release_and_destroy(struct net_device *bond_dev, bond_dev->priv_flags |= IFF_DISABLE_NETPOLL; netdev_info(bond_dev, "Destroying bond %s\n", bond_dev->name); + bond_remove_proc_entry(bond); unregister_netdevice(bond_dev); } return ret; From a1f2fcd2d6adc73a0d40247573266d3c668b3b52 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 20 Jul 2015 17:55:38 +0800 Subject: [PATCH 0935/2091] Revert "sit: Add gro callbacks to sit_offload" [ Upstream commit fdbf5b097bbd9693a86c0b8bfdd071a9a2117cfc ] This patch reverts 19424e052fb44da2f00d1a868cbb51f3e9f4bbb5 ("sit: Add gro callbacks to sit_offload") because it generates packets that cannot be handled even by our own GSO. Reported-by: Wolfgang Walter Signed-off-by: Herbert Xu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_offload.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index e893cd18612fc..08b62047c67f3 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -292,8 +292,6 @@ static struct packet_offload ipv6_packet_offload __read_mostly = { static const struct net_offload sit_offload = { .callbacks = { .gso_segment = ipv6_gso_segment, - .gro_receive = ipv6_gro_receive, - .gro_complete = ipv6_gro_complete, }, }; From 68c2a15f7246e095707471935ac6281386493a7f Mon Sep 17 00:00:00 2001 From: dingtianhong Date: Thu, 16 Jul 2015 16:30:02 +0800 Subject: [PATCH 0936/2091] bonding: correct the MAC address for "follow" fail_over_mac policy [ Upstream commit a951bc1e6ba58f11df5ed5ddc41311e10f5fd20b ] The "follow" fail_over_mac policy is useful for multiport devices that either become confused or incur a performance penalty when multiple ports are programmed with the same MAC address, but the same MAC address still may happened by this steps for this policy: 1) echo +eth0 > /sys/class/net/bond0/bonding/slaves bond0 has the same mac address with eth0, it is MAC1. 2) echo +eth1 > /sys/class/net/bond0/bonding/slaves eth1 is backup, eth1 has MAC2. 3) ifconfig eth0 down eth1 became active slave, bond will swap MAC for eth0 and eth1, so eth1 has MAC1, and eth0 has MAC2. 4) ifconfig eth1 down there is no active slave, and eth1 still has MAC1, eth2 has MAC2. 5) ifconfig eth0 up the eth0 became active slave again, the bond set eth0 to MAC1. Something wrong here, then if you set eth1 up, the eth0 and eth1 will have the same MAC address, it will break this policy for ACTIVE_BACKUP mode. This patch will fix this problem by finding the old active slave and swap them MAC address before change active slave. Signed-off-by: Ding Tianhong Tested-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/bonding/bond_main.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 6a4e5232f40d9..16d87bf8ac3cd 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -625,6 +625,23 @@ static void bond_set_dev_addr(struct net_device *bond_dev, call_netdevice_notifiers(NETDEV_CHANGEADDR, bond_dev); } +static struct slave *bond_get_old_active(struct bonding *bond, + struct slave *new_active) +{ + struct slave *slave; + struct list_head *iter; + + bond_for_each_slave(bond, slave, iter) { + if (slave == new_active) + continue; + + if (ether_addr_equal(bond->dev->dev_addr, slave->dev->dev_addr)) + return slave; + } + + return NULL; +} + /* bond_do_fail_over_mac * * Perform special MAC address swapping for fail_over_mac settings @@ -652,6 +669,9 @@ static void bond_do_fail_over_mac(struct bonding *bond, if (!new_active) return; + if (!old_active) + old_active = bond_get_old_active(bond, new_active); + if (old_active) { ether_addr_copy(tmp_mac, new_active->dev->dev_addr); ether_addr_copy(saddr.sa_data, From 9238d30c469f797598ef6eb616c68fabe67532e6 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 17 Jul 2015 22:38:43 +0200 Subject: [PATCH 0937/2091] sched: cls_bpf: fix panic on filter replace [ Upstream commit f6bfc46da6292b630ba389592123f0dd02066172 ] The following test case causes a NULL pointer dereference in cls_bpf: FOO="1,6 0 0 4294967295," tc filter add dev foo parent 1: bpf bytecode "$FOO" flowid 1:1 action ok tc filter replace dev foo parent 1: pref 49152 handle 0x1 \ bpf bytecode "$FOO" flowid 1:1 action drop The problem is that commit 1f947bf151e9 ("net: sched: rcu'ify cls_bpf") accidentally swapped the arguments of list_replace_rcu(), the old element needs to be the first argument and the new element the second. Fixes: 1f947bf151e9 ("net: sched: rcu'ify cls_bpf") Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/cls_bpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 91bd9c19471d5..c0b86f2bfe221 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -364,7 +364,7 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, goto errout; if (oldprog) { - list_replace_rcu(&prog->link, &oldprog->link); + list_replace_rcu(&oldprog->link, &prog->link); tcf_unbind_filter(tp, &oldprog->res); call_rcu(&oldprog->rcu, __cls_bpf_delete_prog); } else { From dcdd14ea0bd3e1fc668c22baf7d2992a3062e33a Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 17 Jul 2015 22:38:45 +0200 Subject: [PATCH 0938/2091] sched: cls_flow: fix panic on filter replace [ Upstream commit 32b2f4b196b37695fdb42b31afcbc15399d6ef91 ] The following test case causes a NULL pointer dereference in cls_flow: tc filter add dev foo parent 1: handle 0x1 flow hash keys dst action ok tc filter replace dev foo parent 1: pref 49152 handle 0x1 \ flow hash keys mark action drop To be more precise, actually two different panics are fixed, the first occurs because tcf_exts_init() is not called on the newly allocated filter when we do a replace. And the second panic uncovered after that happens since the arguments of list_replace_rcu() are swapped, the old element needs to be the first argument and the new element the second. 
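Both this fix and the preceding cls_bpf one restore the same calling convention, which is easy to invert because the call reads plausibly in either order: list_replace_rcu() in <linux/rculist.h> takes the element that is already on the list first and its replacement second. A minimal sketch of the correct order, with a made-up filter structure standing in for the real cls_bpf/cls_flow ones:

#include <linux/types.h>
#include <linux/rculist.h>
#include <linux/slab.h>

/* Made-up filter node; only the list linkage matters here. */
struct demo_filter {
	struct list_head link;
	struct rcu_head rcu;
	u32 handle;
};

/* Swap @old for @new on an RCU-protected list.  The element already on
 * the list goes first, the replacement second; reversing the arguments
 * (the bug fixed here and in cls_bpf) splices around the wrong node and
 * lets readers walk into freed memory.
 */
static void demo_replace_filter(struct demo_filter *old, struct demo_filter *new)
{
	list_replace_rcu(&old->link, &new->link);

	/* The old node may only be freed after a grace period. */
	kfree_rcu(old, rcu);
}

The non-RCU list_replace() uses the same old-then-new argument order.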
Fixes: 70da9f0bf999 ("net: sched: cls_flow use RCU") Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/cls_flow.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index a620c4e288a51..75df923f5c03c 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -419,6 +419,8 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, if (!fnew) goto err2; + tcf_exts_init(&fnew->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE); + fold = (struct flow_filter *)*arg; if (fold) { err = -EINVAL; @@ -480,7 +482,6 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, fnew->mask = ~0U; fnew->tp = tp; get_random_bytes(&fnew->hashrnd, 4); - tcf_exts_init(&fnew->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE); } fnew->perturb_timer.function = flow_perturbation; @@ -520,7 +521,7 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, if (*arg == 0) list_add_tail_rcu(&fnew->list, &head->filters); else - list_replace_rcu(&fnew->list, &fold->list); + list_replace_rcu(&fold->list, &fnew->list); *arg = (unsigned long)fnew; From 1d18abda904fa7d299239c6ba17b986a43f36b3c Mon Sep 17 00:00:00 2001 From: Edward Hyunkoo Jee Date: Tue, 21 Jul 2015 09:43:59 +0200 Subject: [PATCH 0939/2091] inet: frags: fix defragmented packet's IP header for af_packet [ Upstream commit 0848f6428ba3a2e42db124d41ac6f548655735bf ] When ip_frag_queue() computes positions, it assumes that the passed sk_buff does not contain L2 headers. However, when PACKET_FANOUT_FLAG_DEFRAG is used, IP reassembly functions can be called on outgoing packets that contain L2 headers. Also, IPv4 checksum is not corrected after reassembly. Fixes: 7736d33f4262 ("packet: Add pre-defragmentation support for ipv4 fanouts.") Signed-off-by: Edward Hyunkoo Jee Signed-off-by: Eric Dumazet Cc: Willem de Bruijn Cc: Jerry Chu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_fragment.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index cc1da6d9cb351..cae22a1a87770 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -342,7 +342,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) ihl = ip_hdrlen(skb); /* Determine the position of this fragment. */ - end = offset + skb->len - ihl; + end = offset + skb->len - skb_network_offset(skb) - ihl; err = -EINVAL; /* Is this the final fragment? */ @@ -372,7 +372,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) goto err; err = -ENOMEM; - if (!pskb_pull(skb, ihl)) + if (!pskb_pull(skb, skb_network_offset(skb) + ihl)) goto err; err = pskb_trim_rcsum(skb, end - offset); @@ -613,6 +613,9 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, iph->frag_off = qp->q.max_size ? htons(IP_DF) : 0; iph->tot_len = htons(len); iph->tos |= ecn; + + ip_send_check(iph); + IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS); qp->q.fragments = NULL; qp->q.fragments_tail = NULL; From b265c3003196a76f9b88d54e098e982e7c55647c Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 21 Jul 2015 16:33:50 +0200 Subject: [PATCH 0940/2091] netlink: don't hold mutex in rcu callback when releasing mmapd ring [ Upstream commit 0470eb99b4721586ccac954faac3fa4472da0845 ] Kirill A. 
Shutemov says: This simple test-case trigers few locking asserts in kernel: int main(int argc, char **argv) { unsigned int block_size = 16 * 4096; struct nl_mmap_req req = { .nm_block_size = block_size, .nm_block_nr = 64, .nm_frame_size = 16384, .nm_frame_nr = 64 * block_size / 16384, }; unsigned int ring_size; int fd; fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); if (setsockopt(fd, SOL_NETLINK, NETLINK_RX_RING, &req, sizeof(req)) < 0) exit(1); if (setsockopt(fd, SOL_NETLINK, NETLINK_TX_RING, &req, sizeof(req)) < 0) exit(1); ring_size = req.nm_block_nr * req.nm_block_size; mmap(NULL, 2 * ring_size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); return 0; } +++ exited with 0 +++ BUG: sleeping function called from invalid context at /home/kas/git/public/linux-mm/kernel/locking/mutex.c:616 in_atomic(): 1, irqs_disabled(): 0, pid: 1, name: init 3 locks held by init/1: #0: (reboot_mutex){+.+...}, at: [] SyS_reboot+0xa9/0x220 #1: ((reboot_notifier_list).rwsem){.+.+..}, at: [] __blocking_notifier_call_chain+0x39/0x70 #2: (rcu_callback){......}, at: [] rcu_do_batch.isra.49+0x160/0x10c0 Preemption disabled at:[] __delay+0xf/0x20 CPU: 1 PID: 1 Comm: init Not tainted 4.1.0-00009-gbddf4c4818e0 #253 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS Debian-1.8.2-1 04/01/2014 ffff88017b3d8000 ffff88027bc03c38 ffffffff81929ceb 0000000000000102 0000000000000000 ffff88027bc03c68 ffffffff81085a9d 0000000000000002 ffffffff81ca2a20 0000000000000268 0000000000000000 ffff88027bc03c98 Call Trace: [] dump_stack+0x4f/0x7b [] ___might_sleep+0x16d/0x270 [] __might_sleep+0x4d/0x90 [] mutex_lock_nested+0x2f/0x430 [] ? _raw_spin_unlock_irqrestore+0x5d/0x80 [] ? __this_cpu_preempt_check+0x13/0x20 [] netlink_set_ring+0x1ed/0x350 [] ? netlink_undo_bind+0x70/0x70 [] netlink_sock_destruct+0x80/0x150 [] __sk_free+0x1d/0x160 [] sk_free+0x19/0x20 [..] Cong Wang says: We can't hold mutex lock in a rcu callback, [..] Thomas Graf says: The socket should be dead at this point. It might be simpler to add a netlink_release_ring() function which doesn't require locking at all. Reported-by: "Kirill A. Shutemov" Diagnosed-by: Cong Wang Suggested-by: Thomas Graf Signed-off-by: Florian Westphal Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/netlink/af_netlink.c | 79 ++++++++++++++++++++++++---------------- 1 file changed, 47 insertions(+), 32 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index bf6e76643f787..ea5ed7af97eb4 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -355,25 +355,52 @@ static void **alloc_pg_vec(struct netlink_sock *nlk, return NULL; } + +static void +__netlink_set_ring(struct sock *sk, struct nl_mmap_req *req, bool tx_ring, void **pg_vec, + unsigned int order) +{ + struct netlink_sock *nlk = nlk_sk(sk); + struct sk_buff_head *queue; + struct netlink_ring *ring; + + queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue; + ring = tx_ring ? 
&nlk->tx_ring : &nlk->rx_ring; + + spin_lock_bh(&queue->lock); + + ring->frame_max = req->nm_frame_nr - 1; + ring->head = 0; + ring->frame_size = req->nm_frame_size; + ring->pg_vec_pages = req->nm_block_size / PAGE_SIZE; + + swap(ring->pg_vec_len, req->nm_block_nr); + swap(ring->pg_vec_order, order); + swap(ring->pg_vec, pg_vec); + + __skb_queue_purge(queue); + spin_unlock_bh(&queue->lock); + + WARN_ON(atomic_read(&nlk->mapped)); + + if (pg_vec) + free_pg_vec(pg_vec, order, req->nm_block_nr); +} + static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req, - bool closing, bool tx_ring) + bool tx_ring) { struct netlink_sock *nlk = nlk_sk(sk); struct netlink_ring *ring; - struct sk_buff_head *queue; void **pg_vec = NULL; unsigned int order = 0; - int err; ring = tx_ring ? &nlk->tx_ring : &nlk->rx_ring; - queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue; - if (!closing) { - if (atomic_read(&nlk->mapped)) - return -EBUSY; - if (atomic_read(&ring->pending)) - return -EBUSY; - } + if (atomic_read(&nlk->mapped)) + return -EBUSY; + if (atomic_read(&ring->pending)) + return -EBUSY; if (req->nm_block_nr) { if (ring->pg_vec != NULL) @@ -405,31 +432,19 @@ static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req, return -EINVAL; } - err = -EBUSY; mutex_lock(&nlk->pg_vec_lock); - if (closing || atomic_read(&nlk->mapped) == 0) { - err = 0; - spin_lock_bh(&queue->lock); - - ring->frame_max = req->nm_frame_nr - 1; - ring->head = 0; - ring->frame_size = req->nm_frame_size; - ring->pg_vec_pages = req->nm_block_size / PAGE_SIZE; - - swap(ring->pg_vec_len, req->nm_block_nr); - swap(ring->pg_vec_order, order); - swap(ring->pg_vec, pg_vec); - - __skb_queue_purge(queue); - spin_unlock_bh(&queue->lock); - - WARN_ON(atomic_read(&nlk->mapped)); + if (atomic_read(&nlk->mapped) == 0) { + __netlink_set_ring(sk, req, tx_ring, pg_vec, order); + mutex_unlock(&nlk->pg_vec_lock); + return 0; } + mutex_unlock(&nlk->pg_vec_lock); if (pg_vec) free_pg_vec(pg_vec, order, req->nm_block_nr); - return err; + + return -EBUSY; } static void netlink_mm_open(struct vm_area_struct *vma) @@ -898,10 +913,10 @@ static void netlink_sock_destruct(struct sock *sk) memset(&req, 0, sizeof(req)); if (nlk->rx_ring.pg_vec) - netlink_set_ring(sk, &req, true, false); + __netlink_set_ring(sk, &req, false, NULL, 0); memset(&req, 0, sizeof(req)); if (nlk->tx_ring.pg_vec) - netlink_set_ring(sk, &req, true, true); + __netlink_set_ring(sk, &req, true, NULL, 0); } #endif /* CONFIG_NETLINK_MMAP */ @@ -2197,7 +2212,7 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, return -EINVAL; if (copy_from_user(&req, optval, sizeof(req))) return -EFAULT; - err = netlink_set_ring(sk, &req, false, + err = netlink_set_ring(sk, &req, optname == NETLINK_TX_RING); break; } From 51677b722338da3671ef19846616cf3811253760 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 15 Jul 2015 15:26:19 +0300 Subject: [PATCH 0941/2091] virtio_net: don't require ANY_LAYOUT with VERSION_1 [ Upstream commit 75993300d008f418ee2569a632185fc1d7d50674 ] ANY_LAYOUT is a compatibility feature. It's implied for VERSION_1 devices, and non-transitional devices might not offer it. Change code to behave accordingly. Signed-off-by: Michael S. Tsirkin Reviewed-by: Paolo Bonzini Reviewed-by: Stefan Hajnoczi Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/virtio_net.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 63c7810e1545a..7fbca37a1adff 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1828,7 +1828,8 @@ static int virtnet_probe(struct virtio_device *vdev) else vi->hdr_len = sizeof(struct virtio_net_hdr); - if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT)) + if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT) || + virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) vi->any_header_sg = true; if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) From 90ec7452b5804da4e0a0041e25c4ee74e06578db Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 22 Jul 2015 13:03:40 +0200 Subject: [PATCH 0942/2091] bridge: netlink: fix slave_changelink/br_setport race conditions [ Upstream commit 963ad94853000ab100f5ff19eea80095660d41b4 ] Since slave_changelink support was added there have been a few race conditions when using br_setport() since some of the port functions it uses require the bridge lock. It is very easy to trigger a lockup due to some internal spin_lock() usage without bh disabled, also it's possible to get the bridge into an inconsistent state. Signed-off-by: Nikolay Aleksandrov Fixes: 3ac636b8591c ("bridge: implement rtnl_link_ops->slave_changelink") Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_netlink.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 4b5c236998ff1..20e06a9a1820a 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -711,9 +711,17 @@ static int br_port_slave_changelink(struct net_device *brdev, struct nlattr *tb[], struct nlattr *data[]) { + struct net_bridge *br = netdev_priv(brdev); + int ret; + if (!data) return 0; - return br_setport(br_port_get_rtnl(dev), data); + + spin_lock_bh(&br->lock); + ret = br_setport(br_port_get_rtnl(dev), data); + spin_unlock_bh(&br->lock); + + return ret; } static int br_port_fill_slave_info(struct sk_buff *skb, From f75d70aa248dad6038eb5b49b5b816ce96e8ae7e Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Wed, 22 Jul 2015 16:53:47 +0300 Subject: [PATCH 0943/2091] net/mlx4_core: Fix wrong index in propagating port change event to VFs [ Upstream commit 1c1bf34951e8d17941bf708d1901c47e81b15d55 ] The port-change event processing in procedure mlx4_eq_int() uses "slave" as the vf_oper array index. Since the value of "slave" is the PF function index, the result is that the PF link state is used for deciding to propagate the event for all the VFs. The VF link state should be used, so the VF function index should be used here. Fixes: 948e306d7d64 ('net/mlx4: Add VF link state support') Signed-off-by: Jack Morgenstein Signed-off-by: Matan Barak Signed-off-by: Or Gerlitz Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx4/eq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index 2619c9fbf42df..983b1d51244df 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -573,7 +573,7 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) continue; mlx4_dbg(dev, "%s: Sending MLX4_PORT_CHANGE_SUBTYPE_DOWN to slave: %d, port:%d\n", __func__, i, port); - s_info = &priv->mfunc.master.vf_oper[slave].vport[port].state; + s_info = &priv->mfunc.master.vf_oper[i].vport[port].state; if (IFLA_VF_LINK_STATE_AUTO == s_info->link_state) { eqe->event.port_change.port = cpu_to_be32( @@ -608,7 +608,7 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) continue; if (i == mlx4_master_func_num(dev)) continue; - s_info = &priv->mfunc.master.vf_oper[slave].vport[port].state; + s_info = &priv->mfunc.master.vf_oper[i].vport[port].state; if (IFLA_VF_LINK_STATE_AUTO == s_info->link_state) { eqe->event.port_change.port = cpu_to_be32( From d1f56d1041b423c2c9c767b03eaade32f5463efc Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 27 Jul 2015 13:08:06 -0700 Subject: [PATCH 0944/2091] fib_trie: Drop unnecessary calls to leaf_pull_suffix [ Upstream commit 1513069edcf8dd86cfd8d5daef482b97d6b93df6 ] It was reported that update_suffix was taking a long time on systems where a large number of leaves were attached to a single node. As it turns out fib_table_flush was calling update_suffix for each leaf that didn't have all of the aliases stripped from it. As a result, on this large node removing one leaf would result in us calling update_suffix for every other leaf on the node. The fix is to just remove the calls to leaf_pull_suffix since they are redundant as we already have a call in resize that will go through and update the suffix length for the node before we exit out of fib_table_flush or fib_table_flush_external. Reported-by: David Ahern Signed-off-by: Alexander Duyck Tested-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/fib_trie.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 09b62e17dd8cb..210ceca2640c1 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1780,8 +1780,6 @@ void fib_table_flush_external(struct fib_table *tb) if (hlist_empty(&n->leaf)) { put_child_root(pn, n->key, NULL); node_free(n); - } else { - leaf_pull_suffix(pn, n); } } } @@ -1852,8 +1850,6 @@ int fib_table_flush(struct fib_table *tb) if (hlist_empty(&n->leaf)) { put_child_root(pn, n->key, NULL); node_free(n); - } else { - leaf_pull_suffix(pn, n); } } From d339d03860f21b3f27a674312df09b6ce92c0c42 Mon Sep 17 00:00:00 2001 From: Lars Westerhoff Date: Tue, 28 Jul 2015 01:32:21 +0300 Subject: [PATCH 0945/2091] packet: missing dev_put() in packet_do_bind() [ Upstream commit 158cd4af8dedbda0d612d448c724c715d0dda649 ] When binding a PF_PACKET socket, the use count of the bound interface is always increased with dev_hold in dev_get_by_{index,name}. However, when rebound with the same protocol and device as in the previous bind the use count of the interface was not decreased. Ultimately, this caused the deletion of the interface to fail with the following message: unregister_netdevice: waiting for dummy0 to become free. 
Usage count = 1 This patch moves the dev_put out of the conditional part that was only executed when either the protocol or device changed on a bind. Fixes: 902fefb82ef7 ('packet: improve socket create/bind latency in some cases') Signed-off-by: Lars Westerhoff Signed-off-by: Dan Carpenter Reviewed-by: Daniel Borkmann Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/packet/af_packet.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index fe1610ddeacf7..733ef5a56dee9 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2688,7 +2688,7 @@ static int packet_release(struct socket *sock) static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 proto) { struct packet_sock *po = pkt_sk(sk); - const struct net_device *dev_curr; + struct net_device *dev_curr; __be16 proto_curr; bool need_rehook; @@ -2712,15 +2712,13 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 proto) po->num = proto; po->prot_hook.type = proto; - - if (po->prot_hook.dev) - dev_put(po->prot_hook.dev); - po->prot_hook.dev = dev; po->ifindex = dev ? dev->ifindex : 0; packet_cached_dev_assign(po, dev); } + if (dev_curr) + dev_put(dev_curr); if (proto == 0 || !need_rehook) goto out_unlock; From 2ef3d434d30f074fc744f86a956239feea922b04 Mon Sep 17 00:00:00 2001 From: Alexander Drozdov Date: Tue, 28 Jul 2015 13:57:01 +0300 Subject: [PATCH 0946/2091] packet: tpacket_snd(): fix signed/unsigned comparison [ Upstream commit dbd46ab412b8fb395f2b0ff6f6a7eec9df311550 ] tpacket_fill_skb() can return a negative value (-errno) which is stored in tp_len variable. In that case the following condition will be (but shouldn't be) true: tp_len > dev->mtu + dev->hard_header_len as dev->mtu and dev->hard_header_len are both unsigned. That may lead to just returning an incorrect EMSGSIZE errno to the user. Fixes: 52f1454f629fa ("packet: allow to transmit +4 byte in TX_RING slot for VLAN case") Signed-off-by: Alexander Drozdov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/packet/af_packet.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 733ef5a56dee9..e1ea5d43b01ef 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2307,7 +2307,8 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) } tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto, addr, hlen); - if (tp_len > dev->mtu + dev->hard_header_len) { + if (likely(tp_len >= 0) && + tp_len > dev->mtu + dev->hard_header_len) { struct ethhdr *ehdr; /* Earlier code assumed this would be a VLAN pkt, * double-check this now that we have the actual From e04f76d6215119252fc7d0b8033d4529f931f9f9 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 29 Jul 2015 18:40:56 +0200 Subject: [PATCH 0947/2091] act_bpf: fix memory leaks when replacing bpf programs [ Upstream commit f4eaed28c7834fc049c754f63e6988bbd73778d9 ] We currently trigger multiple memory leaks when replacing bpf actions, besides others: comm "tc", pid 1909, jiffies 4294851310 (age 1602.796s) hex dump (first 32 bytes): 01 00 00 00 03 00 00 00 00 00 00 00 00 00 00 00 ................ 18 b0 98 6d 00 88 ff ff 00 00 00 00 00 00 00 00 ...m............ 
backtrace: [] kmemleak_alloc+0x4e/0xb0 [] __vmalloc_node_range+0x1bd/0x2c0 [] __vmalloc+0x4a/0x50 [] bpf_prog_alloc+0x3a/0xa0 [] bpf_prog_create+0x44/0xa0 [] tcf_bpf_init+0x28b/0x3c0 [act_bpf] [] tcf_action_init_1+0x191/0x1b0 [] tcf_action_init+0x82/0xf0 [] tcf_exts_validate+0xb2/0xc0 [] cls_bpf_modify_existing+0x98/0x340 [cls_bpf] [] cls_bpf_change+0x1a6/0x274 [cls_bpf] [] tc_ctl_tfilter+0x335/0x910 [] rtnetlink_rcv_msg+0x95/0x240 [] netlink_rcv_skb+0xaf/0xc0 [] rtnetlink_rcv+0x2e/0x40 [] netlink_unicast+0xef/0x1b0 Issue is that the old content from tcf_bpf is allocated and needs to be released when we replace it. We seem to do that since the beginning of act_bpf on the filter and insns, later on the name as well. Example test case, after patch: # FOO="1,6 0 0 4294967295," # BAR="1,6 0 0 4294967294," # tc actions add action bpf bytecode "$FOO" index 2 # tc actions show action bpf action order 0: bpf bytecode '1,6 0 0 4294967295' default-action pipe index 2 ref 1 bind 0 # tc actions replace action bpf bytecode "$BAR" index 2 # tc actions show action bpf action order 0: bpf bytecode '1,6 0 0 4294967294' default-action pipe index 2 ref 1 bind 0 # tc actions replace action bpf bytecode "$FOO" index 2 # tc actions show action bpf action order 0: bpf bytecode '1,6 0 0 4294967295' default-action pipe index 2 ref 1 bind 0 # tc actions del action bpf index 2 [...] # echo "scan" > /sys/kernel/debug/kmemleak # cat /sys/kernel/debug/kmemleak | grep "comm \"tc\"" | wc -l 0 Fixes: d23b8ad8ab23 ("tc: add BPF based action") Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/act_bpf.c | 50 +++++++++++++++++++++++++++++++-------------- 1 file changed, 35 insertions(+), 15 deletions(-) diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index dc6a2d324bd81..521ffca91228a 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -27,9 +27,10 @@ struct tcf_bpf_cfg { struct bpf_prog *filter; struct sock_filter *bpf_ops; - char *bpf_name; + const char *bpf_name; u32 bpf_fd; u16 bpf_num_ops; + bool is_ebpf; }; static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act, @@ -200,6 +201,7 @@ static int tcf_bpf_init_from_ops(struct nlattr **tb, struct tcf_bpf_cfg *cfg) cfg->bpf_ops = bpf_ops; cfg->bpf_num_ops = bpf_num_ops; cfg->filter = fp; + cfg->is_ebpf = false; return 0; } @@ -234,18 +236,40 @@ static int tcf_bpf_init_from_efd(struct nlattr **tb, struct tcf_bpf_cfg *cfg) cfg->bpf_fd = bpf_fd; cfg->bpf_name = name; cfg->filter = fp; + cfg->is_ebpf = true; return 0; } +static void tcf_bpf_cfg_cleanup(const struct tcf_bpf_cfg *cfg) +{ + if (cfg->is_ebpf) + bpf_prog_put(cfg->filter); + else + bpf_prog_destroy(cfg->filter); + + kfree(cfg->bpf_ops); + kfree(cfg->bpf_name); +} + +static void tcf_bpf_prog_fill_cfg(const struct tcf_bpf *prog, + struct tcf_bpf_cfg *cfg) +{ + cfg->is_ebpf = tcf_bpf_is_ebpf(prog); + cfg->filter = prog->filter; + + cfg->bpf_ops = prog->bpf_ops; + cfg->bpf_name = prog->bpf_name; +} + static int tcf_bpf_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action *act, int replace, int bind) { struct nlattr *tb[TCA_ACT_BPF_MAX + 1]; + struct tcf_bpf_cfg cfg, old; struct tc_act_bpf *parm; struct tcf_bpf *prog; - struct tcf_bpf_cfg cfg; bool is_bpf, is_ebpf; int ret; @@ -294,6 +318,9 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, prog = to_bpf(act); spin_lock_bh(&prog->tcf_lock); + if (ret != ACT_P_CREATED) + tcf_bpf_prog_fill_cfg(prog, &old); + prog->bpf_ops = cfg.bpf_ops; 
prog->bpf_name = cfg.bpf_name; @@ -309,29 +336,22 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, if (ret == ACT_P_CREATED) tcf_hash_insert(act); + else + tcf_bpf_cfg_cleanup(&old); return ret; destroy_fp: - if (is_ebpf) - bpf_prog_put(cfg.filter); - else - bpf_prog_destroy(cfg.filter); - - kfree(cfg.bpf_ops); - kfree(cfg.bpf_name); - + tcf_bpf_cfg_cleanup(&cfg); return ret; } static void tcf_bpf_cleanup(struct tc_action *act, int bind) { - const struct tcf_bpf *prog = act->priv; + struct tcf_bpf_cfg tmp; - if (tcf_bpf_is_ebpf(prog)) - bpf_prog_put(prog->filter); - else - bpf_prog_destroy(prog->filter); + tcf_bpf_prog_fill_cfg(act->priv, &tmp); + tcf_bpf_cfg_cleanup(&tmp); } static struct tc_action_ops act_bpf_ops __read_mostly = { From 563071d6899c56701a47e945c5f39673bfcb0d38 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 29 Jul 2015 23:35:25 +0200 Subject: [PATCH 0948/2091] net: sched: fix refcount imbalance in actions [ Upstream commit 28e6b67f0b292f557468c139085303b15f1a678f ] Since commit 55334a5db5cd ("net_sched: act: refuse to remove bound action outside"), we end up with a wrong reference count for a tc action. Test case 1: FOO="1,6 0 0 4294967295," BAR="1,6 0 0 4294967294," tc filter add dev foo parent 1: bpf bytecode "$FOO" flowid 1:1 \ action bpf bytecode "$FOO" tc actions show action bpf action order 0: bpf bytecode '1,6 0 0 4294967295' default-action pipe index 1 ref 1 bind 1 tc actions replace action bpf bytecode "$BAR" index 1 tc actions show action bpf action order 0: bpf bytecode '1,6 0 0 4294967294' default-action pipe index 1 ref 2 bind 1 tc actions replace action bpf bytecode "$FOO" index 1 tc actions show action bpf action order 0: bpf bytecode '1,6 0 0 4294967295' default-action pipe index 1 ref 3 bind 1 Test case 2: FOO="1,6 0 0 4294967295," tc filter add dev foo parent 1: bpf bytecode "$FOO" flowid 1:1 action ok tc actions show action gact action order 0: gact action pass random type none pass val 0 index 1 ref 1 bind 1 tc actions add action drop index 1 RTNETLINK answers: File exists [...] tc actions show action gact action order 0: gact action pass random type none pass val 0 index 1 ref 2 bind 1 tc actions add action drop index 1 RTNETLINK answers: File exists [...] tc actions show action gact action order 0: gact action pass random type none pass val 0 index 1 ref 3 bind 1 What happens is that in tcf_hash_check(), we check tcf_common for a given index and increase tcfc_refcnt and conditionally tcfc_bindcnt when we've found an existing action. Now there are the following cases: 1) We do a late binding of an action. In that case, we leave the tcfc_refcnt/tcfc_bindcnt increased and are done with the ->init() handler. This is correctly handeled. 2) We replace the given action, or we try to add one without replacing and find out that the action at a specific index already exists (thus, we go out with error in that case). In case of 2), we have to undo the reference count increase from tcf_hash_check() in the tcf_hash_check() function. Currently, we fail to do so because of the 'tcfc_bindcnt > 0' check which bails out early with an -EPERM error. Now, while commit 55334a5db5cd prevents 'tc actions del action ...' on an already classifier-bound action to drop the reference count (which could then become negative, wrap around etc), this restriction only accounts for invocations outside a specific action's ->init() handler. 
One possible solution would be to add a flag thus we possibly trigger the -EPERM ony in situations where it is indeed relevant. After the patch, above test cases have correct reference count again. Fixes: 55334a5db5cd ("net_sched: act: refuse to remove bound action outside") Signed-off-by: Daniel Borkmann Reviewed-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/act_api.h | 8 +++++++- net/sched/act_api.c | 11 ++++++----- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/include/net/act_api.h b/include/net/act_api.h index 3ee4c92afd1bd..931738bc5bba3 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -99,7 +99,6 @@ struct tc_action_ops { int tcf_hash_search(struct tc_action *a, u32 index); void tcf_hash_destroy(struct tc_action *a); -int tcf_hash_release(struct tc_action *a, int bind); u32 tcf_hash_new_index(struct tcf_hashinfo *hinfo); int tcf_hash_check(u32 index, struct tc_action *a, int bind); int tcf_hash_create(u32 index, struct nlattr *est, struct tc_action *a, @@ -107,6 +106,13 @@ int tcf_hash_create(u32 index, struct nlattr *est, struct tc_action *a, void tcf_hash_cleanup(struct tc_action *a, struct nlattr *est); void tcf_hash_insert(struct tc_action *a); +int __tcf_hash_release(struct tc_action *a, bool bind, bool strict); + +static inline int tcf_hash_release(struct tc_action *a, bool bind) +{ + return __tcf_hash_release(a, bind, false); +} + int tcf_register_action(struct tc_action_ops *a, unsigned int mask); int tcf_unregister_action(struct tc_action_ops *a); int tcf_action_destroy(struct list_head *actions, int bind); diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 3d43e4979f27c..f8d9c2a2c451d 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -45,7 +45,7 @@ void tcf_hash_destroy(struct tc_action *a) } EXPORT_SYMBOL(tcf_hash_destroy); -int tcf_hash_release(struct tc_action *a, int bind) +int __tcf_hash_release(struct tc_action *a, bool bind, bool strict) { struct tcf_common *p = a->priv; int ret = 0; @@ -53,7 +53,7 @@ int tcf_hash_release(struct tc_action *a, int bind) if (p) { if (bind) p->tcfc_bindcnt--; - else if (p->tcfc_bindcnt > 0) + else if (strict && p->tcfc_bindcnt > 0) return -EPERM; p->tcfc_refcnt--; @@ -64,9 +64,10 @@ int tcf_hash_release(struct tc_action *a, int bind) ret = 1; } } + return ret; } -EXPORT_SYMBOL(tcf_hash_release); +EXPORT_SYMBOL(__tcf_hash_release); static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb, struct tc_action *a) @@ -136,7 +137,7 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a) head = &hinfo->htab[tcf_hash(i, hinfo->hmask)]; hlist_for_each_entry_safe(p, n, head, tcfc_head) { a->priv = p; - ret = tcf_hash_release(a, 0); + ret = __tcf_hash_release(a, false, true); if (ret == ACT_P_DELETED) { module_put(a->ops->owner); n_i++; @@ -413,7 +414,7 @@ int tcf_action_destroy(struct list_head *actions, int bind) int ret = 0; list_for_each_entry_safe(a, tmp, actions, list) { - ret = tcf_hash_release(a, bind); + ret = __tcf_hash_release(a, bind, true); if (ret == ACT_P_DELETED) module_put(a->ops->owner); else if (ret < 0) From 715bb7ae43279e24a8d4d35a9ac8288b47a75500 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Sun, 2 Aug 2015 19:29:16 +0200 Subject: [PATCH 0949/2091] rocker: free netdevice during netdevice removal [ Upstream commit 1ebd47efa4e17391dfac8caa349c6a8d35f996d1 ] When removing a port's netdevice in 'rocker_remove_ports', we should also free the allocated 'net_device' structure. 
Do that by calling 'free_netdev' after unregistering it. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Fixes: 4b8ac9660af ("rocker: introduce rocker switch driver") Acked-by: Scott Feldman Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/rocker/rocker.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index cf98cc9bbc8dc..73b6fc21ea00d 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -4587,6 +4587,7 @@ static void rocker_remove_ports(struct rocker *rocker) rocker_port = rocker->ports[i]; rocker_port_ig_tbl(rocker_port, ROCKER_OP_FLAG_REMOVE); unregister_netdev(rocker_port->dev); + free_netdev(rocker_port->dev); } kfree(rocker->ports); } From c1a6ce0483bbdd9cf3489ef9efc312482344f58c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 1 Aug 2015 15:33:26 +0300 Subject: [PATCH 0950/2091] rds: fix an integer overflow test in rds_info_getsockopt() [ Upstream commit 468b732b6f76b138c0926eadf38ac88467dcd271 ] "len" is a signed integer. We check that len is not negative, so it goes from zero to INT_MAX. PAGE_SIZE is unsigned long so the comparison is type promoted to unsigned long. ULONG_MAX - 4095 is a higher than INT_MAX so the condition can never be true. I don't know if this is harmful but it seems safe to limit "len" to INT_MAX - 4095. Fixes: a8c879a7ee98 ('RDS: Info and stats') Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/rds/info.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rds/info.c b/net/rds/info.c index 9a6b4f66187cf..140a44a5f7b7f 100644 --- a/net/rds/info.c +++ b/net/rds/info.c @@ -176,7 +176,7 @@ int rds_info_getsockopt(struct socket *sock, int optname, char __user *optval, /* check for all kinds of wrapping and the like */ start = (unsigned long)optval; - if (len < 0 || len + PAGE_SIZE - 1 < len || start + len < start) { + if (len < 0 || len > INT_MAX - PAGE_SIZE + 1 || start + len < start) { ret = -EINVAL; goto out; } From 89b2791c0fd6938a1c2124589fe1b0f699d4b0d2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 1 Aug 2015 12:14:33 +0200 Subject: [PATCH 0951/2091] udp: fix dst races with multicast early demux MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 10e2eb878f3ca07ac2f05fa5ca5e6c4c9174a27a ] Multicast dst are not cached. They carry DST_NOCACHE. As mentioned in commit f8864972126899 ("ipv4: fix dst race in sk_dst_get()"), these dst need special care before caching them into a socket. Caching them is allowed only if their refcnt was not 0, ie we must use atomic_inc_not_zero() Also, we must use READ_ONCE() to fetch sk->sk_rx_dst, as mentioned in commit d0c294c53a771 ("tcp: prevent fetching dst twice in early demux code") Fixes: 421b3885bf6d ("udp: ipv4: Add udp early demux") Tested-by: Gregory Hoggarth Signed-off-by: Eric Dumazet Reported-by: Gregory Hoggarth Reported-by: Alex Gartrell Cc: Michal Kubeček Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/udp.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 83aa604f9273c..1b8c5ba7d5f73 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1995,12 +1995,19 @@ void udp_v4_early_demux(struct sk_buff *skb) skb->sk = sk; skb->destructor = sock_efree; - dst = sk->sk_rx_dst; + dst = READ_ONCE(sk->sk_rx_dst); if (dst) dst = dst_check(dst, 0); - if (dst) - skb_dst_set_noref(skb, dst); + if (dst) { + /* DST_NOCACHE can not be used without taking a reference */ + if (dst->flags & DST_NOCACHE) { + if (likely(atomic_inc_not_zero(&dst->__refcnt))) + skb_dst_set(skb, dst); + } else { + skb_dst_set_noref(skb, dst); + } + } } int udp_rcv(struct sk_buff *skb) From 9000d23361befc59b04273c7b5ae1bfee2eaa4ee Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 4 Aug 2015 19:06:32 +0200 Subject: [PATCH 0952/2091] bridge: netlink: account for the IFLA_BRPORT_PROXYARP attribute size and policy [ Upstream commit 355b9f9df1f0311f20087350aee8ad96eedca8a9 ] The attribute size wasn't accounted for in the get_slave_size() callback (br_port_get_slave_size) when it was introduced, so fix it now. Also add a policy entry for it in br_port_policy. Signed-off-by: Nikolay Aleksandrov Fixes: 958501163ddd ("bridge: Add support for IEEE 802.11 Proxy ARP") Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_netlink.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 20e06a9a1820a..95ef032df11bd 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -112,6 +112,7 @@ static inline size_t br_port_info_size(void) + nla_total_size(1) /* IFLA_BRPORT_FAST_LEAVE */ + nla_total_size(1) /* IFLA_BRPORT_LEARNING */ + nla_total_size(1) /* IFLA_BRPORT_UNICAST_FLOOD */ + + nla_total_size(1) /* IFLA_BRPORT_PROXYARP */ + 0; } @@ -504,6 +505,7 @@ static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = { [IFLA_BRPORT_FAST_LEAVE]= { .type = NLA_U8 }, [IFLA_BRPORT_LEARNING] = { .type = NLA_U8 }, [IFLA_BRPORT_UNICAST_FLOOD] = { .type = NLA_U8 }, + [IFLA_BRPORT_PROXYARP] = { .type = NLA_U8 }, }; /* Change the state of the port and notify spanning tree */ From b291dba31d0862c9262005ee7a8b38f2377494fa Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 4 Aug 2015 19:06:33 +0200 Subject: [PATCH 0953/2091] bridge: netlink: account for the IFLA_BRPORT_PROXYARP_WIFI attribute size and policy [ Upstream commit 786c2077ec8e9eab37a88fc14aac4309a8061e18 ] The attribute size wasn't accounted for in the get_slave_size() callback (br_port_get_slave_size) when it was introduced, so fix it now. Also add a policy entry for it in br_port_policy. Signed-off-by: Nikolay Aleksandrov Fixes: 842a9ae08a25 ("bridge: Extend Proxy ARP design to allow optional rules for Wi-Fi") Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_netlink.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 95ef032df11bd..a7559ef312bdf 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -113,6 +113,7 @@ static inline size_t br_port_info_size(void) + nla_total_size(1) /* IFLA_BRPORT_LEARNING */ + nla_total_size(1) /* IFLA_BRPORT_UNICAST_FLOOD */ + nla_total_size(1) /* IFLA_BRPORT_PROXYARP */ + + nla_total_size(1) /* IFLA_BRPORT_PROXYARP_WIFI */ + 0; } @@ -506,6 +507,7 @@ static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = { [IFLA_BRPORT_LEARNING] = { .type = NLA_U8 }, [IFLA_BRPORT_UNICAST_FLOOD] = { .type = NLA_U8 }, [IFLA_BRPORT_PROXYARP] = { .type = NLA_U8 }, + [IFLA_BRPORT_PROXYARP_WIFI] = { .type = NLA_U8 }, }; /* Change the state of the port and notify spanning tree */ From 1d79bc603aec343479bc56bc770b8b13c9660523 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Thu, 6 Aug 2015 22:48:23 +0200 Subject: [PATCH 0954/2091] bna: fix interrupts storm caused by erroneous packets [ Upstream commit ade4dc3e616e33c80d7e62855fe1b6f9895bc7c3 ] The commit "e29aa33 bna: Enable Multi Buffer RX" moved packets counter increment from the beginning of the NAPI processing loop after the check for erroneous packets so they are never accounted. This counter is used to inform firmware about number of processed completions (packets). As these packets are never acked the firmware fires IRQs for them again and again. Fixes: e29aa33 ("bna: Enable Multi Buffer RX") Signed-off-by: Ivan Vecera Acked-by: Rasesh Mody Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/brocade/bna/bnad.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/brocade/bna/bnad.c b/drivers/net/ethernet/brocade/bna/bnad.c index caae6cb2bc1a4..a1c30ee60888e 100644 --- a/drivers/net/ethernet/brocade/bna/bnad.c +++ b/drivers/net/ethernet/brocade/bna/bnad.c @@ -675,6 +675,7 @@ bnad_cq_process(struct bnad *bnad, struct bna_ccb *ccb, int budget) if (!next_cmpl->valid) break; } + packets++; /* TODO: BNA_CQ_EF_LOCAL ? */ if (unlikely(flags & (BNA_CQ_EF_MAC_ERROR | @@ -691,7 +692,6 @@ bnad_cq_process(struct bnad *bnad, struct bna_ccb *ccb, int budget) else bnad_cq_setup_skb_frags(rcb, skb, sop_ci, nvecs, len); - packets++; rcb->rxq->rx_packets++; rcb->rxq->rx_bytes += totlen; ccb->bytes_per_intr += totlen; From d397617f757558a18f491e41b5f95a5bcf446d70 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 7 Aug 2015 00:26:41 +0200 Subject: [PATCH 0955/2091] netlink: make sure -EBUSY won't escape from netlink_insert [ Upstream commit 4e7c1330689e27556de407d3fdadc65ffff5eb12 ] Linus reports the following deadlock on rtnl_mutex; triggered only once so far (extract): [12236.694209] NetworkManager D 0000000000013b80 0 1047 1 0x00000000 [12236.694218] ffff88003f902640 0000000000000000 ffffffff815d15a9 0000000000000018 [12236.694224] ffff880119538000 ffff88003f902640 ffffffff81a8ff84 00000000ffffffff [12236.694230] ffffffff81a8ff88 ffff880119c47f00 ffffffff815d133a ffffffff81a8ff80 [12236.694235] Call Trace: [12236.694250] [] ? schedule_preempt_disabled+0x9/0x10 [12236.694257] [] ? schedule+0x2a/0x70 [12236.694263] [] ? schedule_preempt_disabled+0x9/0x10 [12236.694271] [] ? __mutex_lock_slowpath+0x7f/0xf0 [12236.694280] [] ? mutex_lock+0x16/0x30 [12236.694291] [] ? rtnetlink_rcv+0x10/0x30 [12236.694299] [] ? netlink_unicast+0xfb/0x180 [12236.694309] [] ? 
rtnl_getlink+0x113/0x190 [12236.694319] [] ? rtnetlink_rcv_msg+0x7a/0x210 [12236.694331] [] ? sock_has_perm+0x5c/0x70 [12236.694339] [] ? rtnetlink_rcv+0x30/0x30 [12236.694346] [] ? netlink_rcv_skb+0x9c/0xc0 [12236.694354] [] ? rtnetlink_rcv+0x1f/0x30 [12236.694360] [] ? netlink_unicast+0xfb/0x180 [12236.694367] [] ? netlink_sendmsg+0x484/0x5d0 [12236.694376] [] ? __wake_up+0x2f/0x50 [12236.694387] [] ? sock_sendmsg+0x33/0x40 [12236.694396] [] ? ___sys_sendmsg+0x22e/0x240 [12236.694405] [] ? ___sys_recvmsg+0x135/0x1a0 [12236.694415] [] ? eventfd_write+0x82/0x210 [12236.694423] [] ? fsnotify+0x32e/0x4c0 [12236.694429] [] ? wake_up_q+0x60/0x60 [12236.694434] [] ? __sys_sendmsg+0x39/0x70 [12236.694440] [] ? entry_SYSCALL_64_fastpath+0x12/0x6a It seems so far plausible that the recursive call into rtnetlink_rcv() looks suspicious. One way, where this could trigger is that the senders NETLINK_CB(skb).portid was wrongly 0 (which is rtnetlink socket), so the rtnl_getlink() request's answer would be sent to the kernel instead to the actual user process, thus grabbing rtnl_mutex() twice. One theory would be that netlink_autobind() triggered via netlink_sendmsg() internally overwrites the -EBUSY error to 0, but where it is wrongly originating from __netlink_insert() instead. That would reset the socket's portid to 0, which is then filled into NETLINK_CB(skb).portid later on. As commit d470e3b483dc ("[NETLINK]: Fix two socket hashing bugs.") also puts it, -EBUSY should not be propagated from netlink_insert(). It looks like it's very unlikely to reproduce. We need to trigger the rhashtable_insert_rehash() handler under a situation where rehashing currently occurs (one /rare/ way would be to hit ht->elasticity limits while not filled enough to expand the hashtable, but that would rather require a specifically crafted bind() sequence with knowledge about destination slots, seems unlikely). It probably makes sense to guard __netlink_insert() in any case and remap that error. It was suggested that EOVERFLOW might be better than an already overloaded ENOMEM. Reference: http://thread.gmane.org/gmane.linux.network/372676 Reported-by: Linus Torvalds Signed-off-by: Daniel Borkmann Acked-by: Herbert Xu Acked-by: Thomas Graf Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/netlink/af_netlink.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index ea5ed7af97eb4..4856d975492d9 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1094,6 +1094,11 @@ static int netlink_insert(struct sock *sk, u32 portid) err = __netlink_insert(table, sk); if (err) { + /* In case the hashtable backend returns with -EBUSY + * from here, it must not escape to the caller. + */ + if (unlikely(err == -EBUSY)) + err = -EOVERFLOW; if (err == -EEXIST) err = -EADDRINUSE; nlk_sk(sk)->portid = 0; From d36f8434da8c333aa3837cf421a52f3835642759 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 10 Aug 2015 15:07:34 -0700 Subject: [PATCH 0956/2091] inet: fix possible request socket leak [ Upstream commit 3257d8b12f954c462d29de6201664a846328a522 ] In commit b357a364c57c9 ("inet: fix possible panic in reqsk_queue_unlink()"), I missed fact that tcp_check_req() can return the listener socket in one case, and that we must release the request socket refcount or we leak it. Tested: Following packetdrill test template shows the issue 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 +0 bind(3, ..., ...) 
= 0 +0 listen(3, 1) = 0 +0 < S 0:0(0) win 2920 +0 > S. 0:0(0) ack 1 +.002 < . 1:1(0) ack 21 win 2920 +0 > R 21:21(0) Fixes: b357a364c57c9 ("inet: fix possible panic in reqsk_queue_unlink()") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_ipv4.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index fc1c658ec6c18..441ca6f389815 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1348,7 +1348,7 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) req = inet_csk_search_req(sk, th->source, iph->saddr, iph->daddr); if (req) { nsk = tcp_check_req(sk, skb, req, false); - if (!nsk) + if (!nsk || nsk == sk) reqsk_put(req); return nsk; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 3adffb300238e..e541d68dba8bd 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -946,7 +946,7 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb) &ipv6_hdr(skb)->daddr, tcp_v6_iif(skb)); if (req) { nsk = tcp_check_req(sk, skb, req, false); - if (!nsk) + if (!nsk || nsk == sk) reqsk_put(req); return nsk; } From 0c1122ae6107b01e50bb18fa40eb44e7fa492fbc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 10 Aug 2015 09:09:13 -0700 Subject: [PATCH 0957/2091] inet: fix races with reqsk timers [ Upstream commit 2235f2ac75fd2501c251b0b699a9632e80239a6d ] reqsk_queue_destroy() and reqsk_queue_unlink() should use del_timer_sync() instead of del_timer() before calling reqsk_put(), otherwise we could free a req still used by another cpu. But before doing so, reqsk_queue_destroy() must release syn_wait_lock spinlock or risk a dead lock, as reqsk_timer_handler() might need to take this same spinlock from reqsk_queue_unlink() (called from inet_csk_reqsk_queue_drop()) Fixes: fa76ce7328b2 ("inet: get rid of central tcp/dccp listener timer") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/request_sock.c | 8 +++++++- net/ipv4/inet_connection_sock.c | 2 +- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/net/core/request_sock.c b/net/core/request_sock.c index 87b22c0bc08c2..b42f0e26f89e4 100644 --- a/net/core/request_sock.c +++ b/net/core/request_sock.c @@ -103,10 +103,16 @@ void reqsk_queue_destroy(struct request_sock_queue *queue) spin_lock_bh(&queue->syn_wait_lock); while ((req = lopt->syn_table[i]) != NULL) { lopt->syn_table[i] = req->dl_next; + /* Because of following del_timer_sync(), + * we must release the spinlock here + * or risk a dead lock. 
+ */ + spin_unlock_bh(&queue->syn_wait_lock); atomic_inc(&lopt->qlen_dec); - if (del_timer(&req->rsk_timer)) + if (del_timer_sync(&req->rsk_timer)) reqsk_put(req); reqsk_put(req); + spin_lock_bh(&queue->syn_wait_lock); } spin_unlock_bh(&queue->syn_wait_lock); } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 8976ca423a074..b27fc401c6a9a 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -584,7 +584,7 @@ static bool reqsk_queue_unlink(struct request_sock_queue *queue, } spin_unlock(&queue->syn_wait_lock); - if (del_timer(&req->rsk_timer)) + if (del_timer_sync(&req->rsk_timer)) reqsk_put(req); return found; } From b21ee342590aa41e21aa0196bff5af592cc349d0 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sat, 8 Aug 2015 12:58:57 -0700 Subject: [PATCH 0958/2091] net: dsa: Do not override PHY interface if already configured [ Upstream commit 211c504a444710b1d8ce3431ac19f2578602ca27 ] In case we need to divert reads/writes using the slave MII bus, we may have already fetched a valid PHY interface property from Device Tree, and that mode is used by the PHY driver to make configuration decisions. If we could not fetch the "phy-mode" property, we will assign p->phy_interface to PHY_INTERFACE_MODE_NA, such that we can actually check for that condition as to whether or not we should override the interface value. Fixes: 19334920eaf7 ("net: dsa: Set valid phy interface type") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/dsa/slave.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 827cda560a552..57978c5b2c91d 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -732,7 +732,8 @@ static int dsa_slave_phy_connect(struct dsa_slave_priv *p, return -ENODEV; /* Use already configured phy mode */ - p->phy_interface = p->phy->interface; + if (p->phy_interface == PHY_INTERFACE_MODE_NA) + p->phy_interface = p->phy->interface; phy_connect_direct(slave_dev, p->phy, dsa_slave_adjust_link, p->phy_interface); From e55ffaf457bcc8ec4e9d9f56f955971f834d65b3 Mon Sep 17 00:00:00 2001 From: Andy Whitcroft Date: Thu, 13 Aug 2015 20:49:01 +0100 Subject: [PATCH 0959/2091] ipv4: off-by-one in continuation handling in /proc/net/route [ Upstream commit 25b97c016b26039982daaa2c11d83979f93b71ab ] When generating /proc/net/route we emit a header followed by a line for each route. When a short read is performed we will restart this process based on the open file descriptor. When calculating the start point we fail to take into account that the 0th entry is the header. This leads us to skip the first entry when doing a continuation read. This can be easily seen with the comparison below: while read l; do echo "$l"; done A cat /proc/net/route >B diff -bu A B | grep '^[+-]' On my example machine I have approximatly 10KB of route output. There we see the very first non-title element is lost in the while read case, and an entry around the 8K mark in the cat case: +wlan0 00000000 02021EAC 0003 0 0 400 00000000 0 0 0 -tun1 00C0AC0A 00000000 0001 0 0 950 00C0FFFF 0 0 0 Fix up the off-by-one when reaquiring position on continuation. Fixes: 8be33e955cb9 ("fib_trie: Fib walk rcu should take a tnode and key instead of a trie and a leaf") BugLink: http://bugs.launchpad.net/bugs/1483440 Acked-by: Alexander Duyck Signed-off-by: Andy Whitcroft Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/fib_trie.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 210ceca2640c1..0ca933db1b41b 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -2453,7 +2453,7 @@ static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter, key = l->key + 1; iter->pos++; - if (pos-- <= 0) + if (--pos <= 0) break; l = NULL; From c3a0355bddf35457563a0d64e91f9dfca7c80280 Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Fri, 14 Aug 2015 17:41:24 +1000 Subject: [PATCH 0960/2091] cxl: Don't remove AFUs/vPHBs in cxl_reset commit 4e1efb403c1c016ae831bd9988a7d2e5e0af41a0 upstream. If the driver doesn't participate in EEH, the AFUs will be removed by cxl_remove, which will be invoked by EEH. If the driver does particpate in EEH, the vPHB needs to stick around so that the it can particpate. In both cases, we shouldn't remove the AFU/vPHB. Reviewed-by: Cyril Bur Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman Reported-by: Guenter Roeck Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/misc/cxl/pci.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index 9eab76c48e590..4f1b0bdb9cf84 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -781,9 +781,6 @@ int cxl_reset(struct cxl *adapter) dev_info(&dev->dev, "CXL reset\n"); - for (i = 0; i < adapter->slices; i++) - cxl_remove_afu(adapter->afu[i]); - /* pcie_warm_reset requests a fundamental pci reset which includes a * PERST assert/deassert. PERST triggers a loading of the image * if "user" or "factory" is selected in sysfs */ From cbc890891ddaf0240ad669dd9f0e48c599ff3d63 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 29 Sep 2015 19:26:41 +0200 Subject: [PATCH 0961/2091] Linux 4.1.9 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index dbf3baa5fabbb..e071176b2ce6e 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 1 -SUBLEVEL = 8 +SUBLEVEL = 9 EXTRAVERSION = NAME = Series 4800 From 5c02ee314d9f1672283c3c679520b489650936e2 Mon Sep 17 00:00:00 2001 From: huaibin Wang Date: Tue, 25 Aug 2015 16:20:34 +0200 Subject: [PATCH 0962/2091] ip6_gre: release cached dst on tunnel removal [ Upstream commit d4257295ba1b389c693b79de857a96e4b7cd8ac0 ] When a tunnel is deleted, the cached dst entry should be released. This problem may prevent the removal of a netns (seen with a x-netns IPv6 gre tunnel): unregister_netdevice: waiting for lo to become free. Usage count = 3 CC: Dmitry Kozlov Fixes: c12b395a4664 ("gre: Support GRE over IPv6") Signed-off-by: huaibin Wang Signed-off-by: Nicolas Dichtel Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_gre.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index a38d3ac0f18f6..69f4f689f06af 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -361,6 +361,7 @@ static void ip6gre_tunnel_uninit(struct net_device *dev) struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id); ip6gre_tunnel_unlink(ign, t); + ip6_tnl_dst_reset(t); dev_put(dev); } From 0fade09c7073b8899a20a80c61349ea344caf28b Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Tue, 25 Aug 2015 20:22:35 -0300 Subject: [PATCH 0963/2091] vxlan: re-ignore EADDRINUSE from igmp_join [ Upstream commit bef0057b7ba881d5ae67eec876df7a26fe672a59 ] Before 56ef9c909b40[1] it used to ignore all errors from igmp_join(). That commit enhanced that and made it error out whatever error happened with igmp_join(), but that's not good because when using multicast groups vxlan will try to join it multiple times if the socket is reused and then the 2nd and further attempts will fail with EADDRINUSE. As we don't track to which groups the socket is already subscribed, it's okay to just ignore that error. Fixes: 56ef9c909b40 ("vxlan: Move socket initialization to within rtnl scope") Reported-by: John Nielsen Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/vxlan.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 21a0fbf1ed947..0085b8df83e20 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2212,6 +2212,8 @@ static int vxlan_open(struct net_device *dev) if (vxlan_addr_multicast(&vxlan->default_dst.remote_ip)) { ret = vxlan_igmp_join(vxlan); + if (ret == -EADDRINUSE) + ret = 0; if (ret) { vxlan_sock_release(vs); return ret; From 5184fc66fd660f675ed5662cc4288fedc579e222 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Tue, 25 Aug 2015 16:38:12 -0700 Subject: [PATCH 0964/2091] cls_u32: complete the check for non-forced case in u32_destroy() [ Upstream commit a6c1aea044e490da3e59124ec55991fe316818d5 ] In commit 1e052be69d04 ("net_sched: destroy proto tp when all filters are gone") I added a check in u32_destroy() to see if all real filters are gone for each tp, however, that is only done for root_ht, same is needed for others. This can be reproduced by the following tc commands: tc filter add dev eth0 parent 1:0 prio 5 handle 15: protocol ip u32 divisor 256 tc filter add dev eth0 protocol ip parent 1: prio 5 handle 15:2:2 u32 ht 15:2: match ip src 10.0.0.2 flowid 1:10 tc filter add dev eth0 protocol ip parent 1: prio 5 handle 15:2:3 u32 ht 15:2: match ip src 10.0.0.3 flowid 1:10 Fixes: 1e052be69d04 ("net_sched: destroy proto tp when all filters are gone") Reported-by: Akshat Kakkar Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: Cong Wang Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/cls_u32.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index cab9e9b43967a..4fbb67430ce48 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -490,6 +490,19 @@ static bool u32_destroy(struct tcf_proto *tp, bool force) return false; } } + + if (tp_c->refcnt > 1) + return false; + + if (tp_c->refcnt == 1) { + struct tc_u_hnode *ht; + + for (ht = rtnl_dereference(tp_c->hlist); + ht; + ht = rtnl_dereference(ht->next)) + if (!ht_empty(ht)) + return false; + } } if (root_ht && --root_ht->refcnt == 0) From 26b732f035763e44a777a213e35c272f5ca41430 Mon Sep 17 00:00:00 2001 From: Eugene Shatokhin Date: Mon, 24 Aug 2015 23:13:42 +0300 Subject: [PATCH 0965/2091] usbnet: Get EVENT_NO_RUNTIME_PM bit before it is cleared [ Upstream commit f50791ac1aca1ac1b0370d62397b43e9f831421a ] It is needed to check EVENT_NO_RUNTIME_PM bit of dev->flags in usbnet_stop(), but its value should be read before it is cleared when dev->flags is set to 0. The problem was spotted and the fix was provided by Oliver Neukum . Signed-off-by: Eugene Shatokhin Acked-by: Oliver Neukum Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/usbnet.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 3c86b107275a8..e0498571ae267 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -778,7 +778,7 @@ int usbnet_stop (struct net_device *net) { struct usbnet *dev = netdev_priv(net); struct driver_info *info = dev->driver_info; - int retval, pm; + int retval, pm, mpn; clear_bit(EVENT_DEV_OPEN, &dev->flags); netif_stop_queue (net); @@ -809,6 +809,8 @@ int usbnet_stop (struct net_device *net) usbnet_purge_paused_rxq(dev); + mpn = !test_and_clear_bit(EVENT_NO_RUNTIME_PM, &dev->flags); + /* deferred work (task, timer, softirq) must also stop. * can't flush_scheduled_work() until we drop rtnl (later), * else workers could deadlock; so make workers a NOP. @@ -819,8 +821,7 @@ int usbnet_stop (struct net_device *net) if (!pm) usb_autopm_put_interface(dev->intf); - if (info->manage_power && - !test_and_clear_bit(EVENT_NO_RUNTIME_PM, &dev->flags)) + if (info->manage_power && mpn) info->manage_power(dev, 0); else usb_autopm_put_interface(dev->intf); From 6a7ca4e15715ece5c2a4d306ded237ec7d1ba231 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 2 Sep 2015 14:00:36 +0200 Subject: [PATCH 0966/2091] sock, diag: fix panic in sock_diag_put_filterinfo [ Upstream commit b382c08656000c12a146723a153b85b13a855b49 ] diag socket's sock_diag_put_filterinfo() dumps classic BPF programs upon request to user space (ss -0 -b). However, native eBPF programs attached to sockets (SO_ATTACH_BPF) cannot be dumped with this method: Their orig_prog is always NULL. However, sock_diag_put_filterinfo() unconditionally tries to access its filter length resp. wants to copy the filter insns from there. Internal cBPF to eBPF transformations attached to sockets don't have this issue, as orig_prog state is kept. It's currently only used by packet sockets. If we would want to add native eBPF support in the future, this needs to be done through a different attribute than PACKET_DIAG_FILTER to not confuse possible user space disassemblers that work on diag data. 
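[ Illustration added in review, not part of the upstream patch: the "classic BPF programs" that sock_diag_put_filterinfo() can dump are cBPF filters attached with SO_ATTACH_FILTER, for which the kernel keeps the original instructions in orig_prog; a native eBPF program attached with SO_ATTACH_BPF carries no cBPF text, hence orig_prog is NULL there. A minimal userspace sketch of the cBPF case follows; it assumes a Linux host and, for the packet socket, CAP_NET_RAW. ]

#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <linux/if_ether.h>
#include <linux/filter.h>

int main(void)
{
	/* One-instruction classic BPF program: "return 0xffff" (accept). */
	struct sock_filter insns[] = {
		BPF_STMT(BPF_RET | BPF_K, 0xffff),
	};
	struct sock_fprog prog = {
		.len = sizeof(insns) / sizeof(insns[0]),
		.filter = insns,
	};
	/* Packet socket; needs CAP_NET_RAW, so run the demo as root. */
	int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));

	if (fd < 0) {
		perror("socket");
		return 1;
	}
	/* A filter attached this way keeps its original cBPF text
	 * (orig_prog) in the kernel, which is what the diag code can
	 * copy back to user space. */
	if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog)) < 0)
		perror("SO_ATTACH_FILTER");

	pause();	/* keep the socket open so the filter can be queried */
	return 0;
}

[ With such a socket open, `ss -0 -b` should be able to print the cBPF text via PACKET_DIAG_FILTER; a socket using SO_ATTACH_BPF has nothing to print, which is exactly the case the added NULL check on fprog guards against. ]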
Fixes: 89aa075832b0 ("net: sock: allow eBPF programs to be attached to sockets") Signed-off-by: Daniel Borkmann Acked-by: Nicolas Dichtel Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/sock_diag.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index 74dddf84adcdd..556ecf96a385b 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -86,6 +86,9 @@ int sock_diag_put_filterinfo(bool may_report_filterinfo, struct sock *sk, goto out; fprog = filter->prog->orig_prog; + if (!fprog) + goto out; + flen = bpf_classic_proglen(fprog); attr = nla_reserve(skb, attrtype, flen); From 8d14167cc854117c94f02f5ba612689869b3d9a4 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 3 Sep 2015 00:29:07 +0200 Subject: [PATCH 0967/2091] ipv6: fix exthdrs offload registration in out_rt path [ Upstream commit e41b0bedba0293b9e1e8d1e8ed553104b9693656 ] We previously register IPPROTO_ROUTING offload under inet6_add_offload(), but in error path, we try to unregister it with inet_del_offload(). This doesn't seem correct, it should actually be inet6_del_offload(), also ipv6_exthdrs_offload_exit() from that commit seems rather incorrect (it also uses rthdr_offload twice), but it got removed entirely later on. Fixes: 3336288a9fea ("ipv6: Switch to using new offload infrastructure.") Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/exthdrs_offload.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/exthdrs_offload.c b/net/ipv6/exthdrs_offload.c index 447a7fbd1bb6f..f5e2ba1c18bf8 100644 --- a/net/ipv6/exthdrs_offload.c +++ b/net/ipv6/exthdrs_offload.c @@ -36,6 +36,6 @@ int __init ipv6_exthdrs_offload_init(void) return ret; out_rt: - inet_del_offload(&rthdr_offload, IPPROTO_ROUTING); + inet6_del_offload(&rthdr_offload, IPPROTO_ROUTING); goto out; } From 5e06748086650d178186181c2bfe2518bb959fd5 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 2 Sep 2015 17:24:14 +0800 Subject: [PATCH 0968/2091] net: fec: clear receive interrupts before processing a packet [ Upstream commit ed63f1dcd5788d36f942fbcce350742385e3e18c ] The patch just to re-submit the patch "db3421c114cfa6326" because the patch "4d494cdc92b3b9a0" remove the change. Clear any pending receive interrupt before we process a pending packet. This helps to avoid any spurious interrupts being raised after we have fully cleaned the receive ring, while still allowing an interrupt to be raised if we receive another packet. The position of this is critical: we must do this prior to reading the next packet status to avoid potentially dropping an interrupt when a packet is still pending. Acked-by: Fugang Duan Signed-off-by: Russell King Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/freescale/fec_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 66d47e448e4d1..570390b5cd42f 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1396,6 +1396,7 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id) if ((status & BD_ENET_RX_LAST) == 0) netdev_err(ndev, "rcv is not +last\n"); + writel(FEC_ENET_RXF, fep->hwp + FEC_IEVENT); /* Check for errors. 
*/ if (status & (BD_ENET_RX_LG | BD_ENET_RX_SH | BD_ENET_RX_NO | From 8bb9225a7bd3a07aadd58b19a633f1f7a8884d01 Mon Sep 17 00:00:00 2001 From: Atsushi Nemoto Date: Wed, 2 Sep 2015 17:49:29 +0900 Subject: [PATCH 0969/2091] net: eth: altera: fix napi poll_list corruption [ Upstream commit 4548a697e4969d695047cebd6d9af5e2f6cc728e ] tse_poll() calls __napi_complete() with irq enabled. This leads napi poll_list corruption and may stop all napi drivers working. Use napi_complete() instead of __napi_complete(). Signed-off-by: Atsushi Nemoto Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/altera/altera_tse_main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c index da48e66377b5f..8207877d62376 100644 --- a/drivers/net/ethernet/altera/altera_tse_main.c +++ b/drivers/net/ethernet/altera/altera_tse_main.c @@ -511,8 +511,7 @@ static int tse_poll(struct napi_struct *napi, int budget) if (rxcomplete < budget) { - napi_gro_flush(napi, false); - __napi_complete(napi); + napi_complete(napi); netdev_dbg(priv->dev, "NAPI Complete, did %d packets with budget %d\n", From 5008d77ef8b27c8c464e35d290ec4fa3adb5f9e1 Mon Sep 17 00:00:00 2001 From: Richard Laing Date: Thu, 3 Sep 2015 13:52:31 +1200 Subject: [PATCH 0970/2091] net/ipv6: Correct PIM6 mrt_lock handling [ Upstream commit 25b4a44c19c83d98e8c0807a7ede07c1f28eab8b ] In the IPv6 multicast routing code the mrt_lock was not being released correctly in the MFC iterator, as a result adding or deleting a MIF would cause a hang because the mrt_lock could not be acquired. This fix is a copy of the code for the IPv4 case and ensures that the lock is released correctly. Signed-off-by: Richard Laing Acked-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6mr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 74ceb73c1c9a0..5f36266b1f5ef 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -550,7 +550,7 @@ static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v) if (it->cache == &mrt->mfc6_unres_queue) spin_unlock_bh(&mfc_unres_lock); - else if (it->cache == mrt->mfc6_cache_array) + else if (it->cache == &mrt->mfc6_cache_array[it->ct]) read_unlock(&mrt_lock); } From 0f5262e8ddb6e7264724de0d9ec220d13b2ef1ff Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sat, 5 Sep 2015 13:07:27 -0700 Subject: [PATCH 0971/2091] net: dsa: bcm_sf2: Fix ageing conditions and operation [ Upstream commit 39797a279d62972cd914ef580fdfacb13e508bf8 ] The comparison check between cur_hw_state and hw_state is currently invalid because cur_hw_state is right shifted by G_MISTP_SHIFT, while hw_state is not, so we end-up comparing bits 2:0 with bits 7:5, which is going to cause an additional aging to occur. Fix this by not shifting cur_hw_state while reading it, but instead, mask the value with the appropriately shitfted bitmask. The other problem with the fast-ageing process is that we did not set the EN_AGE_DYNAMIC bit to request the ageing to occur for dynamically learned MAC addresses. Finally, write back 0 to the FAST_AGE_CTRL register to avoid leaving spurious bits sets from one operation to the other. Fixes: 12f460f23423 ("net: dsa: bcm_sf2: add HW bridging support") Signed-off-by: Florian Fainelli Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/dsa/bcm_sf2.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index cedb572bf25af..f24c4febd85d9 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -417,7 +417,7 @@ static int bcm_sf2_sw_fast_age_port(struct dsa_switch *ds, int port) core_writel(priv, port, CORE_FAST_AGE_PORT); reg = core_readl(priv, CORE_FAST_AGE_CTRL); - reg |= EN_AGE_PORT | FAST_AGE_STR_DONE; + reg |= EN_AGE_PORT | EN_AGE_DYNAMIC | FAST_AGE_STR_DONE; core_writel(priv, reg, CORE_FAST_AGE_CTRL); do { @@ -431,6 +431,8 @@ static int bcm_sf2_sw_fast_age_port(struct dsa_switch *ds, int port) if (!timeout) return -ETIMEDOUT; + core_writel(priv, 0, CORE_FAST_AGE_CTRL); + return 0; } @@ -506,7 +508,7 @@ static int bcm_sf2_sw_br_set_stp_state(struct dsa_switch *ds, int port, u32 reg; reg = core_readl(priv, CORE_G_PCTL_PORT(port)); - cur_hw_state = reg >> G_MISTP_STATE_SHIFT; + cur_hw_state = reg & (G_MISTP_STATE_MASK << G_MISTP_STATE_SHIFT); switch (state) { case BR_STATE_DISABLED: @@ -530,10 +532,12 @@ static int bcm_sf2_sw_br_set_stp_state(struct dsa_switch *ds, int port, } /* Fast-age ARL entries if we are moving a port from Learning or - * Forwarding state to Disabled, Blocking or Listening state + * Forwarding (cur_hw_state) state to Disabled, Blocking or Listening + * state (hw_state) */ if (cur_hw_state != hw_state) { - if (cur_hw_state & 4 && !(hw_state & 4)) { + if (cur_hw_state >= G_MISTP_LEARN_STATE && + hw_state <= G_MISTP_LISTEN_STATE) { ret = bcm_sf2_sw_fast_age_port(ds, port); if (ret) { pr_err("%s: fast-ageing failed\n", __func__); From c59231c8788b8c11d806fb08996e6135821eedf6 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Tue, 8 Sep 2015 10:53:04 -0700 Subject: [PATCH 0972/2091] ipv6: fix multipath route replace error recovery [ Upstream commit 6b9ea5a64ed5eeb3f68f2e6fcce0ed1179801d1e ] Problem: The ecmp route replace support for ipv6 in the kernel, deletes the existing ecmp route too early, ie when it installs the first nexthop. If there is an error in installing the subsequent nexthops, its too late to recover the already deleted existing route leaving the fib in an inconsistent state. This patch reduces the possibility of this by doing the following: a) Changes the existing multipath route add code to a two stage process: build rt6_infos + insert them ip6_route_add rt6_info creation code is moved into ip6_route_info_create. b) This ensures that most errors are caught during building rt6_infos and we fail early c) Separates multipath add and del code. Because add needs the special two stage mode in a) and delete essentially does not care. 
d) In any event if the code fails during inserting a route again, a warning is printed (This should be unlikely) Before the patch: $ip -6 route show 3000:1000:1000:1000::2 via fe80::202:ff:fe00:b dev swp49s0 metric 1024 3000:1000:1000:1000::2 via fe80::202:ff:fe00:d dev swp49s1 metric 1024 3000:1000:1000:1000::2 via fe80::202:ff:fe00:f dev swp49s2 metric 1024 /* Try replacing the route with a duplicate nexthop */ $ip -6 route change 3000:1000:1000:1000::2/128 nexthop via fe80::202:ff:fe00:b dev swp49s0 nexthop via fe80::202:ff:fe00:d dev swp49s1 nexthop via fe80::202:ff:fe00:d dev swp49s1 RTNETLINK answers: File exists $ip -6 route show /* previously added ecmp route 3000:1000:1000:1000::2 dissappears from * kernel */ After the patch: $ip -6 route show 3000:1000:1000:1000::2 via fe80::202:ff:fe00:b dev swp49s0 metric 1024 3000:1000:1000:1000::2 via fe80::202:ff:fe00:d dev swp49s1 metric 1024 3000:1000:1000:1000::2 via fe80::202:ff:fe00:f dev swp49s2 metric 1024 /* Try replacing the route with a duplicate nexthop */ $ip -6 route change 3000:1000:1000:1000::2/128 nexthop via fe80::202:ff:fe00:b dev swp49s0 nexthop via fe80::202:ff:fe00:d dev swp49s1 nexthop via fe80::202:ff:fe00:d dev swp49s1 RTNETLINK answers: File exists $ip -6 route show 3000:1000:1000:1000::2 via fe80::202:ff:fe00:b dev swp49s0 metric 1024 3000:1000:1000:1000::2 via fe80::202:ff:fe00:d dev swp49s1 metric 1024 3000:1000:1000:1000::2 via fe80::202:ff:fe00:f dev swp49s2 metric 1024 Fixes: 27596472473a ("ipv6: fix ECMP route replacement") Signed-off-by: Roopa Prabhu Reviewed-by: Nikolay Aleksandrov Acked-by: Nicolas Dichtel Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/route.c | 201 +++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 175 insertions(+), 26 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index c73ae5039e46d..f371fefa7fdcc 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1515,7 +1515,7 @@ static int ip6_convert_metrics(struct mx6_config *mxc, return -EINVAL; } -int ip6_route_add(struct fib6_config *cfg) +int ip6_route_info_create(struct fib6_config *cfg, struct rt6_info **rt_ret) { int err; struct net *net = cfg->fc_nlinfo.nl_net; @@ -1523,7 +1523,6 @@ int ip6_route_add(struct fib6_config *cfg) struct net_device *dev = NULL; struct inet6_dev *idev = NULL; struct fib6_table *table; - struct mx6_config mxc = { .mx = NULL, }; int addr_type; if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128) @@ -1719,6 +1718,32 @@ int ip6_route_add(struct fib6_config *cfg) cfg->fc_nlinfo.nl_net = dev_net(dev); + *rt_ret = rt; + + return 0; +out: + if (dev) + dev_put(dev); + if (idev) + in6_dev_put(idev); + if (rt) + dst_free(&rt->dst); + + *rt_ret = NULL; + + return err; +} + +int ip6_route_add(struct fib6_config *cfg) +{ + struct mx6_config mxc = { .mx = NULL, }; + struct rt6_info *rt = NULL; + int err; + + err = ip6_route_info_create(cfg, &rt); + if (err) + goto out; + err = ip6_convert_metrics(&mxc, cfg); if (err) goto out; @@ -1726,14 +1751,12 @@ int ip6_route_add(struct fib6_config *cfg) err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc); kfree(mxc.mx); + return err; out: - if (dev) - dev_put(dev); - if (idev) - in6_dev_put(idev); if (rt) dst_free(&rt->dst); + return err; } @@ -2496,19 +2519,78 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, return err; } -static int ip6_route_multipath(struct fib6_config *cfg, int add) +struct rt6_nh { + struct rt6_info *rt6_info; + struct fib6_config r_cfg; + struct mx6_config mxc; + struct list_head 
next; +}; + +static void ip6_print_replace_route_err(struct list_head *rt6_nh_list) +{ + struct rt6_nh *nh; + + list_for_each_entry(nh, rt6_nh_list, next) { + pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6 nexthop %pI6 ifi %d\n", + &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway, + nh->r_cfg.fc_ifindex); + } +} + +static int ip6_route_info_append(struct list_head *rt6_nh_list, + struct rt6_info *rt, struct fib6_config *r_cfg) +{ + struct rt6_nh *nh; + struct rt6_info *rtnh; + int err = -EEXIST; + + list_for_each_entry(nh, rt6_nh_list, next) { + /* check if rt6_info already exists */ + rtnh = nh->rt6_info; + + if (rtnh->dst.dev == rt->dst.dev && + rtnh->rt6i_idev == rt->rt6i_idev && + ipv6_addr_equal(&rtnh->rt6i_gateway, + &rt->rt6i_gateway)) + return err; + } + + nh = kzalloc(sizeof(*nh), GFP_KERNEL); + if (!nh) + return -ENOMEM; + nh->rt6_info = rt; + err = ip6_convert_metrics(&nh->mxc, r_cfg); + if (err) { + kfree(nh); + return err; + } + memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg)); + list_add_tail(&nh->next, rt6_nh_list); + + return 0; +} + +static int ip6_route_multipath_add(struct fib6_config *cfg) { struct fib6_config r_cfg; struct rtnexthop *rtnh; + struct rt6_info *rt; + struct rt6_nh *err_nh; + struct rt6_nh *nh, *nh_safe; int remaining; int attrlen; - int err = 0, last_err = 0; + int err = 1; + int nhn = 0; + int replace = (cfg->fc_nlinfo.nlh && + (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE)); + LIST_HEAD(rt6_nh_list); remaining = cfg->fc_mp_len; -beginning: rtnh = (struct rtnexthop *)cfg->fc_mp; - /* Parse a Multipath Entry */ + /* Parse a Multipath Entry and build a list (rt6_nh_list) of + * rt6_info structs per nexthop + */ while (rtnh_ok(rtnh, remaining)) { memcpy(&r_cfg, cfg, sizeof(*cfg)); if (rtnh->rtnh_ifindex) @@ -2524,22 +2606,32 @@ static int ip6_route_multipath(struct fib6_config *cfg, int add) r_cfg.fc_flags |= RTF_GATEWAY; } } - err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg); + + err = ip6_route_info_create(&r_cfg, &rt); + if (err) + goto cleanup; + + err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg); if (err) { - last_err = err; - /* If we are trying to remove a route, do not stop the - * loop when ip6_route_del() fails (because next hop is - * already gone), we should try to remove all next hops. - */ - if (add) { - /* If add fails, we should try to delete all - * next hops that have been already added. 
- */ - add = 0; - remaining = cfg->fc_mp_len - remaining; - goto beginning; - } + dst_free(&rt->dst); + goto cleanup; + } + + rtnh = rtnh_next(rtnh, &remaining); + } + + err_nh = NULL; + list_for_each_entry(nh, &rt6_nh_list, next) { + err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc); + /* nh->rt6_info is used or freed at this point, reset to NULL*/ + nh->rt6_info = NULL; + if (err) { + if (replace && nhn) + ip6_print_replace_route_err(&rt6_nh_list); + err_nh = nh; + goto add_errout; } + /* Because each route is added like a single route we remove * these flags after the first nexthop: if there is a collision, * we have already failed to add the first nexthop: @@ -2549,6 +2641,63 @@ static int ip6_route_multipath(struct fib6_config *cfg, int add) */ cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL | NLM_F_REPLACE); + nhn++; + } + + goto cleanup; + +add_errout: + /* Delete routes that were already added */ + list_for_each_entry(nh, &rt6_nh_list, next) { + if (err_nh == nh) + break; + ip6_route_del(&nh->r_cfg); + } + +cleanup: + list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) { + if (nh->rt6_info) + dst_free(&nh->rt6_info->dst); + if (nh->mxc.mx) + kfree(nh->mxc.mx); + list_del(&nh->next); + kfree(nh); + } + + return err; +} + +static int ip6_route_multipath_del(struct fib6_config *cfg) +{ + struct fib6_config r_cfg; + struct rtnexthop *rtnh; + int remaining; + int attrlen; + int err = 1, last_err = 0; + + remaining = cfg->fc_mp_len; + rtnh = (struct rtnexthop *)cfg->fc_mp; + + /* Parse a Multipath Entry */ + while (rtnh_ok(rtnh, remaining)) { + memcpy(&r_cfg, cfg, sizeof(*cfg)); + if (rtnh->rtnh_ifindex) + r_cfg.fc_ifindex = rtnh->rtnh_ifindex; + + attrlen = rtnh_attrlen(rtnh); + if (attrlen > 0) { + struct nlattr *nla, *attrs = rtnh_attrs(rtnh); + + nla = nla_find(attrs, attrlen, RTA_GATEWAY); + if (nla) { + nla_memcpy(&r_cfg.fc_gateway, nla, 16); + r_cfg.fc_flags |= RTF_GATEWAY; + } + } + err = ip6_route_del(&r_cfg); + if (err) + last_err = err; + rtnh = rtnh_next(rtnh, &remaining); } @@ -2565,7 +2714,7 @@ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh) return err; if (cfg.fc_mp) - return ip6_route_multipath(&cfg, 0); + return ip6_route_multipath_del(&cfg); else return ip6_route_del(&cfg); } @@ -2580,7 +2729,7 @@ static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh) return err; if (cfg.fc_mp) - return ip6_route_multipath(&cfg, 1); + return ip6_route_multipath_add(&cfg); else return ip6_route_add(&cfg); } From 85b722085574a327e617f049423dab3faac64a04 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 8 Sep 2015 20:06:41 -0700 Subject: [PATCH 0973/2091] net: dsa: bcm_sf2: Fix 64-bits register writes [ Upstream commit 03679a14739a0d4c14b52ba65a69ff553bfba73b ] The macro to write 64-bits quantities to the 32-bits register swapped the value and offsets arguments, we want to preserve the ordering of the arguments with respect to how writel() is implemented for instance: value first, offset/base second. Fixes: 246d7f773c13 ("net: dsa: add Broadcom SF2 switch driver") Signed-off-by: Florian Fainelli Reviewed-by: Vivien Didelot Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/dsa/bcm_sf2.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/bcm_sf2.h b/drivers/net/dsa/bcm_sf2.h index 22e2ebf313332..789d7b7737da4 100644 --- a/drivers/net/dsa/bcm_sf2.h +++ b/drivers/net/dsa/bcm_sf2.h @@ -112,8 +112,8 @@ static inline u64 name##_readq(struct bcm_sf2_priv *priv, u32 off) \ spin_unlock(&priv->indir_lock); \ return (u64)indir << 32 | dir; \ } \ -static inline void name##_writeq(struct bcm_sf2_priv *priv, u32 off, \ - u64 val) \ +static inline void name##_writeq(struct bcm_sf2_priv *priv, u64 val, \ + u32 off) \ { \ spin_lock(&priv->indir_lock); \ reg_writel(priv, upper_32_bits(val), REG_DIR_DATA_WRITE); \ From 65d48c630ff80a19c39751a4a6d3315f4c3c0280 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 10 Sep 2015 20:05:46 +0200 Subject: [PATCH 0974/2091] netlink, mmap: transform mmap skb into full skb on taps [ Upstream commit 1853c949646005b5959c483becde86608f548f24 ] Ken-ichirou reported that running netlink in mmap mode for receive in combination with nlmon will throw a NULL pointer dereference in __kfree_skb() on nlmon_xmit(), in my case I can also trigger an "unable to handle kernel paging request". The problem is the skb_clone() in __netlink_deliver_tap_skb() for skbs that are mmaped. I.e. the cloned skb doesn't have a destructor, whereas the mmap netlink skb has it pointed to netlink_skb_destructor(), set in the handler netlink_ring_setup_skb(). There, skb->head is being set to NULL, so that in such cases, __kfree_skb() doesn't perform a skb_release_data() via skb_release_all(), where skb->head is possibly being freed through kfree(head) into slab allocator, although netlink mmap skb->head points to the mmap buffer. Similarly, the same has to be done also for large netlink skbs where the data area is vmalloced. Therefore, as discussed, make a copy for these rather rare cases for now. This fixes the issue on my and Ken-ichirou's test-cases. Reference: http://thread.gmane.org/gmane.linux.network/371129 Fixes: bcbde0d449ed ("net: netlink: virtual tap device management") Reported-by: Ken-ichirou MATSUZAWA Signed-off-by: Daniel Borkmann Tested-by: Ken-ichirou MATSUZAWA Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/netlink/af_netlink.c | 30 +++++++++++++++++++++++------- net/netlink/af_netlink.h | 9 +++++++++ 2 files changed, 32 insertions(+), 7 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 4856d975492d9..85aeaf27f1b54 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -123,6 +123,24 @@ static inline u32 netlink_group_mask(u32 group) return group ? 
1 << (group - 1) : 0; } +static struct sk_buff *netlink_to_full_skb(const struct sk_buff *skb, + gfp_t gfp_mask) +{ + unsigned int len = skb_end_offset(skb); + struct sk_buff *new; + + new = alloc_skb(len, gfp_mask); + if (new == NULL) + return NULL; + + NETLINK_CB(new).portid = NETLINK_CB(skb).portid; + NETLINK_CB(new).dst_group = NETLINK_CB(skb).dst_group; + NETLINK_CB(new).creds = NETLINK_CB(skb).creds; + + memcpy(skb_put(new, len), skb->data, len); + return new; +} + int netlink_add_tap(struct netlink_tap *nt) { if (unlikely(nt->dev->type != ARPHRD_NETLINK)) @@ -204,7 +222,11 @@ static int __netlink_deliver_tap_skb(struct sk_buff *skb, int ret = -ENOMEM; dev_hold(dev); - nskb = skb_clone(skb, GFP_ATOMIC); + + if (netlink_skb_is_mmaped(skb) || is_vmalloc_addr(skb->head)) + nskb = netlink_to_full_skb(skb, GFP_ATOMIC); + else + nskb = skb_clone(skb, GFP_ATOMIC); if (nskb) { nskb->dev = dev; nskb->protocol = htons((u16) sk->sk_protocol); @@ -276,11 +298,6 @@ static void netlink_rcv_wake(struct sock *sk) } #ifdef CONFIG_NETLINK_MMAP -static bool netlink_skb_is_mmaped(const struct sk_buff *skb) -{ - return NETLINK_CB(skb).flags & NETLINK_SKB_MMAPED; -} - static bool netlink_rx_is_mmaped(struct sock *sk) { return nlk_sk(sk)->rx_ring.pg_vec != NULL; @@ -832,7 +849,6 @@ static void netlink_ring_set_copied(struct sock *sk, struct sk_buff *skb) } #else /* CONFIG_NETLINK_MMAP */ -#define netlink_skb_is_mmaped(skb) false #define netlink_rx_is_mmaped(sk) false #define netlink_tx_is_mmaped(sk) false #define netlink_mmap sock_no_mmap diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h index 89008405d6b4d..df9a06090db61 100644 --- a/net/netlink/af_netlink.h +++ b/net/netlink/af_netlink.h @@ -59,6 +59,15 @@ static inline struct netlink_sock *nlk_sk(struct sock *sk) return container_of(sk, struct netlink_sock, sk); } +static inline bool netlink_skb_is_mmaped(const struct sk_buff *skb) +{ +#ifdef CONFIG_NETLINK_MMAP + return NETLINK_CB(skb).flags & NETLINK_SKB_MMAPED; +#else + return false; +#endif /* CONFIG_NETLINK_MMAP */ +} + struct netlink_table { struct rhashtable hash; struct hlist_head mc_list; From 5cadd6bac523e5e78ae18284e5f2b286ebff070b Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Thu, 10 Sep 2015 17:31:15 -0300 Subject: [PATCH 0975/2091] sctp: fix race on protocol/netns initialization [ Upstream commit 8e2d61e0aed2b7c4ecb35844fe07e0b2b762dee4 ] Consider sctp module is unloaded and is being requested because an user is creating a sctp socket. During initialization, sctp will add the new protocol type and then initialize pernet subsys: status = sctp_v4_protosw_init(); if (status) goto err_protosw_init; status = sctp_v6_protosw_init(); if (status) goto err_v6_protosw_init; status = register_pernet_subsys(&sctp_net_ops); The problem is that after those calls to sctp_v{4,6}_protosw_init(), it is possible for userspace to create SCTP sockets like if the module is already fully loaded. If that happens, one of the possible effects is that we will have readers for net->sctp.local_addr_list list earlier than expected and sctp_net_init() does not take precautions while dealing with that list, leading to a potential panic but not limited to that, as sctp_sock_init() will copy a bunch of blank/partially initialized values from net->sctp. The race happens like this: CPU 0 | CPU 1 socket() | __sock_create | socket() inet_create | __sock_create list_for_each_entry_rcu( | answer, &inetsw[sock->type], | list) { | inet_create /* no hits */ | if (unlikely(err)) { | ... 
| request_module() | /* socket creation is blocked | * the module is fully loaded | */ | sctp_init | sctp_v4_protosw_init | inet_register_protosw | list_add_rcu(&p->list, | last_perm); | | list_for_each_entry_rcu( | answer, &inetsw[sock->type], sctp_v6_protosw_init | list) { | /* hit, so assumes protocol | * is already loaded | */ | /* socket creation continues | * before netns is initialized | */ register_pernet_subsys | Simply inverting the initialization order between register_pernet_subsys() and sctp_v4_protosw_init() is not possible because register_pernet_subsys() will create a control sctp socket, so the protocol must be already visible by then. Deferring the socket creation to a work-queue is not good specially because we loose the ability to handle its errors. So, as suggested by Vlad, the fix is to split netns initialization in two moments: defaults and control socket, so that the defaults are already loaded by when we register the protocol, while control socket initialization is kept at the same moment it is today. Fixes: 4db67e808640 ("sctp: Make the address lists per network namespace") Signed-off-by: Vlad Yasevich Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/protocol.c | 64 +++++++++++++++++++++++++++++---------------- 1 file changed, 41 insertions(+), 23 deletions(-) diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 53b7acde9aa37..e13c3c3ea4ac1 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1166,7 +1166,7 @@ static void sctp_v4_del_protocol(void) unregister_inetaddr_notifier(&sctp_inetaddr_notifier); } -static int __net_init sctp_net_init(struct net *net) +static int __net_init sctp_defaults_init(struct net *net) { int status; @@ -1259,12 +1259,6 @@ static int __net_init sctp_net_init(struct net *net) sctp_dbg_objcnt_init(net); - /* Initialize the control inode/socket for handling OOTB packets. */ - if ((status = sctp_ctl_sock_init(net))) { - pr_err("Failed to initialize the SCTP control sock\n"); - goto err_ctl_sock_init; - } - /* Initialize the local address list. */ INIT_LIST_HEAD(&net->sctp.local_addr_list); spin_lock_init(&net->sctp.local_addr_lock); @@ -1280,9 +1274,6 @@ static int __net_init sctp_net_init(struct net *net) return 0; -err_ctl_sock_init: - sctp_dbg_objcnt_exit(net); - sctp_proc_exit(net); err_init_proc: cleanup_sctp_mibs(net); err_init_mibs: @@ -1291,15 +1282,12 @@ static int __net_init sctp_net_init(struct net *net) return status; } -static void __net_exit sctp_net_exit(struct net *net) +static void __net_exit sctp_defaults_exit(struct net *net) { /* Free the local address list */ sctp_free_addr_wq(net); sctp_free_local_addr_list(net); - /* Free the control endpoint. */ - inet_ctl_sock_destroy(net->sctp.ctl_sock); - sctp_dbg_objcnt_exit(net); sctp_proc_exit(net); @@ -1307,9 +1295,32 @@ static void __net_exit sctp_net_exit(struct net *net) sctp_sysctl_net_unregister(net); } -static struct pernet_operations sctp_net_ops = { - .init = sctp_net_init, - .exit = sctp_net_exit, +static struct pernet_operations sctp_defaults_ops = { + .init = sctp_defaults_init, + .exit = sctp_defaults_exit, +}; + +static int __net_init sctp_ctrlsock_init(struct net *net) +{ + int status; + + /* Initialize the control inode/socket for handling OOTB packets. 
*/ + status = sctp_ctl_sock_init(net); + if (status) + pr_err("Failed to initialize the SCTP control sock\n"); + + return status; +} + +static void __net_init sctp_ctrlsock_exit(struct net *net) +{ + /* Free the control endpoint. */ + inet_ctl_sock_destroy(net->sctp.ctl_sock); +} + +static struct pernet_operations sctp_ctrlsock_ops = { + .init = sctp_ctrlsock_init, + .exit = sctp_ctrlsock_exit, }; /* Initialize the universe into something sensible. */ @@ -1442,8 +1453,11 @@ static __init int sctp_init(void) sctp_v4_pf_init(); sctp_v6_pf_init(); - status = sctp_v4_protosw_init(); + status = register_pernet_subsys(&sctp_defaults_ops); + if (status) + goto err_register_defaults; + status = sctp_v4_protosw_init(); if (status) goto err_protosw_init; @@ -1451,9 +1465,9 @@ static __init int sctp_init(void) if (status) goto err_v6_protosw_init; - status = register_pernet_subsys(&sctp_net_ops); + status = register_pernet_subsys(&sctp_ctrlsock_ops); if (status) - goto err_register_pernet_subsys; + goto err_register_ctrlsock; status = sctp_v4_add_protocol(); if (status) @@ -1469,12 +1483,14 @@ static __init int sctp_init(void) err_v6_add_protocol: sctp_v4_del_protocol(); err_add_protocol: - unregister_pernet_subsys(&sctp_net_ops); -err_register_pernet_subsys: + unregister_pernet_subsys(&sctp_ctrlsock_ops); +err_register_ctrlsock: sctp_v6_protosw_exit(); err_v6_protosw_init: sctp_v4_protosw_exit(); err_protosw_init: + unregister_pernet_subsys(&sctp_defaults_ops); +err_register_defaults: sctp_v4_pf_exit(); sctp_v6_pf_exit(); sctp_sysctl_unregister(); @@ -1507,12 +1523,14 @@ static __exit void sctp_exit(void) sctp_v6_del_protocol(); sctp_v4_del_protocol(); - unregister_pernet_subsys(&sctp_net_ops); + unregister_pernet_subsys(&sctp_ctrlsock_ops); /* Free protosw registrations */ sctp_v6_protosw_exit(); sctp_v4_protosw_exit(); + unregister_pernet_subsys(&sctp_defaults_ops); + /* Unregister with socket layer. */ sctp_v6_pf_exit(); sctp_v4_pf_exit(); From 022ce825bec2cba19bfb3cc5473df4347851c506 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Fri, 11 Sep 2015 18:39:48 +0200 Subject: [PATCH 0976/2091] bridge: fix igmpv3 / mldv2 report parsing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit c2d4fbd2163e607915cc05798ce7fb7f31117cc1 ] With the newly introduced helper functions the skb pulling is hidden in the checksumming function - and undone before returning to the caller. The IGMPv3 and MLDv2 report parsing functions in the bridge still assumed that the skb is pointing to the beginning of the IGMP/MLD message while it is now kept at the beginning of the IPv4/6 header, breaking the message parsing and creating packet loss. Fixing this by taking the offset between IP and IGMP/MLD header into account, too. Fixes: 9afd85c9e455 ("net: Export IGMP/MLD message validation code") Reported-by: Tobias Powalowski Tested-by: Tobias Powalowski Signed-off-by: Linus Lüssing Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_multicast.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index ff667e18b2d63..9ba383f5b0c4b 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -980,7 +980,7 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br, ih = igmpv3_report_hdr(skb); num = ntohs(ih->ngrec); - len = sizeof(*ih); + len = skb_transport_offset(skb) + sizeof(*ih); for (i = 0; i < num; i++) { len += sizeof(*grec); @@ -1035,7 +1035,7 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, icmp6h = icmp6_hdr(skb); num = ntohs(icmp6h->icmp6_dataun.un_data16[1]); - len = sizeof(*icmp6h); + len = skb_transport_offset(skb) + sizeof(*icmp6h); for (i = 0; i < num; i++) { __be16 *nsrcs, _nsrcs; From fd0a1a9da244fe54dd7756e25a074c5cb12fe444 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 15 Sep 2015 18:29:47 -0700 Subject: [PATCH 0977/2091] net/mlx4_en: really allow to change RSS key [ Upsteam commit 4671fc6d47e0a0108fe24a4d830347d6a6ef4aa7 ] When changing rss key, we do not want to overwrite user provided key by the one provided by netdev_rss_key_fill(), which is the host random key generated at boot time. Fixes: 947cbb0ac242 ("net/mlx4_en: Support for configurable RSS hash function") Signed-off-by: Eric Dumazet Cc: Eyal Perry CC: Amir Vadai Acked-by: Or Gerlitz Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index eab4e080ebd2d..80aac20104de2 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -1256,8 +1256,6 @@ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv) rss_context->hash_fn = MLX4_RSS_HASH_TOP; memcpy(rss_context->rss_key, priv->rss_key, MLX4_EN_RSS_KEY_SIZE); - netdev_rss_key_fill(rss_context->rss_key, - MLX4_EN_RSS_KEY_SIZE); } else { en_err(priv, "Unknown RSS hash function requested\n"); err = -EINVAL; From cf9cf6bc253c5d83a8cb593d4ed7042bb4f04e7c Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 18 Sep 2015 13:41:09 +0300 Subject: [PATCH 0978/2091] macvtap: fix TUNSETSNDBUF values > 64k [ Upstream commit 3ea79249e81e5ed051f2e6480cbde896d99046e8 ] Upon TUNSETSNDBUF, macvtap reads the requested sndbuf size into a local variable u. commit 39ec7de7092b ("macvtap: fix uninitialized access on TUNSETIFF") changed its type to u16 (which is the right thing to do for all other macvtap ioctls), breaking all values > 64k. The value of TUNSETSNDBUF is actually a signed 32 bit integer, so the right thing to do is to read it into an int. Cc: David S. Miller Fixes: 39ec7de7092b ("macvtap: fix uninitialized access on TUNSETIFF") Reported-by: Mark A. Peloquin Bisected-by: Matthew Rosato Reported-by: Christian Borntraeger Signed-off-by: Michael S. Tsirkin Tested-by: Matthew Rosato Acked-by: Christian Borntraeger Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/macvtap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 8c350c5d54ada..58858c5589db7 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -1054,10 +1054,10 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd, return 0; case TUNSETSNDBUF: - if (get_user(u, up)) + if (get_user(s, sp)) return -EFAULT; - q->sk.sk_sndbuf = u; + q->sk.sk_sndbuf = s; return 0; case TUNGETVNETHDRSZ: From 6d80e35071740489cf3b4d7a0d33bf732dd8ab75 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Mon, 21 Sep 2015 20:21:20 -0700 Subject: [PATCH 0979/2091] openvswitch: Zero flows on allocation. [ Upstream commit ae5f2fb1d51fa128a460bcfbe3c56d7ab8bf6a43 ] When support for megaflows was introduced, OVS needed to start installing flows with a mask applied to them. Since masking is an expensive operation, OVS also had an optimization that would only take the parts of the flow keys that were covered by a non-zero mask. The values stored in the remaining pieces should not matter because they are masked out. While this works fine for the purposes of matching (which must always look at the mask), serialization to netlink can be problematic. Since the flow and the mask are serialized separately, the uninitialized portions of the flow can be encoded with whatever values happen to be present. In terms of functionality, this has little effect since these fields will be masked out by definition. However, it leaks kernel memory to userspace, which is a potential security vulnerability. It is also possible that other code paths could look at the masked key and get uninitialized data, although this does not currently appear to be an issue in practice. This removes the mask optimization for flows that are being installed. This was always intended to be the case as the mask optimizations were really targetting per-packet flow operations. Fixes: 03f0d916 ("openvswitch: Mega flow implementation") Signed-off-by: Jesse Gross Acked-by: Pravin B Shelar Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/openvswitch/datapath.c | 4 ++-- net/openvswitch/flow_table.c | 23 ++++++++++++----------- net/openvswitch/flow_table.h | 2 +- 3 files changed, 15 insertions(+), 14 deletions(-) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 096c6276e6b92..27e14962b5046 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -906,7 +906,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) if (error) goto err_kfree_flow; - ovs_flow_mask_key(&new_flow->key, &key, &mask); + ovs_flow_mask_key(&new_flow->key, &key, true, &mask); /* Extract flow identifier. 
*/ error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID], @@ -1033,7 +1033,7 @@ static struct sw_flow_actions *get_flow_actions(const struct nlattr *a, struct sw_flow_key masked_key; int error; - ovs_flow_mask_key(&masked_key, key, mask); + ovs_flow_mask_key(&masked_key, key, true, mask); error = ovs_nla_copy_actions(a, &masked_key, &acts, log); if (error) { OVS_NLERR(log, diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index 4613df8c82900..aa349514e4cb5 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -56,20 +56,21 @@ static u16 range_n_bytes(const struct sw_flow_key_range *range) } void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src, - const struct sw_flow_mask *mask) + bool full, const struct sw_flow_mask *mask) { - const long *m = (const long *)((const u8 *)&mask->key + - mask->range.start); - const long *s = (const long *)((const u8 *)src + - mask->range.start); - long *d = (long *)((u8 *)dst + mask->range.start); + int start = full ? 0 : mask->range.start; + int len = full ? sizeof *dst : range_n_bytes(&mask->range); + const long *m = (const long *)((const u8 *)&mask->key + start); + const long *s = (const long *)((const u8 *)src + start); + long *d = (long *)((u8 *)dst + start); int i; - /* The memory outside of the 'mask->range' are not set since - * further operations on 'dst' only uses contents within - * 'mask->range'. + /* If 'full' is true then all of 'dst' is fully initialized. Otherwise, + * if 'full' is false the memory outside of the 'mask->range' is left + * uninitialized. This can be used as an optimization when further + * operations on 'dst' only use contents within 'mask->range'. */ - for (i = 0; i < range_n_bytes(&mask->range); i += sizeof(long)) + for (i = 0; i < len; i += sizeof(long)) *d++ = *s++ & *m++; } @@ -473,7 +474,7 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti, u32 hash; struct sw_flow_key masked_key; - ovs_flow_mask_key(&masked_key, unmasked, mask); + ovs_flow_mask_key(&masked_key, unmasked, false, mask); hash = flow_hash(&masked_key, &mask->range); head = find_bucket(ti, hash); hlist_for_each_entry_rcu(flow, head, flow_table.node[ti->node_ver]) { diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h index 616eda10d9554..2dd9900f533df 100644 --- a/net/openvswitch/flow_table.h +++ b/net/openvswitch/flow_table.h @@ -86,5 +86,5 @@ struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *, bool ovs_flow_cmp(const struct sw_flow *, const struct sw_flow_match *); void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src, - const struct sw_flow_mask *mask); + bool full, const struct sw_flow_mask *mask); #endif /* flow_table.h */ From c3647e60c838b88e9cabe208b3eeea2c5eceaf85 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 23 Sep 2015 14:00:21 -0700 Subject: [PATCH 0980/2091] tcp: add proper TS val into RST packets [ Upstream commit 675ee231d960af2af3606b4480324e26797eb010 ] RST packets sent on behalf of TCP connections with TS option (RFC 7323 TCP timestamps) have incorrect TS val (set to 0), but correct TS ecr. 
A > B: Flags [S], seq 0, win 65535, options [mss 1000,nop,nop,TS val 100 ecr 0], length 0 B > A: Flags [S.], seq 2444755794, ack 1, win 28960, options [mss 1460,nop,nop,TS val 7264344 ecr 100], length 0 A > B: Flags [.], ack 1, win 65535, options [nop,nop,TS val 110 ecr 7264344], length 0 B > A: Flags [R.], seq 1, ack 1, win 28960, options [nop,nop,TS val 0 ecr 110], length 0 We need to call skb_mstamp_get() to get proper TS val, derived from skb->skb_mstamp Note that RFC 1323 was advocating to not send TS option in RST segment, but RFC 7323 recommends the opposite : Once TSopt has been successfully negotiated, that is both and contain TSopt, the TSopt MUST be sent in every non- segment for the duration of the connection, and SHOULD be sent in an segment (see Section 5.2 for details) Note this RFC recommends to send TS val = 0, but we believe it is premature : We do not know if all TCP stacks are properly handling the receive side : When an segment is received, it MUST NOT be subjected to the PAWS check by verifying an acceptable value in SEG.TSval, and information from the Timestamps option MUST NOT be used to update connection state information. SEG.TSecr MAY be used to provide stricter acceptance checks. In 5 years, if/when all TCP stack are RFC 7323 ready, we might consider to decide to send TS val = 0, if it buys something. Fixes: 7faee5c0d514 ("tcp: remove TCP_SKB_CB(skb)->when") Signed-off-by: Eric Dumazet Acked-by: Yuchung Cheng Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_output.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index a369e8a70b2c7..986440b249784 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2893,6 +2893,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority) skb_reserve(skb, MAX_TCP_HEADER); tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk), TCPHDR_ACK | TCPHDR_RST); + skb_mstamp_get(&skb->skb_mstamp); /* Send it off. */ if (tcp_transmit_skb(sk, skb, 0, priority)) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED); From 74bff4a07790822d7f7787d3e67b13b94e8f78a9 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Tue, 22 Sep 2015 17:01:11 -0700 Subject: [PATCH 0981/2091] net: revert "net_sched: move tp->root allocation into fw_init()" [ Upstream commit d8aecb10115497f6cdf841df8c88ebb3ba25fa28 ] fw filter uses tp->root==NULL to check if it is the old method, so it doesn't need allocation at all in this case. This patch reverts the offending commit and adds some comments for old method to make it obvious. Fixes: 33f8b9ecdb15 ("net_sched: move tp->root allocation into fw_init()") Reported-by: Akshat Kakkar Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/cls_fw.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index 715e01e5910a9..f23a3b68bba63 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -33,7 +33,6 @@ struct fw_head { u32 mask; - bool mask_set; struct fw_filter __rcu *ht[HTSIZE]; struct rcu_head rcu; }; @@ -84,7 +83,7 @@ static int fw_classify(struct sk_buff *skb, const struct tcf_proto *tp, } } } else { - /* old method */ + /* Old method: classify the packet using its skb mark. 
*/ if (id && (TC_H_MAJ(id) == 0 || !(TC_H_MAJ(id ^ tp->q->handle)))) { res->classid = id; @@ -114,14 +113,9 @@ static unsigned long fw_get(struct tcf_proto *tp, u32 handle) static int fw_init(struct tcf_proto *tp) { - struct fw_head *head; - - head = kzalloc(sizeof(struct fw_head), GFP_KERNEL); - if (head == NULL) - return -ENOBUFS; - - head->mask_set = false; - rcu_assign_pointer(tp->root, head); + /* We don't allocate fw_head here, because in the old method + * we don't need it at all. + */ return 0; } @@ -252,7 +246,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, int err; if (!opt) - return handle ? -EINVAL : 0; + return handle ? -EINVAL : 0; /* Succeed if it is old method. */ err = nla_parse_nested(tb, TCA_FW_MAX, opt, fw_policy); if (err < 0) @@ -302,11 +296,17 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, if (!handle) return -EINVAL; - if (!head->mask_set) { - head->mask = 0xFFFFFFFF; + if (!head) { + u32 mask = 0xFFFFFFFF; if (tb[TCA_FW_MASK]) - head->mask = nla_get_u32(tb[TCA_FW_MASK]); - head->mask_set = true; + mask = nla_get_u32(tb[TCA_FW_MASK]); + + head = kzalloc(sizeof(*head), GFP_KERNEL); + if (!head) + return -ENOBUFS; + head->mask = mask; + + rcu_assign_pointer(tp->root, head); } f = kzalloc(sizeof(struct fw_filter), GFP_KERNEL); From fce134644303a4ebe0892edc9af7d46e0ebda9ab Mon Sep 17 00:00:00 2001 From: Wilson Kok Date: Tue, 22 Sep 2015 21:40:22 -0700 Subject: [PATCH 0982/2091] fib_rules: fix fib rule dumps across multiple skbs [ Upstream commit 41fc014332d91ee90c32840bf161f9685b7fbf2b ] dump_rules returns skb length and not error. But when family == AF_UNSPEC, the caller of dump_rules assumes that it returns an error. Hence, when family == AF_UNSPEC, we continue trying to dump on -EMSGSIZE errors resulting in incorrect dump idx carried between skbs belonging to the same dump. This results in fib rule dump always only dumping rules that fit into the first skb. This patch fixes dump_rules to return error so that we exit correctly and idx is correctly maintained between skbs that are part of the same dump. Signed-off-by: Wilson Kok Signed-off-by: Roopa Prabhu Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/core/fib_rules.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 9a12668f7d627..0ad144fb0c792 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -615,15 +615,17 @@ static int dump_rules(struct sk_buff *skb, struct netlink_callback *cb, { int idx = 0; struct fib_rule *rule; + int err = 0; rcu_read_lock(); list_for_each_entry_rcu(rule, &ops->rules_list, list) { if (idx < cb->args[1]) goto skip; - if (fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, RTM_NEWRULE, - NLM_F_MULTI, ops) < 0) + err = fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, RTM_NEWRULE, + NLM_F_MULTI, ops); + if (err) break; skip: idx++; @@ -632,7 +634,7 @@ static int dump_rules(struct sk_buff *skb, struct netlink_callback *cb, cb->args[1] = idx; rules_ops_put(ops); - return skb->len; + return err; } static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb) @@ -648,7 +650,9 @@ static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb) if (ops == NULL) return -EAFNOSUPPORT; - return dump_rules(skb, cb, ops); + dump_rules(skb, cb, ops); + + return skb->len; } rcu_read_lock(); From 90eb52c94901af7f78f9751f8ffff67fbb0bf741 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 20 Jul 2015 17:49:55 -0700 Subject: [PATCH 0983/2091] net: dsa: bcm_sf2: Do not override speed settings [ Upstream d2eac98f7d1b950b762a7eca05a9ce0ea1d878d2 in net-next tree, will be pushed to Linus very soon. ] The SF2 driver currently overrides speed settings for its port configured using a fixed PHY, this is both unnecessary and incorrect, because we keep feedback to the hardware parameters that we read from the PHY device, which in the case of a fixed PHY cannot possibly change speed. This is a required change to allow the fixed PHY code to allow registering a PHY with a link configured as DOWN by default and avoid some sort of circular dependency where we require the link_update callback to run to program the hardware, and we then utilize the fixed PHY parameters to program the hardware with the same settings. Fixes: 246d7f773c13 ("net: dsa: add Broadcom SF2 switch driver") Signed-off-by: Florian Fainelli Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/dsa/bcm_sf2.c | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index f24c4febd85d9..db9ebbc1a7325 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -893,15 +893,11 @@ static void bcm_sf2_sw_fixed_link_update(struct dsa_switch *ds, int port, struct fixed_phy_status *status) { struct bcm_sf2_priv *priv = ds_to_priv(ds); - u32 duplex, pause, speed; + u32 duplex, pause; u32 reg; duplex = core_readl(priv, CORE_DUPSTS); pause = core_readl(priv, CORE_PAUSESTS); - speed = core_readl(priv, CORE_SPDSTS); - - speed >>= (port * SPDSTS_SHIFT); - speed &= SPDSTS_MASK; status->link = 0; @@ -929,18 +925,6 @@ static void bcm_sf2_sw_fixed_link_update(struct dsa_switch *ds, int port, reg &= ~LINK_STS; core_writel(priv, reg, CORE_STS_OVERRIDE_GMIIP_PORT(port)); - switch (speed) { - case SPDSTS_10: - status->speed = SPEED_10; - break; - case SPDSTS_100: - status->speed = SPEED_100; - break; - case SPDSTS_1000: - status->speed = SPEED_1000; - break; - } - if ((pause & (1 << port)) && (pause & (1 << (port + PAUSESTS_TX_PAUSE_SHIFT)))) { status->asym_pause = 1; From 282117acdfac7b5d621b18f002a9751d7f583daf Mon Sep 17 00:00:00 2001 From: Stas Sergeev Date: Mon, 20 Jul 2015 17:49:56 -0700 Subject: [PATCH 0984/2091] net: phy: fixed_phy: handle link-down case [ Upstream 868a4215be9a6d80548ccb74763b883dc99d32a2 in net-next tree, will be pushed to Linus very soon. ] fixed_phy_register() currently hardcodes the fixed PHY link to 1, and expects to find a "speed" parameter to provide correct information towards the fixed PHY consumer. In a subsequent change, where we allow "managed" (e.g: (RS)GMII in-band status auto-negotiation) fixed PHYs, none of these parameters can be provided since they will be auto-negotiated, hence, we just provide a zero-initialized fixed_phy_status to fixed_phy_register() which makes it fail when we call fixed_phy_update_regs() since status.speed = 0 which makes us hit the "default" label and error out. Without this change, we would also see potentially inconsistent speed/duplex parameters for fixed PHYs when the link is DOWN. CC: netdev@vger.kernel.org CC: linux-kernel@vger.kernel.org Signed-off-by: Stas Sergeev [florian: add more background to why this is correct and desirable] Signed-off-by: Florian Fainelli Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/fixed_phy.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/phy/fixed_phy.c b/drivers/net/phy/fixed_phy.c index 1960b46add65b..479b93f9581c4 100644 --- a/drivers/net/phy/fixed_phy.c +++ b/drivers/net/phy/fixed_phy.c @@ -52,6 +52,10 @@ static int fixed_phy_update_regs(struct fixed_phy *fp) u16 lpagb = 0; u16 lpa = 0; + if (!fp->status.link) + goto done; + bmsr |= BMSR_LSTATUS | BMSR_ANEGCOMPLETE; + if (fp->status.duplex) { bmcr |= BMCR_FULLDPLX; @@ -96,15 +100,13 @@ static int fixed_phy_update_regs(struct fixed_phy *fp) } } - if (fp->status.link) - bmsr |= BMSR_LSTATUS | BMSR_ANEGCOMPLETE; - if (fp->status.pause) lpa |= LPA_PAUSE_CAP; if (fp->status.asym_pause) lpa |= LPA_PAUSE_ASYM; +done: fp->regs[MII_PHYSID1] = 0; fp->regs[MII_PHYSID2] = 0; From ebfd3e10bf111a7954d75fc4723b3fb3450483e0 Mon Sep 17 00:00:00 2001 From: Stas Sergeev Date: Mon, 20 Jul 2015 17:49:57 -0700 Subject: [PATCH 0985/2091] of_mdio: add new DT property 'managed' to specify the PHY management type [ Upstream commit 4cba5c2103657d43d0886e4cff8004d95a3d0def in net-next tree, will be pushed to Linus very soon. ] Currently the PHY management type is selected by the MAC driver arbitrary. The decision is based on the presence of the "fixed-link" node and on a will of the driver's authors. This caused a regression recently, when mvneta driver suddenly started to use the in-band status for auto-negotiation on fixed links. It appears the auto-negotiation may not work when expected by the MAC driver. Sebastien Rannou explains: << Yes, I confirm that my HW does not generate an in-band status. AFAIK, it's a PHY that aggregates 4xSGMIIs to 1xQSGMII ; the MAC side of the PHY (with inband status) is connected to the switch through QSGMII, and in this context we are on the media side of the PHY. >> https://lkml.org/lkml/2015/7/10/206 This patch introduces the new string property 'managed' that allows the user to set the management type explicitly. The supported values are: "auto" - default. Uses either MDIO or nothing, depending on the presence of the fixed-link node "in-band-status" - use in-band status Signed-off-by: Stas Sergeev CC: Rob Herring CC: Pawel Moll CC: Mark Rutland CC: Ian Campbell CC: Kumar Gala CC: Florian Fainelli CC: Grant Likely CC: devicetree@vger.kernel.org CC: linux-kernel@vger.kernel.org CC: netdev@vger.kernel.org Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- .../devicetree/bindings/net/ethernet.txt | 4 ++++ drivers/of/of_mdio.c | 19 +++++++++++++++++-- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/net/ethernet.txt b/Documentation/devicetree/bindings/net/ethernet.txt index 41b3f3f864e84..5d88f37480b6a 100644 --- a/Documentation/devicetree/bindings/net/ethernet.txt +++ b/Documentation/devicetree/bindings/net/ethernet.txt @@ -25,7 +25,11 @@ The following properties are common to the Ethernet controllers: flow control thresholds. - tx-fifo-depth: the size of the controller's transmit fifo in bytes. This is used for components that can have configurable fifo sizes. +- managed: string, specifies the PHY management type. Supported values are: + "auto", "in-band-status". "auto" is the default, it usess MDIO for + management if fixed-link is not specified. Child nodes of the Ethernet controller are typically the individual PHY devices connected via the MDIO bus (sometimes the MDIO bus controller is separate). 
They are described in the phy.txt file in this same directory. +For non-MDIO PHY management see fixed-link.txt. diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c index 0c064485d1c2c..bec8ec2b31f63 100644 --- a/drivers/of/of_mdio.c +++ b/drivers/of/of_mdio.c @@ -263,7 +263,8 @@ EXPORT_SYMBOL(of_phy_attach); bool of_phy_is_fixed_link(struct device_node *np) { struct device_node *dn; - int len; + int len, err; + const char *managed; /* New binding */ dn = of_get_child_by_name(np, "fixed-link"); @@ -272,6 +273,10 @@ bool of_phy_is_fixed_link(struct device_node *np) return true; } + err = of_property_read_string(np, "managed", &managed); + if (err == 0 && strcmp(managed, "auto") != 0) + return true; + /* Old binding */ if (of_get_property(np, "fixed-link", &len) && len == (5 * sizeof(__be32))) @@ -286,8 +291,18 @@ int of_phy_register_fixed_link(struct device_node *np) struct fixed_phy_status status = {}; struct device_node *fixed_link_node; const __be32 *fixed_link_prop; - int len; + int len, err; struct phy_device *phy; + const char *managed; + + err = of_property_read_string(np, "managed", &managed); + if (err == 0) { + if (strcmp(managed, "in-band-status") == 0) { + /* status is zeroed, namely its .link member */ + phy = fixed_phy_register(PHY_POLL, &status, np); + return IS_ERR(phy) ? PTR_ERR(phy) : 0; + } + } /* New binding */ fixed_link_node = of_get_child_by_name(np, "fixed-link"); From d600176461e85a0bf3b963587e4ca92d494a7a47 Mon Sep 17 00:00:00 2001 From: Stas Sergeev Date: Mon, 20 Jul 2015 17:49:58 -0700 Subject: [PATCH 0986/2091] mvneta: use inband status only when explicitly enabled [ Upstream commit f8af8e6eb95093d5ce5ebcc52bd1929b0433e172 in net-next tree, will be pushed to Linus very soon. ] The commit 898b2970e2c9 ("mvneta: implement SGMII-based in-band link state signaling") implemented the link parameters auto-negotiation unconditionally. Unfortunately it appears that some HW that implements SGMII protocol, doesn't generate the inband status, so it is not possible to auto-negotiate anything with such HW. This patch enables the auto-negotiation only if explicitly requested with the 'managed' DT property. This patch fixes the following regression: https://lkml.org/lkml/2015/7/8/865 Signed-off-by: Stas Sergeev CC: Thomas Petazzoni CC: netdev@vger.kernel.org CC: linux-kernel@vger.kernel.org Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/marvell/mvneta.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 74d0389bf2332..4d608f0117cd9 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -3029,8 +3029,8 @@ static int mvneta_probe(struct platform_device *pdev) const char *dt_mac_addr; char hw_mac_addr[ETH_ALEN]; const char *mac_from; + const char *managed; int phy_mode; - int fixed_phy = 0; int err; /* Our multiqueue support is not complete, so for now, only @@ -3064,7 +3064,6 @@ static int mvneta_probe(struct platform_device *pdev) dev_err(&pdev->dev, "cannot register fixed PHY\n"); goto err_free_irq; } - fixed_phy = 1; /* In the case of a fixed PHY, the DT node associated * to the PHY is the Ethernet MAC DT node. 
@@ -3088,8 +3087,10 @@ static int mvneta_probe(struct platform_device *pdev) pp = netdev_priv(dev); pp->phy_node = phy_node; pp->phy_interface = phy_mode; - pp->use_inband_status = (phy_mode == PHY_INTERFACE_MODE_SGMII) && - fixed_phy; + + err = of_property_read_string(dn, "managed", &managed); + pp->use_inband_status = (err == 0 && + strcmp(managed, "in-band-status") == 0); pp->clk = devm_clk_get(&pdev->dev, NULL); if (IS_ERR(pp->clk)) { From 4e27762417669cb459971635be550eb7b5598286 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 18 Sep 2015 19:16:50 +0800 Subject: [PATCH 0987/2091] netlink: Fix autobind race condition that leads to zero port ID [ Upstream commit 1f770c0a09da855a2b51af6d19de97fb955eca85 ] The commit c0bb07df7d981e4091432754e30c9c720e2c0c78 ("netlink: Reset portid after netlink_insert failure") introduced a race condition where if two threads try to autobind the same socket one of them may end up with a zero port ID. This led to kernel deadlocks that were observed by multiple people. This patch reverts that commit and instead fixes it by introducing a separte rhash_portid variable so that the real portid is only set after the socket has been successfully hashed. Fixes: c0bb07df7d98 ("netlink: Reset portid after netlink_insert failure") Reported-by: Tejun Heo Reported-by: Linus Torvalds Signed-off-by: Herbert Xu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/netlink/af_netlink.c | 11 ++++++----- net/netlink/af_netlink.h | 1 + 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 85aeaf27f1b54..4753dc083423f 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1017,7 +1017,7 @@ static inline int netlink_compare(struct rhashtable_compare_arg *arg, const struct netlink_compare_arg *x = arg->key; const struct netlink_sock *nlk = ptr; - return nlk->portid != x->portid || + return nlk->rhash_portid != x->portid || !net_eq(sock_net(&nlk->sk), read_pnet(&x->pnet)); } @@ -1043,7 +1043,7 @@ static int __netlink_insert(struct netlink_table *table, struct sock *sk) { struct netlink_compare_arg arg; - netlink_compare_arg_init(&arg, sock_net(sk), nlk_sk(sk)->portid); + netlink_compare_arg_init(&arg, sock_net(sk), nlk_sk(sk)->rhash_portid); return rhashtable_lookup_insert_key(&table->hash, &arg, &nlk_sk(sk)->node, netlink_rhashtable_params); @@ -1105,7 +1105,7 @@ static int netlink_insert(struct sock *sk, u32 portid) unlikely(atomic_read(&table->hash.nelems) >= UINT_MAX)) goto err; - nlk_sk(sk)->portid = portid; + nlk_sk(sk)->rhash_portid = portid; sock_hold(sk); err = __netlink_insert(table, sk); @@ -1117,10 +1117,11 @@ static int netlink_insert(struct sock *sk, u32 portid) err = -EOVERFLOW; if (err == -EEXIST) err = -EADDRINUSE; - nlk_sk(sk)->portid = 0; sock_put(sk); } + nlk_sk(sk)->portid = portid; + err: release_sock(sk); return err; @@ -3167,7 +3168,7 @@ static inline u32 netlink_hash(const void *data, u32 len, u32 seed) const struct netlink_sock *nlk = data; struct netlink_compare_arg arg; - netlink_compare_arg_init(&arg, sock_net(&nlk->sk), nlk->portid); + netlink_compare_arg_init(&arg, sock_net(&nlk->sk), nlk->rhash_portid); return jhash2((u32 *)&arg, netlink_compare_arg_len / sizeof(u32), seed); } diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h index df9a06090db61..80b2b7526dfd2 100644 --- a/net/netlink/af_netlink.h +++ b/net/netlink/af_netlink.h @@ -25,6 +25,7 @@ struct netlink_ring { struct netlink_sock { /* struct sock has to be the first 
member of netlink_sock */ struct sock sk; + u32 rhash_portid; u32 portid; u32 dst_portid; u32 dst_group; From d48623677191e0f035d7afd344f92cf880b01f8e Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 22 Sep 2015 11:38:56 +0800 Subject: [PATCH 0988/2091] netlink: Replace rhash_portid with bound [ Upstream commit da314c9923fed553a007785a901fd395b7eb6c19 ] On Mon, Sep 21, 2015 at 02:20:22PM -0400, Tejun Heo wrote: > > store_release and load_acquire are different from the usual memory > barriers and can't be paired this way. You have to pair store_release > and load_acquire. Besides, it isn't a particularly good idea to OK I've decided to drop the acquire/release helpers as they don't help us at all and simply pessimises the code by using full memory barriers (on some architectures) where only a write or read barrier is needed. > depend on memory barriers embedded in other data structures like the > above. Here, especially, rhashtable_insert() would have write barrier > *before* the entry is hashed not necessarily *after*, which means that > in the above case, a socket which appears to have set bound to a > reader might not visible when the reader tries to look up the socket > on the hashtable. But you are right we do need an explicit write barrier here to ensure that the hashing is visible. > There's no reason to be overly smart here. This isn't a crazy hot > path, write barriers tend to be very cheap, store_release more so. > Please just do smp_store_release() and note what it's paired with. It's not about being overly smart. It's about actually understanding what's going on with the code. I've seen too many instances of people simply sprinkling synchronisation primitives around without any knowledge of what is happening underneath, which is just a recipe for creating hard-to-debug races. > > @@ -1539,7 +1546,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, > > } > > } > > > > - if (!nlk->portid) { > > + if (!nlk->bound) { > > I don't think you can skip load_acquire here just because this is the > second deref of the variable. That doesn't change anything. Race > condition could still happen between the first and second tests and > skipping the second would lead to the same kind of bug. The reason this one is OK is because we do not use nlk->portid or try to get nlk from the hash table before we return to user-space. However, there is a real bug here that none of these acquire/release helpers discovered. The two bound tests here used to be a single one. Now that they are separate it is entirely possible for another thread to come in the middle and bind the socket. So we need to repeat the portid check in order to maintain consistency. > > @@ -1587,7 +1594,7 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr, > > !netlink_allowed(sock, NL_CFG_F_NONROOT_SEND)) > > return -EPERM; > > > > - if (!nlk->portid) > > + if (!nlk->bound) > > Don't we need load_acquire here too? Is this path holding a lock > which makes that unnecessary? Ditto. ---8<--- The commit 1f770c0a09da855a2b51af6d19de97fb955eca85 ("netlink: Fix autobind race condition that leads to zero port ID") created some new races that can occur due to inconcsistencies between the two port IDs. Tejun is right that a barrier is unavoidable. Therefore I am reverting to the original patch that used a boolean to indicate that a user netlink socket has been bound. Barriers have been added where necessary to ensure that a valid portid and the hashed socket is visible. 
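As an aside, the pairing described above is the usual publish/observe pattern: the bind path has to make the portid and the hash-table insertion visible before it raises the bound flag, and any reader that tests the flag has to issue the paired read barrier before trusting those fields. The standalone sketch below expresses that ordering with C11 release/acquire atomics purely for illustration; the patch itself uses smp_wmb()/smp_rmb(), and every name in the sketch is hypothetical, not taken from the netlink code.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

struct sock_state {
        uint32_t portid;        /* published data */
        atomic_bool bound;      /* publication flag */
};

static void publish_bind(struct sock_state *s, uint32_t portid)
{
        s->portid = portid;     /* 1: write the data (in the kernel: portid + hash insertion) */
        atomic_store_explicit(&s->bound, true,
                              memory_order_release);  /* 2: only then raise the flag */
}

static bool lookup_portid(struct sock_state *s, uint32_t *portid)
{
        if (!atomic_load_explicit(&s->bound, memory_order_acquire))
                return false;   /* not bound yet */
        *portid = s->portid;    /* safe: the acquire pairs with the release above */
        return true;
}

int main(void)
{
        struct sock_state s = { 0 };
        uint32_t pid;

        publish_bind(&s, 4242);
        return lookup_portid(&s, &pid) ? 0 : 1;
}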
I have also changed netlink_insert to only return EBUSY if the socket is bound to a portid different to the requested one. This combined with only reading nlk->bound once in netlink_bind fixes a race where two threads that bind the socket at the same time with different port IDs may both succeed. Fixes: 1f770c0a09da ("netlink: Fix autobind race condition that leads to zero port ID") Reported-by: Tejun Heo Reported-by: Linus Torvalds Signed-off-by: Herbert Xu Nacked-by: Tejun Heo Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/netlink/af_netlink.c | 39 ++++++++++++++++++++++++++++----------- net/netlink/af_netlink.h | 2 +- 2 files changed, 29 insertions(+), 12 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 4753dc083423f..980121e75d2ec 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1017,7 +1017,7 @@ static inline int netlink_compare(struct rhashtable_compare_arg *arg, const struct netlink_compare_arg *x = arg->key; const struct netlink_sock *nlk = ptr; - return nlk->rhash_portid != x->portid || + return nlk->portid != x->portid || !net_eq(sock_net(&nlk->sk), read_pnet(&x->pnet)); } @@ -1043,7 +1043,7 @@ static int __netlink_insert(struct netlink_table *table, struct sock *sk) { struct netlink_compare_arg arg; - netlink_compare_arg_init(&arg, sock_net(sk), nlk_sk(sk)->rhash_portid); + netlink_compare_arg_init(&arg, sock_net(sk), nlk_sk(sk)->portid); return rhashtable_lookup_insert_key(&table->hash, &arg, &nlk_sk(sk)->node, netlink_rhashtable_params); @@ -1096,8 +1096,8 @@ static int netlink_insert(struct sock *sk, u32 portid) lock_sock(sk); - err = -EBUSY; - if (nlk_sk(sk)->portid) + err = nlk_sk(sk)->portid == portid ? 0 : -EBUSY; + if (nlk_sk(sk)->bound) goto err; err = -ENOMEM; @@ -1105,7 +1105,7 @@ static int netlink_insert(struct sock *sk, u32 portid) unlikely(atomic_read(&table->hash.nelems) >= UINT_MAX)) goto err; - nlk_sk(sk)->rhash_portid = portid; + nlk_sk(sk)->portid = portid; sock_hold(sk); err = __netlink_insert(table, sk); @@ -1120,7 +1120,9 @@ static int netlink_insert(struct sock *sk, u32 portid) sock_put(sk); } - nlk_sk(sk)->portid = portid; + /* We need to ensure that the socket is hashed and visible. */ + smp_wmb(); + nlk_sk(sk)->bound = portid; err: release_sock(sk); @@ -1501,6 +1503,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr; int err; long unsigned int groups = nladdr->nl_groups; + bool bound; if (addr_len < sizeof(struct sockaddr_nl)) return -EINVAL; @@ -1517,9 +1520,14 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, return err; } - if (nlk->portid) + bound = nlk->bound; + if (bound) { + /* Ensure nlk->portid is up-to-date. */ + smp_rmb(); + if (nladdr->nl_pid != nlk->portid) return -EINVAL; + } if (nlk->netlink_bind && groups) { int group; @@ -1535,7 +1543,10 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, } } - if (!nlk->portid) { + /* No need for barriers here as we return to user-space without + * using any of the bound attributes. + */ + if (!bound) { err = nladdr->nl_pid ? netlink_insert(sk, nladdr->nl_pid) : netlink_autobind(sock); @@ -1583,7 +1594,10 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr, !netlink_allowed(sock, NL_CFG_F_NONROOT_SEND)) return -EPERM; - if (!nlk->portid) + /* No need for barriers here as we return to user-space without + * using any of the bound attributes. 
+ */ + if (!nlk->bound) err = netlink_autobind(sock); if (err == 0) { @@ -2340,10 +2354,13 @@ static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) dst_group = nlk->dst_group; } - if (!nlk->portid) { + if (!nlk->bound) { err = netlink_autobind(sock); if (err) goto out; + } else { + /* Ensure nlk is hashed and visible. */ + smp_rmb(); } /* It's a really convoluted way for userland to ask for mmaped @@ -3168,7 +3185,7 @@ static inline u32 netlink_hash(const void *data, u32 len, u32 seed) const struct netlink_sock *nlk = data; struct netlink_compare_arg arg; - netlink_compare_arg_init(&arg, sock_net(&nlk->sk), nlk->rhash_portid); + netlink_compare_arg_init(&arg, sock_net(&nlk->sk), nlk->portid); return jhash2((u32 *)&arg, netlink_compare_arg_len / sizeof(u32), seed); } diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h index 80b2b7526dfd2..14437d9b1965d 100644 --- a/net/netlink/af_netlink.h +++ b/net/netlink/af_netlink.h @@ -25,7 +25,6 @@ struct netlink_ring { struct netlink_sock { /* struct sock has to be the first member of netlink_sock */ struct sock sk; - u32 rhash_portid; u32 portid; u32 dst_portid; u32 dst_group; @@ -36,6 +35,7 @@ struct netlink_sock { unsigned long state; size_t max_recvmsg_len; wait_queue_head_t wait; + bool bound; bool cb_running; struct netlink_callback cb; struct mutex *cb_mutex; From 6e3105d5b2418166d48f162758fc3ed8bb77d997 Mon Sep 17 00:00:00 2001 From: Luis Henriques Date: Thu, 17 Sep 2015 16:01:40 -0700 Subject: [PATCH 0989/2091] zram: fix possible use after free in zcomp_create() commit 3aaf14da807a4e9931a37f21e4251abb8a67021b upstream. zcomp_create() verifies the success of zcomp_strm_{multi,single}_create() through comp->stream, which can potentially be pointing to memory that was freed if these functions returned an error. While at it, replace a 'ERR_PTR(-ENOMEM)' by a more generic 'ERR_PTR(error)' as in the future zcomp_strm_{multi,siggle}_create() could return other error codes. Function documentation updated accordingly. Fixes: beca3ec71fe5 ("zram: add multi stream functionality") Signed-off-by: Luis Henriques Acked-by: Sergey Senozhatsky Acked-by: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/block/zram/zcomp.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c index f1ff39a3d1c12..54d946a9eee60 100644 --- a/drivers/block/zram/zcomp.c +++ b/drivers/block/zram/zcomp.c @@ -325,12 +325,14 @@ void zcomp_destroy(struct zcomp *comp) * allocate new zcomp and initialize it. return compressing * backend pointer or ERR_PTR if things went bad. ERR_PTR(-EINVAL) * if requested algorithm is not supported, ERR_PTR(-ENOMEM) in - * case of allocation error. + * case of allocation error, or any other error potentially + * returned by functions zcomp_strm_{multi,single}_create. 
*/ struct zcomp *zcomp_create(const char *compress, int max_strm) { struct zcomp *comp; struct zcomp_backend *backend; + int error; backend = find_backend(compress); if (!backend) @@ -342,12 +344,12 @@ struct zcomp *zcomp_create(const char *compress, int max_strm) comp->backend = backend; if (max_strm > 1) - zcomp_strm_multi_create(comp, max_strm); + error = zcomp_strm_multi_create(comp, max_strm); else - zcomp_strm_single_create(comp); - if (!comp->stream) { + error = zcomp_strm_single_create(comp); + if (error) { kfree(comp); - return ERR_PTR(-ENOMEM); + return ERR_PTR(error); } return comp; } From 85695fdcbb8a7bffe8e0782b2f264c18ad264ecc Mon Sep 17 00:00:00 2001 From: Kyle Evans Date: Fri, 11 Sep 2015 10:40:17 -0500 Subject: [PATCH 0990/2091] hp-wmi: limit hotkey enable commit 8a1513b49321e503fd6c8b6793e3b1f9a8a3285b upstream. Do not write initialize magic on systems that do not have feature query 0xb. Fixes Bug #82451. Redefine FEATURE_QUERY to align with 0xb and FEATURE2 with 0xd for code clearity. Add a new test function, hp_wmi_bios_2008_later() & simplify hp_wmi_bios_2009_later(), which fixes a bug in cases where an improper value is returned. Probably also fixes Bug #69131. Add missing __init tag. Signed-off-by: Kyle Evans Signed-off-by: Darren Hart Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/hp-wmi.c | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c index 06697315a0887..fb4dd7b3ee711 100644 --- a/drivers/platform/x86/hp-wmi.c +++ b/drivers/platform/x86/hp-wmi.c @@ -54,8 +54,9 @@ MODULE_ALIAS("wmi:5FB7F034-2C63-45e9-BE91-3D44E2C707E4"); #define HPWMI_HARDWARE_QUERY 0x4 #define HPWMI_WIRELESS_QUERY 0x5 #define HPWMI_BIOS_QUERY 0x9 +#define HPWMI_FEATURE_QUERY 0xb #define HPWMI_HOTKEY_QUERY 0xc -#define HPWMI_FEATURE_QUERY 0xd +#define HPWMI_FEATURE2_QUERY 0xd #define HPWMI_WIRELESS2_QUERY 0x1b #define HPWMI_POSTCODEERROR_QUERY 0x2a @@ -295,25 +296,33 @@ static int hp_wmi_tablet_state(void) return (state & 0x4) ? 1 : 0; } -static int __init hp_wmi_bios_2009_later(void) +static int __init hp_wmi_bios_2008_later(void) { int state = 0; int ret = hp_wmi_perform_query(HPWMI_FEATURE_QUERY, 0, &state, sizeof(state), sizeof(state)); - if (ret) - return ret; + if (!ret) + return 1; - return (state & 0x10) ? 1 : 0; + return (ret == HPWMI_RET_UNKNOWN_CMDTYPE) ? 0 : -ENXIO; } -static int hp_wmi_enable_hotkeys(void) +static int __init hp_wmi_bios_2009_later(void) { - int ret; - int query = 0x6e; + int state = 0; + int ret = hp_wmi_perform_query(HPWMI_FEATURE2_QUERY, 0, &state, + sizeof(state), sizeof(state)); + if (!ret) + return 1; - ret = hp_wmi_perform_query(HPWMI_BIOS_QUERY, 1, &query, sizeof(query), - 0); + return (ret == HPWMI_RET_UNKNOWN_CMDTYPE) ? 
0 : -ENXIO; +} +static int __init hp_wmi_enable_hotkeys(void) +{ + int value = 0x6e; + int ret = hp_wmi_perform_query(HPWMI_BIOS_QUERY, 1, &value, + sizeof(value), 0); if (ret) return -EINVAL; return 0; @@ -663,7 +672,7 @@ static int __init hp_wmi_input_setup(void) hp_wmi_tablet_state()); input_sync(hp_wmi_input_dev); - if (hp_wmi_bios_2009_later() == 4) + if (!hp_wmi_bios_2009_later() && hp_wmi_bios_2008_later()) hp_wmi_enable_hotkeys(); status = wmi_install_notify_handler(HPWMI_EVENT_GUID, hp_wmi_notify, NULL); From 27f1b7fed9c305ef46f8708f1bdde9cdb5f166bd Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 3 Oct 2015 13:49:38 +0200 Subject: [PATCH 0991/2091] Linux 4.1.10 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index e071176b2ce6e..d02f16b510dc5 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 1 -SUBLEVEL = 9 +SUBLEVEL = 10 EXTRAVERSION = NAME = Series 4800 From 10e259a1ec2e4b39e1a9d49d51dbd152a8e5f3c3 Mon Sep 17 00:00:00 2001 From: Marek Majtyka Date: Wed, 16 Sep 2015 12:04:55 +0200 Subject: [PATCH 0992/2091] arm: KVM: Fix incorrect device to IPA mapping commit ca09f02f122b2ecb0f5ddfc5fd47b29ed657d4fd upstream. A critical bug has been found in device memory stage1 translation for VMs with more then 4GB of address space. Once vm_pgoff size is smaller then pa (which is true for LPAE case, u32 and u64 respectively) some more significant bits of pa may be lost as a shift operation is performed on u32 and later cast onto u64. Example: vm_pgoff(u32)=0x00210030, PAGE_SHIFT=12 expected pa(u64): 0x0000002010030000 produced pa(u64): 0x0000000010030000 The fix is to change the order of operations (casting first onto phys_addr_t and then shifting). Reviewed-by: Marc Zyngier [maz: fixed changelog and patch formatting] Signed-off-by: Marek Majtyka Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- arch/arm/kvm/mmu.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 1d5accbd3dcf2..191dcfab9f609 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -1790,8 +1790,10 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, if (vma->vm_flags & VM_PFNMAP) { gpa_t gpa = mem->guest_phys_addr + (vm_start - mem->userspace_addr); - phys_addr_t pa = (vma->vm_pgoff << PAGE_SHIFT) + - vm_start - vma->vm_start; + phys_addr_t pa; + + pa = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT; + pa += vm_start - vma->vm_start; /* IO region dirty page logging not allowed */ if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) From 35f5af9cb6994ee92e27b2cd5936142337983284 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Wed, 16 Sep 2015 19:31:11 +0800 Subject: [PATCH 0993/2091] KVM: vmx: fix VPID is 0000H in non-root operation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 04bb92e4b4cf06a66889d37b892b78f926faa9d4 upstream. Reference SDM 28.1: The current VPID is 0000H in the following situations: - Outside VMX operation. (This includes operation in system-management mode under the default treatment of SMIs and SMM with VMX operation; see Section 34.14.) - In VMX root operation. - In VMX non-root operation when the “enable VPID” VM-execution control is 0. The VPID should never be 0000H in non-root operation when "enable VPID" VM-execution control is 1. 
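In other words, VPID 0000H must stay reserved for the host, and keeping bit 0 of the VPID bitmap permanently set is what enforces that. The user-space sketch below is illustrative only: it mirrors the set_bit(0, vmx_vpid_bitmap) reservation visible in the diff further down, not the actual arch/x86/kvm/vmx.c allocator (locking and error handling are omitted, and all names are made up).

#include <limits.h>
#include <stdio.h>

#define NR_VPIDS      65536
#define BITS_PER_LONG (sizeof(unsigned long) * CHAR_BIT)

static unsigned long vpid_bitmap[NR_VPIDS / BITS_PER_LONG];

static void vpid_reserve(unsigned int id)
{
        vpid_bitmap[id / BITS_PER_LONG] |= 1UL << (id % BITS_PER_LONG);
}

static int vpid_is_set(unsigned int id)
{
        return !!(vpid_bitmap[id / BITS_PER_LONG] & (1UL << (id % BITS_PER_LONG)));
}

static unsigned int vpid_alloc(void)
{
        for (unsigned int id = 0; id < NR_VPIDS; id++) {
                if (!vpid_is_set(id)) {
                        vpid_reserve(id);
                        return id;
                }
        }
        return 0;       /* exhausted: 0 here means "no dedicated VPID" */
}

int main(void)
{
        vpid_reserve(0);        /* analogue of set_bit(0, vmx_vpid_bitmap): host keeps VPID 0 */
        printf("first guest VPID: %u\n", vpid_alloc());  /* prints 1, never 0 */
        return 0;
}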
However, commit 34a1cd60 ("kvm: x86: vmx: move some vmx setting from vmx_init() to hardware_setup()") remove the codes which reserve 0000H for VMX root operation. This patch fix it by again reserving 0000H for VMX root operation. Fixes: 34a1cd60d17f62c1f077c1478a6c2ca8c3d17af4 Reported-by: Wincy Van Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 2d73807f0d317..bc3041e1abbc8 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6144,6 +6144,8 @@ static __init int hardware_setup(void) memcpy(vmx_msr_bitmap_longmode_x2apic, vmx_msr_bitmap_longmode, PAGE_SIZE); + set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ + if (enable_apicv) { for (msr = 0x800; msr <= 0x8ff; msr++) vmx_disable_intercept_msr_read_x2apic(msr); From 87f3ae1f34a06fb5296a4b0d763737171c3f2d65 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 15 Sep 2015 14:41:54 +0800 Subject: [PATCH 0994/2091] kvm: don't try to register to KVM_FAST_MMIO_BUS for non mmio eventfd commit 8453fecbecae26edb3f278627376caab05d9a88d upstream. We only want zero length mmio eventfd to be registered on KVM_FAST_MMIO_BUS. So check this explicitly when arg->len is zero to make sure this. Cc: Gleb Natapov Cc: Paolo Bonzini Signed-off-by: Jason Wang Reviewed-by: Cornelia Huck Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- virt/kvm/eventfd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 9ff4193dfa493..e404806e3a7af 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -846,7 +846,7 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) /* When length is ignored, MMIO is also put on a separate bus, for * faster lookups. */ - if (!args->len && !(args->flags & KVM_IOEVENTFD_FLAG_PIO)) { + if (!args->len && bus_idx == KVM_MMIO_BUS) { ret = kvm_io_bus_register_dev(kvm, KVM_FAST_MMIO_BUS, p->addr, 0, &p->dev); if (ret < 0) @@ -901,7 +901,7 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) continue; kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev); - if (!p->length) { + if (!p->length && p->bus_idx == KVM_MMIO_BUS) { kvm_io_bus_unregister_dev(kvm, KVM_FAST_MMIO_BUS, &p->dev); } From 989bbb3a4c25d9e21a791a4d23cf24610270db63 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 15 Sep 2015 14:41:57 +0800 Subject: [PATCH 0995/2091] kvm: fix zero length mmio searching commit 8f4216c7d28976f7ec1b2bcbfa0a9f787133c45e upstream. Currently, if we had a zero length mmio eventfd assigned on KVM_MMIO_BUS. It will never be found by kvm_io_bus_cmp() since it always compares the kvm_io_range() with the length that guest wrote. This will cause e.g for vhost, kick will be trapped by qemu userspace instead of vhost. Fixing this by using zero length if an iodevice is zero length. 
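The behavioural change is easiest to see in isolation: with a zero-length registered range, the lookup must compare exact addresses rather than range ends, otherwise any real-sized guest access sorts past the zero-length entry and never matches it. Below is a small user-space sketch that simply mirrors the patched comparison from the diff that follows; it is illustrative only, not the kernel code.

#include <stdint.h>
#include <stdio.h>

struct io_range {
        uint64_t addr;
        int len;
};

static int io_range_cmp(const struct io_range *r1, const struct io_range *r2)
{
        uint64_t addr1 = r1->addr;
        uint64_t addr2 = r2->addr;

        if (addr1 < addr2)
                return -1;

        /* Only extend the endpoints when the registered range (r2) has a
         * real length; for len == 0 the comparison stays exact. */
        if (r2->len) {
                addr1 += r1->len;
                addr2 += r2->len;
        }

        if (addr1 > addr2)
                return 1;
        return 0;
}

int main(void)
{
        struct io_range guest_write = { .addr = 0xfe003000, .len = 4 };
        struct io_range wildcard    = { .addr = 0xfe003000, .len = 0 };

        /* Prints 0 (match). Before the fix the lengths were always added,
         * so this returned 1 and the zero-length eventfd was never found. */
        printf("cmp = %d\n", io_range_cmp(&guest_write, &wildcard));
        return 0;
}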
Cc: Gleb Natapov Cc: Paolo Bonzini Signed-off-by: Jason Wang Reviewed-by: Cornelia Huck Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- virt/kvm/kvm_main.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 90977418aeb6e..85422985235f2 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2935,10 +2935,25 @@ static void kvm_io_bus_destroy(struct kvm_io_bus *bus) static inline int kvm_io_bus_cmp(const struct kvm_io_range *r1, const struct kvm_io_range *r2) { - if (r1->addr < r2->addr) + gpa_t addr1 = r1->addr; + gpa_t addr2 = r2->addr; + + if (addr1 < addr2) return -1; - if (r1->addr + r1->len > r2->addr + r2->len) + + /* If r2->len == 0, match the exact address. If r2->len != 0, + * accept any overlapping write. Any order is acceptable for + * overlapping ranges, because kvm_io_bus_get_first_dev ensures + * we process all of them. + */ + if (r2->len) { + addr1 += r1->len; + addr2 += r2->len; + } + + if (addr1 > addr2) return 1; + return 0; } From 348a20a23f03953f1a0ab57b55cd9079084b14f4 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 15 Sep 2015 14:41:55 +0800 Subject: [PATCH 0996/2091] kvm: factor out core eventfd assign/deassign logic commit 85da11ca587c8eb73993a1b503052391a73586f9 upstream. This patch factors out core eventfd assign/deassign logic and leaves the argument checking and bus index selection to callers. Cc: Gleb Natapov Cc: Paolo Bonzini Signed-off-by: Jason Wang Reviewed-by: Cornelia Huck Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- virt/kvm/eventfd.c | 85 +++++++++++++++++++++++++++------------------- 1 file changed, 50 insertions(+), 35 deletions(-) diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index e404806e3a7af..0829c7f197a0d 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -771,40 +771,14 @@ static enum kvm_bus ioeventfd_bus_from_flags(__u32 flags) return KVM_MMIO_BUS; } -static int -kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) +static int kvm_assign_ioeventfd_idx(struct kvm *kvm, + enum kvm_bus bus_idx, + struct kvm_ioeventfd *args) { - enum kvm_bus bus_idx; - struct _ioeventfd *p; - struct eventfd_ctx *eventfd; - int ret; - - bus_idx = ioeventfd_bus_from_flags(args->flags); - /* must be natural-word sized, or 0 to ignore length */ - switch (args->len) { - case 0: - case 1: - case 2: - case 4: - case 8: - break; - default: - return -EINVAL; - } - - /* check for range overflow */ - if (args->addr + args->len < args->addr) - return -EINVAL; - /* check for extra flags that we don't understand */ - if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK) - return -EINVAL; - - /* ioeventfd with no length can't be combined with DATAMATCH */ - if (!args->len && - args->flags & (KVM_IOEVENTFD_FLAG_PIO | - KVM_IOEVENTFD_FLAG_DATAMATCH)) - return -EINVAL; + struct eventfd_ctx *eventfd; + struct _ioeventfd *p; + int ret; eventfd = eventfd_ctx_fdget(args->fd); if (IS_ERR(eventfd)) @@ -873,14 +847,13 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) } static int -kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) +kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx, + struct kvm_ioeventfd *args) { - enum kvm_bus bus_idx; struct _ioeventfd *p, *tmp; struct eventfd_ctx *eventfd; int ret = -ENOENT; - bus_idx = ioeventfd_bus_from_flags(args->flags); eventfd = eventfd_ctx_fdget(args->fd); if (IS_ERR(eventfd)) return PTR_ERR(eventfd); @@ -918,6 +891,48 @@ 
kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) return ret; } +static int kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) +{ + enum kvm_bus bus_idx = ioeventfd_bus_from_flags(args->flags); + + return kvm_deassign_ioeventfd_idx(kvm, bus_idx, args); +} + +static int +kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) +{ + enum kvm_bus bus_idx; + + bus_idx = ioeventfd_bus_from_flags(args->flags); + /* must be natural-word sized, or 0 to ignore length */ + switch (args->len) { + case 0: + case 1: + case 2: + case 4: + case 8: + break; + default: + return -EINVAL; + } + + /* check for range overflow */ + if (args->addr + args->len < args->addr) + return -EINVAL; + + /* check for extra flags that we don't understand */ + if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK) + return -EINVAL; + + /* ioeventfd with no length can't be combined with DATAMATCH */ + if (!args->len && + args->flags & (KVM_IOEVENTFD_FLAG_PIO | + KVM_IOEVENTFD_FLAG_DATAMATCH)) + return -EINVAL; + + return kvm_assign_ioeventfd_idx(kvm, bus_idx, args); +} + int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) { From c286128ffc7ffaeaf5cf8d7b0b64c9883160a636 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 15 Sep 2015 14:41:56 +0800 Subject: [PATCH 0997/2091] kvm: fix double free for fast mmio eventfd commit eefd6b06b17c5478e7c24bea6f64beaa2c431ca6 upstream. We register wildcard mmio eventfd on two buses, once for KVM_MMIO_BUS and once on KVM_FAST_MMIO_BUS but with a single iodev instance. This will lead to an issue: kvm_io_bus_destroy() knows nothing about the devices on two buses pointing to a single dev. Which will lead to double free[1] during exit. Fix this by allocating two instances of iodevs then registering one on KVM_MMIO_BUS and another on KVM_FAST_MMIO_BUS. CPU: 1 PID: 2894 Comm: qemu-system-x86 Not tainted 3.19.0-26-generic #28-Ubuntu Hardware name: LENOVO 2356BG6/2356BG6, BIOS G7ET96WW (2.56 ) 09/12/2013 task: ffff88009ae0c4b0 ti: ffff88020e7f0000 task.ti: ffff88020e7f0000 RIP: 0010:[] [] ioeventfd_release+0x28/0x60 [kvm] RSP: 0018:ffff88020e7f3bc8 EFLAGS: 00010292 RAX: dead000000200200 RBX: ffff8801ec19c900 RCX: 000000018200016d RDX: ffff8801ec19cf80 RSI: ffffea0008bf1d40 RDI: ffff8801ec19c900 RBP: ffff88020e7f3bd8 R08: 000000002fc75a01 R09: 000000018200016d R10: ffffffffc07df6ae R11: ffff88022fc75a98 R12: ffff88021e7cc000 R13: ffff88021e7cca48 R14: ffff88021e7cca50 R15: ffff8801ec19c880 FS: 00007fc1ee3e6700(0000) GS:ffff88023e240000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f8f389d8000 CR3: 000000023dc13000 CR4: 00000000001427e0 Stack: ffff88021e7cc000 0000000000000000 ffff88020e7f3be8 ffffffffc07e2622 ffff88020e7f3c38 ffffffffc07df69a ffff880232524160 ffff88020e792d80 0000000000000000 ffff880219b78c00 0000000000000008 ffff8802321686a8 Call Trace: [] ioeventfd_destructor+0x12/0x20 [kvm] [] kvm_put_kvm+0xca/0x210 [kvm] [] kvm_vcpu_release+0x18/0x20 [kvm] [] __fput+0xe7/0x250 [] ____fput+0xe/0x10 [] task_work_run+0xd4/0xf0 [] do_exit+0x368/0xa50 [] ? recalc_sigpending+0x1f/0x60 [] do_group_exit+0x45/0xb0 [] get_signal+0x291/0x750 [] do_signal+0x28/0xab0 [] ? do_futex+0xdb/0x5d0 [] ? __wake_up_locked_key+0x18/0x20 [] ? 
SyS_futex+0x76/0x170 [] do_notify_resume+0x69/0xb0 [] int_signal+0x12/0x17 Code: 5d c3 90 0f 1f 44 00 00 55 48 89 e5 53 48 89 fb 48 83 ec 08 48 8b 7f 20 e8 06 d6 a5 c0 48 8b 43 08 48 8b 13 48 89 df 48 89 42 08 <48> 89 10 48 b8 00 01 10 00 00 RIP [] ioeventfd_release+0x28/0x60 [kvm] RSP Cc: Gleb Natapov Cc: Paolo Bonzini Signed-off-by: Jason Wang Reviewed-by: Cornelia Huck Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- virt/kvm/eventfd.c | 43 +++++++++++++++++++++++++------------------ 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 0829c7f197a0d..79db45336e3a2 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -817,16 +817,6 @@ static int kvm_assign_ioeventfd_idx(struct kvm *kvm, if (ret < 0) goto unlock_fail; - /* When length is ignored, MMIO is also put on a separate bus, for - * faster lookups. - */ - if (!args->len && bus_idx == KVM_MMIO_BUS) { - ret = kvm_io_bus_register_dev(kvm, KVM_FAST_MMIO_BUS, - p->addr, 0, &p->dev); - if (ret < 0) - goto register_fail; - } - kvm->buses[bus_idx]->ioeventfd_count++; list_add_tail(&p->list, &kvm->ioeventfds); @@ -834,8 +824,6 @@ static int kvm_assign_ioeventfd_idx(struct kvm *kvm, return 0; -register_fail: - kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev); unlock_fail: mutex_unlock(&kvm->slots_lock); @@ -874,10 +862,6 @@ kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx, continue; kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev); - if (!p->length && p->bus_idx == KVM_MMIO_BUS) { - kvm_io_bus_unregister_dev(kvm, KVM_FAST_MMIO_BUS, - &p->dev); - } kvm->buses[bus_idx]->ioeventfd_count--; ioeventfd_release(p); ret = 0; @@ -894,14 +878,19 @@ kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx, static int kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) { enum kvm_bus bus_idx = ioeventfd_bus_from_flags(args->flags); + int ret = kvm_deassign_ioeventfd_idx(kvm, bus_idx, args); + + if (!args->len && bus_idx == KVM_MMIO_BUS) + kvm_deassign_ioeventfd_idx(kvm, KVM_FAST_MMIO_BUS, args); - return kvm_deassign_ioeventfd_idx(kvm, bus_idx, args); + return ret; } static int kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) { enum kvm_bus bus_idx; + int ret; bus_idx = ioeventfd_bus_from_flags(args->flags); /* must be natural-word sized, or 0 to ignore length */ @@ -930,7 +919,25 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) KVM_IOEVENTFD_FLAG_DATAMATCH)) return -EINVAL; - return kvm_assign_ioeventfd_idx(kvm, bus_idx, args); + ret = kvm_assign_ioeventfd_idx(kvm, bus_idx, args); + if (ret) + goto fail; + + /* When length is ignored, MMIO is also put on a separate bus, for + * faster lookups. + */ + if (!args->len && bus_idx == KVM_MMIO_BUS) { + ret = kvm_assign_ioeventfd_idx(kvm, KVM_FAST_MMIO_BUS, args); + if (ret < 0) + goto fast_fail; + } + + return 0; + +fast_fail: + kvm_deassign_ioeventfd_idx(kvm, bus_idx, args); +fail: + return ret; } int From a5b48a8ed5004bc857cd15b4a5e4f93773251874 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 16 Sep 2015 16:18:59 +0100 Subject: [PATCH 0998/2091] arm: KVM: Disable virtual timer even if the guest is not using it commit 688bc577ac42ae3d07c889a1f0a72f0b23763d58 upstream. When running a guest with the architected timer disabled (with QEMU and the kernel_irqchip=off option, for example), it is important to make sure the timer gets turned off. Otherwise, the guest may try to enable it anyway, leading to a screaming HW interrupt. 
The fix is to unconditionally turn off the virtual timer on guest exit. Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- arch/arm/kvm/interrupts_head.S | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S index 48efe2ee452cf..58048b333d31a 100644 --- a/arch/arm/kvm/interrupts_head.S +++ b/arch/arm/kvm/interrupts_head.S @@ -518,8 +518,7 @@ ARM_BE8(rev r6, r6 ) mrc p15, 0, r2, c14, c3, 1 @ CNTV_CTL str r2, [vcpu, #VCPU_TIMER_CNTV_CTL] - bic r2, #1 @ Clear ENABLE - mcr p15, 0, r2, c14, c3, 1 @ CNTV_CTL + isb mrrc p15, 3, rr_lo_hi(r2, r3), c14 @ CNTV_CVAL @@ -532,6 +531,9 @@ ARM_BE8(rev r6, r6 ) mcrr p15, 4, r2, r2, c14 @ CNTVOFF 1: + mov r2, #0 @ Clear ENABLE + mcr p15, 0, r2, c14, c3, 1 @ CNTV_CTL + @ Allow physical timer/counter access for the host mrc p15, 4, r2, c14, c1, 0 @ CNTHCTL orr r2, r2, #(CNTHCTL_PL1PCEN | CNTHCTL_PL1PCTEN) From ffd269ee140151a04f33731a7160b571186d5bba Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Fri, 18 Sep 2015 08:57:28 +0200 Subject: [PATCH 0999/2091] KVM: PPC: Book3S: Take the kvm->srcu lock in kvmppc_h_logical_ci_load/store() commit 3eb4ee68254235e4f47bc0410538fcdaede39589 upstream. Access to the kvm->buses (like with the kvm_io_bus_read() and -write() functions) has to be protected via the kvm->srcu lock. The kvmppc_h_logical_ci_load() and -store() functions are missing this lock so far, so let's add it there, too. This fixes the problem that the kernel reports "suspicious RCU usage" when lock debugging is enabled. Fixes: 99342cf8044420eebdf9297ca03a14cb6a7085a1 Signed-off-by: Thomas Huth Signed-off-by: Paul Mackerras Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kvm/book3s.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 453a8a47a4676..964c0ce584ce2 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -826,12 +826,15 @@ int kvmppc_h_logical_ci_load(struct kvm_vcpu *vcpu) unsigned long size = kvmppc_get_gpr(vcpu, 4); unsigned long addr = kvmppc_get_gpr(vcpu, 5); u64 buf; + int srcu_idx; int ret; if (!is_power_of_2(size) || (size > sizeof(buf))) return H_TOO_HARD; + srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, size, &buf); + srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); if (ret != 0) return H_TOO_HARD; @@ -866,6 +869,7 @@ int kvmppc_h_logical_ci_store(struct kvm_vcpu *vcpu) unsigned long addr = kvmppc_get_gpr(vcpu, 5); unsigned long val = kvmppc_get_gpr(vcpu, 6); u64 buf; + int srcu_idx; int ret; switch (size) { @@ -889,7 +893,9 @@ int kvmppc_h_logical_ci_store(struct kvm_vcpu *vcpu) return H_TOO_HARD; } + srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, addr, size, &buf); + srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); if (ret != 0) return H_TOO_HARD; From 6d9cc6c15c8a1019263ab8a85eeb38dcf3c3c9d6 Mon Sep 17 00:00:00 2001 From: "Gautham R. Shenoy" Date: Thu, 21 May 2015 13:57:04 +0530 Subject: [PATCH 1000/2091] KVM: PPC: Book3S HV: Pass the correct trap argument to kvmhv_commence_exit commit 7e022e717f54897e396504306d0c9b61452adf4e upstream. In guest_exit_cont we call kvmhv_commence_exit which expects the trap number as the argument. However r3 doesn't contain the trap number at this point and as a result we would be calling the function with a spurious trap number. Fix this by copying r12 into r3 before calling kvmhv_commence_exit as r12 contains the trap number. 
Fixes: eddb60fb1443 Signed-off-by: Gautham R. Shenoy Signed-off-by: Paul Mackerras Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 3b2d2c5b6376c..ffd98b2bfa166 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1171,6 +1171,7 @@ mc_cont: bl kvmhv_accumulate_time #endif + mr r3, r12 /* Increment exit count, poke other threads to exit */ bl kvmhv_commence_exit nop From a5428d82f04e051e9c0d1da2b353c21be01600c7 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 9 Sep 2015 16:07:30 -0700 Subject: [PATCH 1001/2091] time: Fix timekeeping_freqadjust()'s incorrect use of abs() instead of abs64() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 2619d7e9c92d524cb155ec89fd72875321512e5b upstream. The internal clocksteering done for fine-grained error correction uses a logarithmic approximation, so any time adjtimex() adjusts the clock steering, timekeeping_freqadjust() quickly approximates the correct clock frequency over a series of ticks. Unfortunately, the logic in timekeeping_freqadjust(), introduced in commit: dc491596f639 ("timekeeping: Rework frequency adjustments to work better w/ nohz") used the abs() function with a s64 error value to calculate the size of the approximated adjustment to be made. Per include/linux/kernel.h: "abs() should not be used for 64-bit types (s64, u64, long long) - use abs64()". Thus on 32-bit platforms, this resulted in the clocksteering to take a quite dampended random walk trying to converge on the proper frequency, which caused the adjustments to be made much slower then intended (most easily observed when large adjustments are made). This patch fixes the issue by using abs64() instead. Reported-by: Nuno Gonçalves Tested-by: Nuno Goncalves Signed-off-by: John Stultz Cc: Linus Torvalds Cc: Miroslav Lichvar Cc: Peter Zijlstra Cc: Prarit Bhargava Cc: Richard Cochran Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1441840051-20244-1-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/time/timekeeping.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 946acb72179fa..414d9df947242 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1615,7 +1615,7 @@ static __always_inline void timekeeping_freqadjust(struct timekeeper *tk, negative = (tick_error < 0); /* Sort out the magnitude of the correction */ - tick_error = abs(tick_error); + tick_error = abs64(tick_error); for (adj = 0; tick_error > interval; adj++) tick_error >>= 1; From e77fb714b612371276f733947ad776ccf1f7aefe Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Mon, 24 Aug 2015 10:26:03 -0700 Subject: [PATCH 1002/2091] target/iscsi: Fix np_ip bracket issue by removing np_ip commit 76c28f1fcfeb42b47f798fe498351ee1d60086ae upstream. Revert commit 1997e6259, which causes double brackets on ipv6 inaddr_any addresses. Since we have np_sockaddr, if we need a textual representation we can use "%pISc". Change iscsit_add_network_portal() and iscsit_add_np() signatures to remove *ip_str parameter. Fix and extend some comments earlier in the function. Tested to work for :: and ::1 via iscsiadm, previously :: failed, see https://bugzilla.redhat.com/show_bug.cgi?id=1249107 . 
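The underlying cleanup is that a separately stored np_ip string (with hand-rolled IPv6 bracketing) becomes redundant once np_sockaddr is printed with the %pIS family of printk format specifiers, which handle IPv4 versus IPv6 and the bracketing themselves. A minimal kernel-style fragment for illustration only, not taken from the patch:

#include <linux/printk.h>
#include <linux/socket.h>

/* Assume 'ss' already holds the portal address, IPv4 or IPv6. */
static void log_portal(const struct sockaddr_storage *ss)
{
        /* %pISc  -> "192.168.0.1"      or "fe80::1"        */
        /* %pISpc -> "192.168.0.1:3260" or "[fe80::1]:3260" */
        pr_info("portal: %pISpc\n", ss);
}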
Signed-off-by: Andy Grover Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/target/iscsi/iscsi_target.c | 33 ++++++++++---------- drivers/target/iscsi/iscsi_target.h | 2 +- drivers/target/iscsi/iscsi_target_configfs.c | 14 ++++----- drivers/target/iscsi/iscsi_target_login.c | 8 ++--- drivers/target/iscsi/iscsi_target_tpg.c | 15 +++++---- drivers/target/iscsi/iscsi_target_tpg.h | 2 +- include/target/iscsi/iscsi_target_core.h | 1 - 7 files changed, 36 insertions(+), 39 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 0ab6e2efd28ce..330bbe8310665 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -341,7 +341,6 @@ static struct iscsi_np *iscsit_get_np( struct iscsi_np *iscsit_add_np( struct __kernel_sockaddr_storage *sockaddr, - char *ip_str, int network_transport) { struct sockaddr_in *sock_in; @@ -370,11 +369,9 @@ struct iscsi_np *iscsit_add_np( np->np_flags |= NPF_IP_NETWORK; if (sockaddr->ss_family == AF_INET6) { sock_in6 = (struct sockaddr_in6 *)sockaddr; - snprintf(np->np_ip, IPV6_ADDRESS_SPACE, "%s", ip_str); np->np_port = ntohs(sock_in6->sin6_port); } else { sock_in = (struct sockaddr_in *)sockaddr; - sprintf(np->np_ip, "%s", ip_str); np->np_port = ntohs(sock_in->sin_port); } @@ -411,8 +408,8 @@ struct iscsi_np *iscsit_add_np( list_add_tail(&np->np_list, &g_np_list); mutex_unlock(&np_lock); - pr_debug("CORE[0] - Added Network Portal: %s:%hu on %s\n", - np->np_ip, np->np_port, np->np_transport->name); + pr_debug("CORE[0] - Added Network Portal: %pISc:%hu on %s\n", + &np->np_sockaddr, np->np_port, np->np_transport->name); return np; } @@ -481,8 +478,8 @@ int iscsit_del_np(struct iscsi_np *np) list_del(&np->np_list); mutex_unlock(&np_lock); - pr_debug("CORE[0] - Removed Network Portal: %s:%hu on %s\n", - np->np_ip, np->np_port, np->np_transport->name); + pr_debug("CORE[0] - Removed Network Portal: %pISc:%hu on %s\n", + &np->np_sockaddr, np->np_port, np->np_transport->name); iscsit_put_transport(np->np_transport); kfree(np); @@ -3467,7 +3464,6 @@ iscsit_build_sendtargets_response(struct iscsi_cmd *cmd, tpg_np_list) { struct iscsi_np *np = tpg_np->tpg_np; bool inaddr_any = iscsit_check_inaddr_any(np); - char *fmt_str; if (np->np_network_transport != network_transport) continue; @@ -3495,15 +3491,18 @@ iscsit_build_sendtargets_response(struct iscsi_cmd *cmd, } } - if (np->np_sockaddr.ss_family == AF_INET6) - fmt_str = "TargetAddress=[%s]:%hu,%hu"; - else - fmt_str = "TargetAddress=%s:%hu,%hu"; - - len = sprintf(buf, fmt_str, - inaddr_any ? 
conn->local_ip : np->np_ip, - np->np_port, - tpg->tpgt); + if (inaddr_any) { + len = sprintf(buf, "TargetAddress=" + "%s:%hu,%hu", + conn->local_ip, + np->np_port, + tpg->tpgt); + } else { + len = sprintf(buf, "TargetAddress=" + "%pISpc,%hu", + &np->np_sockaddr, + tpg->tpgt); + } len += 1; if ((len + payload_len) > buffer_len) { diff --git a/drivers/target/iscsi/iscsi_target.h b/drivers/target/iscsi/iscsi_target.h index 7d0f9c00d9c25..d294f030a0978 100644 --- a/drivers/target/iscsi/iscsi_target.h +++ b/drivers/target/iscsi/iscsi_target.h @@ -13,7 +13,7 @@ extern int iscsit_deaccess_np(struct iscsi_np *, struct iscsi_portal_group *, extern bool iscsit_check_np_match(struct __kernel_sockaddr_storage *, struct iscsi_np *, int); extern struct iscsi_np *iscsit_add_np(struct __kernel_sockaddr_storage *, - char *, int); + int); extern int iscsit_reset_np_thread(struct iscsi_np *, struct iscsi_tpg_np *, struct iscsi_portal_group *, bool); extern int iscsit_del_np(struct iscsi_np *); diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c index 469fce44ebad5..6f2fb546477e8 100644 --- a/drivers/target/iscsi/iscsi_target_configfs.c +++ b/drivers/target/iscsi/iscsi_target_configfs.c @@ -100,7 +100,7 @@ static ssize_t lio_target_np_store_sctp( * Use existing np->np_sockaddr for SCTP network portal reference */ tpg_np_sctp = iscsit_tpg_add_network_portal(tpg, &np->np_sockaddr, - np->np_ip, tpg_np, ISCSI_SCTP_TCP); + tpg_np, ISCSI_SCTP_TCP); if (!tpg_np_sctp || IS_ERR(tpg_np_sctp)) goto out; } else { @@ -178,7 +178,7 @@ static ssize_t lio_target_np_store_iser( } tpg_np_iser = iscsit_tpg_add_network_portal(tpg, &np->np_sockaddr, - np->np_ip, tpg_np, ISCSI_INFINIBAND); + tpg_np, ISCSI_INFINIBAND); if (IS_ERR(tpg_np_iser)) { rc = PTR_ERR(tpg_np_iser); goto out; @@ -249,8 +249,8 @@ static struct se_tpg_np *lio_target_call_addnptotpg( return ERR_PTR(-EINVAL); } str++; /* Skip over leading "[" */ - *str2 = '\0'; /* Terminate the IPv6 address */ - str2++; /* Skip over the "]" */ + *str2 = '\0'; /* Terminate the unbracketed IPv6 address */ + str2++; /* Skip over the \0 */ port_str = strstr(str2, ":"); if (!port_str) { pr_err("Unable to locate \":port\"" @@ -317,7 +317,7 @@ static struct se_tpg_np *lio_target_call_addnptotpg( * sys/kernel/config/iscsi/$IQN/$TPG/np/$IP:$PORT/ * */ - tpg_np = iscsit_tpg_add_network_portal(tpg, &sockaddr, str, NULL, + tpg_np = iscsit_tpg_add_network_portal(tpg, &sockaddr, NULL, ISCSI_TCP); if (IS_ERR(tpg_np)) { iscsit_put_tpg(tpg); @@ -345,8 +345,8 @@ static void lio_target_call_delnpfromtpg( se_tpg = &tpg->tpg_se_tpg; pr_debug("LIO_Target_ConfigFS: DEREGISTER -> %s TPGT: %hu" - " PORTAL: %s:%hu\n", config_item_name(&se_tpg->se_tpg_wwn->wwn_group.cg_item), - tpg->tpgt, tpg_np->tpg_np->np_ip, tpg_np->tpg_np->np_port); + " PORTAL: %pISc:%hu\n", config_item_name(&se_tpg->se_tpg_wwn->wwn_group.cg_item), + tpg->tpgt, &tpg_np->tpg_np->np_sockaddr, tpg_np->tpg_np->np_port); ret = iscsit_tpg_del_network_portal(tpg, tpg_np); if (ret < 0) diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c index c3bccaddb5920..39654e917cd80 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -879,8 +879,8 @@ static void iscsi_handle_login_thread_timeout(unsigned long data) struct iscsi_np *np = (struct iscsi_np *) data; spin_lock_bh(&np->np_thread_lock); - pr_err("iSCSI Login timeout on Network Portal %s:%hu\n", - np->np_ip, np->np_port); + pr_err("iSCSI Login 
timeout on Network Portal %pISc:%hu\n", + &np->np_sockaddr, np->np_port); if (np->np_login_timer_flags & ISCSI_TF_STOP) { spin_unlock_bh(&np->np_thread_lock); @@ -1358,8 +1358,8 @@ static int __iscsi_target_login_thread(struct iscsi_np *np) spin_lock_bh(&np->np_thread_lock); if (np->np_thread_state != ISCSI_NP_THREAD_ACTIVE) { spin_unlock_bh(&np->np_thread_lock); - pr_err("iSCSI Network Portal on %s:%hu currently not" - " active.\n", np->np_ip, np->np_port); + pr_err("iSCSI Network Portal on %pISc:%hu currently not" + " active.\n", &np->np_sockaddr, np->np_port); iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, ISCSI_LOGIN_STATUS_SVC_UNAVAILABLE); goto new_sess_out; diff --git a/drivers/target/iscsi/iscsi_target_tpg.c b/drivers/target/iscsi/iscsi_target_tpg.c index 5e3295fe404d7..3bc7d62c0a652 100644 --- a/drivers/target/iscsi/iscsi_target_tpg.c +++ b/drivers/target/iscsi/iscsi_target_tpg.c @@ -460,7 +460,6 @@ static bool iscsit_tpg_check_network_portal( struct iscsi_tpg_np *iscsit_tpg_add_network_portal( struct iscsi_portal_group *tpg, struct __kernel_sockaddr_storage *sockaddr, - char *ip_str, struct iscsi_tpg_np *tpg_np_parent, int network_transport) { @@ -470,8 +469,8 @@ struct iscsi_tpg_np *iscsit_tpg_add_network_portal( if (!tpg_np_parent) { if (iscsit_tpg_check_network_portal(tpg->tpg_tiqn, sockaddr, network_transport)) { - pr_err("Network Portal: %s already exists on a" - " different TPG on %s\n", ip_str, + pr_err("Network Portal: %pISc already exists on a" + " different TPG on %s\n", sockaddr, tpg->tpg_tiqn->tiqn); return ERR_PTR(-EEXIST); } @@ -484,7 +483,7 @@ struct iscsi_tpg_np *iscsit_tpg_add_network_portal( return ERR_PTR(-ENOMEM); } - np = iscsit_add_np(sockaddr, ip_str, network_transport); + np = iscsit_add_np(sockaddr, network_transport); if (IS_ERR(np)) { kfree(tpg_np); return ERR_CAST(np); @@ -514,8 +513,8 @@ struct iscsi_tpg_np *iscsit_tpg_add_network_portal( spin_unlock(&tpg_np_parent->tpg_np_parent_lock); } - pr_debug("CORE[%s] - Added Network Portal: %s:%hu,%hu on %s\n", - tpg->tpg_tiqn->tiqn, np->np_ip, np->np_port, tpg->tpgt, + pr_debug("CORE[%s] - Added Network Portal: %pISc:%hu,%hu on %s\n", + tpg->tpg_tiqn->tiqn, &np->np_sockaddr, np->np_port, tpg->tpgt, np->np_transport->name); return tpg_np; @@ -528,8 +527,8 @@ static int iscsit_tpg_release_np( { iscsit_clear_tpg_np_login_thread(tpg_np, tpg, true); - pr_debug("CORE[%s] - Removed Network Portal: %s:%hu,%hu on %s\n", - tpg->tpg_tiqn->tiqn, np->np_ip, np->np_port, tpg->tpgt, + pr_debug("CORE[%s] - Removed Network Portal: %pISc:%hu,%hu on %s\n", + tpg->tpg_tiqn->tiqn, &np->np_sockaddr, np->np_port, tpg->tpgt, np->np_transport->name); tpg_np->tpg_np = NULL; diff --git a/drivers/target/iscsi/iscsi_target_tpg.h b/drivers/target/iscsi/iscsi_target_tpg.h index 95ff5bdecd719..28abda89ea98d 100644 --- a/drivers/target/iscsi/iscsi_target_tpg.h +++ b/drivers/target/iscsi/iscsi_target_tpg.h @@ -22,7 +22,7 @@ extern struct iscsi_node_attrib *iscsit_tpg_get_node_attrib(struct iscsi_session extern void iscsit_tpg_del_external_nps(struct iscsi_tpg_np *); extern struct iscsi_tpg_np *iscsit_tpg_locate_child_np(struct iscsi_tpg_np *, int); extern struct iscsi_tpg_np *iscsit_tpg_add_network_portal(struct iscsi_portal_group *, - struct __kernel_sockaddr_storage *, char *, struct iscsi_tpg_np *, + struct __kernel_sockaddr_storage *, struct iscsi_tpg_np *, int); extern int iscsit_tpg_del_network_portal(struct iscsi_portal_group *, struct iscsi_tpg_np *); diff --git a/include/target/iscsi/iscsi_target_core.h 
b/include/target/iscsi/iscsi_target_core.h index 73abbc54063de..7bd03f867fcac 100644 --- a/include/target/iscsi/iscsi_target_core.h +++ b/include/target/iscsi/iscsi_target_core.h @@ -787,7 +787,6 @@ struct iscsi_np { enum iscsi_timer_flags_table np_login_timer_flags; u32 np_exports; enum np_flags_table np_flags; - unsigned char np_ip[IPV6_ADDRESS_SPACE]; u16 np_port; spinlock_t np_thread_lock; struct completion np_restart_comp; From 4863a3f3a6fee85504d78254de97aea26e0c661c Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Thu, 27 Aug 2015 20:16:37 +0200 Subject: [PATCH 1003/2091] scsi: fix scsi_error_handler vs. scsi_host_dev_release race commit 537b604c8b3aa8b96fe35f87dd085816552e294c upstream. b9d5c6b7ef57 ("[SCSI] cleanup setting task state in scsi_error_handler()") has introduced a race between scsi_error_handler and scsi_host_dev_release resulting in the hang when the device goes away because scsi_error_handler might miss a wake up: CPU0 CPU1 scsi_error_handler scsi_host_dev_release kthread_stop() kthread_should_stop() test_bit(KTHREAD_SHOULD_STOP) set_bit(KTHREAD_SHOULD_STOP) wake_up_process() wait_for_completion() set_current_state(TASK_INTERRUPTIBLE) schedule() The most straightforward solution seems to be to invert the ordering of the set_current_state and kthread_should_stop. The issue has been noticed during reboot test on a 3.0 based kernel but the current code seems to be affected in the same way. [jejb: additional comment added] Reported-and-debugged-by: Mike Mayer Signed-off-by: Michal Hocko Reviewed-by: Dan Williams Reviewed-by: Hannes Reinecke Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/scsi_error.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index ce6c770d74d51..c6b93d273799c 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -2169,8 +2169,17 @@ int scsi_error_handler(void *data) * We never actually get interrupted because kthread_run * disables signal delivery for the created thread. */ - while (!kthread_should_stop()) { + while (true) { + /* + * The sequence in kthread_stop() sets the stop flag first + * then wakes the process. To avoid missed wakeups, the task + * should always be in a non running state before the stop + * flag is checked + */ set_current_state(TASK_INTERRUPTIBLE); + if (kthread_should_stop()) + break; + if ((shost->host_failed == 0 && shost->host_eh_scheduled == 0) || shost->host_failed != atomic_read(&shost->host_busy)) { SCSI_LOG_ERROR_RECOVERY(1, From 57bc4a2d5be815c3b917fdf54a2013e8df565e42 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Thu, 3 Sep 2015 06:30:45 +0000 Subject: [PATCH 1004/2091] target: Attach EXTENDED_COPY local I/O descriptors to xcopy_pt_sess commit 4416f89b8cfcb794d040fc3b68e5fb159b7d8d02 upstream. This patch is a >= v4.1 regression bug-fix where control CDB emulation logic in commit 38b57f82 now expects a se_cmd->se_sess pointer to exist when determining T10-PI support is to be exposed for initiator host ports. To address this bug, go ahead and add locally generated se_cmd descriptors for copy-offload block-copy to it's own stand-alone se_session nexus, while the parent EXTENDED_COPY se_cmd descriptor remains associated with it's originating se_cmd->se_sess nexus. Note a valid se_cmd->se_sess is also required for future support of WRITE_INSERT and READ_STRIP software emulation when submitting backend I/O to se_device that exposes T10-PI suport. 
Reported-by: Alex Gorbachev Tested-by: Alex Gorbachev Cc: "Martin K. Petersen" Cc: Hannes Reinecke Cc: Christoph Hellwig Cc: Doug Gilbert Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/target/target_core_xcopy.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c index 8fd680ac941bd..4609305a15916 100644 --- a/drivers/target/target_core_xcopy.c +++ b/drivers/target/target_core_xcopy.c @@ -465,6 +465,8 @@ int target_xcopy_setup_pt(void) memset(&xcopy_pt_sess, 0, sizeof(struct se_session)); INIT_LIST_HEAD(&xcopy_pt_sess.sess_list); INIT_LIST_HEAD(&xcopy_pt_sess.sess_acl_list); + INIT_LIST_HEAD(&xcopy_pt_sess.sess_cmd_list); + spin_lock_init(&xcopy_pt_sess.sess_cmd_lock); xcopy_pt_nacl.se_tpg = &xcopy_pt_tpg; xcopy_pt_nacl.nacl_sess = &xcopy_pt_sess; @@ -666,7 +668,7 @@ static int target_xcopy_read_source( pr_debug("XCOPY: Built READ_16: LBA: %llu Sectors: %u Length: %u\n", (unsigned long long)src_lba, src_sectors, length); - transport_init_se_cmd(se_cmd, &xcopy_pt_tfo, NULL, length, + transport_init_se_cmd(se_cmd, &xcopy_pt_tfo, &xcopy_pt_sess, length, DMA_FROM_DEVICE, 0, &xpt_cmd->sense_buffer[0]); xop->src_pt_cmd = xpt_cmd; @@ -726,7 +728,7 @@ static int target_xcopy_write_destination( pr_debug("XCOPY: Built WRITE_16: LBA: %llu Sectors: %u Length: %u\n", (unsigned long long)dst_lba, dst_sectors, length); - transport_init_se_cmd(se_cmd, &xcopy_pt_tfo, NULL, length, + transport_init_se_cmd(se_cmd, &xcopy_pt_tfo, &xcopy_pt_sess, length, DMA_TO_DEVICE, 0, &xpt_cmd->sense_buffer[0]); xop->dst_pt_cmd = xpt_cmd; From 060f45157d2a43dffd51a10e9707336f58ae34b7 Mon Sep 17 00:00:00 2001 From: Jenny Derzhavetz Date: Sun, 6 Sep 2015 14:52:20 +0300 Subject: [PATCH 1005/2091] iser-target: remove command with state ISTATE_REMOVE commit a4c15cd957cbd728f685645de7a150df5912591a upstream. As documented in iscsit_sequence_cmd: /* * Existing callers for iscsit_sequence_cmd() will silently * ignore commands with CMDSN_LOWER_THAN_EXP, so force this * return for CMDSN_MAXCMDSN_OVERRUN as well.. */ We need to silently finish a command when it's in ISTATE_REMOVE. This fixes an teardown hang we were seeing where a mis-behaved initiator (triggered by allocation error injections) sent us a cmdsn which was lower than expected. 
Signed-off-by: Jenny Derzhavetz Signed-off-by: Sagi Grimberg Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/isert/ib_isert.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 575a072d765f6..eb58fed90fc2d 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -2996,9 +2996,16 @@ isert_get_dataout(struct iscsi_conn *conn, struct iscsi_cmd *cmd, bool recovery) static int isert_immediate_queue(struct iscsi_conn *conn, struct iscsi_cmd *cmd, int state) { - int ret; + struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); + int ret = 0; switch (state) { + case ISTATE_REMOVE: + spin_lock_bh(&conn->cmd_lock); + list_del_init(&cmd->i_conn_node); + spin_unlock_bh(&conn->cmd_lock); + isert_put_cmd(isert_cmd, true); + break; case ISTATE_SEND_NOPIN_WANT_RESPONSE: ret = isert_put_nopin(cmd, conn, false); break; From b6ac3aee1c73d5b107ae6ec6e3715b39db782781 Mon Sep 17 00:00:00 2001 From: Jenny Derzhavetz Date: Sun, 6 Sep 2015 14:52:21 +0300 Subject: [PATCH 1006/2091] iser-target: Put the reference on commands waiting for unsol data commit 3e03c4b01da3e6a5f3081eb0aa252490fe83e352 upstream. The iscsi target core teardown sequence calls wait_conn for all active commands to finish gracefully by: - move the queue-pair to error state - drain all the completions - wait for the core to finish handling all session commands However, when tearing down a session while there are sequenced commands that are still waiting for unsolicited data outs, we can block forever as these are missing an extra reference put. We basically need the equivalent of iscsit_free_queue_reqs_for_conn() which is called after wait_conn has returned. Address this by an explicit walk on conn_cmd_list and put the extra reference. Signed-off-by: Jenny Derzhavetz Signed-off-by: Sagi Grimberg Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/isert/ib_isert.c | 38 ++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index eb58fed90fc2d..c32a934f76936 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -3370,6 +3370,41 @@ isert_wait4flush(struct isert_conn *isert_conn) wait_for_completion(&isert_conn->wait_comp_err); } +/** + * isert_put_unsol_pending_cmds() - Drop commands waiting for + * unsolicitate dataout + * @conn: iscsi connection + * + * We might still have commands that are waiting for unsolicited + * dataouts messages. 
We must put the extra reference on those + * before blocking on the target_wait_for_session_cmds + */ +static void +isert_put_unsol_pending_cmds(struct iscsi_conn *conn) +{ + struct iscsi_cmd *cmd, *tmp; + static LIST_HEAD(drop_cmd_list); + + spin_lock_bh(&conn->cmd_lock); + list_for_each_entry_safe(cmd, tmp, &conn->conn_cmd_list, i_conn_node) { + if ((cmd->cmd_flags & ICF_NON_IMMEDIATE_UNSOLICITED_DATA) && + (cmd->write_data_done < conn->sess->sess_ops->FirstBurstLength) && + (cmd->write_data_done < cmd->se_cmd.data_length)) + list_move_tail(&cmd->i_conn_node, &drop_cmd_list); + } + spin_unlock_bh(&conn->cmd_lock); + + list_for_each_entry_safe(cmd, tmp, &drop_cmd_list, i_conn_node) { + list_del_init(&cmd->i_conn_node); + if (cmd->i_state != ISTATE_REMOVE) { + struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); + + isert_info("conn %p dropping cmd %p\n", conn, cmd); + isert_put_cmd(isert_cmd, true); + } + } +} + static void isert_wait_conn(struct iscsi_conn *conn) { struct isert_conn *isert_conn = conn->context; @@ -3388,8 +3423,9 @@ static void isert_wait_conn(struct iscsi_conn *conn) isert_conn_terminate(isert_conn); mutex_unlock(&isert_conn->mutex); - isert_wait4cmds(conn); isert_wait4flush(isert_conn); + isert_put_unsol_pending_cmds(conn); + isert_wait4cmds(conn); isert_wait4logout(isert_conn); queue_work(isert_release_wq, &isert_conn->release_work); From 35afa65642a9a88c81913377b93a3a66220f8b9d Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Wed, 23 Sep 2015 07:49:26 +0000 Subject: [PATCH 1007/2091] target: Fix v4.1 UNIT_ATTENTION se_node_acl->device_list[] NULL pointer This patch fixes a v4.1 only regression bug as reported by Martin where UNIT_ATTENTION checking for pre v4.2-rc1 RCU conversion code legacy se_node_acl->device_list[] was hitting a NULL pointer dereference in: [ 1858.639654] CPU: 2 PID: 1293 Comm: kworker/2:1 Tainted: G I 4.1.6-fixxcopy+ #1 [ 1858.639699] Hardware name: Dell Inc. PowerEdge R410/0N83VF, BIOS 1.11.0 07/20/2012 [ 1858.639747] Workqueue: xcopy_wq target_xcopy_do_work [target_core_mod] [ 1858.639782] task: ffff880036f0cbe0 ti: ffff880317940000 task.ti: ffff880317940000 [ 1858.639822] RIP: 0010:[] [] target_scsi3_ua_check+0x24/0x60 [target_core_mod] [ 1858.639884] RSP: 0018:ffff880317943ce0 EFLAGS: 00010282 [ 1858.639913] RAX: 0000000000000000 RBX: ffff880317943dc0 RCX: 0000000000000000 [ 1858.639950] RDX: 0000000000000000 RSI: ffff880317943dd0 RDI: ffff88030eaee408 [ 1858.639987] RBP: ffff88030eaee408 R08: 0000000000000001 R09: 0000000000000001 [ 1858.640025] R10: 0000000000000000 R11: 00000000000706e0 R12: ffff880315e0a000 [ 1858.640062] R13: ffff88030eaee408 R14: 0000000000000001 R15: ffff88030eaee408 [ 1858.640100] FS: 0000000000000000(0000) GS:ffff880322e80000(0000) knlGS:0000000000000000 [ 1858.640143] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [ 1858.640173] CR2: 0000000000000000 CR3: 000000000180d000 CR4: 00000000000006e0 [ 1858.640210] Stack: [ 1858.640223] ffffffffa01cadfa ffff88030eaee400 ffff880318e7c340 ffff880315e0a000 [ 1858.640267] ffffffffa01d8c25 ffff8800cae809e0 0000000000000400 0000000000000400 [ 1858.640310] ffff880318e7c3d0 0000000006b75800 0000000000080000 ffff88030eaee400 [ 1858.640354] Call Trace: [ 1858.640379] [] ? target_setup_cmd_from_cdb+0x13a/0x2c0 [target_core_mod] [ 1858.640429] [] ? target_xcopy_setup_pt_cmd+0x85/0x320 [target_core_mod] [ 1858.640479] [] ? target_xcopy_do_work+0x264/0x700 [target_core_mod] [ 1858.640526] [] ? pick_next_task_fair+0x720/0x8f0 [ 1858.640562] [] ? 
process_one_work+0x14b/0x430 [ 1858.640595] [] ? worker_thread+0x6b/0x560 [ 1858.640627] [] ? rescuer_thread+0x390/0x390 [ 1858.640661] [] ? kthread+0xd3/0xf0 [ 1858.640689] [] ? kthread_create_on_node+0x180/0x180 Also, check for the same se_node_acl->device_list[] during EXTENDED_COPY operation as a non-holding persistent reservation port. Reported-by: Martin Svec Tested-by: Martin Svec Cc: Martin Svec Cc: Alex Gorbachev Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/target/target_core_pr.c | 3 +++ drivers/target/target_core_ua.c | 8 ++++---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c index a15411c79ae99..08aa7cc586941 100644 --- a/drivers/target/target_core_pr.c +++ b/drivers/target/target_core_pr.c @@ -328,6 +328,9 @@ static int core_scsi3_pr_seq_non_holder( int legacy = 0; /* Act like a legacy device and return * RESERVATION CONFLICT on some CDBs */ + if (!se_sess->se_node_acl->device_list) + return; + se_deve = se_sess->se_node_acl->device_list[cmd->orig_fe_lun]; /* * Determine if the registration should be ignored due to diff --git a/drivers/target/target_core_ua.c b/drivers/target/target_core_ua.c index 1738b16469887..9fc33e84439ae 100644 --- a/drivers/target/target_core_ua.c +++ b/drivers/target/target_core_ua.c @@ -48,7 +48,7 @@ target_scsi3_ua_check(struct se_cmd *cmd) return 0; nacl = sess->se_node_acl; - if (!nacl) + if (!nacl || !nacl->device_list) return 0; deve = nacl->device_list[cmd->orig_fe_lun]; @@ -90,7 +90,7 @@ int core_scsi3_ua_allocate( /* * PASSTHROUGH OPS */ - if (!nacl) + if (!nacl || !nacl->device_list) return -EINVAL; ua = kmem_cache_zalloc(se_ua_cache, GFP_ATOMIC); @@ -208,7 +208,7 @@ void core_scsi3_ua_for_check_condition( return; nacl = sess->se_node_acl; - if (!nacl) + if (!nacl || !nacl->device_list) return; spin_lock_irq(&nacl->device_list_lock); @@ -276,7 +276,7 @@ int core_scsi3_ua_clear_for_request_sense( return -EINVAL; nacl = sess->se_node_acl; - if (!nacl) + if (!nacl || !nacl->device_list) return -EINVAL; spin_lock_irq(&nacl->device_list_lock); From 61be5a59622ab5b2c0152e3d32b0d8b18ca65875 Mon Sep 17 00:00:00 2001 From: Azael Avalos Date: Wed, 9 Sep 2015 11:25:45 -0600 Subject: [PATCH 1008/2091] toshiba_acpi: Fix hotkeys registration on some toshiba models commit 53147b6cabee5e8d1997b5682fcc0c3b72ddf9c2 upstream. Commit a2b3471b5b13 ("toshiba_acpi: Use the Hotkey Event Type function for keymap choosing") changed the *setup_keyboard function to query for the Hotkey Event Type to help choose the correct keymap, but turns out that here are certain Toshiba models out there not implementing this feature, and thus, failing to continue the input device registration and leaving such laptops without hotkey support. This patch changes such check, and instead of returning an error if the Hotkey Event Type is not present, we simply inform userspace about it, changing the message printed from err to notice, making the function responsible for registering the input device to continue. This issue was found on a Toshiba Portege Z30-B, but there might be some other models out there affected by this regression as well. 
Signed-off-by: Azael Avalos Signed-off-by: Darren Hart Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/toshiba_acpi.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c index 9956b9902bb40..93e54a0f471af 100644 --- a/drivers/platform/x86/toshiba_acpi.c +++ b/drivers/platform/x86/toshiba_acpi.c @@ -2525,11 +2525,9 @@ static int toshiba_acpi_setup_keyboard(struct toshiba_acpi_dev *dev) if (error) return error; - error = toshiba_hotkey_event_type_get(dev, &events_type); - if (error) { - pr_err("Unable to query Hotkey Event Type\n"); - return error; - } + if (toshiba_hotkey_event_type_get(dev, &events_type)) + pr_notice("Unable to query Hotkey Event Type\n"); + dev->hotkey_event_type = events_type; dev->hotkey_dev = input_allocate_device(); From 37f70c3811230c2ee4a3a9eece54cf7ee5ba00f9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 10 Sep 2015 11:58:27 +0200 Subject: [PATCH 1009/2091] perf/x86/intel: Fix constraint access commit ebfb4988f0378e2ac3b4a0aa1ea20d724293f392 upstream. Sasha reported that we can get here with .idx==-1, and cpuc->event_constraints unallocated. Suggested-by: Stephane Eranian Reported-by: Sasha Levin Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: b371b5943178 ("perf/x86: Fix event/group validation") Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/perf_event_intel.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 2813ea0f142e9..22212615a1375 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -2098,9 +2098,12 @@ static struct event_constraint * intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx, struct perf_event *event) { - struct event_constraint *c1 = cpuc->event_constraint[idx]; + struct event_constraint *c1 = NULL; struct event_constraint *c2; + if (idx >= 0) /* fake does < 0 */ + c1 = cpuc->event_constraint[idx]; + /* * first time only * - static constraint: no change across incremental scheduling calls From d4cb7d0fc447e6117c99e45f3f4f698017201f2d Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 24 Sep 2015 13:05:22 +0300 Subject: [PATCH 1010/2091] perf tools: Fix copying of /proc/kcore commit b5cabbcbd157a4bf5a92dfc85134999a3b55342d upstream. A copy of /proc/kcore containing the kernel text can be made to the buildid cache. e.g. perf buildid-cache -v -k /proc/kcore To workaround objdump limitations, a copy is also made when annotating against /proc/kcore. The copying process stops working from libelf about v1.62 onwards (the problem was found with v1.63). The cause is that a call to gelf_getphdr() in kcore__add_phdr() fails because additional validation has been added to gelf_getphdr(). The use of gelf_getphdr() is a misguided attempt to get default initialization of the Gelf_Phdr structure. That should not be necessary because every member of the Gelf_Phdr structure is subsequently assigned. So just remove the call to gelf_getphdr(). Similarly, a call to gelf_getehdr() in gelf_kcore__init() can be removed also. Committer notes: Note to stable@kernel.org, from Adrian in the cover letter for this patchkit: The "Fix copying of /proc/kcore" problem goes back to v3.13 if you think it is important enough for stable. 
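For illustration only, and not part of the patch: the GElf_Phdr half of the fix relies on the C rule that members omitted from a designated initializer are zero-initialized, so no separate "default initialization" call is needed (the ehdr side uses an explicit memset instead). A tiny stand-alone sketch with made-up names:

#include <assert.h>

struct phdr_like {
	int  type;
	long offset;
	long filesz;
	long align;
};

int main(void)
{
	struct phdr_like p = { .type = 1, .offset = 64 };

	/* members not named in the initializer are guaranteed to be zero */
	assert(p.filesz == 0 && p.align == 0);
	return 0;
}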
Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1443089122-19082-3-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/symbol-elf.c | 35 +++++++++++++---------------------- 1 file changed, 13 insertions(+), 22 deletions(-) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index a7ab6063e0389..3ddfab315e197 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -1253,8 +1253,6 @@ static int kcore__open(struct kcore *kcore, const char *filename) static int kcore__init(struct kcore *kcore, char *filename, int elfclass, bool temp) { - GElf_Ehdr *ehdr; - kcore->elfclass = elfclass; if (temp) @@ -1271,9 +1269,7 @@ static int kcore__init(struct kcore *kcore, char *filename, int elfclass, if (!gelf_newehdr(kcore->elf, elfclass)) goto out_end; - ehdr = gelf_getehdr(kcore->elf, &kcore->ehdr); - if (!ehdr) - goto out_end; + memset(&kcore->ehdr, 0, sizeof(GElf_Ehdr)); return 0; @@ -1330,23 +1326,18 @@ static int kcore__copy_hdr(struct kcore *from, struct kcore *to, size_t count) static int kcore__add_phdr(struct kcore *kcore, int idx, off_t offset, u64 addr, u64 len) { - GElf_Phdr gphdr; - GElf_Phdr *phdr; - - phdr = gelf_getphdr(kcore->elf, idx, &gphdr); - if (!phdr) - return -1; - - phdr->p_type = PT_LOAD; - phdr->p_flags = PF_R | PF_W | PF_X; - phdr->p_offset = offset; - phdr->p_vaddr = addr; - phdr->p_paddr = 0; - phdr->p_filesz = len; - phdr->p_memsz = len; - phdr->p_align = page_size; - - if (!gelf_update_phdr(kcore->elf, idx, phdr)) + GElf_Phdr phdr = { + .p_type = PT_LOAD, + .p_flags = PF_R | PF_W | PF_X, + .p_offset = offset, + .p_vaddr = addr, + .p_paddr = 0, + .p_filesz = len, + .p_memsz = len, + .p_align = page_size, + }; + + if (!gelf_update_phdr(kcore->elf, idx, &phdr)) return -1; return 0; From 5c23f8cd8a4a3ce31f65711f3b100055347b8751 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 10 Aug 2015 16:53:54 -0300 Subject: [PATCH 1011/2091] perf hists: Update the column width for the "srcline" sort key commit e8e6d37e73e6b950c891c780745460b87f4755b6 upstream. When we introduce a new sort key, we need to update the hists__calc_col_len() function accordingly, otherwise the width will be limited to strlen(header). We can't update it when obtaining a line value for a column (for instance, in sort__srcline_cmp()), because we reset it all when doing a resort (see hists__output_recalc_col_len()), so we need to, from what is in the hist_entry fields, set each of the column widths. 
Cc: Namhyung Kim Cc: Andi Kleen Cc: Jiri Olsa Fixes: 409a8be61560 ("perf tools: Add sort by src line/number") Link: http://lkml.kernel.org/n/tip-jgbe0yx8v1gs89cslr93pvz2@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/hist.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index cc22b9158b93c..c7966c0fa13e4 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -151,6 +151,9 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) hists__new_col_len(hists, HISTC_LOCAL_WEIGHT, 12); hists__new_col_len(hists, HISTC_GLOBAL_WEIGHT, 12); + if (h->srcline) + hists__new_col_len(hists, HISTC_SRCLINE, strlen(h->srcline)); + if (h->transaction) hists__new_col_len(hists, HISTC_TRANSACTION, hist_entry__transaction_len()); From 63f155ca0df06d116e4bbed31c264598a9d8c622 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 2 Jul 2015 03:08:43 -0400 Subject: [PATCH 1012/2091] perf stat: Get correct cpu id for print_aggr commit 601083cffb7cabdcc55b8195d732f0f7028570fa upstream. print_aggr() fails to print per-core/per-socket statistics after commit 582ec0829b3d ("perf stat: Fix per-socket output bug for uncore events") if events have differnt cpus. Because in print_aggr(), aggr_get_id needs index (not cpu id) to find core/pkg id. Also, evsel cpu maps should be used to get aggregated id. Here is an example: Counting events cycles,uncore_imc_0/cas_count_read/. (Uncore event has cpumask 0,18) $ perf stat -e cycles,uncore_imc_0/cas_count_read/ -C0,18 --per-core sleep 2 Without this patch, it failes to get CPU 18 result. Performance counter stats for 'CPU(s) 0,18': S0-C0 1 7526851 cycles S0-C0 1 1.05 MiB uncore_imc_0/cas_count_read/ S1-C0 0 cycles S1-C0 0 MiB uncore_imc_0/cas_count_read/ With this patch, it can get both CPU0 and CPU18 result. 
Performance counter stats for 'CPU(s) 0,18': S0-C0 1 6327768 cycles S0-C0 1 0.47 MiB uncore_imc_0/cas_count_read/ S1-C0 1 330228 cycles S1-C0 1 0.29 MiB uncore_imc_0/cas_count_read/ Signed-off-by: Kan Liang Acked-by: Jiri Olsa Acked-by: Stephane Eranian Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Fixes: 582ec0829b3d ("perf stat: Fix per-socket output bug for uncore events") Link: http://lkml.kernel.org/r/1435820925-51091-1-git-send-email-kan.liang@intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/builtin-stat.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index f7b8218785f6f..a1f3ffc2786de 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1227,7 +1227,7 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) static void print_aggr(char *prefix) { struct perf_evsel *counter; - int cpu, cpu2, s, s2, id, nr; + int cpu, s, s2, id, nr; double uval; u64 ena, run, val; @@ -1240,8 +1240,7 @@ static void print_aggr(char *prefix) val = ena = run = 0; nr = 0; for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { - cpu2 = perf_evsel__cpus(counter)->map[cpu]; - s2 = aggr_get_id(evsel_list->cpus, cpu2); + s2 = aggr_get_id(perf_evsel__cpus(counter), cpu); if (s2 != id) continue; val += counter->counts->cpu[cpu].val; From 1477cb59bba427bab5dd7e91a448d0c1a11ac230 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 4 Aug 2015 17:10:27 +0100 Subject: [PATCH 1013/2091] perf tools: Add empty Build files for architectures lacking them commit 93df8a1ed6231727c5db94a80b1a6bd5ee67cec3 upstream. perf currently fails to build on MIPS as there is no tools/perf/arch/mips/Build file. Adding an empty file fixes this as there are no MIPS-specific sources to build. It looks like the same is needed for Alpha and PA-RISC, though I haven't been able to test those. Signed-off-by: Ben Hutchings Fixes: 5e8c0fb6a957 ("perf build: Add arch x86 objects building") Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1438704627.7315.2.camel@decadent.org.uk Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/arch/alpha/Build | 1 + tools/perf/arch/mips/Build | 1 + tools/perf/arch/parisc/Build | 1 + 3 files changed, 3 insertions(+) create mode 100644 tools/perf/arch/alpha/Build create mode 100644 tools/perf/arch/mips/Build create mode 100644 tools/perf/arch/parisc/Build diff --git a/tools/perf/arch/alpha/Build b/tools/perf/arch/alpha/Build new file mode 100644 index 0000000000000..1bb8bf6d7fd4c --- /dev/null +++ b/tools/perf/arch/alpha/Build @@ -0,0 +1 @@ +# empty diff --git a/tools/perf/arch/mips/Build b/tools/perf/arch/mips/Build new file mode 100644 index 0000000000000..1bb8bf6d7fd4c --- /dev/null +++ b/tools/perf/arch/mips/Build @@ -0,0 +1 @@ +# empty diff --git a/tools/perf/arch/parisc/Build b/tools/perf/arch/parisc/Build new file mode 100644 index 0000000000000..1bb8bf6d7fd4c --- /dev/null +++ b/tools/perf/arch/parisc/Build @@ -0,0 +1 @@ +# empty From 41e324d7edbb2c33a4492f8a5e65044609fe2732 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 11 Sep 2015 12:36:12 -0300 Subject: [PATCH 1014/2091] perf header: Fixup reading of HEADER_NRCPUS feature commit caa470475d9b59eeff093ae650800d34612c4379 upstream. The original patch introducing this header wrote the number of CPUs available and online in one order and then swapped those values when reading, fix it. 
Before: # perf record usleep 1 # perf report --header-only | grep 'nrcpus \(online\|avail\)' # nrcpus online : 4 # nrcpus avail : 4 # echo 0 > /sys/devices/system/cpu/cpu2/online # perf record usleep 1 # perf report --header-only | grep 'nrcpus \(online\|avail\)' # nrcpus online : 4 # nrcpus avail : 3 # echo 0 > /sys/devices/system/cpu/cpu1/online # perf record usleep 1 # perf report --header-only | grep 'nrcpus \(online\|avail\)' # nrcpus online : 4 # nrcpus avail : 2 After the fix, bringing back the CPUs online: # perf report --header-only | grep 'nrcpus \(online\|avail\)' # nrcpus online : 2 # nrcpus avail : 4 # echo 1 > /sys/devices/system/cpu/cpu2/online # perf record usleep 1 # perf report --header-only | grep 'nrcpus \(online\|avail\)' # nrcpus online : 3 # nrcpus avail : 4 # echo 1 > /sys/devices/system/cpu/cpu1/online # perf record usleep 1 # perf report --header-only | grep 'nrcpus \(online\|avail\)' # nrcpus online : 4 # nrcpus avail : 4 Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Kan Liang Cc: Stephane Eranian Cc: Wang Nan Fixes: fbe96f29ce4b ("perf tools: Make perf.data more self-descriptive (v8)") Link: http://lkml.kernel.org/r/20150911153323.GP23511@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/header.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 918fd8ae2d80b..23eea5e7fa946 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1426,7 +1426,7 @@ static int process_nrcpus(struct perf_file_section *section __maybe_unused, if (ph->needs_swap) nr = bswap_32(nr); - ph->env.nr_cpus_online = nr; + ph->env.nr_cpus_avail = nr; ret = readn(fd, &nr, sizeof(nr)); if (ret != sizeof(nr)) @@ -1435,7 +1435,7 @@ static int process_nrcpus(struct perf_file_section *section __maybe_unused, if (ph->needs_swap) nr = bswap_32(nr); - ph->env.nr_cpus_avail = nr; + ph->env.nr_cpus_online = nr; return 0; } From 1252222767288451387b0892840a844fdb095fa6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 18 Jun 2015 12:32:49 +0200 Subject: [PATCH 1015/2091] perf: Fix AUX buffer refcounting commit 57ffc5ca679f499f4704fd9b6a372916f59930ee upstream. Its currently possible to drop the last refcount to the aux buffer from NMI context, which results in the expected fireworks. The refcounting needs a bigger overhaul, but to cure the immediate problem, delay the freeing by using an irq_work. 
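For illustration only, and not part of the patch: a minimal sketch of the deferral pattern, using hypothetical names (struct foo, foo_put); the real fix additionally has to juggle two refcounts and the existing RCU callback, as the diff below shows.

#include <linux/atomic.h>
#include <linux/irq_work.h>
#include <linux/kernel.h>
#include <linux/slab.h>

struct foo {
	atomic_t refcount;
	struct irq_work free_work;
};

static void foo_free_work(struct irq_work *work)
{
	struct foo *f = container_of(work, struct foo, free_work);

	kfree(f);	/* runs from the irq_work interrupt, never in NMI context */
}

static struct foo *foo_alloc(void)
{
	struct foo *f = kzalloc(sizeof(*f), GFP_KERNEL);

	if (f) {
		atomic_set(&f->refcount, 1);
		init_irq_work(&f->free_work, foo_free_work);
	}
	return f;
}

/* may be called from NMI context, so never free directly here */
static void foo_put(struct foo *f)
{
	if (atomic_dec_and_test(&f->refcount))
		irq_work_queue(&f->free_work);
}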
Reviewed-and-tested-by: Alexander Shishkin Reported-by: Vince Weaver Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20150618103249.GK19282@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/events/core.c | 8 -------- kernel/events/internal.h | 10 ++++++++++ kernel/events/ring_buffer.c | 27 +++++++++++++++++++++++++-- 3 files changed, 35 insertions(+), 10 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 94817491407b0..e1af58e23bee9 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -4411,14 +4411,6 @@ static void ring_buffer_wakeup(struct perf_event *event) rcu_read_unlock(); } -static void rb_free_rcu(struct rcu_head *rcu_head) -{ - struct ring_buffer *rb; - - rb = container_of(rcu_head, struct ring_buffer, rcu_head); - rb_free(rb); -} - struct ring_buffer *ring_buffer_get(struct perf_event *event) { struct ring_buffer *rb; diff --git a/kernel/events/internal.h b/kernel/events/internal.h index 9f6ce9ba4a043..a6adc36a3732f 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -11,6 +11,7 @@ struct ring_buffer { atomic_t refcount; struct rcu_head rcu_head; + struct irq_work irq_work; #ifdef CONFIG_PERF_USE_VMALLOC struct work_struct work; int page_order; /* allocation order */ @@ -55,6 +56,15 @@ struct ring_buffer { }; extern void rb_free(struct ring_buffer *rb); + +static inline void rb_free_rcu(struct rcu_head *rcu_head) +{ + struct ring_buffer *rb; + + rb = container_of(rcu_head, struct ring_buffer, rcu_head); + rb_free(rb); +} + extern struct ring_buffer * rb_alloc(int nr_pages, long watermark, int cpu, int flags); extern void perf_event_wakeup(struct perf_event *event); diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index a7604c81168e2..7f63ad978cb8b 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -221,6 +221,8 @@ void perf_output_end(struct perf_output_handle *handle) rcu_read_unlock(); } +static void rb_irq_work(struct irq_work *work); + static void ring_buffer_init(struct ring_buffer *rb, long watermark, int flags) { @@ -241,6 +243,16 @@ ring_buffer_init(struct ring_buffer *rb, long watermark, int flags) INIT_LIST_HEAD(&rb->event_list); spin_lock_init(&rb->event_lock); + init_irq_work(&rb->irq_work, rb_irq_work); +} + +static void ring_buffer_put_async(struct ring_buffer *rb) +{ + if (!atomic_dec_and_test(&rb->refcount)) + return; + + rb->rcu_head.next = (void *)rb; + irq_work_queue(&rb->irq_work); } /* @@ -319,7 +331,7 @@ void *perf_aux_output_begin(struct perf_output_handle *handle, rb_free_aux(rb); err: - ring_buffer_put(rb); + ring_buffer_put_async(rb); handle->event = NULL; return NULL; @@ -370,7 +382,7 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size, local_set(&rb->aux_nest, 0); rb_free_aux(rb); - ring_buffer_put(rb); + ring_buffer_put_async(rb); } /* @@ -559,7 +571,18 @@ static void __rb_free_aux(struct ring_buffer *rb) void rb_free_aux(struct ring_buffer *rb) { if (atomic_dec_and_test(&rb->aux_refcount)) + irq_work_queue(&rb->irq_work); +} + +static void rb_irq_work(struct irq_work *work) +{ + struct ring_buffer *rb = container_of(work, struct ring_buffer, irq_work); + + if (!atomic_read(&rb->aux_refcount)) __rb_free_aux(rb); + + if (rb->rcu_head.next == (void *)rb) + call_rcu(&rb->rcu_head, rb_free_rcu); } #ifndef CONFIG_PERF_USE_VMALLOC From 
f829ccff84346019decac2a527495bbaa5565008 Mon Sep 17 00:00:00 2001 From: Francesco Lavra Date: Sat, 25 Jul 2015 08:25:18 +0200 Subject: [PATCH 1016/2091] watchdog: sunxi: fix activation of system reset commit 0919e4445190da18496d31aac08b90828a47d45f upstream. Commit f2147de33470 ("watchdog: sunxi: support parameterized compatible strings") introduced a regression in sunxi_wdt_start(), by which the system reset function of the watchdog is not enabled upon starting the watchdog. As a result, the system is not reset when the watchdog expires. Fix it. Fixes: f2147de33470 ("watchdog: sunxi: support parameterized compatible strings") Signed-off-by: Francesco Lavra Acked-by: Maxime Ripard Reviewed-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck Signed-off-by: Greg Kroah-Hartman --- drivers/watchdog/sunxi_wdt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/sunxi_wdt.c b/drivers/watchdog/sunxi_wdt.c index a29afb37c48ca..47bd8a14d01f5 100644 --- a/drivers/watchdog/sunxi_wdt.c +++ b/drivers/watchdog/sunxi_wdt.c @@ -184,7 +184,7 @@ static int sunxi_wdt_start(struct watchdog_device *wdt_dev) /* Set system reset function */ reg = readl(wdt_base + regs->wdt_cfg); reg &= ~(regs->wdt_reset_mask); - reg |= ~(regs->wdt_reset_val); + reg |= regs->wdt_reset_val; writel(reg, wdt_base + regs->wdt_cfg); /* Enable watchdog */ From feada04ec6c916fa3779d736e75c8453ee3ddc58 Mon Sep 17 00:00:00 2001 From: Dominik Dingel Date: Fri, 18 Sep 2015 11:27:45 +0200 Subject: [PATCH 1017/2091] sched: access local runqueue directly in single_task_running commit 00cc1633816de8c95f337608a1ea64e228faf771 upstream. Commit 2ee507c47293 ("sched: Add function single_task_running to let a task check if it is the only task running on a cpu") referenced the current runqueue with the smp_processor_id. When CONFIG_DEBUG_PREEMPT is enabled, that is only allowed if preemption is disabled or the currrent task is bound to the local cpu (e.g. kernel worker). With commit f78195129963 ("kvm: add halt_poll_ns module parameter") KVM calls single_task_running. If CONFIG_DEBUG_PREEMPT is enabled that generates a lot of kernel messages. To avoid adding preemption in that cases, as it would limit the usefulness, we change single_task_running to access directly the cpu local runqueue. Cc: Tim Chen Suggested-by: Peter Zijlstra Acked-by: Peter Zijlstra (Intel) Fixes: 2ee507c472939db4b146d545352b8a7c79ef47f8 Signed-off-by: Dominik Dingel Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- kernel/sched/core.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index e6910526c84b8..4a42f8299ac8e 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2358,13 +2358,20 @@ unsigned long nr_running(void) /* * Check if only the current task is running on the cpu. + * + * Caution: this function does not check that the caller has disabled + * preemption, thus the result might have a time-of-check-to-time-of-use + * race. The caller is responsible to use it correctly, for example: + * + * - from a non-preemptable section (of course) + * + * - from a thread that is bound to a single CPU + * + * - in a loop with very short iterations (e.g. 
a polling loop) */ bool single_task_running(void) { - if (cpu_rq(smp_processor_id())->nr_running == 1) - return true; - else - return false; + return raw_rq()->nr_running == 1; } EXPORT_SYMBOL(single_task_running); From 28ef4c4b4760e7787660795ea89f04aec284fd77 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Mon, 31 Aug 2015 16:13:47 -0700 Subject: [PATCH 1018/2091] hwmon: (nct6775) Swap STEP_UP_TIME and STEP_DOWN_TIME registers for most chips commit 728d29400488d54974d3317fe8a232b45fdb42ee upstream. The STEP_UP_TIME and STEP_DOWN_TIME registers are swapped for all chips but NCT6775. Reported-by: Grazvydas Ignotas Reviewed-by: Jean Delvare Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/nct6775.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/hwmon/nct6775.c b/drivers/hwmon/nct6775.c index bd1c99deac71b..2aaedbe0b0235 100644 --- a/drivers/hwmon/nct6775.c +++ b/drivers/hwmon/nct6775.c @@ -354,6 +354,10 @@ static const u16 NCT6775_REG_TEMP_CRIT[ARRAY_SIZE(nct6775_temp_label) - 1] /* NCT6776 specific data */ +/* STEP_UP_TIME and STEP_DOWN_TIME regs are swapped for all chips but NCT6775 */ +#define NCT6776_REG_FAN_STEP_UP_TIME NCT6775_REG_FAN_STEP_DOWN_TIME +#define NCT6776_REG_FAN_STEP_DOWN_TIME NCT6775_REG_FAN_STEP_UP_TIME + static const s8 NCT6776_ALARM_BITS[] = { 0, 1, 2, 3, 8, 21, 20, 16, /* in0.. in7 */ 17, -1, -1, -1, -1, -1, -1, /* in8..in14 */ @@ -3528,8 +3532,8 @@ static int nct6775_probe(struct platform_device *pdev) data->REG_FAN_PULSES = NCT6776_REG_FAN_PULSES; data->FAN_PULSE_SHIFT = NCT6775_FAN_PULSE_SHIFT; data->REG_FAN_TIME[0] = NCT6775_REG_FAN_STOP_TIME; - data->REG_FAN_TIME[1] = NCT6775_REG_FAN_STEP_UP_TIME; - data->REG_FAN_TIME[2] = NCT6775_REG_FAN_STEP_DOWN_TIME; + data->REG_FAN_TIME[1] = NCT6776_REG_FAN_STEP_UP_TIME; + data->REG_FAN_TIME[2] = NCT6776_REG_FAN_STEP_DOWN_TIME; data->REG_TOLERANCE_H = NCT6776_REG_TOLERANCE_H; data->REG_PWM[0] = NCT6775_REG_PWM; data->REG_PWM[1] = NCT6775_REG_FAN_START_OUTPUT; @@ -3600,8 +3604,8 @@ static int nct6775_probe(struct platform_device *pdev) data->REG_FAN_PULSES = NCT6779_REG_FAN_PULSES; data->FAN_PULSE_SHIFT = NCT6775_FAN_PULSE_SHIFT; data->REG_FAN_TIME[0] = NCT6775_REG_FAN_STOP_TIME; - data->REG_FAN_TIME[1] = NCT6775_REG_FAN_STEP_UP_TIME; - data->REG_FAN_TIME[2] = NCT6775_REG_FAN_STEP_DOWN_TIME; + data->REG_FAN_TIME[1] = NCT6776_REG_FAN_STEP_UP_TIME; + data->REG_FAN_TIME[2] = NCT6776_REG_FAN_STEP_DOWN_TIME; data->REG_TOLERANCE_H = NCT6776_REG_TOLERANCE_H; data->REG_PWM[0] = NCT6775_REG_PWM; data->REG_PWM[1] = NCT6775_REG_FAN_START_OUTPUT; @@ -3677,8 +3681,8 @@ static int nct6775_probe(struct platform_device *pdev) data->REG_FAN_PULSES = NCT6779_REG_FAN_PULSES; data->FAN_PULSE_SHIFT = NCT6775_FAN_PULSE_SHIFT; data->REG_FAN_TIME[0] = NCT6775_REG_FAN_STOP_TIME; - data->REG_FAN_TIME[1] = NCT6775_REG_FAN_STEP_UP_TIME; - data->REG_FAN_TIME[2] = NCT6775_REG_FAN_STEP_DOWN_TIME; + data->REG_FAN_TIME[1] = NCT6776_REG_FAN_STEP_UP_TIME; + data->REG_FAN_TIME[2] = NCT6776_REG_FAN_STEP_DOWN_TIME; data->REG_TOLERANCE_H = NCT6776_REG_TOLERANCE_H; data->REG_PWM[0] = NCT6775_REG_PWM; data->REG_PWM[1] = NCT6775_REG_FAN_START_OUTPUT; From 5e624cb5d02e467dbeea3dccdacbd4e86c1705b1 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 11 Sep 2015 16:44:02 +0100 Subject: [PATCH 1019/2091] ARM: fix Thumb2 signal handling when ARMv6 is enabled commit 9b55613f42e8d40d5c9ccb8970bde6af4764b2ab upstream. 
When a kernel is built covering ARMv6 to ARMv7, we omit to clear the IT state when entering a signal handler. This can cause the first few instructions to be conditionally executed depending on the parent context. In any case, the original test for >= ARMv7 is broken - ARMv6 can have Thumb-2 support as well, and an ARMv6T2 specific build would omit this code too. Relax the test back to ARMv6 or greater. This results in us always clearing the IT state bits in the PSR, even on CPUs where these bits are reserved. However, they're reserved for the IT state, so this should cause no harm. Fixes: d71e1352e240 ("Clear the IT state when invoking a Thumb-2 signal handler") Acked-by: Tony Lindgren Tested-by: H. Nikolaus Schaller Tested-by: Grazvydas Ignotas Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/kernel/signal.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index 423663e23791e..586eef26203d1 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -343,12 +343,17 @@ setup_return(struct pt_regs *regs, struct ksignal *ksig, */ thumb = handler & 1; -#if __LINUX_ARM_ARCH__ >= 7 +#if __LINUX_ARM_ARCH__ >= 6 /* - * Clear the If-Then Thumb-2 execution state - * ARM spec requires this to be all 000s in ARM mode - * Snapdragon S4/Krait misbehaves on a Thumb=>ARM - * signal transition without this. + * Clear the If-Then Thumb-2 execution state. ARM spec + * requires this to be all 000s in ARM mode. Snapdragon + * S4/Krait misbehaves on a Thumb=>ARM signal transition + * without this. + * + * We must do this whenever we are running on a Thumb-2 + * capable CPU, which includes ARMv6T2. However, we elect + * to do this whenever we're on an ARMv6 or later CPU for + * simplicity. */ cpsr &= ~PSR_IT_MASK; #endif From fd9af2c0f0b32f7783a0b66c2949932de87ab36f Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 3 Sep 2015 13:24:40 +0100 Subject: [PATCH 1020/2091] ARM: 8429/1: disable GCC SRA optimization commit a077224fd35b2f7fbc93f14cf67074fc792fbac2 upstream. While working on the 32-bit ARM port of UEFI, I noticed a strange corruption in the kernel log. The following snprintf() statement (in drivers/firmware/efi/efi.c:efi_md_typeattr_format()) snprintf(pos, size, "|%3s|%2s|%2s|%2s|%3s|%2s|%2s|%2s|%2s]", was producing the following output in the log: | | | | | |WB|WT|WC|UC] | | | | | |WB|WT|WC|UC] | | | | | |WB|WT|WC|UC] |RUN| | | | |WB|WT|WC|UC]* |RUN| | | | |WB|WT|WC|UC]* | | | | | |WB|WT|WC|UC] |RUN| | | | |WB|WT|WC|UC]* | | | | | |WB|WT|WC|UC] |RUN| | | | | | | |UC] |RUN| | | | | | | |UC] As it turns out, this is caused by incorrect code being emitted for the string() function in lib/vsprintf.c. The following code if (!(spec.flags & LEFT)) { while (len < spec.field_width--) { if (buf < end) *buf = ' '; ++buf; } } for (i = 0; i < len; ++i) { if (buf < end) *buf = *s; ++buf; ++s; } while (len < spec.field_width--) { if (buf < end) *buf = ' '; ++buf; } when called with len == 0, triggers an issue in the GCC SRA optimization pass (Scalar Replacement of Aggregates), which handles promotion of signed struct members incorrectly. This is a known but as yet unresolved issue. (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65932). In this particular case, it is causing the second while loop to be executed erroneously a single time, causing the additional space characters to be printed. So disable the optimization by passing -fno-ipa-sra. 
Acked-by: Nicolas Pitre Signed-off-by: Ard Biesheuvel Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/Makefile | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 985227cbbd1bd..47f10e7ad1f6e 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -50,6 +50,14 @@ AS += -EL LD += -EL endif +# +# The Scalar Replacement of Aggregates (SRA) optimization pass in GCC 4.9 and +# later may result in code being generated that handles signed short and signed +# char struct members incorrectly. So disable it. +# (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65932) +# +KBUILD_CFLAGS += $(call cc-option,-fno-ipa-sra) + # This selects which instruction set is used. # Note that GCC does not numerically define an architecture version # macro, but instead defines a whole series of macros which makes From c014e0ffd0ddaff75e59a77b463b5c852ecfd8cf Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Fri, 31 Jul 2015 14:08:58 +0200 Subject: [PATCH 1021/2091] windfarm: decrement client count when unregistering commit fe2b592173ff0274e70dc44d1d28c19bb995aa7c upstream. wf_unregister_client() increments the client count when a client unregisters. That is obviously incorrect. Decrement that client count instead. Fixes: 75722d3992f5 ("[PATCH] ppc64: Thermal control for SMU based machines") Signed-off-by: Paul Bolle Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- drivers/macintosh/windfarm_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/macintosh/windfarm_core.c b/drivers/macintosh/windfarm_core.c index 3ee198b658438..cc7ece1712b50 100644 --- a/drivers/macintosh/windfarm_core.c +++ b/drivers/macintosh/windfarm_core.c @@ -435,7 +435,7 @@ int wf_unregister_client(struct notifier_block *nb) { mutex_lock(&wf_lock); blocking_notifier_chain_unregister(&wf_client_list, nb); - wf_client_count++; + wf_client_count--; if (wf_client_count == 0) wf_stop_thread(); mutex_unlock(&wf_lock); From ca4104a08c62d29b43d30b8058507b2dd35ef5f2 Mon Sep 17 00:00:00 2001 From: Doug Anderson Date: Wed, 26 Aug 2015 18:26:49 +0100 Subject: [PATCH 1022/2091] ARM: 8425/1: kgdb: Don't try to stop the machine when setting breakpoints commit 7ae85dc7687c7e7119053d83d02c560ea217b772 upstream. In (23a4e40 arm: kgdb: Handle read-only text / modules) we moved to using patch_text() to set breakpoints so that we could handle the case when we had CONFIG_DEBUG_RODATA. That patch used patch_text(). Unfortunately, patch_text() assumes that we're not in atomic context when it runs since it needs to grab a mutex and also wait for other CPUs to stop (which it does with a completion). This would result in a stack crawl if you had CONFIG_DEBUG_ATOMIC_SLEEP and tried to set a breakpoint in kgdb. 
The crawl looked something like: BUG: scheduling while atomic: swapper/0/0/0x00010007 CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.2.0-rc7-00133-geb63b34 #1073 Hardware name: Rockchip (Device Tree) (unwind_backtrace) from [] (show_stack+0x20/0x24) (show_stack) from [] (dump_stack+0x84/0xb8) (dump_stack) from [] (__schedule_bug+0x54/0x6c) (__schedule_bug) from [] (__schedule+0x80/0x668) (__schedule) from [] (schedule+0xb8/0xd4) (schedule) from [] (schedule_timeout+0x2c/0x234) (schedule_timeout) from [] (wait_for_common+0xf4/0x188) (wait_for_common) from [] (wait_for_completion+0x20/0x24) (wait_for_completion) from [] (__stop_cpus+0x58/0x70) (__stop_cpus) from [] (stop_cpus+0x3c/0x54) (stop_cpus) from [] (__stop_machine+0xcc/0xe8) (__stop_machine) from [] (stop_machine+0x34/0x44) (stop_machine) from [] (patch_text+0x28/0x34) (patch_text) from [] (kgdb_arch_set_breakpoint+0x40/0x4c) (kgdb_arch_set_breakpoint) from [] (kgdb_validate_break_address+0x2c/0x60) (kgdb_validate_break_address) from [] (dbg_set_sw_break+0x1c/0xdc) (dbg_set_sw_break) from [] (gdb_serial_stub+0x9c4/0xba4) (gdb_serial_stub) from [] (kgdb_cpu_enter+0x1f8/0x60c) (kgdb_cpu_enter) from [] (kgdb_handle_exception+0x19c/0x1d0) (kgdb_handle_exception) from [] (kgdb_compiled_brk_fn+0x30/0x3c) (kgdb_compiled_brk_fn) from [] (do_undefinstr+0x1a4/0x20c) (do_undefinstr) from [] (__und_svc_finish+0x0/0x34) It turns out that when we're in kgdb all the CPUs are stopped anyway so there's no reason we should be calling patch_text(). We can instead directly call __patch_text() which assumes that CPUs have already been stopped. Fixes: 23a4e4050ba9 ("arm: kgdb: Handle read-only text / modules") Reported-by: Aapo Vienamo Signed-off-by: Douglas Anderson Reviewed-by: Stephen Boyd Acked-by: Kees Cook Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/kernel/kgdb.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/arm/kernel/kgdb.c b/arch/arm/kernel/kgdb.c index a6ad93c9bce35..fd9eefce0a7b8 100644 --- a/arch/arm/kernel/kgdb.c +++ b/arch/arm/kernel/kgdb.c @@ -259,15 +259,17 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) if (err) return err; - patch_text((void *)bpt->bpt_addr, - *(unsigned int *)arch_kgdb_ops.gdb_bpt_instr); + /* Machine is already stopped, so we can use __patch_text() directly */ + __patch_text((void *)bpt->bpt_addr, + *(unsigned int *)arch_kgdb_ops.gdb_bpt_instr); return err; } int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt) { - patch_text((void *)bpt->bpt_addr, *(unsigned int *)bpt->saved_instr); + /* Machine is already stopped, so we can use __patch_text() directly */ + __patch_text((void *)bpt->bpt_addr, *(unsigned int *)bpt->saved_instr); return 0; } From b43dbfb4b38d0c414899285fe94c1bdc8db39706 Mon Sep 17 00:00:00 2001 From: Grazvydas Ignotas Date: Wed, 16 Sep 2015 01:34:31 +0300 Subject: [PATCH 1023/2091] ARM: dts: omap5-uevm.dts: fix i2c5 pinctrl offsets commit 1dbdad75074d16c3e3005180f81a01cdc04a7872 upstream. The i2c5 pinctrl offsets are wrong. If the bootloader doesn't set the pins up, communication with tca6424a doesn't work (controller timeouts) and it is not possible to enable HDMI. 
Fixes: 9be495c42609 ("ARM: dts: omap5-evm: Add I2c pinctrl data") Signed-off-by: Grazvydas Ignotas Signed-off-by: Tony Lindgren Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/omap5-uevm.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/omap5-uevm.dts b/arch/arm/boot/dts/omap5-uevm.dts index 74777a6e200a6..1b958e92d6742 100644 --- a/arch/arm/boot/dts/omap5-uevm.dts +++ b/arch/arm/boot/dts/omap5-uevm.dts @@ -174,8 +174,8 @@ i2c5_pins: pinmux_i2c5_pins { pinctrl-single,pins = < - 0x184 (PIN_INPUT | MUX_MODE0) /* i2c5_scl */ - 0x186 (PIN_INPUT | MUX_MODE0) /* i2c5_sda */ + 0x186 (PIN_INPUT | MUX_MODE0) /* i2c5_scl */ + 0x188 (PIN_INPUT | MUX_MODE0) /* i2c5_sda */ >; }; From 49d8fc8a402190421759e665b82988229de537b7 Mon Sep 17 00:00:00 2001 From: Carl Frederik Werner Date: Wed, 2 Sep 2015 10:07:57 +0900 Subject: [PATCH 1024/2091] ARM: dts: omap3-beagle: make i2c3, ddc and tfp410 gpio work again commit 3a2fa775bd1d0579113666c1a2e37654a34018a0 upstream. Let's fix the pinmux address of gpio 170 used by the tfp410 powerdown-gpio. According to the OMAP35x Technical Reference Manual CONTROL_PADCONF_I2C3_SDA[15:0] 0x480021C4 mode0: i2c3_sda CONTROL_PADCONF_I2C3_SDA[31:16] 0x480021C4 mode4: gpio_170 the pinmux address of gpio 170 must be 0x480021C6. The former wrong address broke i2c3 (used by hdmi ddc), resulting in the kernel message: omap_i2c 48060000.i2c: controller timed out Fixes: 8cecf52befd7 ("ARM: omap3-beagle.dts: add display information") Signed-off-by: Carl Frederik Werner Signed-off-by: Tony Lindgren Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/omap3-beagle.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/omap3-beagle.dts b/arch/arm/boot/dts/omap3-beagle.dts index a5474113cd506..67659a0ed13e1 100644 --- a/arch/arm/boot/dts/omap3-beagle.dts +++ b/arch/arm/boot/dts/omap3-beagle.dts @@ -202,7 +202,7 @@ tfp410_pins: pinmux_tfp410_pins { pinctrl-single,pins = < - 0x194 (PIN_OUTPUT | MUX_MODE4) /* hdq_sio.gpio_170 */ + 0x196 (PIN_OUTPUT | MUX_MODE4) /* hdq_sio.gpio_170 */ >; }; From d46cd96d855aa21e0c22c70534491ac3437864fe Mon Sep 17 00:00:00 2001 From: Chanho Park Date: Tue, 1 Sep 2015 23:17:03 +0900 Subject: [PATCH 1025/2091] ARM: EXYNOS: reset Little cores when cpu is up commit 833b5794e3303cc97a0d2d4ba97f26cc9d9b4b79 upstream. CPU booting on exynos5422 has remained broken since we discussed it last year[1]. This patch is inspired by the Odroid XU3 code (actually, it comes from the Samsung exynos vendor kernel)[2]. This weird reset code was found on exynos5420 octa-core series SoCs and is only required when the first boot core is the Little core (Cortex A7). Some of the exynos5420 boards and all of the exynos5422 boards will require this code. There are two ways to check whether the Little core is the first cpu. One is checking the GPG2CON[1] GPIO value and the other is checking the cluster number of the first cpu. I selected the latter because it's easier than the former.
[1] http://lists.infradead.org/pipermail/linux-arm-kernel/2015-June/350632.html [2] https://patchwork.kernel.org/patch/6782891/ Cc: Kevin Hilman Cc: Javier Martinez Canillas Cc: Krzysztof Kozlowski Tested-by: Kevin Hilman Signed-off-by: Chanho Park [k.kozlowski: Adding stable for v4.1+, reformat comment] Signed-off-by: Krzysztof Kozlowski Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-exynos/mcpm-exynos.c | 27 ++++++++++++++++++++++++++- arch/arm/mach-exynos/regs-pmu.h | 6 ++++++ 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-exynos/mcpm-exynos.c b/arch/arm/mach-exynos/mcpm-exynos.c index 9bdf54795f05d..56978199c4798 100644 --- a/arch/arm/mach-exynos/mcpm-exynos.c +++ b/arch/arm/mach-exynos/mcpm-exynos.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "regs-pmu.h" #include "common.h" @@ -70,7 +71,31 @@ static int exynos_cpu_powerup(unsigned int cpu, unsigned int cluster) cluster >= EXYNOS5420_NR_CLUSTERS) return -EINVAL; - exynos_cpu_power_up(cpunr); + if (!exynos_cpu_power_state(cpunr)) { + exynos_cpu_power_up(cpunr); + + /* + * This assumes the cluster number of the big cores(Cortex A15) + * is 0 and the Little cores(Cortex A7) is 1. + * When the system was booted from the Little core, + * they should be reset during power up cpu. + */ + if (cluster && + cluster == MPIDR_AFFINITY_LEVEL(cpu_logical_map(0), 1)) { + /* + * Before we reset the Little cores, we should wait + * the SPARE2 register is set to 1 because the init + * codes of the iROM will set the register after + * initialization. + */ + while (!pmu_raw_readl(S5P_PMU_SPARE2)) + udelay(10); + + pmu_raw_writel(EXYNOS5420_KFC_CORE_RESET(cpu), + EXYNOS_SWRESET); + } + } + return 0; } diff --git a/arch/arm/mach-exynos/regs-pmu.h b/arch/arm/mach-exynos/regs-pmu.h index b7614333d2968..fba9068ed260d 100644 --- a/arch/arm/mach-exynos/regs-pmu.h +++ b/arch/arm/mach-exynos/regs-pmu.h @@ -513,6 +513,12 @@ static inline unsigned int exynos_pmu_cpunr(unsigned int mpidr) #define SPREAD_ENABLE 0xF #define SPREAD_USE_STANDWFI 0xF +#define EXYNOS5420_KFC_CORE_RESET0 BIT(8) +#define EXYNOS5420_KFC_ETM_RESET0 BIT(20) + +#define EXYNOS5420_KFC_CORE_RESET(_nr) \ + ((EXYNOS5420_KFC_CORE_RESET0 | EXYNOS5420_KFC_ETM_RESET0) << (_nr)) + #define EXYNOS5420_BB_CON1 0x0784 #define EXYNOS5420_BB_SEL_EN BIT(31) #define EXYNOS5420_BB_PMOS_EN BIT(7) From 7ae7e1459597cbd0c30972f1c29a1b1c0d5825c4 Mon Sep 17 00:00:00 2001 From: "Felipe F. Tonello" Date: Wed, 16 Sep 2015 18:40:32 +0100 Subject: [PATCH 1026/2091] ARM: dts: fix usb pin control for imx-rex dts commit 0af822110871400908d5b6f83a8908c45f881d8f upstream. This fixes a duplicated pin control causing this error: imx6q-pinctrl 20e0000.iomuxc: pin MX6Q_PAD_GPIO_1 already requested by regulators:regulator@2; cannot claim for 2184000.usb imx6q-pinctrl 20e0000.iomuxc: pin-137 (2184000.usb) status -22 imx6q-pinctrl 20e0000.iomuxc: could not request pin 137 (MX6Q_PAD_GPIO_1) from group usbotggrp on device 20e0000.iomuxc imx_usb 2184000.usb: Error applying setting, reverse things back imx6q-pinctrl 20e0000.iomuxc: pin MX6Q_PAD_EIM_D31 already requested by regulators:regulator@1; cannot claim for 2184200.usb imx6q-pinctrl 20e0000.iomuxc: pin-52 (2184200.usb) status -22 imx6q-pinctrl 20e0000.iomuxc: could not request pin 52 (MX6Q_PAD_EIM_D31) from group usbh1grp on device 20e0000.iomuxc imx_usb 2184200.usb: Error applying setting, reverse things back Signed-off-by: Felipe F. 
Tonello Fixes: e2047e33f2bd ("ARM: dts: add initial Rex Pro board support") Signed-off-by: Shawn Guo Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/imx6qdl-rex.dtsi | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm/boot/dts/imx6qdl-rex.dtsi b/arch/arm/boot/dts/imx6qdl-rex.dtsi index 488a640796ac0..394a4ace351af 100644 --- a/arch/arm/boot/dts/imx6qdl-rex.dtsi +++ b/arch/arm/boot/dts/imx6qdl-rex.dtsi @@ -35,7 +35,6 @@ compatible = "regulator-fixed"; reg = <1>; pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_usbh1>; regulator-name = "usbh1_vbus"; regulator-min-microvolt = <5000000>; regulator-max-microvolt = <5000000>; @@ -47,7 +46,6 @@ compatible = "regulator-fixed"; reg = <2>; pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_usbotg>; regulator-name = "usb_otg_vbus"; regulator-min-microvolt = <5000000>; regulator-max-microvolt = <5000000>; From eb218995a9031ed04fb5e0566fac5ab21bfe4b4e Mon Sep 17 00:00:00 2001 From: Jeff Moyer Date: Fri, 14 Aug 2015 16:15:32 -0400 Subject: [PATCH 1027/2091] blockdev: don't set S_DAX for misaligned partitions commit f0b2e563bc419df7c1b3d2f494574c25125f6aed upstream. The dax code doesn't currently support misaligned partitions, so disable O_DIRECT via dax until such time as that support materializes. Suggested-by: Boaz Harrosh Signed-off-by: Jeff Moyer Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- fs/block_dev.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/block_dev.c b/fs/block_dev.c index c7e4163ede87f..ccfd31f1df3a4 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1234,6 +1234,13 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) goto out_clear; } bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9); + /* + * If the partition is not aligned on a page + * boundary, we can't do dax I/O to it. + */ + if ((bdev->bd_part->start_sect % (PAGE_SIZE / 512)) || + (bdev->bd_part->nr_sects % (PAGE_SIZE / 512))) + bdev->bd_inode->i_flags &= ~S_DAX; } } else { if (bdev->bd_contains == bdev) { From c46eada1470d4af205ffa3cf7aad8d909792af7f Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 28 Sep 2015 18:57:03 +0300 Subject: [PATCH 1028/2091] dmaengine: dw: properly read DWC_PARAMS register commit 6bea0f6d1c47b07be88dfd93f013ae05fcb3d8bf upstream. In case we have fewer than the maximum allowed channels (8) and autoconfiguration is enabled, the DWC_PARAMS read is wrong because it uses different arithmetic from what is needed for channel priority setup. Re-do the calculations properly. This now works well on an AVR32 board. Fixes: fed2574b3c9f (dw_dmac: introduce software emulation of LLP transfers) Cc: yitian.bu@tangramtek.com Signed-off-by: Andy Shevchenko Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/dw/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c index 1022c2e1a2b0a..9e504d3b0d4fc 100644 --- a/drivers/dma/dw/core.c +++ b/drivers/dma/dw/core.c @@ -1591,7 +1591,6 @@ int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata) INIT_LIST_HEAD(&dw->dma.channels); for (i = 0; i < nr_channels; i++) { struct dw_dma_chan *dwc = &dw->chan[i]; - int r = nr_channels - i - 1; dwc->chan.device = &dw->dma; dma_cookie_init(&dwc->chan); @@ -1603,7 +1602,7 @@ int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata) /* 7 is highest priority & 0 is lowest.
*/ if (pdata->chan_priority == CHAN_PRIORITY_ASCENDING) - dwc->priority = r; + dwc->priority = nr_channels - i - 1; else dwc->priority = i; @@ -1622,6 +1621,7 @@ int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata) /* Hardware configuration */ if (autocfg) { unsigned int dwc_params; + unsigned int r = DW_DMA_MAX_NR_CHANNELS - i - 1; void __iomem *addr = chip->regs + r * sizeof(u32); dwc_params = dma_read_byaddr(addr, DWC_PARAMS); From d97c1eca67cf264d1ac8f5abe5eba7221c26b1a8 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Thu, 30 Jul 2015 16:24:43 -0700 Subject: [PATCH 1029/2091] x86/apic: Serialize LVTT and TSC_DEADLINE writes commit 5d7c631d926b59aa16f3c56eaeb83f1036c81dc7 upstream. The APIC LVTT register is MMIO mapped but the TSC_DEADLINE register is an MSR. The write to the TSC_DEADLINE MSR is not serializing, so it's not guaranteed that the write to LVTT has reached the APIC before the TSC_DEADLINE MSR is written. In such a case the write to the MSR is ignored and as a consequence the local timer interrupt never fires. The SDM describes this issue for xAPIC and x2APIC modes. The serialization methods recommended by the SDM differ. xAPIC: "1. Memory-mapped write to LVT Timer Register, setting bits 18:17 to 10b. 2. WRMSR to the IA32_TSC_DEADLINE MSR a value much larger than current time-stamp counter. 3. If RDMSR of the IA32_TSC_DEADLINE MSR returns zero, go to step 2. 4. WRMSR to the IA32_TSC_DEADLINE MSR the desired deadline." x2APIC: "To allow for efficient access to the APIC registers in x2APIC mode, the serializing semantics of WRMSR are relaxed when writing to the APIC registers. Thus, system software should not use 'WRMSR to APIC registers in x2APIC mode' as a serializing instruction. Read and write accesses to the APIC registers will occur in program order. A WRMSR to an APIC register may complete before all preceding stores are globally visible; software can prevent this by inserting a serializing instruction, an SFENCE, or an MFENCE before the WRMSR." The xAPIC method is to just wait for the memory mapped write to hit the LVTT by checking whether the MSR write has reached the hardware. There is no reason why a proper MFENCE after the memory mapped write would not do the same. Andi Kleen confirmed that MFENCE is sufficient for the xAPIC case as well. Issue MFENCE before writing to the TSC_DEADLINE MSR. This can be done unconditionally as all CPUs which have TSC_DEADLINE also have MFENCE support. [ tglx: Massaged the changelog ] Signed-off-by: Shaohua Li Reviewed-by: Ingo Molnar Cc: Cc: Cc: Cc: Andi Kleen Cc: H. Peter Anvin Link: http://lkml.kernel.org/r/20150909041352.GA2059853@devbig257.prn2.facebook.com Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/apic/apic.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index cde732c1b4959..307a49828826b 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -336,6 +336,13 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) apic_write(APIC_LVTT, lvtt_value); if (lvtt_value & APIC_LVT_TIMER_TSCDEADLINE) { + /* + * See Intel SDM: TSC-Deadline Mode chapter. In xAPIC mode, + * writing to the APIC LVTT and TSC_DEADLINE MSR isn't serialized. + * According to Intel, MFENCE can do the serialization here.
+ */ + asm volatile("mfence" : : : "memory"); + printk_once(KERN_DEBUG "TSC deadline timer enabled\n"); return; } From 341711093165b2e33c353480f123e5a0c69c91b5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 3 Sep 2015 12:34:55 +0200 Subject: [PATCH 1030/2091] x86/alternatives: Make optimize_nops() interrupt safe and synced commit 66c117d7fa2ae429911e60d84bf31a90b2b96189 upstream. Richard reported the following crash: [ 0.036000] BUG: unable to handle kernel paging request at 55501e06 [ 0.036000] IP: [] common_interrupt+0xb/0x38 [ 0.036000] Call Trace: [ 0.036000] [] ? add_nops+0x90/0xa0 [ 0.036000] [] apply_alternatives+0x274/0x630 Chuck decoded: " 0: 8d 90 90 83 04 24 lea 0x24048390(%eax),%edx 6: 80 fc 0f cmp $0xf,%ah 9: a8 0f test $0xf,%al >> b: a0 06 1e 50 55 mov 0x55501e06,%al 10: 57 push %edi 11: 56 push %esi Interrupt 0x30 occurred while the alternatives code was replacing the initial 0x90,0x90,0x90 NOPs (from the ASM_CLAC macro) with the optimized version, 0x8d,0x76,0x00. Only the first byte has been replaced so far, and it makes a mess out of the insn decoding." optimize_nops() is buggy in two aspects: - It's not disabling interrupts across the modification - It's lacking a sync_core() call Add both. Fixes: 4fd4b6e5537c 'x86/alternatives: Use optimized NOPs for padding' Reported-and-tested-by: "Richard W.M. Jones" Signed-off-by: Thomas Gleixner Cc: Richard W.M. Jones Cc: Chuck Ebbert Cc: Borislav Petkov Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1509031232340.15006@nanos Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/alternative.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index aef6531931606..d1918a8c43930 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -325,10 +325,15 @@ recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insnbuf) static void __init_or_module optimize_nops(struct alt_instr *a, u8 *instr) { + unsigned long flags; + if (instr[0] != 0x90) return; + local_irq_save(flags); add_nops(instr + (a->instrlen - a->padlen), a->padlen); + sync_core(); + local_irq_restore(flags); DUMP_BYTES(instr, a->instrlen, "%p: [%d:%d) optimized NOPs: ", instr, a->instrlen - a->padlen, a->padlen); From e6491ecad829ef7e49a8a7cf3eb4b8189d926efa Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 16 Sep 2015 14:10:03 +0100 Subject: [PATCH 1031/2091] x86/platform: Fix Geode LX timekeeping in the generic x86 build commit 03da3ff1cfcd7774c8780d2547ba0d995f7dc03d upstream. In 2007, commit 07190a08eef36 ("Mark TSC on GeodeLX reliable") bypassed verification of the TSC on Geode LX. However, this code (now in the check_system_tsc_reliable() function in arch/x86/kernel/tsc.c) was only present if CONFIG_MGEODE_LX was set. OpenWRT has recently started building its generic Geode target for Geode GX, not LX, to include support for additional platforms. This broke the timekeeping on LX-based devices, because the TSC wasn't marked as reliable: https://dev.openwrt.org/ticket/20531 By adding a runtime check on is_geode_lx(), we can also include the fix if CONFIG_MGEODEGX1 or CONFIG_X86_GENERIC are set, thus fixing the problem. 
Signed-off-by: David Woodhouse Cc: Andres Salomon Cc: Linus Torvalds Cc: Marcelo Tosatti Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1442409003.131189.87.camel@infradead.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/tsc.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 505449700e0cf..21187ebee7d09 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -21,6 +21,7 @@ #include #include #include +#include unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */ EXPORT_SYMBOL(cpu_khz); @@ -1004,15 +1005,17 @@ EXPORT_SYMBOL_GPL(mark_tsc_unstable); static void __init check_system_tsc_reliable(void) { -#ifdef CONFIG_MGEODE_LX - /* RTSC counts during suspend */ +#if defined(CONFIG_MGEODEGX1) || defined(CONFIG_MGEODE_LX) || defined(CONFIG_X86_GENERIC) + if (is_geode_lx()) { + /* RTSC counts during suspend */ #define RTSC_SUSP 0x100 - unsigned long res_low, res_high; + unsigned long res_low, res_high; - rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high); - /* Geode_LX - the OLPC CPU has a very reliable TSC */ - if (res_low & RTSC_SUSP) - tsc_clocksource_reliable = 1; + rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high); + /* Geode_LX - the OLPC CPU has a very reliable TSC */ + if (res_low & RTSC_SUSP) + tsc_clocksource_reliable = 1; + } #endif if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) tsc_clocksource_reliable = 1; From b3eb2816d05082015e67f9275c5cc401c2eef43c Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sun, 20 Sep 2015 16:32:04 -0700 Subject: [PATCH 1032/2091] x86/paravirt: Replace the paravirt nop with a bona fide empty function commit fc57a7c68020dcf954428869eafd934c0ab1536f upstream. PARAVIRT_ADJUST_EXCEPTION_FRAME generates this code (using nmi as an example, trimmed for readability): ff 15 00 00 00 00 callq *0x0(%rip) # 2796 2792: R_X86_64_PC32 pv_irq_ops+0x2c That's a call through a function pointer to regular C function that does nothing on native boots, but that function isn't protected against kprobes, isn't marked notrace, and is certainly not guaranteed to preserve any registers if the compiler is feeling perverse. This is bad news for a CLBR_NONE operation. Of course, if everything works correctly, once paravirt ops are patched, it gets nopped out, but what if we hit this code before paravirt ops are patched in? This can potentially cause breakage that is very difficult to debug. A more subtle failure is possible here, too: if _paravirt_nop uses the stack at all (even just to push RBP), it will overwrite the "NMI executing" variable if it's called in the NMI prologue. The Xen case, perhaps surprisingly, is fine, because it's already written in asm. Fix all of the cases that default to paravirt_nop (including adjust_exception_frame) with a big hammer: replace paravirt_nop with an asm function that is just a ret instruction. The Xen case may have other problems, so document them. This is part of a fix for some random crashes that Sasha saw. 
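To make the "preserve everything, including the stack" point concrete, here is a small illustration (not from the patch): even an empty C function leaves the compiler free to emit a frame-pointer prologue, and that single push is already enough to overwrite the "NMI executing" slot, whereas the hand-written stub is a lone ret and touches nothing.

    /*
     * Illustration only.  With frame pointers enabled
     * (-fno-omit-frame-pointer), GCC typically compiles this to:
     *
     *     push %rbp        <- writes to the stack: fatal in the NMI path
     *     mov  %rsp,%rbp
     *     pop  %rbp
     *     ret
     *
     * while the replacement asm stub is nothing but a single "ret".
     */
    void empty_c_stub(void) { }
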
Reported-and-tested-by: Sasha Levin Signed-off-by: Andy Lutomirski Link: http://lkml.kernel.org/r/8f5d2ba295f9d73751c33d97fda03e0495d9ade0.1442791737.git.luto@kernel.org Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/entry_64.S | 11 +++++++++++ arch/x86/kernel/paravirt.c | 16 ++++++++++++---- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 4bd6c197563dd..50dd556a8c8be 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1393,7 +1393,18 @@ END(error_exit) /* Runs on exception stack */ ENTRY(nmi) INTR_FRAME + /* + * Fix up the exception frame if we're on Xen. + * PARAVIRT_ADJUST_EXCEPTION_FRAME is guaranteed to push at most + * one value to the stack on native, so it may clobber the rdx + * scratch slot, but it won't clobber any of the important + * slots past it. + * + * Xen is a different story, because the Xen frame itself overlaps + * the "NMI executing" variable. + */ PARAVIRT_ADJUST_EXCEPTION_FRAME + /* * We allow breakpoints in NMIs. If a breakpoint occurs, then * the iretq it performs will take us out of NMI context. diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index c614dd492f5f7..1f316f066c499 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -41,10 +41,18 @@ #include #include -/* nop stub */ -void _paravirt_nop(void) -{ -} +/* + * nop stub, which must not clobber anything *including the stack* to + * avoid confusing the entry prologues. + */ +extern void _paravirt_nop(void); +asm (".pushsection .entry.text, \"ax\"\n" + ".global _paravirt_nop\n" + "_paravirt_nop:\n\t" + "ret\n\t" + ".size _paravirt_nop, . - _paravirt_nop\n\t" + ".type _paravirt_nop, @function\n\t" + ".popsection"); /* identity function, which can be inlined */ u32 _paravirt_ident_32(u32 x) From 3f2a8445b8082586cfc83ee640494bb1a51c157d Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sun, 20 Sep 2015 16:32:05 -0700 Subject: [PATCH 1033/2091] x86/nmi/64: Fix a paravirt stack-clobbering bug in the NMI code commit 83c133cf11fb0e68a51681447e372489f052d40e upstream. The NMI entry code that switches to the normal kernel stack needs to be very careful not to clobber any extra stack slots on the NMI stack. The code is fine under the assumption that SWAPGS is just a normal instruction, but that assumption isn't really true. Use SWAPGS_UNSAFE_STACK instead. This is part of a fix for some random crashes that Sasha saw. Fixes: 9b6e6a8334d5 ("x86/nmi/64: Switch stacks on userspace NMI entry") Reported-and-tested-by: Sasha Levin Signed-off-by: Andy Lutomirski Link: http://lkml.kernel.org/r/974bc40edffdb5c2950a5c4977f821a446b76178.1442791737.git.luto@kernel.org Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/entry_64.S | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 50dd556a8c8be..6c9cb60738326 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1456,9 +1456,12 @@ ENTRY(nmi) * we don't want to enable interrupts, because then we'll end * up in an awkward situation in which IRQs are on but NMIs * are off. + * + * We also must not push anything to the stack before switching + * stacks lest we corrupt the "NMI executing" variable. 
*/ - SWAPGS + SWAPGS_UNSAFE_STACK cld movq %rsp, %rdx movq PER_CPU_VAR(kernel_stack), %rsp From d6a4aed83c19f98741e081e79dbb36472aefa57a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dirk=20M=C3=BCller?= Date: Thu, 1 Oct 2015 13:43:42 +0200 Subject: [PATCH 1034/2091] Use WARN_ON_ONCE for missing X86_FEATURE_NRIPS commit d2922422c48df93f3edff7d872ee4f3191fefb08 upstream. The cpu feature flags are not ever going to change, so warning every time can cause a lot of kernel log spam (in our case more than 10GB/hour). The warning seems to only occur when nested virtualization is enabled, so it's probably triggered by a KVM bug. This is a sensible and safe change anyway, and the KVM bug fix might not be suitable for stable releases anyway. Signed-off-by: Dirk Mueller Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 4911bf19122b4..7858cd9acfe4a 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -512,7 +512,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) struct vcpu_svm *svm = to_svm(vcpu); if (svm->vmcb->control.next_rip != 0) { - WARN_ON(!static_cpu_has(X86_FEATURE_NRIPS)); + WARN_ON_ONCE(!static_cpu_has(X86_FEATURE_NRIPS)); svm->next_rip = svm->vmcb->control.next_rip; } From 6adcb2b15e3d534f7e34c197c9f314de25810729 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 25 Sep 2015 23:02:18 +0100 Subject: [PATCH 1035/2091] x86/efi: Fix boot crash by mapping EFI memmap entries bottom-up at runtime, instead of top-down commit a5caa209ba9c29c6421292e7879d2387a2ef39c9 upstream. Beginning with UEFI v2.5, EFI_PROPERTIES_TABLE was introduced that signals that the firmware PE/COFF loader supports splitting code and data sections of PE/COFF images into separate EFI memory map entries. This allows the kernel to map those regions with strict memory protections, e.g. EFI_MEMORY_RO for code, EFI_MEMORY_XP for data, etc. Unfortunately, an unwritten requirement of this new feature is that the regions need to be mapped with the same offsets relative to each other as observed in the EFI memory map. If this is not done, crashes like this may occur: BUG: unable to handle kernel paging request at fffffffefe6086dd IP: [] 0xfffffffefe6086dd Call Trace: [] efi_call+0x7e/0x100 [] ? virt_efi_set_variable+0x61/0x90 [] efi_delete_dummy_variable+0x63/0x70 [] efi_enter_virtual_mode+0x383/0x392 [] start_kernel+0x38a/0x417 [] x86_64_start_reservations+0x2a/0x2c [] x86_64_start_kernel+0xeb/0xef Here 0xfffffffefe6086dd refers to an address the firmware expects to be mapped but which the OS never claimed was mapped. The issue is that included in these regions are relative addresses to other regions which were emitted by the firmware toolchain before the "splitting" of sections occurred at runtime. Needless to say, we don't satisfy this unwritten requirement on x86_64 and instead map the EFI memory map entries in reverse order. The above crash is almost certainly triggerable with any kernel newer than v3.13 because that's when we rewrote the EFI runtime region mapping code, in commit d2f7cbe7b26a ("x86/efi: Runtime services virtual mapping"). For kernel versions before v3.13 things may work by pure luck depending on the fragmentation of the kernel virtual address space at the time we map the EFI regions.
Instead of mapping the EFI memory map entries in reverse order, where entry N has a higher virtual address than entry N+1, map them in the same order as they appear in the EFI memory map to preserve this relative offset between regions. This patch has been kept as small as possible with the intention that it should be applied aggressively to stable and distribution kernels. It is very much a bugfix rather than support for a new feature, since when EFI_PROPERTIES_TABLE is enabled we must map things as outlined above to even boot - we have no way of asking the firmware not to split the code/data regions. In fact, this patch doesn't even make use of the more strict memory protections available in UEFI v2.5. That will come later. Suggested-by: Ard Biesheuvel Reported-by: Ard Biesheuvel Signed-off-by: Matt Fleming Cc: Borislav Petkov Cc: Chun-Yi Cc: Dave Young Cc: H. Peter Anvin Cc: James Bottomley Cc: Lee, Chun-Yi Cc: Leif Lindholm Cc: Linus Torvalds Cc: Matthew Garrett Cc: Mike Galbraith Cc: Peter Jones Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Link: http://lkml.kernel.org/r/1443218539-7610-2-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/platform/efi/efi.c | 67 ++++++++++++++++++++++++++++++++++++- 1 file changed, 66 insertions(+), 1 deletion(-) diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 841ea05e1b02c..477384985ac95 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -678,6 +678,70 @@ static void *realloc_pages(void *old_memmap, int old_shift) return ret; } +/* + * Iterate the EFI memory map in reverse order because the regions + * will be mapped top-down. The end result is the same as if we had + * mapped things forward, but doesn't require us to change the + * existing implementation of efi_map_region(). + */ +static inline void *efi_map_next_entry_reverse(void *entry) +{ + /* Initial call */ + if (!entry) + return memmap.map_end - memmap.desc_size; + + entry -= memmap.desc_size; + if (entry < memmap.map) + return NULL; + + return entry; +} + +/* + * efi_map_next_entry - Return the next EFI memory map descriptor + * @entry: Previous EFI memory map descriptor + * + * This is a helper function to iterate over the EFI memory map, which + * we do in different orders depending on the current configuration. + * + * To begin traversing the memory map @entry must be %NULL. + * + * Returns %NULL when we reach the end of the memory map. + */ +static void *efi_map_next_entry(void *entry) +{ + if (!efi_enabled(EFI_OLD_MEMMAP) && efi_enabled(EFI_64BIT)) { + /* + * Starting in UEFI v2.5 the EFI_PROPERTIES_TABLE + * config table feature requires us to map all entries + * in the same order as they appear in the EFI memory + * map. That is to say, entry N must have a lower + * virtual address than entry N+1. This is because the + * firmware toolchain leaves relative references in + * the code/data sections, which are split and become + * separate EFI memory regions. Mapping things + * out-of-order leads to the firmware accessing + * unmapped addresses. + * + * Since we need to map things this way whether or not + * the kernel actually makes use of + * EFI_PROPERTIES_TABLE, let's just switch to this + * scheme by default for 64-bit. 
+ */ + return efi_map_next_entry_reverse(entry); + } + + /* Initial call */ + if (!entry) + return memmap.map; + + entry += memmap.desc_size; + if (entry >= memmap.map_end) + return NULL; + + return entry; +} + /* * Map the efi memory ranges of the runtime services and update new_mmap with * virtual addresses. @@ -688,7 +752,8 @@ static void * __init efi_map_regions(int *count, int *pg_shift) unsigned long left = 0; efi_memory_desc_t *md; - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + p = NULL; + while ((p = efi_map_next_entry(p))) { md = p; if (!(md->attribute & EFI_MEMORY_RUNTIME)) { #ifdef CONFIG_X86_64 From 18b756c4d6137c541252843c04667b6d6655dea4 Mon Sep 17 00:00:00 2001 From: "Lee, Chun-Yi" Date: Tue, 29 Sep 2015 20:58:57 +0800 Subject: [PATCH 1036/2091] x86/kexec: Fix kexec crash in syscall kexec_file_load() commit e3c41e37b0f4b18cbd4dac76cbeece5a7558b909 upstream. The original bug is a page fault crash that sometimes happens on big machines when preparing ELF headers: BUG: unable to handle kernel paging request at ffffc90613fc9000 IP: [] prepare_elf64_ram_headers_callback+0x165/0x260 The bug is caused by us under-counting the number of memory ranges and subsequently not allocating enough ELF header space for them. The bug is typically masked on smaller systems, because the ELF header allocation is rounded up to the next page. This patch modifies the code in fill_up_crash_elf_data() by using walk_system_ram_res() instead of walk_system_ram_range() to correctly count the max number of crash memory ranges. That's because the walk_system_ram_range() filters out small memory regions that reside in the same page, but walk_system_ram_res() does not. Here's how I found the bug: After tracing prepare_elf64_headers() and prepare_elf64_ram_headers_callback(), the code uses walk_system_ram_res() to fill-in crash memory regions information to the program header, so it counts those small memory regions that reside in a page area. But, when the kernel was using walk_system_ram_range() in fill_up_crash_elf_data() to count the number of crash memory regions, it filters out small regions. I printed those small memory regions, for example: kexec: Get nr_ram ranges. vaddr=0xffff880077592258 paddr=0x77592258, sz=0xdc0 Based on the code in walk_system_ram_range(), this memory region will be filtered out: pfn = (0x77592258 + 0x1000 - 1) >> 12 = 0x77593 end_pfn = (0x77592258 + 0xfc0 -1 + 1) >> 12 = 0x77593 end_pfn - pfn = 0x77593 - 0x77593 = 0 <=== if (end_pfn > pfn) is FALSE So, the max_nr_ranges that's counted by the kernel doesn't include small memory regions - causing us to under-allocate the required space. That causes the page fault crash that happens in a later code path when preparing ELF headers. This bug is not easy to reproduce on small machines that have few CPUs, because the allocated page aligned ELF buffer has more free space to cover those small memory regions' PT_LOAD headers. 
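The rounding effect is easy to replay outside the kernel; the toy program below is not part of the patch, it simply re-runs the arithmetic quoted above (PAGE_SHIFT = 12 assumed) and shows the sub-page resource being dropped by page-granular counting.

    #include <stdio.h>
    #include <stdint.h>

    /* Replays the changelog example: a System RAM resource that starts at
     * 0x77592258 and is only 0xdc0 bytes long never spans a whole page, so a
     * page-granular walk (walk_system_ram_range style) counts zero ranges
     * while a resource-granular walk (walk_system_ram_res style) counts one.
     */
    int main(void)
    {
        uint64_t start = 0x77592258ULL, size = 0xdc0ULL;
        uint64_t pfn     = (start + 0x1000 - 1) >> 12;   /* first whole pfn  */
        uint64_t end_pfn = (start + size) >> 12;         /* pfn past the end */

        printf("pfn=0x%llx end_pfn=0x%llx -> %s\n",
               (unsigned long long)pfn, (unsigned long long)end_pfn,
               end_pfn > pfn ? "counted as a range"
                             : "dropped by the page-granular walk");
        return 0;
    }
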
Signed-off-by: Lee, Chun-Yi Cc: Andy Lutomirski Cc: Baoquan He Cc: Jiang Liu Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Stephen Rothwell Cc: Takashi Iwai Cc: Thomas Gleixner Cc: Viresh Kumar Cc: Vivek Goyal Cc: kexec@lists.infradead.org Cc: linux-kernel@vger.kernel.org Link: http://lkml.kernel.org/r/1443531537-29436-1-git-send-email-jlee@suse.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/crash.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index c76d3e37c6e1d..403ace539b73d 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -184,10 +184,9 @@ void native_machine_crash_shutdown(struct pt_regs *regs) } #ifdef CONFIG_KEXEC_FILE -static int get_nr_ram_ranges_callback(unsigned long start_pfn, - unsigned long nr_pfn, void *arg) +static int get_nr_ram_ranges_callback(u64 start, u64 end, void *arg) { - int *nr_ranges = arg; + unsigned int *nr_ranges = arg; (*nr_ranges)++; return 0; @@ -213,7 +212,7 @@ static void fill_up_crash_elf_data(struct crash_elf_data *ced, ced->image = image; - walk_system_ram_range(0, -1, &nr_ranges, + walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback); ced->max_nr_ranges = nr_ranges; From f0caabec9306b92288d8146a5e4454b968af009a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 30 Sep 2015 08:38:22 +0000 Subject: [PATCH 1037/2091] x86/process: Add proper bound checks in 64bit get_wchan() commit eddd3826a1a0190e5235703d1e666affa4d13b96 upstream. Dmitry Vyukov reported the following using trinity and the memory error detector AddressSanitizer (https://code.google.com/p/address-sanitizer/wiki/AddressSanitizerForKernel). [ 124.575597] ERROR: AddressSanitizer: heap-buffer-overflow on address ffff88002e280000 [ 124.576801] ffff88002e280000 is located 131938492886538 bytes to the left of 28857600-byte region [ffffffff81282e0a, ffffffff82e0830a) [ 124.578633] Accessed by thread T10915: [ 124.579295] inlined in describe_heap_address ./arch/x86/mm/asan/report.c:164 [ 124.579295] #0 ffffffff810dd277 in asan_report_error ./arch/x86/mm/asan/report.c:278 [ 124.580137] #1 ffffffff810dc6a0 in asan_check_region ./arch/x86/mm/asan/asan.c:37 [ 124.581050] #2 ffffffff810dd423 in __tsan_read8 ??:0 [ 124.581893] #3 ffffffff8107c093 in get_wchan ./arch/x86/kernel/process_64.c:444 The address checks in the 64bit implementation of get_wchan() are wrong in several ways: - The lower bound of the stack is not the start of the stack page. It's the start of the stack page plus sizeof (struct thread_info) - The upper bound must be: top_of_stack - TOP_OF_KERNEL_STACK_PADDING - 2 * sizeof(unsigned long). The 2 * sizeof(unsigned long) is required because the stack pointer points at the frame pointer. The layout on the stack is: ... IP FP ... IP FP. So we need to make sure that both IP and FP are in the bounds. Fix the bound checks and get rid of the mix of numeric constants, u64 and unsigned long. Making all unsigned long allows us to use the same function for 32bit as well. Use READ_ONCE() when accessing the stack. This does not prevent a concurrent wakeup of the task and the stack changing, but at least it avoids TOCTOU. Also check task state at the end of the loop. Again that does not prevent concurrent changes, but it avoids walking for nothing. Add proper comments while at it. 
Reported-by: Dmitry Vyukov Reported-by: Sasha Levin Based-on-patch-from: Wolfram Gloger Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Dmitry Vyukov Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Andrey Konovalov Cc: Kostya Serebryany Cc: Alexander Potapenko Cc: kasan-dev Cc: Denys Vlasenko Cc: Andi Kleen Cc: Wolfram Gloger Link: http://lkml.kernel.org/r/20150930083302.694788319@linutronix.de Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/process_64.c | 52 +++++++++++++++++++++++++++++------- 1 file changed, 42 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 5e0bf57d9944a..58e02d9382183 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -499,27 +499,59 @@ void set_personality_ia32(bool x32) } EXPORT_SYMBOL_GPL(set_personality_ia32); +/* + * Called from fs/proc with a reference on @p to find the function + * which called into schedule(). This needs to be done carefully + * because the task might wake up and we might look at a stack + * changing under us. + */ unsigned long get_wchan(struct task_struct *p) { - unsigned long stack; - u64 fp, ip; + unsigned long start, bottom, top, sp, fp, ip; int count = 0; if (!p || p == current || p->state == TASK_RUNNING) return 0; - stack = (unsigned long)task_stack_page(p); - if (p->thread.sp < stack || p->thread.sp >= stack+THREAD_SIZE) + + start = (unsigned long)task_stack_page(p); + if (!start) + return 0; + + /* + * Layout of the stack page: + * + * ----------- topmax = start + THREAD_SIZE - sizeof(unsigned long) + * PADDING + * ----------- top = topmax - TOP_OF_KERNEL_STACK_PADDING + * stack + * ----------- bottom = start + sizeof(thread_info) + * thread_info + * ----------- start + * + * The tasks stack pointer points at the location where the + * framepointer is stored. The data on the stack is: + * ... IP FP ... IP FP + * + * We need to read FP and IP, so we need to adjust the upper + * bound by another unsigned long. + */ + top = start + THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; + top -= 2 * sizeof(unsigned long); + bottom = start + sizeof(struct thread_info); + + sp = READ_ONCE(p->thread.sp); + if (sp < bottom || sp > top) return 0; - fp = *(u64 *)(p->thread.sp); + + fp = READ_ONCE(*(unsigned long *)sp); do { - if (fp < (unsigned long)stack || - fp >= (unsigned long)stack+THREAD_SIZE) + if (fp < bottom || fp > top) return 0; - ip = *(u64 *)(fp+8); + ip = READ_ONCE(*(unsigned long *)(fp + sizeof(unsigned long))); if (!in_sched_functions(ip)) return ip; - fp = *(u64 *)fp; - } while (count++ < 16); + fp = READ_ONCE(*(unsigned long *)fp); + } while (count++ < 16 && p->state != TASK_RUNNING); return 0; } From b05730b2e8a43445244e436d9715b1ab1dd59c81 Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Thu, 1 Oct 2015 09:04:22 -0400 Subject: [PATCH 1038/2091] x86/mm: Set NX on gap between __ex_table and rodata commit ab76f7b4ab2397ffdd2f1eb07c55697d19991d10 upstream. Unused space between the end of __ex_table and the start of rodata can be left W+x in the kernel page tables. Extend the setting of the NX bit to cover this gap by starting from text_end rather than rodata_start. 
Before: ---[ High Kernel Mapping ]--- 0xffffffff80000000-0xffffffff81000000 16M pmd 0xffffffff81000000-0xffffffff81600000 6M ro PSE GLB x pmd 0xffffffff81600000-0xffffffff81754000 1360K ro GLB x pte 0xffffffff81754000-0xffffffff81800000 688K RW GLB x pte 0xffffffff81800000-0xffffffff81a00000 2M ro PSE GLB NX pmd 0xffffffff81a00000-0xffffffff81b3b000 1260K ro GLB NX pte 0xffffffff81b3b000-0xffffffff82000000 4884K RW GLB NX pte 0xffffffff82000000-0xffffffff82200000 2M RW PSE GLB NX pmd 0xffffffff82200000-0xffffffffa0000000 478M pmd After: ---[ High Kernel Mapping ]--- 0xffffffff80000000-0xffffffff81000000 16M pmd 0xffffffff81000000-0xffffffff81600000 6M ro PSE GLB x pmd 0xffffffff81600000-0xffffffff81754000 1360K ro GLB x pte 0xffffffff81754000-0xffffffff81800000 688K RW GLB NX pte 0xffffffff81800000-0xffffffff81a00000 2M ro PSE GLB NX pmd 0xffffffff81a00000-0xffffffff81b3b000 1260K ro GLB NX pte 0xffffffff81b3b000-0xffffffff82000000 4884K RW GLB NX pte 0xffffffff82000000-0xffffffff82200000 2M RW PSE GLB NX pmd 0xffffffff82200000-0xffffffffa0000000 478M pmd Signed-off-by: Stephen Smalley Acked-by: Kees Cook Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Link: http://lkml.kernel.org/r/1443704662-3138-1-git-send-email-sds@tycho.nsa.gov Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/init_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 3fba623e3ba55..f9977a7a94448 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1132,7 +1132,7 @@ void mark_rodata_ro(void) * has been zapped already via cleanup_highmem(). */ all_end = roundup((unsigned long)_brk_end, PMD_SIZE); - set_memory_nx(rodata_start, (all_end - rodata_start) >> PAGE_SHIFT); + set_memory_nx(text_end, (all_end - text_end) >> PAGE_SHIFT); rodata_test(); From 6cdd2beb9a3627de301bc03474e0805e4b53efba Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Fri, 25 Sep 2015 11:59:52 +0200 Subject: [PATCH 1039/2091] x86/xen: Support kexec/kdump in HVM guests by doing a soft reset commit 0b34a166f291d255755be46e43ed5497cdd194f2 upstream. Currently there is a number of issues preventing PVHVM Xen guests from doing successful kexec/kdump: - Bound event channels. - Registered vcpu_info. - PIRQ/emuirq mappings. - shared_info frame after XENMAPSPACE_shared_info operation. - Active grant mappings. Basically, newly booted kernel stumbles upon already set up Xen interfaces and there is no way to reestablish them. In Xen-4.7 a new feature called 'soft reset' is coming. A guest performing kexec/kdump operation is supposed to call SCHEDOP_shutdown hypercall with SHUTDOWN_soft_reset reason before jumping to new kernel. Hypervisor (with some help from toolstack) will do full domain cleanup (but keeping its memory and vCPU contexts intact) returning the guest to the state it had when it was first booted and thus allowing it to start over. Doing SHUTDOWN_soft_reset on Xen hypervisors which don't support it is probably OK as by default all unknown shutdown reasons cause domain destroy with a message in toolstack log: 'Unknown shutdown reason code 5. Destroying domain.' which gives a clue to what the problem is and eliminates false expectations. 
Signed-off-by: Vitaly Kuznetsov Signed-off-by: David Vrabel Signed-off-by: Greg Kroah-Hartman --- arch/x86/xen/enlighten.c | 23 +++++++++++++++++++++++ include/xen/interface/sched.h | 8 ++++++++ 2 files changed, 31 insertions(+) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index a671e837228d1..0cc657160cb69 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -33,6 +33,10 @@ #include #include +#ifdef CONFIG_KEXEC_CORE +#include +#endif + #include #include #include @@ -1798,6 +1802,21 @@ static struct notifier_block xen_hvm_cpu_notifier = { .notifier_call = xen_hvm_cpu_notify, }; +#ifdef CONFIG_KEXEC_CORE +static void xen_hvm_shutdown(void) +{ + native_machine_shutdown(); + if (kexec_in_progress) + xen_reboot(SHUTDOWN_soft_reset); +} + +static void xen_hvm_crash_shutdown(struct pt_regs *regs) +{ + native_machine_crash_shutdown(regs); + xen_reboot(SHUTDOWN_soft_reset); +} +#endif + static void __init xen_hvm_guest_init(void) { if (xen_pv_domain()) @@ -1817,6 +1836,10 @@ static void __init xen_hvm_guest_init(void) x86_init.irqs.intr_init = xen_init_IRQ; xen_hvm_init_time_ops(); xen_hvm_init_mmu_ops(); +#ifdef CONFIG_KEXEC_CORE + machine_ops.shutdown = xen_hvm_shutdown; + machine_ops.crash_shutdown = xen_hvm_crash_shutdown; +#endif } #endif diff --git a/include/xen/interface/sched.h b/include/xen/interface/sched.h index 9ce083960a257..f18490985fc8e 100644 --- a/include/xen/interface/sched.h +++ b/include/xen/interface/sched.h @@ -107,5 +107,13 @@ struct sched_watchdog { #define SHUTDOWN_suspend 2 /* Clean up, save suspend info, kill. */ #define SHUTDOWN_crash 3 /* Tell controller we've crashed. */ #define SHUTDOWN_watchdog 4 /* Restart because watchdog time expired. */ +/* + * Domain asked to perform 'soft reset' for it. The expected behavior is to + * reset internal Xen state for the domain returning it to the point where it + * was created but leaving the domain's memory contents and vCPU contexts + * intact. This will allow the domain to start over and set up all Xen specific + * interfaces again. + */ +#define SHUTDOWN_soft_reset 5 #endif /* __XEN_PUBLIC_SCHED_H__ */ From 438437983e0cefa439e9b161f74b805d0416eec0 Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Delgado Date: Fri, 31 Jul 2015 13:36:21 +0200 Subject: [PATCH 1040/2091] leds/led-class: Add missing put_device() commit e5b5a61fcb3743f1dacf9e20d28f48423cecf0c1 upstream. Devices found by class_find_device must be freed with put_device(). Otherwise the reference count will not work properly. 
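The rule the fix enforces is the ordinary get/put pairing; the toy program below is plain userspace C, not kernel code, with find() standing in for class_find_device() (which takes a reference on the device it returns) and put() standing in for put_device(). Without the put() on every hit the count only ever climbs, which is what the lookup loop in led_classdev_next_name() was doing.

    #include <stdio.h>

    struct obj { int refs; };

    static struct obj *find(struct obj *o) { o->refs++; return o; } /* like class_find_device() */
    static void put(struct obj *o) { o->refs--; }                   /* like put_device()        */

    int main(void)
    {
        struct obj led = { .refs = 1 };

        for (int i = 0; i < 3; i++) {
            struct obj *hit = find(&led);
            if (!hit)
                break;
            /* ... inspect the result ... */
            put(hit);   /* the release the old loop never did */
        }
        printf("refs=%d (back at the initial 1)\n", led.refs);
        return 0;
    }
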
Fixes: a96aa64cb572 ("leds/led-class: Handle LEDs with the same name") Reported-by: Alan Tull Signed-off-by: Ricardo Ribalda Delgado Signed-off-by: Jacek Anaszewski Signed-off-by: Greg Kroah-Hartman --- drivers/leds/led-class.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c index 7fb2a19ac649c..557f8a53a062c 100644 --- a/drivers/leds/led-class.c +++ b/drivers/leds/led-class.c @@ -223,12 +223,15 @@ static int led_classdev_next_name(const char *init_name, char *name, { unsigned int i = 0; int ret = 0; + struct device *dev; strlcpy(name, init_name, len); - while (class_find_device(leds_class, NULL, name, match_name) && - (ret < len)) + while ((ret < len) && + (dev = class_find_device(leds_class, NULL, name, match_name))) { + put_device(dev); ret = snprintf(name, len, "%s_%u", init_name, ++i); + } if (ret >= len) return -ENOMEM; From 8210b9199f66447513b63c9a5aef988ee60c4319 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 29 Sep 2015 14:45:09 +0200 Subject: [PATCH 1041/2091] sched/core: Fix TASK_DEAD race in finish_task_switch() commit 95913d97914f44db2b81271c2e2ebd4d2ac2df83 upstream. So the problem this patch is trying to address is as follows: CPU0 CPU1 context_switch(A, B) ttwu(A) LOCK A->pi_lock A->on_cpu == 0 finish_task_switch(A) prev_state = A->state <-. WMB | A->on_cpu = 0; | UNLOCK rq0->lock | | context_switch(C, A) `-- A->state = TASK_DEAD prev_state == TASK_DEAD put_task_struct(A) context_switch(A, C) finish_task_switch(A) A->state == TASK_DEAD put_task_struct(A) The argument being that the WMB will allow the load of A->state on CPU0 to cross over and observe CPU1's store of A->state, which will then result in a double-drop and use-after-free. Now the comment states (and this was true once upon a long time ago) that we need to observe A->state while holding rq->lock because that will order us against the wakeup; however the wakeup will not in fact acquire (that) rq->lock; it takes A->pi_lock these days. We can obviously fix this by upgrading the WMB to an MB, but that is expensive, so we'd rather avoid that. The alternative this patch takes is: smp_store_release(&A->on_cpu, 0), which avoids the MB on some archs, but not important ones like ARM. Reported-by: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) Acked-by: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Cc: manfred@colorfullife.com Cc: will.deacon@arm.com Fixes: e4a52bcb9a18 ("sched: Remove rq->lock from the first half of ttwu()") Link: http://lkml.kernel.org/r/20150929124509.GG3816@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/sched/core.c | 10 +++++----- kernel/sched/sched.h | 5 +++-- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 4a42f8299ac8e..8476206a1e19d 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2217,11 +2217,11 @@ static struct rq *finish_task_switch(struct task_struct *prev) * If a task dies, then it sets TASK_DEAD in tsk->state and calls * schedule one last time. The schedule call will never return, and * the scheduled task must drop that reference. - * The test for TASK_DEAD must occur while the runqueue locks are - * still held, otherwise prev could be scheduled on another cpu, die - * there before we look at prev->state, and then the reference would - * be dropped twice. 
- * Manfred Spraul + * + * We must observe prev->state before clearing prev->on_cpu (in + * finish_lock_switch), otherwise a concurrent wakeup can get prev + * running on another CPU and we could rave with its RUNNING -> DEAD + * transition, resulting in a double drop. */ prev_state = prev->state; vtime_task_switch(prev); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index e0e1299939588..aa1f059de4f7f 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1068,9 +1068,10 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) * After ->on_cpu is cleared, the task can be moved to a different CPU. * We must ensure this doesn't happen until the switch is completely * finished. + * + * Pairs with the control dependency and rmb in try_to_wake_up(). */ - smp_wmb(); - prev->on_cpu = 0; + smp_store_release(&prev->on_cpu, 0); #endif #ifdef CONFIG_DEBUG_SPINLOCK /* this is a valid case when another task releases the spinlock */ From e5864e3711404b6c277cd1f69d5ebe197e4f0b95 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 8 Sep 2015 15:25:39 +0200 Subject: [PATCH 1042/2091] s390/compat: correct uc_sigmask of the compat signal frame commit 8d4bd0ed0439dfc780aab801a085961925ed6838 upstream. The uc_sigmask in the ucontext structure is an array of words to keep the 64 signal bits (or 1024 if you ask glibc but the kernel sigset_t only has 64 bits). For 64 bit the sigset_t contains a single 8 byte word, but for 31 bit there are two 4 byte words. The compat signal handler code uses a simple copy of the 64 bit sigset_t to the 31 bit compat_sigset_t. As s390 is a big-endian architecture this is incorrect, the two words in the 31 bit sigset_t array need to be swapped. Reported-by: Stefan Liebler Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/kernel/compat_signal.c | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index fe8d6924efaa8..c78ba51ae285b 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -48,6 +48,19 @@ typedef struct struct ucontext32 uc; } rt_sigframe32; +static inline void sigset_to_sigset32(unsigned long *set64, + compat_sigset_word *set32) +{ + set32[0] = (compat_sigset_word) set64[0]; + set32[1] = (compat_sigset_word)(set64[0] >> 32); +} + +static inline void sigset32_to_sigset(compat_sigset_word *set32, + unsigned long *set64) +{ + set64[0] = (unsigned long) set32[0] | ((unsigned long) set32[1] << 32); +} + int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) { int err; @@ -303,10 +316,12 @@ COMPAT_SYSCALL_DEFINE0(sigreturn) { struct pt_regs *regs = task_pt_regs(current); sigframe32 __user *frame = (sigframe32 __user *)regs->gprs[15]; + compat_sigset_t cset; sigset_t set; - if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE32)) + if (__copy_from_user(&cset.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE32)) goto badframe; + sigset32_to_sigset(cset.sig, set.sig); set_current_blocked(&set); if (restore_sigregs32(regs, &frame->sregs)) goto badframe; @@ -323,10 +338,12 @@ COMPAT_SYSCALL_DEFINE0(rt_sigreturn) { struct pt_regs *regs = task_pt_regs(current); rt_sigframe32 __user *frame = (rt_sigframe32 __user *)regs->gprs[15]; + compat_sigset_t cset; sigset_t set; - if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) + if (__copy_from_user(&cset, &frame->uc.uc_sigmask, sizeof(cset))) goto badframe; + 
sigset32_to_sigset(cset.sig, set.sig); set_current_blocked(&set); if (compat_restore_altstack(&frame->uc.uc_stack)) goto badframe; @@ -397,7 +414,7 @@ static int setup_frame32(struct ksignal *ksig, sigset_t *set, return -EFAULT; /* Create struct sigcontext32 on the signal stack */ - memcpy(&sc.oldmask, &set->sig, _SIGMASK_COPY_SIZE32); + sigset_to_sigset32(set->sig, sc.oldmask); sc.sregs = (__u32)(unsigned long __force) &frame->sregs; if (__copy_to_user(&frame->sc, &sc, sizeof(frame->sc))) return -EFAULT; @@ -458,6 +475,7 @@ static int setup_frame32(struct ksignal *ksig, sigset_t *set, static int setup_rt_frame32(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) { + compat_sigset_t cset; rt_sigframe32 __user *frame; unsigned long restorer; size_t frame_size; @@ -505,11 +523,12 @@ static int setup_rt_frame32(struct ksignal *ksig, sigset_t *set, store_sigregs(); /* Create ucontext on the signal stack. */ + sigset_to_sigset32(set->sig, cset.sig); if (__put_user(uc_flags, &frame->uc.uc_flags) || __put_user(0, &frame->uc.uc_link) || __compat_save_altstack(&frame->uc.uc_stack, regs->gprs[15]) || save_sigregs32(regs, &frame->uc.uc_mcontext) || - __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)) || + __copy_to_user(&frame->uc.uc_sigmask, &cset, sizeof(cset)) || save_sigregs_ext32(regs, &frame->uc.uc_mcontext_ext)) return -EFAULT; From ff7cad4b0de99be1828554d8e43662ffdac1dba9 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Mon, 28 Sep 2015 22:47:42 +0200 Subject: [PATCH 1043/2091] s390/boot/decompression: disable floating point in decompressor commit adc0b7fbf6fe9967505c0254d9535ec7288186ae upstream. my gcc 5.1 used an ldgr instruction with a register != 0,2,4,6 for spilling/filling into a floating point register in our decompressor. This will cause an AFP-register data exception as the decompressor did not setup the additional floating point registers via cr0. That causes a program check loop that looked like a hang with one "Uncompressing Linux... " message (directly booted via kvm) or a loop of "Uncompressing Linux... " messages (when booted via zipl boot loader). The offending code in my build was 48e400: e3 c0 af ff ff 71 lay %r12,-1(%r10) -->48e406: b3 c1 00 1c ldgr %f1,%r12 48e40a: ec 6c 01 22 02 7f clij %r6,2,12,0x48e64e but gcc could do spilling into an fpr at any function. We can simply disable floating point support at that early stage. Signed-off-by: Christian Borntraeger Acked-by: Heiko Carstens Signed-off-by: Greg Kroah-Hartman --- arch/s390/boot/compressed/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile index d4788111c1617..fac6ac9790fad 100644 --- a/arch/s390/boot/compressed/Makefile +++ b/arch/s390/boot/compressed/Makefile @@ -10,7 +10,7 @@ targets += misc.o piggy.o sizes.h head.o KBUILD_CFLAGS := -m64 -D__KERNEL__ $(LINUX_INCLUDE) -O2 KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING -KBUILD_CFLAGS += $(cflags-y) -fno-delete-null-pointer-checks +KBUILD_CFLAGS += $(cflags-y) -fno-delete-null-pointer-checks -msoft-float KBUILD_CFLAGS += $(call cc-option,-mpacked-stack) KBUILD_CFLAGS += $(call cc-option,-ffreestanding) From dc7a3d707c9784f26048d6f059e7467a8a92e04b Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 6 Sep 2015 01:46:54 +0300 Subject: [PATCH 1044/2091] spi: Fix documentation of spi_alloc_master() commit a394d635193b641f2c86ead5ada5b115d57c51f8 upstream. Actually, spi_master_put() after spi_alloc_master() must _not_ be followed by kfree(). 
The memory is already freed with the call to spi_master_put() through spi_master_class, which registers a release function. Calling both spi_master_put() and kfree() results in nasty (and delayed) crashes elsewhere in the kernel, often in the networking stack. This reverts commit eb4af0f5349235df2e4a5057a72fc8962d00308a. Link to patch and concerns: https://lkml.org/lkml/2012/9/3/269 or http://lkml.iu.edu/hypermail/linux/kernel/1209.0/00790.html Alexey Klimov: This revert becomes valid after 94c69f765f1b4a658d96905ec59928e3e3e07e6a when spi-imx.c has been fixed and there is no need to call kfree(), so the comment for spi_alloc_master() should be fixed. Signed-off-by: Guenter Roeck Signed-off-by: Alexey Klimov Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index d35c1a13217c5..029dbd33b4b28 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -1427,8 +1427,7 @@ static struct class spi_master_class = { * * The caller is responsible for assigning the bus number and initializing * the master's methods before calling spi_register_master(); and (after errors - * adding the device) calling spi_master_put() and kfree() to prevent a memory - * leak. + * adding the device) calling spi_master_put() to prevent a memory leak. */ struct spi_master *spi_alloc_master(struct device *dev, unsigned size) { From ee346d7a9f345f68678a1e19797f72f51b7c04af Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Tue, 22 Sep 2015 14:32:03 +0300 Subject: [PATCH 1045/2091] spi: xtensa-xtfpga: fix register endianness commit b0b4855099e301c8603ea37da9a0103a96c2e0b1 upstream. The XTFPGA SPI controller has native-endian registers. Fix the register accessors so that they work in big-endian configurations. Signed-off-by: Max Filippov Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-xtensa-xtfpga.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-xtensa-xtfpga.c b/drivers/spi/spi-xtensa-xtfpga.c index 2e32ea2f194f3..be6155cba9de7 100644 --- a/drivers/spi/spi-xtensa-xtfpga.c +++ b/drivers/spi/spi-xtensa-xtfpga.c @@ -34,13 +34,13 @@ struct xtfpga_spi { static inline void xtfpga_spi_write32(const struct xtfpga_spi *spi, unsigned addr, u32 val) { - iowrite32(val, spi->regs + addr); + __raw_writel(val, spi->regs + addr); } static inline unsigned int xtfpga_spi_read32(const struct xtfpga_spi *spi, unsigned addr) { - return ioread32(spi->regs + addr); + return __raw_readl(spi->regs + addr); } static inline void xtfpga_spi_wait_busy(struct xtfpga_spi *xspi) From 52b5970ab83c557628b91382914e5d0dac5c4c11 Mon Sep 17 00:00:00 2001 From: "Tan, Jui Nee" Date: Tue, 1 Sep 2015 10:22:51 +0800 Subject: [PATCH 1046/2091] spi: spi-pxa2xx: Check status register to determine if SSSR_TINT is disabled commit 02bc933ebb59208f42c2e6305b2c17fd306f695d upstream. On Intel Baytrail, there is a case where the interrupt handler gets called but no SPI message is captured. The RX FIFO is indeed empty when the RX timeout pending interrupt (SSSR_TINT) happens. This is seen when using a BIOS version where both HSUART and SPI are on the same IRQ. Both drivers are using IRQF_SHARED when calling the request_irq function.
When running two separate and independent SPI and HSUART applications that generate data traffic on both components, users will see messages like below on the console: pxa2xx-spi pxa2xx-spi.0: bad message state in interrupt handler This commit fixes this by first checking the Receiver Time-out Interrupt; if it is disabled, ignore the request and return without servicing. Signed-off-by: Tan, Jui Nee Acked-by: Jarkko Nikula Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-pxa2xx.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c index e3223ac75a7c5..f089082c00e1d 100644 --- a/drivers/spi/spi-pxa2xx.c +++ b/drivers/spi/spi-pxa2xx.c @@ -624,6 +624,10 @@ static irqreturn_t ssp_int(int irq, void *dev_id) if (!(sccr1_reg & SSCR1_TIE)) mask &= ~SSSR_TFS; + /* Ignore RX timeout interrupt if it is disabled */ + if (!(sccr1_reg & SSCR1_TINTE)) + mask &= ~SSSR_TINT; + if (!(status & mask)) return IRQ_NONE; From f72f5774c30418cf802a8964398bbc102d08abe5 Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Thu, 10 Sep 2015 16:48:13 +0530 Subject: [PATCH 1047/2091] spi: spidev: fix possible NULL dereference commit dd85ebf681ef0ee1fc985c353dd45e8b53b5dc1e upstream. During the last close we are freeing spidev if spidev->spi is NULL, but just before checking if spidev->spi is NULL we are dereferencing it. Let's add a check there to avoid the NULL dereference. Fixes: 9169051617df ("spi: spidev: Don't mangle max_speed_hz in underlying spi device") Signed-off-by: Sudip Mukherjee Reviewed-by: Jarkko Nikula Tested-by: Jarkko Nikula Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spidev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spidev.c b/drivers/spi/spidev.c index 92c909eed6b50..8fab566e0f0bc 100644 --- a/drivers/spi/spidev.c +++ b/drivers/spi/spidev.c @@ -664,7 +664,8 @@ static int spidev_release(struct inode *inode, struct file *filp) kfree(spidev->rx_buffer); spidev->rx_buffer = NULL; - spidev->speed_hz = spidev->spi->max_speed_hz; + if (spidev->spi) + spidev->speed_hz = spidev->spi->max_speed_hz; /* ... after we unbound from the underlying device? */ spin_lock_irq(&spidev->spi_lock); From 9a36019b8e0a6064614e6227d26c144e88c635ea Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Tue, 22 Sep 2015 14:59:14 -0700 Subject: [PATCH 1048/2091] mm: migrate: hugetlb: putback destination hugepage to active list commit 3aaa76e125c1dd58c9b599baa8c6021896874c12 upstream. Since commit bcc54222309c ("mm: hugetlb: introduce page_huge_active") each hugetlb page maintains its active flag to avoid a race condition between multiple calls of isolate_huge_page(), but the current kernel doesn't set the flag on a hugepage allocated by migration because the proper putback routine isn't called. This means that users could still encounter the race referred to by bcc54222309c in this special case, so this patch fixes it.
Fixes: bcc54222309c ("mm: hugetlb: introduce page_huge_active") Signed-off-by: Naoya Horiguchi Cc: Michal Hocko Cc: Andi Kleen Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/migrate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/migrate.c b/mm/migrate.c index f53838fe3dfe6..2c37b1a44a8c8 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1062,7 +1062,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page, if (rc != MIGRATEPAGE_SUCCESS && put_new_page) put_new_page(new_hpage, private); else - put_page(new_hpage); + putback_active_hugepage(new_hpage); if (result) { if (rc) From 455a35d039d0a84d021721cdde2384ec36c0b6e0 Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Tue, 22 Sep 2015 14:59:20 -0700 Subject: [PATCH 1049/2091] lib/iommu-common.c: do not try to deref a null iommu->lazy_flush() pointer when n < pool->hint commit d046b770c9fc36ccb19c27afdb8322220108cbc7 upstream. The check for invoking iommu->lazy_flush() from iommu_tbl_range_alloc() has to be refactored so that we only call ->lazy_flush() if it is non-null. I had a sparc kernel that was crashing when I was trying to process some very large perf.data files- the crash happens when the scsi driver calls into dma_4v_map_sg and thus the iommu_tbl_range_alloc(). Signed-off-by: Sowmini Varadhan Cc: Benjamin Herrenschmidt Cc: Guenter Roeck Cc: David S. Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- lib/iommu-common.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/iommu-common.c b/lib/iommu-common.c index df30632f0bef9..4fdeee02e0a94 100644 --- a/lib/iommu-common.c +++ b/lib/iommu-common.c @@ -21,8 +21,7 @@ static DEFINE_PER_CPU(unsigned int, iommu_hash_common); static inline bool need_flush(struct iommu_map_table *iommu) { - return (iommu->lazy_flush != NULL && - (iommu->flags & IOMMU_NEED_FLUSH) != 0); + return ((iommu->flags & IOMMU_NEED_FLUSH) != 0); } static inline void set_flush(struct iommu_map_table *iommu) @@ -211,7 +210,8 @@ unsigned long iommu_tbl_range_alloc(struct device *dev, goto bail; } } - if (n < pool->hint || need_flush(iommu)) { + if (iommu->lazy_flush && + (n < pool->hint || need_flush(iommu))) { clear_flush(iommu); iommu->lazy_flush(iommu); } From 40c96f62047368d4f0a70b505dfd302be0bd82bb Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Tue, 22 Sep 2015 14:59:20 -0700 Subject: [PATCH 1050/2091] ocfs2/dlm: fix deadlock when dispatch assert master commit 012572d4fc2e4ddd5c8ec8614d51414ec6cae02a upstream. The order of the following three spinlocks should be: dlm_domain_lock < dlm_ctxt->spinlock < dlm_lock_resource->spinlock But dlm_dispatch_assert_master() is called while holding dlm_ctxt->spinlock and dlm_lock_resource->spinlock, and then it calls dlm_grab() which will take dlm_domain_lock. Once another thread (for example, dlm_query_join_handler) has already taken dlm_domain_lock, and tries to take dlm_ctxt->spinlock deadlock happens. 
Signed-off-by: Joseph Qi Cc: Joel Becker Cc: Mark Fasheh Cc: "Junxiao Bi" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/dlm/dlmmaster.c | 9 ++++++--- fs/ocfs2/dlm/dlmrecovery.c | 8 ++++++-- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index fdf4b41d0609a..482cfd34472d6 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -1439,6 +1439,7 @@ int dlm_master_request_handler(struct o2net_msg *msg, u32 len, void *data, int found, ret; int set_maybe; int dispatch_assert = 0; + int dispatched = 0; if (!dlm_grab(dlm)) return DLM_MASTER_RESP_NO; @@ -1658,15 +1659,18 @@ int dlm_master_request_handler(struct o2net_msg *msg, u32 len, void *data, mlog(ML_ERROR, "failed to dispatch assert master work\n"); response = DLM_MASTER_RESP_ERROR; dlm_lockres_put(res); - } else + } else { + dispatched = 1; __dlm_lockres_grab_inflight_worker(dlm, res); + } spin_unlock(&res->spinlock); } else { if (res) dlm_lockres_put(res); } - dlm_put(dlm); + if (!dispatched) + dlm_put(dlm); return response; } @@ -2090,7 +2094,6 @@ int dlm_dispatch_assert_master(struct dlm_ctxt *dlm, /* queue up work for dlm_assert_master_worker */ - dlm_grab(dlm); /* get an extra ref for the work item */ dlm_init_work_item(dlm, item, dlm_assert_master_worker, NULL); item->u.am.lockres = res; /* already have a ref */ /* can optionally ignore node numbers higher than this node */ diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index ce12e0b1a31f1..3d90ad7ff91fe 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -1694,6 +1694,7 @@ int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data, unsigned int hash; int master = DLM_LOCK_RES_OWNER_UNKNOWN; u32 flags = DLM_ASSERT_MASTER_REQUERY; + int dispatched = 0; if (!dlm_grab(dlm)) { /* since the domain has gone away on this @@ -1719,8 +1720,10 @@ int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data, dlm_put(dlm); /* sender will take care of this and retry */ return ret; - } else + } else { + dispatched = 1; __dlm_lockres_grab_inflight_worker(dlm, res); + } spin_unlock(&res->spinlock); } else { /* put.. incase we are not the master */ @@ -1730,7 +1733,8 @@ int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data, } spin_unlock(&dlm->spinlock); - dlm_put(dlm); + if (!dispatched) + dlm_put(dlm); return master; } From 51d3a065d242f59ac6a26c31ad9a350cf6e210b6 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Thu, 1 Oct 2015 15:36:57 -0700 Subject: [PATCH 1051/2091] mm: hugetlbfs: skip shared VMAs when unmapping private pages to satisfy a fault commit 2f84a8990ebbe235c59716896e017c6b2ca1200f upstream. SunDong reported the following on https://bugzilla.kernel.org/show_bug.cgi?id=103841 I think I find a linux bug, I have the test cases is constructed. I can stable recurring problems in fedora22(4.0.4) kernel version, arch for x86_64. I construct transparent huge page, when the parent and child process with MAP_SHARE, MAP_PRIVATE way to access the same huge page area, it has the opportunity to lead to huge page copy on write failure, and then it will munmap the child corresponding mmap area, but then the child mmap area with VM_MAYSHARE attributes, child process munmap this area can trigger VM_BUG_ON in set_vma_resv_flags functions (vma - > vm_flags & VM_MAYSHARE). There were a number of problems with the report (e.g. 
it's hugetlbfs that triggers this, not transparent huge pages) but it was fundamentally correct in that a VM_BUG_ON in set_vma_resv_flags() can be triggered that looks like this vma ffff8804651fd0d0 start 00007fc474e00000 end 00007fc475e00000 next ffff8804651fd018 prev ffff8804651fd188 mm ffff88046b1b1800 prot 8000000000000027 anon_vma (null) vm_ops ffffffff8182a7a0 pgoff 0 file ffff88106bdb9800 private_data (null) flags: 0x84400fb(read|write|shared|mayread|maywrite|mayexec|mayshare|dontexpand|hugetlb) ------------ kernel BUG at mm/hugetlb.c:462! SMP Modules linked in: xt_pkttype xt_LOG xt_limit [..] CPU: 38 PID: 26839 Comm: map Not tainted 4.0.4-default #1 Hardware name: Dell Inc. PowerEdge R810/0TT6JF, BIOS 2.7.4 04/26/2012 set_vma_resv_flags+0x2d/0x30 The VM_BUG_ON is correct because private and shared mappings have different reservation accounting but the warning clearly shows that the VMA is shared. When a private COW fails to allocate a new page then only the process that created the VMA gets the page -- all the children unmap the page. If the children access that data in the future then they get killed. The problem is that the same file is mapped shared and private. During the COW, the allocation fails, the VMAs are traversed to unmap the other private pages but a shared VMA is found and the bug is triggered. This patch identifies such VMAs and skips them. Signed-off-by: Mel Gorman Reported-by: SunDong Reviewed-by: Michal Hocko Cc: Andrea Arcangeli Cc: Hugh Dickins Cc: Naoya Horiguchi Cc: David Rientjes Reviewed-by: Naoya Horiguchi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/hugetlb.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 8c4c1f9f9a9a9..a6ff935476e3d 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2896,6 +2896,14 @@ static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma, if (iter_vma == vma) continue; + /* + * Shared VMAs have their own reserves and do not affect + * MAP_PRIVATE accounting but it is possible that a shared + * VMA is using the same page so check and skip such VMAs. + */ + if (iter_vma->vm_flags & VM_MAYSHARE) + continue; + /* * Unmap the page from other VMAs without their own reserves. * They get marked to be SIGKILLed if they fault in these From e2899bcb8c36757a6cb823dc4ebde07f127ea0d6 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 24 Sep 2015 17:36:51 +0200 Subject: [PATCH 1052/2091] ALSA: hda - Disable power_save_node for Thinkpads commit 7f57d803ee03730d570dc59a9e3e4842b58dd5cc upstream. Lenovo Thinkpads with recent Realtek codecs seem suffering from click noises at power transition since the introduction of widget power saving in 4.1 kernel. Although this might be solved by some delays in appropriate points, as a quick workaround, just disable the power_save_node feature for now. The gain it gives is relatively small, and this makes the situation back to pre 4.1 time. This patch ended up with a bit more code changes than usual because the existing fixup for Thinkpads is highly chained. Instead of adding yet another chain, combine a few of them into a single fixup entry, as a gratis cleanup. 
Bugzilla: https://bugzilla.suse.com/show_bug.cgi?id=943982 Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 6fe862594e9b9..5819ba9bca720 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4182,6 +4182,24 @@ static void alc_fixup_disable_aamix(struct hda_codec *codec, } } +/* fixup for Thinkpad docks: add dock pins, avoid HP parser fixup */ +static void alc_fixup_tpt440_dock(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + static const struct hda_pintbl pincfgs[] = { + { 0x16, 0x21211010 }, /* dock headphone */ + { 0x19, 0x21a11010 }, /* dock mic */ + { } + }; + struct alc_spec *spec = codec->spec; + + if (action == HDA_FIXUP_ACT_PRE_PROBE) { + spec->parse_flags = HDA_PINCFG_NO_HP_FIXUP; + codec->power_save_node = 0; /* avoid click noises */ + snd_hda_apply_pincfgs(codec, pincfgs); + } +} + static void alc_shutup_dell_xps13(struct hda_codec *codec) { struct alc_spec *spec = codec->spec; @@ -4507,7 +4525,6 @@ enum { ALC255_FIXUP_HEADSET_MODE_NO_HP_MIC, ALC293_FIXUP_DELL1_MIC_NO_PRESENCE, ALC292_FIXUP_TPT440_DOCK, - ALC292_FIXUP_TPT440_DOCK2, ALC283_FIXUP_BXBT2807_MIC, ALC255_FIXUP_DELL_WMI_MIC_MUTE_LED, ALC282_FIXUP_ASPIRE_V5_PINS, @@ -4972,17 +4989,7 @@ static const struct hda_fixup alc269_fixups[] = { }, [ALC292_FIXUP_TPT440_DOCK] = { .type = HDA_FIXUP_FUNC, - .v.func = alc269_fixup_pincfg_no_hp_to_lineout, - .chained = true, - .chain_id = ALC292_FIXUP_TPT440_DOCK2 - }, - [ALC292_FIXUP_TPT440_DOCK2] = { - .type = HDA_FIXUP_PINS, - .v.pins = (const struct hda_pintbl[]) { - { 0x16, 0x21211010 }, /* dock headphone */ - { 0x19, 0x21a11010 }, /* dock mic */ - { } - }, + .v.func = alc_fixup_tpt440_dock, .chained = true, .chain_id = ALC269_FIXUP_LIMIT_INT_MIC_BOOST }, From d7a83056d3f0827ea20ad57228ba5589adc15868 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 5 Oct 2015 16:55:09 +0200 Subject: [PATCH 1053/2091] ALSA: synth: Fix conflicting OSS device registration on AWE32 commit 225db5762dc1a35b26850477ffa06e5cd0097243 upstream. When OSS emulation is loaded on ISA SB AWE32 chip, we get now kernel warnings like: WARNING: CPU: 0 PID: 2791 at fs/sysfs/dir.c:31 sysfs_warn_dup+0x51/0x80() sysfs: cannot create duplicate filename '/devices/isa/sbawe.0/sound/card0/seq-oss-0-0' It's because both emux synth and opl3 drivers try to register their OSS device object with the same static index number 0. This hasn't been a big problem until the recent rewrite of device management code (that exposes sysfs at the same time), but it's been an obvious bug. This patch works around it just by using a different index number of emux synth object. There can be a more elegant way to fix, but it's enough for now, as this code won't be touched so often, in anyway. 
Reported-and-tested-by: Michael Shell Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/synth/emux/emux_oss.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/synth/emux/emux_oss.c b/sound/synth/emux/emux_oss.c index 82e350e9501cc..ac75816ada7c3 100644 --- a/sound/synth/emux/emux_oss.c +++ b/sound/synth/emux/emux_oss.c @@ -69,7 +69,8 @@ snd_emux_init_seq_oss(struct snd_emux *emu) struct snd_seq_oss_reg *arg; struct snd_seq_device *dev; - if (snd_seq_device_new(emu->card, 0, SNDRV_SEQ_DEV_ID_OSS, + /* using device#1 here for avoiding conflicts with OPL3 */ + if (snd_seq_device_new(emu->card, 1, SNDRV_SEQ_DEV_ID_OSS, sizeof(struct snd_seq_oss_reg), &dev) < 0) return; From dd555c5e6380e2227beae37e681517afcf4bbbe0 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Fri, 2 Oct 2015 11:09:54 -0700 Subject: [PATCH 1054/2091] ALSA: hda: Add dock support for ThinkPad T550 commit d05ea7da0e8f6df3c62cfee75538f347cb3d89ef upstream. Much like all the other Lenovo laptops, add a quirk to make sound work with docking. Reported-and-tested-by: lacknerflo@gmail.com Signed-off-by: Laura Abbott Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 5819ba9bca720..57bb5a559f8e5 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5233,6 +5233,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x2212, "Thinkpad T440", ALC292_FIXUP_TPT440_DOCK), SND_PCI_QUIRK(0x17aa, 0x2214, "Thinkpad X240", ALC292_FIXUP_TPT440_DOCK), SND_PCI_QUIRK(0x17aa, 0x2215, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), + SND_PCI_QUIRK(0x17aa, 0x2223, "ThinkPad T550", ALC292_FIXUP_TPT440_DOCK), SND_PCI_QUIRK(0x17aa, 0x2226, "ThinkPad X250", ALC292_FIXUP_TPT440_DOCK), SND_PCI_QUIRK(0x17aa, 0x3977, "IdeaPad S210", ALC283_FIXUP_INT_MIC), SND_PCI_QUIRK(0x17aa, 0x3978, "IdeaPad Y410P", ALC269_FIXUP_NO_SHUTUP), From ace476458e5b77bcbefeee0dfff8e3e4a531907b Mon Sep 17 00:00:00 2001 From: John Flatness Date: Fri, 2 Oct 2015 17:07:49 -0400 Subject: [PATCH 1055/2091] ALSA: hda - Apply SPDIF pin ctl to MacBookPro 12,1 commit e8ff581f7ac2bc3b8886094b7ca635dcc4d1b0e9 upstream. The MacBookPro 12,1 has the same setup as the 11 for controlling the status of the optical audio light. Simply apply the existing workaround to the subsystem ID for the 12,1. 
[sorted the fixup entry by tiwai] Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=105401 Signed-off-by: John Flatness Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_cirrus.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_cirrus.c b/sound/pci/hda/patch_cirrus.c index 3a24f7739aaaa..b791529bf31cd 100644 --- a/sound/pci/hda/patch_cirrus.c +++ b/sound/pci/hda/patch_cirrus.c @@ -634,6 +634,7 @@ static const struct snd_pci_quirk cs4208_mac_fixup_tbl[] = { SND_PCI_QUIRK(0x106b, 0x5e00, "MacBookPro 11,2", CS4208_MBP11), SND_PCI_QUIRK(0x106b, 0x7100, "MacBookAir 6,1", CS4208_MBA6), SND_PCI_QUIRK(0x106b, 0x7200, "MacBookAir 6,2", CS4208_MBA6), + SND_PCI_QUIRK(0x106b, 0x7b00, "MacBookPro 12,1", CS4208_MBP11), {} /* terminator */ }; From efec92c44e9883d1b13b7785ff4dc2b66872d87b Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 4 Oct 2015 22:44:12 +0200 Subject: [PATCH 1056/2091] ALSA: hda - Disable power_save_node for IDT 92HD73xx chips commit c7e1008048a97148d3aecae742f66fb2f944644c upstream. The recent widget power saving introduced some unavoidable click noises on old IDT 92HD73xx chips while it still seems working on the compatible new chips. In the bugzilla, we tried lots of tests and workarounds, but they didn't help much. So, let's disable the feature for these specific chips as the least (but safest) fix. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=104981 Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_sigmatel.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index 25f0f45e66403..b1bc667839745 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -4522,7 +4522,11 @@ static int patch_stac92hd73xx(struct hda_codec *codec) return err; spec = codec->spec; - codec->power_save_node = 1; + /* enable power_save_node only for new 92HD89xx chips, as it causes + * click noises on old 92HD73xx chips. + */ + if ((codec->core.vendor_id & 0xfffffff0) != 0x111d7670) + codec->power_save_node = 1; spec->linear_tone_beep = 0; spec->gen.mixer_nid = 0x1d; spec->have_spdif_mux = 1; From ca51bf3e5504c8c0df5dd00688ac242de8e2efa6 Mon Sep 17 00:00:00 2001 From: Robert Jarzmik Date: Tue, 22 Sep 2015 21:20:22 +0200 Subject: [PATCH 1057/2091] ASoC: pxa: pxa2xx-ac97: fix dma requestor lines commit 8811191fdf7ed02ee07cb8469428158572d355a2 upstream. PCM receive and transmit DMA requestor lines were reverted, breaking the PCM playback interface for PXA platforms using the sound/soc/ variant instead of the sound/arm variant. The commit below shows the inversion in the requestor lines. 
Fixes: d65a14587a9b ("ASoC: pxa: use snd_dmaengine_dai_dma_data") Signed-off-by: Robert Jarzmik Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/pxa/pxa2xx-ac97.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/pxa/pxa2xx-ac97.c b/sound/soc/pxa/pxa2xx-ac97.c index 1f6054650991d..9e4b04e0fbd12 100644 --- a/sound/soc/pxa/pxa2xx-ac97.c +++ b/sound/soc/pxa/pxa2xx-ac97.c @@ -49,7 +49,7 @@ static struct snd_ac97_bus_ops pxa2xx_ac97_ops = { .reset = pxa2xx_ac97_cold_reset, }; -static unsigned long pxa2xx_ac97_pcm_stereo_in_req = 12; +static unsigned long pxa2xx_ac97_pcm_stereo_in_req = 11; static struct snd_dmaengine_dai_dma_data pxa2xx_ac97_pcm_stereo_in = { .addr = __PREG(PCDR), .addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES, @@ -57,7 +57,7 @@ static struct snd_dmaengine_dai_dma_data pxa2xx_ac97_pcm_stereo_in = { .filter_data = &pxa2xx_ac97_pcm_stereo_in_req, }; -static unsigned long pxa2xx_ac97_pcm_stereo_out_req = 11; +static unsigned long pxa2xx_ac97_pcm_stereo_out_req = 12; static struct snd_dmaengine_dai_dma_data pxa2xx_ac97_pcm_stereo_out = { .addr = __PREG(PCDR), .addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES, From 99100337fa0b6654712ca76997cf52c51696e820 Mon Sep 17 00:00:00 2001 From: Robert Jarzmik Date: Tue, 15 Sep 2015 20:51:31 +0200 Subject: [PATCH 1058/2091] ASoC: fix broken pxa SoC support commit 3c8f7710c1c44fb650bc29b6ef78ed8b60cfaa28 upstream. The previous fix of the pxa library support, which was introduced to fix the library dependency, broke the previous SoC behavior, where machine code binding pxa2xx-ac97 with a codec relied on: - sound/soc/pxa/pxa2xx-ac97.c - sound/soc/codecs/XXX.c For example, the mioa701_wm9713.c machine code is currently broken. The "select ARM" statement wrongly selects the soc/arm/pxa2xx-ac97 for compilation, because by an unfortunate coincidence SND_PXA2XX_AC97 is declared in both sound/arm/Kconfig and sound/soc/pxa/Kconfig. Fix this by ensuring that SND_PXA2XX_SOC triggers compilation of the correct pxa2xx-ac97 driver. Fixes: 846172dfe33c ("ASoC: fix SND_PXA2XX_LIB Kconfig warning") Signed-off-by: Robert Jarzmik Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/arm/Kconfig | 15 ++++++++------- sound/soc/pxa/Kconfig | 2 -- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/sound/arm/Kconfig b/sound/arm/Kconfig index 885683a3b0bdf..e0406211716b0 100644 --- a/sound/arm/Kconfig +++ b/sound/arm/Kconfig @@ -9,6 +9,14 @@ menuconfig SND_ARM Drivers that are implemented on ASoC can be found in "ALSA for SoC audio support" section.
+config SND_PXA2XX_LIB + tristate + select SND_AC97_CODEC if SND_PXA2XX_LIB_AC97 + select SND_DMAENGINE_PCM + +config SND_PXA2XX_LIB_AC97 + bool + if SND_ARM config SND_ARMAACI @@ -21,13 +29,6 @@ config SND_PXA2XX_PCM tristate select SND_PCM -config SND_PXA2XX_LIB - tristate - select SND_AC97_CODEC if SND_PXA2XX_LIB_AC97 - -config SND_PXA2XX_LIB_AC97 - bool - config SND_PXA2XX_AC97 tristate "AC97 driver for the Intel PXA2xx chip" depends on ARCH_PXA diff --git a/sound/soc/pxa/Kconfig b/sound/soc/pxa/Kconfig index 39cea80846c31..f2bf8661dd21f 100644 --- a/sound/soc/pxa/Kconfig +++ b/sound/soc/pxa/Kconfig @@ -1,7 +1,6 @@ config SND_PXA2XX_SOC tristate "SoC Audio for the Intel PXA2xx chip" depends on ARCH_PXA - select SND_ARM select SND_PXA2XX_LIB help Say Y or M if you want to add support for codecs attached to @@ -25,7 +24,6 @@ config SND_PXA2XX_AC97 config SND_PXA2XX_SOC_AC97 tristate select AC97_BUS - select SND_ARM select SND_PXA2XX_LIB_AC97 select SND_SOC_AC97_BUS From c606b8d1b2c549588ca70435780f05f25b8239eb Mon Sep 17 00:00:00 2001 From: Yitian Bu Date: Fri, 2 Oct 2015 15:18:41 +0800 Subject: [PATCH 1059/2091] ASoC: dwc: correct irq clear method commit 4873867e5f2bd90faad861dd94865099fc3140f3 upstream. from Designware I2S datasheet, tx/rx XRUN irq is cleared by reading register TOR/ROR, rather than by writing into them. Signed-off-by: Yitian Bu Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/dwc/designware_i2s.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/dwc/designware_i2s.c b/sound/soc/dwc/designware_i2s.c index a3e97b46b64e3..0d28e3b356f67 100644 --- a/sound/soc/dwc/designware_i2s.c +++ b/sound/soc/dwc/designware_i2s.c @@ -131,10 +131,10 @@ static inline void i2s_clear_irqs(struct dw_i2s_dev *dev, u32 stream) if (stream == SNDRV_PCM_STREAM_PLAYBACK) { for (i = 0; i < 4; i++) - i2s_write_reg(dev->i2s_base, TOR(i), 0); + i2s_read_reg(dev->i2s_base, TOR(i)); } else { for (i = 0; i < 4; i++) - i2s_write_reg(dev->i2s_base, ROR(i), 0); + i2s_read_reg(dev->i2s_base, ROR(i)); } } From 6cf02fadec0245b6fe13d7d7c8bc7405a38208c9 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Fri, 25 Sep 2015 11:07:04 +0200 Subject: [PATCH 1060/2091] ASoC: db1200: Fix DAI link format for db1300 and db1550 commit e74679b38c9417c1c524081121cdcdb36f82264d upstream. Commit b4508d0f95fa ("ASoC: db1200: Use static DAI format setup") switched the db1200 driver over to using static DAI format setup instead of a callback function. But the commit only added the dai_fmt field to one of the three DAI links in the driver. This breaks audio on db1300 and db1550. Add the two missing dai_fmt settings to fix the issue. 
Fixes: b4508d0f95fa ("ASoC: db1200: Use static DAI format setup") Reported-by: Manuel Lauss Tested-by: Manuel Lauss Signed-off-by: Lars-Peter Clausen Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/au1x/db1200.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/soc/au1x/db1200.c b/sound/soc/au1x/db1200.c index c75995f2779cf..b914a08258ea0 100644 --- a/sound/soc/au1x/db1200.c +++ b/sound/soc/au1x/db1200.c @@ -129,6 +129,8 @@ static struct snd_soc_dai_link db1300_i2s_dai = { .cpu_dai_name = "au1xpsc_i2s.2", .platform_name = "au1xpsc-pcm.2", .codec_name = "wm8731.0-001b", + .dai_fmt = SND_SOC_DAIFMT_LEFT_J | SND_SOC_DAIFMT_NB_NF | + SND_SOC_DAIFMT_CBM_CFM, .ops = &db1200_i2s_wm8731_ops, }; @@ -146,6 +148,8 @@ static struct snd_soc_dai_link db1550_i2s_dai = { .cpu_dai_name = "au1xpsc_i2s.3", .platform_name = "au1xpsc-pcm.3", .codec_name = "wm8731.0-001b", + .dai_fmt = SND_SOC_DAIFMT_LEFT_J | SND_SOC_DAIFMT_NB_NF | + SND_SOC_DAIFMT_CBM_CFM, .ops = &db1200_i2s_wm8731_ops, }; From 43af64e93407fe866252328a99ec57d3947da511 Mon Sep 17 00:00:00 2001 From: Gianluca Renzi Date: Fri, 25 Sep 2015 21:33:41 +0200 Subject: [PATCH 1061/2091] ASoC: sgtl5000: fix wrong register MIC_BIAS_VOLTAGE setup on probe commit e256da84a04ea31c3c215997c847609af224e8f4 upstream. Signed-off-by: Gianluca Renzi Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/sgtl5000.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/sgtl5000.c b/sound/soc/codecs/sgtl5000.c index 3593a1496056d..3a29c0ac5d8a2 100644 --- a/sound/soc/codecs/sgtl5000.c +++ b/sound/soc/codecs/sgtl5000.c @@ -1339,8 +1339,8 @@ static int sgtl5000_probe(struct snd_soc_codec *codec) sgtl5000->micbias_resistor << SGTL5000_BIAS_R_SHIFT); snd_soc_update_bits(codec, SGTL5000_CHIP_MIC_CTRL, - SGTL5000_BIAS_R_MASK, - sgtl5000->micbias_voltage << SGTL5000_BIAS_R_SHIFT); + SGTL5000_BIAS_VOLT_MASK, + sgtl5000->micbias_voltage << SGTL5000_BIAS_VOLT_SHIFT); /* * disable DAP * TODO: From 51e828d6a3b9af30a23937252f1686afbb750a8e Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Fri, 11 Sep 2015 21:44:17 -0400 Subject: [PATCH 1062/2091] btrfs: skip waiting on ordered range for special files commit a30e577c96f59b1e1678ea5462432b09bf7d5cbc upstream. In btrfs_evict_inode, we properly truncate the page cache for evicted inodes but then we call btrfs_wait_ordered_range for every inode as well. It's the right thing to do for regular files, but it results in incorrect behavior for block device inodes. filemap_fdatawrite_range gets called with inode->i_mapping which gets resolved to the block device inode before getting passed to wbc_attach_fdatawrite_inode and ultimately to inode_to_bdi. What happens next depends on whether there's an open file handle associated with the inode. If there is, we write to the block device, which is unexpected behavior. If there isn't, we fall through normally and inode->i_data is used. We can also end up racing against open/close which can result in crashes when i_mapping points to a block device inode that has been closed. Since there can't be any page cache associated with special file inodes, it's safe to skip the btrfs_wait_ordered_range call entirely and avoid the problem.
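For reference, "special file" here means anything matched by the VFS special_file() helper in include/linux/fs.h, which is essentially:

	#define special_file(m)	(S_ISCHR(m) || S_ISBLK(m) || S_ISFIFO(m) || S_ISSOCK(m))

i.e. character devices, block devices, FIFOs and sockets, none of which carry page cache of their own.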
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=100911 Tested-by: Christoph Biedl Signed-off-by: Jeff Mahoney Reviewed-by: Filipe Manana Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8bb013672aee0..e3b39f0c46662 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5035,7 +5035,8 @@ void btrfs_evict_inode(struct inode *inode) goto no_delete; } /* do we really want it for ->i_nlink > 0 and zero btrfs_root_refs? */ - btrfs_wait_ordered_range(inode, 0, (u64)-1); + if (!special_file(inode->i_mode)) + btrfs_wait_ordered_range(inode, 0, (u64)-1); btrfs_free_io_failure_record(inode, 0, (u64)-1); From 98ef625978667ef284f3671dee77e886d1511a18 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 14 Sep 2015 09:09:31 +0100 Subject: [PATCH 1063/2091] Btrfs: fix read corruption of compressed and shared extents commit 005efedf2c7d0a270ffbe28d8997b03844f3e3e7 upstream. If a file has a range pointing to a compressed extent, followed by another range that points to the same compressed extent and a read operation attempts to read both ranges (either completely or part of them), the pages that correspond to the second range are incorrectly filled with zeroes. Consider the following example: File layout [0 - 8K] [8K - 24K] | | | | points to extent X, points to extent X, offset 4K, length of 8K offset 0, length 16K [extent X, compressed length = 4K uncompressed length = 16K] If a readpages() call spans the 2 ranges, a single bio to read the extent is submitted - extent_io.c:submit_extent_page() would only create a new bio to cover the second range pointing to the extent if the extent it points to had a different logical address than the extent associated with the first range. This has a consequence of the compressed read end io handler (compression.c:end_compressed_bio_read()) finish once the extent is decompressed into the pages covering the first range, leaving the remaining pages (belonging to the second range) filled with zeroes (done by compression.c:btrfs_clear_biovec_end()). So fix this by submitting the current bio whenever we find a range pointing to a compressed extent that was preceded by a range with a different extent map. This is the simplest solution for this corner case. Making the end io callback populate both ranges (or more, if we have multiple pointing to the same extent) is a much more complex solution since each bio is tightly coupled with a single extent map and the extent maps associated to the ranges pointing to the shared extent can have different offsets and lengths. The following test case for fstests triggers the issue: seq=`basename $0` seqres=$RESULT_DIR/$seq echo "QA output created by $seq" tmp=/tmp/$$ status=1 # failure is the default! trap "_cleanup; exit \$status" 0 1 2 3 15 _cleanup() { rm -f $tmp.* } # get standard environment, filters and checks . ./common/rc . ./common/filter # real QA test starts here _need_to_be_root _supported_fs btrfs _supported_os Linux _require_scratch _require_cloner rm -f $seqres.full test_clone_and_read_compressed_extent() { local mount_opts=$1 _scratch_mkfs >>$seqres.full 2>&1 _scratch_mount $mount_opts # Create a test file with a single extent that is compressed (the # data we write into it is highly compressible no matter which # compression algorithm is used, zlib or lzo). 
$XFS_IO_PROG -f -c "pwrite -S 0xaa 0K 4K" \ -c "pwrite -S 0xbb 4K 8K" \ -c "pwrite -S 0xcc 12K 4K" \ $SCRATCH_MNT/foo | _filter_xfs_io # Now clone our extent into an adjacent offset. $CLONER_PROG -s $((4 * 1024)) -d $((16 * 1024)) -l $((8 * 1024)) \ $SCRATCH_MNT/foo $SCRATCH_MNT/foo # Same as before but for this file we clone the extent into a lower # file offset. $XFS_IO_PROG -f -c "pwrite -S 0xaa 8K 4K" \ -c "pwrite -S 0xbb 12K 8K" \ -c "pwrite -S 0xcc 20K 4K" \ $SCRATCH_MNT/bar | _filter_xfs_io $CLONER_PROG -s $((12 * 1024)) -d 0 -l $((8 * 1024)) \ $SCRATCH_MNT/bar $SCRATCH_MNT/bar echo "File digests before unmounting filesystem:" md5sum $SCRATCH_MNT/foo | _filter_scratch md5sum $SCRATCH_MNT/bar | _filter_scratch # Evicting the inode or clearing the page cache before reading # again the file would also trigger the bug - reads were returning # all bytes in the range corresponding to the second reference to # the extent with a value of 0, but the correct data was persisted # (it was a bug exclusively in the read path). The issue happened # only if the same readpages() call targeted pages belonging to the # first and second ranges that point to the same compressed extent. _scratch_remount echo "File digests after mounting filesystem again:" # Must match the same digests we got before. md5sum $SCRATCH_MNT/foo | _filter_scratch md5sum $SCRATCH_MNT/bar | _filter_scratch } echo -e "\nTesting with zlib compression..." test_clone_and_read_compressed_extent "-o compress=zlib" _scratch_unmount echo -e "\nTesting with lzo compression..." test_clone_and_read_compressed_extent "-o compress=lzo" status=0 exit Signed-off-by: Filipe Manana Reviewed-by: Qu Wenruo Reviewed-by: Liu Bo Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent_io.c | 65 ++++++++++++++++++++++++++++++++++++++------ 1 file changed, 57 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index c32d226bfeccb..1a14acd6163c4 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2795,7 +2795,8 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, bio_end_io_t end_io_func, int mirror_num, unsigned long prev_bio_flags, - unsigned long bio_flags) + unsigned long bio_flags, + bool force_bio_submit) { int ret = 0; struct bio *bio; @@ -2813,6 +2814,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, contig = bio_end_sector(bio) == sector; if (prev_bio_flags != bio_flags || !contig || + force_bio_submit || merge_bio(rw, tree, page, offset, page_size, bio, bio_flags) || bio_add_page(bio, page, page_size, offset) < page_size) { ret = submit_one_bio(rw, bio, mirror_num, @@ -2906,7 +2908,8 @@ static int __do_readpage(struct extent_io_tree *tree, get_extent_t *get_extent, struct extent_map **em_cached, struct bio **bio, int mirror_num, - unsigned long *bio_flags, int rw) + unsigned long *bio_flags, int rw, + u64 *prev_em_start) { struct inode *inode = page->mapping->host; u64 start = page_offset(page); @@ -2954,6 +2957,7 @@ static int __do_readpage(struct extent_io_tree *tree, } while (cur <= end) { unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1; + bool force_bio_submit = false; if (cur >= last_byte) { char *userpage; @@ -3004,6 +3008,49 @@ static int __do_readpage(struct extent_io_tree *tree, block_start = em->block_start; if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) block_start = EXTENT_MAP_HOLE; + + /* + * If we have a file range that points to a compressed extent + * and it's followed by a consecutive file range that points to + * to the same 
compressed extent (possibly with a different + * offset and/or length, so it either points to the whole extent + * or only part of it), we must make sure we do not submit a + * single bio to populate the pages for the 2 ranges because + * this makes the compressed extent read zero out the pages + * belonging to the 2nd range. Imagine the following scenario: + * + * File layout + * [0 - 8K] [8K - 24K] + * | | + * | | + * points to extent X, points to extent X, + * offset 4K, length of 8K offset 0, length 16K + * + * [extent X, compressed length = 4K uncompressed length = 16K] + * + * If the bio to read the compressed extent covers both ranges, + * it will decompress extent X into the pages belonging to the + * first range and then it will stop, zeroing out the remaining + * pages that belong to the other range that points to extent X. + * So here we make sure we submit 2 bios, one for the first + * range and another one for the third range. Both will target + * the same physical extent from disk, but we can't currently + * make the compressed bio endio callback populate the pages + * for both ranges because each compressed bio is tightly + * coupled with a single extent map, and each range can have + * an extent map with a different offset value relative to the + * uncompressed data of our extent and different lengths. This + * is a corner case so we prioritize correctness over + * non-optimal behavior (submitting 2 bios for the same extent). + */ + if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) && + prev_em_start && *prev_em_start != (u64)-1 && + *prev_em_start != em->orig_start) + force_bio_submit = true; + + if (prev_em_start) + *prev_em_start = em->orig_start; + free_extent_map(em); em = NULL; @@ -3053,7 +3100,8 @@ static int __do_readpage(struct extent_io_tree *tree, bdev, bio, pnr, end_bio_extent_readpage, mirror_num, *bio_flags, - this_bio_flag); + this_bio_flag, + force_bio_submit); if (!ret) { nr++; *bio_flags = this_bio_flag; @@ -3085,6 +3133,7 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree, struct inode *inode; struct btrfs_ordered_extent *ordered; int index; + u64 prev_em_start = (u64)-1; inode = pages[0]->mapping->host; while (1) { @@ -3100,7 +3149,7 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree, for (index = 0; index < nr_pages; index++) { __do_readpage(tree, pages[index], get_extent, em_cached, bio, - mirror_num, bio_flags, rw); + mirror_num, bio_flags, rw, &prev_em_start); page_cache_release(pages[index]); } } @@ -3168,7 +3217,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree, } ret = __do_readpage(tree, page, get_extent, NULL, bio, mirror_num, - bio_flags, rw); + bio_flags, rw, NULL); return ret; } @@ -3194,7 +3243,7 @@ int extent_read_full_page_nolock(struct extent_io_tree *tree, struct page *page, int ret; ret = __do_readpage(tree, page, get_extent, NULL, &bio, mirror_num, - &bio_flags, READ); + &bio_flags, READ, NULL); if (bio) ret = submit_one_bio(READ, bio, mirror_num, bio_flags); return ret; @@ -3447,7 +3496,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode, sector, iosize, pg_offset, bdev, &epd->bio, max_nr, end_bio_extent_writepage, - 0, 0, 0); + 0, 0, 0, false); if (ret) SetPageError(page); } @@ -3749,7 +3798,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb, ret = submit_extent_page(rw, tree, p, offset >> 9, PAGE_CACHE_SIZE, 0, bdev, &epd->bio, -1, end_bio_extent_buffer_writepage, - 0, epd->bio_flags, bio_flags); + 0, epd->bio_flags, 
bio_flags, false); epd->bio_flags = bio_flags; if (ret) { set_btree_ioerr(p); From e84e81255e79c19128b0f30ba6937ccf55933a86 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 28 Sep 2015 09:56:26 +0100 Subject: [PATCH 1064/2091] Btrfs: update fix for read corruption of compressed and shared extents commit 808f80b46790f27e145c72112189d6a3be2bc884 upstream. My previous fix in commit 005efedf2c7d ("Btrfs: fix read corruption of compressed and shared extents") was effective only if the compressed extents cover a file range with a length that is not a multiple of 16 pages. That's because the detection of when we reached a different range of the file that shares the same compressed extent as the previously processed range was done at extent_io.c:__do_contiguous_readpages(), which covers subranges with a length up to 16 pages, because extent_readpages() groups the pages in clusters no larger than 16 pages. So fix this by tracking the start of the previously processed file range's extent map at extent_readpages(). The following test case for fstests reproduces the issue: seq=`basename $0` seqres=$RESULT_DIR/$seq echo "QA output created by $seq" tmp=/tmp/$$ status=1 # failure is the default! trap "_cleanup; exit \$status" 0 1 2 3 15 _cleanup() { rm -f $tmp.* } # get standard environment, filters and checks . ./common/rc . ./common/filter # real QA test starts here _need_to_be_root _supported_fs btrfs _supported_os Linux _require_scratch _require_cloner rm -f $seqres.full test_clone_and_read_compressed_extent() { local mount_opts=$1 _scratch_mkfs >>$seqres.full 2>&1 _scratch_mount $mount_opts # Create our test file with a single extent of 64Kb that is going to # be compressed no matter which compression algo is used (zlib/lzo). $XFS_IO_PROG -f -c "pwrite -S 0xaa 0K 64K" \ $SCRATCH_MNT/foo | _filter_xfs_io # Now clone the compressed extent into an adjacent file offset. $CLONER_PROG -s 0 -d $((64 * 1024)) -l $((64 * 1024)) \ $SCRATCH_MNT/foo $SCRATCH_MNT/foo echo "File digest before unmount:" md5sum $SCRATCH_MNT/foo | _filter_scratch # Remount the fs or clear the page cache to trigger the bug in # btrfs. Because the extent has an uncompressed length that is a # multiple of 16 pages, all the pages belonging to the second range # of the file (64K to 128K), which points to the same extent as the # first range (0K to 64K), had their contents full of zeroes instead # of the byte 0xaa. This was a bug exclusively in the read path of # compressed extents, the correct data was stored on disk, btrfs # just failed to fill in the pages correctly. _scratch_remount echo "File digest after remount:" # Must match the digest we got before. md5sum $SCRATCH_MNT/foo | _filter_scratch } echo -e "\nTesting with zlib compression..." test_clone_and_read_compressed_extent "-o compress=zlib" _scratch_unmount echo -e "\nTesting with lzo compression..." 
test_clone_and_read_compressed_extent "-o compress=lzo" status=0 exit Signed-off-by: Filipe Manana Tested-by: Timofey Titovets Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent_io.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 1a14acd6163c4..885f533a34d9c 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3128,12 +3128,12 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree, get_extent_t *get_extent, struct extent_map **em_cached, struct bio **bio, int mirror_num, - unsigned long *bio_flags, int rw) + unsigned long *bio_flags, int rw, + u64 *prev_em_start) { struct inode *inode; struct btrfs_ordered_extent *ordered; int index; - u64 prev_em_start = (u64)-1; inode = pages[0]->mapping->host; while (1) { @@ -3149,7 +3149,7 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree, for (index = 0; index < nr_pages; index++) { __do_readpage(tree, pages[index], get_extent, em_cached, bio, - mirror_num, bio_flags, rw, &prev_em_start); + mirror_num, bio_flags, rw, prev_em_start); page_cache_release(pages[index]); } } @@ -3159,7 +3159,8 @@ static void __extent_readpages(struct extent_io_tree *tree, int nr_pages, get_extent_t *get_extent, struct extent_map **em_cached, struct bio **bio, int mirror_num, - unsigned long *bio_flags, int rw) + unsigned long *bio_flags, int rw, + u64 *prev_em_start) { u64 start = 0; u64 end = 0; @@ -3180,7 +3181,7 @@ static void __extent_readpages(struct extent_io_tree *tree, index - first_index, start, end, get_extent, em_cached, bio, mirror_num, bio_flags, - rw); + rw, prev_em_start); start = page_start; end = start + PAGE_CACHE_SIZE - 1; first_index = index; @@ -3191,7 +3192,8 @@ static void __extent_readpages(struct extent_io_tree *tree, __do_contiguous_readpages(tree, &pages[first_index], index - first_index, start, end, get_extent, em_cached, bio, - mirror_num, bio_flags, rw); + mirror_num, bio_flags, rw, + prev_em_start); } static int __extent_read_full_page(struct extent_io_tree *tree, @@ -4202,6 +4204,7 @@ int extent_readpages(struct extent_io_tree *tree, struct page *page; struct extent_map *em_cached = NULL; int nr = 0; + u64 prev_em_start = (u64)-1; for (page_idx = 0; page_idx < nr_pages; page_idx++) { page = list_entry(pages->prev, struct page, lru); @@ -4218,12 +4221,12 @@ int extent_readpages(struct extent_io_tree *tree, if (nr < ARRAY_SIZE(pagepool)) continue; __extent_readpages(tree, pagepool, nr, get_extent, &em_cached, - &bio, 0, &bio_flags, READ); + &bio, 0, &bio_flags, READ, &prev_em_start); nr = 0; } if (nr) __extent_readpages(tree, pagepool, nr, get_extent, &em_cached, - &bio, 0, &bio_flags, READ); + &bio, 0, &bio_flags, READ, &prev_em_start); if (em_cached) free_extent_map(em_cached); From 7559c548a3c54d83a6caaaa31c83d01076d8c54f Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 15 Sep 2015 11:17:21 -0600 Subject: [PATCH 1065/2091] PCI: Fix devfn for VPD access through function 0 commit 9d9240756e63dd87d6cbf5da8b98ceb8f8192b55 upstream. Commit 932c435caba8 ("PCI: Add dev_flags bit to access VPD through function 0") passes PCI_SLOT(devfn) for the devfn parameter of pci_get_slot(). Generally this works because we're fairly well guaranteed that a PCIe device is at slot address 0, but for the general case, including conventional PCI, it's incorrect. We need to get the slot and then convert it back into a devfn. 
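To illustrate the devfn arithmetic (a sketch using the standard macros from include/uapi/linux/pci.h; the example numbers are made up and not taken from the commit):

	/* devfn packs the slot number in bits 7:3 and the function in bits 2:0 */
	#define PCI_DEVFN(slot, func)	((((slot) & 0x1f) << 3) | ((func) & 0x07))
	#define PCI_SLOT(devfn)		(((devfn) >> 3) & 0x1f)

	/* A device at slot 3, function 2 has devfn 0x1a. */
	PCI_SLOT(0x1a);					/* 3: a slot number, not a devfn */
	pci_get_slot(bus, 3);				/* wrong: devfn 3 means slot 0, function 3 */
	pci_get_slot(bus, PCI_DEVFN(PCI_SLOT(0x1a), 0));	/* slot 3, function 0, as intended */

On PCIe the slot is always 0, so PCI_SLOT(devfn) happens to equal the devfn of function 0; on conventional PCI with non-zero slots it does not.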
Fixes: 932c435caba8 ("PCI: Add dev_flags bit to access VPD through function 0") Signed-off-by: Alex Williamson Signed-off-by: Bjorn Helgaas Acked-by: Myron Stowe Acked-by: Mark Rustad Signed-off-by: Greg Kroah-Hartman --- drivers/pci/access.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/pci/access.c b/drivers/pci/access.c index b965c12168b72..24e5520fd5353 100644 --- a/drivers/pci/access.c +++ b/drivers/pci/access.c @@ -442,7 +442,8 @@ static const struct pci_vpd_ops pci_vpd_pci22_ops = { static ssize_t pci_vpd_f0_read(struct pci_dev *dev, loff_t pos, size_t count, void *arg) { - struct pci_dev *tdev = pci_get_slot(dev->bus, PCI_SLOT(dev->devfn)); + struct pci_dev *tdev = pci_get_slot(dev->bus, + PCI_DEVFN(PCI_SLOT(dev->devfn), 0)); ssize_t ret; if (!tdev) @@ -456,7 +457,8 @@ static ssize_t pci_vpd_f0_read(struct pci_dev *dev, loff_t pos, size_t count, static ssize_t pci_vpd_f0_write(struct pci_dev *dev, loff_t pos, size_t count, const void *arg) { - struct pci_dev *tdev = pci_get_slot(dev->bus, PCI_SLOT(dev->devfn)); + struct pci_dev *tdev = pci_get_slot(dev->bus, + PCI_DEVFN(PCI_SLOT(dev->devfn), 0)); ssize_t ret; if (!tdev) @@ -475,7 +477,8 @@ static const struct pci_vpd_ops pci_vpd_f0_ops = { static int pci_vpd_f0_dev_check(struct pci_dev *dev) { - struct pci_dev *tdev = pci_get_slot(dev->bus, PCI_SLOT(dev->devfn)); + struct pci_dev *tdev = pci_get_slot(dev->bus, + PCI_DEVFN(PCI_SLOT(dev->devfn), 0)); int ret = 0; if (!tdev) From 0ef7b705764bad68176542f16f7bd2d3fa72cc7e Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 15 Sep 2015 22:24:46 -0600 Subject: [PATCH 1066/2091] PCI: Use function 0 VPD for identical functions, regular VPD for others commit da2d03ea27f6ed9d2005a67b20dd021ddacf1e4d upstream. 932c435caba8 ("PCI: Add dev_flags bit to access VPD through function 0") added PCI_DEV_FLAGS_VPD_REF_F0. Previously, we set the flag on every non-zero function of quirked devices. If a function turned out to be different from function 0, i.e., it had a different class, vendor ID, or device ID, the flag remained set but we didn't make VPD accessible at all. Flip this around so we only set PCI_DEV_FLAGS_VPD_REF_F0 for functions that are identical to function 0, and allow regular VPD access for any other functions. 
[bhelgaas: changelog, stable tag] Fixes: 932c435caba8 ("PCI: Add dev_flags bit to access VPD through function 0") Signed-off-by: Alex Williamson Signed-off-by: Bjorn Helgaas Acked-by: Myron Stowe Acked-by: Mark Rustad Signed-off-by: Greg Kroah-Hartman --- drivers/pci/access.c | 22 ---------------------- drivers/pci/quirks.c | 20 ++++++++++++++++++-- 2 files changed, 18 insertions(+), 24 deletions(-) diff --git a/drivers/pci/access.c b/drivers/pci/access.c index 24e5520fd5353..502a82ca1db05 100644 --- a/drivers/pci/access.c +++ b/drivers/pci/access.c @@ -475,23 +475,6 @@ static const struct pci_vpd_ops pci_vpd_f0_ops = { .release = pci_vpd_pci22_release, }; -static int pci_vpd_f0_dev_check(struct pci_dev *dev) -{ - struct pci_dev *tdev = pci_get_slot(dev->bus, - PCI_DEVFN(PCI_SLOT(dev->devfn), 0)); - int ret = 0; - - if (!tdev) - return -ENODEV; - if (!tdev->vpd || !tdev->multifunction || - dev->class != tdev->class || dev->vendor != tdev->vendor || - dev->device != tdev->device) - ret = -ENODEV; - - pci_dev_put(tdev); - return ret; -} - int pci_vpd_pci22_init(struct pci_dev *dev) { struct pci_vpd_pci22 *vpd; @@ -500,12 +483,7 @@ int pci_vpd_pci22_init(struct pci_dev *dev) cap = pci_find_capability(dev, PCI_CAP_ID_VPD); if (!cap) return -ENODEV; - if (dev->dev_flags & PCI_DEV_FLAGS_VPD_REF_F0) { - int ret = pci_vpd_f0_dev_check(dev); - if (ret) - return ret; - } vpd = kzalloc(sizeof(*vpd), GFP_ATOMIC); if (!vpd) return -ENOMEM; diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 804cd3b02c66b..4a6933f02cd01 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -1915,11 +1915,27 @@ static void quirk_netmos(struct pci_dev *dev) DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_VENDOR_ID_NETMOS, PCI_ANY_ID, PCI_CLASS_COMMUNICATION_SERIAL, 8, quirk_netmos); +/* + * Quirk non-zero PCI functions to route VPD access through function 0 for + * devices that share VPD resources between functions. The functions are + * expected to be identical devices. + */ static void quirk_f0_vpd_link(struct pci_dev *dev) { - if (!dev->multifunction || !PCI_FUNC(dev->devfn)) + struct pci_dev *f0; + + if (!PCI_FUNC(dev->devfn)) return; - dev->dev_flags |= PCI_DEV_FLAGS_VPD_REF_F0; + + f0 = pci_get_slot(dev->bus, PCI_DEVFN(PCI_SLOT(dev->devfn), 0)); + if (!f0) + return; + + if (f0->vpd && dev->class == f0->class && + dev->vendor == f0->vendor && dev->device == f0->device) + dev->dev_flags |= PCI_DEV_FLAGS_VPD_REF_F0; + + pci_dev_put(f0); } DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, PCI_CLASS_NETWORK_ETHERNET, 8, quirk_f0_vpd_link); From d1124f24bd6910248a7f12af5ec0efbc915781ab Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 22 Sep 2015 17:03:54 -0500 Subject: [PATCH 1067/2091] PCI: Clear IORESOURCE_UNSET when clipping a bridge window commit b838b39e930aa1cfd099ea82ac40ed6d6413af26 upstream. c770cb4cb505 ("PCI: Mark invalid BARs as unassigned") sets IORESOURCE_UNSET if we fail to claim a resource. 
If we tried to claim a bridge window, failed, clipped the window, and tried to claim the clipped window, we failed again because of IORESOURCE_UNSET: pci_bus 0000:00: root bus resource [mem 0xc0000000-0xffffffff window] pci 0000:00:01.0: can't claim BAR 15 [mem 0xbdf00000-0xddefffff 64bit pref]: no compatible bridge window pci 0000:00:01.0: [mem size 0x20000000 64bit pref] clipped to [mem size 0x1df00000 64bit pref] pci 0000:00:01.0: bridge window [mem size 0x1df00000 64bit pref] pci 0000:00:01.0: can't claim BAR 15 [mem size 0x1df00000 64bit pref]: no address assigned The 00:01.0 window started as [mem 0xbdf00000-0xddefffff 64bit pref]. That starts before the host bridge window [mem 0xc0000000-0xffffffff window], so we clipped the 00:01.0 window to [mem 0xc0000000-0xddefffff 64bit pref]. But we left it marked IORESOURCE_UNSET, so the second claim failed when it should have succeeded. This means downstream devices will also fail for lack of resources, e.g., in the bugzilla below, radeon 0000:01:00.0: Fatal error during GPU init Clear IORESOURCE_UNSET when we clip a bridge window. Also clear IORESOURCE_UNSET in our copy of the unclipped window so we can see exactly what the original window was and how it now fits inside the upstream window. Fixes: c770cb4cb505 ("PCI: Mark invalid BARs as unassigned") Link: https://bugzilla.kernel.org/show_bug.cgi?id=85491#c47 Based-on-patch-by: Lorenzo Pieralisi Based-on-patch-by: Yinghai Lu Tested-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Reviewed-by: Lorenzo Pieralisi Acked-by: Yinghai Lu Signed-off-by: Greg Kroah-Hartman --- drivers/pci/bus.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index 6fbd3f2b5992a..d3346d23963b8 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -256,6 +256,8 @@ bool pci_bus_clip_resource(struct pci_dev *dev, int idx) res->start = start; res->end = end; + res->flags &= ~IORESOURCE_UNSET; + orig_res.flags &= ~IORESOURCE_UNSET; dev_printk(KERN_DEBUG, &dev->dev, "%pR clipped to %pR\n", &orig_res, res); From 839e359734a7bdd7143c48ff125e6b8c507710de Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 9 Sep 2015 21:34:51 -0400 Subject: [PATCH 1068/2091] dm crypt: constrain crypt device's max_segment_size to PAGE_SIZE commit 586b286b110e94eb31840ac5afc0c24e0881fe34 upstream. Setting the dm-crypt device's max_segment_size to PAGE_SIZE is an unfortunate constraint that is required to avoid the potential for exceeding dm-crypt's underlying device's max_segments limits -- due to crypt_alloc_buffer() possibly allocating pages for the encryption bio that are not as physically contiguous as the original bio. It is interesting to note that this problem was already fixed back in 2007 via commit 91e106259 ("dm crypt: use bio_add_page"). But Linux 4.0 commit cf2f1abfb ("dm crypt: don't allocate pages for a partial request") regressed dm-crypt back to _not_ using bio_add_page(). But given dm-crypt's cpu parallelization changes all depend on commit cf2f1abfb's abandoning of the more complex io fragments processing that dm-crypt previously had we cannot easily go back to using bio_add_page(). So all said the cleanest way to resolve this issue is to fix dm-crypt to properly constrain the original bios entering dm-crypt so the encryption bios that dm-crypt generates from the original bios are always compatible with the underlying device's max_segments queue limits. 
It should be noted that technically Linux 4.3 does _not_ need this fix because of the block core's new late bio-splitting capability. But, it is reasoned, there is little to be gained by having the block core split the encrypted bio that is composed of PAGE_SIZE segments. That said, in the future we may revert this change. Fixes: cf2f1abfb ("dm crypt: don't allocate pages for a partial request") Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=104421 Suggested-by: Jeff Moyer Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-crypt.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 5503e43e5f282..049282e6482ff 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -955,7 +955,8 @@ static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone); /* * Generate a new unfragmented bio with the given size - * This should never violate the device limitations + * This should never violate the device limitations (but only because + * max_segment_size is being constrained to PAGE_SIZE). * * This function may be called concurrently. If we allocate from the mempool * concurrently, there is a possibility of deadlock. For example, if we have @@ -2040,9 +2041,20 @@ static int crypt_iterate_devices(struct dm_target *ti, return fn(ti, cc->dev, cc->start, ti->len, data); } +static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits) +{ + /* + * Unfortunate constraint that is required to avoid the potential + * for exceeding underlying device's max_segments limits -- due to + * crypt_alloc_buffer() possibly allocating pages for the encryption + * bio that are not as physically contiguous as the original bio. + */ + limits->max_segment_size = PAGE_SIZE; +} + static struct target_type crypt_target = { .name = "crypt", - .version = {1, 14, 0}, + .version = {1, 14, 1}, .module = THIS_MODULE, .ctr = crypt_ctr, .dtr = crypt_dtr, @@ -2054,6 +2066,7 @@ static struct target_type crypt_target = { .message = crypt_message, .merge = crypt_merge, .iterate_devices = crypt_iterate_devices, + .io_hints = crypt_io_hints, }; static int __init dm_crypt_init(void) From 35dc0ffe365c80e33b7c096c582688226c57963a Mon Sep 17 00:00:00 2001 From: Michal Kazior Date: Wed, 19 Aug 2015 13:10:43 +0200 Subject: [PATCH 1069/2091] ath10k: fix dma_mapping_error() handling commit 5e55e3cbd1042cffa6249f22c10585e63f8a29bf upstream. The function returns 1 when DMA mapping fails. The driver would return bogus values and could possibly confuse itself if DMA failed. 
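For reference, the usual shape of the fix with the generic DMA API (a sketch, not code lifted from ath10k) is to treat dma_mapping_error() as a boolean failure indication and substitute a real errno:

	paddr = dma_map_single(dev, skb->data, skb->len, DMA_TO_DEVICE);
	if (dma_mapping_error(dev, paddr)) {
		/* dma_mapping_error() reports failure; it is not an errno to return */
		ret = -EIO;
		goto err;
	}

so callers see a proper error code instead of the mapping helper's raw return value.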
Fixes: 767d34fc67af ("ath10k: remove DMA mapping wrappers") Reported-by: Dan Carpenter Signed-off-by: Michal Kazior Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath10k/htc.c | 4 +++- drivers/net/wireless/ath/ath10k/htt_tx.c | 8 ++++++-- drivers/net/wireless/ath/ath10k/pci.c | 8 ++++++-- drivers/net/wireless/ath/ath10k/wmi.c | 1 + 4 files changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/htc.c b/drivers/net/wireless/ath/ath10k/htc.c index 2fd9e180272b3..c5dc6b57212e4 100644 --- a/drivers/net/wireless/ath/ath10k/htc.c +++ b/drivers/net/wireless/ath/ath10k/htc.c @@ -163,8 +163,10 @@ int ath10k_htc_send(struct ath10k_htc *htc, skb_cb->eid = eid; skb_cb->paddr = dma_map_single(dev, skb->data, skb->len, DMA_TO_DEVICE); ret = dma_mapping_error(dev, skb_cb->paddr); - if (ret) + if (ret) { + ret = -EIO; goto err_credits; + } sg_item.transfer_id = ep->eid; sg_item.transfer_context = skb; diff --git a/drivers/net/wireless/ath/ath10k/htt_tx.c b/drivers/net/wireless/ath/ath10k/htt_tx.c index cbd2bc9e62025..7f4854a52a7c1 100644 --- a/drivers/net/wireless/ath/ath10k/htt_tx.c +++ b/drivers/net/wireless/ath/ath10k/htt_tx.c @@ -371,8 +371,10 @@ int ath10k_htt_mgmt_tx(struct ath10k_htt *htt, struct sk_buff *msdu) skb_cb->paddr = dma_map_single(dev, msdu->data, msdu->len, DMA_TO_DEVICE); res = dma_mapping_error(dev, skb_cb->paddr); - if (res) + if (res) { + res = -EIO; goto err_free_txdesc; + } skb_put(txdesc, len); cmd = (struct htt_cmd *)txdesc->data; @@ -463,8 +465,10 @@ int ath10k_htt_tx(struct ath10k_htt *htt, struct sk_buff *msdu) skb_cb->paddr = dma_map_single(dev, msdu->data, msdu->len, DMA_TO_DEVICE); res = dma_mapping_error(dev, skb_cb->paddr); - if (res) + if (res) { + res = -EIO; goto err_free_txbuf; + } if (likely(use_frags)) { frags = skb_cb->htt.txbuf->frags; diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c index ead5432821287..3c4c800ab5051 100644 --- a/drivers/net/wireless/ath/ath10k/pci.c +++ b/drivers/net/wireless/ath/ath10k/pci.c @@ -1378,8 +1378,10 @@ static int ath10k_pci_hif_exchange_bmi_msg(struct ath10k *ar, req_paddr = dma_map_single(ar->dev, treq, req_len, DMA_TO_DEVICE); ret = dma_mapping_error(ar->dev, req_paddr); - if (ret) + if (ret) { + ret = -EIO; goto err_dma; + } if (resp && resp_len) { tresp = kzalloc(*resp_len, GFP_KERNEL); @@ -1391,8 +1393,10 @@ static int ath10k_pci_hif_exchange_bmi_msg(struct ath10k *ar, resp_paddr = dma_map_single(ar->dev, tresp, *resp_len, DMA_FROM_DEVICE); ret = dma_mapping_error(ar->dev, resp_paddr); - if (ret) + if (ret) { + ret = EIO; goto err_req; + } xfer.wait_for_resp = true; xfer.resp_len = 0; diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c index c7ea77edce245..408ecd98e61be 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.c +++ b/drivers/net/wireless/ath/ath10k/wmi.c @@ -2517,6 +2517,7 @@ void ath10k_wmi_event_host_swba(struct ath10k *ar, struct sk_buff *skb) ath10k_warn(ar, "failed to map beacon: %d\n", ret); dev_kfree_skb_any(bcn); + ret = -EIO; goto skip; } From 6b27c668eff7929c87993542935d1206d800bfd1 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 9 Jul 2015 16:45:18 -0400 Subject: [PATCH 1070/2091] svcrdma: Fix send_reply() scatter/gather set-up commit 9d11b51ce7c150a69e761e30518f294fc73d55ff upstream. The Linux NFS server returns garbage in the data payload of inline NFS/RDMA READ replies. 
These are READs of under 1000 bytes or so where the client has not provided either a reply chunk or a write list. The NFS server delivers the data payload for an NFS READ reply to the transport in an xdr_buf page list. If the NFS client did not provide a reply chunk or a write list, send_reply() is supposed to set up a separate sge for the page containing the READ data, and another sge for XDR padding if needed, then post all of the sges via a single SEND Work Request. The problem is send_reply() does not advance through the xdr_buf when setting up scatter/gather entries for SEND WR. It always calls dma_map_xdr with xdr_off set to zero. When there's more than one sge, dma_map_xdr() sets up the SEND sge's so they all point to the xdr_buf's head. The current Linux NFS/RDMA client always provides a reply chunk or a write list when performing an NFS READ over RDMA. Therefore, it does not exercise this particular case. The Linux server has never had to use more than one extra sge for building RPC/RDMA replies with a Linux client. However, an NFS/RDMA client _is_ allowed to send small NFS READs without setting up a write list or reply chunk. The NFS READ reply fits entirely within the inline reply buffer in this case. This is perhaps a more efficient way of performing NFS READs that the Linux NFS/RDMA client may some day adopt. Fixes: b432e6b3d9c1 ('svcrdma: Change DMA mapping logic to . . .') BugLink: https://bugzilla.linux-nfs.org/show_bug.cgi?id=285 Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 7de33d1af9b6d..7fa6d78331edd 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -382,6 +382,7 @@ static int send_reply(struct svcxprt_rdma *rdma, int byte_count) { struct ib_send_wr send_wr; + u32 xdr_off; int sge_no; int sge_bytes; int page_no; @@ -416,8 +417,8 @@ static int send_reply(struct svcxprt_rdma *rdma, ctxt->direction = DMA_TO_DEVICE; /* Map the payload indicated by 'byte_count' */ + xdr_off = 0; for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) { - int xdr_off = 0; sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count); byte_count -= sge_bytes; ctxt->sge[sge_no].addr = @@ -455,6 +456,13 @@ static int send_reply(struct svcxprt_rdma *rdma, } rqstp->rq_next_page = rqstp->rq_respages + 1; + /* The loop above bumps sc_dma_used for each sge. The + * xdr_buf.tail gets a separate sge, but resides in the + * same page as xdr_buf.head. Don't count it twice. + */ + if (sge_no > ctxt->count) + atomic_dec(&rdma->sc_dma_used); + if (sge_no > rdma->sc_max_sge) { pr_err("svcrdma: Too many sges (%d)\n", sge_no); goto err; From f11a82cafa7200fd60ef1cb62d53d3ae8233a77f Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Wed, 12 Aug 2015 15:12:09 +0100 Subject: [PATCH 1071/2091] dm btree: add ref counting ops for the leaves of top level btrees commit b0dc3c8bc157c60b1d470163882be8c13e1950af upstream. When using nested btrees, the top leaves of the top levels contain block addresses for the root of the next tree down. If we shadow a shared leaf node the leaf values (sub tree roots) should be incremented accordingly. This is only an issue if there is metadata sharing in the top levels. Which only occurs if metadata snapshots are being used (as is possible with dm-thinp). 
And could result in a block from the thinp metadata snap being reused early, thus corrupting the thinp metadata snap. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- .../md/persistent-data/dm-btree-internal.h | 6 +++ drivers/md/persistent-data/dm-btree-remove.c | 12 ++---- drivers/md/persistent-data/dm-btree-spine.c | 37 +++++++++++++++++++ drivers/md/persistent-data/dm-btree.c | 7 +--- 4 files changed, 47 insertions(+), 15 deletions(-) diff --git a/drivers/md/persistent-data/dm-btree-internal.h b/drivers/md/persistent-data/dm-btree-internal.h index bf2b80d5c4707..8731b6ea026bd 100644 --- a/drivers/md/persistent-data/dm-btree-internal.h +++ b/drivers/md/persistent-data/dm-btree-internal.h @@ -138,4 +138,10 @@ int lower_bound(struct btree_node *n, uint64_t key); extern struct dm_block_validator btree_node_validator; +/* + * Value type for upper levels of multi-level btrees. + */ +extern void init_le64_type(struct dm_transaction_manager *tm, + struct dm_btree_value_type *vt); + #endif /* DM_BTREE_INTERNAL_H */ diff --git a/drivers/md/persistent-data/dm-btree-remove.c b/drivers/md/persistent-data/dm-btree-remove.c index a03178e91a79a..7c0d75547ccf5 100644 --- a/drivers/md/persistent-data/dm-btree-remove.c +++ b/drivers/md/persistent-data/dm-btree-remove.c @@ -544,14 +544,6 @@ static int remove_raw(struct shadow_spine *s, struct dm_btree_info *info, return r; } -static struct dm_btree_value_type le64_type = { - .context = NULL, - .size = sizeof(__le64), - .inc = NULL, - .dec = NULL, - .equal = NULL -}; - int dm_btree_remove(struct dm_btree_info *info, dm_block_t root, uint64_t *keys, dm_block_t *new_root) { @@ -559,12 +551,14 @@ int dm_btree_remove(struct dm_btree_info *info, dm_block_t root, int index = 0, r = 0; struct shadow_spine spine; struct btree_node *n; + struct dm_btree_value_type le64_vt; + init_le64_type(info->tm, &le64_vt); init_shadow_spine(&spine, info); for (level = 0; level < info->levels; level++) { r = remove_raw(&spine, info, (level == last_level ? 
- &info->value_type : &le64_type), + &info->value_type : &le64_vt), root, keys[level], (unsigned *)&index); if (r < 0) break; diff --git a/drivers/md/persistent-data/dm-btree-spine.c b/drivers/md/persistent-data/dm-btree-spine.c index 1b5e13ec7f96a..0dee514ba4c5f 100644 --- a/drivers/md/persistent-data/dm-btree-spine.c +++ b/drivers/md/persistent-data/dm-btree-spine.c @@ -249,3 +249,40 @@ int shadow_root(struct shadow_spine *s) { return s->root; } + +static void le64_inc(void *context, const void *value_le) +{ + struct dm_transaction_manager *tm = context; + __le64 v_le; + + memcpy(&v_le, value_le, sizeof(v_le)); + dm_tm_inc(tm, le64_to_cpu(v_le)); +} + +static void le64_dec(void *context, const void *value_le) +{ + struct dm_transaction_manager *tm = context; + __le64 v_le; + + memcpy(&v_le, value_le, sizeof(v_le)); + dm_tm_dec(tm, le64_to_cpu(v_le)); +} + +static int le64_equal(void *context, const void *value1_le, const void *value2_le) +{ + __le64 v1_le, v2_le; + + memcpy(&v1_le, value1_le, sizeof(v1_le)); + memcpy(&v2_le, value2_le, sizeof(v2_le)); + return v1_le == v2_le; +} + +void init_le64_type(struct dm_transaction_manager *tm, + struct dm_btree_value_type *vt) +{ + vt->context = tm; + vt->size = sizeof(__le64); + vt->inc = le64_inc; + vt->dec = le64_dec; + vt->equal = le64_equal; +} diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c index fdd3793e22f95..c7726cebc4950 100644 --- a/drivers/md/persistent-data/dm-btree.c +++ b/drivers/md/persistent-data/dm-btree.c @@ -667,12 +667,7 @@ static int insert(struct dm_btree_info *info, dm_block_t root, struct btree_node *n; struct dm_btree_value_type le64_type; - le64_type.context = NULL; - le64_type.size = sizeof(__le64); - le64_type.inc = NULL; - le64_type.dec = NULL; - le64_type.equal = NULL; - + init_le64_type(info->tm, &le64_type); init_shadow_spine(&spine, info); for (level = 0; level < (info->levels - 1); level++) { From 8e61c75ae0cf1b0b168cb2cf5ca3dc3ca72ff777 Mon Sep 17 00:00:00 2001 From: Shawn Lin Date: Wed, 9 Sep 2015 15:41:52 +0800 Subject: [PATCH 1072/2091] staging: ion: fix corruption of ion_import_dma_buf commit 6fa92e2bcf6390e64895b12761e851c452d87bd8 upstream. we found this issue but still exit in lastest kernel. Simply keep ion_handle_create under mutex_lock to avoid this race. WARNING: CPU: 2 PID: 2648 at drivers/staging/android/ion/ion.c:512 ion_handle_add+0xb4/0xc0() ion_handle_add: buffer already found. Modules linked in: iwlmvm iwlwifi mac80211 cfg80211 compat CPU: 2 PID: 2648 Comm: TimedEventQueue Tainted: G W 3.14.0 #7 00000000 00000000 9a3efd2c 80faf273 9a3efd6c 9a3efd5c 80935dc9 811d7fd3 9a3efd88 00000a58 812208a0 00000200 80e128d4 80e128d4 8d4ae00c a8cd8600 a8cd8094 9a3efd74 80935e0e 00000009 9a3efd6c 811d7fd3 9a3efd88 9a3efd9c Call Trace: [<80faf273>] dump_stack+0x48/0x69 [<80935dc9>] warn_slowpath_common+0x79/0x90 [<80e128d4>] ? ion_handle_add+0xb4/0xc0 [<80e128d4>] ? ion_handle_add+0xb4/0xc0 [<80935e0e>] warn_slowpath_fmt+0x2e/0x30 [<80e128d4>] ion_handle_add+0xb4/0xc0 [<80e144cc>] ion_import_dma_buf+0x8c/0x110 [<80c517c4>] reg_init+0x364/0x7d0 [<80993363>] ? futex_wait+0x123/0x210 [<80992e0e>] ? get_futex_key+0x16e/0x1e0 [<8099308f>] ? futex_wake+0x5f/0x120 [<80c51e19>] vpu_service_ioctl+0x1e9/0x500 [<80994aec>] ? do_futex+0xec/0x8e0 [<80971080>] ? prepare_to_wait_event+0xc0/0xc0 [<80c51c30>] ? reg_init+0x7d0/0x7d0 [<80a22562>] do_vfs_ioctl+0x2d2/0x4c0 [<80b198ad>] ? inode_has_perm.isra.41+0x2d/0x40 [<80b199cf>] ? file_has_perm+0x7f/0x90 [<80b1a5f7>] ? 
selinux_file_ioctl+0x47/0xf0 [<80a227a8>] SyS_ioctl+0x58/0x80 [<80fb45e8>] syscall_call+0x7/0x7 [<80fb0000>] ? mmc_do_calc_max_discard+0xab/0xe4 Fixes: 83271f626 ("ion: hold reference to handle...") Signed-off-by: Shawn Lin Reviewed-by: Laura Abbott Signed-off-by: Greg Kroah-Hartman --- drivers/staging/android/ion/ion.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/staging/android/ion/ion.c b/drivers/staging/android/ion/ion.c index b0b96ab31954a..abbc42a56e7c5 100644 --- a/drivers/staging/android/ion/ion.c +++ b/drivers/staging/android/ion/ion.c @@ -1179,13 +1179,13 @@ struct ion_handle *ion_import_dma_buf(struct ion_client *client, int fd) mutex_unlock(&client->lock); goto end; } - mutex_unlock(&client->lock); handle = ion_handle_create(client, buffer); - if (IS_ERR(handle)) + if (IS_ERR(handle)) { + mutex_unlock(&client->lock); goto end; + } - mutex_lock(&client->lock); ret = ion_handle_add(client, handle); mutex_unlock(&client->lock); if (ret) { From 182318334b8298203e16e6f6d8a11e2030e8bb38 Mon Sep 17 00:00:00 2001 From: "Liu.Zhao" Date: Mon, 24 Aug 2015 08:36:12 -0700 Subject: [PATCH 1073/2091] USB: option: add ZTE PIDs commit 19ab6bc5674a30fdb6a2436b068d19a3c17dc73e upstream. This is intended to add ZTE device PIDs on kernel. Signed-off-by: Liu.Zhao [johan: sort the new entries ] Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 876423b8892c9..7c8eb4c4c1752 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -278,6 +278,10 @@ static void option_instat_callback(struct urb *urb); #define ZTE_PRODUCT_MF622 0x0001 #define ZTE_PRODUCT_MF628 0x0015 #define ZTE_PRODUCT_MF626 0x0031 +#define ZTE_PRODUCT_ZM8620_X 0x0396 +#define ZTE_PRODUCT_ME3620_MBIM 0x0426 +#define ZTE_PRODUCT_ME3620_X 0x1432 +#define ZTE_PRODUCT_ME3620_L 0x1433 #define ZTE_PRODUCT_AC2726 0xfff1 #define ZTE_PRODUCT_MG880 0xfffd #define ZTE_PRODUCT_CDMA_TECH 0xfffe @@ -544,6 +548,18 @@ static const struct option_blacklist_info zte_mc2716_z_blacklist = { .sendsetup = BIT(1) | BIT(2) | BIT(3), }; +static const struct option_blacklist_info zte_me3620_mbim_blacklist = { + .reserved = BIT(2) | BIT(3) | BIT(4), +}; + +static const struct option_blacklist_info zte_me3620_xl_blacklist = { + .reserved = BIT(3) | BIT(4) | BIT(5), +}; + +static const struct option_blacklist_info zte_zm8620_x_blacklist = { + .reserved = BIT(3) | BIT(4) | BIT(5), +}; + static const struct option_blacklist_info huawei_cdc12_blacklist = { .reserved = BIT(1) | BIT(2), }; @@ -1591,6 +1607,14 @@ static const struct usb_device_id option_ids[] = { .driver_info = (kernel_ulong_t)&zte_ad3812_z_blacklist }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_MC2716, 0xff, 0xff, 0xff), .driver_info = (kernel_ulong_t)&zte_mc2716_z_blacklist }, + { USB_DEVICE(ZTE_VENDOR_ID, ZTE_PRODUCT_ME3620_L), + .driver_info = (kernel_ulong_t)&zte_me3620_xl_blacklist }, + { USB_DEVICE(ZTE_VENDOR_ID, ZTE_PRODUCT_ME3620_MBIM), + .driver_info = (kernel_ulong_t)&zte_me3620_mbim_blacklist }, + { USB_DEVICE(ZTE_VENDOR_ID, ZTE_PRODUCT_ME3620_X), + .driver_info = (kernel_ulong_t)&zte_me3620_xl_blacklist }, + { USB_DEVICE(ZTE_VENDOR_ID, ZTE_PRODUCT_ZM8620_X), + .driver_info = (kernel_ulong_t)&zte_zm8620_x_blacklist }, { USB_VENDOR_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff, 0x02, 0x01) }, { USB_VENDOR_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff, 0x02, 
0x05) }, { USB_VENDOR_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff, 0x86, 0x10) }, From abb5e1983c9088f1d889b16338bc0a05fa936eea Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 3 Aug 2015 13:11:47 +1000 Subject: [PATCH 1074/2091] md/raid0: update queue parameter in a safer location. commit 199dc6ed5179251fa6158a461499c24bdd99c836 upstream. When a (e.g.) RAID5 array is reshaped to RAID0, the updating of queue parameters (e.g. max number of sectors per bio) is done in the wrong place. It should be part of ->run, but it is actually part of ->takeover. This means it happens before level_store() calls: blk_set_stacking_limits(&mddev->queue->limits); and so it ineffective. This can lead to errors from underlying devices. So move all the relevant settings out of create_stripe_zones() and into raid0_run(). As this can lead to a bug-on it is suitable for any -stable kernel which supports reshape to RAID0. So 2.6.35 or later. As the bug has been present for five years there is no urgency, so no need to rush into -stable. Fixes: 9af204cf720c ("md: Add support for Raid5->Raid0 and Raid10->Raid0 takeover") Reported-by: Yi Zhang Signed-off-by: NeilBrown Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid0.c | 75 ++++++++++++++++++++++++---------------------- 1 file changed, 39 insertions(+), 36 deletions(-) diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index efb654eb53992..4a13c3cb940b8 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -83,7 +83,7 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf) char b[BDEVNAME_SIZE]; char b2[BDEVNAME_SIZE]; struct r0conf *conf = kzalloc(sizeof(*conf), GFP_KERNEL); - bool discard_supported = false; + unsigned short blksize = 512; if (!conf) return -ENOMEM; @@ -98,6 +98,9 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf) sector_div(sectors, mddev->chunk_sectors); rdev1->sectors = sectors * mddev->chunk_sectors; + blksize = max(blksize, queue_logical_block_size( + rdev1->bdev->bd_disk->queue)); + rdev_for_each(rdev2, mddev) { pr_debug("md/raid0:%s: comparing %s(%llu)" " with %s(%llu)\n", @@ -134,6 +137,18 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf) } pr_debug("md/raid0:%s: FINAL %d zones\n", mdname(mddev), conf->nr_strip_zones); + /* + * now since we have the hard sector sizes, we can make sure + * chunk size is a multiple of that sector size + */ + if ((mddev->chunk_sectors << 9) % blksize) { + printk(KERN_ERR "md/raid0:%s: chunk_size of %d not multiple of block size %d\n", + mdname(mddev), + mddev->chunk_sectors << 9, blksize); + err = -EINVAL; + goto abort; + } + err = -ENOMEM; conf->strip_zone = kzalloc(sizeof(struct strip_zone)* conf->nr_strip_zones, GFP_KERNEL); @@ -188,19 +203,12 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf) } dev[j] = rdev1; - if (mddev->queue) - disk_stack_limits(mddev->gendisk, rdev1->bdev, - rdev1->data_offset << 9); - if (rdev1->bdev->bd_disk->queue->merge_bvec_fn) conf->has_merge_bvec = 1; if (!smallest || (rdev1->sectors < smallest->sectors)) smallest = rdev1; cnt++; - - if (blk_queue_discard(bdev_get_queue(rdev1->bdev))) - discard_supported = true; } if (cnt != mddev->raid_disks) { printk(KERN_ERR "md/raid0:%s: too few disks (%d of %d) - " @@ -261,28 +269,6 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf) (unsigned long long)smallest->sectors); } - /* - * now since we have the hard sector sizes, we can make sure - * chunk size is a multiple of 
that sector size - */ - if ((mddev->chunk_sectors << 9) % queue_logical_block_size(mddev->queue)) { - printk(KERN_ERR "md/raid0:%s: chunk_size of %d not valid\n", - mdname(mddev), - mddev->chunk_sectors << 9); - goto abort; - } - - if (mddev->queue) { - blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9); - blk_queue_io_opt(mddev->queue, - (mddev->chunk_sectors << 9) * mddev->raid_disks); - - if (!discard_supported) - queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, mddev->queue); - else - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue); - } - pr_debug("md/raid0:%s: done.\n", mdname(mddev)); *private_conf = conf; @@ -433,12 +419,6 @@ static int raid0_run(struct mddev *mddev) if (md_check_no_bitmap(mddev)) return -EINVAL; - if (mddev->queue) { - blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors); - blk_queue_max_write_same_sectors(mddev->queue, mddev->chunk_sectors); - blk_queue_max_discard_sectors(mddev->queue, mddev->chunk_sectors); - } - /* if private is not null, we are here after takeover */ if (mddev->private == NULL) { ret = create_strip_zones(mddev, &conf); @@ -447,6 +427,29 @@ static int raid0_run(struct mddev *mddev) mddev->private = conf; } conf = mddev->private; + if (mddev->queue) { + struct md_rdev *rdev; + bool discard_supported = false; + + rdev_for_each(rdev, mddev) { + disk_stack_limits(mddev->gendisk, rdev->bdev, + rdev->data_offset << 9); + if (blk_queue_discard(bdev_get_queue(rdev->bdev))) + discard_supported = true; + } + blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors); + blk_queue_max_write_same_sectors(mddev->queue, mddev->chunk_sectors); + blk_queue_max_discard_sectors(mddev->queue, mddev->chunk_sectors); + + blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9); + blk_queue_io_opt(mddev->queue, + (mddev->chunk_sectors << 9) * mddev->raid_disks); + + if (!discard_supported) + queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, mddev->queue); + else + queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue); + } /* calculate array device size */ md_set_array_sectors(mddev, raid0_size(mddev, 0, 0)); From 6298be5ee4f9445a0d5003adc32a2d0967e336b8 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 24 Sep 2015 15:47:47 +1000 Subject: [PATCH 1075/2091] md/raid0: apply base queue limits *before* disk_stack_limits commit 66eefe5de11db1e0d8f2edc3880d50e7c36a9d43 upstream. Calling e.g. blk_queue_max_hw_sectors() after calls to disk_stack_limits() discards the settings determined by disk_stack_limits(). So we need to make those calls first. 
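Put differently, the array's own base limits have to be established before the per-member limits are stacked on top of them, because disk_stack_limits() can only tighten what is already set, while a later blk_queue_max_hw_sectors() simply overwrites it. A condensed sketch of the required ordering, using mddev/rdev as in the code below:

	/* 1) base limits for the array itself ... */
	blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors);
	blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9);

	/* 2) ... then stack in each member device's limits. */
	rdev_for_each(rdev, mddev)
		disk_stack_limits(mddev->gendisk, rdev->bdev,
				  rdev->data_offset << 9);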
Fixes: 199dc6ed5179 ("md/raid0: update queue parameter in a safer location.") Reported-by: Jes Sorensen Signed-off-by: NeilBrown Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid0.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 4a13c3cb940b8..0875e5e7e09ab 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -431,12 +431,6 @@ static int raid0_run(struct mddev *mddev) struct md_rdev *rdev; bool discard_supported = false; - rdev_for_each(rdev, mddev) { - disk_stack_limits(mddev->gendisk, rdev->bdev, - rdev->data_offset << 9); - if (blk_queue_discard(bdev_get_queue(rdev->bdev))) - discard_supported = true; - } blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors); blk_queue_max_write_same_sectors(mddev->queue, mddev->chunk_sectors); blk_queue_max_discard_sectors(mddev->queue, mddev->chunk_sectors); @@ -445,6 +439,12 @@ static int raid0_run(struct mddev *mddev) blk_queue_io_opt(mddev->queue, (mddev->chunk_sectors << 9) * mddev->raid_disks); + rdev_for_each(rdev, mddev) { + disk_stack_limits(mddev->gendisk, rdev->bdev, + rdev->data_offset << 9); + if (blk_queue_discard(bdev_get_queue(rdev->bdev))) + discard_supported = true; + } if (!discard_supported) queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, mddev->queue); else From a95d7d9f4cb0f61d7e0da862a95897d7bc777f62 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 2 Oct 2015 11:17:37 -0400 Subject: [PATCH 1076/2091] dm raid: fix round up of default region size commit 042745ee53a0a7c1f5aff191a4a24213c6dcfb52 upstream. Commit 3a0f9aaee028 ("dm raid: round region_size to power of two") intended to make sure that the default region size is a power of two. However, the logic in that commit is incorrect and sets the variable region_size to 0 or 1, depending on whether min_region_size is a power of two. Fix this logic, using roundup_pow_of_two(), so that region_size is properly rounded up to the next power of two. Signed-off-by: Mikulas Patocka Fixes: 3a0f9aaee028 ("dm raid: round region_size to power of two") Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-raid.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 88e4c7f249864..2c1f2e13719e7 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -327,8 +327,7 @@ static int validate_region_size(struct raid_set *rs, unsigned long region_size) */ if (min_region_size > (1 << 13)) { /* If not a power of 2, make it the next power of 2 */ - if (min_region_size & (min_region_size - 1)) - region_size = 1 << fls(region_size); + region_size = roundup_pow_of_two(min_region_size); DMINFO("Choosing default region size of %lu sectors", region_size); } else { From 16e6df022b41901165f67c30f807074fd6c4507b Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 28 Aug 2015 21:01:43 +0200 Subject: [PATCH 1077/2091] netfilter: nfnetlink: work around wrong endianess in res_id field commit a9de9777d613500b089a7416f936bf3ae5f070d2 upstream. The convention in nfnetlink is to use network byte order in every header field as well as in the attribute payload. The initial version of the batching infrastructure assumes that res_id comes in host byte order though. The only client of the batching infrastructure is nf_tables, so let's add a workaround to address this inconsistency. We currently have 11 nfnetlink subsystems according to NFNL_SUBSYS_COUNT, so we can assume that the subsystem 2560, ie. 
htons(10), will not be allocated anytime soon, so it can be an alias of nf_tables from the nfnetlink batching path when interpreting the res_id field. Based on original patch from Florian Westphal. Reported-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nfnetlink.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 8b117c90ecd76..69e3ceffa14dd 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -432,6 +432,7 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, static void nfnetlink_rcv(struct sk_buff *skb) { struct nlmsghdr *nlh = nlmsg_hdr(skb); + u_int16_t res_id; int msglen; if (nlh->nlmsg_len < NLMSG_HDRLEN || @@ -456,7 +457,12 @@ static void nfnetlink_rcv(struct sk_buff *skb) nfgenmsg = nlmsg_data(nlh); skb_pull(skb, msglen); - nfnetlink_rcv_batch(skb, nlh, nfgenmsg->res_id); + /* Work around old nft using host byte order */ + if (nfgenmsg->res_id == NFNL_SUBSYS_NFTABLES) + res_id = NFNL_SUBSYS_NFTABLES; + else + res_id = ntohs(nfgenmsg->res_id); + nfnetlink_rcv_batch(skb, nlh, res_id); } else { netlink_rcv_skb(skb, &nfnetlink_rcv_msg); } From 5e51f797611ef276603e7070fc4c39b5a43ddb21 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 12 Aug 2015 17:41:00 +0200 Subject: [PATCH 1078/2091] netfilter: nf_tables: Use 32 bit addressing register from nft_type_to_reg() commit bf798657eb5ba57552096843c315f096fdf9b715 upstream. nft_type_to_reg() needs to return the register in the new 32 bit addressing, otherwise we hit EINVAL when using mappings. Fixes: 49499c3 ("netfilter: nf_tables: switch registers to 32 bit addressing") Reported-by: Andreas Schultz Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- include/net/netfilter/nf_tables.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index e6bcf55dcf200..fd0ca42b1d63d 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -125,7 +125,7 @@ static inline enum nft_data_types nft_dreg_to_type(enum nft_registers reg) static inline enum nft_registers nft_type_to_reg(enum nft_data_types type) { - return type == NFT_DATA_VERDICT ? NFT_REG_VERDICT : NFT_REG_1; + return type == NFT_DATA_VERDICT ? NFT_REG_VERDICT : NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE; } unsigned int nft_parse_register(const struct nlattr *attr); From 99fecec5708258d5b0bcf8359581d3cfa0f34287 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Tue, 21 Jul 2015 21:37:31 -0700 Subject: [PATCH 1079/2091] netfilter: nf_conntrack: Support expectations in different zones commit 4b31814d20cbe5cd4ccf18089751e77a04afe4f2 upstream. When zones were originally introduced, the expectation functions were all extended to perform lookup using the zone. However, insertion was not modified to check the zone. This means that two expectations which are intended to apply for different connections that have the same tuple but exist in different zones cannot both be tracked. 
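In other words, a clash should require both conditions: the masked tuples intersect and the two expectations' master conntracks belong to the same zone. A condensed sketch of that predicate (tuples_intersect() is a placeholder for the existing mask comparison, not a real helper):

static bool expectations_clash(const struct nf_conntrack_expect *a,
			       const struct nf_conntrack_expect *b)
{
	/* Identical tuples in different zones describe unrelated
	 * connections and therefore never clash. */
	if (nf_ct_zone(a->master) != nf_ct_zone(b->master))
		return false;
	return tuples_intersect(a, b);
}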
Fixes: 5d0aa2ccd4 (netfilter: nf_conntrack: add support for "conntrack zones") Signed-off-by: Joe Stringer Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_conntrack_expect.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 7a17070c5dabb..b45a4223cb058 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -219,7 +219,8 @@ static inline int expect_clash(const struct nf_conntrack_expect *a, a->mask.src.u3.all[count] & b->mask.src.u3.all[count]; } - return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask); + return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask) && + nf_ct_zone(a->master) == nf_ct_zone(b->master); } static inline int expect_matches(const struct nf_conntrack_expect *a, From 5eb491ba5d0669b3435965f08f41132f9ee4274c Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 9 Jul 2015 22:56:00 +0200 Subject: [PATCH 1080/2091] netfilter: ctnetlink: put back references to master ct and expect objects commit 95dd8653de658143770cb0e55a58d2aab97c79d2 upstream. We have to put back the references to the master conntrack and the expectation that we just created, otherwise we'll leak them. Fixes: 0ef71ee1a5b9 ("netfilter: ctnetlink: refactor ctnetlink_create_expect") Reported-by: Tim Wiess Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_conntrack_netlink.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index d1c23940a86ad..6b8b0abbfab48 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -2995,11 +2995,6 @@ ctnetlink_create_expect(struct net *net, u16 zone, } err = nf_ct_expect_related_report(exp, portid, report); - if (err < 0) - goto err_exp; - - return 0; -err_exp: nf_ct_expect_put(exp); err_ct: nf_ct_put(ct); From 1ad248d2d57fdc457283072bde67931d5b564eb7 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 19 Jun 2015 14:03:39 -0500 Subject: [PATCH 1081/2091] netfilter: nf_qeueue: Drop queue entries on nf_unregister_hook commit 8405a8fff3f8545c888a872d6e3c0c8eecd4d348 upstream. Add code to nf_unregister_hook to flush the nf_queue when a hook is unregistered. This guarantees that the pointer that the nf_queue code retains into the nf_hook list will remain valid while a packet is queued. I tested what would happen if we do not flush queued packets and was trivially able to obtain the oops below. All that was required was to stop the nf_queue listening process, to delete all of the nf_tables, and to awaken the nf_queue listening process. 
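With the fix (the net/netfilter/core.c hunk further down), hook removal becomes responsible for flushing anything still queued against that hook. In sketch form, the teardown order for the nf_hook_ops being removed is:

	list_del_rcu(&reg->list);	/* unpublish the hook */
	synchronize_net();		/* wait for in-flight hook traversals */
	nf_queue_nf_hook_drop(reg);	/* drop packets still queued on it */

The oops that motivated this follows.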
> BUG: unable to handle kernel paging request at 0000000100000001 > IP: [<0000000100000001>] 0x100000001 > PGD b9c35067 PUD 0 > Oops: 0010 [#1] SMP > Modules linked in: > CPU: 0 PID: 519 Comm: lt-nfqnl_test Not tainted > task: ffff8800b9c8c050 ti: ffff8800ba9d8000 task.ti: ffff8800ba9d8000 > RIP: 0010:[<0000000100000001>] [<0000000100000001>] 0x100000001 > RSP: 0018:ffff8800ba9dba40 EFLAGS: 00010a16 > RAX: ffff8800bab48a00 RBX: ffff8800ba9dba90 RCX: ffff8800ba9dba90 > RDX: ffff8800b9c10128 RSI: ffff8800ba940900 RDI: ffff8800bab48a00 > RBP: ffff8800b9c10128 R08: ffffffff82976660 R09: ffff8800ba9dbb28 > R10: dead000000100100 R11: dead000000200200 R12: ffff8800ba940900 > R13: ffffffff8313fd50 R14: ffff8800b9c95200 R15: 0000000000000000 > FS: 00007fb91fc34700(0000) GS:ffff8800bfa00000(0000) knlGS:0000000000000000 > CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > CR2: 0000000100000001 CR3: 00000000babfb000 CR4: 00000000000007f0 > Stack: > ffffffff8206ab0f ffffffff82982240 ffff8800bab48a00 ffff8800b9c100a8 > ffff8800b9c10100 0000000000000001 ffff8800ba940900 ffff8800b9c10128 > ffffffff8206bd65 ffff8800bfb0d5e0 ffff8800bab48a00 0000000000014dc0 > Call Trace: > [] ? nf_iterate+0x4f/0xa0 > [] ? nf_reinject+0x125/0x190 > [] ? nfqnl_recv_verdict+0x255/0x360 > [] ? nla_parse+0x80/0xf0 > [] ? nfnetlink_rcv_msg+0x13c/0x240 > [] ? __memcg_kmem_get_cache+0x4c/0x150 > [] ? nfnl_lock+0x20/0x20 > [] ? netlink_rcv_skb+0xa9/0xc0 > [] ? netlink_unicast+0x12f/0x1c0 > [] ? netlink_sendmsg+0x28e/0x650 > [] ? sock_sendmsg+0x44/0x50 > [] ? ___sys_sendmsg+0x2ab/0x2c0 > [] ? __wake_up+0x43/0x70 > [] ? tty_write+0x1c4/0x2a0 > [] ? __sys_sendmsg+0x44/0x80 > [] ? system_call_fastpath+0x12/0x6a > Code: Bad RIP value. > RIP [<0000000100000001>] 0x100000001 > RSP > CR2: 0000000100000001 > ---[ end trace 08eb65d42362793f ]--- Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- include/net/netfilter/nf_queue.h | 2 ++ net/netfilter/core.c | 1 + net/netfilter/nf_internals.h | 1 + net/netfilter/nf_queue.c | 17 +++++++++++++++++ net/netfilter/nfnetlink_queue_core.c | 24 +++++++++++++++++++++++- 5 files changed, 44 insertions(+), 1 deletion(-) diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index d81d584157e11..e8635854a55bd 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -24,6 +24,8 @@ struct nf_queue_entry { struct nf_queue_handler { int (*outfn)(struct nf_queue_entry *entry, unsigned int queuenum); + void (*nf_hook_drop)(struct net *net, + struct nf_hook_ops *ops); }; void nf_register_queue_handler(const struct nf_queue_handler *qh); diff --git a/net/netfilter/core.c b/net/netfilter/core.c index e6163017c42db..5d0c6fd59475a 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -89,6 +89,7 @@ void nf_unregister_hook(struct nf_hook_ops *reg) static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]); #endif synchronize_net(); + nf_queue_nf_hook_drop(reg); } EXPORT_SYMBOL(nf_unregister_hook); diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h index ea7f36784b3da..399210693c2a8 100644 --- a/net/netfilter/nf_internals.h +++ b/net/netfilter/nf_internals.h @@ -19,6 +19,7 @@ unsigned int nf_iterate(struct list_head *head, struct sk_buff *skb, /* nf_queue.c */ int nf_queue(struct sk_buff *skb, struct nf_hook_ops *elem, struct nf_hook_state *state, unsigned int queuenum); +void nf_queue_nf_hook_drop(struct nf_hook_ops *ops); int __init netfilter_queue_init(void); /* nf_log.c */ diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 2e88032cd5ad2..cd60d397fe056 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -105,6 +105,23 @@ bool nf_queue_entry_get_refs(struct nf_queue_entry *entry) } EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs); +void nf_queue_nf_hook_drop(struct nf_hook_ops *ops) +{ + const struct nf_queue_handler *qh; + struct net *net; + + rtnl_lock(); + rcu_read_lock(); + qh = rcu_dereference(queue_handler); + if (qh) { + for_each_net(net) { + qh->nf_hook_drop(net, ops); + } + } + rcu_read_unlock(); + rtnl_unlock(); +} + /* * Any packet that leaves via this function must come back * through nf_reinject(). 
diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index 11c7682fa0ea1..32d0437abdd8a 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -824,6 +824,27 @@ static struct notifier_block nfqnl_dev_notifier = { .notifier_call = nfqnl_rcv_dev_event, }; +static int nf_hook_cmp(struct nf_queue_entry *entry, unsigned long ops_ptr) +{ + return entry->elem == (struct nf_hook_ops *)ops_ptr; +} + +static void nfqnl_nf_hook_drop(struct net *net, struct nf_hook_ops *hook) +{ + struct nfnl_queue_net *q = nfnl_queue_pernet(net); + int i; + + rcu_read_lock(); + for (i = 0; i < INSTANCE_BUCKETS; i++) { + struct nfqnl_instance *inst; + struct hlist_head *head = &q->instance_table[i]; + + hlist_for_each_entry_rcu(inst, head, hlist) + nfqnl_flush(inst, nf_hook_cmp, (unsigned long)hook); + } + rcu_read_unlock(); +} + static int nfqnl_rcv_nl_event(struct notifier_block *this, unsigned long event, void *ptr) @@ -1031,7 +1052,8 @@ static const struct nla_policy nfqa_cfg_policy[NFQA_CFG_MAX+1] = { }; static const struct nf_queue_handler nfqh = { - .outfn = &nfqnl_enqueue_packet, + .outfn = &nfqnl_enqueue_packet, + .nf_hook_drop = &nfqnl_nf_hook_drop, }; static int From a251cb2078af1e3be62847547746bf13f75c6d90 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 19 Jun 2015 10:41:21 -0500 Subject: [PATCH 1082/2091] netfilter: nftables: Do not run chains in the wrong network namespace commit fdab6a4cbd8933092155449ca7253eba973ada14 upstream. Currenlty nf_tables chains added in one network namespace are being run in all network namespace. The issues are myriad with the simplest being an unprivileged user can cause any network packets to be dropped. Address this by simply not running nf_tables chains in the wrong network namespace. Signed-off-by: "Eric W. Biederman" Acked-by: Pablo Neira Ayuso Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_tables_core.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index f153b07073afb..f77bad46ac683 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -114,7 +114,8 @@ unsigned int nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops) { const struct nft_chain *chain = ops->priv, *basechain = chain; - const struct net *net = read_pnet(&nft_base_chain(basechain)->pnet); + const struct net *chain_net = read_pnet(&nft_base_chain(basechain)->pnet); + const struct net *net = dev_net(pkt->in ? pkt->in : pkt->out); const struct nft_rule *rule; const struct nft_expr *expr, *last; struct nft_regs regs; @@ -124,6 +125,10 @@ nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops) int rulenum; unsigned int gencursor = nft_genmask_cur(net); + /* Ignore chains that are not for the current network namespace */ + if (!net_eq(net, chain_net)) + return NF_ACCEPT; + do_chain: rulenum = 0; rule = list_entry(&chain->rules, struct nft_rule, list); From cc2cc007bc8307f0f8f6e6686af2835d8fd0eecd Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 17 Sep 2015 13:37:00 +0200 Subject: [PATCH 1083/2091] netfilter: nf_log: wait for rcu grace after logger unregistration commit ad5001cc7cdf9aaee5eb213fdee657e4a3c94776 upstream. The nf_log_unregister() function needs to call synchronize_rcu() to make sure that the objects are not dereferenced anymore on module removal. 
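That is the standard RCU unpublish sequence: clear the RCU-protected pointers, wait out a grace period, and only then may the module owning the logger be freed or unloaded. Condensed from the hunk below:

	mutex_lock(&nf_log_mutex);
	for (i = 0; i < NFPROTO_NUMPROTO; i++)
		RCU_INIT_POINTER(loggers[i][logger->type], NULL);
	mutex_unlock(&nf_log_mutex);
	synchronize_rcu();	/* readers that saw the old pointer have finished */
	/* now safe for the logger's module to go away */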
Fixes: 5962815a6a56 ("netfilter: nf_log: use an array of loggers instead of list") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_log.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 675d12c69e325..3fa4f47c34c81 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -113,6 +113,7 @@ void nf_log_unregister(struct nf_logger *logger) for (i = 0; i < NFPROTO_NUMPROTO; i++) RCU_INIT_POINTER(loggers[i][logger->type], NULL); mutex_unlock(&nf_log_mutex); + synchronize_rcu(); } EXPORT_SYMBOL(nf_log_unregister); From 146560ee780bd92bda069ec7bf685ea4369ed4a3 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 14 Sep 2015 18:04:09 +0200 Subject: [PATCH 1084/2091] netfilter: nft_compat: skip family comparison in case of NFPROTO_UNSPEC commit ba378ca9c04a5fc1b2cf0f0274a9d02eb3d1bad9 upstream. Fix lookup of existing match/target structures in the corresponding list by skipping the family check if NFPROTO_UNSPEC is used. This is resulting in the allocation and insertion of one match/target structure for each use of them. So this not only bloats memory consumption but also severely affects the time to reload the ruleset from the iptables-compat utility. After this patch, iptables-compat-restore and iptables-compat take almost the same time to reload large rulesets. Fixes: 0ca743a55991 ("netfilter: nf_tables: add compatibility layer for x_tables") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nft_compat.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 7f29cfc76349f..4d05c7bf5a033 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -617,6 +617,13 @@ struct nft_xt { static struct nft_expr_type nft_match_type; +static bool nft_match_cmp(const struct xt_match *match, + const char *name, u32 rev, u32 family) +{ + return strcmp(match->name, name) == 0 && match->revision == rev && + (match->family == NFPROTO_UNSPEC || match->family == family); +} + static const struct nft_expr_ops * nft_match_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) @@ -624,7 +631,7 @@ nft_match_select_ops(const struct nft_ctx *ctx, struct nft_xt *nft_match; struct xt_match *match; char *mt_name; - __u32 rev, family; + u32 rev, family; if (tb[NFTA_MATCH_NAME] == NULL || tb[NFTA_MATCH_REV] == NULL || @@ -639,8 +646,7 @@ nft_match_select_ops(const struct nft_ctx *ctx, list_for_each_entry(nft_match, &nft_match_list, head) { struct xt_match *match = nft_match->ops.data; - if (strcmp(match->name, mt_name) == 0 && - match->revision == rev && match->family == family) { + if (nft_match_cmp(match, mt_name, rev, family)) { if (!try_module_get(match->me)) return ERR_PTR(-ENOENT); @@ -691,6 +697,13 @@ static LIST_HEAD(nft_target_list); static struct nft_expr_type nft_target_type; +static bool nft_target_cmp(const struct xt_target *tg, + const char *name, u32 rev, u32 family) +{ + return strcmp(tg->name, name) == 0 && tg->revision == rev && + (tg->family == NFPROTO_UNSPEC || tg->family == family); +} + static const struct nft_expr_ops * nft_target_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) @@ -698,7 +711,7 @@ nft_target_select_ops(const struct nft_ctx *ctx, struct nft_xt *nft_target; struct xt_target *target; char *tg_name; - __u32 rev, family; + u32 rev, family; if (tb[NFTA_TARGET_NAME] == NULL || tb[NFTA_TARGET_REV] 
== NULL || @@ -713,8 +726,7 @@ nft_target_select_ops(const struct nft_ctx *ctx, list_for_each_entry(nft_target, &nft_target_list, head) { struct xt_target *target = nft_target->ops.data; - if (strcmp(target->name, tg_name) == 0 && - target->revision == rev && target->family == family) { + if (nft_target_cmp(target, tg_name, rev, family)) { if (!try_module_get(target->me)) return ERR_PTR(-ENOENT); From a365025b053c37cfe27d16a4cf32e971cde3b41d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 9 Sep 2015 02:57:21 +0200 Subject: [PATCH 1085/2091] netfilter: nf_log: don't zap all loggers on unregister commit 205ee117d4dc4a11ac3bd9638bb9b2e839f4de9a upstream. like nf_log_unset, nf_log_unregister must not reset the list of loggers. Otherwise, a call to nf_log_unregister() will render loggers of other nf protocols unusable: iptables -A INPUT -j LOG modprobe nf_log_arp ; rmmod nf_log_arp iptables -A INPUT -j LOG iptables: No chain/target/match by that name Fixes: 30e0c6a6be ("netfilter: nf_log: prepare net namespace support for loggers") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_log.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 3fa4f47c34c81..a5d41dfa9f05d 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -107,11 +107,15 @@ EXPORT_SYMBOL(nf_log_register); void nf_log_unregister(struct nf_logger *logger) { + const struct nf_logger *log; int i; mutex_lock(&nf_log_mutex); - for (i = 0; i < NFPROTO_NUMPROTO; i++) - RCU_INIT_POINTER(loggers[i][logger->type], NULL); + for (i = 0; i < NFPROTO_NUMPROTO; i++) { + log = nft_log_dereference(loggers[i][logger->type]); + if (log == logger) + RCU_INIT_POINTER(loggers[i][logger->type], NULL); + } mutex_unlock(&nf_log_mutex); synchronize_rcu(); } From 25cbde3442a1fd07fc233bd881ded2f5bc8f4e26 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Fri, 4 Sep 2015 12:22:46 +0300 Subject: [PATCH 1086/2091] Bluetooth: Delay check for conn->smp in smp_conn_security() commit d8949aad3eab5d396f4fefcd581773bf07b9a79e upstream. There are several actions that smp_conn_security() might make that do not require a valid SMP context (conn->smp pointer). One of these actions is to encrypt the link with an existing LTK. If the SMP context wasn't initialized properly we should still allow the independent actions to be done, i.e. the check for the context should only be done at the last possible moment. 
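Condensed from the hunks below, the reordered tail of smp_conn_security() therefore looks roughly like this: the LTK path runs first, and only the remaining paths require an SMP context:

	/* Encrypting with an existing LTK needs no SMP context. */
	if (smp_ltk_encrypt(conn, hcon->pending_sec_level))
		return 0;

	/* Everything past this point does need one. */
	chan = conn->smp;
	if (!chan) {
		BT_ERR("SMP security requested but not available");
		return 1;
	}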
Reported-by: Chuck Ebbert Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/smp.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 7b815bcc8c9b5..69ad5091e2cef 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -2294,12 +2294,6 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level) if (!conn) return 1; - chan = conn->smp; - if (!chan) { - BT_ERR("SMP security requested but not available"); - return 1; - } - if (!hci_dev_test_flag(hcon->hdev, HCI_LE_ENABLED)) return 1; @@ -2313,6 +2307,12 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level) if (smp_ltk_encrypt(conn, hcon->pending_sec_level)) return 0; + chan = conn->smp; + if (!chan) { + BT_ERR("SMP security requested but not available"); + return 1; + } + l2cap_chan_lock(chan); /* If SMP is already in progress ignore this request */ From 953c972242a0772d938b391bc9d70b3c0591c20f Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Sun, 20 Sep 2015 23:03:28 +0800 Subject: [PATCH 1087/2091] NFS: Do cleanup before resetting pageio read/write to mds commit 6f29b9bba7b08c6b1d6f2cc4cf750b342fc1946c upstream. There is a reference leak of layout segment after resetting pageio read/write to mds. Signed-off-by: Kinglong Mee Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/read.c | 3 +++ fs/nfs/write.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/fs/nfs/read.c b/fs/nfs/read.c index ae0ff7a11b403..01b8cc8e8cfc4 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -72,6 +72,9 @@ void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio) { struct nfs_pgio_mirror *mirror; + if (pgio->pg_ops && pgio->pg_ops->pg_cleanup) + pgio->pg_ops->pg_cleanup(pgio); + pgio->pg_ops = &nfs_pgio_rw_ops; /* read path should never have more than one mirror */ diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 07115b9b1ad28..1fe9cbe6445a1 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1331,6 +1331,9 @@ void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio) { struct nfs_pgio_mirror *mirror; + if (pgio->pg_ops && pgio->pg_ops->pg_cleanup) + pgio->pg_ops->pg_cleanup(pgio); + pgio->pg_ops = &nfs_pgio_rw_ops; nfs_pageio_stop_mirroring(pgio); From 0fe83960ecd61b82fb436728712330b61b47d53a Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Fri, 11 Sep 2015 11:14:06 +0800 Subject: [PATCH 1088/2091] nfs: fix pg_test page count calculation commit 048883e0b934d9a5103d40e209cb14b7f33d2933 upstream. We really want sizeof(struct page *) instead. Otherwise we limit maximum IO size to 64 pages rather than 512 pages on a 64bit system. Fixes 2e11f829(nfs: cap request size to fit a kmalloced page array). Cc: Christoph Hellwig Signed-off-by: Peng Tao Fixes: 2e11f8296d22 ("nfs: cap request size to fit a kmalloced page array") Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/pagelist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 069914ce76410..93d355c8b4676 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -508,7 +508,7 @@ size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, * for it without upsetting the slab allocator. 
*/ if (((mirror->pg_count + req->wb_bytes) >> PAGE_SHIFT) * - sizeof(struct page) > PAGE_SIZE) + sizeof(struct page *) > PAGE_SIZE) return 0; return min(mirror->pg_bsize - mirror->pg_count, (size_t)req->wb_bytes); From 896f0858ce91e3207ea5bd59abc23fb58b2c159a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 1 Oct 2015 18:38:27 -0400 Subject: [PATCH 1089/2091] NFS: Fix a write performance regression commit 8fa4592a14ebb3c22a21d846d1e4f65dab7d1a7c upstream. If all other conditions in nfs_can_extend_write() are met, and there are no locks, then we should be able to assume close-to-open semantics and the ability to extend our write to cover the whole page. With this patch, the xfstests generic/074 test completes in 242s instead of >1400s on my test rig. Fixes: bd61e0a9c852 ("locks: convert posix locks to file_lock_context") Cc: Jeff Layton Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/write.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 1fe9cbe6445a1..d9851a6a28138 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1203,7 +1203,7 @@ static int nfs_can_extend_write(struct file *file, struct page *page, struct ino return 1; if (!flctx || (list_empty_careful(&flctx->flc_flock) && list_empty_careful(&flctx->flc_posix))) - return 0; + return 1; /* Check to see if there are whole file write locks */ ret = 0; From f1f22082dfd9398f548af6fb169dc5fbfbec1508 Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 24 Sep 2015 00:52:37 -0500 Subject: [PATCH 1090/2091] Fix sec=krb5 on smb3 mounts commit ceb1b0b9b4d1089e9f2731a314689ae17784c861 upstream. Kerberos, which is very important for security, was only enabled for CIFS not SMB2/SMB3 mounts (e.g. vers=3.0) Patch based on the information detailed in http://thread.gmane.org/gmane.linux.kernel.cifs/10081/focus=10307 to enable Kerberized SMB2/SMB3 a) SMB2_negotiate: enable/use decode_negTokenInit in SMB2_negotiate b) SMB2_sess_setup: handle Kerberos sectype and replicate Kerberos SMB1 processing done in sess_auth_kerberos Signed-off-by: Noel Power Signed-off-by: Jim McDonough Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2pdu.c | 76 +++++++++++++++++++++++++++++++++++++---------- 1 file changed, 61 insertions(+), 15 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 54cbe19d9c087..894f259d3989b 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -46,6 +46,7 @@ #include "smb2status.h" #include "smb2glob.h" #include "cifspdu.h" +#include "cifs_spnego.h" /* * The following table defines the expected "StructureSize" of SMB2 requests @@ -427,19 +428,15 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) cifs_dbg(FYI, "missing security blob on negprot\n"); rc = cifs_enable_signing(server, ses->sign); -#ifdef CONFIG_SMB2_ASN1 /* BB REMOVEME when updated asn1.c ready */ if (rc) goto neg_exit; - if (blob_length) + if (blob_length) { rc = decode_negTokenInit(security_blob, blob_length, server); - if (rc == 1) - rc = 0; - else if (rc == 0) { - rc = -EIO; - goto neg_exit; + if (rc == 1) + rc = 0; + else if (rc == 0) + rc = -EIO; } -#endif - neg_exit: free_rsp_buf(resp_buftype, rsp); return rc; @@ -533,7 +530,8 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses, __le32 phase = NtLmNegotiate; /* NTLMSSP, if needed, is multistage */ struct TCP_Server_Info *server = ses->server; u16 blob_length = 0; - char *security_blob; + struct key *spnego_key = NULL; + char *security_blob = NULL; char 
*ntlmssp_blob = NULL; bool use_spnego = false; /* else use raw ntlmssp */ @@ -561,7 +559,8 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses, ses->ntlmssp->sesskey_per_smbsess = true; /* FIXME: allow for other auth types besides NTLMSSP (e.g. krb5) */ - ses->sectype = RawNTLMSSP; + if (ses->sectype != Kerberos && ses->sectype != RawNTLMSSP) + ses->sectype = RawNTLMSSP; ssetup_ntlmssp_authenticate: if (phase == NtLmChallenge) @@ -590,7 +589,48 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses, iov[0].iov_base = (char *)req; /* 4 for rfc1002 length field and 1 for pad */ iov[0].iov_len = get_rfc1002_length(req) + 4 - 1; - if (phase == NtLmNegotiate) { + + if (ses->sectype == Kerberos) { +#ifdef CONFIG_CIFS_UPCALL + struct cifs_spnego_msg *msg; + + spnego_key = cifs_get_spnego_key(ses); + if (IS_ERR(spnego_key)) { + rc = PTR_ERR(spnego_key); + spnego_key = NULL; + goto ssetup_exit; + } + + msg = spnego_key->payload.data; + /* + * check version field to make sure that cifs.upcall is + * sending us a response in an expected form + */ + if (msg->version != CIFS_SPNEGO_UPCALL_VERSION) { + cifs_dbg(VFS, + "bad cifs.upcall version. Expected %d got %d", + CIFS_SPNEGO_UPCALL_VERSION, msg->version); + rc = -EKEYREJECTED; + goto ssetup_exit; + } + ses->auth_key.response = kmemdup(msg->data, msg->sesskey_len, + GFP_KERNEL); + if (!ses->auth_key.response) { + cifs_dbg(VFS, + "Kerberos can't allocate (%u bytes) memory", + msg->sesskey_len); + rc = -ENOMEM; + goto ssetup_exit; + } + ses->auth_key.len = msg->sesskey_len; + blob_length = msg->secblob_len; + iov[1].iov_base = msg->data + msg->sesskey_len; + iov[1].iov_len = blob_length; +#else + rc = -EOPNOTSUPP; + goto ssetup_exit; +#endif /* CONFIG_CIFS_UPCALL */ + } else if (phase == NtLmNegotiate) { /* if not krb5 must be ntlmssp */ ntlmssp_blob = kmalloc(sizeof(struct _NEGOTIATE_MESSAGE), GFP_KERNEL); if (ntlmssp_blob == NULL) { @@ -613,6 +653,8 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses, /* with raw NTLMSSP we don't encapsulate in SPNEGO */ security_blob = ntlmssp_blob; } + iov[1].iov_base = security_blob; + iov[1].iov_len = blob_length; } else if (phase == NtLmAuthenticate) { req->hdr.SessionId = ses->Suid; ntlmssp_blob = kzalloc(sizeof(struct _NEGOTIATE_MESSAGE) + 500, @@ -640,6 +682,8 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses, } else { security_blob = ntlmssp_blob; } + iov[1].iov_base = security_blob; + iov[1].iov_len = blob_length; } else { cifs_dbg(VFS, "illegal ntlmssp phase\n"); rc = -EIO; @@ -651,8 +695,6 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses, cpu_to_le16(sizeof(struct smb2_sess_setup_req) - 1 /* pad */ - 4 /* rfc1001 len */); req->SecurityBufferLength = cpu_to_le16(blob_length); - iov[1].iov_base = security_blob; - iov[1].iov_len = blob_length; inc_rfc1001_len(req, blob_length - 1 /* pad */); @@ -663,6 +705,7 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses, kfree(security_blob); rsp = (struct smb2_sess_setup_rsp *)iov[0].iov_base; + ses->Suid = rsp->hdr.SessionId; if (resp_buftype != CIFS_NO_BUFFER && rsp->hdr.Status == STATUS_MORE_PROCESSING_REQUIRED) { if (phase != NtLmNegotiate) { @@ -680,7 +723,6 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses, /* NTLMSSP Negotiate sent now processing challenge (response) */ phase = NtLmChallenge; /* process ntlmssp challenge */ rc = 0; /* MORE_PROCESSING is not an error here but expected */ - ses->Suid = rsp->hdr.SessionId; rc = decode_ntlmssp_challenge(rsp->Buffer, 
le16_to_cpu(rsp->SecurityBufferLength), ses); } @@ -737,6 +779,10 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses, kfree(ses->auth_key.response); ses->auth_key.response = NULL; } + if (spnego_key) { + key_invalidate(spnego_key); + key_put(spnego_key); + } kfree(ses->ntlmssp); return rc; From 52213f9e44c7396c64fcb32b48b62e34871f2763 Mon Sep 17 00:00:00 2001 From: Steve French Date: Tue, 22 Sep 2015 09:29:38 -0500 Subject: [PATCH 1091/2091] disabling oplocks/leases via module parm enable_oplocks broken for SMB3 commit e0ddde9d44e37fbc21ce893553094ecf1a633ab5 upstream. leases (oplocks) were always requested for SMB2/SMB3 even when oplocks disabled in the cifs.ko module. Signed-off-by: Steve French Reviewed-by: Chandrika Srinivasan Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2ops.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 54daee5ad4c10..1678b9cb94c7e 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -50,9 +50,13 @@ change_conf(struct TCP_Server_Info *server) break; default: server->echoes = true; - server->oplocks = true; + if (enable_oplocks) { + server->oplocks = true; + server->oplock_credits = 1; + } else + server->oplocks = false; + server->echo_credits = 1; - server->oplock_credits = 1; } server->credits -= server->echo_credits + server->oplock_credits; return 0; From 5ceb4f5dd1059664aa4093e88e64edd3955f3d21 Mon Sep 17 00:00:00 2001 From: Steve French Date: Mon, 28 Sep 2015 17:21:07 -0500 Subject: [PATCH 1092/2091] Do not fall back to SMBWriteX in set_file_size error cases commit 646200a041203f440fb6fcf9cacd9efeda9de74c upstream. The error paths in set_file_size for cifs and smb3 are incorrect. In the unlikely event that a server did not support set file info of the file size, the code incorrectly falls back to trying SMBWriteX (note that only the original core SMB Write, used for example by DOS, can set the file size this way - this actually does not work for the more recent SMBWriteX). The idea was since the old DOS SMB Write could set the file size if you write zero bytes at that offset then use that if server rejects the normal set file info call. Fortunately the SMBWriteX will never be sent on the wire (except when file size is zero) since the length and offset fields were reversed in the two places in this function that call SMBWriteX causing the fall back path to return an error. It is also important to never call an SMB request from an SMB2/sMB3 session (which theoretically would be possible, and can cause a brief session drop, although the client recovers) so this should be fixed. In practice this path does not happen with modern servers but the error fall back to SMBWriteX is clearly wrong. 
Removing the calls to SMBWriteX in the error paths in cifs_set_file_size Pointed out by PaX/grsecurity team Signed-off-by: Steve French Reported-by: PaX Team CC: Emese Revfy CC: Brad Spengler Signed-off-by: Greg Kroah-Hartman --- fs/cifs/inode.c | 34 ---------------------------------- 1 file changed, 34 deletions(-) diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index f621b44cb8009..6b66dd5d15408 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -2034,7 +2034,6 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs, struct tcon_link *tlink = NULL; struct cifs_tcon *tcon = NULL; struct TCP_Server_Info *server; - struct cifs_io_parms io_parms; /* * To avoid spurious oplock breaks from server, in the case of @@ -2056,18 +2055,6 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs, rc = -ENOSYS; cifsFileInfo_put(open_file); cifs_dbg(FYI, "SetFSize for attrs rc = %d\n", rc); - if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) { - unsigned int bytes_written; - - io_parms.netfid = open_file->fid.netfid; - io_parms.pid = open_file->pid; - io_parms.tcon = tcon; - io_parms.offset = 0; - io_parms.length = attrs->ia_size; - rc = CIFSSMBWrite(xid, &io_parms, &bytes_written, - NULL, NULL, 1); - cifs_dbg(FYI, "Wrt seteof rc %d\n", rc); - } } else rc = -EINVAL; @@ -2093,28 +2080,7 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs, else rc = -ENOSYS; cifs_dbg(FYI, "SetEOF by path (setattrs) rc = %d\n", rc); - if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) { - __u16 netfid; - int oplock = 0; - rc = SMBLegacyOpen(xid, tcon, full_path, FILE_OPEN, - GENERIC_WRITE, CREATE_NOT_DIR, &netfid, - &oplock, NULL, cifs_sb->local_nls, - cifs_remap(cifs_sb)); - if (rc == 0) { - unsigned int bytes_written; - - io_parms.netfid = netfid; - io_parms.pid = current->tgid; - io_parms.tcon = tcon; - io_parms.offset = 0; - io_parms.length = attrs->ia_size; - rc = CIFSSMBWrite(xid, &io_parms, &bytes_written, NULL, - NULL, 1); - cifs_dbg(FYI, "wrt seteof rc %d\n", rc); - CIFSSMBClose(xid, tcon, netfid); - } - } if (tlink) cifs_put_tlink(tlink); From 827630e9731d77aeac08a8569f18685278f07ac4 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 14 Sep 2015 10:28:34 +1000 Subject: [PATCH 1093/2091] drm/qxl: only report first monitor as connected if we have no state commit 69e5d3f893e19613486f300fd6e631810338aa4b upstream. If the server isn't new enough to give us state, report the first monitor as always connected, otherwise believe the server side. 
Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/qxl/qxl_display.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c index 32248791bc4b0..42b3dbbe8004e 100644 --- a/drivers/gpu/drm/qxl/qxl_display.c +++ b/drivers/gpu/drm/qxl/qxl_display.c @@ -886,13 +886,15 @@ static enum drm_connector_status qxl_conn_detect( drm_connector_to_qxl_output(connector); struct drm_device *ddev = connector->dev; struct qxl_device *qdev = ddev->dev_private; - int connected; + bool connected = false; /* The first monitor is always connected */ - connected = (output->index == 0) || - (qdev->client_monitors_config && - qdev->client_monitors_config->count > output->index && - qxl_head_enabled(&qdev->client_monitors_config->heads[output->index])); + if (!qdev->client_monitors_config) { + if (output->index == 0) + connected = true; + } else + connected = qdev->client_monitors_config->count > output->index && + qxl_head_enabled(&qdev->client_monitors_config->heads[output->index]); DRM_DEBUG("#%d connected: %d\n", output->index, connected); if (!connected) From 83777bf17d3bc20ef777e18aa3391eb3f5b11043 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Thu, 24 Sep 2015 15:18:34 +0200 Subject: [PATCH 1094/2091] drm/qxl: recreate the primary surface when the bo is not primary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 8d0d94015e96b8853c4f7f06eac3f269e1b3d866 upstream. When disabling/enabling a crtc the primary area must be updated independently of which crtc has been disabled/enabled. Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=1264735 Signed-off-by: Fabiano Fidêncio Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/qxl/qxl_display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c index 42b3dbbe8004e..52921a8712303 100644 --- a/drivers/gpu/drm/qxl/qxl_display.c +++ b/drivers/gpu/drm/qxl/qxl_display.c @@ -618,7 +618,7 @@ static int qxl_crtc_mode_set(struct drm_crtc *crtc, adjusted_mode->hdisplay, adjusted_mode->vdisplay); - if (qcrtc->index == 0) + if (bo->is_primary == false) recreate_primary = true; if (bo->surf.stride * bo->surf.height > qdev->vram_size) { From eea5a642208204cc2999d1f03f5db7177c40159a Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 17 Sep 2015 16:42:07 +0300 Subject: [PATCH 1095/2091] drm/i915/bios: handle MIPI Sequence Block v3+ gracefully commit cd67d226ebd909d239d2c6e5a6abd6e2a338d1cd upstream. The VBT MIPI Sequence Block version 3 has forward incompatible changes: First, the block size in the header has been specified reserved, and the actual size is a separate 32-bit value within the block. The current find_section() function will only look at the size in the block header, and, depending on what's in that now reserved size field, continue looking for other sections in the wrong place. Fix this by taking the new block size field into account. This will ensure that the lookups for other sections will work properly, as long as the new 32-bit size does not go beyond the opregion VBT mailbox size. Second, the contents of the block have been completely changed. Gracefully refuse parsing the yet unknown data version.
Cc: Deepak M Reviewed-by: Deepak M Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_bios.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index c684085cb56ac..fadf9865709e5 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -41,7 +41,7 @@ find_section(struct bdb_header *bdb, int section_id) { u8 *base = (u8 *)bdb; int index = 0; - u16 total, current_size; + u32 total, current_size; u8 current_id; /* skip to first section */ @@ -56,6 +56,10 @@ find_section(struct bdb_header *bdb, int section_id) current_size = *((u16 *)(base + index)); index += 2; + /* The MIPI Sequence Block v3+ has a separate size field. */ + if (current_id == BDB_MIPI_SEQUENCE && *(base + index) >= 3) + current_size = *((const u32 *)(base + index + 1)); + if (index + current_size > total) return NULL; @@ -845,6 +849,12 @@ parse_mipi(struct drm_i915_private *dev_priv, struct bdb_header *bdb) return; } + /* Fail gracefully for forward incompatible sequence block. */ + if (sequence->version >= 3) { + DRM_ERROR("Unable to parse MIPI Sequence Block v3+\n"); + return; + } + DRM_DEBUG_DRIVER("Found MIPI sequence block\n"); block_size = get_blocksize(sequence); From 64822f988a306843157002fa8536ae64df73a3d0 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 23 Jun 2015 11:34:21 +0200 Subject: [PATCH 1096/2091] drm: Reject DRI1 hw lock ioctl functions for kms drivers commit da168d81b44898404d281d5dbe70154ab5f117c1 upstream. I've done some extensive history digging across libdrm, mesa and xf86-video-{intel,nouveau,ati}. The only potential user of this with kms drivers I could find was ttmtest, which once used drmGetLock still. But that mistake was quickly fixed up. Even the intel xvmc library (which otherwise was really good with using dri1 stuff in kms mode) managed to never take the hw lock for dri2 (and hence kms). Hence it should be safe to unconditionally disallow this. Cc: Peter Antoine Reviewed-by: Peter Antoine Signed-off-by: Daniel Vetter Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_lock.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/drm_lock.c b/drivers/gpu/drm/drm_lock.c index f861361a635e0..4924d381b6642 100644 --- a/drivers/gpu/drm/drm_lock.c +++ b/drivers/gpu/drm/drm_lock.c @@ -61,6 +61,9 @@ int drm_legacy_lock(struct drm_device *dev, void *data, struct drm_master *master = file_priv->master; int ret = 0; + if (drm_core_check_feature(dev, DRIVER_MODESET)) + return -EINVAL; + ++file_priv->lock_count; if (lock->context == DRM_KERNEL_CONTEXT) { @@ -153,6 +156,9 @@ int drm_legacy_unlock(struct drm_device *dev, void *data, struct drm_file *file_ struct drm_lock *lock = data; struct drm_master *master = file_priv->master; + if (drm_core_check_feature(dev, DRIVER_MODESET)) + return -EINVAL; + if (lock->context == DRM_KERNEL_CONTEXT) { DRM_ERROR("Process %d using kernel context %d\n", task_pid_nr(current), lock->context); From 27b9cf2edc7aac66c229c847411443cecc338720 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Mon, 28 Sep 2015 18:16:31 +0900 Subject: [PATCH 1097/2091] drm/radeon: Restore LCD backlight level on resume (>= R5xx) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 4281f46ef839050d2ef60348f661eb463c21cc2e upstream.
Instead of only enabling the backlight (which seems to set it to max brightness), just re-set the current backlight level, which also takes care of enabling the backlight if necessary. Only the radeon_atom_encoder_dpms_dig part tested on a Kaveri laptop, the radeon_atom_encoder_dpms_avivo part is only compile tested. Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/atombios_encoders.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c index dd39f434b4a7e..b4ff4c134fbb4 100644 --- a/drivers/gpu/drm/radeon/atombios_encoders.c +++ b/drivers/gpu/drm/radeon/atombios_encoders.c @@ -1624,8 +1624,9 @@ radeon_atom_encoder_dpms_avivo(struct drm_encoder *encoder, int mode) } else atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args); if (radeon_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) { - args.ucAction = ATOM_LCD_BLON; - atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args); + struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv; + + atombios_set_backlight_level(radeon_encoder, dig->backlight_level); } break; case DRM_MODE_DPMS_STANDBY: @@ -1706,8 +1707,7 @@ radeon_atom_encoder_dpms_dig(struct drm_encoder *encoder, int mode) atombios_dig_encoder_setup(encoder, ATOM_ENCODER_CMD_DP_VIDEO_ON, 0); } if (radeon_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) - atombios_dig_transmitter_setup(encoder, - ATOM_TRANSMITTER_ACTION_LCD_BLON, 0, 0); + atombios_set_backlight_level(radeon_encoder, dig->backlight_level); if (ext_encoder) atombios_external_encoder_setup(encoder, ext_encoder, ATOM_ENABLE); break; From ba8a7feeb68433c76fa35e4c8277e3af5460ead8 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 30 Sep 2015 10:39:42 +1000 Subject: [PATCH 1098/2091] drm/dp/mst: drop cancel work sync in the mstb destroy path (v2) commit 274d83524895fe41ca8debae4eec60ede7252bb5 upstream. Since 9eb1e57f564d4e6e10991402726cc83fe0b9172f drm/dp/mst: make sure mst_primary mstb is valid in work function we validate the mstb structs in the work function, and doing that takes a reference. So we should never get here with the work function running using the mstb device, only if the work function hasn't run yet or is running for another mstb. So we don't need to sync the work here, this was causing lockdep spew as below. 
[ +0.000160] ============================================= [ +0.000001] [ INFO: possible recursive locking detected ] [ +0.000002] 3.10.0-320.el7.rhel72.stable.backport.3.x86_64.debug #1 Tainted: G W ------------ [ +0.000001] --------------------------------------------- [ +0.000001] kworker/4:2/1262 is trying to acquire lock: [ +0.000001] ((&mgr->work)){+.+.+.}, at: [] flush_work+0x5/0x2e0 [ +0.000007] but task is already holding lock: [ +0.000001] ((&mgr->work)){+.+.+.}, at: [] process_one_work+0x1b4/0x710 [ +0.000004] other info that might help us debug this: [ +0.000001] Possible unsafe locking scenario: [ +0.000002] CPU0 [ +0.000000] ---- [ +0.000001] lock((&mgr->work)); [ +0.000002] lock((&mgr->work)); [ +0.000001] *** DEADLOCK *** [ +0.000001] May be due to missing lock nesting notation [ +0.000002] 2 locks held by kworker/4:2/1262: [ +0.000001] #0: (events_long){.+.+.+}, at: [] process_one_work+0x1b4/0x710 [ +0.000004] #1: ((&mgr->work)){+.+.+.}, at: [] process_one_work+0x1b4/0x710 [ +0.000003] stack backtrace: [ +0.000003] CPU: 4 PID: 1262 Comm: kworker/4:2 Tainted: G W ------------ 3.10.0-320.el7.rhel72.stable.backport.3.x86_64.debug #1 [ +0.000001] Hardware name: LENOVO 20EGS0R600/20EGS0R600, BIOS GNET71WW (2.19 ) 02/05/2015 [ +0.000008] Workqueue: events_long drm_dp_mst_link_probe_work [drm_kms_helper] [ +0.000001] ffffffff82c26c90 00000000a527b914 ffff88046399bae8 ffffffff816fe04d [ +0.000004] ffff88046399bb58 ffffffff8110f47f ffff880461438000 0001009b840fc003 [ +0.000002] ffff880461438a98 0000000000000000 0000000804dc26e1 ffffffff824a2c00 [ +0.000003] Call Trace: [ +0.000004] [] dump_stack+0x19/0x1b [ +0.000004] [] __lock_acquire+0x115f/0x1250 [ +0.000002] [] lock_acquire+0x99/0x1e0 [ +0.000002] [] ? flush_work+0x5/0x2e0 [ +0.000002] [] flush_work+0x4e/0x2e0 [ +0.000002] [] ? flush_work+0x5/0x2e0 [ +0.000004] [] ? native_sched_clock+0x35/0x80 [ +0.000002] [] ? sched_clock+0x9/0x10 [ +0.000002] [] ? local_clock+0x25/0x30 [ +0.000002] [] ? mark_held_locks+0xb9/0x140 [ +0.000003] [] ? __cancel_work_timer+0x95/0x160 [ +0.000002] [] __cancel_work_timer+0xa8/0x160 [ +0.000002] [] cancel_work_sync+0x10/0x20 [ +0.000007] [] drm_dp_destroy_mst_branch_device+0x27/0x120 [drm_kms_helper] [ +0.000006] [] drm_dp_mst_link_probe_work+0x78/0xa0 [drm_kms_helper] [ +0.000002] [] process_one_work+0x220/0x710 [ +0.000002] [] ? process_one_work+0x1b4/0x710 [ +0.000005] [] worker_thread+0x11b/0x3a0 [ +0.000003] [] ? process_one_work+0x710/0x710 [ +0.000002] [] kthread+0xed/0x100 [ +0.000003] [] ? insert_kthread_work+0x80/0x80 [ +0.000003] [] ret_from_fork+0x58/0x90 v2: add flush_work. 
Reviewed-by: Daniel Vetter Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_dp_mst_topology.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index b0487c9f018cf..7f467fdc9107a 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -804,8 +804,6 @@ static void drm_dp_destroy_mst_branch_device(struct kref *kref) struct drm_dp_mst_port *port, *tmp; bool wake_tx = false; - cancel_work_sync(&mstb->mgr->work); - /* * destroy all ports - don't need lock * as there are no more references to the mst branch @@ -1977,6 +1975,8 @@ void drm_dp_mst_topology_mgr_suspend(struct drm_dp_mst_topology_mgr *mgr) drm_dp_dpcd_writeb(mgr->aux, DP_MSTM_CTRL, DP_MST_EN | DP_UPSTREAM_IS_SRC); mutex_unlock(&mgr->lock); + flush_work(&mgr->work); + flush_work(&mgr->destroy_connector_work); } EXPORT_SYMBOL(drm_dp_mst_topology_mgr_suspend); @@ -2730,6 +2730,7 @@ EXPORT_SYMBOL(drm_dp_mst_topology_mgr_init); */ void drm_dp_mst_topology_mgr_destroy(struct drm_dp_mst_topology_mgr *mgr) { + flush_work(&mgr->work); flush_work(&mgr->destroy_connector_work); mutex_lock(&mgr->payload_lock); kfree(mgr->payloads); From 44f73be485f66dfeca7c6a5e334a7a11b97a4151 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 23 Sep 2015 11:41:42 -0700 Subject: [PATCH 1099/2091] USB: whiteheat: fix potential null-deref at probe commit cbb4be652d374f64661137756b8f357a1827d6a4 upstream. Fix potential null-pointer dereference at probe by making sure that the required endpoints are present. The whiteheat driver assumes there are at least five pairs of bulk endpoints, of which the final pair is used for the "command port". An attempt to bind to an interface with fewer bulk endpoints would currently lead to an oops. Fixes CVE-2015-5257. 
Reported-by: Moein Ghasemzadeh Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/whiteheat.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/drivers/usb/serial/whiteheat.c b/drivers/usb/serial/whiteheat.c index 6c3734d2b45a7..d3ea90bef84d9 100644 --- a/drivers/usb/serial/whiteheat.c +++ b/drivers/usb/serial/whiteheat.c @@ -80,6 +80,8 @@ static int whiteheat_firmware_download(struct usb_serial *serial, static int whiteheat_firmware_attach(struct usb_serial *serial); /* function prototypes for the Connect Tech WhiteHEAT serial converter */ +static int whiteheat_probe(struct usb_serial *serial, + const struct usb_device_id *id); static int whiteheat_attach(struct usb_serial *serial); static void whiteheat_release(struct usb_serial *serial); static int whiteheat_port_probe(struct usb_serial_port *port); @@ -116,6 +118,7 @@ static struct usb_serial_driver whiteheat_device = { .description = "Connect Tech - WhiteHEAT", .id_table = id_table_std, .num_ports = 4, + .probe = whiteheat_probe, .attach = whiteheat_attach, .release = whiteheat_release, .port_probe = whiteheat_port_probe, @@ -217,6 +220,34 @@ static int whiteheat_firmware_attach(struct usb_serial *serial) /***************************************************************************** * Connect Tech's White Heat serial driver functions *****************************************************************************/ + +static int whiteheat_probe(struct usb_serial *serial, + const struct usb_device_id *id) +{ + struct usb_host_interface *iface_desc; + struct usb_endpoint_descriptor *endpoint; + size_t num_bulk_in = 0; + size_t num_bulk_out = 0; + size_t min_num_bulk; + unsigned int i; + + iface_desc = serial->interface->cur_altsetting; + + for (i = 0; i < iface_desc->desc.bNumEndpoints; i++) { + endpoint = &iface_desc->endpoint[i].desc; + if (usb_endpoint_is_bulk_in(endpoint)) + ++num_bulk_in; + if (usb_endpoint_is_bulk_out(endpoint)) + ++num_bulk_out; + } + + min_num_bulk = COMMAND_PORT + 1; + if (num_bulk_in < min_num_bulk || num_bulk_out < min_num_bulk) + return -ENODEV; + + return 0; +} + static int whiteheat_attach(struct usb_serial *serial) { struct usb_serial_port *command_port; From eb8e51323be935f52aec7c164deb1c1d86f8f706 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Mon, 21 Sep 2015 17:46:10 +0300 Subject: [PATCH 1100/2091] xhci: give command abortion one more chance before killing xhci commit a6809ffd1687b3a8c192960e69add559b9d32649 upstream. We want to give the command abortion an additional try to stop the command ring before we completely hose xhci. 
Tested-by: Vincent Pelletier Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index b3a0a2275f5a4..aae8126e486d3 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -302,6 +302,15 @@ static int xhci_abort_cmd_ring(struct xhci_hcd *xhci) ret = xhci_handshake(&xhci->op_regs->cmd_ring, CMD_RING_RUNNING, 0, 5 * 1000 * 1000); if (ret < 0) { + /* we are about to kill xhci, give it one more chance */ + xhci_write_64(xhci, temp_64 | CMD_RING_ABORT, + &xhci->op_regs->cmd_ring); + udelay(1000); + ret = xhci_handshake(&xhci->op_regs->cmd_ring, + CMD_RING_RUNNING, 0, 3 * 1000 * 1000); + if (ret == 0) + return 0; + xhci_err(xhci, "Stopped the command ring failed, " "maybe the host is dead\n"); xhci->xhc_state |= XHCI_STATE_DYING; From 04563b9427dd1a3728775ad96fbd37541e612999 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Mon, 21 Sep 2015 17:46:12 +0300 Subject: [PATCH 1101/2091] usb: xhci: lock mutex on xhci_stop commit 85ac90f8953a58f6a057b727bc9db97721e3fb8e upstream. Else it races with xhci_setup_device Signed-off-by: Roger Quadros Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index c502c2277aebf..a7508c2db18d2 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -683,8 +683,11 @@ void xhci_stop(struct usb_hcd *hcd) u32 temp; struct xhci_hcd *xhci = hcd_to_xhci(hcd); + mutex_lock(&xhci->mutex); + if (!usb_hcd_is_primary_hcd(hcd)) { xhci_only_stop_hcd(xhci->shared_hcd); + mutex_unlock(&xhci->mutex); return; } @@ -723,6 +726,7 @@ void xhci_stop(struct usb_hcd *hcd) xhci_dbg_trace(xhci, trace_xhci_dbg_init, "xhci_stop completed - status = %x", readl(&xhci->op_regs->status)); + mutex_unlock(&xhci->mutex); } /* From 4881cd43051c1763cd86c526b789ad05fa1ab7a4 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Mon, 21 Sep 2015 17:46:13 +0300 Subject: [PATCH 1102/2091] usb: xhci: Clear XHCI_STATE_DYING on start commit e5bfeab0ad515b4f6df39fe716603e9dc6d3dfd0 upstream. For whatever reason if XHCI died in the previous instant then it will never recover on the next xhci_start unless we clear the DYING flag. Signed-off-by: Roger Quadros Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index a7508c2db18d2..244bcf55863f3 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -146,7 +146,8 @@ static int xhci_start(struct xhci_hcd *xhci) "waited %u microseconds.\n", XHCI_MAX_HALT_USEC); if (!ret) - xhci->xhc_state &= ~XHCI_STATE_HALTED; + xhci->xhc_state &= ~(XHCI_STATE_HALTED | XHCI_STATE_DYING); + return ret; } From adaca10d389b17d75339e1338b929da45e6254a4 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Mon, 21 Sep 2015 17:46:15 +0300 Subject: [PATCH 1103/2091] usb: xhci: exit early in xhci_setup_device() if we're halted or dying commit 448116bfa856d3c076fa7178ed96661a008a5d45 upstream. During quick plug/removal of OTG adapter during dual-role testing it can happen that xhci_alloc_device() is called for the newly detected device after the DRD library has called xhci_stop to remove the HCD. If that is the case, just fail early to prevent the following warning. 
[ 154.732649] hub 4-0:1.0: USB hub found [ 154.742204] hub 4-0:1.0: 1 port detected [ 154.824458] hub 3-0:1.0: state 7 ports 1 chg 0002 evt 0000 [ 154.854609] hub 4-0:1.0: state 7 ports 1 chg 0000 evt 0000 [ 154.944430] usb 3-1: new high-speed USB device number 2 using xhci-hcd [ 154.951009] xhci-hcd xhci-hcd.0.auto: xhci_setup_device [ 155.038191] xhci-hcd xhci-hcd.0.auto: remove, state 4 [ 155.043315] usb usb4: USB disconnect, device number 1 [ 155.055270] xhci-hcd xhci-hcd.0.auto: xhci_stop [ 155.060094] xhci-hcd xhci-hcd.0.auto: USB bus 4 deregistered [ 155.066576] xhci-hcd xhci-hcd.0.auto: remove, state 1 [ 155.071710] usb usb3: USB disconnect, device number 1 [ 155.077124] xhci-hcd xhci-hcd.0.auto: xhci_setup_device [ 155.082389] ------------[ cut here ]------------ [ 155.087690] WARNING: CPU: 0 PID: 72 at drivers/usb/host/xhci.c:3800 xhci_setup_device+0x410/0x484 [xhci_hcd]() [ 155.097861] Modules linked in: sd_mod usb_storage scsi_mod usb_f_ss_lb g_zero libcomposite ipv6 xhci_plat_hcd xhci_hcd usbcore dwc3 udc_core evdev ti_am335x_adc joydev kfifo_buf industrialio snd_soc_simple_cc [ 155.146734] CPU: 0 PID: 72 Comm: kworker/0:3 Tainted: G W 4.1.4-00834-gcd9380b-dirty #50 [ 155.156073] Hardware name: Generic AM43 (Flattened Device Tree) [ 155.162117] Workqueue: usb_hub_wq hub_event [usbcore] [ 155.167249] Backtrace: [ 155.169751] [] (dump_backtrace) from [] (show_stack+0x18/0x1c) [ 155.177390] r6:c089d4a4 r5:ffffffff r4:00000000 r3:ee46c000 [ 155.183137] [] (show_stack) from [] (dump_stack+0x84/0xd0) [ 155.190446] [] (dump_stack) from [] (warn_slowpath_common+0x80/0xbc) [ 155.198605] r7:00000009 r6:00000ed8 r5:bf27eb70 r4:00000000 [ 155.204348] [] (warn_slowpath_common) from [] (warn_slowpath_null+0x24/0x2c) [ 155.213202] r8:ee49f000 r7:ee7c0004 r6:00000000 r5:ee7c0158 r4:ee7c0000 [ 155.220051] [] (warn_slowpath_null) from [] (xhci_setup_device+0x410/0x484 [xhci_hcd]) [ 155.229816] [] (xhci_setup_device [xhci_hcd]) from [] (xhci_address_device+0x14/0x18 [xhci_hcd]) [ 155.240415] r10:ee598200 r9:00000001 r8:00000002 r7:00000001 r6:00000003 r5:00000002 [ 155.248363] r4:ee49f000 [ 155.250978] [] (xhci_address_device [xhci_hcd]) from [] (hub_port_init+0x1b8/0xa9c [usbcore]) [ 155.261403] [] (hub_port_init [usbcore]) from [] (hub_event+0x738/0x1020 [usbcore]) [ 155.270874] r10:ee598200 r9:ee7c0000 r8:ee7c0038 r7:ee518800 r6:ee49f000 r5:00000001 [ 155.278822] r4:00000000 [ 155.281426] [] (hub_event [usbcore]) from [] (process_one_work+0x128/0x340) [ 155.290196] r10:00000000 r9:00000003 r8:00000000 r7:fedfa000 r6:eeec5400 r5:ee598314 [ 155.298151] r4:ee434380 [ 155.300718] [] (process_one_work) from [] (worker_thread+0x158/0x49c) [ 155.308963] r10:ee434380 r9:00000003 r8:eeec5400 r7:00000008 r6:ee434398 r5:eeec5400 [ 155.316913] r4:eeec5414 [ 155.319482] [] (worker_thread) from [] (kthread+0xdc/0xf8) [ 155.326765] r10:00000000 r9:00000000 r8:00000000 r7:c00577a0 r6:ee434380 r5:ee4441c0 [ 155.334713] r4:00000000 r3:00000000 [ 155.338341] [] (kthread) from [] (ret_from_fork+0x14/0x2c) [ 155.345626] r7:00000000 r6:00000000 r5:c005cb64 r4:ee4441c0 [ 155.356108] ---[ end trace a58d34c223b190e6 ]--- [ 155.360783] xhci-hcd xhci-hcd.0.auto: Virt dev invalid for slot_id 0x1! 
[ 155.574404] xhci-hcd xhci-hcd.0.auto: xhci_setup_device [ 155.579667] ------------[ cut here ]------------ Signed-off-by: Roger Quadros Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 244bcf55863f3..981c0791431e7 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -3795,6 +3795,9 @@ static int xhci_setup_device(struct usb_hcd *hcd, struct usb_device *udev, mutex_lock(&xhci->mutex); + if (xhci->xhc_state) /* dying or halted */ + goto out; + if (!udev->slot_id) { xhci_dbg_trace(xhci, trace_xhci_dbg_address, "Bad Slot ID %d", udev->slot_id); From 0dee698fa202b662e9d2f8f2a44db5b2f68f44d7 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Mon, 21 Sep 2015 17:46:16 +0300 Subject: [PATCH 1104/2091] xhci: change xhci 1.0 only restrictions to support xhci 1.1 commit dca7794539eff04b786fb6907186989e5eaaa9c2 upstream. Some changes between xhci 0.96 and xhci 1.0 specifications forced us to check the hci version in code, some of these checks were implemented as hci_version == 1.0, which will not work with new xhci 1.1 controllers. xhci 1.1 behaves similar to xhci 1.0 in these cases, so change these checks to hci_version >= 1.0 Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mem.c | 6 +++--- drivers/usb/host/xhci-ring.c | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index 9a8c936cd42c1..8497fb8f61933 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -1498,10 +1498,10 @@ int xhci_endpoint_init(struct xhci_hcd *xhci, * use Event Data TRBs, and we don't chain in a link TRB on short * transfers, we're basically dividing by 1. * - * xHCI 1.0 specification indicates that the Average TRB Length should - * be set to 8 for control endpoints. + * xHCI 1.0 and 1.1 specification indicates that the Average TRB Length + * should be set to 8 for control endpoints. */ - if (usb_endpoint_xfer_control(&ep->desc) && xhci->hci_version == 0x100) + if (usb_endpoint_xfer_control(&ep->desc) && xhci->hci_version >= 0x100) ep_ctx->tx_info |= cpu_to_le32(AVG_TRB_LENGTH_FOR_EP(8)); else ep_ctx->tx_info |= diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index aae8126e486d3..e9dd586c4b01b 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -3394,8 +3394,8 @@ int xhci_queue_ctrl_tx(struct xhci_hcd *xhci, gfp_t mem_flags, if (start_cycle == 0) field |= 0x1; - /* xHCI 1.0 6.4.1.2.1: Transfer Type field */ - if (xhci->hci_version == 0x100) { + /* xHCI 1.0/1.1 6.4.1.2.1: Transfer Type field */ + if (xhci->hci_version >= 0x100) { if (urb->transfer_buffer_length > 0) { if (setup->bRequestType & USB_DIR_IN) field |= TRB_TX_TYPE(TRB_DATA_IN); From bc1a2e4e22baf5f6eeada5be042c208b1147fc1b Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Mon, 21 Sep 2015 17:46:17 +0300 Subject: [PATCH 1105/2091] xhci: init command timeout timer earlier to avoid deleting it uninitialized commit cc8e4fc0c3b5e8340bc8358990515d116a3c274c upstream. Don't check if timer is running with a timer_pending() before deleting it with del_timer_sync(), this defies the whole point of the sync part and can cause a possible race. Instead we just want to make sure the timer is initialized early enough before we have a chance to delete it. 
Reported-by: Oliver Neukum Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mem.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index 8497fb8f61933..41f841fa6c4de 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -1792,8 +1792,7 @@ void xhci_mem_cleanup(struct xhci_hcd *xhci) int size; int i, j, num_ports; - if (timer_pending(&xhci->cmd_timer)) - del_timer_sync(&xhci->cmd_timer); + del_timer_sync(&xhci->cmd_timer); /* Free the Event Ring Segment Table and the actual Event Ring */ size = sizeof(struct xhci_erst_entry)*(xhci->erst.num_entries); @@ -2321,6 +2320,10 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags) INIT_LIST_HEAD(&xhci->cmd_list); + /* init command timeout timer */ + setup_timer(&xhci->cmd_timer, xhci_handle_command_timeout, + (unsigned long)xhci); + page_size = readl(&xhci->op_regs->page_size); xhci_dbg_trace(xhci, trace_xhci_dbg_init, "Supported page size register = 0x%x", page_size); @@ -2505,10 +2508,6 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags) "Wrote ERST address to ir_set 0."); xhci_print_ir_set(xhci, 0); - /* init command timeout timer */ - setup_timer(&xhci->cmd_timer, xhci_handle_command_timeout, - (unsigned long)xhci); - /* * XXX: Might need to set the Interrupter Moderation Register to * something other than the default (~1ms minimum between interrupts). From 4e34c36a797dc4ca1e0b9b97f668628801dd882b Mon Sep 17 00:00:00 2001 From: Reyad Attiyat Date: Thu, 6 Aug 2015 19:23:58 +0300 Subject: [PATCH 1106/2091] usb: xhci: Add support for URB_ZERO_PACKET to bulk/sg transfers commit 4758dcd19a7d9ba9610b38fecb93f65f56f86346 upstream. This commit checks for the URB_ZERO_PACKET flag and creates an extra zero-length td if the urb transfer length is a multiple of the endpoint's max packet length. 
Signed-off-by: Reyad Attiyat Signed-off-by: Mathias Nyman Cc: Oliver Neukum Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 66 ++++++++++++++++++++++++++++-------- drivers/usb/host/xhci.c | 5 +++ 2 files changed, 57 insertions(+), 14 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index e9dd586c4b01b..ad975a2975ca4 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -3050,9 +3050,11 @@ static int queue_bulk_sg_tx(struct xhci_hcd *xhci, gfp_t mem_flags, struct xhci_td *td; struct scatterlist *sg; int num_sgs; - int trb_buff_len, this_sg_len, running_total; + int trb_buff_len, this_sg_len, running_total, ret; unsigned int total_packet_count; + bool zero_length_needed; bool first_trb; + int last_trb_num; u64 addr; bool more_trbs_coming; @@ -3068,13 +3070,27 @@ static int queue_bulk_sg_tx(struct xhci_hcd *xhci, gfp_t mem_flags, total_packet_count = DIV_ROUND_UP(urb->transfer_buffer_length, usb_endpoint_maxp(&urb->ep->desc)); - trb_buff_len = prepare_transfer(xhci, xhci->devs[slot_id], + ret = prepare_transfer(xhci, xhci->devs[slot_id], ep_index, urb->stream_id, num_trbs, urb, 0, mem_flags); - if (trb_buff_len < 0) - return trb_buff_len; + if (ret < 0) + return ret; urb_priv = urb->hcpriv; + + /* Deal with URB_ZERO_PACKET - need one more td/trb */ + zero_length_needed = urb->transfer_flags & URB_ZERO_PACKET && + urb_priv->length == 2; + if (zero_length_needed) { + num_trbs++; + xhci_dbg(xhci, "Creating zero length td.\n"); + ret = prepare_transfer(xhci, xhci->devs[slot_id], + ep_index, urb->stream_id, + 1, urb, 1, mem_flags); + if (ret < 0) + return ret; + } + td = urb_priv->td[0]; /* @@ -3104,6 +3120,7 @@ static int queue_bulk_sg_tx(struct xhci_hcd *xhci, gfp_t mem_flags, trb_buff_len = urb->transfer_buffer_length; first_trb = true; + last_trb_num = zero_length_needed ? 2 : 1; /* Queue the first TRB, even if it's zero-length */ do { u32 field = 0; @@ -3121,12 +3138,15 @@ static int queue_bulk_sg_tx(struct xhci_hcd *xhci, gfp_t mem_flags, /* Chain all the TRBs together; clear the chain bit in the last * TRB to indicate it's the last TRB in the chain. 
*/ - if (num_trbs > 1) { + if (num_trbs > last_trb_num) { field |= TRB_CHAIN; - } else { - /* FIXME - add check for ZERO_PACKET flag before this */ + } else if (num_trbs == last_trb_num) { td->last_trb = ep_ring->enqueue; field |= TRB_IOC; + } else if (zero_length_needed && num_trbs == 1) { + trb_buff_len = 0; + urb_priv->td[1]->last_trb = ep_ring->enqueue; + field |= TRB_IOC; } /* Only set interrupt on short packet for IN endpoints */ @@ -3188,7 +3208,7 @@ static int queue_bulk_sg_tx(struct xhci_hcd *xhci, gfp_t mem_flags, if (running_total + trb_buff_len > urb->transfer_buffer_length) trb_buff_len = urb->transfer_buffer_length - running_total; - } while (running_total < urb->transfer_buffer_length); + } while (num_trbs > 0); check_trb_math(urb, num_trbs, running_total); giveback_first_trb(xhci, slot_id, ep_index, urb->stream_id, @@ -3206,7 +3226,9 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags, int num_trbs; struct xhci_generic_trb *start_trb; bool first_trb; + int last_trb_num; bool more_trbs_coming; + bool zero_length_needed; int start_cycle; u32 field, length_field; @@ -3237,7 +3259,6 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags, num_trbs++; running_total += TRB_MAX_BUFF_SIZE; } - /* FIXME: this doesn't deal with URB_ZERO_PACKET - need one more */ ret = prepare_transfer(xhci, xhci->devs[slot_id], ep_index, urb->stream_id, @@ -3246,6 +3267,20 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags, return ret; urb_priv = urb->hcpriv; + + /* Deal with URB_ZERO_PACKET - need one more td/trb */ + zero_length_needed = urb->transfer_flags & URB_ZERO_PACKET && + urb_priv->length == 2; + if (zero_length_needed) { + num_trbs++; + xhci_dbg(xhci, "Creating zero length td.\n"); + ret = prepare_transfer(xhci, xhci->devs[slot_id], + ep_index, urb->stream_id, + 1, urb, 1, mem_flags); + if (ret < 0) + return ret; + } + td = urb_priv->td[0]; /* @@ -3267,7 +3302,7 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags, trb_buff_len = urb->transfer_buffer_length; first_trb = true; - + last_trb_num = zero_length_needed ? 2 : 1; /* Queue the first TRB, even if it's zero-length */ do { u32 remainder = 0; @@ -3284,12 +3319,15 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags, /* Chain all the TRBs together; clear the chain bit in the last * TRB to indicate it's the last TRB in the chain. 
*/ - if (num_trbs > 1) { + if (num_trbs > last_trb_num) { field |= TRB_CHAIN; - } else { - /* FIXME - add check for ZERO_PACKET flag before this */ + } else if (num_trbs == last_trb_num) { td->last_trb = ep_ring->enqueue; field |= TRB_IOC; + } else if (zero_length_needed && num_trbs == 1) { + trb_buff_len = 0; + urb_priv->td[1]->last_trb = ep_ring->enqueue; + field |= TRB_IOC; } /* Only set interrupt on short packet for IN endpoints */ @@ -3327,7 +3365,7 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags, trb_buff_len = urb->transfer_buffer_length - running_total; if (trb_buff_len > TRB_MAX_BUFF_SIZE) trb_buff_len = TRB_MAX_BUFF_SIZE; - } while (running_total < urb->transfer_buffer_length); + } while (num_trbs > 0); check_trb_math(urb, num_trbs, running_total); giveback_first_trb(xhci, slot_id, ep_index, urb->stream_id, diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 981c0791431e7..26f62b2b33f84 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -1345,6 +1345,11 @@ int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flags) if (usb_endpoint_xfer_isoc(&urb->ep->desc)) size = urb->number_of_packets; + else if (usb_endpoint_is_bulk_out(&urb->ep->desc) && + urb->transfer_buffer_length > 0 && + urb->transfer_flags & URB_ZERO_PACKET && + !(urb->transfer_buffer_length % usb_endpoint_maxp(&urb->ep->desc))) + size = 2; else size = 1; From 7983297d99ea11152a76420d4325f5d1925e2547 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 30 Sep 2015 12:48:40 -0400 Subject: [PATCH 1107/2091] Initialize msg/shm IPC objects before doing ipc_addid() commit b9a532277938798b53178d5a66af6e2915cb27cf upstream. As reported by Dmitry Vyukov, we really shouldn't do ipc_addid() before having initialized the IPC object state. Yes, we initialize the IPC object in a locked state, but with all the lockless RCU lookup work, that IPC object lock no longer means that the state cannot be seen. We already did this for the IPC semaphore code (see commit e8577d1f0329: "ipc/sem.c: fully initialize sem_array before making it visible") but we clearly forgot about msg and shm. Reported-by: Dmitry Vyukov Cc: Manfred Spraul Cc: Davidlohr Bueso Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- ipc/msg.c | 14 +++++++------- ipc/shm.c | 13 +++++++------ ipc/util.c | 8 ++++---- 3 files changed, 18 insertions(+), 17 deletions(-) diff --git a/ipc/msg.c b/ipc/msg.c index 2b6fdbb9e0e9a..652540613d265 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -137,13 +137,6 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params) return retval; } - /* ipc_addid() locks msq upon success. */ - id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni); - if (id < 0) { - ipc_rcu_putref(msq, msg_rcu_free); - return id; - } - msq->q_stime = msq->q_rtime = 0; msq->q_ctime = get_seconds(); msq->q_cbytes = msq->q_qnum = 0; @@ -153,6 +146,13 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params) INIT_LIST_HEAD(&msq->q_receivers); INIT_LIST_HEAD(&msq->q_senders); + /* ipc_addid() locks msq upon success. 
*/ + id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni); + if (id < 0) { + ipc_rcu_putref(msq, msg_rcu_free); + return id; + } + ipc_unlock_object(&msq->q_perm); rcu_read_unlock(); diff --git a/ipc/shm.c b/ipc/shm.c index 6d767071c3673..499a8bd22fad1 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -550,12 +550,6 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) if (IS_ERR(file)) goto no_file; - id = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni); - if (id < 0) { - error = id; - goto no_id; - } - shp->shm_cprid = task_tgid_vnr(current); shp->shm_lprid = 0; shp->shm_atim = shp->shm_dtim = 0; @@ -564,6 +558,13 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) shp->shm_nattch = 0; shp->shm_file = file; shp->shm_creator = current; + + id = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni); + if (id < 0) { + error = id; + goto no_id; + } + list_add(&shp->shm_clist, ¤t->sysvshm.shm_clist); /* diff --git a/ipc/util.c b/ipc/util.c index ff3323ef8d8b4..c917e9fd10b13 100644 --- a/ipc/util.c +++ b/ipc/util.c @@ -237,6 +237,10 @@ int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int size) rcu_read_lock(); spin_lock(&new->lock); + current_euid_egid(&euid, &egid); + new->cuid = new->uid = euid; + new->gid = new->cgid = egid; + id = idr_alloc(&ids->ipcs_idr, new, (next_id < 0) ? 0 : ipcid_to_idx(next_id), 0, GFP_NOWAIT); @@ -249,10 +253,6 @@ int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int size) ids->in_use++; - current_euid_egid(&euid, &egid); - new->cuid = new->uid = euid; - new->gid = new->cgid = egid; - if (next_id < 0) { new->seq = ids->seq++; if (ids->seq > IPCID_SEQ_MAX) From c04b33c93dc0c566b02ac573528bb82ac6065666 Mon Sep 17 00:00:00 2001 From: Ben Segall Date: Mon, 6 Apr 2015 15:28:10 -0700 Subject: [PATCH 1108/2091] sched/fair: Prevent throttling in early pick_next_task_fair() commit 54d27365cae88fbcc853b391dcd561e71acb81fa upstream. The optimized task selection logic optimistically selects a new task to run without first doing a full put_prev_task(). This is so that we can avoid a put/set on the common ancestors of the old and new task. Similarly, we should only call check_cfs_rq_runtime() to throttle eligible groups if they're part of the common ancestry, otherwise it is possible to end up with no eligible task in the simple task selection. Imagine: /root /prev /next /A /B If our optimistic selection ends up throttling /next, we goto simple and our put_prev_task() ends up throttling /prev, after which we're going to bug out in set_next_entity() because there aren't any tasks left. Avoid this scenario by only throttling common ancestors. Reported-by: Mohammed Naser Reported-by: Konstantin Khlebnikov Signed-off-by: Ben Segall [ munged Changelog ] Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Roman Gushchin Cc: Thomas Gleixner Cc: pjt@google.com Fixes: 678d5718d8d0 ("sched/fair: Optimize cgroup pick_next_task_fair()") Link: http://lkml.kernel.org/r/xm26wq1oswoq.fsf@sword-of-the-dawn.mtv.corp.google.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/sched/fair.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index c2980e8733bcb..77690b653ca9e 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5126,18 +5126,21 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev) * entity, update_curr() will update its vruntime, otherwise * forget we've ever seen it. */ - if (curr && curr->on_rq) - update_curr(cfs_rq); - else - curr = NULL; + if (curr) { + if (curr->on_rq) + update_curr(cfs_rq); + else + curr = NULL; - /* - * This call to check_cfs_rq_runtime() will do the throttle and - * dequeue its entity in the parent(s). Therefore the 'simple' - * nr_running test will indeed be correct. - */ - if (unlikely(check_cfs_rq_runtime(cfs_rq))) - goto simple; + /* + * This call to check_cfs_rq_runtime() will do the + * throttle and dequeue its entity in the parent(s). + * Therefore the 'simple' nr_running test will indeed + * be correct. + */ + if (unlikely(check_cfs_rq_runtime(cfs_rq))) + goto simple; + } se = pick_next_entity(cfs_rq, curr); cfs_rq = group_cfs_rq(se); From 66196fff4dddb5ff617b45f991f83a25f7629e1f Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Fri, 24 Jul 2015 17:39:21 +0100 Subject: [PATCH 1109/2091] serial/amba-pl011: Disable interrupts around TX softirq pl011_tx_softirq() currently uses spin_{,un}lock(), which are not sufficient to inhibit pl011_int() from being triggered by a local IRQ and trying to re-take the same lock. This can lead to deadlocks. This patch uses the _irq() locking variants instead to ensure that pl011_int() handling for a given port is deferred until any pl011_tx_softirq() work for that port is complete. Notes for stable: This patch fixes an issue that is fixed by the following upstream commit, which is a more substantial rewrite of the affected code, fixing multiple, mostly more minor issues: 1e84d22322ceed4767db1e5342c830dd60c8210f serial/amba-pl011: Refactor and simplify TX FIFO handling The upstream patch was rejected for stable on the reasonable grounds that it was too big and complex a patch. The original buggy code was merged in v4.1, and the rewrite was merged in v4.2, leaving only v4.1 affected. This patch replaces the 1e84d22, for 4.1.x only. 
Fixes: 734745caeb9f serial/amba-pl011: Activate TX IRQ passively Signed-off-by: Dave Martin Tested-by: Robin Murphy Tested-by: Stefan Wahren Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/amba-pl011.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c index 763eb20fe3213..0cc622afb67d4 100644 --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c @@ -1360,9 +1360,9 @@ static void pl011_tx_softirq(struct work_struct *work) struct uart_amba_port *uap = container_of(dwork, struct uart_amba_port, tx_softirq_work); - spin_lock(&uap->port.lock); + spin_lock_irq(&uap->port.lock); while (pl011_tx_chars(uap)) ; - spin_unlock(&uap->port.lock); + spin_unlock_irq(&uap->port.lock); } static void pl011_tx_irq_seen(struct uart_amba_port *uap) From f1d62fb20245bc89d6ba93d829763450250a592b Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sat, 27 Jun 2015 14:39:30 +0300 Subject: [PATCH 1110/2091] ipvs: do not use random local source address for tunnels commit 4754957f04f5f368792a0eb7dab0ae89fb93dcfd upstream. Michael Vallaly reports about wrong source address used in rare cases for tunneled traffic. Looks like __ip_vs_get_out_rt in 3.10+ is providing uninitialized dest_dst->dst_saddr.ip because ip_vs_dest_dst_alloc uses kmalloc. While we retry after seeing EINVAL from routing for data that does not look like valid local address, it still succeeded when this memory was previously used from other dests and with different local addresses. As result, we can use valid local address that is not suitable for our real server. Fix it by providing 0.0.0.0 every time our cache is refreshed. By this way we will get preferred source address from routing. Reported-by: Michael Vallaly Fixes: 026ace060dfe ("ipvs: optimize dst usage for real server") Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman Signed-off-by: Greg Kroah-Hartman --- net/netfilter/ipvs/ip_vs_xmit.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 19986ec5f21ad..5fb7fd9deb8d1 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -130,7 +130,6 @@ static struct rtable *do_output_route4(struct net *net, __be32 daddr, memset(&fl4, 0, sizeof(fl4)); fl4.daddr = daddr; - fl4.saddr = (rt_mode & IP_VS_RT_MODE_CONNECT) ? *saddr : 0; fl4.flowi4_flags = (rt_mode & IP_VS_RT_MODE_KNOWN_NH) ? FLOWI_FLAG_KNOWN_NH : 0; From 4ec8fb23158797affae7993c15beba080488482f Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Mon, 29 Jun 2015 21:51:40 +0300 Subject: [PATCH 1111/2091] ipvs: fix crash if scheduler is changed commit 05f00505a89acd21f5d0d20f5797dfbc4cf85243 upstream. I overlooked the svc->sched_data usage from schedulers when the services were converted to RCU in 3.10. Now the rare ipvsadm -E command can change the scheduler but due to the reverse order of ip_vs_bind_scheduler and ip_vs_unbind_scheduler we provide new sched_data to the old scheduler resulting in a crash. To fix it without changing the scheduler methods we have to use synchronize_rcu() only for the editing case. It means all svc->scheduler readers should expect a NULL value. To avoid breakage for the service listing and ipvsadm -R we can use the "none" name to indicate that scheduler is not assigned, a state when we drop new connections. 
Reported-by: Alexander Vasiliev Fixes: ceec4c381681 ("ipvs: convert services to rcu") Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman Signed-off-by: Greg Kroah-Hartman --- net/netfilter/ipvs/ip_vs_core.c | 16 ++++++- net/netfilter/ipvs/ip_vs_ctl.c | 78 ++++++++++++++++++++------------ net/netfilter/ipvs/ip_vs_sched.c | 12 ++--- 3 files changed, 69 insertions(+), 37 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 5d2b806a862e6..38fbc194b9cb7 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -319,7 +319,13 @@ ip_vs_sched_persist(struct ip_vs_service *svc, * return *ignored=0 i.e. ICMP and NF_DROP */ sched = rcu_dereference(svc->scheduler); - dest = sched->schedule(svc, skb, iph); + if (sched) { + /* read svc->sched_data after svc->scheduler */ + smp_rmb(); + dest = sched->schedule(svc, skb, iph); + } else { + dest = NULL; + } if (!dest) { IP_VS_DBG(1, "p-schedule: no dest found.\n"); kfree(param.pe_data); @@ -467,7 +473,13 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, } sched = rcu_dereference(svc->scheduler); - dest = sched->schedule(svc, skb, iph); + if (sched) { + /* read svc->sched_data after svc->scheduler */ + smp_rmb(); + dest = sched->schedule(svc, skb, iph); + } else { + dest = NULL; + } if (dest == NULL) { IP_VS_DBG(1, "Schedule: no dest found.\n"); return NULL; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 285eae3a14548..24c554201a766 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -842,15 +842,16 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, __ip_vs_dst_cache_reset(dest); spin_unlock_bh(&dest->dst_lock); - sched = rcu_dereference_protected(svc->scheduler, 1); if (add) { ip_vs_start_estimator(svc->net, &dest->stats); list_add_rcu(&dest->n_list, &svc->destinations); svc->num_dests++; - if (sched->add_dest) + sched = rcu_dereference_protected(svc->scheduler, 1); + if (sched && sched->add_dest) sched->add_dest(svc, dest); } else { - if (sched->upd_dest) + sched = rcu_dereference_protected(svc->scheduler, 1); + if (sched && sched->upd_dest) sched->upd_dest(svc, dest); } } @@ -1084,7 +1085,7 @@ static void __ip_vs_unlink_dest(struct ip_vs_service *svc, struct ip_vs_scheduler *sched; sched = rcu_dereference_protected(svc->scheduler, 1); - if (sched->del_dest) + if (sched && sched->del_dest) sched->del_dest(svc, dest); } } @@ -1175,11 +1176,14 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, ip_vs_use_count_inc(); /* Lookup the scheduler by 'u->sched_name' */ - sched = ip_vs_scheduler_get(u->sched_name); - if (sched == NULL) { - pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name); - ret = -ENOENT; - goto out_err; + if (strcmp(u->sched_name, "none")) { + sched = ip_vs_scheduler_get(u->sched_name); + if (!sched) { + pr_info("Scheduler module ip_vs_%s not found\n", + u->sched_name); + ret = -ENOENT; + goto out_err; + } } if (u->pe_name && *u->pe_name) { @@ -1240,10 +1244,12 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, spin_lock_init(&svc->stats.lock); /* Bind the scheduler */ - ret = ip_vs_bind_scheduler(svc, sched); - if (ret) - goto out_err; - sched = NULL; + if (sched) { + ret = ip_vs_bind_scheduler(svc, sched); + if (ret) + goto out_err; + sched = NULL; + } /* Bind the ct retriever */ RCU_INIT_POINTER(svc->pe, pe); @@ -1291,17 +1297,20 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, 
static int ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) { - struct ip_vs_scheduler *sched, *old_sched; + struct ip_vs_scheduler *sched = NULL, *old_sched; struct ip_vs_pe *pe = NULL, *old_pe = NULL; int ret = 0; /* * Lookup the scheduler, by 'u->sched_name' */ - sched = ip_vs_scheduler_get(u->sched_name); - if (sched == NULL) { - pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name); - return -ENOENT; + if (strcmp(u->sched_name, "none")) { + sched = ip_vs_scheduler_get(u->sched_name); + if (!sched) { + pr_info("Scheduler module ip_vs_%s not found\n", + u->sched_name); + return -ENOENT; + } } old_sched = sched; @@ -1329,14 +1338,20 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) old_sched = rcu_dereference_protected(svc->scheduler, 1); if (sched != old_sched) { + if (old_sched) { + ip_vs_unbind_scheduler(svc, old_sched); + RCU_INIT_POINTER(svc->scheduler, NULL); + /* Wait all svc->sched_data users */ + synchronize_rcu(); + } /* Bind the new scheduler */ - ret = ip_vs_bind_scheduler(svc, sched); - if (ret) { - old_sched = sched; - goto out; + if (sched) { + ret = ip_vs_bind_scheduler(svc, sched); + if (ret) { + ip_vs_scheduler_put(sched); + goto out; + } } - /* Unbind the old scheduler on success */ - ip_vs_unbind_scheduler(svc, old_sched); } /* @@ -1982,6 +1997,7 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v) const struct ip_vs_iter *iter = seq->private; const struct ip_vs_dest *dest; struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler); + char *sched_name = sched ? sched->name : "none"; if (iter->table == ip_vs_svc_table) { #ifdef CONFIG_IP_VS_IPV6 @@ -1990,18 +2006,18 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v) ip_vs_proto_name(svc->protocol), &svc->addr.in6, ntohs(svc->port), - sched->name); + sched_name); else #endif seq_printf(seq, "%s %08X:%04X %s %s ", ip_vs_proto_name(svc->protocol), ntohl(svc->addr.ip), ntohs(svc->port), - sched->name, + sched_name, (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":""); } else { seq_printf(seq, "FWM %08X %s %s", - svc->fwmark, sched->name, + svc->fwmark, sched_name, (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":""); } @@ -2427,13 +2443,15 @@ ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src) { struct ip_vs_scheduler *sched; struct ip_vs_kstats kstats; + char *sched_name; sched = rcu_dereference_protected(src->scheduler, 1); + sched_name = sched ? sched->name : "none"; dst->protocol = src->protocol; dst->addr = src->addr.ip; dst->port = src->port; dst->fwmark = src->fwmark; - strlcpy(dst->sched_name, sched->name, sizeof(dst->sched_name)); + strlcpy(dst->sched_name, sched_name, sizeof(dst->sched_name)); dst->flags = src->flags; dst->timeout = src->timeout / HZ; dst->netmask = src->netmask; @@ -2892,6 +2910,7 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb, struct ip_vs_flags flags = { .flags = svc->flags, .mask = ~0 }; struct ip_vs_kstats kstats; + char *sched_name; nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE); if (!nl_service) @@ -2910,8 +2929,9 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb, } sched = rcu_dereference_protected(svc->scheduler, 1); + sched_name = sched ? 
sched->name : "none"; pe = rcu_dereference_protected(svc->pe, 1); - if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, sched->name) || + if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, sched_name) || (pe && nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, pe->name)) || nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) || nla_put_u32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ) || diff --git a/net/netfilter/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c index 199760c71f399..7e81416479434 100644 --- a/net/netfilter/ipvs/ip_vs_sched.c +++ b/net/netfilter/ipvs/ip_vs_sched.c @@ -74,7 +74,7 @@ void ip_vs_unbind_scheduler(struct ip_vs_service *svc, if (sched->done_service) sched->done_service(svc); - /* svc->scheduler can not be set to NULL */ + /* svc->scheduler can be set to NULL only by caller */ } @@ -147,21 +147,21 @@ void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler) void ip_vs_scheduler_err(struct ip_vs_service *svc, const char *msg) { - struct ip_vs_scheduler *sched; + struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler); + char *sched_name = sched ? sched->name : "none"; - sched = rcu_dereference(svc->scheduler); if (svc->fwmark) { IP_VS_ERR_RL("%s: FWM %u 0x%08X - %s\n", - sched->name, svc->fwmark, svc->fwmark, msg); + sched_name, svc->fwmark, svc->fwmark, msg); #ifdef CONFIG_IP_VS_IPV6 } else if (svc->af == AF_INET6) { IP_VS_ERR_RL("%s: %s [%pI6c]:%d - %s\n", - sched->name, ip_vs_proto_name(svc->protocol), + sched_name, ip_vs_proto_name(svc->protocol), &svc->addr.in6, ntohs(svc->port), msg); #endif } else { IP_VS_ERR_RL("%s: %s %pI4:%d - %s\n", - sched->name, ip_vs_proto_name(svc->protocol), + sched_name, ip_vs_proto_name(svc->protocol), &svc->addr.ip, ntohs(svc->port), msg); } } From 64d449fd0659221fb927bf2257a750052fe1a8e3 Mon Sep 17 00:00:00 2001 From: Alex Gartrell Date: Sun, 5 Jul 2015 14:28:26 -0700 Subject: [PATCH 1112/2091] ipvs: skb_orphan in case of forwarding commit 71563f3414e917c62acd8e0fb0edf8ed6af63e4b upstream. It is possible that we bind against a local socket in early_demux when we are actually going to want to forward it. In this case, the socket serves no purpose and only serves to confuse things (particularly functions which implicitly expect sk_fullsock to be true, like ip_local_out). Additionally, skb_set_owner_w is totally broken for non full-socks. Signed-off-by: Alex Gartrell Fixes: 41063e9dd119 ("ipv4: Early TCP socket demux.") Acked-by: Julian Anastasov Signed-off-by: Simon Horman Signed-off-by: Greg Kroah-Hartman --- net/netfilter/ipvs/ip_vs_xmit.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 5fb7fd9deb8d1..e9d0e1b524f71 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -522,6 +522,21 @@ static inline int ip_vs_tunnel_xmit_prepare(struct sk_buff *skb, return ret; } +/* In the event of a remote destination, it's possible that we would have + * matches against an old socket (particularly a TIME-WAIT socket). This + * causes havoc down the line (ip_local_out et. al. expect regular sockets + * and invalid memory accesses will happen) so simply drop the association + * in this case. +*/ +static inline void ip_vs_drop_early_demux_sk(struct sk_buff *skb) +{ + /* If dev is set, the packet came from the LOCAL_IN callback and + * not from a local TCP socket. 
+ */ + if (skb->dev) + skb_orphan(skb); +} + /* return NF_STOLEN (sent) or NF_ACCEPT if local=1 (not sent) */ static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb, struct ip_vs_conn *cp, int local) @@ -533,12 +548,21 @@ static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb, ip_vs_notrack(skb); else ip_vs_update_conntrack(skb, cp, 1); + + /* Remove the early_demux association unless it's bound for the + * exact same port and address on this host after translation. + */ + if (!local || cp->vport != cp->dport || + !ip_vs_addr_equal(cp->af, &cp->vaddr, &cp->daddr)) + ip_vs_drop_early_demux_sk(skb); + if (!local) { skb_forward_csum(skb); NF_HOOK(pf, NF_INET_LOCAL_OUT, NULL, skb, NULL, skb_dst(skb)->dev, dst_output_sk); } else ret = NF_ACCEPT; + return ret; } @@ -552,6 +576,7 @@ static inline int ip_vs_send_or_cont(int pf, struct sk_buff *skb, if (likely(!(cp->flags & IP_VS_CONN_F_NFCT))) ip_vs_notrack(skb); if (!local) { + ip_vs_drop_early_demux_sk(skb); skb_forward_csum(skb); NF_HOOK(pf, NF_INET_LOCAL_OUT, NULL, skb, NULL, skb_dst(skb)->dev, dst_output_sk); @@ -840,6 +865,8 @@ ip_vs_prepare_tunneled_skb(struct sk_buff *skb, int skb_af, struct ipv6hdr *old_ipv6h = NULL; #endif + ip_vs_drop_early_demux_sk(skb); + if (skb_headroom(skb) < max_headroom || skb_cloned(skb)) { new_skb = skb_realloc_headroom(skb, max_headroom); if (!new_skb) From 6f2abd993cda96fc118bab69a82f9f73233dae59 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Wed, 8 Jul 2015 08:31:33 +0300 Subject: [PATCH 1113/2091] ipvs: fix crash with sync protocol v0 and FTP commit 56184858d1fc95c46723436b455cb7261cd8be6f upstream. Fix crash in 3.5+ if FTP is used after switching sync_version to 0. Fixes: 749c42b620a9 ("ipvs: reduce sync rate with time thresholds") Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman Signed-off-by: Greg Kroah-Hartman --- net/netfilter/ipvs/ip_vs_sync.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 19b9cce6c210c..150047c739fa6 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -612,7 +612,7 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp, pkts = atomic_add_return(1, &cp->in_pkts); else pkts = sysctl_sync_threshold(ipvs); - ip_vs_sync_conn(net, cp->control, pkts); + ip_vs_sync_conn(net, cp, pkts); } } From 5dadf3a1f95d56e117e0ac5b1be502bad398c6ad Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Thu, 9 Jul 2015 11:15:27 +0300 Subject: [PATCH 1114/2091] ipvs: call skb_sender_cpu_clear commit e3895c0334d0ef46e80f22eaf2a52401ff6d5a67 upstream. Reset XPS's sender_cpu on forwarding. 
Signed-off-by: Julian Anastasov Fixes: 2bd82484bb4c ("xps: fix xps for stacked devices") Signed-off-by: Simon Horman Signed-off-by: Greg Kroah-Hartman --- net/netfilter/ipvs/ip_vs_xmit.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index e9d0e1b524f71..258f1e05250fa 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -518,6 +518,8 @@ static inline int ip_vs_tunnel_xmit_prepare(struct sk_buff *skb, if (ret == NF_ACCEPT) { nf_reset(skb); skb_forward_csum(skb); + if (!skb->sk) + skb_sender_cpu_clear(skb); } return ret; } @@ -558,6 +560,8 @@ static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb, if (!local) { skb_forward_csum(skb); + if (!skb->sk) + skb_sender_cpu_clear(skb); NF_HOOK(pf, NF_INET_LOCAL_OUT, NULL, skb, NULL, skb_dst(skb)->dev, dst_output_sk); } else @@ -578,6 +582,8 @@ static inline int ip_vs_send_or_cont(int pf, struct sk_buff *skb, if (!local) { ip_vs_drop_early_demux_sk(skb); skb_forward_csum(skb); + if (!skb->sk) + skb_sender_cpu_clear(skb); NF_HOOK(pf, NF_INET_LOCAL_OUT, NULL, skb, NULL, skb_dst(skb)->dev, dst_output_sk); } else From 1fc6fc1d5b9dc9c878b2b1c044e079b193e6cb92 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 28 Jul 2015 15:31:12 +0200 Subject: [PATCH 1115/2091] fbdev: select versatile helpers for the integrator commit 2701fa0864ecb9e49d47a4aa1c02f172ab79639a upstream. Commit 11c32d7b6274cb0f554943d65bd4a126c4a86dcd "video: move Versatile CLCD helpers" missed the fact that the Integrator/CP is also using the helper, and as a result the platform got only stubs and no graphics. Add this as a default selection to Kconfig so we have graphics again. Fixes: 11c32d7b6274 (video: move Versatile CLCD helpers) Signed-off-by: Linus Walleij Signed-off-by: Tomi Valkeinen Signed-off-by: Greg Kroah-Hartman --- drivers/video/fbdev/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig index 1094623030879..d1e1e1704da1f 100644 --- a/drivers/video/fbdev/Kconfig +++ b/drivers/video/fbdev/Kconfig @@ -298,7 +298,7 @@ config FB_ARMCLCD # Helper logic selected only by the ARM Versatile platform family. config PLAT_VERSATILE_CLCD - def_bool ARCH_VERSATILE || ARCH_REALVIEW || ARCH_VEXPRESS + def_bool ARCH_VERSATILE || ARCH_REALVIEW || ARCH_VEXPRESS || ARCH_INTEGRATOR depends on ARM depends on FB_ARMCLCD && FB=y From b046d646f1cbe1453b4ae63dc749e62f6e01c9c7 Mon Sep 17 00:00:00 2001 From: Marek Lindner Date: Tue, 9 Jun 2015 21:24:36 +0800 Subject: [PATCH 1116/2091] batman-adv: fix kernel crash due to missing NULL checks commit 354136bcc3c4f40a2813bba8f57ca5267d812d15 upstream. batadv_softif_vlan_get() may return NULL which has to be verified by the caller. 
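The fix applies the usual NULL-tolerant "put" convention, in the spirit of kfree(NULL): a failed lookup can be handed straight to the release function. Below is a minimal standalone C sketch of that convention only; obj, obj_put and obj_get_by_id are made-up illustrative names, not batman-adv symbols.

    #include <stdio.h>
    #include <stdlib.h>

    struct obj {                  /* stand-in for a refcounted object */
            int refcount;
    };

    /* Accepting NULL lets callers pass a failed lookup straight in,
     * exactly like kfree(NULL) is a harmless no-op. */
    static void obj_put(struct obj *o)
    {
            if (!o)
                    return;

            if (--o->refcount == 0) {
                    printf("last reference dropped, freeing\n");
                    free(o);
            }
    }

    static struct obj *obj_get_by_id(int id)
    {
            (void)id;
            return NULL;          /* lookups may legitimately fail */
    }

    int main(void)
    {
            struct obj *missing = obj_get_by_id(42);
            struct obj *o = calloc(1, sizeof(*o));

            if (!o)
                    return 1;
            o->refcount = 1;

            obj_put(missing);     /* failed lookup: harmless no-op */
            obj_put(o);           /* real object: dropped and freed */
            return 0;
    }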
Fixes: 35df3b298fc8 ("batman-adv: fix TT VLAN inconsistency on VLAN re-add") Reported-by: Ryan Thompson Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/soft-interface.c | 3 +++ net/batman-adv/translation-table.c | 18 ++++++++++++++---- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 5ec31d7de24f1..271a08dbd4d2e 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -449,6 +449,9 @@ void batadv_interface_rx(struct net_device *soft_iface, */ void batadv_softif_vlan_free_ref(struct batadv_softif_vlan *vlan) { + if (!vlan) + return; + if (atomic_dec_and_test(&vlan->refcount)) { spin_lock_bh(&vlan->bat_priv->softif_vlan_list_lock); hlist_del_rcu(&vlan->list); diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 07b263a437d1b..d41537cb867be 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -575,6 +575,9 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr, /* increase the refcounter of the related vlan */ vlan = batadv_softif_vlan_get(bat_priv, vid); + if (WARN(!vlan, "adding TT local entry %pM to non-existent VLAN %d", + addr, BATADV_PRINT_VID(vid))) + goto out; batadv_dbg(BATADV_DBG_TT, bat_priv, "Creating new local tt entry: %pM (vid: %d, ttvn: %d)\n", @@ -1047,6 +1050,9 @@ uint16_t batadv_tt_local_remove(struct batadv_priv *bat_priv, /* decrease the reference held for this vlan */ vlan = batadv_softif_vlan_get(bat_priv, vid); + if (!vlan) + goto out; + batadv_softif_vlan_free_ref(vlan); batadv_softif_vlan_free_ref(vlan); @@ -1147,8 +1153,10 @@ static void batadv_tt_local_table_free(struct batadv_priv *bat_priv) /* decrease the reference held for this vlan */ vlan = batadv_softif_vlan_get(bat_priv, tt_common_entry->vid); - batadv_softif_vlan_free_ref(vlan); - batadv_softif_vlan_free_ref(vlan); + if (vlan) { + batadv_softif_vlan_free_ref(vlan); + batadv_softif_vlan_free_ref(vlan); + } batadv_tt_local_entry_free_ref(tt_local); } @@ -3188,8 +3196,10 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv) /* decrease the reference held for this vlan */ vlan = batadv_softif_vlan_get(bat_priv, tt_common->vid); - batadv_softif_vlan_free_ref(vlan); - batadv_softif_vlan_free_ref(vlan); + if (vlan) { + batadv_softif_vlan_free_ref(vlan); + batadv_softif_vlan_free_ref(vlan); + } batadv_tt_local_entry_free_ref(tt_local); } From 103c584bcde170c3035041d85e79a90eae177eaf Mon Sep 17 00:00:00 2001 From: Marek Lindner Date: Wed, 17 Jun 2015 20:01:36 +0800 Subject: [PATCH 1117/2091] batman-adv: protect tt_local_entry from concurrent delete events commit ef72706a0543d0c3a5ab29bd6378fdfb368118d9 upstream. The tt_local_entry deletion performed in batadv_tt_local_remove() was neither protecting against simultaneous deletes nor checking whether the element was still part of the list before calling hlist_del_rcu(). Replacing the hlist_del_rcu() call with batadv_hash_remove() provides adequate protection via hash spinlocks as well as an is-element-still-in-hash check to avoid 'blind' hash removal. 
Fixes: 068ee6e204e1 ("batman-adv: roaming handling mechanism redesign") Reported-by: alfonsname@web.de Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/translation-table.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index d41537cb867be..deb87c6870aaa 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -1018,6 +1018,7 @@ uint16_t batadv_tt_local_remove(struct batadv_priv *bat_priv, struct batadv_tt_local_entry *tt_local_entry; uint16_t flags, curr_flags = BATADV_NO_FLAGS; struct batadv_softif_vlan *vlan; + void *tt_entry_exists; tt_local_entry = batadv_tt_local_hash_find(bat_priv, addr, vid); if (!tt_local_entry) @@ -1045,7 +1046,15 @@ uint16_t batadv_tt_local_remove(struct batadv_priv *bat_priv, * immediately purge it */ batadv_tt_local_event(bat_priv, tt_local_entry, BATADV_TT_CLIENT_DEL); - hlist_del_rcu(&tt_local_entry->common.hash_entry); + + tt_entry_exists = batadv_hash_remove(bat_priv->tt.local_hash, + batadv_compare_tt, + batadv_choose_tt, + &tt_local_entry->common); + if (!tt_entry_exists) + goto out; + + /* extra call to free the local tt entry */ batadv_tt_local_entry_free_ref(tt_local_entry); /* decrease the reference held for this vlan */ From 7abb2eeda8ce0ad0516d638e7bbeaacf745d4d75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Tue, 16 Jun 2015 17:10:22 +0200 Subject: [PATCH 1118/2091] batman-adv: Make DAT capability changes atomic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 65d7d46050704bcdb8121ddbf4110bfbf2b38baa upstream. Bitwise OR/AND assignments in C aren't guaranteed to be atomic. One OGM handler might undo the set/clear of a specific bit from another handler run in between. Fix this by using the atomic set_bit()/clear_bit()/test_bit() functions. Fixes: 17cf0ea455f1 ("batman-adv: tvlv - add distributed arp table container") Signed-off-by: Linus Lüssing Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/distributed-arp-table.c | 7 ++++--- net/batman-adv/types.h | 4 ++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index aad022dd15df5..95b3167cf036b 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -15,6 +15,7 @@ * along with this program; if not, see . */ +#include #include #include #include @@ -422,7 +423,7 @@ static bool batadv_is_orig_node_eligible(struct batadv_dat_candidate *res, int j; /* check if orig node candidate is running DAT */ - if (!(candidate->capabilities & BATADV_ORIG_CAPA_HAS_DAT)) + if (!test_bit(BATADV_ORIG_CAPA_HAS_DAT, &candidate->capabilities)) goto out; /* Check if this node has already been selected... 
*/ @@ -682,9 +683,9 @@ static void batadv_dat_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, uint16_t tvlv_value_len) { if (flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND) - orig->capabilities &= ~BATADV_ORIG_CAPA_HAS_DAT; + clear_bit(BATADV_ORIG_CAPA_HAS_DAT, &orig->capabilities); else - orig->capabilities |= BATADV_ORIG_CAPA_HAS_DAT; + set_bit(BATADV_ORIG_CAPA_HAS_DAT, &orig->capabilities); } /** diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 9398c3fb41747..4fff13cc66b4e 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -256,7 +256,7 @@ struct batadv_orig_node { struct hlist_node mcast_want_all_ipv4_node; struct hlist_node mcast_want_all_ipv6_node; #endif - uint8_t capabilities; + unsigned long capabilities; uint8_t capa_initialized; atomic_t last_ttvn; unsigned char *tt_buff; @@ -296,7 +296,7 @@ struct batadv_orig_node { * (= orig node announces a tvlv of type BATADV_TVLV_MCAST) */ enum batadv_orig_capabilities { - BATADV_ORIG_CAPA_HAS_DAT = BIT(0), + BATADV_ORIG_CAPA_HAS_DAT, BATADV_ORIG_CAPA_HAS_NC = BIT(1), BATADV_ORIG_CAPA_HAS_TT = BIT(2), BATADV_ORIG_CAPA_HAS_MCAST = BIT(3), From 8468a2989dbef6e40cf3a56721b228f5e8ae436e Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Thu, 25 Jun 2015 11:25:07 +0300 Subject: [PATCH 1119/2091] mmc: dw_mmc: handle data blocks > than 4kB if IDMAC is used commit 5959b32e3636f9bfe3f869d1e440bc4a4d660965 upstream. As per DW MobileStorage databook "each descriptor can transfer up to 4kB of data in chained mode", moreover buffer size that is put in "des1" is limited to 13 bits, i.e. for example on attempt to IDMAC_SET_BUFFER1_SIZE(desc, 8192) size value that's effectively written will be 0. On the platform with 8kB PAGE_SIZE I see dw_mmc gets data blocks in SG-list of 8kB size and that leads to unpredictable behavior of the SD/MMC controller. In particular on write to FAT partition of SD-card the controller will stuck in the middle of DMA transaction. Solution to the problem is simple - we need to pass large (> 4kB) data buffers to the controller via multiple descriptors. And that's what that change does. What's interesting I did try original driver on same platform but configured with 4kB PAGE_SIZE and may confirm that data blocks passed in SG-list to dw_mmc never exeed 4kB limit - that explains why nobody ever faced a problem I did. 
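The splitting itself is straightforward: consume each scatterlist segment in chunks of at most 4 KiB, one chained descriptor per chunk. A self-contained userspace C sketch of that loop is shown below; fake_desc, DESC_DATA_LENGTH and the addresses are made up for illustration, while the real driver fills idmac_desc/idmac_desc_64addr as in the diff that follows.

    #include <stdio.h>

    #define DESC_DATA_LENGTH 0x1000     /* 4 KiB per chained descriptor */

    struct fake_desc {                  /* stand-in for an IDMAC descriptor */
            unsigned int len;
            unsigned long long addr;
    };

    /* Split one DMA segment into as many descriptors as needed. */
    static int fill_descs(struct fake_desc *desc, int max,
                          unsigned long long addr, unsigned int length)
    {
            int used = 0;

            while (length && used < max) {
                    unsigned int chunk = (length <= DESC_DATA_LENGTH) ?
                                          length : DESC_DATA_LENGTH;

                    desc[used].len  = chunk;    /* never exceeds 4 KiB */
                    desc[used].addr = addr;
                    addr   += chunk;
                    length -= chunk;
                    used++;
            }
            return used;
    }

    int main(void)
    {
            struct fake_desc descs[8];
            int i, n;

            /* one 8 KiB segment, as seen with 8 kB PAGE_SIZE */
            n = fill_descs(descs, 8, 0x80000000ULL, 8192);

            for (i = 0; i < n; i++)
                    printf("desc %d: addr=0x%llx len=%u\n",
                           i, descs[i].addr, descs[i].len);
            return 0;
    }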
Signed-off-by: Alexey Brodkin Cc: Seungwon Jeon Cc: Jaehoon Chung Cc: Ulf Hansson Cc: arc-linux-dev@synopsys.com Cc: linux-kernel@vger.kernel.org Signed-off-by: Jaehoon Chung Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/dw_mmc.c | 109 +++++++++++++++++++++++++------------- 1 file changed, 71 insertions(+), 38 deletions(-) diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c index 5f5adafb253af..b354c8bffb9e1 100644 --- a/drivers/mmc/host/dw_mmc.c +++ b/drivers/mmc/host/dw_mmc.c @@ -99,6 +99,9 @@ struct idmac_desc { __le32 des3; /* buffer 2 physical address */ }; + +/* Each descriptor can transfer up to 4KB of data in chained mode */ +#define DW_MCI_DESC_DATA_LENGTH 0x1000 #endif /* CONFIG_MMC_DW_IDMAC */ static bool dw_mci_reset(struct dw_mci *host); @@ -462,66 +465,96 @@ static void dw_mci_idmac_complete_dma(struct dw_mci *host) static void dw_mci_translate_sglist(struct dw_mci *host, struct mmc_data *data, unsigned int sg_len) { + unsigned int desc_len; int i; if (host->dma_64bit_address == 1) { - struct idmac_desc_64addr *desc = host->sg_cpu; + struct idmac_desc_64addr *desc_first, *desc_last, *desc; + + desc_first = desc_last = desc = host->sg_cpu; - for (i = 0; i < sg_len; i++, desc++) { + for (i = 0; i < sg_len; i++) { unsigned int length = sg_dma_len(&data->sg[i]); u64 mem_addr = sg_dma_address(&data->sg[i]); - /* - * Set the OWN bit and disable interrupts for this - * descriptor - */ - desc->des0 = IDMAC_DES0_OWN | IDMAC_DES0_DIC | - IDMAC_DES0_CH; - /* Buffer length */ - IDMAC_64ADDR_SET_BUFFER1_SIZE(desc, length); - - /* Physical address to DMA to/from */ - desc->des4 = mem_addr & 0xffffffff; - desc->des5 = mem_addr >> 32; + for ( ; length ; desc++) { + desc_len = (length <= DW_MCI_DESC_DATA_LENGTH) ? + length : DW_MCI_DESC_DATA_LENGTH; + + length -= desc_len; + + /* + * Set the OWN bit and disable interrupts + * for this descriptor + */ + desc->des0 = IDMAC_DES0_OWN | IDMAC_DES0_DIC | + IDMAC_DES0_CH; + + /* Buffer length */ + IDMAC_64ADDR_SET_BUFFER1_SIZE(desc, desc_len); + + /* Physical address to DMA to/from */ + desc->des4 = mem_addr & 0xffffffff; + desc->des5 = mem_addr >> 32; + + /* Update physical address for the next desc */ + mem_addr += desc_len; + + /* Save pointer to the last descriptor */ + desc_last = desc; + } } /* Set first descriptor */ - desc = host->sg_cpu; - desc->des0 |= IDMAC_DES0_FD; + desc_first->des0 |= IDMAC_DES0_FD; /* Set last descriptor */ - desc = host->sg_cpu + (i - 1) * - sizeof(struct idmac_desc_64addr); - desc->des0 &= ~(IDMAC_DES0_CH | IDMAC_DES0_DIC); - desc->des0 |= IDMAC_DES0_LD; + desc_last->des0 &= ~(IDMAC_DES0_CH | IDMAC_DES0_DIC); + desc_last->des0 |= IDMAC_DES0_LD; } else { - struct idmac_desc *desc = host->sg_cpu; + struct idmac_desc *desc_first, *desc_last, *desc; + + desc_first = desc_last = desc = host->sg_cpu; - for (i = 0; i < sg_len; i++, desc++) { + for (i = 0; i < sg_len; i++) { unsigned int length = sg_dma_len(&data->sg[i]); u32 mem_addr = sg_dma_address(&data->sg[i]); - /* - * Set the OWN bit and disable interrupts for this - * descriptor - */ - desc->des0 = cpu_to_le32(IDMAC_DES0_OWN | - IDMAC_DES0_DIC | IDMAC_DES0_CH); - /* Buffer length */ - IDMAC_SET_BUFFER1_SIZE(desc, length); + for ( ; length ; desc++) { + desc_len = (length <= DW_MCI_DESC_DATA_LENGTH) ? 
+ length : DW_MCI_DESC_DATA_LENGTH; + + length -= desc_len; + + /* + * Set the OWN bit and disable interrupts + * for this descriptor + */ + desc->des0 = cpu_to_le32(IDMAC_DES0_OWN | + IDMAC_DES0_DIC | + IDMAC_DES0_CH); + + /* Buffer length */ + IDMAC_SET_BUFFER1_SIZE(desc, desc_len); - /* Physical address to DMA to/from */ - desc->des2 = cpu_to_le32(mem_addr); + /* Physical address to DMA to/from */ + desc->des2 = cpu_to_le32(mem_addr); + + /* Update physical address for the next desc */ + mem_addr += desc_len; + + /* Save pointer to the last descriptor */ + desc_last = desc; + } } /* Set first descriptor */ - desc = host->sg_cpu; - desc->des0 |= cpu_to_le32(IDMAC_DES0_FD); + desc_first->des0 |= cpu_to_le32(IDMAC_DES0_FD); /* Set last descriptor */ - desc = host->sg_cpu + (i - 1) * sizeof(struct idmac_desc); - desc->des0 &= cpu_to_le32(~(IDMAC_DES0_CH | IDMAC_DES0_DIC)); - desc->des0 |= cpu_to_le32(IDMAC_DES0_LD); + desc_last->des0 &= cpu_to_le32(~(IDMAC_DES0_CH | + IDMAC_DES0_DIC)); + desc_last->des0 |= cpu_to_le32(IDMAC_DES0_LD); } wmb(); @@ -2406,7 +2439,7 @@ static int dw_mci_init_slot(struct dw_mci *host, unsigned int id) #ifdef CONFIG_MMC_DW_IDMAC mmc->max_segs = host->ring_size; mmc->max_blk_size = 65536; - mmc->max_seg_size = 0x1000; + mmc->max_seg_size = DW_MCI_DESC_DATA_LENGTH; mmc->max_req_size = mmc->max_seg_size * host->ring_size; mmc->max_blk_count = mmc->max_req_size / 512; #else From da81f3e68a13c4ab431119ed590b22e24e3e3469 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Sat, 9 May 2015 09:57:08 -0300 Subject: [PATCH 1120/2091] mmc: sdhci-esdhc-imx: Move mmc_of_parse() to the dt probe commit 15064119273735c115fba381823b0746508bae3a upstream. mmc_of_parse() should be placed inside sdhci_esdhc_imx_probe_dt() as it suits only for the dt case. Signed-off-by: Fabio Estevam Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-esdhc-imx.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c index 82f512d87cb89..71ffa520c6e5d 100644 --- a/drivers/mmc/host/sdhci-esdhc-imx.c +++ b/drivers/mmc/host/sdhci-esdhc-imx.c @@ -903,7 +903,8 @@ sdhci_esdhc_imx_probe_dt(struct platform_device *pdev, mmc_of_parse_voltage(np, &host->ocr_mask); - return 0; + /* call to generic mmc_of_parse to support additional capabilities */ + return mmc_of_parse(host->mmc); } #else static inline int @@ -1048,11 +1049,6 @@ static int sdhci_esdhc_imx_probe(struct platform_device *pdev) host->quirks2 |= SDHCI_QUIRK2_NO_1_8_V; } - /* call to generic mmc_of_parse to support additional capabilities */ - err = mmc_of_parse(host->mmc); - if (err) - goto disable_clk; - err = sdhci_add_host(host); if (err) goto disable_clk; From 9a2d8c9f46a31ace6a2bc66b3b367938a177af84 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Sat, 9 May 2015 09:57:09 -0300 Subject: [PATCH 1121/2091] mmc: sdhci-esdhc-imx: Do not break platform data boards commit 7ccddeb08a632c713eca0a5f13bcbfa7e6e83982 upstream. The only user of this driver that has not been converted to fully device tree is the i.MX35 SoC. There is a i.MX35-based board (mach-pcm043.c) that uses platform data to pass wp_gpio and cd_gpio information. Commit 8d86e4fcccf61ba ("mmc: sdhci-esdhc-imx: Call mmc_of_parse()") broke the platform data case by removing mmc_gpio_request_ro() and mmc_gpio_request_cd(), so restore the functionality for the non-dt case. 
Also, restore the check for ESDHC_CD_CONTROLLER so that we can still support the "fsl,cd-controller" property. Signed-off-by: Fabio Estevam Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-esdhc-imx.c | 36 +++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c index 71ffa520c6e5d..165a2e7758a00 100644 --- a/drivers/mmc/host/sdhci-esdhc-imx.c +++ b/drivers/mmc/host/sdhci-esdhc-imx.c @@ -925,6 +925,7 @@ static int sdhci_esdhc_imx_probe(struct platform_device *pdev) struct esdhc_platform_data *boarddata; int err; struct pltfm_imx_data *imx_data; + bool dt = true; host = sdhci_pltfm_init(pdev, &sdhci_esdhc_imx_pdata, 0); if (IS_ERR(host)) @@ -1012,11 +1013,44 @@ static int sdhci_esdhc_imx_probe(struct platform_device *pdev) } imx_data->boarddata = *((struct esdhc_platform_data *) host->mmc->parent->platform_data); + dt = false; + } + /* write_protect */ + if (boarddata->wp_type == ESDHC_WP_GPIO && !dt) { + err = mmc_gpio_request_ro(host->mmc, boarddata->wp_gpio); + if (err) { + dev_err(mmc_dev(host->mmc), + "failed to request write-protect gpio!\n"); + goto disable_clk; + } + host->mmc->caps2 |= MMC_CAP2_RO_ACTIVE_HIGH; } /* card_detect */ - if (boarddata->cd_type == ESDHC_CD_CONTROLLER) + switch (boarddata->cd_type) { + case ESDHC_CD_GPIO: + if (dt) + break; + err = mmc_gpio_request_cd(host->mmc, boarddata->cd_gpio, 0); + if (err) { + dev_err(mmc_dev(host->mmc), + "failed to request card-detect gpio!\n"); + goto disable_clk; + } + /* fall through */ + + case ESDHC_CD_CONTROLLER: + /* we have a working card_detect back */ host->quirks &= ~SDHCI_QUIRK_BROKEN_CARD_DETECTION; + break; + + case ESDHC_CD_PERMANENT: + host->mmc->caps |= MMC_CAP_NONREMOVABLE; + break; + + case ESDHC_CD_NONE: + break; + } switch (boarddata->max_bus_width) { case 8: From c5a8210d4b9e5fb39f187409addfe7b00dc1bbb9 Mon Sep 17 00:00:00 2001 From: Dong Aisheng Date: Wed, 22 Jul 2015 20:53:05 +0800 Subject: [PATCH 1122/2091] mmc: sdhci-esdhc-imx: fix cd regression for dt platform commit 4800e87a2ee886c1972deb73f057d1a6541edb36 upstream. Current card detect probe process is that when driver finds a valid ESDHC_CD_GPIO, it will clear the quirk SDHCI_QUIRK_BROKEN_CARD_DETECTION which is set by default for all esdhc/usdhc controllers. Then host driver will know there's a valid card detect function. Commit 8d86e4fcccf6 ("mmc: sdhci-esdhc-imx: Call mmc_of_parse()") breaks GPIO CD function for dt platform that it will return directly when find ESDHC_CD_GPIO for dt platform which result in the later wrongly to keep SDHCI_QUIRK_BROKEN_CARD_DETECTION for all dt platforms. Then MMC_CAP_NEEDS_POLL will be used instead even there's a valid GPIO card detect. This patch adds back this function and follows the original approach to clear the quirk if find an valid CD GPIO for dt platforms. 
Fixes: 8d86e4fcccf6 ("mmc: sdhci-esdhc-imx: Call mmc_of_parse()") Signed-off-by: Dong Aisheng Reviewed-by: Johan Derycke Signed-off-by: Ulf Hansson Signed-off-by: Fabio Estevam Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-esdhc-imx.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c index 165a2e7758a00..461698b038f75 100644 --- a/drivers/mmc/host/sdhci-esdhc-imx.c +++ b/drivers/mmc/host/sdhci-esdhc-imx.c @@ -868,6 +868,7 @@ sdhci_esdhc_imx_probe_dt(struct platform_device *pdev, struct esdhc_platform_data *boarddata) { struct device_node *np = pdev->dev.of_node; + int ret; if (!np) return -ENODEV; @@ -904,7 +905,14 @@ sdhci_esdhc_imx_probe_dt(struct platform_device *pdev, mmc_of_parse_voltage(np, &host->ocr_mask); /* call to generic mmc_of_parse to support additional capabilities */ - return mmc_of_parse(host->mmc); + ret = mmc_of_parse(host->mmc); + if (ret) + return ret; + + if (!IS_ERR_VALUE(mmc_gpio_get_cd(host->mmc))) + host->quirks &= ~SDHCI_QUIRK_BROKEN_CARD_DETECTION; + + return 0; } #else static inline int From 67833edcfc736cc89a374959433ffb7fb36bb536 Mon Sep 17 00:00:00 2001 From: Dong Aisheng Date: Wed, 22 Jul 2015 20:53:00 +0800 Subject: [PATCH 1123/2091] dts: imx51: fix sd card gpio polarity specified in device tree commit aca45c0e95dad1c4ba4d38da192756b0e10cbbbd upstream. cd-gpios polarity should be changed to GPIO_ACTIVE_LOW and wp-gpios should be changed to GPIO_ACTIVE_HIGH. Otherwise, the SD may not work properly due to wrong polarity inversion specified in DT after switch to common parsing function mmc_of_parse(). Signed-off-by: Dong Aisheng Acked-by: Shawn Guo Signed-off-by: Ulf Hansson Signed-off-by: Fabio Estevam Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/imx51-apf51dev.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/imx51-apf51dev.dts b/arch/arm/boot/dts/imx51-apf51dev.dts index 93d3ea12328c5..0f3fe29b816eb 100644 --- a/arch/arm/boot/dts/imx51-apf51dev.dts +++ b/arch/arm/boot/dts/imx51-apf51dev.dts @@ -98,7 +98,7 @@ &esdhc1 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_esdhc1>; - cd-gpios = <&gpio2 29 GPIO_ACTIVE_HIGH>; + cd-gpios = <&gpio2 29 GPIO_ACTIVE_LOW>; bus-width = <4>; status = "okay"; }; From 958847c5ed05cc1feb4b6202db98c893aed8c678 Mon Sep 17 00:00:00 2001 From: Dong Aisheng Date: Wed, 22 Jul 2015 20:53:01 +0800 Subject: [PATCH 1124/2091] dts: imx53: fix sd card gpio polarity specified in device tree commit 94d76946859b4bcfa0da373357f14feda2af0622 upstream. cd-gpios polarity should be changed to GPIO_ACTIVE_LOW and wp-gpios should be changed to GPIO_ACTIVE_HIGH. Otherwise, the SD may not work properly due to wrong polarity inversion specified in DT after switch to common parsing function mmc_of_parse(). 
Signed-off-by: Dong Aisheng Acked-by: Shawn Guo Signed-off-by: Ulf Hansson Signed-off-by: Fabio Estevam Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/imx53-ard.dts | 4 ++-- arch/arm/boot/dts/imx53-m53evk.dts | 4 ++-- arch/arm/boot/dts/imx53-qsb-common.dtsi | 4 ++-- arch/arm/boot/dts/imx53-smd.dts | 4 ++-- arch/arm/boot/dts/imx53-tqma53.dtsi | 4 ++-- arch/arm/boot/dts/imx53-tx53.dtsi | 4 ++-- arch/arm/boot/dts/imx53-voipac-bsb.dts | 4 ++-- 7 files changed, 14 insertions(+), 14 deletions(-) diff --git a/arch/arm/boot/dts/imx53-ard.dts b/arch/arm/boot/dts/imx53-ard.dts index e9337ad52f59b..3bc18835fb4bb 100644 --- a/arch/arm/boot/dts/imx53-ard.dts +++ b/arch/arm/boot/dts/imx53-ard.dts @@ -103,8 +103,8 @@ &esdhc1 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_esdhc1>; - cd-gpios = <&gpio1 1 0>; - wp-gpios = <&gpio1 9 0>; + cd-gpios = <&gpio1 1 GPIO_ACTIVE_LOW>; + wp-gpios = <&gpio1 9 GPIO_ACTIVE_HIGH>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx53-m53evk.dts b/arch/arm/boot/dts/imx53-m53evk.dts index d0e0f57eb432e..53f40885c5306 100644 --- a/arch/arm/boot/dts/imx53-m53evk.dts +++ b/arch/arm/boot/dts/imx53-m53evk.dts @@ -124,8 +124,8 @@ &esdhc1 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_esdhc1>; - cd-gpios = <&gpio1 1 0>; - wp-gpios = <&gpio1 9 0>; + cd-gpios = <&gpio1 1 GPIO_ACTIVE_LOW>; + wp-gpios = <&gpio1 9 GPIO_ACTIVE_HIGH>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx53-qsb-common.dtsi b/arch/arm/boot/dts/imx53-qsb-common.dtsi index 181ae5ebf23f6..1f55187ed9ce3 100644 --- a/arch/arm/boot/dts/imx53-qsb-common.dtsi +++ b/arch/arm/boot/dts/imx53-qsb-common.dtsi @@ -147,8 +147,8 @@ &esdhc3 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_esdhc3>; - cd-gpios = <&gpio3 11 0>; - wp-gpios = <&gpio3 12 0>; + cd-gpios = <&gpio3 11 GPIO_ACTIVE_LOW>; + wp-gpios = <&gpio3 12 GPIO_ACTIVE_HIGH>; bus-width = <8>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx53-smd.dts b/arch/arm/boot/dts/imx53-smd.dts index 1d325576bcc04..fc89ce1e5763a 100644 --- a/arch/arm/boot/dts/imx53-smd.dts +++ b/arch/arm/boot/dts/imx53-smd.dts @@ -41,8 +41,8 @@ &esdhc1 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_esdhc1>; - cd-gpios = <&gpio3 13 0>; - wp-gpios = <&gpio4 11 0>; + cd-gpios = <&gpio3 13 GPIO_ACTIVE_LOW>; + wp-gpios = <&gpio4 11 GPIO_ACTIVE_HIGH>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx53-tqma53.dtsi b/arch/arm/boot/dts/imx53-tqma53.dtsi index 4f1f0e2868bf1..e03373a58760f 100644 --- a/arch/arm/boot/dts/imx53-tqma53.dtsi +++ b/arch/arm/boot/dts/imx53-tqma53.dtsi @@ -41,8 +41,8 @@ pinctrl-0 = <&pinctrl_esdhc2>, <&pinctrl_esdhc2_cdwp>; vmmc-supply = <®_3p3v>; - wp-gpios = <&gpio1 2 0>; - cd-gpios = <&gpio1 4 0>; + wp-gpios = <&gpio1 2 GPIO_ACTIVE_HIGH>; + cd-gpios = <&gpio1 4 GPIO_ACTIVE_LOW>; status = "disabled"; }; diff --git a/arch/arm/boot/dts/imx53-tx53.dtsi b/arch/arm/boot/dts/imx53-tx53.dtsi index 704bd72cbfec8..d3e50b22064f2 100644 --- a/arch/arm/boot/dts/imx53-tx53.dtsi +++ b/arch/arm/boot/dts/imx53-tx53.dtsi @@ -183,7 +183,7 @@ }; &esdhc1 { - cd-gpios = <&gpio3 24 GPIO_ACTIVE_HIGH>; + cd-gpios = <&gpio3 24 GPIO_ACTIVE_LOW>; fsl,wp-controller; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_esdhc1>; @@ -191,7 +191,7 @@ }; &esdhc2 { - cd-gpios = <&gpio3 25 GPIO_ACTIVE_HIGH>; + cd-gpios = <&gpio3 25 GPIO_ACTIVE_LOW>; fsl,wp-controller; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_esdhc2>; diff --git a/arch/arm/boot/dts/imx53-voipac-bsb.dts b/arch/arm/boot/dts/imx53-voipac-bsb.dts index c17d3ad6dba50..fc51b87ad2087 100644 --- 
a/arch/arm/boot/dts/imx53-voipac-bsb.dts +++ b/arch/arm/boot/dts/imx53-voipac-bsb.dts @@ -119,8 +119,8 @@ &esdhc2 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_esdhc2>; - cd-gpios = <&gpio3 25 0>; - wp-gpios = <&gpio2 19 0>; + cd-gpios = <&gpio3 25 GPIO_ACTIVE_LOW>; + wp-gpios = <&gpio2 19 GPIO_ACTIVE_HIGH>; vmmc-supply = <®_3p3v>; status = "okay"; }; From 1c65e22e91c6adcb8bdf2e3097c2f4cffa3f3102 Mon Sep 17 00:00:00 2001 From: Dong Aisheng Date: Wed, 22 Jul 2015 20:53:03 +0800 Subject: [PATCH 1125/2091] dts: imx25: fix sd card gpio polarity specified in device tree commit cf75eb15be2bdd054a76aeef6458beaa4a6ab770 upstream. cd-gpios polarity should be changed to GPIO_ACTIVE_LOW and wp-gpios should be changed to GPIO_ACTIVE_HIGH. Otherwise, the SD may not work properly due to wrong polarity inversion specified in DT after switch to common parsing function mmc_of_parse(). Signed-off-by: Dong Aisheng Acked-by: Shawn Guo Signed-off-by: Ulf Hansson Signed-off-by: Fabio Estevam Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/imx25-pdk.dts | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/imx25-pdk.dts b/arch/arm/boot/dts/imx25-pdk.dts index dd45e6971bc35..9351296356dcc 100644 --- a/arch/arm/boot/dts/imx25-pdk.dts +++ b/arch/arm/boot/dts/imx25-pdk.dts @@ -10,6 +10,7 @@ */ /dts-v1/; +#include #include #include "imx25.dtsi" @@ -114,8 +115,8 @@ &esdhc1 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_esdhc1>; - cd-gpios = <&gpio2 1 0>; - wp-gpios = <&gpio2 0 0>; + cd-gpios = <&gpio2 1 GPIO_ACTIVE_LOW>; + wp-gpios = <&gpio2 0 GPIO_ACTIVE_HIGH>; status = "okay"; }; From 76ff51ce2e55873e479c550127e405442fa09a7e Mon Sep 17 00:00:00 2001 From: Li Jun Date: Wed, 16 Sep 2015 14:46:32 +0800 Subject: [PATCH 1126/2091] usb: chipidea: imx: fix a typo for imx6sx commit 8315b77d72c5f0b18ceb513303d845e73166133c upstream. Use imx6sx instead of imx6sl's platform flags for imx6sx. Fixes: e14db48dfcf3 ("usb: chipidea: imx: add runtime power management support") Signed-off-by: Li Jun Signed-off-by: Peter Chen Signed-off-by: Greg Kroah-Hartman --- drivers/usb/chipidea/ci_hdrc_imx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/chipidea/ci_hdrc_imx.c b/drivers/usb/chipidea/ci_hdrc_imx.c index 389f0e0342596..fa774323ebda3 100644 --- a/drivers/usb/chipidea/ci_hdrc_imx.c +++ b/drivers/usb/chipidea/ci_hdrc_imx.c @@ -56,7 +56,7 @@ static const struct of_device_id ci_hdrc_imx_dt_ids[] = { { .compatible = "fsl,imx27-usb", .data = &imx27_usb_data}, { .compatible = "fsl,imx6q-usb", .data = &imx6q_usb_data}, { .compatible = "fsl,imx6sl-usb", .data = &imx6sl_usb_data}, - { .compatible = "fsl,imx6sx-usb", .data = &imx6sl_usb_data}, + { .compatible = "fsl,imx6sx-usb", .data = &imx6sx_usb_data}, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, ci_hdrc_imx_dt_ids); From 22ee53c3bdb8ab208056cf306fc60527f883b494 Mon Sep 17 00:00:00 2001 From: Peter Seiderer Date: Thu, 17 Sep 2015 21:40:12 +0200 Subject: [PATCH 1127/2091] cifs: use server timestamp for ntlmv2 authentication commit 98ce94c8df762d413b3ecb849e2b966b21606d04 upstream. Linux cifs mount with ntlmssp against an Mac OS X (Yosemite 10.10.5) share fails in case the clocks differ more than +/-2h: digest-service: digest-request: od failed with 2 proto=ntlmv2 digest-service: digest-request: kdc failed with -1561745592 proto=ntlmv2 Fix this by (re-)using the given server timestamp for the ntlmv2 authentication (as Windows 7 does). 
A related problem was also reported earlier by Namjae Jaen (see below): Windows machine has extended security feature which refuse to allow authentication when there is time difference between server time and client time when ntlmv2 negotiation is used. This problem is prevalent in embedded enviornment where system time is set to default 1970. Modern servers send the server timestamp in the TargetInfo Av_Pair structure in the challenge message [see MS-NLMP 2.2.2.1] In [MS-NLMP 3.1.5.1.2] it is explicitly mentioned that the client must use the server provided timestamp if present OR current time if it is not Reported-by: Namjae Jeon Signed-off-by: Peter Seiderer Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/cifsencrypt.c | 53 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 51 insertions(+), 2 deletions(-) diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index aa0dc25733741..afa09fce81515 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -444,6 +444,48 @@ find_domain_name(struct cifs_ses *ses, const struct nls_table *nls_cp) return 0; } +/* Server has provided av pairs/target info in the type 2 challenge + * packet and we have plucked it and stored within smb session. + * We parse that blob here to find the server given timestamp + * as part of ntlmv2 authentication (or local current time as + * default in case of failure) + */ +static __le64 +find_timestamp(struct cifs_ses *ses) +{ + unsigned int attrsize; + unsigned int type; + unsigned int onesize = sizeof(struct ntlmssp2_name); + unsigned char *blobptr; + unsigned char *blobend; + struct ntlmssp2_name *attrptr; + + if (!ses->auth_key.len || !ses->auth_key.response) + return 0; + + blobptr = ses->auth_key.response; + blobend = blobptr + ses->auth_key.len; + + while (blobptr + onesize < blobend) { + attrptr = (struct ntlmssp2_name *) blobptr; + type = le16_to_cpu(attrptr->type); + if (type == NTLMSSP_AV_EOL) + break; + blobptr += 2; /* advance attr type */ + attrsize = le16_to_cpu(attrptr->length); + blobptr += 2; /* advance attr size */ + if (blobptr + attrsize > blobend) + break; + if (type == NTLMSSP_AV_TIMESTAMP) { + if (attrsize == sizeof(u64)) + return *((__le64 *)blobptr); + } + blobptr += attrsize; /* advance attr value */ + } + + return cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME)); +} + static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, const struct nls_table *nls_cp) { @@ -641,6 +683,7 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp) struct ntlmv2_resp *ntlmv2; char ntlmv2_hash[16]; unsigned char *tiblob = NULL; /* target info blob */ + __le64 rsp_timestamp; if (ses->server->negflavor == CIFS_NEGFLAVOR_EXTENDED) { if (!ses->domainName) { @@ -659,6 +702,12 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp) } } + /* Must be within 5 minutes of the server (or in range +/-2h + * in case of Mac OS X), so simply carry over server timestamp + * (as Windows 7 does) + */ + rsp_timestamp = find_timestamp(ses); + baselen = CIFS_SESS_KEY_SIZE + sizeof(struct ntlmv2_resp); tilen = ses->auth_key.len; tiblob = ses->auth_key.response; @@ -675,8 +724,8 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp) (ses->auth_key.response + CIFS_SESS_KEY_SIZE); ntlmv2->blob_signature = cpu_to_le32(0x00000101); ntlmv2->reserved = 0; - /* Must be within 5 minutes of the server */ - ntlmv2->time = cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME)); + ntlmv2->time = rsp_timestamp; + 
get_random_bytes(&ntlmv2->client_chal, sizeof(ntlmv2->client_chal)); ntlmv2->reserved2 = 0; From ba8d3bb3dea5ca82cce82939535c4a65161dc8be Mon Sep 17 00:00:00 2001 From: Ludovic Desroches Date: Mon, 21 Sep 2015 15:46:04 +0200 Subject: [PATCH 1128/2091] irqchip/atmel-aic5: Use per chip mask caches in mask/unmask() commit d32dc9aa10c739363c775baf4499416b2e0dc11f upstream. When masking/unmasking interrupts, mask_cache is updated and used later for suspend/resume. Unfortunately, it always was the mask_cache associated with the first irq chip which was updated. So when performing resume, only irqs 0-31 could be enabled. Fixes: b1479ebb7720 ("irqchip: atmel-aic: Add atmel AIC/AIC5 drivers") Signed-off-by: Ludovic Desroches Cc: Cc: Cc: Cc: Cc: Cc: Cc: Cc: Link: http://lkml.kernel.org/r/1442843173-2390-1-git-send-email-ludovic.desroches@atmel.com Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- drivers/irqchip/irq-atmel-aic5.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/drivers/irqchip/irq-atmel-aic5.c b/drivers/irqchip/irq-atmel-aic5.c index a2e8c3f876cbd..c2c578f0b268a 100644 --- a/drivers/irqchip/irq-atmel-aic5.c +++ b/drivers/irqchip/irq-atmel-aic5.c @@ -88,28 +88,36 @@ static void aic5_mask(struct irq_data *d) { struct irq_domain *domain = d->domain; struct irq_domain_chip_generic *dgc = domain->gc; - struct irq_chip_generic *gc = dgc->gc[0]; + struct irq_chip_generic *bgc = dgc->gc[0]; + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); - /* Disable interrupt on AIC5 */ - irq_gc_lock(gc); + /* + * Disable interrupt on AIC5. We always take the lock of the + * first irq chip as all chips share the same registers. + */ + irq_gc_lock(bgc); irq_reg_writel(gc, d->hwirq, AT91_AIC5_SSR); irq_reg_writel(gc, 1, AT91_AIC5_IDCR); gc->mask_cache &= ~d->mask; - irq_gc_unlock(gc); + irq_gc_unlock(bgc); } static void aic5_unmask(struct irq_data *d) { struct irq_domain *domain = d->domain; struct irq_domain_chip_generic *dgc = domain->gc; - struct irq_chip_generic *gc = dgc->gc[0]; + struct irq_chip_generic *bgc = dgc->gc[0]; + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); - /* Enable interrupt on AIC5 */ - irq_gc_lock(gc); + /* + * Enable interrupt on AIC5. We always take the lock of the + * first irq chip as all chips share the same registers. + */ + irq_gc_lock(bgc); irq_reg_writel(gc, d->hwirq, AT91_AIC5_SSR); irq_reg_writel(gc, 1, AT91_AIC5_IECR); gc->mask_cache |= d->mask; - irq_gc_unlock(gc); + irq_gc_unlock(bgc); } static int aic5_retrigger(struct irq_data *d) From 28f9166c7e4045bfeeaa188695e2c3f77dc5d773 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 13 Sep 2015 12:14:32 +0100 Subject: [PATCH 1129/2091] irqchip/gic-v3-its: Add missing cache flushes commit 5a9a8915c8888b615521b17d70a4342187eae60b upstream. When the ITS is configured for non-cacheable transactions, make sure that the allocated, zeroed memory is flushed to the Point of Coherency, allowing the ITS to observe the zeros instead of random garbage (or even get its own data overwritten by zeros being evicted from the cache...). 
Fixes: 241a386c7dbb "irqchip: gicv3-its: Use non-cacheable accesses when no shareability" Reported-and-tested-by: Stuart Yoder Signed-off-by: Marc Zyngier Cc: linux-arm-kernel@lists.infradead.org Cc: Pavel Fedin Cc: Jason Cooper Link: http://lkml.kernel.org/r/1442142873-20213-3-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- drivers/irqchip/irq-gic-v3-its.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index c00e2db351ba5..9a791dd52199c 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -921,8 +921,10 @@ static int its_alloc_tables(struct its_node *its) * non-cacheable as well. */ shr = tmp & GITS_BASER_SHAREABILITY_MASK; - if (!shr) + if (!shr) { cache = GITS_BASER_nC; + __flush_dcache_area(base, alloc_size); + } goto retry_baser; } @@ -1163,6 +1165,8 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id, return NULL; } + __flush_dcache_area(itt, sz); + dev->its = its; dev->itt = itt; dev->nr_ites = nr_ites; From 61e7a13ec7bc432a42b283df4b02ff83f163e6e9 Mon Sep 17 00:00:00 2001 From: Mario Carrillo Date: Mon, 24 Aug 2015 09:33:09 -0500 Subject: [PATCH 1130/2091] docs: update HOWTO for 3.x -> 4.x versioning commit e4144fe5d47c91c92d36cdbd5f31ed8d6e3a57ab upstream. The HOWTO document needed updating for the new kernel versioning. Signed-off-by: Mario Carrillo Signed-off-by: Jonathan Corbet Signed-off-by: Greg Kroah-Hartman --- Documentation/HOWTO | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/Documentation/HOWTO b/Documentation/HOWTO index 93aa8604630e7..21152d397b88e 100644 --- a/Documentation/HOWTO +++ b/Documentation/HOWTO @@ -218,16 +218,16 @@ The development process Linux kernel development process currently consists of a few different main kernel "branches" and lots of different subsystem-specific kernel branches. These different branches are: - - main 3.x kernel tree - - 3.x.y -stable kernel tree - - 3.x -git kernel patches + - main 4.x kernel tree + - 4.x.y -stable kernel tree + - 4.x -git kernel patches - subsystem specific kernel trees and patches - - the 3.x -next kernel tree for integration tests + - the 4.x -next kernel tree for integration tests -3.x kernel tree +4.x kernel tree ----------------- -3.x kernels are maintained by Linus Torvalds, and can be found on -kernel.org in the pub/linux/kernel/v3.x/ directory. Its development +4.x kernels are maintained by Linus Torvalds, and can be found on +kernel.org in the pub/linux/kernel/v4.x/ directory. Its development process is as follows: - As soon as a new kernel is released a two weeks window is open, during this period of time maintainers can submit big diffs to @@ -262,20 +262,20 @@ mailing list about kernel releases: released according to perceived bug status, not according to a preconceived timeline." -3.x.y -stable kernel tree +4.x.y -stable kernel tree --------------------------- Kernels with 3-part versions are -stable kernels. They contain relatively small and critical fixes for security problems or significant -regressions discovered in a given 3.x kernel. +regressions discovered in a given 4.x kernel. This is the recommended branch for users who want the most recent stable kernel and are not interested in helping test development/experimental versions. 
-If no 3.x.y kernel is available, then the highest numbered 3.x +If no 4.x.y kernel is available, then the highest numbered 4.x kernel is the current stable kernel. -3.x.y are maintained by the "stable" team , and +4.x.y are maintained by the "stable" team , and are released as needs dictate. The normal release period is approximately two weeks, but it can be longer if there are no pressing problems. A security-related problem, instead, can cause a release to happen almost @@ -285,7 +285,7 @@ The file Documentation/stable_kernel_rules.txt in the kernel tree documents what kinds of changes are acceptable for the -stable tree, and how the release process works. -3.x -git patches +4.x -git patches ------------------ These are daily snapshots of Linus' kernel tree which are managed in a git repository (hence the name.) These patches are usually released @@ -317,9 +317,9 @@ revisions to it, and maintainers can mark patches as under review, accepted, or rejected. Most of these patchwork sites are listed at http://patchwork.kernel.org/. -3.x -next kernel tree for integration tests +4.x -next kernel tree for integration tests --------------------------------------------- -Before updates from subsystem trees are merged into the mainline 3.x +Before updates from subsystem trees are merged into the mainline 4.x tree, they need to be integration-tested. For this purpose, a special testing repository exists into which virtually all subsystem trees are pulled on an almost daily basis: From 1f58388f5997f960598a1a899313c9cb970d50dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20T=C3=A9nart?= Date: Tue, 18 Aug 2015 10:59:10 +0200 Subject: [PATCH 1131/2091] mtd: pxa3xx_nand: add a default chunk size commit bc3e00f04cc1fe033a289c2fc2e5c73c0168d360 upstream. When keeping the configuration set by the bootloader (by using the marvell,nand-keep-config property), the pxa3xx_nand_detect_config() function is called and set the chunk size to 512 as a default value if NDCR_PAGE_SZ is not set. In the other case, when not keeping the bootloader configuration, no chunk size is set. Fix this by adding a default chunk size of 512. Fixes: 70ed85232a93 ("mtd: nand: pxa3xx: Introduce multiple page I/O support") Signed-off-by: Antoine Tenart Acked-by: Robert Jarzmik Signed-off-by: Brian Norris Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/pxa3xx_nand.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/mtd/nand/pxa3xx_nand.c b/drivers/mtd/nand/pxa3xx_nand.c index a4615fcc3d001..94a357d93baba 100644 --- a/drivers/mtd/nand/pxa3xx_nand.c +++ b/drivers/mtd/nand/pxa3xx_nand.c @@ -1475,6 +1475,9 @@ static int pxa3xx_nand_scan(struct mtd_info *mtd) if (pdata->keep_config && !pxa3xx_nand_detect_config(info)) goto KEEP_CONFIG; + /* Set a default chunk size */ + info->chunk_size = 512; + ret = pxa3xx_nand_sensing(info); if (ret) { dev_info(&info->pdev->dev, "There is no chip on cs %d!\n", From 43c20144a4b4bd84d49bc1f5819481db9c000b34 Mon Sep 17 00:00:00 2001 From: Boris BREZILLON Date: Sun, 13 Sep 2015 18:14:43 +0200 Subject: [PATCH 1132/2091] mtd: nand: sunxi: fix sunxi_nand_chips_cleanup() commit 8e375ccda31ccc73b087134e263c48d2114534f4 upstream. The sunxi_nand_chips_cleanup() function is missing a call to list_del() which generates a double free error. 
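The underlying rule is that an entry must be unlinked from the list as it is released, so a later walk or a second cleanup pass never touches freed memory. A minimal standalone C sketch of a cleanup loop that performs the unlink step is shown below; the chip list and names are made up and stand in for the driver's list_for_each_entry_safe()/list_del() pattern.

    #include <stdio.h>
    #include <stdlib.h>

    struct chip {
            struct chip *next;          /* stand-in for the list node */
    };

    static struct chip *chips;

    static void chips_add(void)
    {
            struct chip *c = calloc(1, sizeof(*c));

            if (!c)
                    exit(1);
            c->next = chips;
            chips = c;
    }

    /* Unlink every entry as it is released; afterwards the list head is
     * empty, so running the cleanup again (or any later walk) finds
     * nothing instead of walking already-freed memory. */
    static void chips_cleanup(void)
    {
            while (chips) {
                    struct chip *c = chips;

                    chips = c->next;    /* the list_del() step the fix adds */
                    free(c);
            }
    }

    int main(void)
    {
            chips_add();
            chips_add();

            chips_cleanup();
            chips_cleanup();            /* harmless second pass: list is empty */
            printf("done\n");
            return 0;
    }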
Reported-by: Priit Laes Signed-off-by: Boris Brezillon Fixes: 1fef62c1423b ("mtd: nand: add sunxi NAND flash controller support") Tested-by: Priit Laes Signed-off-by: Brian Norris Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/sunxi_nand.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mtd/nand/sunxi_nand.c b/drivers/mtd/nand/sunxi_nand.c index 6f93b2990d250..4f67bbe30fe5f 100644 --- a/drivers/mtd/nand/sunxi_nand.c +++ b/drivers/mtd/nand/sunxi_nand.c @@ -1312,6 +1312,7 @@ static void sunxi_nand_chips_cleanup(struct sunxi_nfc *nfc) node); nand_release(&chip->mtd); sunxi_nand_ecc_cleanup(&chip->nand.ecc); + list_del(&chip->node); } } From 1a693d9cb531a2d869d767796f1f3934bb951d78 Mon Sep 17 00:00:00 2001 From: Boris BREZILLON Date: Mon, 14 Sep 2015 10:41:03 +0200 Subject: [PATCH 1133/2091] mtd: nand: sunxi: fix OOB handling in ->write_xxx() functions commit 03a0e8a7c5ea29b5c4e72dfd64900b47a8fb6f2d upstream. The USER_DATA register cannot be accessed using byte accessors on A13 SoCs, thus triggering a bug when using memcpy_toio on this register. Declare an helper macros to convert an OOB buffer into a suitable USER_DATA value and vice-versa. This patch also fixes an error in the oob_required logic (some OOB data are not written even if the user required it) by removing the oob_required condition, which is perfectly valid since the core already fill ->oob_poi with FFs when oob_required is false. Signed-off-by: Boris Brezillon Fixes: 1fef62c1423b ("mtd: nand: add sunxi NAND flash controller support") Signed-off-by: Brian Norris Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/sunxi_nand.c | 26 +++++++++----------------- 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/drivers/mtd/nand/sunxi_nand.c b/drivers/mtd/nand/sunxi_nand.c index 4f67bbe30fe5f..499b8e433d3da 100644 --- a/drivers/mtd/nand/sunxi_nand.c +++ b/drivers/mtd/nand/sunxi_nand.c @@ -138,6 +138,10 @@ #define NFC_ECC_MODE GENMASK(15, 12) #define NFC_RANDOM_SEED GENMASK(30, 16) +/* NFC_USER_DATA helper macros */ +#define NFC_BUF_TO_USER_DATA(buf) ((buf)[0] | ((buf)[1] << 8) | \ + ((buf)[2] << 16) | ((buf)[3] << 24)) + #define NFC_DEFAULT_TIMEOUT_MS 1000 #define NFC_SRAM_SIZE 1024 @@ -632,15 +636,9 @@ static int sunxi_nfc_hw_ecc_write_page(struct mtd_info *mtd, offset = layout->eccpos[i * ecc->bytes] - 4 + mtd->writesize; /* Fill OOB data in */ - if (oob_required) { - tmp = 0xffffffff; - memcpy_toio(nfc->regs + NFC_REG_USER_DATA_BASE, &tmp, - 4); - } else { - memcpy_toio(nfc->regs + NFC_REG_USER_DATA_BASE, - chip->oob_poi + offset - mtd->writesize, - 4); - } + writel(NFC_BUF_TO_USER_DATA(chip->oob_poi + + layout->oobfree[i].offset), + nfc->regs + NFC_REG_USER_DATA_BASE); chip->cmdfunc(mtd, NAND_CMD_RNDIN, offset, -1); @@ -770,14 +768,8 @@ static int sunxi_nfc_hw_syndrome_ecc_write_page(struct mtd_info *mtd, offset += ecc->size; /* Fill OOB data in */ - if (oob_required) { - tmp = 0xffffffff; - memcpy_toio(nfc->regs + NFC_REG_USER_DATA_BASE, &tmp, - 4); - } else { - memcpy_toio(nfc->regs + NFC_REG_USER_DATA_BASE, oob, - 4); - } + writel(NFC_BUF_TO_USER_DATA(oob), + nfc->regs + NFC_REG_USER_DATA_BASE); tmp = NFC_DATA_TRANS | NFC_DATA_SWAP_METHOD | NFC_ACCESS_DIR | (1 << 30); From 453995c8b13b02acc4e82c718c13c98373167655 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Tue, 4 Aug 2015 21:36:12 +0200 Subject: [PATCH 1134/2091] PM / AVS: rockchip-io: depend on CONFIG_POWER_AVS commit 28c1f1628ee4b163e615eefe1b6463e3d229a873 upstream. 
The rockchip io-domain driver currently only depends on ARCH_ROCKCHIP itself. This makes it possible to select the power-domain driver, but not the POWER_AVS class and results in the iodomain-driver not getting build in this case. So add the additional dependency, which also results in the driver config option now being placed nicely into the AVS submenu. Fixes: 662a958638bd ("PM / AVS: rockchip-io: add driver handling Rockchip io domains") Signed-off-by: Heiko Stuebner Acked-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/power/avs/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/power/avs/Kconfig b/drivers/power/avs/Kconfig index 7f3d389bd601e..a67eeace6a89b 100644 --- a/drivers/power/avs/Kconfig +++ b/drivers/power/avs/Kconfig @@ -13,7 +13,7 @@ menuconfig POWER_AVS config ROCKCHIP_IODOMAIN tristate "Rockchip IO domain support" - depends on ARCH_ROCKCHIP && OF + depends on POWER_AVS && ARCH_ROCKCHIP && OF help Say y here to enable support io domains on Rockchip SoCs. It is necessary for the io domain setting of the SoC to match the From bff1c3b39ae30ec13d10a581655ec07af5c76608 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 5 Aug 2015 16:51:11 +0300 Subject: [PATCH 1135/2091] device property: fix potential NULL pointer dereference commit ecc87eed7beeb50c0be0b73322d62135277ea2b0 upstream. In device_add_property_set() we check pset parameter for a NULL, but few lines later we do a pointer arithmetic without check that will crash kernel in the set_secondary_fwnode(). Here we check if pset parameter is NULL and return immediately. Fixes: 16ba08d5c9ec (device property: Introduce firmware node type for platform data) Signed-off-by: Andy Shevchenko Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/base/property.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/base/property.c b/drivers/base/property.c index 1d0b116cae959..0a60ef1500cdc 100644 --- a/drivers/base/property.c +++ b/drivers/base/property.c @@ -26,9 +26,10 @@ */ void device_add_property_set(struct device *dev, struct property_set *pset) { - if (pset) - pset->fwnode.type = FWNODE_PDATA; + if (!pset) + return; + pset->fwnode.type = FWNODE_PDATA; set_secondary_fwnode(dev, &pset->fwnode); } EXPORT_SYMBOL_GPL(device_add_property_set); From b25c5418235ec7d7389eb8ca157758639e5db328 Mon Sep 17 00:00:00 2001 From: Michal Kazior Date: Mon, 3 Aug 2015 11:16:43 +0200 Subject: [PATCH 1136/2091] ath10k: reject 11b tx fragmentation configuration commit 92092fe528e79c9bd25784ca0ef341d5a1d1b642 upstream. Even though there's a WMI enum for fragmentation threshold no known firmware actually implements it. Moreover it is not possible to rely frame fragmentation to mac80211 because firmware clears the "more fragments" bit in frame control making it impossible for remote devices to reassemble frames. Hence implement a dummy callback just to say fragmentation isn't supported. This effectively prevents mac80211 from doing frame fragmentation in software. This fixes Tx becoming broken after setting fragmentation threshold. 
Fixes: 1010ba4c5d1c ("ath10k: unregister and remove frag_threshold callback") Signed-off-by: Michal Kazior Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath10k/mac.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index 973485bd4121e..5e021b0b3f9e2 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -4464,6 +4464,21 @@ static int ath10k_set_rts_threshold(struct ieee80211_hw *hw, u32 value) return ret; } +static int ath10k_mac_op_set_frag_threshold(struct ieee80211_hw *hw, u32 value) +{ + /* Even though there's a WMI enum for fragmentation threshold no known + * firmware actually implements it. Moreover it is not possible to rely + * frame fragmentation to mac80211 because firmware clears the "more + * fragments" bit in frame control making it impossible for remote + * devices to reassemble frames. + * + * Hence implement a dummy callback just to say fragmentation isn't + * supported. This effectively prevents mac80211 from doing frame + * fragmentation in software. + */ + return -EOPNOTSUPP; +} + static void ath10k_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif, u32 queues, bool drop) { @@ -5108,6 +5123,7 @@ static const struct ieee80211_ops ath10k_ops = { .remain_on_channel = ath10k_remain_on_channel, .cancel_remain_on_channel = ath10k_cancel_remain_on_channel, .set_rts_threshold = ath10k_set_rts_threshold, + .set_frag_threshold = ath10k_mac_op_set_frag_threshold, .flush = ath10k_flush, .tx_last_beacon = ath10k_tx_last_beacon, .set_antenna = ath10k_set_antenna, From 71cb8ae2808b3f738d01a634f0ac7e18bbb0b0d6 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 26 Mar 2015 10:22:20 +0000 Subject: [PATCH 1137/2091] pcmcia: sa11x0: fix missing clk_put() in sa11x0 socket drivers commit 72010aca55264cfe6516a955066c846d3885b0c6 upstream. Fix the lack of clk_put() in sa11xx_base.c's error cleanup paths by converting the driver to the devm_* API. 
Fixes: 86d88bfca475 ("ARM: 8247/2: pcmcia: sa1100: make use of device clock") Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- drivers/pcmcia/sa1100_generic.c | 1 - drivers/pcmcia/sa11xx_base.c | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/pcmcia/sa1100_generic.c b/drivers/pcmcia/sa1100_generic.c index 803945259da80..42861cc701580 100644 --- a/drivers/pcmcia/sa1100_generic.c +++ b/drivers/pcmcia/sa1100_generic.c @@ -93,7 +93,6 @@ static int sa11x0_drv_pcmcia_remove(struct platform_device *dev) for (i = 0; i < sinfo->nskt; i++) soc_pcmcia_remove_one(&sinfo->skt[i]); - clk_put(sinfo->clk); kfree(sinfo); return 0; } diff --git a/drivers/pcmcia/sa11xx_base.c b/drivers/pcmcia/sa11xx_base.c index cf6de2c2b3293..553d70a67f808 100644 --- a/drivers/pcmcia/sa11xx_base.c +++ b/drivers/pcmcia/sa11xx_base.c @@ -222,7 +222,7 @@ int sa11xx_drv_pcmcia_probe(struct device *dev, struct pcmcia_low_level *ops, int i, ret = 0; struct clk *clk; - clk = clk_get(dev, NULL); + clk = devm_clk_get(dev, NULL); if (IS_ERR(clk)) return PTR_ERR(clk); @@ -251,7 +251,6 @@ int sa11xx_drv_pcmcia_probe(struct device *dev, struct pcmcia_low_level *ops, if (ret) { while (--i >= 0) soc_pcmcia_remove_one(&sinfo->skt[i]); - clk_put(clk); kfree(sinfo); } else { dev_set_drvdata(dev, sinfo); From cd0264a0f8e61cc2ee55600d7878eb65566bb82a Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Wed, 19 Aug 2015 11:47:06 -0300 Subject: [PATCH 1138/2091] ipr: Enable SIS pipe commands for SIS-32 devices. commit e35d7f27fbd51a09a41a5439e39f22a3d102c00b upstream. Remove unnecessary check that disabled SIS pipe commands for SIS-32 devices. This change was sufficient to enable raw mode and send SIS pipe commands for a 57B3 device. Fixes: f8ee25d7d239 ("ipr: AF DASD raw mode implementation in ipr driver") Signed-off-by: Gabriel Krisman Bertazi Reviewed-by: Wen Xiong Acked-by: Brian King Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/ipr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index a9aa38903efef..cccab6188328f 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -4554,7 +4554,7 @@ static ssize_t ipr_store_raw_mode(struct device *dev, spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); res = (struct ipr_resource_entry *)sdev->hostdata; if (res) { - if (ioa_cfg->sis64 && ipr_is_af_dasd_device(res)) { + if (ipr_is_af_dasd_device(res)) { res->raw_mode = simple_strtoul(buf, NULL, 10); len = strlen(buf); if (res->sdev) From 86d7065eada688b6b8d2687cbc045ef88c43973f Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Sat, 19 Sep 2015 07:00:18 -0700 Subject: [PATCH 1139/2091] regmap: debugfs: Ensure we don't underflow when printing access masks commit b763ec17ac762470eec5be8ebcc43e4f8b2c2b82 upstream. If a read is attempted which is smaller than the line length then we may underflow the subtraction we're doing with the unsigned size_t type so move some of the calculation to be additions on the right hand side instead in order to avoid this. 
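The wrap-around is easy to reproduce in isolation: with size_t arithmetic, "count - 1 - tot_len" becomes a huge value whenever the requested read is shorter than one formatted line, so the bounds check never fires. A small standalone C sketch with made-up numbers shows the failing form next to the rewritten all-additions form.

    #include <stdio.h>
    #include <stddef.h>

    int main(void)
    {
            size_t count   = 4;     /* user asked for a 4-byte read */
            size_t tot_len = 16;    /* one formatted line needs 16 bytes */
            size_t buf_pos = 0;

            /* Old form: the unsigned subtraction wraps to a huge value,
             * so the "stop before overrunning the buffer" test never fires. */
            if (buf_pos >= count - 1 - tot_len)
                    printf("old check: would stop (never reached here)\n");
            else
                    printf("old check: keeps going, count-1-tot_len = %zu\n",
                           count - 1 - tot_len);

            /* Rewritten form: keep everything as additions on the
             * right-hand side, so nothing can wrap below zero. */
            if (buf_pos + tot_len + 1 >= count)
                    printf("new check: stops, line does not fit in %zu bytes\n",
                           count);

            return 0;
    }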
Reported-by: Rasmus Villemoes Signed-off-by: Mark Brown Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/base/regmap/regmap-debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/regmap/regmap-debugfs.c b/drivers/base/regmap/regmap-debugfs.c index 5799a0b9e6cc4..1f6cd1583a809 100644 --- a/drivers/base/regmap/regmap-debugfs.c +++ b/drivers/base/regmap/regmap-debugfs.c @@ -432,7 +432,7 @@ static ssize_t regmap_access_read_file(struct file *file, /* If we're in the region the user is trying to read */ if (p >= *ppos) { /* ...but not beyond it */ - if (buf_pos >= count - 1 - tot_len) + if (buf_pos + tot_len + 1 >= count) break; /* Format the register */ From 6340b49a7a0eade043c80a1c95842273e93eaa67 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Sat, 19 Sep 2015 07:12:34 -0700 Subject: [PATCH 1140/2091] regmap: debugfs: Don't bother actually printing when calculating max length commit 176fc2d5770a0990eebff903ba680d2edd32e718 upstream. The in kernel snprintf() will conveniently return the actual length of the printed string even if not given an output beffer at all so just do that rather than relying on the user to pass in a suitable buffer, ensuring that we don't need to worry if the buffer was truncated due to the size of the buffer passed in. Reported-by: Rasmus Villemoes Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/base/regmap/regmap-debugfs.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/base/regmap/regmap-debugfs.c b/drivers/base/regmap/regmap-debugfs.c index 1f6cd1583a809..c8941f39c9190 100644 --- a/drivers/base/regmap/regmap-debugfs.c +++ b/drivers/base/regmap/regmap-debugfs.c @@ -32,8 +32,7 @@ static DEFINE_MUTEX(regmap_debugfs_early_lock); /* Calculate the length of a fixed format */ static size_t regmap_calc_reg_len(int max_val, char *buf, size_t buf_size) { - snprintf(buf, buf_size, "%x", max_val); - return strlen(buf); + return snprintf(NULL, 0, "%x", max_val); } static ssize_t regmap_name_read_file(struct file *file, From a8ca14b73cf8b141c48d3ac47821b260b00d2462 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 18 Sep 2015 23:41:23 +0200 Subject: [PATCH 1141/2091] security: fix typo in security_task_prctl commit b7f76ea2ef6739ee484a165ffbac98deb855d3d3 upstream. Signed-off-by: Jann Horn Reviewed-by: Andy Lutomirski Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/security.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/security.h b/include/linux/security.h index 18264ea9e3141..5d45b4fd91d2a 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -2527,7 +2527,7 @@ static inline int security_task_prctl(int option, unsigned long arg2, unsigned long arg4, unsigned long arg5) { - return cap_task_prctl(option, arg2, arg3, arg3, arg5); + return cap_task_prctl(option, arg2, arg3, arg4, arg5); } static inline void security_task_to_inode(struct task_struct *p, struct inode *inode) From 29a6161885e2fe7638e271a406521ba9ddd9e89c Mon Sep 17 00:00:00 2001 From: Bin Liu Date: Wed, 16 Sep 2015 14:49:28 -0500 Subject: [PATCH 1142/2091] usb: musb: dsps: fix polling in device-only mode commit b8239dcc03afbd0886c1d9b91ba8fee7c6c9a6cb upstream. Fix the regression caused by commit ad78c918602 ("usb: musb: dsps: just start polling already") which causes polling the ID pin status even in device-only mode. 
Fixes: ad78c918602c ("usb: musb: dsps: just start polling already") Signed-off-by: Bin Liu Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_dsps.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/usb/musb/musb_dsps.c b/drivers/usb/musb/musb_dsps.c index 65d931a28a14c..dcac5e7f19e0b 100644 --- a/drivers/usb/musb/musb_dsps.c +++ b/drivers/usb/musb/musb_dsps.c @@ -225,8 +225,11 @@ static void dsps_musb_enable(struct musb *musb) dsps_writel(reg_base, wrp->epintr_set, epmask); dsps_writel(reg_base, wrp->coreintr_set, coremask); - /* start polling for ID change. */ - mod_timer(&glue->timer, jiffies + msecs_to_jiffies(wrp->poll_timeout)); + /* start polling for ID change in dual-role idle mode */ + if (musb->xceiv->otg->state == OTG_STATE_B_IDLE && + musb->port_mode == MUSB_PORT_MODE_DUAL_ROLE) + mod_timer(&glue->timer, jiffies + + msecs_to_jiffies(wrp->poll_timeout)); dsps_musb_try_idle(musb, 0); } From 7e1f01e6c0e99241b3b03e71385d0f4dfb9180ab Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Mon, 24 Aug 2015 14:10:07 +0800 Subject: [PATCH 1143/2091] usb: chipidea: udc: using the correct stall implementation commit 56ffa1d154c7e12af16273f0cdc42690dd05caf5 upstream. According to spec, there are functional and protocol stalls. For functional stall, it is for bulk and interrupt endpoints, below are cases for it: - Host sends SET_FEATURE request for Set-Halt, the udc driver needs to set stall, and return true unconditionally. - The gadget driver may call usb_ep_set_halt to stall certain endpoints, if there is a transfer in pending, the udc driver should not set stall, and return -EAGAIN accordingly. These two kinds of stall need to be cleared by host using CLEAR_FEATURE request (Clear-Halt). For protocol stall, it is for control endpoint, this stall will be set if the control request has failed. This stall will be cleared by next setup request (hardware will do it). It fixed usbtest (drivers/usb/misc/usbtest.c) Test 13 "set/clear halt" test failure, meanwhile, this change has been verified by USB2 CV Compliance Test and MSC Tests. Cc: Alan Stern Cc: Felipe Balbi Signed-off-by: Peter Chen Signed-off-by: Greg Kroah-Hartman --- drivers/usb/chipidea/udc.c | 84 ++++++++++++++++++++------------------ 1 file changed, 44 insertions(+), 40 deletions(-) diff --git a/drivers/usb/chipidea/udc.c b/drivers/usb/chipidea/udc.c index 764f668d45a9b..6e53c24fa1cb2 100644 --- a/drivers/usb/chipidea/udc.c +++ b/drivers/usb/chipidea/udc.c @@ -656,6 +656,44 @@ __acquires(hwep->lock) return 0; } +static int _ep_set_halt(struct usb_ep *ep, int value, bool check_transfer) +{ + struct ci_hw_ep *hwep = container_of(ep, struct ci_hw_ep, ep); + int direction, retval = 0; + unsigned long flags; + + if (ep == NULL || hwep->ep.desc == NULL) + return -EINVAL; + + if (usb_endpoint_xfer_isoc(hwep->ep.desc)) + return -EOPNOTSUPP; + + spin_lock_irqsave(hwep->lock, flags); + + if (value && hwep->dir == TX && check_transfer && + !list_empty(&hwep->qh.queue) && + !usb_endpoint_xfer_control(hwep->ep.desc)) { + spin_unlock_irqrestore(hwep->lock, flags); + return -EAGAIN; + } + + direction = hwep->dir; + do { + retval |= hw_ep_set_halt(hwep->ci, hwep->num, hwep->dir, value); + + if (!value) + hwep->wedge = 0; + + if (hwep->type == USB_ENDPOINT_XFER_CONTROL) + hwep->dir = (hwep->dir == TX) ? 
RX : TX; + + } while (hwep->dir != direction); + + spin_unlock_irqrestore(hwep->lock, flags); + return retval; +} + + /** * _gadget_stop_activity: stops all USB activity, flushes & disables all endpts * @gadget: gadget @@ -1051,7 +1089,7 @@ __acquires(ci->lock) num += ci->hw_ep_max / 2; spin_unlock(&ci->lock); - err = usb_ep_set_halt(&ci->ci_hw_ep[num].ep); + err = _ep_set_halt(&ci->ci_hw_ep[num].ep, 1, false); spin_lock(&ci->lock); if (!err) isr_setup_status_phase(ci); @@ -1110,8 +1148,8 @@ __acquires(ci->lock) if (err < 0) { spin_unlock(&ci->lock); - if (usb_ep_set_halt(&hwep->ep)) - dev_err(ci->dev, "error: ep_set_halt\n"); + if (_ep_set_halt(&hwep->ep, 1, false)) + dev_err(ci->dev, "error: _ep_set_halt\n"); spin_lock(&ci->lock); } } @@ -1142,9 +1180,9 @@ __acquires(ci->lock) err = isr_setup_status_phase(ci); if (err < 0) { spin_unlock(&ci->lock); - if (usb_ep_set_halt(&hwep->ep)) + if (_ep_set_halt(&hwep->ep, 1, false)) dev_err(ci->dev, - "error: ep_set_halt\n"); + "error: _ep_set_halt\n"); spin_lock(&ci->lock); } } @@ -1390,41 +1428,7 @@ static int ep_dequeue(struct usb_ep *ep, struct usb_request *req) */ static int ep_set_halt(struct usb_ep *ep, int value) { - struct ci_hw_ep *hwep = container_of(ep, struct ci_hw_ep, ep); - int direction, retval = 0; - unsigned long flags; - - if (ep == NULL || hwep->ep.desc == NULL) - return -EINVAL; - - if (usb_endpoint_xfer_isoc(hwep->ep.desc)) - return -EOPNOTSUPP; - - spin_lock_irqsave(hwep->lock, flags); - -#ifndef STALL_IN - /* g_file_storage MS compliant but g_zero fails chapter 9 compliance */ - if (value && hwep->type == USB_ENDPOINT_XFER_BULK && hwep->dir == TX && - !list_empty(&hwep->qh.queue)) { - spin_unlock_irqrestore(hwep->lock, flags); - return -EAGAIN; - } -#endif - - direction = hwep->dir; - do { - retval |= hw_ep_set_halt(hwep->ci, hwep->num, hwep->dir, value); - - if (!value) - hwep->wedge = 0; - - if (hwep->type == USB_ENDPOINT_XFER_CONTROL) - hwep->dir = (hwep->dir == TX) ? RX : TX; - - } while (hwep->dir != direction); - - spin_unlock_irqrestore(hwep->lock, flags); - return retval; + return _ep_set_halt(ep, value, true); } /** From 74830c233c3c2dc4b034399f3d4d029e35db9a11 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Mon, 21 Sep 2015 17:46:09 +0300 Subject: [PATCH 1144/2091] usb: Use the USB_SS_MULT() macro to get the burst multiplier. commit ff30cbc8da425754e8ab96904db1d295bd034f27 upstream. Bits 1:0 of the bmAttributes are used for the burst multiplier. The rest of the bits used to be reserved (zero), but USB3.1 takes bit 7 into use. Use the existing USB_SS_MULT() macro instead to make sure the mult value and hence max packet calculations are correct for USB3.1 devices. Note that burst multiplier in bmAttributes is zero based and that the USB_SS_MULT() macro adds one. 
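As a rough sketch of the arithmetic (the macro below mirrors USB_SS_MULT() as described in the message above and is shown only for illustration):

    #include <stdio.h>

    /* Only bits 1:0 carry the zero-based multiplier; the macro adds one. */
    #define USB_SS_MULT(p)  (1 + ((p) & 0x3))

    int main(void)
    {
            unsigned int bmAttributes = 0x82; /* USB3.1: bit 7 set, mult bits = 2 */

            /* Comparing the raw field against 2 wrongly flags this endpoint... */
            printf("raw bmAttributes      = %u\n", bmAttributes);
            /* ...while the masked, one-based multiplier is a legal 3. */
            printf("USB_SS_MULT() result  = %d\n", USB_SS_MULT(bmAttributes));
            return 0;
    }
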
Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/config.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index b2a540b43f97c..b9ddf0c1ffe59 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -112,7 +112,7 @@ static void usb_parse_ss_endpoint_companion(struct device *ddev, int cfgno, cfgno, inum, asnum, ep->desc.bEndpointAddress); ep->ss_ep_comp.bmAttributes = 16; } else if (usb_endpoint_xfer_isoc(&ep->desc) && - desc->bmAttributes > 2) { + USB_SS_MULT(desc->bmAttributes) > 3) { dev_warn(ddev, "Isoc endpoint has Mult of %d in " "config %d interface %d altsetting %d ep %d: " "setting to 3\n", desc->bmAttributes + 1, @@ -121,7 +121,8 @@ static void usb_parse_ss_endpoint_companion(struct device *ddev, int cfgno, } if (usb_endpoint_xfer_isoc(&ep->desc)) - max_tx = (desc->bMaxBurst + 1) * (desc->bmAttributes + 1) * + max_tx = (desc->bMaxBurst + 1) * + (USB_SS_MULT(desc->bmAttributes)) * usb_endpoint_maxp(&ep->desc); else if (usb_endpoint_xfer_int(&ep->desc)) max_tx = usb_endpoint_maxp(&ep->desc) * From 860964cd6311e32b217a180c262c938d366a28b4 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Thu, 13 Aug 2015 13:28:42 +0300 Subject: [PATCH 1145/2091] usb: phy: phy-generic: Fix reset behaviour on legacy boot commit 762982db33b23029e98c844611e2e8beeb75bc0d upstream. The gpio-desc migration done in v4.0 caused a regression with legacy boots due to reversed reset logic. e.g. omap3-beagle USB host breaks on legacy boot. Request the reset GPIO with GPIOF_ACTIVE_LOW flag so that it matches the driver logic and pin behaviour. Fixes: e9f2cefb0cdc ("usb: phy: generic: migrate to gpio_desc") Tested-by: Fabio Estevam Signed-off-by: Roger Quadros Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/phy/phy-generic.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/phy/phy-generic.c b/drivers/usb/phy/phy-generic.c index deee68eafb72a..0cd85f2ccddd3 100644 --- a/drivers/usb/phy/phy-generic.c +++ b/drivers/usb/phy/phy-generic.c @@ -230,7 +230,8 @@ int usb_phy_gen_create_phy(struct device *dev, struct usb_phy_generic *nop, clk_rate = pdata->clk_rate; needs_vcc = pdata->needs_vcc; if (gpio_is_valid(pdata->gpio_reset)) { - err = devm_gpio_request_one(dev, pdata->gpio_reset, 0, + err = devm_gpio_request_one(dev, pdata->gpio_reset, + GPIOF_ACTIVE_LOW, dev_name(dev)); if (!err) nop->gpiod_reset = From 4cd1e73994526a6647e16b631ba76bbb4944660c Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Thu, 6 Aug 2015 10:51:29 -0500 Subject: [PATCH 1146/2091] usb: musb: cppi41: allow it to work again commit b0a688ddcc5015eb26000c63841db7c46cfb380a upstream. since commit 33c300cb90a6 ("usb: musb: dsps: don't fake of_node to musb core") we have been preventing CPPI 4.1 from probing due to NULL of_node. We can't revert said commit otherwise a different regression would show up, so the fix is to look for the parent device's (glue layer's) of_node instead, since that's the thing which is actually described in DTS. 
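A minimal sketch of the resulting lookup pattern (get_cppi_chan() is a made-up helper, not the driver function; the real change is in the diff below):

    #include <linux/device.h>
    #include <linux/dmaengine.h>
    #include <linux/of.h>

    /* The musb core device has no of_node of its own, so DMA channels
     * are requested against the parent (glue) device, which is what the
     * device tree actually describes. */
    static struct dma_chan *get_cppi_chan(struct device *musb_dev,
                                          const char *name)
    {
            if (!musb_dev->parent->of_node)
                    return NULL;            /* no DT description available */

            return dma_request_slave_channel(musb_dev->parent, name);
    }
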
Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_cppi41.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/musb/musb_cppi41.c b/drivers/usb/musb/musb_cppi41.c index 8bd8c5e26921c..d5a1407456400 100644 --- a/drivers/usb/musb/musb_cppi41.c +++ b/drivers/usb/musb/musb_cppi41.c @@ -614,7 +614,7 @@ static int cppi41_dma_controller_start(struct cppi41_dma_controller *controller) { struct musb *musb = controller->musb; struct device *dev = musb->controller; - struct device_node *np = dev->of_node; + struct device_node *np = dev->parent->of_node; struct cppi41_dma_channel *cppi41_channel; int count; int i; @@ -664,7 +664,7 @@ static int cppi41_dma_controller_start(struct cppi41_dma_controller *controller) musb_dma->status = MUSB_DMA_STATUS_FREE; musb_dma->max_len = SZ_4M; - dc = dma_request_slave_channel(dev, str); + dc = dma_request_slave_channel(dev->parent, str); if (!dc) { dev_err(dev, "Failed to request %s.\n", str); ret = -EPROBE_DEFER; @@ -694,7 +694,7 @@ struct dma_controller *dma_controller_create(struct musb *musb, struct cppi41_dma_controller *controller; int ret = 0; - if (!musb->controller->of_node) { + if (!musb->controller->parent->of_node) { dev_err(musb->controller, "Need DT for the DMA engine.\n"); return NULL; } From 4c381d2d524c82bee6c25f277faa2f341c98dbd1 Mon Sep 17 00:00:00 2001 From: Alexander Inyukhin Date: Sat, 26 Sep 2015 15:24:21 +0300 Subject: [PATCH 1147/2091] USB: chaoskey read offset bug commit 1d5c47f555c5ae050fad22e4a99f88856cae5d05 upstream. Rng reads in chaoskey driver could return the same data under the certain conditions. Signed-off-by: Alexander Inyukhin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/chaoskey.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/misc/chaoskey.c b/drivers/usb/misc/chaoskey.c index 3ad5d19e4d04e..23c794813e6a9 100644 --- a/drivers/usb/misc/chaoskey.c +++ b/drivers/usb/misc/chaoskey.c @@ -472,7 +472,7 @@ static int chaoskey_rng_read(struct hwrng *rng, void *data, if (this_time > max) this_time = max; - memcpy(data, dev->buf, this_time); + memcpy(data, dev->buf + dev->used, this_time); dev->used += this_time; From 866713d4ed8be3ced20229f31ce1ac9d4d96a7e9 Mon Sep 17 00:00:00 2001 From: Vincent Palatin Date: Thu, 1 Oct 2015 14:10:22 -0700 Subject: [PATCH 1148/2091] usb: Add device quirk for Logitech PTZ cameras commit 72194739f54607bbf8cfded159627a2015381557 upstream. Add a device quirk for the Logitech PTZ Pro Camera and its sibling the ConferenceCam CC3000e Camera. This fixes the failed camera enumeration on some boot, particularly on machines with fast CPU. Tested by connecting a Logitech PTZ Pro Camera to a machine with a Haswell Core i7-4600U CPU @ 2.10GHz, and doing thousands of reboot cycles while recording the kernel logs and taking camera picture after each boot. Before the patch, more than 7% of the boots show some enumeration transfer failures and in a few of them, the kernel is giving up before actually enumerating the webcam. After the patch, the enumeration has been correct on every reboot. 
Signed-off-by: Vincent Palatin Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index d85abfed84cca..0a56de7a88be8 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -54,6 +54,13 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x046d, 0x082d), .driver_info = USB_QUIRK_DELAY_INIT }, { USB_DEVICE(0x046d, 0x0843), .driver_info = USB_QUIRK_DELAY_INIT }, + /* Logitech ConferenceCam CC3000e */ + { USB_DEVICE(0x046d, 0x0847), .driver_info = USB_QUIRK_DELAY_INIT }, + { USB_DEVICE(0x046d, 0x0848), .driver_info = USB_QUIRK_DELAY_INIT }, + + /* Logitech PTZ Pro Camera */ + { USB_DEVICE(0x046d, 0x0853), .driver_info = USB_QUIRK_DELAY_INIT }, + /* Logitech Quickcam Fusion */ { USB_DEVICE(0x046d, 0x08c1), .driver_info = USB_QUIRK_RESET_RESUME }, From 408bfba9544679db7a3c6a4268d4e668e35e5f0e Mon Sep 17 00:00:00 2001 From: Yao-Wen Mao Date: Mon, 31 Aug 2015 14:24:09 +0800 Subject: [PATCH 1149/2091] USB: Add reset-resume quirk for two Plantronics usb headphones. commit 8484bf2981b3d006426ac052a3642c9ce1d8d980 upstream. These two headphones need a reset-resume quirk to properly resume to original volume level. Signed-off-by: Yao-Wen Mao Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 0a56de7a88be8..f5a381945db28 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -85,6 +85,12 @@ static const struct usb_device_id usb_quirk_list[] = { /* Philips PSC805 audio device */ { USB_DEVICE(0x0471, 0x0155), .driver_info = USB_QUIRK_RESET_RESUME }, + /* Plantronic Audio 655 DSP */ + { USB_DEVICE(0x047f, 0xc008), .driver_info = USB_QUIRK_RESET_RESUME }, + + /* Plantronic Audio 648 USB */ + { USB_DEVICE(0x047f, 0xc013), .driver_info = USB_QUIRK_RESET_RESUME }, + /* Artisman Watchdog Dongle */ { USB_DEVICE(0x04b4, 0x0526), .driver_info = USB_QUIRK_CONFIG_INTF_STRINGS }, From 7a84668027d9936f8f8bd0eb6d0c07964ef2ade0 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 8 Aug 2015 10:46:02 +0200 Subject: [PATCH 1150/2091] cpu/cacheinfo: Fix teardown path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 2110d70c5e58326a10e93cfefdc0b3686e2ada12 upstream. Philip Müller reported a hang when booting 32-bit 4.1 kernel on an AMD box. A fragment of the splat was enough to pinpoint the issue: task: f58e0000 ti: f58e8000 task.ti: f58e800 EIP: 0060:[] EFLAGS: 00010206 CPU: 0 EIP is at free_cache_attributes+0x83/0xd0 EAX: 00000001 EBX: f589d46c ECX: 00000090 EDX: 360c2000 ESI: 00000000 EDI: c1724a80 EBP: f58e9ec0 ESP: f58e9ea0 DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 CR0: 8005003b CR2: 000000ac CR3: 01731000 CR4: 000006d0 cache_shared_cpu_map_setup() did check sibling CPUs cacheinfo descriptor while the respective teardown path cache_shared_cpu_map_remove() didn't. Fix that. >From tglx's version: to be on the safe side, move the cacheinfo descriptor check to free_cache_attributes(), thus cleaning up the hotplug path a little and making this even more robust. Reported-and-tested-by: Philip Müller Reviewed-by: Thomas Gleixner Acked-by: Sudeep Holla Cc: Andre Przywara Cc: Guenter Roeck Cc: "H. 
Peter Anvin" Cc: Ingo Molnar Cc: linux-kernel@vger.kernel.org Cc: manjaro-dev@manjaro.org Cc: Philip Müller Link: https://lkml.kernel.org/r/55B47BB8.6080202@manjaro.org Signed-off-by: Borislav Petkov Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/base/cacheinfo.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c index df0c66cb7ad37..fdba441457ecd 100644 --- a/drivers/base/cacheinfo.c +++ b/drivers/base/cacheinfo.c @@ -148,7 +148,11 @@ static void cache_shared_cpu_map_remove(unsigned int cpu) if (sibling == cpu) /* skip itself */ continue; + sib_cpu_ci = get_cpu_cacheinfo(sibling); + if (!sib_cpu_ci->info_list) + continue; + sib_leaf = sib_cpu_ci->info_list + index; cpumask_clear_cpu(cpu, &sib_leaf->shared_cpu_map); cpumask_clear_cpu(sibling, &this_leaf->shared_cpu_map); @@ -159,6 +163,9 @@ static void cache_shared_cpu_map_remove(unsigned int cpu) static void free_cache_attributes(unsigned int cpu) { + if (!per_cpu_cacheinfo(cpu)) + return; + cache_shared_cpu_map_remove(cpu); kfree(per_cpu_cacheinfo(cpu)); @@ -514,8 +521,7 @@ static int cacheinfo_cpu_callback(struct notifier_block *nfb, break; case CPU_DEAD: cache_remove_dev(cpu); - if (per_cpu_cacheinfo(cpu)) - free_cache_attributes(cpu); + free_cache_attributes(cpu); break; } return notifier_from_errno(rc); From 9885de37b322fe2b5fde15db6a080b6bde59fa74 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 2 Sep 2015 14:36:50 +0530 Subject: [PATCH 1151/2091] cpufreq: dt: Tolerance applies on both sides of target voltage commit a2022001cebd0825b96aa0f3345ea3ad44ae79d4 upstream. Tolerance applies on both sides of the target voltage, i.e. both min and max sides. But while checking if a voltage is supported by the regulator or not, we haven't taken care of tolerance on the lower side. Fix that. Cc: Lucas Stach Fixes: 045ee45c4ff2 ("cpufreq: cpufreq-dt: disable unsupported OPPs") Signed-off-by: Viresh Kumar Reviewed-by: Lucas Stach Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/cpufreq-dt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index bab67db54b7eb..663045ce6face 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -255,7 +255,8 @@ static int cpufreq_init(struct cpufreq_policy *policy) rcu_read_unlock(); tol_uV = opp_uV * priv->voltage_tolerance / 100; - if (regulator_is_supported_voltage(cpu_reg, opp_uV, + if (regulator_is_supported_voltage(cpu_reg, + opp_uV - tol_uV, opp_uV + tol_uV)) { if (opp_uV < min_uV) min_uV = opp_uV; From 88108b384f4817c9f935ad0932e2fce80d790a2e Mon Sep 17 00:00:00 2001 From: James Hogan Date: Fri, 27 Mar 2015 08:33:43 +0000 Subject: [PATCH 1152/2091] MIPS: dma-default: Fix 32-bit fall back to GFP_DMA commit 53960059d56ecef67d4ddd546731623641a3d2d1 upstream. If there is a DMA zone (usually 24bit = 16MB I believe), but no DMA32 zone, as is the case for some 32-bit kernels, then massage_gfp_flags() will cause DMA memory allocated for devices with a 32..63-bit coherent_dma_mask to fall back to using __GFP_DMA, even though there may only be 32-bits of physical address available anyway. Correct that case to compare against a mask the size of phys_addr_t instead of always using a 64-bit mask. 
Signed-off-by: James Hogan Fixes: a2e715a86c6d ("MIPS: DMA: Fix computation of DMA flags from device's coherent_dma_mask.") Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/9610/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/mm/dma-default.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c index 609d1241b0c47..371eec1136591 100644 --- a/arch/mips/mm/dma-default.c +++ b/arch/mips/mm/dma-default.c @@ -100,7 +100,7 @@ static gfp_t massage_gfp_flags(const struct device *dev, gfp_t gfp) else #endif #if defined(CONFIG_ZONE_DMA) && !defined(CONFIG_ZONE_DMA32) - if (dev->coherent_dma_mask < DMA_BIT_MASK(64)) + if (dev->coherent_dma_mask < DMA_BIT_MASK(sizeof(phys_addr_t) * 8)) dma_flag = __GFP_DMA; else #endif From 505f068df9dd37bfa86114f746bacedc2b32437a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Tue, 16 Jun 2015 17:10:23 +0200 Subject: [PATCH 1153/2091] batman-adv: Make NC capability changes atomic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 4635469f5c617282f18c69643af36cd8c0acf707 upstream. Bitwise OR/AND assignments in C aren't guaranteed to be atomic. One OGM handler might undo the set/clear of a specific bit from another handler run in between. Fix this by using the atomic set_bit()/clear_bit()/test_bit() functions. Fixes: 3f4841ffb336 ("batman-adv: tvlv - add network coding container") Signed-off-by: Linus Lüssing Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/network-coding.c | 7 ++++--- net/batman-adv/types.h | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index 127cc4d7380a1..a449195c5b2b3 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -15,6 +15,7 @@ * along with this program; if not, see . 
*/ +#include #include #include "main.h" @@ -105,9 +106,9 @@ static void batadv_nc_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, uint16_t tvlv_value_len) { if (flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND) - orig->capabilities &= ~BATADV_ORIG_CAPA_HAS_NC; + clear_bit(BATADV_ORIG_CAPA_HAS_NC, &orig->capabilities); else - orig->capabilities |= BATADV_ORIG_CAPA_HAS_NC; + set_bit(BATADV_ORIG_CAPA_HAS_NC, &orig->capabilities); } /** @@ -871,7 +872,7 @@ void batadv_nc_update_nc_node(struct batadv_priv *bat_priv, goto out; /* check if orig node is network coding enabled */ - if (!(orig_node->capabilities & BATADV_ORIG_CAPA_HAS_NC)) + if (!test_bit(BATADV_ORIG_CAPA_HAS_NC, &orig_node->capabilities)) goto out; /* accept ogms from 'good' neighbors and single hop neighbors */ diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 4fff13cc66b4e..767390318c86a 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -297,7 +297,7 @@ struct batadv_orig_node { */ enum batadv_orig_capabilities { BATADV_ORIG_CAPA_HAS_DAT, - BATADV_ORIG_CAPA_HAS_NC = BIT(1), + BATADV_ORIG_CAPA_HAS_NC, BATADV_ORIG_CAPA_HAS_TT = BIT(2), BATADV_ORIG_CAPA_HAS_MCAST = BIT(3), }; From 3dd853ed30efc82a19f3a1b5a05760d5803601e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Tue, 16 Jun 2015 17:10:24 +0200 Subject: [PATCH 1154/2091] batman-adv: Make TT capability changes atomic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit ac4eebd48461ec993e7cb614d5afe7df8c72e6b7 upstream. Bitwise OR/AND assignments in C aren't guaranteed to be atomic. One OGM handler might undo the set/clear of a specific bit from another handler run in between. Fix this by using the atomic set_bit()/clear_bit()/test_bit() functions. Fixes: e17931d1a61d ("batman-adv: introduce capability initialization bitfield") Signed-off-by: Linus Lüssing Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/translation-table.c | 8 +++++--- net/batman-adv/types.h | 4 ++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index deb87c6870aaa..4f2a9d2c56dbb 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -15,6 +15,7 @@ * along with this program; if not, see . 
*/ +#include #include "main.h" #include "translation-table.h" #include "soft-interface.h" @@ -1860,7 +1861,7 @@ void batadv_tt_global_del_orig(struct batadv_priv *bat_priv, } spin_unlock_bh(list_lock); } - orig_node->capa_initialized &= ~BATADV_ORIG_CAPA_HAS_TT; + clear_bit(BATADV_ORIG_CAPA_HAS_TT, &orig_node->capa_initialized); } static bool batadv_tt_global_to_purge(struct batadv_tt_global_entry *tt_global, @@ -2819,7 +2820,7 @@ static void _batadv_tt_update_changes(struct batadv_priv *bat_priv, return; } } - orig_node->capa_initialized |= BATADV_ORIG_CAPA_HAS_TT; + set_bit(BATADV_ORIG_CAPA_HAS_TT, &orig_node->capa_initialized); } static void batadv_tt_fill_gtable(struct batadv_priv *bat_priv, @@ -3321,7 +3322,8 @@ static void batadv_tt_update_orig(struct batadv_priv *bat_priv, bool has_tt_init; tt_vlan = (struct batadv_tvlv_tt_vlan_data *)tt_buff; - has_tt_init = orig_node->capa_initialized & BATADV_ORIG_CAPA_HAS_TT; + has_tt_init = test_bit(BATADV_ORIG_CAPA_HAS_TT, + &orig_node->capa_initialized); /* orig table not initialised AND first diff is in the OGM OR the ttvn * increased by one -> we can apply the attached changes diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 767390318c86a..4d3de8d8f331e 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -257,7 +257,7 @@ struct batadv_orig_node { struct hlist_node mcast_want_all_ipv6_node; #endif unsigned long capabilities; - uint8_t capa_initialized; + unsigned long capa_initialized; atomic_t last_ttvn; unsigned char *tt_buff; int16_t tt_buff_len; @@ -298,7 +298,7 @@ struct batadv_orig_node { enum batadv_orig_capabilities { BATADV_ORIG_CAPA_HAS_DAT, BATADV_ORIG_CAPA_HAS_NC, - BATADV_ORIG_CAPA_HAS_TT = BIT(2), + BATADV_ORIG_CAPA_HAS_TT, BATADV_ORIG_CAPA_HAS_MCAST = BIT(3), }; From 8dbeac75e6c679b87a6b50f0fc05c31cec1ed58c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Tue, 16 Jun 2015 17:10:25 +0200 Subject: [PATCH 1155/2091] batman-adv: Make MCAST capability changes atomic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 9c936e3f4c4fad07abb6c082a89508b8f724c88f upstream. Bitwise OR/AND assignments in C aren't guaranteed to be atomic. One OGM handler might undo the set/clear of a specific bit from another handler run in between. Fix this by using the atomic set_bit()/clear_bit()/test_bit() functions. Fixes: 60432d756cf0 ("batman-adv: Announce new capability via multicast TVLV") Signed-off-by: Linus Lüssing Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/multicast.c | 18 ++++++++++-------- net/batman-adv/types.h | 2 +- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index b24e4bb64fb5f..6595ec22df31a 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -15,6 +15,7 @@ * along with this program; if not, see . */ +#include #include "main.h" #include "multicast.h" #include "originator.h" @@ -674,29 +675,30 @@ static void batadv_mcast_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, uint8_t mcast_flags = BATADV_NO_FLAGS; bool orig_initialized; - orig_initialized = orig->capa_initialized & BATADV_ORIG_CAPA_HAS_MCAST; + orig_initialized = test_bit(BATADV_ORIG_CAPA_HAS_MCAST, + &orig->capa_initialized); /* If mcast support is turned on decrease the disabled mcast node * counter only if we had increased it for this node before. 
If this * is a completely new orig_node no need to decrease the counter. */ if (orig_mcast_enabled && - !(orig->capabilities & BATADV_ORIG_CAPA_HAS_MCAST)) { + !test_bit(BATADV_ORIG_CAPA_HAS_MCAST, &orig->capabilities)) { if (orig_initialized) atomic_dec(&bat_priv->mcast.num_disabled); - orig->capabilities |= BATADV_ORIG_CAPA_HAS_MCAST; + set_bit(BATADV_ORIG_CAPA_HAS_MCAST, &orig->capabilities); /* If mcast support is being switched off or if this is an initial * OGM without mcast support then increase the disabled mcast * node counter. */ } else if (!orig_mcast_enabled && - (orig->capabilities & BATADV_ORIG_CAPA_HAS_MCAST || + (test_bit(BATADV_ORIG_CAPA_HAS_MCAST, &orig->capabilities) || !orig_initialized)) { atomic_inc(&bat_priv->mcast.num_disabled); - orig->capabilities &= ~BATADV_ORIG_CAPA_HAS_MCAST; + clear_bit(BATADV_ORIG_CAPA_HAS_MCAST, &orig->capabilities); } - orig->capa_initialized |= BATADV_ORIG_CAPA_HAS_MCAST; + set_bit(BATADV_ORIG_CAPA_HAS_MCAST, &orig->capa_initialized); if (orig_mcast_enabled && tvlv_value && (tvlv_value_len >= sizeof(mcast_flags))) @@ -740,8 +742,8 @@ void batadv_mcast_purge_orig(struct batadv_orig_node *orig) { struct batadv_priv *bat_priv = orig->bat_priv; - if (!(orig->capabilities & BATADV_ORIG_CAPA_HAS_MCAST) && - orig->capa_initialized & BATADV_ORIG_CAPA_HAS_MCAST) + if (!test_bit(BATADV_ORIG_CAPA_HAS_MCAST, &orig->capabilities) && + test_bit(BATADV_ORIG_CAPA_HAS_MCAST, &orig->capa_initialized)) atomic_dec(&bat_priv->mcast.num_disabled); batadv_mcast_want_unsnoop_update(bat_priv, orig, BATADV_NO_FLAGS); diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 4d3de8d8f331e..11cbac348e2d8 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -299,7 +299,7 @@ enum batadv_orig_capabilities { BATADV_ORIG_CAPA_HAS_DAT, BATADV_ORIG_CAPA_HAS_NC, BATADV_ORIG_CAPA_HAS_TT, - BATADV_ORIG_CAPA_HAS_MCAST = BIT(3), + BATADV_ORIG_CAPA_HAS_MCAST, }; /** From 3e6263c022b2c377bf883c99e278eb8bbe207325 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Tue, 16 Jun 2015 17:10:26 +0200 Subject: [PATCH 1156/2091] batman-adv: Fix potential synchronization issues in mcast tvlv handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 8a4023c5b5e30b11f1f383186f4a7222b3b823cf upstream. So far the mcast tvlv handler did not anticipate the processing of multiple incoming OGMs from the same originator at the same time. This can lead to various issues: * Broken refcounting: For instance two mcast handlers might both assume that an originator just got multicast capabilities and will together wrongly decrease mcast.num_disabled by two, potentially leading to an integer underflow. * Potential kernel panic on hlist_del_rcu(): Two mcast handlers might one after another try to do an hlist_del_rcu(&orig->mcast_want_all_*_node). The second one will cause memory corruption / crashes. (Reported by: Sven Eckelmann ) Right in the beginning the code path makes assumptions about the current multicast related state of an originator and bases all updates on that. The easiest and least error prune way to fix the issues in this case is to serialize multiple mcast handler invocations with a spinlock. 
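A compressed sketch of that serialization (structure and function names are abridged stand-ins, not the actual batman-adv code):

    #include <linux/spinlock.h>
    #include <linux/types.h>

    /* Abridged stand-in for struct batadv_orig_node. */
    struct orig_state {
            spinlock_t mcast_handler_lock;  /* serializes the handler below */
            u8 mcast_flags;
    };

    /* The whole read-modify-write of the per-originator multicast state
     * runs under one lock, so two OGM handlers for the same originator
     * can no longer interleave their counter and list updates. */
    static void mcast_handler(struct orig_state *orig, u8 new_flags)
    {
            spin_lock_bh(&orig->mcast_handler_lock);
            /* capability bit, disabled-node counter and want-all list
             * membership are read and updated here, under the same lock */
            orig->mcast_flags = new_flags;
            spin_unlock_bh(&orig->mcast_handler_lock);
    }
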
Fixes: 60432d756cf0 ("batman-adv: Announce new capability via multicast TVLV") Signed-off-by: Linus Lüssing Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/multicast.c | 63 +++++++++++++++++++++++++++++-------- net/batman-adv/originator.c | 5 +++ net/batman-adv/types.h | 3 ++ 3 files changed, 58 insertions(+), 13 deletions(-) diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index 6595ec22df31a..8653c1a506f4c 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -16,6 +16,7 @@ */ #include +#include #include "main.h" #include "multicast.h" #include "originator.h" @@ -566,19 +567,26 @@ batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb, * * If the BATADV_MCAST_WANT_ALL_UNSNOOPABLES flag of this originator, * orig, has toggled then this method updates counter and list accordingly. + * + * Caller needs to hold orig->mcast_handler_lock. */ static void batadv_mcast_want_unsnoop_update(struct batadv_priv *bat_priv, struct batadv_orig_node *orig, uint8_t mcast_flags) { + struct hlist_node *node = &orig->mcast_want_all_unsnoopables_node; + struct hlist_head *head = &bat_priv->mcast.want_all_unsnoopables_list; + /* switched from flag unset to set */ if (mcast_flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES && !(orig->mcast_flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES)) { atomic_inc(&bat_priv->mcast.num_want_all_unsnoopables); spin_lock_bh(&bat_priv->mcast.want_lists_lock); - hlist_add_head_rcu(&orig->mcast_want_all_unsnoopables_node, - &bat_priv->mcast.want_all_unsnoopables_list); + /* flag checks above + mcast_handler_lock prevents this */ + WARN_ON(!hlist_unhashed(node)); + + hlist_add_head_rcu(node, head); spin_unlock_bh(&bat_priv->mcast.want_lists_lock); /* switched from flag set to unset */ } else if (!(mcast_flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES) && @@ -586,7 +594,10 @@ static void batadv_mcast_want_unsnoop_update(struct batadv_priv *bat_priv, atomic_dec(&bat_priv->mcast.num_want_all_unsnoopables); spin_lock_bh(&bat_priv->mcast.want_lists_lock); - hlist_del_rcu(&orig->mcast_want_all_unsnoopables_node); + /* flag checks above + mcast_handler_lock prevents this */ + WARN_ON(hlist_unhashed(node)); + + hlist_del_init_rcu(node); spin_unlock_bh(&bat_priv->mcast.want_lists_lock); } } @@ -599,19 +610,26 @@ static void batadv_mcast_want_unsnoop_update(struct batadv_priv *bat_priv, * * If the BATADV_MCAST_WANT_ALL_IPV4 flag of this originator, orig, has * toggled then this method updates counter and list accordingly. + * + * Caller needs to hold orig->mcast_handler_lock. 
*/ static void batadv_mcast_want_ipv4_update(struct batadv_priv *bat_priv, struct batadv_orig_node *orig, uint8_t mcast_flags) { + struct hlist_node *node = &orig->mcast_want_all_ipv4_node; + struct hlist_head *head = &bat_priv->mcast.want_all_ipv4_list; + /* switched from flag unset to set */ if (mcast_flags & BATADV_MCAST_WANT_ALL_IPV4 && !(orig->mcast_flags & BATADV_MCAST_WANT_ALL_IPV4)) { atomic_inc(&bat_priv->mcast.num_want_all_ipv4); spin_lock_bh(&bat_priv->mcast.want_lists_lock); - hlist_add_head_rcu(&orig->mcast_want_all_ipv4_node, - &bat_priv->mcast.want_all_ipv4_list); + /* flag checks above + mcast_handler_lock prevents this */ + WARN_ON(!hlist_unhashed(node)); + + hlist_add_head_rcu(node, head); spin_unlock_bh(&bat_priv->mcast.want_lists_lock); /* switched from flag set to unset */ } else if (!(mcast_flags & BATADV_MCAST_WANT_ALL_IPV4) && @@ -619,7 +637,10 @@ static void batadv_mcast_want_ipv4_update(struct batadv_priv *bat_priv, atomic_dec(&bat_priv->mcast.num_want_all_ipv4); spin_lock_bh(&bat_priv->mcast.want_lists_lock); - hlist_del_rcu(&orig->mcast_want_all_ipv4_node); + /* flag checks above + mcast_handler_lock prevents this */ + WARN_ON(hlist_unhashed(node)); + + hlist_del_init_rcu(node); spin_unlock_bh(&bat_priv->mcast.want_lists_lock); } } @@ -632,19 +653,26 @@ static void batadv_mcast_want_ipv4_update(struct batadv_priv *bat_priv, * * If the BATADV_MCAST_WANT_ALL_IPV6 flag of this originator, orig, has * toggled then this method updates counter and list accordingly. + * + * Caller needs to hold orig->mcast_handler_lock. */ static void batadv_mcast_want_ipv6_update(struct batadv_priv *bat_priv, struct batadv_orig_node *orig, uint8_t mcast_flags) { + struct hlist_node *node = &orig->mcast_want_all_ipv6_node; + struct hlist_head *head = &bat_priv->mcast.want_all_ipv6_list; + /* switched from flag unset to set */ if (mcast_flags & BATADV_MCAST_WANT_ALL_IPV6 && !(orig->mcast_flags & BATADV_MCAST_WANT_ALL_IPV6)) { atomic_inc(&bat_priv->mcast.num_want_all_ipv6); spin_lock_bh(&bat_priv->mcast.want_lists_lock); - hlist_add_head_rcu(&orig->mcast_want_all_ipv6_node, - &bat_priv->mcast.want_all_ipv6_list); + /* flag checks above + mcast_handler_lock prevents this */ + WARN_ON(!hlist_unhashed(node)); + + hlist_add_head_rcu(node, head); spin_unlock_bh(&bat_priv->mcast.want_lists_lock); /* switched from flag set to unset */ } else if (!(mcast_flags & BATADV_MCAST_WANT_ALL_IPV6) && @@ -652,7 +680,10 @@ static void batadv_mcast_want_ipv6_update(struct batadv_priv *bat_priv, atomic_dec(&bat_priv->mcast.num_want_all_ipv6); spin_lock_bh(&bat_priv->mcast.want_lists_lock); - hlist_del_rcu(&orig->mcast_want_all_ipv6_node); + /* flag checks above + mcast_handler_lock prevents this */ + WARN_ON(hlist_unhashed(node)); + + hlist_del_init_rcu(node); spin_unlock_bh(&bat_priv->mcast.want_lists_lock); } } @@ -675,6 +706,11 @@ static void batadv_mcast_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, uint8_t mcast_flags = BATADV_NO_FLAGS; bool orig_initialized; + if (orig_mcast_enabled && tvlv_value && + (tvlv_value_len >= sizeof(mcast_flags))) + mcast_flags = *(uint8_t *)tvlv_value; + + spin_lock_bh(&orig->mcast_handler_lock); orig_initialized = test_bit(BATADV_ORIG_CAPA_HAS_MCAST, &orig->capa_initialized); @@ -700,15 +736,12 @@ static void batadv_mcast_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, set_bit(BATADV_ORIG_CAPA_HAS_MCAST, &orig->capa_initialized); - if (orig_mcast_enabled && tvlv_value && - (tvlv_value_len >= sizeof(mcast_flags))) - mcast_flags = *(uint8_t *)tvlv_value; - 
batadv_mcast_want_unsnoop_update(bat_priv, orig, mcast_flags); batadv_mcast_want_ipv4_update(bat_priv, orig, mcast_flags); batadv_mcast_want_ipv6_update(bat_priv, orig, mcast_flags); orig->mcast_flags = mcast_flags; + spin_unlock_bh(&orig->mcast_handler_lock); } /** @@ -742,6 +775,8 @@ void batadv_mcast_purge_orig(struct batadv_orig_node *orig) { struct batadv_priv *bat_priv = orig->bat_priv; + spin_lock_bh(&orig->mcast_handler_lock); + if (!test_bit(BATADV_ORIG_CAPA_HAS_MCAST, &orig->capabilities) && test_bit(BATADV_ORIG_CAPA_HAS_MCAST, &orig->capa_initialized)) atomic_dec(&bat_priv->mcast.num_disabled); @@ -749,4 +784,6 @@ void batadv_mcast_purge_orig(struct batadv_orig_node *orig) batadv_mcast_want_unsnoop_update(bat_priv, orig, BATADV_NO_FLAGS); batadv_mcast_want_ipv4_update(bat_priv, orig, BATADV_NO_FLAGS); batadv_mcast_want_ipv6_update(bat_priv, orig, BATADV_NO_FLAGS); + + spin_unlock_bh(&orig->mcast_handler_lock); } diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 90e805aba3795..dfae97408628d 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -678,8 +678,13 @@ struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv, orig_node->last_seen = jiffies; reset_time = jiffies - 1 - msecs_to_jiffies(BATADV_RESET_PROTECTION_MS); orig_node->bcast_seqno_reset = reset_time; + #ifdef CONFIG_BATMAN_ADV_MCAST orig_node->mcast_flags = BATADV_NO_FLAGS; + INIT_HLIST_NODE(&orig_node->mcast_want_all_unsnoopables_node); + INIT_HLIST_NODE(&orig_node->mcast_want_all_ipv4_node); + INIT_HLIST_NODE(&orig_node->mcast_want_all_ipv6_node); + spin_lock_init(&orig_node->mcast_handler_lock); #endif /* create a vlan object for the "untagged" LAN */ diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 11cbac348e2d8..26c37be2aa051 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -204,6 +204,7 @@ struct batadv_orig_bat_iv { * @batadv_dat_addr_t: address of the orig node in the distributed hash * @last_seen: time when last packet from this node was received * @bcast_seqno_reset: time when the broadcast seqno window was reset + * @mcast_handler_lock: synchronizes mcast-capability and -flag changes * @mcast_flags: multicast flags announced by the orig node * @mcast_want_all_unsnoop_node: a list node for the * mcast.want_all_unsnoopables list @@ -251,6 +252,8 @@ struct batadv_orig_node { unsigned long last_seen; unsigned long bcast_seqno_reset; #ifdef CONFIG_BATMAN_ADV_MCAST + /* synchronizes mcast tvlv specific orig changes */ + spinlock_t mcast_handler_lock; uint8_t mcast_flags; struct hlist_node mcast_want_all_unsnoopables_node; struct hlist_node mcast_want_all_ipv4_node; From 42719676db08e05831407b4f0990cd98bd3ddc7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Tue, 30 Jun 2015 23:45:26 +0200 Subject: [PATCH 1157/2091] batman-adv: Fix potentially broken skb network header access MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 53cf037bf846417fd92dc92ddf97267f69b110f4 upstream. The two commits noted below added calls to ip_hdr() and ipv6_hdr(). They need a correctly set skb network header. Unfortunately we cannot rely on the device drivers to set it for us. Therefore setting it in the beginning of the according ndo_start_xmit handler. 
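A minimal sketch of the idea (example_tx_fixup() is a made-up helper; the real change lives in batadv_interface_tx() as the diff below shows):

    #include <linux/if_ether.h>
    #include <linux/if_vlan.h>
    #include <linux/ip.h>
    #include <linux/skbuff.h>

    /* ip_hdr()/ipv6_hdr() dereference skb->network_header, and the layer
     * that handed us the skb is not guaranteed to have set it, so set it
     * explicitly before any multicast code inspects the IP header. */
    static void example_tx_fixup(struct sk_buff *skb, bool vlan_tagged)
    {
            int offset = ETH_HLEN + (vlan_tagged ? VLAN_HLEN : 0);

            skb_set_network_header(skb, offset);
            /* from here on ip_hdr(skb) points at the real IP header */
    }
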
Fixes: 1d8ab8d3c176 ("batman-adv: Modified forwarding behaviour for multicast packets") Fixes: ab49886e3da7 ("batman-adv: Add IPv4 link-local/IPv6-ll-all-nodes multicast support") Signed-off-by: Linus Lüssing Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/soft-interface.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 271a08dbd4d2e..a0b1b861b968e 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -172,6 +172,7 @@ static int batadv_interface_tx(struct sk_buff *skb, int gw_mode; enum batadv_forw_mode forw_mode; struct batadv_orig_node *mcast_single_orig = NULL; + int network_offset = ETH_HLEN; if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE) goto dropped; @@ -184,14 +185,18 @@ static int batadv_interface_tx(struct sk_buff *skb, case ETH_P_8021Q: vhdr = vlan_eth_hdr(skb); - if (vhdr->h_vlan_encapsulated_proto != ethertype) + if (vhdr->h_vlan_encapsulated_proto != ethertype) { + network_offset += VLAN_HLEN; break; + } /* fall through */ case ETH_P_BATMAN: goto dropped; } + skb_set_network_header(skb, network_offset); + if (batadv_bla_tx(bat_priv, skb, vid)) goto dropped; From 41f3fa173272bdc28e47f9e4004c62eb27a9474a Mon Sep 17 00:00:00 2001 From: Kapileshwar Singh Date: Tue, 22 Sep 2015 14:22:03 +0100 Subject: [PATCH 1158/2091] tools lib traceevent: Fix string handling in heterogeneous arch environments commit c2e4b24ff848bb180f9b9cd873a38327cd219ad2 upstream. When a trace recorded on a 32-bit device is processed with a 64-bit binary, the higher 32-bits of the address need to ignored. The lack of this results in the output of the 64-bit pointer value to the trace as the 32-bit address lookup fails in find_printk(). 
Before: burn-1778 [003] 548.600305: bputs: 0xc0046db2s: 2cec5c058d98c After: burn-1778 [003] 548.600305: bputs: 0xc0046db2s: RT throttling activated The problem occurs in PRINT_FIELD when the field is recognized as a pointer to a string (of the type const char *) Heterogeneous architectures cases below can arise and should be handled: * Traces recorded using 32-bit addresses processed on a 64-bit machine * Traces recorded using 64-bit addresses processed on a 32-bit machine Reported-by: Juri Lelli Signed-off-by: Kapileshwar Singh Reviewed-by: Steven Rostedt Cc: David Ahern Cc: Javi Merino Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1442928123-13824-1-git-send-email-kapileshwar.singh@arm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/lib/traceevent/event-parse.c | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 29f94f6f0d9e9..ed5461f065bd8 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -3721,7 +3721,7 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, struct format_field *field; struct printk_map *printk; long long val, fval; - unsigned long addr; + unsigned long long addr; char *str; unsigned char *hex; int print; @@ -3754,13 +3754,30 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, */ if (!(field->flags & FIELD_IS_ARRAY) && field->size == pevent->long_size) { - addr = *(unsigned long *)(data + field->offset); + + /* Handle heterogeneous recording and processing + * architectures + * + * CASE I: + * Traces recorded on 32-bit devices (32-bit + * addressing) and processed on 64-bit devices: + * In this case, only 32 bits should be read. + * + * CASE II: + * Traces recorded on 64 bit devices and processed + * on 32-bit devices: + * In this case, 64 bits must be read. + */ + addr = (pevent->long_size == 8) ? + *(unsigned long long *)(data + field->offset) : + (unsigned long long)*(unsigned int *)(data + field->offset); + /* Check if it matches a print format */ printk = find_printk(pevent, addr); if (printk) trace_seq_puts(s, printk->printk); else - trace_seq_printf(s, "%lx", addr); + trace_seq_printf(s, "%llx", addr); break; } str = malloc(len + 1); From e6b5ff2bbb2f2b10860efaa052783eeaa3cd257d Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 10 Sep 2015 14:36:21 +1000 Subject: [PATCH 1159/2091] powerpc/MSI: Fix race condition in tearing down MSI interrupts commit e297c939b745e420ef0b9dc989cb87bda617b399 upstream. This fixes a race which can result in the same virtual IRQ number being assigned to two different MSI interrupts. The most visible consequence of that is usually a warning and stack trace from the sysfs code about an attempt to create a duplicate entry in sysfs. The race happens when one CPU (say CPU 0) is disposing of an MSI while another CPU (say CPU 1) is setting up an MSI. CPU 0 calls (for example) pnv_teardown_msi_irqs(), which calls msi_bitmap_free_hwirqs() to indicate that the MSI (i.e. its hardware IRQ number) is no longer in use. Then, before CPU 0 gets to calling irq_dispose_mapping() to free up the virtal IRQ number, CPU 1 comes in and calls msi_bitmap_alloc_hwirqs() to allocate an MSI, and gets the same hardware IRQ number that CPU 0 just freed. 
CPU 1 then calls irq_create_mapping() to get a virtual IRQ number, which sees that there is currently a mapping for that hardware IRQ number and returns the corresponding virtual IRQ number (which is the same virtual IRQ number that CPU 0 was using). CPU 0 then calls irq_dispose_mapping() and frees that virtual IRQ number. Now, if another CPU comes along and calls irq_create_mapping(), it is likely to get the virtual IRQ number that was just freed, resulting in the same virtual IRQ number apparently being used for two different hardware interrupts. To fix this race, we just move the call to msi_bitmap_free_hwirqs() to after the call to irq_dispose_mapping(). Since virq_to_hw() doesn't work for the virtual IRQ number after irq_dispose_mapping() has been called, we need to call it before irq_dispose_mapping() and remember the result for the msi_bitmap_free_hwirqs() call. The pattern of calling msi_bitmap_free_hwirqs() before irq_dispose_mapping() appears in 5 places under arch/powerpc, and appears to have originated in commit 05af7bd2d75e ("[POWERPC] MPIC U3/U4 MSI backend") from 2007. Fixes: 05af7bd2d75e ("[POWERPC] MPIC U3/U4 MSI backend") Reported-by: Alexey Kardashevskiy Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/powernv/pci.c | 5 +++-- arch/powerpc/sysdev/fsl_msi.c | 5 +++-- arch/powerpc/sysdev/mpic_pasemi_msi.c | 6 ++++-- arch/powerpc/sysdev/mpic_u3msi.c | 5 +++-- arch/powerpc/sysdev/ppc4xx_msi.c | 5 +++-- 5 files changed, 16 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index bca2aeb6e4b6a..3ff29cf6d05c9 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -99,6 +99,7 @@ static void pnv_teardown_msi_irqs(struct pci_dev *pdev) struct pci_controller *hose = pci_bus_to_host(pdev->bus); struct pnv_phb *phb = hose->private_data; struct msi_desc *entry; + irq_hw_number_t hwirq; if (WARN_ON(!phb)) return; @@ -106,10 +107,10 @@ static void pnv_teardown_msi_irqs(struct pci_dev *pdev) list_for_each_entry(entry, &pdev->msi_list, list) { if (entry->irq == NO_IRQ) continue; + hwirq = virq_to_hw(entry->irq); irq_set_msi_desc(entry->irq, NULL); - msi_bitmap_free_hwirqs(&phb->msi_bmp, - virq_to_hw(entry->irq) - phb->msi_base, 1); irq_dispose_mapping(entry->irq); + msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq - phb->msi_base, 1); } } #endif /* CONFIG_PCI_MSI */ diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index f086c6f22dc96..fd16cb5d83f35 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -128,15 +128,16 @@ static void fsl_teardown_msi_irqs(struct pci_dev *pdev) { struct msi_desc *entry; struct fsl_msi *msi_data; + irq_hw_number_t hwirq; list_for_each_entry(entry, &pdev->msi_list, list) { if (entry->irq == NO_IRQ) continue; + hwirq = virq_to_hw(entry->irq); msi_data = irq_get_chip_data(entry->irq); irq_set_msi_desc(entry->irq, NULL); - msi_bitmap_free_hwirqs(&msi_data->bitmap, - virq_to_hw(entry->irq), 1); irq_dispose_mapping(entry->irq); + msi_bitmap_free_hwirqs(&msi_data->bitmap, hwirq, 1); } return; diff --git a/arch/powerpc/sysdev/mpic_pasemi_msi.c b/arch/powerpc/sysdev/mpic_pasemi_msi.c index a3f660eed6dea..89496cf4e04dd 100644 --- a/arch/powerpc/sysdev/mpic_pasemi_msi.c +++ b/arch/powerpc/sysdev/mpic_pasemi_msi.c @@ -65,6 +65,7 @@ static struct irq_chip mpic_pasemi_msi_chip = { static void pasemi_msi_teardown_msi_irqs(struct pci_dev *pdev) { 
struct msi_desc *entry; + irq_hw_number_t hwirq; pr_debug("pasemi_msi_teardown_msi_irqs, pdev %p\n", pdev); @@ -72,10 +73,11 @@ static void pasemi_msi_teardown_msi_irqs(struct pci_dev *pdev) if (entry->irq == NO_IRQ) continue; + hwirq = virq_to_hw(entry->irq); irq_set_msi_desc(entry->irq, NULL); - msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, - virq_to_hw(entry->irq), ALLOC_CHUNK); irq_dispose_mapping(entry->irq); + msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, + hwirq, ALLOC_CHUNK); } return; diff --git a/arch/powerpc/sysdev/mpic_u3msi.c b/arch/powerpc/sysdev/mpic_u3msi.c index b2cef18093893..13a34b2375591 100644 --- a/arch/powerpc/sysdev/mpic_u3msi.c +++ b/arch/powerpc/sysdev/mpic_u3msi.c @@ -107,15 +107,16 @@ static u64 find_u4_magic_addr(struct pci_dev *pdev, unsigned int hwirq) static void u3msi_teardown_msi_irqs(struct pci_dev *pdev) { struct msi_desc *entry; + irq_hw_number_t hwirq; list_for_each_entry(entry, &pdev->msi_list, list) { if (entry->irq == NO_IRQ) continue; + hwirq = virq_to_hw(entry->irq); irq_set_msi_desc(entry->irq, NULL); - msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, - virq_to_hw(entry->irq), 1); irq_dispose_mapping(entry->irq); + msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, hwirq, 1); } return; diff --git a/arch/powerpc/sysdev/ppc4xx_msi.c b/arch/powerpc/sysdev/ppc4xx_msi.c index 6e2e6aa378bbe..02a137daa1824 100644 --- a/arch/powerpc/sysdev/ppc4xx_msi.c +++ b/arch/powerpc/sysdev/ppc4xx_msi.c @@ -124,16 +124,17 @@ void ppc4xx_teardown_msi_irqs(struct pci_dev *dev) { struct msi_desc *entry; struct ppc4xx_msi *msi_data = &ppc4xx_msi; + irq_hw_number_t hwirq; dev_dbg(&dev->dev, "PCIE-MSI: tearing down msi irqs\n"); list_for_each_entry(entry, &dev->msi_list, list) { if (entry->irq == NO_IRQ) continue; + hwirq = virq_to_hw(entry->irq); irq_set_msi_desc(entry->irq, NULL); - msi_bitmap_free_hwirqs(&msi_data->bitmap, - virq_to_hw(entry->irq), 1); irq_dispose_mapping(entry->irq); + msi_bitmap_free_hwirqs(&msi_data->bitmap, hwirq, 1); } } From a58897f9e663bb9b64b703f7dd451c7089cb28bb Mon Sep 17 00:00:00 2001 From: Christian Engelmayer Date: Fri, 21 Aug 2015 23:14:26 +0200 Subject: [PATCH 1160/2091] rsi: Fix possible leak when loading firmware commit a8b9774571d46506a0774b1ced3493b1245cf893 upstream. Commit 5d5cd85ff441 ("rsi: Fix failure to load firmware after memory leak fix and fix the leak") also added a check on the allocation of DMA-accessible memory that may directly return. In that case the already allocated firmware data is leaked. Make sure the data is always freed correctly. Detected by Coverity CID 1316519. 
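A condensed sketch of the corrected error handling (load_fw_example() and the firmware name are made up; the actual change is in the rsi diff below):

    #include <linux/device.h>
    #include <linux/firmware.h>
    #include <linux/slab.h>
    #include <linux/types.h>

    /* Once request_firmware() has succeeded, every exit path must pass
     * through release_firmware(), including the kmemdup() failure that
     * previously returned early and leaked fw_entry. */
    static int load_fw_example(struct device *dev)
    {
            const struct firmware *fw_entry;
            u8 *fw;
            int status;

            status = request_firmware(&fw_entry, "example.fw", dev);
            if (status)
                    return status;

            fw = kmemdup(fw_entry->data, fw_entry->size, GFP_KERNEL);
            if (!fw) {
                    status = -ENOMEM;
                    goto out;               /* do not leak fw_entry */
            }

            /* ... copy the image to the card ... */
            kfree(fw);
    out:
            release_firmware(fw_entry);
            return status;
    }
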
Fixes: 5d5cd85ff441 ("rsi: Fix failure to load firmware after memory leak fix and fix the leak") Signed-off-by: Christian Engelmayer Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/rsi/rsi_91x_sdio_ops.c | 8 ++++++-- drivers/net/wireless/rsi/rsi_91x_usb_ops.c | 8 ++++++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/rsi/rsi_91x_sdio_ops.c b/drivers/net/wireless/rsi/rsi_91x_sdio_ops.c index 1c6788aecc626..40d72312f3df2 100644 --- a/drivers/net/wireless/rsi/rsi_91x_sdio_ops.c +++ b/drivers/net/wireless/rsi/rsi_91x_sdio_ops.c @@ -203,8 +203,10 @@ static int rsi_load_ta_instructions(struct rsi_common *common) /* Copy firmware into DMA-accessible memory */ fw = kmemdup(fw_entry->data, fw_entry->size, GFP_KERNEL); - if (!fw) - return -ENOMEM; + if (!fw) { + status = -ENOMEM; + goto out; + } len = fw_entry->size; if (len % 4) @@ -217,6 +219,8 @@ static int rsi_load_ta_instructions(struct rsi_common *common) status = rsi_copy_to_card(common, fw, len, num_blocks); kfree(fw); + +out: release_firmware(fw_entry); return status; } diff --git a/drivers/net/wireless/rsi/rsi_91x_usb_ops.c b/drivers/net/wireless/rsi/rsi_91x_usb_ops.c index 30c2cf7fa93b0..de4900862836a 100644 --- a/drivers/net/wireless/rsi/rsi_91x_usb_ops.c +++ b/drivers/net/wireless/rsi/rsi_91x_usb_ops.c @@ -148,8 +148,10 @@ static int rsi_load_ta_instructions(struct rsi_common *common) /* Copy firmware into DMA-accessible memory */ fw = kmemdup(fw_entry->data, fw_entry->size, GFP_KERNEL); - if (!fw) - return -ENOMEM; + if (!fw) { + status = -ENOMEM; + goto out; + } len = fw_entry->size; if (len % 4) @@ -162,6 +164,8 @@ static int rsi_load_ta_instructions(struct rsi_common *common) status = rsi_copy_to_card(common, fw, len, num_blocks); kfree(fw); + +out: release_firmware(fw_entry); return status; } From d3a1196bfc462943694623412d8e03aaf172bdc1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 13 Aug 2015 15:44:51 -0700 Subject: [PATCH 1161/2091] inet: fix potential deadlock in reqsk_queue_unlink() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 83fccfc3940c4a2db90fd7e7079f5b465cd8c6af upstream. When replacing del_timer() with del_timer_sync(), I introduced a deadlock condition : reqsk_queue_unlink() is called from inet_csk_reqsk_queue_drop() inet_csk_reqsk_queue_drop() can be called from many contexts, one being the timer handler itself (reqsk_timer_handler()). In this case, del_timer_sync() loops forever. Simple fix is to test if timer is pending. Fixes: 2235f2ac75fd ("inet: fix races with reqsk timers") Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller Cc: Holger Hoffstätte Cc: Andre Tomt Cc: Chris Caputo Signed-off-by: Greg Kroah-Hartman --- net/ipv4/inet_connection_sock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index b27fc401c6a9a..e664706b350cd 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -584,7 +584,7 @@ static bool reqsk_queue_unlink(struct request_sock_queue *queue, } spin_unlock(&queue->syn_wait_lock); - if (del_timer_sync(&req->rsk_timer)) + if (timer_pending(&req->rsk_timer) && del_timer_sync(&req->rsk_timer)) reqsk_put(req); return found; } From 207663ca0d556e09d14c743cdf507e50b12e76fe Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Wed, 8 Jul 2015 11:46:36 +0200 Subject: [PATCH 1162/2091] UBIFS: Kill unneeded locking in ubifs_init_security commit cf6f54e3f133229f02a90c04fe0ff9dd9d3264b4 upstream. Fixes the following lockdep splat: [ 1.244527] ============================================= [ 1.245193] [ INFO: possible recursive locking detected ] [ 1.245193] 4.2.0-rc1+ #37 Not tainted [ 1.245193] --------------------------------------------- [ 1.245193] cp/742 is trying to acquire lock: [ 1.245193] (&sb->s_type->i_mutex_key#9){+.+.+.}, at: [] ubifs_init_security+0x29/0xb0 [ 1.245193] [ 1.245193] but task is already holding lock: [ 1.245193] (&sb->s_type->i_mutex_key#9){+.+.+.}, at: [] path_openat+0x3af/0x1280 [ 1.245193] [ 1.245193] other info that might help us debug this: [ 1.245193] Possible unsafe locking scenario: [ 1.245193] [ 1.245193] CPU0 [ 1.245193] ---- [ 1.245193] lock(&sb->s_type->i_mutex_key#9); [ 1.245193] lock(&sb->s_type->i_mutex_key#9); [ 1.245193] [ 1.245193] *** DEADLOCK *** [ 1.245193] [ 1.245193] May be due to missing lock nesting notation [ 1.245193] [ 1.245193] 2 locks held by cp/742: [ 1.245193] #0: (sb_writers#5){.+.+.+}, at: [] mnt_want_write+0x1f/0x50 [ 1.245193] #1: (&sb->s_type->i_mutex_key#9){+.+.+.}, at: [] path_openat+0x3af/0x1280 [ 1.245193] [ 1.245193] stack backtrace: [ 1.245193] CPU: 2 PID: 742 Comm: cp Not tainted 4.2.0-rc1+ #37 [ 1.245193] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.7.5-0-ge51488c-20140816_022509-build35 04/01/2014 [ 1.245193] ffffffff8252d530 ffff88007b023a38 ffffffff814f6f49 ffffffff810b56c5 [ 1.245193] ffff88007c30cc80 ffff88007b023af8 ffffffff810a150d ffff88007b023a68 [ 1.245193] 000000008101302a ffff880000000000 00000008f447e23f ffffffff8252d500 [ 1.245193] Call Trace: [ 1.245193] [] dump_stack+0x4c/0x65 [ 1.245193] [] ? console_unlock+0x1c5/0x510 [ 1.245193] [] __lock_acquire+0x1a6d/0x1ea0 [ 1.245193] [] ? __lock_is_held+0x58/0x80 [ 1.245193] [] lock_acquire+0xd3/0x270 [ 1.245193] [] ? ubifs_init_security+0x29/0xb0 [ 1.245193] [] mutex_lock_nested+0x6b/0x3a0 [ 1.245193] [] ? ubifs_init_security+0x29/0xb0 [ 1.245193] [] ? ubifs_init_security+0x29/0xb0 [ 1.245193] [] ubifs_init_security+0x29/0xb0 [ 1.245193] [] ubifs_create+0xa6/0x1f0 [ 1.245193] [] ? path_openat+0x3af/0x1280 [ 1.245193] [] vfs_create+0x95/0xc0 [ 1.245193] [] path_openat+0x7cc/0x1280 [ 1.245193] [] ? __lock_acquire+0x543/0x1ea0 [ 1.245193] [] ? sched_clock_cpu+0x90/0xc0 [ 1.245193] [] ? calc_global_load_tick+0x60/0x90 [ 1.245193] [] ? sched_clock_cpu+0x90/0xc0 [ 1.245193] [] ? __alloc_fd+0xaf/0x180 [ 1.245193] [] do_filp_open+0x75/0xd0 [ 1.245193] [] ? _raw_spin_unlock+0x26/0x40 [ 1.245193] [] ? 
__alloc_fd+0xaf/0x180 [ 1.245193] [] do_sys_open+0x129/0x200 [ 1.245193] [] SyS_open+0x19/0x20 [ 1.245193] [] entry_SYSCALL_64_fastpath+0x12/0x6f While the lockdep splat is a false positive, becuase path_openat holds i_mutex of the parent directory and ubifs_init_security() tries to acquire i_mutex of a new inode, it reveals that taking i_mutex in ubifs_init_security() is in vain because it is only being called in the inode allocation path and therefore nobody else can see the inode yet. Reported-and-tested-by: Boris Brezillon Reviewed-and-tested-by: Dongsheng Yang Signed-off-by: Richard Weinberger Signed-off-by: dedekind1@gmail.com Signed-off-by: Greg Kroah-Hartman --- fs/ubifs/xattr.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c index 96f3448b6eb40..fd65b3f1923cc 100644 --- a/fs/ubifs/xattr.c +++ b/fs/ubifs/xattr.c @@ -652,11 +652,8 @@ int ubifs_init_security(struct inode *dentry, struct inode *inode, { int err; - mutex_lock(&inode->i_mutex); err = security_inode_init_security(inode, dentry, qstr, &init_xattrs, 0); - mutex_unlock(&inode->i_mutex); - if (err) { struct ubifs_info *c = dentry->i_sb->s_fs_info; ubifs_err(c, "cannot initialize security for inode %lu, error %d", From 189c815c3535412f80f3cc628149aa8dda50b3d2 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Tue, 22 Sep 2015 23:58:07 +0200 Subject: [PATCH 1163/2091] UBI: Validate data_size commit 281fda27673f833a01d516658a64d22a32c8e072 upstream. Make sure that data_size is less than LEB size. Otherwise a handcrafted UBI image is able to trigger an out of bounds memory access in ubi_compare_lebs(). Signed-off-by: Richard Weinberger Reviewed-by: David Gstir Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/ubi/io.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/mtd/ubi/io.c b/drivers/mtd/ubi/io.c index 5bbd1f094f4e3..1fc23e48fe8e4 100644 --- a/drivers/mtd/ubi/io.c +++ b/drivers/mtd/ubi/io.c @@ -926,6 +926,11 @@ static int validate_vid_hdr(const struct ubi_device *ubi, goto bad; } + if (data_size > ubi->leb_size) { + ubi_err(ubi, "bad data_size"); + goto bad; + } + if (vol_type == UBI_VID_STATIC) { /* * Although from high-level point of view static volumes may From ef110859613fdbe6e866b09c3b2fd9081faa25dd Mon Sep 17 00:00:00 2001 From: shengyong Date: Mon, 28 Sep 2015 17:57:19 +0000 Subject: [PATCH 1164/2091] UBI: return ENOSPC if no enough space available commit 7c7feb2ebfc9c0552c51f0c050db1d1a004faac5 upstream. UBI: attaching mtd1 to ubi0 UBI: scanning is finished UBI error: init_volumes: not enough PEBs, required 706, available 686 UBI error: ubi_wl_init: no enough physical eraseblocks (-20, need 1) UBI error: ubi_attach_mtd_dev: failed to attach mtd1, error -12 <= NOT ENOMEM UBI error: ubi_init: cannot attach mtd1 If available PEBs are not enough when initializing volumes, return -ENOSPC directly. If available PEBs are not enough when initializing WL, return -ENOSPC instead of -ENOMEM. 
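The change amounts to reporting the failure for what it is: a shortage of flash space rather than a kernel memory allocation failure. A tiny illustrative sketch of that selection (the helper and its parameters are hypothetical, not UBI code):

#include <linux/errno.h>

/* Pick the error a failed attach should report back to the caller. */
static int peb_budget_error_sketch(int required_pebs, int available_pebs)
{
	if (required_pebs > available_pebs)
		return -ENOSPC;	/* not enough eraseblocks, not out of RAM */
	return 0;
}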
Signed-off-by: Sheng Yong Signed-off-by: Richard Weinberger Reviewed-by: David Gstir Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/ubi/vtbl.c | 1 + drivers/mtd/ubi/wl.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/mtd/ubi/vtbl.c b/drivers/mtd/ubi/vtbl.c index 68c9c5ea676f7..bf2f916df4e2e 100644 --- a/drivers/mtd/ubi/vtbl.c +++ b/drivers/mtd/ubi/vtbl.c @@ -646,6 +646,7 @@ static int init_volumes(struct ubi_device *ubi, if (ubi->corr_peb_count) ubi_err(ubi, "%d PEBs are corrupted and not used", ubi->corr_peb_count); + return -ENOSPC; } ubi->rsvd_pebs += reserved_pebs; ubi->avail_pebs -= reserved_pebs; diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index 16214d3d57a4d..18fef94542f89 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -1601,6 +1601,7 @@ int ubi_wl_init(struct ubi_device *ubi, struct ubi_attach_info *ai) if (ubi->corr_peb_count) ubi_err(ubi, "%d PEBs are corrupted and not used", ubi->corr_peb_count); + err = -ENOSPC; goto out_free; } ubi->avail_pebs -= reserved_pebs; From c1d40e01ad8c01eb36557dab69cefd972f5f0415 Mon Sep 17 00:00:00 2001 From: Haibo Chen Date: Tue, 25 Aug 2015 10:02:11 +0800 Subject: [PATCH 1165/2091] mmc: sdhci: fix dma memory leak in sdhci_pre_req() commit d31911b9374a76560d2c8ea4aa6ce5781621e81d upstream. Currently one mrq->data may execute dma_map_sg() twice when the mmc subsystem prepares a new request, and the following log shows up: sdhci[sdhci_pre_dma_transfer] invalid cookie: 24, next-cookie 25 In this condition, mrq->data maps a dma-memory(1) in sdhci_pre_req for the first time, and maps another dma-memory(2) in sdhci_prepare_data for the second time. But the driver only unmaps dma-memory(2), and dma-memory(1) is never unmapped, which causes the dma memory leak issue. This patch uses another method to map the dma memory for the mrq->data, which fixes this dma memory leak issue.
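The new scheme can be thought of as a small per-buffer state machine. A simplified sketch follows (the types and helper names are illustrative, not the sdhci code itself): a buffer is UNMAPPED, MAPPED by the pre_req hook, or GIVEN to the request path, and dma mapping/unmapping only ever happens on the transitions out of and back into UNMAPPED, so a buffer is never mapped or unmapped twice.

enum cookie_sketch { COOKIE_UNMAPPED, COOKIE_MAPPED, COOKIE_GIVEN };

struct data_sketch {
	enum cookie_sketch host_cookie;
	int sg_count;			/* saved result of the map step */
};

/* Called from both the pre_req hook and the request path. */
static int map_once_sketch(struct data_sketch *data, int (*do_map)(void))
{
	if (data->host_cookie == COOKIE_MAPPED) {
		/* pre_req already mapped it: reuse, never map twice */
		data->host_cookie = COOKIE_GIVEN;
		return data->sg_count;
	}
	data->sg_count = do_map();
	data->host_cookie = COOKIE_MAPPED;
	return data->sg_count;
}

/* Called when the transfer finishes or from the post_req hook. */
static void unmap_once_sketch(struct data_sketch *data, void (*do_unmap)(void))
{
	if (data->host_cookie == COOKIE_UNMAPPED)
		return;			/* nothing mapped, nothing to undo */
	do_unmap();
	data->host_cookie = COOKIE_UNMAPPED;
}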
Fixes: 348487cb28e6 ("mmc: sdhci: use pipeline mmc requests to improve performance") Reported-and-tested-by: Jiri Slaby Signed-off-by: Haibo Chen Signed-off-by: Ulf Hansson Signed-off-by: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci.c | 67 +++++++++++++++------------------------- drivers/mmc/host/sdhci.h | 8 ++--- include/linux/mmc/core.h | 1 + 3 files changed, 30 insertions(+), 46 deletions(-) diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index fd41b91436ec2..cbaf3df3ebd98 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -55,8 +55,7 @@ static int sdhci_execute_tuning(struct mmc_host *mmc, u32 opcode); static void sdhci_tuning_timer(unsigned long data); static void sdhci_enable_preset_value(struct sdhci_host *host, bool enable); static int sdhci_pre_dma_transfer(struct sdhci_host *host, - struct mmc_data *data, - struct sdhci_host_next *next); + struct mmc_data *data); static int sdhci_do_get_cd(struct sdhci_host *host); #ifdef CONFIG_PM @@ -510,7 +509,7 @@ static int sdhci_adma_table_pre(struct sdhci_host *host, goto fail; BUG_ON(host->align_addr & host->align_mask); - host->sg_count = sdhci_pre_dma_transfer(host, data, NULL); + host->sg_count = sdhci_pre_dma_transfer(host, data); if (host->sg_count < 0) goto unmap_align; @@ -649,9 +648,11 @@ static void sdhci_adma_table_post(struct sdhci_host *host, } } - if (!data->host_cookie) + if (data->host_cookie == COOKIE_MAPPED) { dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len, direction); + data->host_cookie = COOKIE_UNMAPPED; + } } static u8 sdhci_calc_timeout(struct sdhci_host *host, struct mmc_command *cmd) @@ -847,7 +848,7 @@ static void sdhci_prepare_data(struct sdhci_host *host, struct mmc_command *cmd) } else { int sg_cnt; - sg_cnt = sdhci_pre_dma_transfer(host, data, NULL); + sg_cnt = sdhci_pre_dma_transfer(host, data); if (sg_cnt <= 0) { /* * This only happens when someone fed @@ -963,11 +964,13 @@ static void sdhci_finish_data(struct sdhci_host *host) if (host->flags & SDHCI_USE_ADMA) sdhci_adma_table_post(host, data); else { - if (!data->host_cookie) + if (data->host_cookie == COOKIE_MAPPED) { dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len, (data->flags & MMC_DATA_READ) ? DMA_FROM_DEVICE : DMA_TO_DEVICE); + data->host_cookie = COOKIE_UNMAPPED; + } } } @@ -2131,49 +2134,36 @@ static void sdhci_post_req(struct mmc_host *mmc, struct mmc_request *mrq, struct mmc_data *data = mrq->data; if (host->flags & SDHCI_REQ_USE_DMA) { - if (data->host_cookie) + if (data->host_cookie == COOKIE_GIVEN || + data->host_cookie == COOKIE_MAPPED) dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len, data->flags & MMC_DATA_WRITE ? DMA_TO_DEVICE : DMA_FROM_DEVICE); - mrq->data->host_cookie = 0; + data->host_cookie = COOKIE_UNMAPPED; } } static int sdhci_pre_dma_transfer(struct sdhci_host *host, - struct mmc_data *data, - struct sdhci_host_next *next) + struct mmc_data *data) { int sg_count; - if (!next && data->host_cookie && - data->host_cookie != host->next_data.cookie) { - pr_debug(DRIVER_NAME "[%s] invalid cookie: %d, next-cookie %d\n", - __func__, data->host_cookie, host->next_data.cookie); - data->host_cookie = 0; + if (data->host_cookie == COOKIE_MAPPED) { + data->host_cookie = COOKIE_GIVEN; + return data->sg_count; } - /* Check if next job is already prepared */ - if (next || - (!next && data->host_cookie != host->next_data.cookie)) { - sg_count = dma_map_sg(mmc_dev(host->mmc), data->sg, - data->sg_len, - data->flags & MMC_DATA_WRITE ? 
- DMA_TO_DEVICE : DMA_FROM_DEVICE); - - } else { - sg_count = host->next_data.sg_count; - host->next_data.sg_count = 0; - } + WARN_ON(data->host_cookie == COOKIE_GIVEN); + sg_count = dma_map_sg(mmc_dev(host->mmc), data->sg, data->sg_len, + data->flags & MMC_DATA_WRITE ? + DMA_TO_DEVICE : DMA_FROM_DEVICE); if (sg_count == 0) - return -EINVAL; + return -ENOSPC; - if (next) { - next->sg_count = sg_count; - data->host_cookie = ++next->cookie < 0 ? 1 : next->cookie; - } else - host->sg_count = sg_count; + data->sg_count = sg_count; + data->host_cookie = COOKIE_MAPPED; return sg_count; } @@ -2183,16 +2173,10 @@ static void sdhci_pre_req(struct mmc_host *mmc, struct mmc_request *mrq, { struct sdhci_host *host = mmc_priv(mmc); - if (mrq->data->host_cookie) { - mrq->data->host_cookie = 0; - return; - } + mrq->data->host_cookie = COOKIE_UNMAPPED; if (host->flags & SDHCI_REQ_USE_DMA) - if (sdhci_pre_dma_transfer(host, - mrq->data, - &host->next_data) < 0) - mrq->data->host_cookie = 0; + sdhci_pre_dma_transfer(host, mrq->data); } static void sdhci_card_event(struct mmc_host *mmc) @@ -3090,7 +3074,6 @@ int sdhci_add_host(struct sdhci_host *host) host->max_clk = host->ops->get_max_clock(host); } - host->next_data.cookie = 1; /* * In case of Host Controller v3.00, find out whether clock * multiplier is supported. diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h index e639b7f435e56..eea23f62356ac 100644 --- a/drivers/mmc/host/sdhci.h +++ b/drivers/mmc/host/sdhci.h @@ -309,9 +309,10 @@ struct sdhci_adma2_64_desc { */ #define SDHCI_MAX_SEGS 128 -struct sdhci_host_next { - unsigned int sg_count; - s32 cookie; +enum sdhci_cookie { + COOKIE_UNMAPPED, + COOKIE_MAPPED, + COOKIE_GIVEN, }; struct sdhci_host { @@ -506,7 +507,6 @@ struct sdhci_host { #define SDHCI_TUNING_MODE_1 0 struct timer_list tuning_timer; /* Timer for tuning */ - struct sdhci_host_next next_data; unsigned long private[0] ____cacheline_aligned; }; diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h index de722d4e9d61b..258daf914c6df 100644 --- a/include/linux/mmc/core.h +++ b/include/linux/mmc/core.h @@ -121,6 +121,7 @@ struct mmc_data { struct mmc_request *mrq; /* associated request */ unsigned int sg_len; /* size of scatter list */ + int sg_count; /* mapped sg entries */ struct scatterlist *sg; /* I/O scatter list */ s32 host_cookie; /* host private data */ }; From 344fa142ddc8d2316d0fc30eddf32bf56d19c01f Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Mon, 14 Sep 2015 12:18:55 +0200 Subject: [PATCH 1166/2091] mmc: core: Don't return an error for CD/WP GPIOs when GPIOLIB is unset commit 43934ece2ea72c1dd279c0b0478c1a036d5d77ee upstream. When CONFIG_GPIOLIB is unset, its stubs will return -ENOSYS. That means when the mmc core parses DT for CD/WP GPIOs via mmc_of_parse(), -ENOSYS becomes propagated to the caller. Typically this means that the mmc host driver fails to probe. As the CD/WP GPIOs are already treated as optional, let's extend that to cover the case when CONFIG_GPIOLIB is unset. 
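In other words, for an optional line both "no GPIO described in DT" (-ENOENT) and "GPIO support not compiled in" (-ENOSYS) mean "carry on without it", and only other errors should fail the probe. A minimal sketch of that filtering (the helper name is illustrative):

#include <linux/errno.h>

/* Decide whether an optional CD/WP GPIO request should fail the probe. */
static int optional_gpio_status_sketch(int ret)
{
	if (!ret)
		return 0;		/* GPIO found and requested */
	if (ret == -ENOENT || ret == -ENOSYS)
		return 0;		/* treat as "no such GPIO", keep going */
	return ret;			/* real error, e.g. -EPROBE_DEFER */
}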
Reported-by: Michal Simek Fixes: 16b23787fc70 ("mmc: sdhci-of-arasan: Call OF parsing for MMC") Signed-off-by: Ulf Hansson Tested-by: Michal Simek Acked-by: Venu Byravarasu Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/core/host.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c index 8be0df758e682..a0b1b460377de 100644 --- a/drivers/mmc/core/host.c +++ b/drivers/mmc/core/host.c @@ -373,7 +373,7 @@ int mmc_of_parse(struct mmc_host *host) 0, &cd_gpio_invert); if (!ret) dev_info(host->parent, "Got CD GPIO\n"); - else if (ret != -ENOENT) + else if (ret != -ENOENT && ret != -ENOSYS) return ret; /* @@ -397,7 +397,7 @@ int mmc_of_parse(struct mmc_host *host) ret = mmc_gpiod_request_ro(host, "wp", 0, false, 0, &ro_gpio_invert); if (!ret) dev_info(host->parent, "Got WP GPIO\n"); - else if (ret != -ENOENT) + else if (ret != -ENOENT && ret != -ENOSYS) return ret; /* See the comment on CD inversion above */ From 6f4e45e35c02fd23589a62aab0dc84286cc1302c Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 15 Aug 2015 13:36:12 -0500 Subject: [PATCH 1167/2091] dcache: Handle escaped paths in prepend_path commit cde93be45a8a90d8c264c776fab63487b5038a65 upstream. A rename can result in a dentry that by walking up d_parent will never reach it's mnt_root. For lack of a better term I call this an escaped path. prepend_path is called by four different functions __d_path, d_absolute_path, d_path, and getcwd. __d_path only wants to see paths are connected to the root it passes in. So __d_path needs prepend_path to return an error. d_absolute_path similarly wants to see paths that are connected to some root. Escaped paths are not connected to any mnt_root so d_absolute_path needs prepend_path to return an error greater than 1. So escaped paths will be treated like paths on lazily unmounted mounts. getcwd needs to prepend "(unreachable)" so getcwd also needs prepend_path to return an error. d_path is the interesting hold out. d_path just wants to print something, and does not care about the weird cases. Which raises the question what should be printed? Given that / should result in -ENOENT I believe it is desirable for escaped paths to be printed as empty paths. As there are not really any meaninful path components when considered from the perspective of a mount tree. So tweak prepend_path to return an empty path with an new error code of 3 when it encounters an escaped path. Signed-off-by: "Eric W. Biederman" Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/dcache.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/dcache.c b/fs/dcache.c index 5d03eb0ec0acc..2e8ddc1d09e92 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2923,6 +2923,13 @@ static int prepend_path(const struct path *path, if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) { struct mount *parent = ACCESS_ONCE(mnt->mnt_parent); + /* Escaped? */ + if (dentry != vfsmnt->mnt_root) { + bptr = *buffer; + blen = *buflen; + error = 3; + break; + } /* Global root? */ if (mnt != parent) { dentry = ACCESS_ONCE(mnt->mnt_mountpoint); From eed13ce27f481cba65352e73403ca15e67bc26a4 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 15 Aug 2015 20:27:13 -0500 Subject: [PATCH 1168/2091] vfs: Test for and handle paths that are unreachable from their mnt_root commit 397d425dc26da728396e66d392d5dcb8dac30c37 upstream. In rare cases a directory can be renamed out from under a bind mount. 
In those cases without special handling it becomes possible to walk up the directory tree to the root dentry of the filesystem and down from the root dentry to every other file or directory on the filesystem. Like division by zero .. from an unconnected path can not be given a useful semantic as there is no predicting at which path component the code will realize it is unconnected. We certainly can not match the current behavior as the current behavior is a security hole. Therefore when encounting .. when following an unconnected path return -ENOENT. - Add a function path_connected to verify path->dentry is reachable from path->mnt.mnt_root. AKA to validate that rename did not do something nasty to the bind mount. To avoid races path_connected must be called after following a path component to it's next path component. Signed-off-by: "Eric W. Biederman" Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/namei.c | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index fe30d3be43a8b..acdab610521b4 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -505,6 +505,24 @@ struct nameidata { char *saved_names[MAX_NESTED_LINKS + 1]; }; +/** + * path_connected - Verify that a path->dentry is below path->mnt.mnt_root + * @path: nameidate to verify + * + * Rename can sometimes move a file or directory outside of a bind + * mount, path_connected allows those cases to be detected. + */ +static bool path_connected(const struct path *path) +{ + struct vfsmount *mnt = path->mnt; + + /* Only bind mounts can have disconnected paths */ + if (mnt->mnt_root == mnt->mnt_sb->s_root) + return true; + + return is_subdir(path->dentry, mnt->mnt_root); +} + /* * Path walking has 2 modes, rcu-walk and ref-walk (see * Documentation/filesystems/path-lookup.txt). In situations when we can't @@ -1194,6 +1212,8 @@ static int follow_dotdot_rcu(struct nameidata *nd) goto failed; nd->path.dentry = parent; nd->seq = seq; + if (unlikely(!path_connected(&nd->path))) + goto failed; break; } if (!follow_up_rcu(&nd->path)) @@ -1290,7 +1310,7 @@ static void follow_mount(struct path *path) } } -static void follow_dotdot(struct nameidata *nd) +static int follow_dotdot(struct nameidata *nd) { if (!nd->root.mnt) set_root(nd); @@ -1306,6 +1326,10 @@ static void follow_dotdot(struct nameidata *nd) /* rare case of legitimate dget_parent()... */ nd->path.dentry = dget_parent(nd->path.dentry); dput(old); + if (unlikely(!path_connected(&nd->path))) { + path_put(&nd->path); + return -ENOENT; + } break; } if (!follow_up(&nd->path)) @@ -1313,6 +1337,7 @@ static void follow_dotdot(struct nameidata *nd) } follow_mount(&nd->path); nd->inode = nd->path.dentry->d_inode; + return 0; } /* @@ -1541,7 +1566,7 @@ static inline int handle_dots(struct nameidata *nd, int type) if (follow_dotdot_rcu(nd)) return -ECHILD; } else - follow_dotdot(nd); + return follow_dotdot(nd); } return 0; } @@ -2290,7 +2315,7 @@ mountpoint_last(struct nameidata *nd, struct path *path) if (unlikely(nd->last_type != LAST_NORM)) { error = handle_dots(nd, nd->last_type); if (error) - goto out; + return error; dentry = dget(nd->path.dentry); goto done; } From b23b63c222d2fc0d342ff8f4d6b0bf9bd894135f Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 25 Sep 2015 23:02:19 +0100 Subject: [PATCH 1169/2091] arm64/efi: Fix boot crash by not padding between EFI_MEMORY_RUNTIME regions commit 0ce3cc008ec04258b6a6314b09f1a6012810881a upstream. 
The new Properties Table feature introduced in UEFIv2.5 may split memory regions that cover PE/COFF memory images into separate code and data regions. Since these regions only differ in the type (runtime code vs runtime data) and the permission bits, but not in the memory type attributes (UC/WC/WT/WB), the spec does not require them to be aligned to 64 KB. Since the relative offset of PE/COFF .text and .data segments cannot be changed on the fly, this means that we can no longer pad out those regions to be mappable using 64 KB pages. Unfortunately, there is no annotation in the UEFI memory map that identifies data regions that were split off from a code region, so we must apply this logic to all adjacent runtime regions whose attributes only differ in the permission bits. So instead of rounding each memory region to 64 KB alignment at both ends, only round down regions that are not directly preceded by another runtime region with the same type attributes. Since the UEFI spec does not mandate that the memory map be sorted, this means we also need to sort it first. Note that this change will result in all EFI_MEMORY_RUNTIME regions whose start addresses are not aligned to the OS page size to be mapped with executable permissions (i.e., on kernels compiled with 64 KB pages). However, since these mappings are only active during the time that UEFI Runtime Services are being invoked, the window for abuse is rather small. Tested-by: Mark Salter Tested-by: Mark Rutland [UEFI 2.4 only] Signed-off-by: Ard Biesheuvel Signed-off-by: Matt Fleming Reviewed-by: Mark Salter Reviewed-by: Mark Rutland Cc: Catalin Marinas Cc: Leif Lindholm Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Cc: linux-kernel@vger.kernel.org Link: http://lkml.kernel.org/r/1443218539-7610-3-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/efi.c | 3 +- drivers/firmware/efi/libstub/arm-stub.c | 88 ++++++++++++++++++++----- 2 files changed, 75 insertions(+), 16 deletions(-) diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index 352962bc2e78a..5170fd5c8e971 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -257,7 +257,8 @@ static bool __init efi_virtmap_init(void) */ if (!is_normal_ram(md)) prot = __pgprot(PROT_DEVICE_nGnRE); - else if (md->type == EFI_RUNTIME_SERVICES_CODE) + else if (md->type == EFI_RUNTIME_SERVICES_CODE || + !PAGE_ALIGNED(md->phys_addr)) prot = PAGE_KERNEL_EXEC; else prot = PAGE_KERNEL; diff --git a/drivers/firmware/efi/libstub/arm-stub.c b/drivers/firmware/efi/libstub/arm-stub.c index e29560e6b40b0..950c87f5d2793 100644 --- a/drivers/firmware/efi/libstub/arm-stub.c +++ b/drivers/firmware/efi/libstub/arm-stub.c @@ -13,6 +13,7 @@ */ #include +#include #include #include "efistub.h" @@ -305,6 +306,44 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table, */ #define EFI_RT_VIRTUAL_BASE 0x40000000 +static int cmp_mem_desc(const void *l, const void *r) +{ + const efi_memory_desc_t *left = l, *right = r; + + return (left->phys_addr > right->phys_addr) ? 1 : -1; +} + +/* + * Returns whether region @left ends exactly where region @right starts, + * or false if either argument is NULL. 
+ */ +static bool regions_are_adjacent(efi_memory_desc_t *left, + efi_memory_desc_t *right) +{ + u64 left_end; + + if (left == NULL || right == NULL) + return false; + + left_end = left->phys_addr + left->num_pages * EFI_PAGE_SIZE; + + return left_end == right->phys_addr; +} + +/* + * Returns whether region @left and region @right have compatible memory type + * mapping attributes, and are both EFI_MEMORY_RUNTIME regions. + */ +static bool regions_have_compatible_memory_type_attrs(efi_memory_desc_t *left, + efi_memory_desc_t *right) +{ + static const u64 mem_type_mask = EFI_MEMORY_WB | EFI_MEMORY_WT | + EFI_MEMORY_WC | EFI_MEMORY_UC | + EFI_MEMORY_RUNTIME; + + return ((left->attribute ^ right->attribute) & mem_type_mask) == 0; +} + /* * efi_get_virtmap() - create a virtual mapping for the EFI memory map * @@ -317,33 +356,52 @@ void efi_get_virtmap(efi_memory_desc_t *memory_map, unsigned long map_size, int *count) { u64 efi_virt_base = EFI_RT_VIRTUAL_BASE; - efi_memory_desc_t *out = runtime_map; + efi_memory_desc_t *in, *prev = NULL, *out = runtime_map; int l; - for (l = 0; l < map_size; l += desc_size) { - efi_memory_desc_t *in = (void *)memory_map + l; + /* + * To work around potential issues with the Properties Table feature + * introduced in UEFI 2.5, which may split PE/COFF executable images + * in memory into several RuntimeServicesCode and RuntimeServicesData + * regions, we need to preserve the relative offsets between adjacent + * EFI_MEMORY_RUNTIME regions with the same memory type attributes. + * The easiest way to find adjacent regions is to sort the memory map + * before traversing it. + */ + sort(memory_map, map_size / desc_size, desc_size, cmp_mem_desc, NULL); + + for (l = 0; l < map_size; l += desc_size, prev = in) { u64 paddr, size; + in = (void *)memory_map + l; if (!(in->attribute & EFI_MEMORY_RUNTIME)) continue; + paddr = in->phys_addr; + size = in->num_pages * EFI_PAGE_SIZE; + /* * Make the mapping compatible with 64k pages: this allows * a 4k page size kernel to kexec a 64k page size kernel and * vice versa. */ - paddr = round_down(in->phys_addr, SZ_64K); - size = round_up(in->num_pages * EFI_PAGE_SIZE + - in->phys_addr - paddr, SZ_64K); - - /* - * Avoid wasting memory on PTEs by choosing a virtual base that - * is compatible with section mappings if this region has the - * appropriate size and physical alignment. (Sections are 2 MB - * on 4k granule kernels) - */ - if (IS_ALIGNED(in->phys_addr, SZ_2M) && size >= SZ_2M) - efi_virt_base = round_up(efi_virt_base, SZ_2M); + if (!regions_are_adjacent(prev, in) || + !regions_have_compatible_memory_type_attrs(prev, in)) { + + paddr = round_down(in->phys_addr, SZ_64K); + size += in->phys_addr - paddr; + + /* + * Avoid wasting memory on PTEs by choosing a virtual + * base that is compatible with section mappings if this + * region has the appropriate size and physical + * alignment. (Sections are 2 MB on 4k granule kernels) + */ + if (IS_ALIGNED(in->phys_addr, SZ_2M) && size >= SZ_2M) + efi_virt_base = round_up(efi_virt_base, SZ_2M); + else + efi_virt_base = round_up(efi_virt_base, SZ_64K); + } in->virt_addr = efi_virt_base + in->phys_addr - paddr; efi_virt_base += size; From 249af812dcf37ef58f24f152d07c585cdb4f2153 Mon Sep 17 00:00:00 2001 From: Li Bin Date: Wed, 30 Sep 2015 10:49:55 +0800 Subject: [PATCH 1170/2091] arm64: ftrace: fix function_graph tracer panic commit ee556d00cf20012e889344a0adbbf809ab5015a3 upstream. 
When function graph tracer is enabled, the following operation will trigger panic: mount -t debugfs nodev /sys/kernel echo next_tgid > /sys/kernel/tracing/set_ftrace_filter echo function_graph > /sys/kernel/tracing/current_tracer ls /proc/ ------------[ cut here ]------------ [ 198.501417] Unable to handle kernel paging request at virtual address cb88537fdc8ba316 [ 198.506126] pgd = ffffffc008f79000 [ 198.509363] [cb88537fdc8ba316] *pgd=00000000488c6003, *pud=00000000488c6003, *pmd=0000000000000000 [ 198.517726] Internal error: Oops: 94000005 [#1] SMP [ 198.518798] Modules linked in: [ 198.520582] CPU: 1 PID: 1388 Comm: ls Tainted: G [ 198.521800] Hardware name: linux,dummy-virt (DT) [ 198.522852] task: ffffffc0fa9e8000 ti: ffffffc0f9ab0000 task.ti: ffffffc0f9ab0000 [ 198.524306] PC is at next_tgid+0x30/0x100 [ 198.525205] LR is at return_to_handler+0x0/0x20 [ 198.526090] pc : [] lr : [] pstate: 60000145 [ 198.527392] sp : ffffffc0f9ab3d40 [ 198.528084] x29: ffffffc0f9ab3d40 x28: ffffffc0f9ab0000 [ 198.529406] x27: ffffffc000d6a000 x26: ffffffc000b786e8 [ 198.530659] x25: ffffffc0002a1900 x24: ffffffc0faf16c00 [ 198.531942] x23: ffffffc0f9ab3ea0 x22: 0000000000000002 [ 198.533202] x21: ffffffc000d85050 x20: 0000000000000002 [ 198.534446] x19: 0000000000000002 x18: 0000000000000000 [ 198.535719] x17: 000000000049fa08 x16: ffffffc000242efc [ 198.537030] x15: 0000007fa472b54c x14: ffffffffff000000 [ 198.538347] x13: ffffffc0fada84a0 x12: 0000000000000001 [ 198.539634] x11: ffffffc0f9ab3d70 x10: ffffffc0f9ab3d70 [ 198.540915] x9 : ffffffc0000907c0 x8 : ffffffc0f9ab3d40 [ 198.542215] x7 : 0000002e330f08f0 x6 : 0000000000000015 [ 198.543508] x5 : 0000000000000f08 x4 : ffffffc0f9835ec0 [ 198.544792] x3 : cb88537fdc8ba316 x2 : cb88537fdc8ba306 [ 198.546108] x1 : 0000000000000002 x0 : ffffffc000d85050 [ 198.547432] [ 198.547920] Process ls (pid: 1388, stack limit = 0xffffffc0f9ab0020) [ 198.549170] Stack: (0xffffffc0f9ab3d40 to 0xffffffc0f9ab4000) [ 198.582568] Call trace: [ 198.583313] [] next_tgid+0x30/0x100 [ 198.584359] [] ftrace_graph_caller+0x6c/0x70 [ 198.585503] [] ftrace_graph_caller+0x6c/0x70 [ 198.586574] [] ftrace_graph_caller+0x6c/0x70 [ 198.587660] [] ftrace_graph_caller+0x6c/0x70 [ 198.588896] Code: aa0003f5 2a0103f4 b4000102 91004043 (885f7c60) [ 198.591092] ---[ end trace 6a346f8f20949ac8 ]--- This is because when using function graph tracer, if the traced function return value is in multi regs ([x0-x7]), return_to_handler may corrupt them. So in return_to_handler, the parameter regs should be protected properly. 
Signed-off-by: Li Bin Acked-by: AKASHI Takahiro Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/entry-ftrace.S | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S index 08cafc518b9a5..0f03a8fe23144 100644 --- a/arch/arm64/kernel/entry-ftrace.S +++ b/arch/arm64/kernel/entry-ftrace.S @@ -178,6 +178,24 @@ ENTRY(ftrace_stub) ENDPROC(ftrace_stub) #ifdef CONFIG_FUNCTION_GRAPH_TRACER + /* save return value regs*/ + .macro save_return_regs + sub sp, sp, #64 + stp x0, x1, [sp] + stp x2, x3, [sp, #16] + stp x4, x5, [sp, #32] + stp x6, x7, [sp, #48] + .endm + + /* restore return value regs*/ + .macro restore_return_regs + ldp x0, x1, [sp] + ldp x2, x3, [sp, #16] + ldp x4, x5, [sp, #32] + ldp x6, x7, [sp, #48] + add sp, sp, #64 + .endm + /* * void ftrace_graph_caller(void) * @@ -204,11 +222,11 @@ ENDPROC(ftrace_graph_caller) * only when CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST is enabled. */ ENTRY(return_to_handler) - str x0, [sp, #-16]! + save_return_regs mov x0, x29 // parent's fp bl ftrace_return_to_handler// addr = ftrace_return_to_hander(fp); mov x30, x0 // restore the original return address - ldr x0, [sp], #16 + restore_return_regs ret END(return_to_handler) #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ From f01570729d1ee1293c25517087672f73277d354f Mon Sep 17 00:00:00 2001 From: Mark Salyzyn Date: Mon, 21 Sep 2015 21:39:50 +0100 Subject: [PATCH 1171/2091] arm64: readahead: fault retry breaks mmap file read random detection commit 569ba74a7ba69f46ce2950bf085b37fea2408385 upstream. This is the arm64 portion of commit 45cac65b0fcd ("readahead: fault retry breaks mmap file read random detection"), which was absent from the initial port and has since gone unnoticed. The original commit says: > .fault now can retry. The retry can break state machine of .fault. In > filemap_fault, if page is miss, ra->mmap_miss is increased. In the second > try, since the page is in page cache now, ra->mmap_miss is decreased. And > these are done in one fault, so we can't detect random mmap file access. > > Add a new flag to indicate .fault is tried once. In the second try, skip > ra->mmap_miss decreasing. The filemap_fault state machine is ok with it. With this change, Mark reports that: > Random read improves by 250%, sequential read improves by 40%, and > random write by 400% to an eMMC device with dm crypto wrapped around it. Cc: Shaohua Li Cc: Rik van Riel Cc: Wu Fengguang Signed-off-by: Mark Salyzyn Signed-off-by: Riley Andrews Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/mm/fault.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 96da13167d4a5..fa5efaa5c3ac5 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -279,6 +279,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, * starvation. */ mm_flags &= ~FAULT_FLAG_ALLOW_RETRY; + mm_flags |= FAULT_FLAG_TRIED; goto retry; } } From 59c73a0acf07e06480bb0e253f059317827aa623 Mon Sep 17 00:00:00 2001 From: Andreas Schwab Date: Wed, 23 Sep 2015 23:12:09 +0200 Subject: [PATCH 1172/2091] m68k: Define asmlinkage_protect commit 8474ba74193d302e8340dddd1e16c85cc4b98caf upstream. Make sure the compiler does not modify arguments of syscall functions. This can happen if the compiler generates a tailcall to another function. 
For example, without asmlinkage_protect sys_openat is compiled into this function: sys_openat: clr.l %d0 move.w 18(%sp),%d0 move.l %d0,16(%sp) jbra do_sys_open Note how the fourth argument is modified in place, modifying the register %d4 that gets restored from this stack slot when the function returns to user-space. The caller may expect the register to be unmodified across system calls. Signed-off-by: Andreas Schwab Signed-off-by: Geert Uytterhoeven Signed-off-by: Greg Kroah-Hartman --- arch/m68k/include/asm/linkage.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/arch/m68k/include/asm/linkage.h b/arch/m68k/include/asm/linkage.h index 5a822bb790f72..066e74f666ae9 100644 --- a/arch/m68k/include/asm/linkage.h +++ b/arch/m68k/include/asm/linkage.h @@ -4,4 +4,34 @@ #define __ALIGN .align 4 #define __ALIGN_STR ".align 4" +/* + * Make sure the compiler doesn't do anything stupid with the + * arguments on the stack - they are owned by the *caller*, not + * the callee. This just fools gcc into not spilling into them, + * and keeps it from doing tailcall recursion and/or using the + * stack slots for temporaries, since they are live and "used" + * all the way to the end of the function. + */ +#define asmlinkage_protect(n, ret, args...) \ + __asmlinkage_protect##n(ret, ##args) +#define __asmlinkage_protect_n(ret, args...) \ + __asm__ __volatile__ ("" : "=r" (ret) : "0" (ret), ##args) +#define __asmlinkage_protect0(ret) \ + __asmlinkage_protect_n(ret) +#define __asmlinkage_protect1(ret, arg1) \ + __asmlinkage_protect_n(ret, "m" (arg1)) +#define __asmlinkage_protect2(ret, arg1, arg2) \ + __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2)) +#define __asmlinkage_protect3(ret, arg1, arg2, arg3) \ + __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3)) +#define __asmlinkage_protect4(ret, arg1, arg2, arg3, arg4) \ + __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \ + "m" (arg4)) +#define __asmlinkage_protect5(ret, arg1, arg2, arg3, arg4, arg5) \ + __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \ + "m" (arg4), "m" (arg5)) +#define __asmlinkage_protect6(ret, arg1, arg2, arg3, arg4, arg5, arg6) \ + __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \ + "m" (arg4), "m" (arg5), "m" (arg6)) + #endif From 9373e7b420ec7dc8b9c20c5eae472c2906c67893 Mon Sep 17 00:00:00 2001 From: Chas Williams <3chas3@gmail.com> Date: Thu, 27 Aug 2015 12:28:46 -0400 Subject: [PATCH 1173/2091] net/xen-netfront: only napi_synchronize() if running commit 274b045509175db0405c784be85e8cce116e6f7d upstream. If an interface isn't running napi_synchronize() will hang forever. [ 392.248403] rmmod R running task 0 359 343 0x00000000 [ 392.257671] ffff88003760fc88 ffff880037193b40 ffff880037193160 ffff88003760fc88 [ 392.267644] ffff880037610000 ffff88003760fcd8 0000000100014c22 ffffffff81f75c40 [ 392.277524] 0000000000bc7010 ffff88003760fca8 ffffffff81796927 ffffffff81f75c40 [ 392.287323] Call Trace: [ 392.291599] [] schedule+0x37/0x90 [ 392.298553] [] schedule_timeout+0x14b/0x280 [ 392.306421] [] ? irq_free_descs+0x69/0x80 [ 392.314006] [] ? 
internal_add_timer+0xb0/0xb0 [ 392.322125] [] msleep+0x37/0x50 [ 392.329037] [] xennet_disconnect_backend.isra.24+0xda/0x390 [xen_netfront] [ 392.339658] [] xennet_remove+0x2c/0x80 [xen_netfront] [ 392.348516] [] xenbus_dev_remove+0x59/0xc0 [ 392.356257] [] __device_release_driver+0x87/0x120 [ 392.364645] [] driver_detach+0xb8/0xc0 [ 392.371989] [] bus_remove_driver+0x59/0xe0 [ 392.379883] [] driver_unregister+0x30/0x70 [ 392.387495] [] xenbus_unregister_driver+0x12/0x20 [ 392.395908] [] netif_exit+0x10/0x775 [xen_netfront] [ 392.404877] [] SyS_delete_module+0x1d8/0x230 [ 392.412804] [] system_call_fastpath+0x12/0x71 Signed-off-by: Chas Williams <3chas3@gmail.com> Signed-off-by: David S. Miller Cc: "Kamata, Munehisa" Signed-off-by: Greg Kroah-Hartman --- drivers/net/xen-netfront.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index e031c943286ef..52f081f4dfd58 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -1353,7 +1353,8 @@ static void xennet_disconnect_backend(struct netfront_info *info) queue->tx_evtchn = queue->rx_evtchn = 0; queue->tx_irq = queue->rx_irq = 0; - napi_synchronize(&queue->napi); + if (netif_running(info->netdev)) + napi_synchronize(&queue->napi); xennet_release_tx_bufs(queue); xennet_release_rx_bufs(queue); From 5f9611c8005b4d066f7d7fd8384d9a5486acc5f6 Mon Sep 17 00:00:00 2001 From: Stefan Assmann Date: Fri, 10 Jul 2015 15:01:12 +0200 Subject: [PATCH 1174/2091] igb: do not re-init SR-IOV during probe commit 6423fc34160939142d72ffeaa2db6408317f54df upstream. During driver probing the following code path is triggered. igb_probe ->igb_sw_init ->igb_probe_vfs ->igb_pci_enable_sriov ->igb_sriov_reinit Doing the SR-IOV re-init is not necessary during probing since we're starting from scratch. Here we can call igb_enable_sriov() right away. Running igb_sriov_reinit() during igb_probe() also seems to cause occasional packet loss on some onboard 82576 NICs. Reproduced on Dell and HP servers with onboard 82576 NICs. Example: Intel Corporation 82576 Gigabit Network Connection [8086:10c9] (rev 01) Subsystem: Dell Device [1028:0481] Signed-off-by: Stefan Assmann Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher Cc: Daniel J Blueman Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/intel/igb/igb_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 4f6bf996851ec..7dfbcde345093 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -2864,7 +2864,7 @@ static void igb_probe_vfs(struct igb_adapter *adapter) return; pci_sriov_set_totalvfs(pdev, 7); - igb_pci_enable_sriov(pdev, max_vfs); + igb_enable_sriov(pdev, max_vfs); #endif /* CONFIG_PCI_IOV */ } From 0cf68c236f11431677c2dee17161cec5df42a5b9 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sat, 26 Sep 2015 12:23:56 +0100 Subject: [PATCH 1175/2091] genirq: Fix race in register_irq_proc() commit 95c2b17534654829db428f11bcf4297c059a2a7e upstream. Per-IRQ directories in procfs are created only when a handler is first added to the irqdesc, not when the irqdesc is created. In the case of a shared IRQ, multiple tasks can race to create a directory. This race condition seems to have been present forever, but is easier to hit with async probing. 
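The fix is the usual "create once" pattern: serialize on a single lock and re-check the condition after acquiring it, so the loser of the race sees the directory the winner created instead of creating a second one. A stripped-down sketch (the struct and create_dir() helper are illustrative, not the genirq code):

#include <linux/mutex.h>

struct desc_sketch {
	void *dir;			/* NULL until the proc dir exists */
};

static void *create_dir(struct desc_sketch *desc);	/* illustrative */

static void register_dir_once_sketch(struct desc_sketch *desc)
{
	static DEFINE_MUTEX(register_lock);

	mutex_lock(&register_lock);
	if (!desc->dir)			/* re-check under the lock */
		desc->dir = create_dir(desc);
	mutex_unlock(&register_lock);
}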
Signed-off-by: Ben Hutchings Link: http://lkml.kernel.org/r/1443266636.2004.2.camel@decadent.org.uk Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- kernel/irq/proc.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index df2f4642d1e7b..5c38f59741e25 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "internals.h" @@ -323,18 +324,29 @@ void register_handler_proc(unsigned int irq, struct irqaction *action) void register_irq_proc(unsigned int irq, struct irq_desc *desc) { + static DEFINE_MUTEX(register_lock); char name [MAX_NAMELEN]; - if (!root_irq_dir || (desc->irq_data.chip == &no_irq_chip) || desc->dir) + if (!root_irq_dir || (desc->irq_data.chip == &no_irq_chip)) return; + /* + * irq directories are registered only when a handler is + * added, not when the descriptor is created, so multiple + * tasks might try to register at the same time. + */ + mutex_lock(®ister_lock); + + if (desc->dir) + goto out_unlock; + memset(name, 0, MAX_NAMELEN); sprintf(name, "%d", irq); /* create /proc/irq/1234 */ desc->dir = proc_mkdir(name, root_irq_dir); if (!desc->dir) - return; + goto out_unlock; #ifdef CONFIG_SMP /* create /proc/irq//smp_affinity */ @@ -355,6 +367,9 @@ void register_irq_proc(unsigned int irq, struct irq_desc *desc) proc_create_data("spurious", 0444, desc->dir, &irq_spurious_proc_fops, (void *)(long)irq); + +out_unlock: + mutex_unlock(®ister_lock); } void unregister_irq_proc(unsigned int irq, struct irq_desc *desc) From 583c46f9ce3818c85da204eeb19adfe1d4ea860a Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 1 Oct 2015 16:03:38 +1000 Subject: [PATCH 1176/2091] md/bitmap: don't pass -1 to bitmap_storage_alloc. commit da6fb7a9e5bd6f04f7e15070f630bdf1ea502841 upstream. Passing -1 to bitmap_storage_alloc() causes page->index to be set to -1, which is quite problematic. So only pass ->cluster_slot if mddev_is_clustered(). Fixes: b97e92574c0b ("Use separate bitmaps for each nodes in the cluster") Signed-off-by: NeilBrown Signed-off-by: Greg Kroah-Hartman --- drivers/md/bitmap.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index c90118e907081..a7621a2589365 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -2000,7 +2000,8 @@ int bitmap_resize(struct bitmap *bitmap, sector_t blocks, if (bitmap->mddev->bitmap_info.offset || bitmap->mddev->bitmap_info.file) ret = bitmap_storage_alloc(&store, chunks, !bitmap->mddev->bitmap_info.external, - bitmap->cluster_slot); + mddev_is_clustered(bitmap->mddev) + ? bitmap->cluster_slot : 0); if (ret) goto err; From 0d2ea357d7aaaa93c81bae718f3eb88f90e337e3 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 18 Jun 2015 14:32:23 +0100 Subject: [PATCH 1177/2091] overlay: Call ovl_drop_write() earlier in ovl_dentry_open() commit f25801ee4680ef1db21e15c112e6e5fe3ffe8da5 upstream. Call ovl_drop_write() earlier in ovl_dentry_open() before we call vfs_open() as we've done the copy up for which we needed the freeze-write lock by that point. 
Signed-off-by: David Howells Signed-off-by: Al Viro Cc: "Kamata, Munehisa" Signed-off-by: Greg Kroah-Hartman --- fs/overlayfs/inode.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 04f1248846877..a39224f9c90cc 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -342,31 +342,25 @@ static int ovl_dentry_open(struct dentry *dentry, struct file *file, int err; struct path realpath; enum ovl_path_type type; - bool want_write = false; type = ovl_path_real(dentry, &realpath); if (ovl_open_need_copy_up(file->f_flags, type, realpath.dentry)) { - want_write = true; err = ovl_want_write(dentry); if (err) - goto out; + return err; if (file->f_flags & O_TRUNC) err = ovl_copy_up_last(dentry, NULL, true); else err = ovl_copy_up(dentry); + ovl_drop_write(dentry); if (err) - goto out_drop_write; + return err; ovl_path_upper(dentry, &realpath); } - err = vfs_open(&realpath, file, cred); -out_drop_write: - if (want_write) - ovl_drop_write(dentry); -out: - return err; + return vfs_open(&realpath, file, cred); } static const struct inode_operations ovl_file_inode_operations = { From 9abb3b81094857a1e2d7dea5b2a8605e29d8c77d Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 18 Jun 2015 14:32:31 +0100 Subject: [PATCH 1178/2091] overlayfs: Make f_path always point to the overlay and f_inode to the underlay commit 4bacc9c9234c7c8eec44f5ed4e960d9f96fa0f01 upstream. Make file->f_path always point to the overlay dentry so that the path in /proc/pid/fd is correct and to ensure that label-based LSMs have access to the overlay as well as the underlay (path-based LSMs probably don't need it). Using my union testsuite to set things up, before the patch I see: [root@andromeda union-testsuite]# bash 5 /a/foo107 [root@andromeda union-testsuite]# stat /mnt/a/foo107 ... Device: 23h/35d Inode: 13381 Links: 1 ... [root@andromeda union-testsuite]# stat -L /proc/$$/fd/5 ... Device: 23h/35d Inode: 13381 Links: 1 ... After the patch: [root@andromeda union-testsuite]# bash 5 /mnt/a/foo107 [root@andromeda union-testsuite]# stat /mnt/a/foo107 ... Device: 23h/35d Inode: 40346 Links: 1 ... [root@andromeda union-testsuite]# stat -L /proc/$$/fd/5 ... Device: 23h/35d Inode: 40346 Links: 1 ... Note the change in where /proc/$$/fd/5 points to in the ls command. It was pointing to /a/foo107 (which doesn't exist) and now points to /mnt/a/foo107 (which is correct). The inode accessed, however, is the lower layer. The union layer is on device 25h/37d and the upper layer on 24h/36d. 
Signed-off-by: David Howells Signed-off-by: Al Viro Cc: "Kamata, Munehisa" Signed-off-by: Greg Kroah-Hartman --- fs/dcache.c | 5 +++- fs/internal.h | 1 + fs/open.c | 49 +++++++++++++++++++++------------------- fs/overlayfs/inode.c | 14 +++++------- fs/overlayfs/overlayfs.h | 1 + fs/overlayfs/super.c | 1 + include/linux/dcache.h | 2 ++ include/linux/fs.h | 2 -- 8 files changed, 41 insertions(+), 34 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 2e8ddc1d09e92..0046ab7d4f3de 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1676,7 +1676,8 @@ void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op) DCACHE_OP_COMPARE | DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE | - DCACHE_OP_DELETE )); + DCACHE_OP_DELETE | + DCACHE_OP_SELECT_INODE)); dentry->d_op = op; if (!op) return; @@ -1692,6 +1693,8 @@ void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op) dentry->d_flags |= DCACHE_OP_DELETE; if (op->d_prune) dentry->d_flags |= DCACHE_OP_PRUNE; + if (op->d_select_inode) + dentry->d_flags |= DCACHE_OP_SELECT_INODE; } EXPORT_SYMBOL(d_set_d_op); diff --git a/fs/internal.h b/fs/internal.h index 01dce1d1476b7..4d5af583ab031 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -107,6 +107,7 @@ extern struct file *do_file_open_root(struct dentry *, struct vfsmount *, extern long do_handle_open(int mountdirfd, struct file_handle __user *ufh, int open_flag); extern int open_check_o_direct(struct file *f); +extern int vfs_open(const struct path *, struct file *, const struct cred *); /* * inode.c diff --git a/fs/open.c b/fs/open.c index 98e5a52dc68c9..f9d2bf9350996 100644 --- a/fs/open.c +++ b/fs/open.c @@ -678,18 +678,18 @@ int open_check_o_direct(struct file *f) } static int do_dentry_open(struct file *f, + struct inode *inode, int (*open)(struct inode *, struct file *), const struct cred *cred) { static const struct file_operations empty_fops = {}; - struct inode *inode; int error; f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE; path_get(&f->f_path); - inode = f->f_inode = f->f_path.dentry->d_inode; + f->f_inode = inode; f->f_mapping = inode->i_mapping; if (unlikely(f->f_flags & O_PATH)) { @@ -793,7 +793,8 @@ int finish_open(struct file *file, struct dentry *dentry, BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */ file->f_path.dentry = dentry; - error = do_dentry_open(file, open, current_cred()); + error = do_dentry_open(file, d_backing_inode(dentry), open, + current_cred()); if (!error) *opened |= FILE_OPENED; @@ -822,6 +823,28 @@ int finish_no_open(struct file *file, struct dentry *dentry) } EXPORT_SYMBOL(finish_no_open); +/** + * vfs_open - open the file at the given path + * @path: path to open + * @file: newly allocated file with f_flag initialized + * @cred: credentials to use + */ +int vfs_open(const struct path *path, struct file *file, + const struct cred *cred) +{ + struct dentry *dentry = path->dentry; + struct inode *inode = dentry->d_inode; + + file->f_path = *path; + if (dentry->d_flags & DCACHE_OP_SELECT_INODE) { + inode = dentry->d_op->d_select_inode(dentry, file->f_flags); + if (IS_ERR(inode)) + return PTR_ERR(inode); + } + + return do_dentry_open(file, inode, NULL, cred); +} + struct file *dentry_open(const struct path *path, int flags, const struct cred *cred) { @@ -853,26 +876,6 @@ struct file *dentry_open(const struct path *path, int flags, } EXPORT_SYMBOL(dentry_open); -/** - * vfs_open - open the file at the given path - * @path: path to open - * @filp: newly allocated file with f_flag 
initialized - * @cred: credentials to use - */ -int vfs_open(const struct path *path, struct file *filp, - const struct cred *cred) -{ - struct inode *inode = path->dentry->d_inode; - - if (inode->i_op->dentry_open) - return inode->i_op->dentry_open(path->dentry, filp, cred); - else { - filp->f_path = *path; - return do_dentry_open(filp, NULL, cred); - } -} -EXPORT_SYMBOL(vfs_open); - static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op) { int lookup_flags = 0; diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index a39224f9c90cc..4febea2941024 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -336,31 +336,30 @@ static bool ovl_open_need_copy_up(int flags, enum ovl_path_type type, return true; } -static int ovl_dentry_open(struct dentry *dentry, struct file *file, - const struct cred *cred) +struct inode *ovl_d_select_inode(struct dentry *dentry, unsigned file_flags) { int err; struct path realpath; enum ovl_path_type type; type = ovl_path_real(dentry, &realpath); - if (ovl_open_need_copy_up(file->f_flags, type, realpath.dentry)) { + if (ovl_open_need_copy_up(file_flags, type, realpath.dentry)) { err = ovl_want_write(dentry); if (err) - return err; + return ERR_PTR(err); - if (file->f_flags & O_TRUNC) + if (file_flags & O_TRUNC) err = ovl_copy_up_last(dentry, NULL, true); else err = ovl_copy_up(dentry); ovl_drop_write(dentry); if (err) - return err; + return ERR_PTR(err); ovl_path_upper(dentry, &realpath); } - return vfs_open(&realpath, file, cred); + return d_backing_inode(realpath.dentry); } static const struct inode_operations ovl_file_inode_operations = { @@ -371,7 +370,6 @@ static const struct inode_operations ovl_file_inode_operations = { .getxattr = ovl_getxattr, .listxattr = ovl_listxattr, .removexattr = ovl_removexattr, - .dentry_open = ovl_dentry_open, }; static const struct inode_operations ovl_symlink_inode_operations = { diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 17ac5afc9ffbc..ea5a40b06e3ad 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -173,6 +173,7 @@ ssize_t ovl_getxattr(struct dentry *dentry, const char *name, void *value, size_t size); ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size); int ovl_removexattr(struct dentry *dentry, const char *name); +struct inode *ovl_d_select_inode(struct dentry *dentry, unsigned file_flags); struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, struct ovl_entry *oe); diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 155989455a721..33f2d27a6792f 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -275,6 +275,7 @@ static void ovl_dentry_release(struct dentry *dentry) static const struct dentry_operations ovl_dentry_operations = { .d_release = ovl_dentry_release, + .d_select_inode = ovl_d_select_inode, }; static struct ovl_entry *ovl_alloc_entry(unsigned int numlower) diff --git a/include/linux/dcache.h b/include/linux/dcache.h index df334cbacc6d0..167ec0934049d 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -160,6 +160,7 @@ struct dentry_operations { char *(*d_dname)(struct dentry *, char *, int); struct vfsmount *(*d_automount)(struct path *); int (*d_manage)(struct dentry *, bool); + struct inode *(*d_select_inode)(struct dentry *, unsigned); } ____cacheline_aligned; /* @@ -225,6 +226,7 @@ struct dentry_operations { #define DCACHE_MAY_FREE 0x00800000 #define DCACHE_FALLTHRU 0x01000000 /* Fall through to lower layer */ +#define DCACHE_OP_SELECT_INODE 0x02000000 
/* Unioned entry: dcache op selects inode */ extern seqlock_t rename_lock; diff --git a/include/linux/fs.h b/include/linux/fs.h index 571aab91bfc03..f93192333b37f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1641,7 +1641,6 @@ struct inode_operations { int (*set_acl)(struct inode *, struct posix_acl *, int); /* WARNING: probably going away soon, do not use! */ - int (*dentry_open)(struct dentry *, struct file *, const struct cred *); } ____cacheline_aligned; ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, @@ -2193,7 +2192,6 @@ extern struct file *file_open_name(struct filename *, int, umode_t); extern struct file *filp_open(const char *, int, umode_t); extern struct file *file_open_root(struct dentry *, struct vfsmount *, const char *, int); -extern int vfs_open(const struct path *, struct file *, const struct cred *); extern struct file * dentry_open(const struct path *, int, const struct cred *); extern int filp_close(struct file *, fl_owner_t id); From aaf19f122d2e826cab6a56dabfdfcd485430a9bc Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 12 Jul 2015 10:39:45 -0400 Subject: [PATCH 1179/2091] fix a braino in ovl_d_select_inode() commit 9391dd00d13c853ab4f2a85435288ae2202e0e43 upstream. when opening a directory we want the overlayfs inode, not one from the topmost layer. Reported-By: Andrey Jr. Melnikov Tested-By: Andrey Jr. Melnikov Signed-off-by: Al Viro Cc: "Kamata, Munehisa" Signed-off-by: Greg Kroah-Hartman --- fs/overlayfs/inode.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 4febea2941024..ba0db26389465 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -342,6 +342,9 @@ struct inode *ovl_d_select_inode(struct dentry *dentry, unsigned file_flags) struct path realpath; enum ovl_path_type type; + if (d_is_dir(dentry)) + return d_backing_inode(dentry); + type = ovl_path_real(dentry, &realpath); if (ovl_open_need_copy_up(file_flags, type, realpath.dentry)) { err = ovl_want_write(dentry); From 863e9b4f5ab7e6c7230b289397b06ca4c77a671d Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Mon, 14 Sep 2015 20:12:21 +0800 Subject: [PATCH 1180/2091] nfs/filelayout: Fix NULL reference caused by double freeing of fh_array commit 3ec0c97959abff33a42db9081c22132bcff5b4f2 upstream. If filelayout_decode_layout fail, _filelayout_free_lseg will causes a double freeing of fh_array. [ 1179.279800] BUG: unable to handle kernel NULL pointer dereference at (null) [ 1179.280198] IP: [] filelayout_free_fh_array.isra.11+0x1d/0x70 [nfs_layout_nfsv41_files] [ 1179.281010] PGD 0 [ 1179.281443] Oops: 0000 [#1] [ 1179.281831] Modules linked in: nfs_layout_nfsv41_files(OE) nfsv4(OE) nfs(OE) fscache(E) xfs libcrc32c coretemp nfsd crct10dif_pclmul ppdev crc32_pclmul crc32c_intel auth_rpcgss ghash_clmulni_intel nfs_acl lockd vmw_balloon grace sunrpc parport_pc vmw_vmci parport shpchp i2c_piix4 vmwgfx drm_kms_helper ttm drm serio_raw mptspi scsi_transport_spi mptscsih e1000 mptbase ata_generic pata_acpi [last unloaded: fscache] [ 1179.283891] CPU: 0 PID: 13336 Comm: cat Tainted: G OE 4.3.0-rc1-pnfs+ #244 [ 1179.284323] Hardware name: VMware, Inc. 
VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 05/20/2014 [ 1179.285206] task: ffff8800501d48c0 ti: ffff88003e3c4000 task.ti: ffff88003e3c4000 [ 1179.285668] RIP: 0010:[] [] filelayout_free_fh_array.isra.11+0x1d/0x70 [nfs_layout_nfsv41_files] [ 1179.286612] RSP: 0018:ffff88003e3c77f8 EFLAGS: 00010202 [ 1179.287092] RAX: 0000000000000000 RBX: ffff88001fe78900 RCX: 0000000000000000 [ 1179.287731] RDX: ffffea0000f40760 RSI: ffff88001fe789c8 RDI: ffff88001fe789c0 [ 1179.288383] RBP: ffff88003e3c7810 R08: ffffea0000f40760 R09: 0000000000000000 [ 1179.289170] R10: 0000000000000000 R11: 0000000000000001 R12: ffff88001fe789c8 [ 1179.289959] R13: ffff88001fe789c0 R14: ffff88004ec05a80 R15: ffff88004f935b88 [ 1179.290791] FS: 00007f4e66bb5700(0000) GS:ffffffff81c29000(0000) knlGS:0000000000000000 [ 1179.291580] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1179.292209] CR2: 0000000000000000 CR3: 00000000203f8000 CR4: 00000000001406f0 [ 1179.292731] Stack: [ 1179.293195] ffff88001fe78900 00000000000000d0 ffff88001fe78178 ffff88003e3c7868 [ 1179.293676] ffffffffa0272737 0000000000000001 0000000000000001 ffff88001fe78800 [ 1179.294151] 00000000614fffce ffffffff81727671 ffff88001fe78100 ffff88001fe78100 [ 1179.294623] Call Trace: [ 1179.295092] [] filelayout_alloc_lseg+0xa7/0x2d0 [nfs_layout_nfsv41_files] [ 1179.295625] [] ? out_of_line_wait_on_bit+0x81/0xb0 [ 1179.296133] [] pnfs_layout_process+0xae/0x320 [nfsv4] [ 1179.296632] [] nfs4_proc_layoutget+0x2b1/0x360 [nfsv4] [ 1179.297134] [] pnfs_update_layout+0x853/0xb30 [nfsv4] [ 1179.297632] [] ? nfs_get_lock_context+0x74/0x170 [nfs] [ 1179.298158] [] filelayout_pg_init_read+0x37/0x50 [nfs_layout_nfsv41_files] [ 1179.298834] [] __nfs_pageio_add_request+0x119/0x460 [nfs] [ 1179.299385] [] ? nfs_create_request.part.9+0x37/0x2e0 [nfs] [ 1179.299872] [] nfs_pageio_add_request+0xa3/0x1b0 [nfs] [ 1179.300362] [] readpage_async_filler+0x85/0x260 [nfs] [ 1179.300907] [] read_cache_pages+0x91/0xd0 [ 1179.301391] [] ? nfs_read_completion+0x220/0x220 [nfs] [ 1179.301867] [] nfs_readpages+0x128/0x200 [nfs] [ 1179.302330] [] __do_page_cache_readahead+0x203/0x280 [ 1179.302784] [] ? __do_page_cache_readahead+0xd8/0x280 [ 1179.303413] [] ondemand_readahead+0x1a6/0x2f0 [ 1179.303855] [] page_cache_sync_readahead+0x31/0x50 [ 1179.304286] [] generic_file_read_iter+0x4a6/0x5c0 [ 1179.304711] [] ? 
__nfs_revalidate_mapping+0x1f6/0x240 [nfs] [ 1179.305132] [] nfs_file_read+0x52/0xa0 [nfs] [ 1179.305540] [] __vfs_read+0xcc/0x100 [ 1179.305936] [] vfs_read+0x85/0x130 [ 1179.306326] [] SyS_read+0x58/0xd0 [ 1179.306708] [] entry_SYSCALL_64_fastpath+0x12/0x76 [ 1179.307094] Code: c4 66 66 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 55 48 89 e5 41 55 41 54 53 8b 07 49 89 f4 85 c0 74 47 48 8b 06 49 89 fd <48> 8b 38 48 85 ff 74 22 31 db eb 0c 48 63 d3 48 8b 3c d0 48 85 [ 1179.308357] RIP [] filelayout_free_fh_array.isra.11+0x1d/0x70 [nfs_layout_nfsv41_files] [ 1179.309177] RSP [ 1179.309582] CR2: 0000000000000000 Signed-off-by: Kinglong Mee Signed-off-by: Trond Myklebust Cc: William Dauchy Signed-off-by: Greg Kroah-Hartman --- fs/nfs/filelayout/filelayout.c | 31 ++++++++++++------------------- 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index a46bf6de9ce45..fb1fb2774d346 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -628,23 +628,18 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, goto out; } -static void filelayout_free_fh_array(struct nfs4_filelayout_segment *fl) +static void _filelayout_free_lseg(struct nfs4_filelayout_segment *fl) { int i; - for (i = 0; i < fl->num_fh; i++) { - if (!fl->fh_array[i]) - break; - kfree(fl->fh_array[i]); + if (fl->fh_array) { + for (i = 0; i < fl->num_fh; i++) { + if (!fl->fh_array[i]) + break; + kfree(fl->fh_array[i]); + } + kfree(fl->fh_array); } - kfree(fl->fh_array); - fl->fh_array = NULL; -} - -static void -_filelayout_free_lseg(struct nfs4_filelayout_segment *fl) -{ - filelayout_free_fh_array(fl); kfree(fl); } @@ -715,21 +710,21 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo, /* Do we want to use a mempool here? */ fl->fh_array[i] = kmalloc(sizeof(struct nfs_fh), gfp_flags); if (!fl->fh_array[i]) - goto out_err_free; + goto out_err; p = xdr_inline_decode(&stream, 4); if (unlikely(!p)) - goto out_err_free; + goto out_err; fl->fh_array[i]->size = be32_to_cpup(p++); if (sizeof(struct nfs_fh) < fl->fh_array[i]->size) { printk(KERN_ERR "NFS: Too big fh %d received %d\n", i, fl->fh_array[i]->size); - goto out_err_free; + goto out_err; } p = xdr_inline_decode(&stream, fl->fh_array[i]->size); if (unlikely(!p)) - goto out_err_free; + goto out_err; memcpy(fl->fh_array[i]->data, p, fl->fh_array[i]->size); dprintk("DEBUG: %s: fh len %d\n", __func__, fl->fh_array[i]->size); @@ -738,8 +733,6 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo, __free_page(scratch); return 0; -out_err_free: - filelayout_free_fh_array(fl); out_err: __free_page(scratch); return -EIO; From 645b9d3806330fc9fb5bc0b30ff6845e344888de Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Tue, 29 Sep 2015 15:01:08 +0100 Subject: [PATCH 1181/2091] clk: ti: fix dual-registration of uart4_ick commit 19e79687de22f23bcfb5e79cce3daba20af228d1 upstream. On the OMAP AM3517 platform the uart4_ick gets registered twice, causing any power management to /dev/ttyO3 to fail when trying to wake the device up. 
This solves the following oops: [] Unhandled fault: external abort on non-linefetch (0x1028) at 0xfa09e008 [] PC is at serial_omap_pm+0x48/0x15c [] LR is at _raw_spin_unlock_irqrestore+0x30/0x5c Fixes: aafd900cab87 ("CLK: TI: add omap3 clock init file") Cc: mturquette@baylibre.com Cc: sboyd@codeaurora.org Cc: linux-clk@vger.kernel.org Cc: linux-omap@vger.kernel.org Cc: linux-kernel@lists.codethink.co.uk Signed-off-by: Ben Dooks Signed-off-by: Tero Kristo Signed-off-by: Greg Kroah-Hartman --- drivers/clk/ti/clk-3xxx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/ti/clk-3xxx.c b/drivers/clk/ti/clk-3xxx.c index 757636d166cff..4ab28cfb8d2a6 100644 --- a/drivers/clk/ti/clk-3xxx.c +++ b/drivers/clk/ti/clk-3xxx.c @@ -163,7 +163,6 @@ static struct ti_dt_clk omap3xxx_clks[] = { DT_CLK(NULL, "gpio2_ick", "gpio2_ick"), DT_CLK(NULL, "wdt3_ick", "wdt3_ick"), DT_CLK(NULL, "uart3_ick", "uart3_ick"), - DT_CLK(NULL, "uart4_ick", "uart4_ick"), DT_CLK(NULL, "gpt9_ick", "gpt9_ick"), DT_CLK(NULL, "gpt8_ick", "gpt8_ick"), DT_CLK(NULL, "gpt7_ick", "gpt7_ick"), @@ -308,6 +307,7 @@ static struct ti_dt_clk am35xx_clks[] = { static struct ti_dt_clk omap36xx_clks[] = { DT_CLK(NULL, "omap_192m_alwon_fck", "omap_192m_alwon_fck"), DT_CLK(NULL, "uart4_fck", "uart4_fck"), + DT_CLK(NULL, "uart4_ick", "uart4_ick"), { .node_name = NULL }, }; From 2058efbcb0eb148db2086266cfb048da42fd0a93 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 9 Oct 2015 13:44:34 -0400 Subject: [PATCH 1182/2091] namei: results of d_is_negative() should be checked after dentry revalidation commit daf3761c9fcde0f4ca64321cbed6c1c86d304193 upstream. Leandro Awa writes: "After switching to version 4.1.6, our parallelized and distributed workflows now fail consistently with errors of the form: T34: ./regex.c:39:22: error: config.h: No such file or directory From our 'git bisect' testing, the following commit appears to be the possible cause of the behavior we've been seeing: commit 766c4cbfacd8" Al Viro says: "What happens is that 766c4cbfacd8 got the things subtly wrong. We used to treat d_is_negative() after lookup_fast() as "fall with ENOENT". That was wrong - checking ->d_flags outside of ->d_seq protection is unreliable and failing with hard error on what should've fallen back to non-RCU pathname resolution is a bug. Unfortunately, we'd pulled the test too far up and ran afoul of another kind of staleness. The dentry might have been absolutely stable from the RCU point of view (and we might be on UP, etc), but stale from the remote fs point of view. If ->d_revalidate() returns "it's actually stale", dentry gets thrown away and the original code wouldn't even have looked at its ->d_flags. 
What we need is to check ->d_flags where 766c4cbfacd8 does (prior to ->d_seq validation) but only use the result in cases where we do not discard this dentry outright" Reported-by: Leandro Awa Link: https://bugzilla.kernel.org/show_bug.cgi?id=104911 Fixes: 766c4cbfacd8 ("namei: d_is_negative() should be checked...") Tested-by: Leandro Awa Signed-off-by: Trond Myklebust Acked-by: Al Viro Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/namei.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index acdab610521b4..ccd7f98d85b9f 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1453,8 +1453,6 @@ static int lookup_fast(struct nameidata *nd, negative = d_is_negative(dentry); if (read_seqcount_retry(&dentry->d_seq, seq)) return -ECHILD; - if (negative) - return -ENOENT; /* * This sequence count validates that the parent had no @@ -1475,6 +1473,12 @@ static int lookup_fast(struct nameidata *nd, goto unlazy; } } + /* + * Note: do negative dentry check after revalidation in + * case that drops it. + */ + if (negative) + return -ENOENT; path->mnt = mnt; path->dentry = dentry; if (likely(__follow_mount_rcu(nd, path, inode))) From 16d4c27cb8ca5b7955efbf4389e1537ff948f7ef Mon Sep 17 00:00:00 2001 From: Junichi Nomura Date: Thu, 1 Oct 2015 08:31:51 +0000 Subject: [PATCH 1183/2091] dm: fix AB-BA deadlock in __dm_destroy() commit 2a708cff93f1845b9239bc7d6310aef54e716c6a upstream. __dm_destroy() takes io_barrier SRCU lock (dm_get_live_table) and suspend_lock in reverse order. Doing so can cause AB-BA deadlock: __dm_destroy dm_swap_table --------------------------------------------------- mutex_lock(suspend_lock) dm_get_live_table() srcu_read_lock(io_barrier) dm_sync_table() synchronize_srcu(io_barrier) .. waiting for dm_put_live_table() mutex_lock(suspend_lock) .. waiting for suspend_lock Fix this by taking the locks in proper order. Signed-off-by: Jun'ichi Nomura Fixes: ab7c7bb6f4ab ("dm: hold suspend_lock while suspending device during device deletion") Acked-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 697f34fba06b7..8b72ceee0f61e 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2925,8 +2925,6 @@ static void __dm_destroy(struct mapped_device *md, bool wait) might_sleep(); - map = dm_get_live_table(md, &srcu_idx); - spin_lock(&_minor_lock); idr_replace(&_minor_idr, MINOR_ALLOCED, MINOR(disk_devt(dm_disk(md)))); set_bit(DMF_FREEING, &md->flags); @@ -2940,14 +2938,14 @@ static void __dm_destroy(struct mapped_device *md, bool wait) * do not race with internal suspend. */ mutex_lock(&md->suspend_lock); + map = dm_get_live_table(md, &srcu_idx); if (!dm_suspended_md(md)) { dm_table_presuspend_targets(map); dm_table_postsuspend_targets(map); } - mutex_unlock(&md->suspend_lock); - /* dm_put_live_table must be before msleep, otherwise deadlock is possible */ dm_put_live_table(md, srcu_idx); + mutex_unlock(&md->suspend_lock); /* * Rare, but there may be I/O requests still going to complete, From 383f72c17c5f1b4e27f8dd5887a78ce6ce577179 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 9 Oct 2015 14:03:38 +0100 Subject: [PATCH 1184/2091] dm cache: fix NULL pointer when switching from cleaner policy commit 2bffa1503c5c06192eb1459180fac4416575a966 upstream. The cleaner policy doesn't make use of the per cache block hint space in the metadata (unlike the other policies). 
When switching from the cleaner policy to mq or smq a NULL pointer crash (in dm_tm_new_block) was observed. The crash was caused by bugs in dm-cache-metadata.c when trying to skip creation of the hint btree. The minimal fix is to change hint size for the cleaner policy to 4 bytes (only hint size supported). Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-cache-policy-cleaner.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-cache-policy-cleaner.c b/drivers/md/dm-cache-policy-cleaner.c index 004e463c9423c..8308f4b434ec3 100644 --- a/drivers/md/dm-cache-policy-cleaner.c +++ b/drivers/md/dm-cache-policy-cleaner.c @@ -435,7 +435,7 @@ static struct dm_cache_policy *wb_create(dm_cblock_t cache_size, static struct dm_cache_policy_type wb_policy_type = { .name = "cleaner", .version = {1, 0, 0}, - .hint_size = 0, + .hint_size = 4, .owner = THIS_MODULE, .create = wb_create }; From a0533fb8cf60edd38790a8b4a6ebf4b78d1f771c Mon Sep 17 00:00:00 2001 From: "covici@ccs.covici.com" Date: Wed, 20 May 2015 05:44:11 -0400 Subject: [PATCH 1185/2091] staging: speakup: fix speakup-r regression commit b1d562acc78f0af46de0dfe447410bc40bdb7ece upstream. Here is a patch to make speakup-r work again. It broke in 3.6 due to commit 4369c64c79a22b98d3b7eff9d089196cd878a10a "Input: Send events one packet at a time) The problem was that the fakekey.c routine to fake a down arrow no longer functioned properly and putting the input_sync fixed it. Fixes: 4369c64c79a22b98d3b7eff9d089196cd878a10a Acked-by: Samuel Thibault Signed-off-by: John Covici Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/staging/speakup/fakekey.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/speakup/fakekey.c b/drivers/staging/speakup/fakekey.c index 4299cf45f947d..5e1f16c36b49a 100644 --- a/drivers/staging/speakup/fakekey.c +++ b/drivers/staging/speakup/fakekey.c @@ -81,6 +81,7 @@ void speakup_fake_down_arrow(void) __this_cpu_write(reporting_keystroke, true); input_report_key(virt_keyboard, KEY_DOWN, PRESSED); input_report_key(virt_keyboard, KEY_DOWN, RELEASED); + input_sync(virt_keyboard); __this_cpu_write(reporting_keystroke, false); /* reenable preemption */ From 614ea4ea2c3face1f54dcc8e9a0ba134adce427e Mon Sep 17 00:00:00 2001 From: Kosuke Tatsukawa Date: Fri, 2 Oct 2015 08:27:05 +0000 Subject: [PATCH 1186/2091] tty: fix stall caused by missing memory barrier in drivers/tty/n_tty.c commit e81107d4c6bd098878af9796b24edc8d4a9524fd upstream. My colleague ran into a program stall on a x86_64 server, where n_tty_read() was waiting for data even if there was data in the buffer in the pty. kernel stack for the stuck process looks like below. #0 [ffff88303d107b58] __schedule at ffffffff815c4b20 #1 [ffff88303d107bd0] schedule at ffffffff815c513e #2 [ffff88303d107bf0] schedule_timeout at ffffffff815c7818 #3 [ffff88303d107ca0] wait_woken at ffffffff81096bd2 #4 [ffff88303d107ce0] n_tty_read at ffffffff8136fa23 #5 [ffff88303d107dd0] tty_read at ffffffff81368013 #6 [ffff88303d107e20] __vfs_read at ffffffff811a3704 #7 [ffff88303d107ec0] vfs_read at ffffffff811a3a57 #8 [ffff88303d107f00] sys_read at ffffffff811a4306 #9 [ffff88303d107f50] entry_SYSCALL_64_fastpath at ffffffff815c86d7 There seems to be two problems causing this issue. First, in drivers/tty/n_tty.c, __receive_buf() stores the data and updates ldata->commit_head using smp_store_release() and then checks the wait queue using waitqueue_active(). 
However, since there is no memory barrier, __receive_buf() could return without calling wake_up_interactive_poll(), and at the same time, n_tty_read() could start to wait in wait_woken() as in the following chart. __receive_buf() n_tty_read() ------------------------------------------------------------------------ if (waitqueue_active(&tty->read_wait)) /* Memory operations issued after the RELEASE may be completed before the RELEASE operation has completed */ add_wait_queue(&tty->read_wait, &wait); ... if (!input_available_p(tty, 0)) { smp_store_release(&ldata->commit_head, ldata->read_head); ... timeout = wait_woken(&wait, TASK_INTERRUPTIBLE, timeout); ------------------------------------------------------------------------ The second problem is that n_tty_read() also lacks a memory barrier call and could also cause __receive_buf() to return without calling wake_up_interactive_poll(), and n_tty_read() to wait in wait_woken() as in the chart below. __receive_buf() n_tty_read() ------------------------------------------------------------------------ spin_lock_irqsave(&q->lock, flags); /* from add_wait_queue() */ ... if (!input_available_p(tty, 0)) { /* Memory operations issued after the RELEASE may be completed before the RELEASE operation has completed */ smp_store_release(&ldata->commit_head, ldata->read_head); if (waitqueue_active(&tty->read_wait)) __add_wait_queue(q, wait); spin_unlock_irqrestore(&q->lock,flags); /* from add_wait_queue() */ ... timeout = wait_woken(&wait, TASK_INTERRUPTIBLE, timeout); ------------------------------------------------------------------------ There are also other places in drivers/tty/n_tty.c which have similar calls to waitqueue_active(), so instead of adding many memory barrier calls, this patch simply removes the call to waitqueue_active(), leaving just wake_up*() behind. This fixes both problems because, even though the memory access before or after the spinlocks in both wake_up*() and add_wait_queue() can sneak into the critical section, it cannot go past it and the critical section assures that they will be serialized (please see "INTER-CPU ACQUIRING BARRIER EFFECTS" in Documentation/memory-barriers.txt for a better explanation). Moreover, the resulting code is much simpler. Latency measurement using a ping-pong test over a pty doesn't show any visible performance drop. 
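
The pattern being removed here is worth spelling out, since it recurs in other drivers. The sketch below is illustrative only -- a hypothetical producer, not code from this patch -- and contrasts the racy waitqueue_active() check with the two safe alternatives; the second alternative is the approach this patch adopts.

  #include <linux/wait.h>
  #include <asm/barrier.h>

  /* Hypothetical device, used only for illustration. */
  struct my_dev {
          wait_queue_head_t wait;
          unsigned long head;
  };

  /* Racy: without a full barrier, the waitqueue_active() read may be
   * satisfied before a concurrent reader has added itself to the
   * queue, so the wake-up is skipped and the reader stalls. */
  static void my_publish_racy(struct my_dev *dev, unsigned long new_head)
  {
          smp_store_release(&dev->head, new_head);
          if (waitqueue_active(&dev->wait))
                  wake_up_interruptible(&dev->wait);
  }

  /* Safe, option 1: pair the check with an explicit full barrier so the
   * published store is ordered against the waitqueue check. */
  static void my_publish_with_barrier(struct my_dev *dev, unsigned long new_head)
  {
          smp_store_release(&dev->head, new_head);
          smp_mb();
          if (waitqueue_active(&dev->wait))
                  wake_up_interruptible(&dev->wait);
  }

  /* Safe, option 2 (what this patch does): drop the check and call
   * wake_up*() unconditionally; its internal spinlock serializes against
   * add_wait_queue(), and the call is cheap when the queue is empty. */
  static void my_publish_simple(struct my_dev *dev, unsigned long new_head)
  {
          smp_store_release(&dev->head, new_head);
          wake_up_interruptible(&dev->wait);
  }
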
Signed-off-by: Kosuke Tatsukawa Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_tty.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index 16ed0b6c7f9ce..6b6c6606af5f9 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -343,8 +343,7 @@ static void n_tty_packet_mode_flush(struct tty_struct *tty) spin_lock_irqsave(&tty->ctrl_lock, flags); tty->ctrl_status |= TIOCPKT_FLUSHREAD; spin_unlock_irqrestore(&tty->ctrl_lock, flags); - if (waitqueue_active(&tty->link->read_wait)) - wake_up_interruptible(&tty->link->read_wait); + wake_up_interruptible(&tty->link->read_wait); } } @@ -1383,8 +1382,7 @@ n_tty_receive_char_special(struct tty_struct *tty, unsigned char c) put_tty_queue(c, ldata); smp_store_release(&ldata->canon_head, ldata->read_head); kill_fasync(&tty->fasync, SIGIO, POLL_IN); - if (waitqueue_active(&tty->read_wait)) - wake_up_interruptible_poll(&tty->read_wait, POLLIN); + wake_up_interruptible_poll(&tty->read_wait, POLLIN); return 0; } } @@ -1670,8 +1668,7 @@ static void __receive_buf(struct tty_struct *tty, const unsigned char *cp, if ((read_cnt(ldata) >= ldata->minimum_to_wake) || L_EXTPROC(tty)) { kill_fasync(&tty->fasync, SIGIO, POLL_IN); - if (waitqueue_active(&tty->read_wait)) - wake_up_interruptible_poll(&tty->read_wait, POLLIN); + wake_up_interruptible_poll(&tty->read_wait, POLLIN); } } @@ -1890,10 +1887,8 @@ static void n_tty_set_termios(struct tty_struct *tty, struct ktermios *old) } /* The termios change make the tty ready for I/O */ - if (waitqueue_active(&tty->write_wait)) - wake_up_interruptible(&tty->write_wait); - if (waitqueue_active(&tty->read_wait)) - wake_up_interruptible(&tty->read_wait); + wake_up_interruptible(&tty->write_wait); + wake_up_interruptible(&tty->read_wait); } /** From 9f98531e220596953ce62c5df084bf073d88f901 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Sun, 4 Oct 2015 19:29:12 +0200 Subject: [PATCH 1187/2091] drivers/tty: require read access for controlling terminal commit 0c55627167870255158db1cde0d28366f91c8872 upstream. This is mostly a hardening fix, given that write-only access to other users' ttys is usually only given through setgid tty executables. Signed-off-by: Jann Horn Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_io.c | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index e5695467598f9..21837f14a403a 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -2144,8 +2144,24 @@ static int tty_open(struct inode *inode, struct file *filp) if (!noctty && current->signal->leader && !current->signal->tty && - tty->session == NULL) - __proc_set_tty(tty); + tty->session == NULL) { + /* + * Don't let a process that only has write access to the tty + * obtain the privileges associated with having a tty as + * controlling terminal (being able to reopen it with full + * access through /dev/tty, being able to perform pushback). + * Many distributions set the group of all ttys to "tty" and + * grant write-only access to all terminals for setgid tty + * binaries, which should not imply full privileges on all ttys. + * + * This could theoretically break old code that performs open() + * on a write-only file descriptor. In that case, it might be + * necessary to also permit this if + * inode_permission(inode, MAY_READ) == 0. 
+ */ + if (filp->f_mode & FMODE_READ) + __proc_set_tty(tty); + } spin_unlock_irq(&current->sighand->siglock); read_unlock(&tasklist_lock); tty_unlock(tty); @@ -2434,7 +2450,7 @@ static int fionbio(struct file *file, int __user *p) * Takes ->siglock() when updating signal->tty */ -static int tiocsctty(struct tty_struct *tty, int arg) +static int tiocsctty(struct tty_struct *tty, struct file *file, int arg) { int ret = 0; @@ -2468,6 +2484,13 @@ static int tiocsctty(struct tty_struct *tty, int arg) goto unlock; } } + + /* See the comment in tty_open(). */ + if ((file->f_mode & FMODE_READ) == 0 && !capable(CAP_SYS_ADMIN)) { + ret = -EPERM; + goto unlock; + } + proc_set_tty(tty); unlock: read_unlock(&tasklist_lock); @@ -2860,7 +2883,7 @@ long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg) no_tty(); return 0; case TIOCSCTTY: - return tiocsctty(tty, arg); + return tiocsctty(tty, file, arg); case TIOCGPGRP: return tiocgpgrp(tty, real_tty, p); case TIOCSPGRP: From 5e2b2e1c44da4a44931466eb0a47b097840be43a Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Fri, 2 Oct 2015 17:50:31 +0100 Subject: [PATCH 1188/2091] serial: 8250: add uart_config entry for PORT_RT2880 commit 3c5a0357fdb3a9116a48dbdb0abb91fd23fbff80 upstream. This adds an entry to the uart_config table for PORT_RT2880 enabling rx/tx FIFOs. The UART is actually a Palmchip BK-3103 which is found in several devices from Alchemy/RMI, Ralink, and Sigma Designs. Signed-off-by: Mans Rullgard Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_core.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c index 4506e405c8f39..b4fd8debf941b 100644 --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c @@ -339,6 +339,14 @@ configured less than Maximum supported fifo bytes */ UART_FCR7_64BYTE, .flags = UART_CAP_FIFO, }, + [PORT_RT2880] = { + .name = "Palmchip BK-3103", + .fifo_size = 16, + .tx_loadsz = 16, + .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, + .rxtrig_bytes = {1, 4, 8, 14}, + .flags = UART_CAP_FIFO, + }, }; /* Uart divisor latch read */ From 8a1d5ab8258cfd3143eae034ee2a07e08ae2cf42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 23 Sep 2015 08:57:40 +0200 Subject: [PATCH 1189/2091] serial: atmel: fix error path of probe function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 8f1bd8f2ad2358d6a88c115481ff3e69817d1bde upstream. If atmel_init_gpios fails the port has already been marked as busy (in line 2629), so this must be undone in the error path. This bug was introduced because I created the patch that finally became 722ccf416ac2 ("serial: atmel: fix error handling when mctrl_gpio_init fails") on top of 3.19 which didn't have commit 6fbb9bdf0f3f ("tty/serial: at91: fix error handling in atmel_serial_probe()") yet.
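
The fix follows the usual goto-based unwind idiom for probe() routines: every resource acquired on the way in needs a matching error label so that a later failure releases exactly what was taken so far. A simplified, hypothetical sketch of that idiom (not the actual atmel_serial_probe() code; the helper names are made up):

  #include <linux/bitops.h>
  #include <linux/platform_device.h>

  /* Hypothetical driver state, for illustration only. */
  static unsigned long my_ports_in_use;
  #define MY_PORT_BUSY    0

  /* Hypothetical helpers standing in for the real init steps. */
  static int my_init_gpios(struct platform_device *pdev) { return 0; }
  static int my_init_port(struct platform_device *pdev) { return 0; }

  static int my_serial_probe(struct platform_device *pdev)
  {
          int ret;

          /* Step 1: mark the port busy (the state the bug leaked). */
          if (test_and_set_bit(MY_PORT_BUSY, &my_ports_in_use))
                  return -EBUSY;

          /* Step 2: may fail -- must unwind step 1, not just return. */
          ret = my_init_gpios(pdev);
          if (ret < 0)
                  goto err_clear_bit;

          /* Step 3: further initialization, same unwind target. */
          ret = my_init_port(pdev);
          if (ret)
                  goto err_clear_bit;

          return 0;

  err_clear_bit:
          clear_bit(MY_PORT_BUSY, &my_ports_in_use);
          return ret;
  }
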
Signed-off-by: Uwe Kleine-König Fixes: 722ccf416ac2 ("serial: atmel: fix error handling when mctrl_gpio_init fails") Acked-by: Nicolas Ferre Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/atmel_serial.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c index 5ca1dfb0561cd..85323ff75edf6 100644 --- a/drivers/tty/serial/atmel_serial.c +++ b/drivers/tty/serial/atmel_serial.c @@ -2640,7 +2640,7 @@ static int atmel_serial_probe(struct platform_device *pdev) ret = atmel_init_gpios(port, &pdev->dev); if (ret < 0) { dev_err(&pdev->dev, "Failed to initialize GPIOs."); - goto err; + goto err_clear_bit; } ret = atmel_init_port(port, pdev); From 13bc967d64d56b232189218b435178a3c2388de3 Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Mon, 15 Jun 2015 13:43:29 -0400 Subject: [PATCH 1190/2091] intel_pstate: Fix overflow in busy_scaled due to long delay commit 7180dddf7c32c49975c7e7babf2b60ed450cb760 upstream. The kernel may delay interrupts for a long time which can result in timers being delayed. If this occurs the intel_pstate driver will crash with a divide by zero error: divide error: 0000 [#1] SMP Modules linked in: btrfs zlib_deflate raid6_pq xor msdos ext4 mbcache jbd2 binfmt_misc arc4 md4 nls_utf8 cifs dns_resolver tcp_lp bnep bluetooth rfkill fuse dm_service_time iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi nf_conntrack_netbios_ns nf_conntrack_broadcast nf_conntrack_ftp ip6t_rpfilter ip6t_REJECT ipt_REJECT xt_conntrack ebtable_nat ebtable_broute bridge stp llc ebtable_filter ebtables ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_mangle ip6table_security ip6table_raw ip6table_filter ip6_tables iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack iptable_mangle iptable_security iptable_raw iptable_filter ip_tables intel_powerclamp coretemp vfat fat kvm_intel iTCO_wdt iTCO_vendor_support ipmi_devintf sr_mod kvm crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel aesni_intel cdc_ether lrw usbnet cdrom mii gf128mul glue_helper ablk_helper cryptd lpc_ich mfd_core pcspkr sb_edac edac_core ipmi_si ipmi_msghandler ioatdma wmi shpchp acpi_pad nfsd auth_rpcgss nfs_acl lockd uinput dm_multipath sunrpc xfs libcrc32c usb_storage sd_mod crc_t10dif crct10dif_common ixgbe mgag200 syscopyarea sysfillrect sysimgblt mdio drm_kms_helper ttm igb drm ptp pps_core dca i2c_algo_bit megaraid_sas i2c_core dm_mirror dm_region_hash dm_log dm_mod CPU: 113 PID: 0 Comm: swapper/113 Tainted: G W -------------- 3.10.0-229.1.2.el7.x86_64 #1 Hardware name: IBM x3950 X6 -[3837AC2]-/00FN827, BIOS -[A8E112BUS-1.00]- 08/27/2014 task: ffff880fe8abe660 ti: ffff880fe8ae4000 task.ti: ffff880fe8ae4000 RIP: 0010:[] [] intel_pstate_timer_func+0x179/0x3d0 RSP: 0018:ffff883fff4e3db8 EFLAGS: 00010206 RAX: 0000000027100000 RBX: ffff883fe6965100 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000010 RDI: 000000002e53632d RBP: ffff883fff4e3e20 R08: 000e6f69a5a125c0 R09: ffff883fe84ec001 R10: 0000000000000002 R11: 0000000000000005 R12: 00000000000049f5 R13: 0000000000271000 R14: 00000000000049f5 R15: 0000000000000246 FS: 0000000000000000(0000) GS:ffff883fff4e0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f7668601000 CR3: 000000000190a000 CR4: 00000000001407e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Stack: ffff883fff4e3e58 ffffffff81099dc1 
0000000000000086 0000000000000071 ffff883fff4f3680 0000000000000071 fbdc8a965e33afee ffffffff810b69dd ffff883fe84ec000 ffff883fe6965108 0000000000000100 ffffffff814a9100 Call Trace: [] ? run_posix_cpu_timers+0x51/0x840 [] ? trigger_load_balance+0x5d/0x200 [] ? pid_param_set+0x130/0x130 [] call_timer_fn+0x36/0x110 [] ? pid_param_set+0x130/0x130 [] run_timer_softirq+0x21f/0x320 [] __do_softirq+0xef/0x280 [] call_softirq+0x1c/0x30 [] do_softirq+0x65/0xa0 [] irq_exit+0x115/0x120 [] smp_apic_timer_interrupt+0x45/0x60 [] apic_timer_interrupt+0x6d/0x80 [] ? cpuidle_enter_state+0x52/0xc0 [] ? cpuidle_enter_state+0x48/0xc0 [] cpuidle_idle_call+0xc5/0x200 [] arch_cpu_idle+0xe/0x30 [] cpu_startup_entry+0xf1/0x290 [] start_secondary+0x1ba/0x230 Code: 42 0f 00 45 89 e6 48 01 c2 43 8d 44 6d 00 39 d0 73 26 49 c1 e5 08 89 d2 4d 63 f4 49 63 c5 48 c1 e2 08 48 c1 e0 08 48 63 ca 48 99 <48> f7 f9 48 98 4c 0f af f0 49 c1 ee 08 8b 43 78 c1 e0 08 44 29 RIP [] intel_pstate_timer_func+0x179/0x3d0 RSP The kernel values for cpudata for CPU 113 were: struct cpudata { cpu = 113, timer = { entry = { next = 0x0, prev = 0xdead000000200200 }, expires = 8357799745, base = 0xffff883fe84ec001, function = 0xffffffff814a9100 , data = 18446612406765768960, i_gain = 0, d_gain = 0, deadband = 0, last_err = 22489 }, last_sample_time = { tv64 = 4063132438017305 }, prev_aperf = 287326796397463, prev_mperf = 251427432090198, sample = { core_pct_busy = 23081, aperf = 2937407, mperf = 3257884, freq = 2524484, time = { tv64 = 4063149215234118 } } } which results in the time between samples = last_sample_time - sample.time = 4063149215234118 - 4063132438017305 = 16777216813 which is 16.777 seconds. The duration between reads of the APERF and MPERF registers overflowed a s32 sized integer in intel_pstate_get_scaled_busy()'s call to div_fp(). The result is that int_tofp(duration_us) == 0, and the kernel attempts to divide by 0. While the kernel shouldn't be delaying for a long time, it can and does happen and the intel_pstate driver should not panic in this situation. This patch changes the div_fp() function to use div64_s64() to allow for "long" division. This will avoid the overflow condition on long delays. [v2]: use div64_s64() in div_fp() Signed-off-by: Prarit Bhargava Signed-off-by: Rafael J. Wysocki Cc: Thomas Renninger Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/intel_pstate.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 6f9d27f9001c7..e8d16997c5cbc 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -48,9 +48,9 @@ static inline int32_t mul_fp(int32_t x, int32_t y) return ((int64_t)x * (int64_t)y) >> FRAC_BITS; } -static inline int32_t div_fp(int32_t x, int32_t y) +static inline int32_t div_fp(s64 x, s64 y) { - return div_s64((int64_t)x << FRAC_BITS, y); + return div64_s64((int64_t)x << FRAC_BITS, y); } static inline int ceiling_fp(int32_t x) @@ -795,7 +795,7 @@ static inline void intel_pstate_set_sample_time(struct cpudata *cpu) static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu) { int32_t core_busy, max_pstate, current_pstate, sample_ratio; - u32 duration_us; + s64 duration_us; u32 sample_time; /* @@ -822,8 +822,8 @@ static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu) * to adjust our busyness. 
*/ sample_time = pid_params.sample_rate_ms * USEC_PER_MSEC; - duration_us = (u32) ktime_us_delta(cpu->sample.time, - cpu->last_sample_time); + duration_us = ktime_us_delta(cpu->sample.time, + cpu->last_sample_time); if (duration_us > sample_time * 3) { sample_ratio = div_fp(int_tofp(sample_time), int_tofp(duration_us)); From e3f916d20c2dc6169a14cb8266bbdb88a97bbbae Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Thu, 1 Oct 2015 15:36:54 -0700 Subject: [PATCH 1191/2091] mm/slab: fix unexpected index mapping result of kmalloc_size(INDEX_NODE+1) commit 03a2d2a3eafe4015412cf4e9675ca0e2d9204074 upstream. Commit description is copied from the original post of this bug: http://comments.gmane.org/gmane.linux.kernel.mm/135349 Kernels after v3.9 use kmalloc_size(INDEX_NODE + 1) to get the next larger cache size than the size index INDEX_NODE mapping. In kernels 3.9 and earlier we used malloc_sizes[INDEX_L3 + 1].cs_size. However, sometimes we can't get the right output we expected via kmalloc_size(INDEX_NODE + 1), causing a BUG(). The mapping table in the latest kernel is like: index = {0, 1, 2 , 3, 4, 5, 6, n} size = {0, 96, 192, 8, 16, 32, 64, 2^n} The mapping table before 3.10 is like this: index = {0 , 1 , 2, 3, 4 , 5 , 6, n} size = {32, 64, 96, 128, 192, 256, 512, 2^(n+3)} The problem on my mips64 machine is as follows: (1) When configured DEBUG_SLAB && DEBUG_PAGEALLOC && DEBUG_LOCK_ALLOC && DEBUG_SPINLOCK, the sizeof(struct kmem_cache_node) will be "150", and the macro INDEX_NODE turns out to be "2": #define INDEX_NODE kmalloc_index(sizeof(struct kmem_cache_node)) (2) Then the result of kmalloc_size(INDEX_NODE + 1) is 8. (3) Then "if(size >= kmalloc_size(INDEX_NODE + 1)" will lead to "size = PAGE_SIZE". (4) Then "if ((size >= (PAGE_SIZE >> 3))" test will be satisfied and "flags |= CFLGS_OFF_SLAB" will be covered. (5) if (flags & CFLGS_OFF_SLAB)" test will be satisfied and will go to "cachep->slabp_cache = kmalloc_slab(slab_size, 0u)", and the result here may be NULL while kernel bootup. (6) Finally,"BUG_ON(ZERO_OR_NULL_PTR(cachep->slabp_cache));" causes the BUG info as the following shows (may be only mips64 has this problem): This patch fixes the problem of kmalloc_size(INDEX_NODE + 1) and removes the BUG by adding 'size >= 256' check to guarantee that all necessary small sized slabs are initialized regardless sequence of slab size in mapping table. Fixes: e33660165c90 ("slab: Use common kmalloc_index/kmalloc_size...") Signed-off-by: Joonsoo Kim Reported-by: Liuhailong Acked-by: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/slab.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/mm/slab.c b/mm/slab.c index 3dd2d1ff9d5d9..330039fdcf18f 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -2189,9 +2189,16 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) size += BYTES_PER_WORD; } #if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC) - if (size >= kmalloc_size(INDEX_NODE + 1) - && cachep->object_size > cache_line_size() - && ALIGN(size, cachep->align) < PAGE_SIZE) { + /* + * To activate debug pagealloc, off-slab management is necessary + * requirement. In early phase of initialization, small sized slab + * doesn't get initialized so it would not be possible. So, we need + * to check size >= 256. It guarantees that all necessary small + * sized slab is initialized in current slab initialization sequence. 
+ */ + if (!slab_early_init && size >= kmalloc_size(INDEX_NODE) && + size >= 256 && cachep->object_size > cache_line_size() && + ALIGN(size, cachep->align) < PAGE_SIZE) { cachep->obj_offset += PAGE_SIZE - ALIGN(size, cachep->align); size = PAGE_SIZE; } From a8ef8d4e5bbc5035a1e9789592ca7e8db09c2ea8 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 30 Aug 2015 21:19:58 -0700 Subject: [PATCH 1192/2091] MIPS: Fix console output for Fulong2e system commit fc2ca674470bbfe11d72a20a3f19fd3dc43bfca0 upstream. Commit 3adeb2566b9b ("MIPS: Loongson: Improve LEFI firmware interface") made the number of UARTs dynamic if LEFI_FIRMWARE_INTERFACE is configured. Unfortunately, it did not initialize the number of UARTs if LEFI_FIRMWARE_INTERFACE is not configured. As a result, the Fulong2e system has no console. Fixes: 3adeb2566b9b ("MIPS: Loongson: Improve LEFI firmware interface") Acked-by: Huacai Chen Signed-off-by: Guenter Roeck Tested-by: Markos Chandras Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/11076/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/loongson/common/env.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/mips/loongson/common/env.c b/arch/mips/loongson/common/env.c index 22f04ca2ff3e5..2efb18aafa4f1 100644 --- a/arch/mips/loongson/common/env.c +++ b/arch/mips/loongson/common/env.c @@ -64,6 +64,9 @@ void __init prom_init_env(void) } if (memsize == 0) memsize = 256; + + loongson_sysconf.nr_uarts = 1; + pr_info("memsize=%u, highmemsize=%u\n", memsize, highmemsize); #else struct boot_params *boot_p; From baf19f15c8d54395b338a34004ff1f0c1d7c15e7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 3 Oct 2015 19:16:07 +0200 Subject: [PATCH 1193/2091] 3w-9xxx: don't unmap bounce buffered commands MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 15e3d5a285ab9283136dba34bbf72886d9146706 upstream. 3w controller don't dma map small single SGL entry commands but instead bounce buffer them. Add a helper to identify these commands and don't call scsi_dma_unmap for them. Based on an earlier patch from James Bottomley. Fixes: 118c85 ("3w-9xxx: fix command completion race") Reported-by: Tóth Attila Tested-by: Tóth Attila Signed-off-by: Christoph Hellwig Acked-by: Adam Radford Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/3w-9xxx.c | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/drivers/scsi/3w-9xxx.c b/drivers/scsi/3w-9xxx.c index add419d6ff349..a56a7b243e91f 100644 --- a/drivers/scsi/3w-9xxx.c +++ b/drivers/scsi/3w-9xxx.c @@ -212,6 +212,17 @@ static const struct file_operations twa_fops = { .llseek = noop_llseek, }; +/* + * The controllers use an inline buffer instead of a mapped SGL for small, + * single entry buffers. Note that we treat a zero-length transfer like + * a mapped SGL. 
+ */ +static bool twa_command_mapped(struct scsi_cmnd *cmd) +{ + return scsi_sg_count(cmd) != 1 || + scsi_bufflen(cmd) >= TW_MIN_SGL_LENGTH; +} + /* This function will complete an aen request from the isr */ static int twa_aen_complete(TW_Device_Extension *tw_dev, int request_id) { @@ -1339,7 +1350,8 @@ static irqreturn_t twa_interrupt(int irq, void *dev_instance) } /* Now complete the io */ - scsi_dma_unmap(cmd); + if (twa_command_mapped(cmd)) + scsi_dma_unmap(cmd); cmd->scsi_done(cmd); tw_dev->state[request_id] = TW_S_COMPLETED; twa_free_request_id(tw_dev, request_id); @@ -1582,7 +1594,8 @@ static int twa_reset_device_extension(TW_Device_Extension *tw_dev) struct scsi_cmnd *cmd = tw_dev->srb[i]; cmd->result = (DID_RESET << 16); - scsi_dma_unmap(cmd); + if (twa_command_mapped(cmd)) + scsi_dma_unmap(cmd); cmd->scsi_done(cmd); } } @@ -1765,12 +1778,14 @@ static int twa_scsi_queue_lck(struct scsi_cmnd *SCpnt, void (*done)(struct scsi_ retval = twa_scsiop_execute_scsi(tw_dev, request_id, NULL, 0, NULL); switch (retval) { case SCSI_MLQUEUE_HOST_BUSY: - scsi_dma_unmap(SCpnt); + if (twa_command_mapped(SCpnt)) + scsi_dma_unmap(SCpnt); twa_free_request_id(tw_dev, request_id); break; case 1: SCpnt->result = (DID_ERROR << 16); - scsi_dma_unmap(SCpnt); + if (twa_command_mapped(SCpnt)) + scsi_dma_unmap(SCpnt); done(SCpnt); tw_dev->state[request_id] = TW_S_COMPLETED; twa_free_request_id(tw_dev, request_id); @@ -1831,8 +1846,7 @@ static int twa_scsiop_execute_scsi(TW_Device_Extension *tw_dev, int request_id, /* Map sglist from scsi layer to cmd packet */ if (scsi_sg_count(srb)) { - if ((scsi_sg_count(srb) == 1) && - (scsi_bufflen(srb) < TW_MIN_SGL_LENGTH)) { + if (!twa_command_mapped(srb)) { if (srb->sc_data_direction == DMA_TO_DEVICE || srb->sc_data_direction == DMA_BIDIRECTIONAL) scsi_sg_copy_to_buffer(srb, @@ -1905,7 +1919,7 @@ static void twa_scsiop_execute_scsi_complete(TW_Device_Extension *tw_dev, int re { struct scsi_cmnd *cmd = tw_dev->srb[request_id]; - if (scsi_bufflen(cmd) < TW_MIN_SGL_LENGTH && + if (!twa_command_mapped(cmd) && (cmd->sc_data_direction == DMA_FROM_DEVICE || cmd->sc_data_direction == DMA_BIDIRECTIONAL)) { if (scsi_sg_count(cmd) == 1) { From 205a8514e63a221c3a5071f27521013444e43e5f Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 22 Oct 2015 14:43:44 -0700 Subject: [PATCH 1194/2091] Linux 4.1.11 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index d02f16b510dc5..c7d877b1c248e 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 1 -SUBLEVEL = 10 +SUBLEVEL = 11 EXTRAVERSION = NAME = Series 4800 From 9cda65f34d0d0aed35effe37d9f85d6d9b5285fd Mon Sep 17 00:00:00 2001 From: Ivan Mikhaylov Date: Fri, 25 Sep 2015 11:52:27 +0400 Subject: [PATCH 1195/2091] net/ibm/emac: bump version numbers for correct work with ethtool [ Upstream commit 661dfc65f7981481ba2e31aaa702371e82336e56 ] The size of the MAC register dump used to be the size specified by the reg property in the device tree. Userland has no good way of finding out that size, and it was not specified consistently for each MAC type, so ethtool would end up printing junk at the end of the register dump if the device tree didn't match the size it assumed. Using the new version numbers indicates unambiguously that the size of the MAC register dump is dependent only on the MAC type. Fixes: 5369c71f7ca2 ("net/ibm/emac: fix size of emac dump memory areas") Signed-off-by: Ivan Mikhaylov Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/ibm/emac/core.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/ibm/emac/core.h b/drivers/net/ethernet/ibm/emac/core.h index 28df37420da96..ac02c675c59c4 100644 --- a/drivers/net/ethernet/ibm/emac/core.h +++ b/drivers/net/ethernet/ibm/emac/core.h @@ -460,8 +460,8 @@ struct emac_ethtool_regs_subhdr { u32 index; }; -#define EMAC_ETHTOOL_REGS_VER 0 -#define EMAC4_ETHTOOL_REGS_VER 1 -#define EMAC4SYNC_ETHTOOL_REGS_VER 2 +#define EMAC_ETHTOOL_REGS_VER 3 +#define EMAC4_ETHTOOL_REGS_VER 4 +#define EMAC4SYNC_ETHTOOL_REGS_VER 5 #endif /* __IBM_NEWEMAC_CORE_H */ From d55cd294a22769515cfc3c97783669369fb4b111 Mon Sep 17 00:00:00 2001 From: Alexander Couzens Date: Mon, 28 Sep 2015 11:32:42 +0200 Subject: [PATCH 1196/2091] l2tp: protect tunnel->del_work by ref_count [ Upstream commit 06a15f51cf3618e32a73871ee6a547ef7fd902b5 ] There is a small chance that tunnel_free() is called before tunnel->del_work scheduled resulting in a zero pointer dereference. Signed-off-by: Alexander Couzens Acked-by: James Chapman Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/l2tp/l2tp_core.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index a29a504492af6..e3db498f02330 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1319,7 +1319,7 @@ static void l2tp_tunnel_del_work(struct work_struct *work) tunnel = container_of(work, struct l2tp_tunnel, del_work); sk = l2tp_tunnel_sock_lookup(tunnel); if (!sk) - return; + goto out; sock = sk->sk_socket; @@ -1340,6 +1340,8 @@ static void l2tp_tunnel_del_work(struct work_struct *work) } l2tp_tunnel_sock_put(sk); +out: + l2tp_tunnel_dec_refcount(tunnel); } /* Create a socket for the tunnel, if one isn't set up by @@ -1639,8 +1641,13 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_create); */ int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel) { + l2tp_tunnel_inc_refcount(tunnel); l2tp_tunnel_closeall(tunnel); - return (false == queue_work(l2tp_wq, &tunnel->del_work)); + if (false == queue_work(l2tp_wq, &tunnel->del_work)) { + l2tp_tunnel_dec_refcount(tunnel); + return 1; + } + return 0; } EXPORT_SYMBOL_GPL(l2tp_tunnel_delete); From 7017f73bb9837ca8cc5d0a4116aae679bdd8df22 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Tue, 22 Sep 2015 12:57:53 -0700 Subject: [PATCH 1197/2091] skbuff: Fix skb checksum flag on skb pull [ Upstream commit 6ae459bdaaeebc632b16e54dcbabb490c6931d61 ] VXLAN device can receive skb with checksum partial. But the checksum offset could be in outer header which is pulled on receive. This results in negative checksum offset for the skb. Such skb can cause the assert failure in skb_checksum_help(). Following patch fixes the bug by setting checksum-none while pulling outer header. Following is the kernel panic msg from old kernel hitting the bug. ------------[ cut here ]------------ kernel BUG at net/core/dev.c:1906! 
RIP: 0010:[] skb_checksum_help+0x144/0x150 Call Trace: [] queue_userspace_packet+0x408/0x470 [openvswitch] [] ovs_dp_upcall+0x5d/0x60 [openvswitch] [] ovs_dp_process_packet_with_key+0xe6/0x100 [openvswitch] [] ovs_dp_process_received_packet+0x4b/0x80 [openvswitch] [] ovs_vport_receive+0x2a/0x30 [openvswitch] [] vxlan_rcv+0x53/0x60 [openvswitch] [] vxlan_udp_encap_recv+0x8b/0xf0 [openvswitch] [] udp_queue_rcv_skb+0x2dc/0x3b0 [] __udp4_lib_rcv+0x1cf/0x6c0 [] udp_rcv+0x1a/0x20 [] ip_local_deliver_finish+0xdd/0x280 [] ip_local_deliver+0x88/0x90 [] ip_rcv_finish+0x10d/0x370 [] ip_rcv+0x235/0x300 [] __netif_receive_skb+0x55d/0x620 [] netif_receive_skb+0x80/0x90 [] virtnet_poll+0x555/0x6f0 [] net_rx_action+0x134/0x290 [] __do_softirq+0xa8/0x210 [] call_softirq+0x1c/0x30 [] do_softirq+0x65/0xa0 [] irq_exit+0x8e/0xb0 [] do_IRQ+0x63/0xe0 [] common_interrupt+0x6e/0x6e Reported-by: Anupam Chanda Signed-off-by: Pravin B Shelar Acked-by: Tom Herbert Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/skbuff.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index eb1c55b8255a1..7f04271d7a3bd 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2588,6 +2588,9 @@ static inline void skb_postpull_rcsum(struct sk_buff *skb, { if (skb->ip_summed == CHECKSUM_COMPLETE) skb->csum = csum_sub(skb->csum, csum_partial(start, len, 0)); + else if (skb->ip_summed == CHECKSUM_PARTIAL && + skb_checksum_start_offset(skb) <= len) + skb->ip_summed = CHECKSUM_NONE; } unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len); From 9bdfdea4cc4617aaa3a4f291904fe85e6ca2c9a6 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Mon, 28 Sep 2015 17:24:25 -0700 Subject: [PATCH 1198/2091] skbuff: Fix skb checksum partial check. [ Upstream commit 31b33dfb0a144469dd805514c9e63f4993729a48 ] Earlier patch 6ae459bda tried to detect void ckecksum partial skb by comparing pull length to checksum offset. But it does not work for all cases since checksum-offset depends on updates to skb->data. Following patch fixes it by validating checksum start offset after skb-data pointer is updated. Negative value of checksum offset start means there is no need to checksum. Fixes: 6ae459bda ("skbuff: Fix skb checksum flag on skb pull") Reported-by: Andrew Vagin Signed-off-by: Pravin B Shelar Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/skbuff.h | 2 +- net/core/skbuff.c | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 7f04271d7a3bd..4307e20a4a4af 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2589,7 +2589,7 @@ static inline void skb_postpull_rcsum(struct sk_buff *skb, if (skb->ip_summed == CHECKSUM_COMPLETE) skb->csum = csum_sub(skb->csum, csum_partial(start, len, 0)); else if (skb->ip_summed == CHECKSUM_PARTIAL && - skb_checksum_start_offset(skb) <= len) + skb_checksum_start_offset(skb) < 0) skb->ip_summed = CHECKSUM_NONE; } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index a2e4e47b28391..075d2e78c87ee 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2976,11 +2976,12 @@ EXPORT_SYMBOL(skb_append_datato_frags); */ unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len) { + unsigned char *data = skb->data; + BUG_ON(len > skb->len); - skb->len -= len; - BUG_ON(skb->len < skb->data_len); - skb_postpull_rcsum(skb, skb->data, len); - return skb->data += len; + __skb_pull(skb, len); + skb_postpull_rcsum(skb, data, len); + return skb->data; } EXPORT_SYMBOL_GPL(skb_pull_rcsum); From 31b8abd140ad0be5f63bae0e4e6beb9d52b16f85 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 19 Sep 2015 09:48:04 -0700 Subject: [PATCH 1199/2091] inet: fix races in reqsk_queue_hash_req() [ Upstream commit 29c6852602e259d2c1882f320b29d5c3fec0de04 ] Before allowing lockless LISTEN processing, we need to make sure to arm the SYN_RECV timer before the req socket is visible in hash tables. Also, req->rsk_hash should be written before we set rsk_refcnt to a non zero value. Fixes: fa76ce7328b2 ("inet: get rid of central tcp/dccp listener timer") Signed-off-by: Eric Dumazet Cc: Ying Cai Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/inet_connection_sock.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index e664706b350cd..390f61d2f00b8 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -676,20 +676,20 @@ void reqsk_queue_hash_req(struct request_sock_queue *queue, req->num_timeout = 0; req->sk = NULL; + setup_timer(&req->rsk_timer, reqsk_timer_handler, (unsigned long)req); + mod_timer_pinned(&req->rsk_timer, jiffies + timeout); + req->rsk_hash = hash; + /* before letting lookups find us, make sure all req fields * are committed to memory and refcnt initialized. */ smp_wmb(); atomic_set(&req->rsk_refcnt, 2); - setup_timer(&req->rsk_timer, reqsk_timer_handler, (unsigned long)req); - req->rsk_hash = hash; spin_lock(&queue->syn_wait_lock); req->dl_next = lopt->syn_table[hash]; lopt->syn_table[hash] = req; spin_unlock(&queue->syn_wait_lock); - - mod_timer_pinned(&req->rsk_timer, jiffies + timeout); } EXPORT_SYMBOL(reqsk_queue_hash_req); From fac948a1c4188780633c4ef509941876dcc99502 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 29 Sep 2015 18:52:25 -0700 Subject: [PATCH 1200/2091] net: add pfmemalloc check in sk_add_backlog() [ Upstream commit c7c49b8fde26b74277188bdc6c9dca38db6fa35b ] Greg reported crashes hitting the following check in __sk_backlog_rcv() BUG_ON(!sock_flag(sk, SOCK_MEMALLOC)); The pfmemalloc bit is currently checked in sk_filter(). This works correctly for TCP, because sk_filter() is ran in tcp_v[46]_rcv() before hitting the prequeue or backlog checks. 
For UDP or other protocols, this does not work, because the sk_filter() is ran from sock_queue_rcv_skb(), which might be called _after_ backlog queuing if socket is owned by user by the time packet is processed by softirq handler. Fixes: b4b9e35585089 ("netvm: set PF_MEMALLOC as appropriate during SKB processing") Signed-off-by: Eric Dumazet Reported-by: Greg Thelen Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/sock.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/include/net/sock.h b/include/net/sock.h index 3a4898ec8c67c..ed01a012f8d58 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -826,6 +826,14 @@ static inline __must_check int sk_add_backlog(struct sock *sk, struct sk_buff *s if (sk_rcvqueues_full(sk, limit)) return -ENOBUFS; + /* + * If the skb was allocated from pfmemalloc reserves, only + * allow SOCK_MEMALLOC sockets to use it as this socket is + * helping free memory + */ + if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC)) + return -ENOMEM; + __sk_add_backlog(sk, skb); sk->sk_backlog.len += skb->truesize; return 0; From bbc89a6195a2310a182cf583e9825dfc1ebe58ce Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Wed, 30 Sep 2015 11:45:33 +0200 Subject: [PATCH 1201/2091] ppp: don't override sk->sk_state in pppoe_flush_dev() [ Upstream commit e6740165b8f7f06d8caee0fceab3fb9d790a6fed ] Since commit 2b018d57ff18 ("pppoe: drop PPPOX_ZOMBIEs in pppoe_release"), pppoe_release() calls dev_put(po->pppoe_dev) if sk is in the PPPOX_ZOMBIE state. But pppoe_flush_dev() can set sk->sk_state to PPPOX_ZOMBIE _and_ reset po->pppoe_dev to NULL. This leads to the following oops: [ 570.140800] BUG: unable to handle kernel NULL pointer dereference at 00000000000004e0 [ 570.142931] IP: [] pppoe_release+0x50/0x101 [pppoe] [ 570.144601] PGD 3d119067 PUD 3dbc1067 PMD 0 [ 570.144601] Oops: 0000 [#1] SMP [ 570.144601] Modules linked in: l2tp_ppp l2tp_netlink l2tp_core ip6_udp_tunnel udp_tunnel pppoe pppox ppp_generic slhc loop crc32c_intel ghash_clmulni_intel jitterentropy_rng sha256_generic hmac drbg ansi_cprng aesni_intel aes_x86_64 ablk_helper cryptd lrw gf128mul glue_helper acpi_cpufreq evdev serio_raw processor button ext4 crc16 mbcache jbd2 virtio_net virtio_blk virtio_pci virtio_ring virtio [ 570.144601] CPU: 1 PID: 15738 Comm: ppp-apitest Not tainted 4.2.0 #1 [ 570.144601] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Debian-1.8.2-1 04/01/2014 [ 570.144601] task: ffff88003d30d600 ti: ffff880036b60000 task.ti: ffff880036b60000 [ 570.144601] RIP: 0010:[] [] pppoe_release+0x50/0x101 [pppoe] [ 570.144601] RSP: 0018:ffff880036b63e08 EFLAGS: 00010202 [ 570.144601] RAX: 0000000000000000 RBX: ffff880034340000 RCX: 0000000000000206 [ 570.144601] RDX: 0000000000000006 RSI: ffff88003d30dd20 RDI: ffff88003d30dd20 [ 570.144601] RBP: ffff880036b63e28 R08: 0000000000000001 R09: 0000000000000000 [ 570.144601] R10: 00007ffee9b50420 R11: ffff880034340078 R12: ffff8800387ec780 [ 570.144601] R13: ffff8800387ec7b0 R14: ffff88003e222aa0 R15: ffff8800387ec7b0 [ 570.144601] FS: 00007f5672f48700(0000) GS:ffff88003fc80000(0000) knlGS:0000000000000000 [ 570.144601] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 570.144601] CR2: 00000000000004e0 CR3: 0000000037f7e000 CR4: 00000000000406a0 [ 570.144601] Stack: [ 570.144601] ffffffffa018f240 ffff8800387ec780 ffffffffa018f240 ffff8800387ec7b0 [ 570.144601] ffff880036b63e48 ffffffff812caabe ffff880039e4e000 0000000000000008 [ 570.144601] ffff880036b63e58 ffffffff812cabad ffff880036b63ea8 
ffffffff811347f5 [ 570.144601] Call Trace: [ 570.144601] [] sock_release+0x1a/0x75 [ 570.144601] [] sock_close+0xd/0x11 [ 570.144601] [] __fput+0xff/0x1a5 [ 570.144601] [] ____fput+0x9/0xb [ 570.144601] [] task_work_run+0x66/0x90 [ 570.144601] [] prepare_exit_to_usermode+0x8c/0xa7 [ 570.144601] [] syscall_return_slowpath+0x16d/0x19b [ 570.144601] [] int_ret_from_sys_call+0x25/0x9f [ 570.144601] Code: 48 8b 83 c8 01 00 00 a8 01 74 12 48 89 df e8 8b 27 14 e1 b8 f7 ff ff ff e9 b7 00 00 00 8a 43 12 a8 0b 74 1c 48 8b 83 a8 04 00 00 <48> 8b 80 e0 04 00 00 65 ff 08 48 c7 83 a8 04 00 00 00 00 00 00 [ 570.144601] RIP [] pppoe_release+0x50/0x101 [pppoe] [ 570.144601] RSP [ 570.144601] CR2: 00000000000004e0 [ 570.200518] ---[ end trace 46956baf17349563 ]--- pppoe_flush_dev() has no reason to override sk->sk_state with PPPOX_ZOMBIE. pppox_unbind_sock() already sets sk->sk_state to PPPOX_DEAD, which is the correct state given that sk is unbound and po->pppoe_dev is NULL. Fixes: 2b018d57ff18 ("pppoe: drop PPPOX_ZOMBIEs in pppoe_release") Tested-by: Oleksii Berezhniak Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ppp/pppoe.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index b62a5e3a1c652..db2c3cdf2c40d 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -313,7 +313,6 @@ static void pppoe_flush_dev(struct net_device *dev) if (po->pppoe_dev == dev && sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND | PPPOX_ZOMBIE)) { pppox_unbind_sock(sk); - sk->sk_state = PPPOX_ZOMBIE; sk->sk_state_change(sk); po->pppoe_dev = NULL; dev_put(dev); From 8ae3dfacdd8257691332d98f258ac8858bcd6585 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 1 Oct 2015 05:39:26 -0700 Subject: [PATCH 1202/2091] inet: fix race in reqsk_queue_unlink() [ Upstream commit 2306c704ce280c97a60d1f45333b822b40281dea ] reqsk_timer_handler() tests if icsk_accept_queue.listen_opt is NULL at its beginning. By the time it calls inet_csk_reqsk_queue_drop() and reqsk_queue_unlink(), listener might have been closed and inet_csk_listen_stop() had called reqsk_queue_yank_acceptq() which sets icsk_accept_queue.listen_opt to NULL We therefore need to correctly check listen_opt being NULL after holding syn_wait_lock for proper synchronization. Fixes: fa76ce7328b2 ("inet: get rid of central tcp/dccp listener timer") Fixes: b357a364c57c ("inet: fix possible panic in reqsk_queue_unlink()") Signed-off-by: Eric Dumazet Cc: Yuchung Cheng Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/inet_connection_sock.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 390f61d2f00b8..4d2bc8c6694f6 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -568,21 +568,22 @@ EXPORT_SYMBOL(inet_rtx_syn_ack); static bool reqsk_queue_unlink(struct request_sock_queue *queue, struct request_sock *req) { - struct listen_sock *lopt = queue->listen_opt; struct request_sock **prev; + struct listen_sock *lopt; bool found = false; spin_lock(&queue->syn_wait_lock); - - for (prev = &lopt->syn_table[req->rsk_hash]; *prev != NULL; - prev = &(*prev)->dl_next) { - if (*prev == req) { - *prev = req->dl_next; - found = true; - break; + lopt = queue->listen_opt; + if (lopt) { + for (prev = &lopt->syn_table[req->rsk_hash]; *prev != NULL; + prev = &(*prev)->dl_next) { + if (*prev == req) { + *prev = req->dl_next; + found = true; + break; + } } } - spin_unlock(&queue->syn_wait_lock); if (timer_pending(&req->rsk_timer) && del_timer_sync(&req->rsk_timer)) reqsk_put(req); From a54d12792c87a5ebe32f5e54c46a0932f4ff774a Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 2 Oct 2015 12:06:03 +0200 Subject: [PATCH 1203/2091] bpf: fix panic in SO_GET_FILTER with native ebpf programs [ Upstream commit 93d08b6966cf730ea669d4d98f43627597077153 ] When sockets have a native eBPF program attached through setsockopt(sk, SOL_SOCKET, SO_ATTACH_BPF, ...), and then try to dump these over getsockopt(sk, SOL_SOCKET, SO_GET_FILTER, ...), the following panic appears: [49904.178642] BUG: unable to handle kernel NULL pointer dereference at (null) [49904.178762] IP: [] sk_get_filter+0x39/0x90 [49904.182000] PGD 86fc9067 PUD 531a1067 PMD 0 [49904.185196] Oops: 0000 [#1] SMP [...] [49904.224677] Call Trace: [49904.226090] [] sock_getsockopt+0x319/0x740 [49904.227535] [] ? sock_has_perm+0x63/0x70 [49904.228953] [] ? release_sock+0x108/0x150 [49904.230380] [] ? selinux_socket_getsockopt+0x23/0x30 [49904.231788] [] SyS_getsockopt+0xa6/0xc0 [49904.233267] [] entry_SYSCALL_64_fastpath+0x12/0x71 The underlying issue is the very same as in commit b382c0865600 ("sock, diag: fix panic in sock_diag_put_filterinfo"), that is, native eBPF programs don't store an original program since this is only needed in cBPF ones. However, sk_get_filter() wasn't updated to test for this at the time when eBPF could be attached. Just throw an error to the user to indicate that eBPF cannot be dumped over this interface. That way, it can also be known that a program _is_ attached (as opposed to just return 0), and a different (future) method needs to be consulted for a dump. Fixes: 89aa075832b0 ("net: sock: allow eBPF programs to be attached to sockets") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/filter.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/core/filter.c b/net/core/filter.c index bf831a85c3159..0fa2613b5e350 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1526,9 +1526,13 @@ int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, goto out; /* We're copying the filter that has been originally attached, - * so no conversion/decode needed anymore. + * so no conversion/decode needed anymore. eBPF programs that + * have no original program cannot be dumped through this. 
*/ + ret = -EACCES; fprog = filter->prog->orig_prog; + if (!fprog) + goto out; ret = fprog->len; if (!len) From bc845f677c04e09fa988ac95ce3f335df9d2ba0b Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Fri, 2 Oct 2015 13:18:22 +0300 Subject: [PATCH 1204/2091] ovs: do not allocate memory from offline numa node [ Upstream commit 598c12d0ba6de9060f04999746eb1e015774044b ] When openvswitch tries to allocate memory from offline numa node 0: stats = kmem_cache_alloc_node(flow_stats_cache, GFP_KERNEL | __GFP_ZERO, 0) It catches VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES || !node_online(nid)) [ replaced with VM_WARN_ON(!node_online(nid)) recently ] in linux/gfp.h. This patch disables numa affinity in this case. Signed-off-by: Konstantin Khlebnikov Acked-by: Pravin B Shelar Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/openvswitch/flow_table.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index aa349514e4cb5..eed562295c788 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -92,7 +92,8 @@ struct sw_flow *ovs_flow_alloc(void) /* Initialize the default stat node. */ stats = kmem_cache_alloc_node(flow_stats_cache, - GFP_KERNEL | __GFP_ZERO, 0); + GFP_KERNEL | __GFP_ZERO, + node_online(0) ? 0 : NUMA_NO_NODE); if (!stats) goto err; From 3043def89ac220e0719f0c1b97f27b139a44f2b5 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Tue, 6 Oct 2015 17:23:47 -0700 Subject: [PATCH 1205/2091] act_mirred: clear sender cpu before sending to tx [ Upstream commit d40496a56430eac0d330378816954619899fe303 ] Similar to commit c29390c6dfee ("xps: must clear sender_cpu before forwarding") the skb->sender_cpu needs to be cleared when moving from Rx to Tx, otherwise the kernel could crash. Fixes: 2bd82484bb4c ("xps: fix xps for stacked devices") Cc: Eric Dumazet Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: Cong Wang Acked-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/act_mirred.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 3f63ceac8e014..844dd85426dca 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -166,6 +166,7 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, skb2->skb_iif = skb->dev->ifindex; skb2->dev = dev; + skb_sender_cpu_clear(skb2); err = dev_queue_xmit(skb2); out: From 3f90898b43b4e7fea7a58f34f886a78b9f753e52 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 14 Oct 2015 01:09:40 -0700 Subject: [PATCH 1206/2091] ethtool: Use kcalloc instead of kmalloc for ethtool_get_strings [ Upstream commit 077cb37fcf6f00a45f375161200b5ee0cd4e937b ] It seems that kernel memory can leak into userspace by a kmalloc, ethtool_get_strings, then copy_to_user sequence. Avoid this by using kcalloc to zero fill the copied buffer. Signed-off-by: Joe Perches Acked-by: Ben Hutchings Signed-off-by: David S.
Miller Signed-off-by: Greg Kroah-Hartman --- net/core/ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 1d00b89229024..4a6824767f3d4 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1273,7 +1273,7 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr) gstrings.len = ret; - data = kmalloc(gstrings.len * ETH_GSTRING_LEN, GFP_USER); + data = kcalloc(gstrings.len, ETH_GSTRING_LEN, GFP_USER); if (!data) return -ENOMEM; From 7b61554c25cbc110338d3191809daecd584682fa Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Wed, 14 Oct 2015 09:23:18 -0400 Subject: [PATCH 1207/2091] tipc: move fragment importance field to new header position [ Upstream commit dde4b5ae65de659b9ec64bafdde0430459fcb495 ] In commit e3eea1eb47a ("tipc: clean up handling of message priorities") we introduced a field in the packet header for keeping track of the priority of fragments, since this value is not present in the specified protocol header. Since the value so far only is used at the transmitting end of the link, we have not yet officially defined it as part of the protocol. Unfortunately, the field we use for keeping this value, bits 13-15 in in word 5, has turned out to be a poor choice; it is already used by the broadcast protocol for carrying the 'network id' field of the sending node. Since packet fragments also need to be transported across the broadcast protocol, the risk of conflict is obvious, and we see this happen when we use network identities larger than 2^13-1. This has escaped our testing because we have so far only been using small network id values. We now move this field to bits 0-2 in word 9, a field that is guaranteed to be unused by all involved protocols. Fixes: e3eea1eb47a ("tipc: clean up handling of message priorities") Signed-off-by: Jon Maloy Acked-by: Ying Xue Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/tipc/msg.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/tipc/msg.h b/net/tipc/msg.h index e1d3595e2ee95..4cbb0fbad0462 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -353,7 +353,7 @@ static inline void msg_set_seqno(struct tipc_msg *m, u32 n) static inline u32 msg_importance(struct tipc_msg *m) { if (unlikely(msg_user(m) == MSG_FRAGMENTER)) - return msg_bits(m, 5, 13, 0x7); + return msg_bits(m, 9, 0, 0x7); if (likely(msg_isdata(m) && !msg_errcode(m))) return msg_user(m); return TIPC_SYSTEM_IMPORTANCE; @@ -362,7 +362,7 @@ static inline u32 msg_importance(struct tipc_msg *m) static inline void msg_set_importance(struct tipc_msg *m, u32 i) { if (unlikely(msg_user(m) == MSG_FRAGMENTER)) - msg_set_bits(m, 5, 13, 0x7, i); + msg_set_bits(m, 9, 0, 0x7, i); else if (likely(i < TIPC_SYSTEM_IMPORTANCE)) msg_set_user(m, i); else From e962218b42896856642f8d37bb69c4f1feaa8ad9 Mon Sep 17 00:00:00 2001 From: "Arad, Ronen" Date: Thu, 15 Oct 2015 01:55:17 -0700 Subject: [PATCH 1208/2091] netlink: Trim skb to alloc size to avoid MSG_TRUNC [ Upstream commit db65a3aaf29ecce2e34271d52e8d2336b97bd9fe ] netlink_dump() allocates skb based on the calculated min_dump_alloc or a per socket max_recvmsg_len. min_alloc_size is maximum space required for any single netdev attributes as calculated by rtnl_calcit(). max_recvmsg_len tracks the user provided buffer to netlink_recvmsg. It is capped at 16KiB. The intention is to avoid small allocations and to minimize the number of calls required to obtain dump information for all net devices. 
netlink_dump packs as many small messages as could fit within an skb that was sized for the largest single netdev information. The actual space available within an skb is larger than what is requested. It could be much larger and up to near 2x with align to next power of 2 approach. Allowing netlink_dump to use all the space available within the allocated skb increases the buffer size a user has to provide to avoid truncation (i.e. MSG_TRUNC flag set). It was observed that with many VLANs configured on at least one netdev, a larger buffer of near 64KiB was necessary to avoid "Message truncated" error in "ip link" or "bridge [-c[ompressvlans]] vlan show" when min_alloc_size was only a little over 32KiB. This patch trims skb to allocated size in order to allow the user to avoid truncation with a more reasonable buffer size. Signed-off-by: Ronen Arad Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/netlink/af_netlink.c | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 980121e75d2ec..d139c43ac6e50 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2683,6 +2683,7 @@ static int netlink_dump(struct sock *sk) struct sk_buff *skb = NULL; struct nlmsghdr *nlh; int len, err = -ENOBUFS; + int alloc_min_size; int alloc_size; mutex_lock(nlk->cb_mutex); @@ -2691,9 +2692,6 @@ static int netlink_dump(struct sock *sk) goto errout_skb; } - cb = &nlk->cb; - alloc_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE); - if (!netlink_rx_is_mmaped(sk) && atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) goto errout_skb; @@ -2703,23 +2701,35 @@ static int netlink_dump(struct sock *sk) * to reduce number of system calls on dump operations, if user * ever provided a big enough buffer. */ - if (alloc_size < nlk->max_recvmsg_len) { - skb = netlink_alloc_skb(sk, - nlk->max_recvmsg_len, - nlk->portid, + cb = &nlk->cb; + alloc_min_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE); + + if (alloc_min_size < nlk->max_recvmsg_len) { + alloc_size = nlk->max_recvmsg_len; + skb = netlink_alloc_skb(sk, alloc_size, nlk->portid, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY); - /* available room should be exact amount to avoid MSG_TRUNC */ - if (skb) - skb_reserve(skb, skb_tailroom(skb) - - nlk->max_recvmsg_len); } - if (!skb) + if (!skb) { + alloc_size = alloc_min_size; skb = netlink_alloc_skb(sk, alloc_size, nlk->portid, GFP_KERNEL); + } if (!skb) goto errout_skb; + + /* Trim skb to allocated size. User is expected to provide buffer as + * large as max(min_dump_alloc, 16KiB (mac_recvmsg_len capped at + * netlink_recvmsg())). dump will pack as many smaller messages as + * could fit within the allocated skb. skb is typically allocated + * with larger space than required (could be as much as near 2x the + * requested size with align to next power of 2 approach). Allowing + * dump to use the excess space makes it difficult for a user to have a + * reasonable static buffer based on the expected largest dump of a + * single netdev. The outcome is MSG_TRUNC error.
+ */ + skb_reserve(skb, skb_tailroom(skb) - alloc_size); netlink_skb_set_owner_r(skb, sk); len = cb->dump(skb, cb); From a96cb1c6030aa565bcf15e0b03dee3ca5bb1d05a Mon Sep 17 00:00:00 2001 From: Aaron Conole Date: Sat, 26 Sep 2015 18:50:42 -0400 Subject: [PATCH 1209/2091] af_unix: Convert the unix_sk macro to an inline function for type safety [ Upstream commit 4613012db1d911f80897f9446a49de817b2c4c47 ] As suggested by Eric Dumazet this change replaces the #define with a static inline function to enjoy complaints by the compiler when misusing the API. Signed-off-by: Aaron Conole Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/af_unix.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/include/net/af_unix.h b/include/net/af_unix.h index a175ba4a7adbc..dfe4ddfbb43c5 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -64,7 +64,11 @@ struct unix_sock { #define UNIX_GC_MAYBE_CYCLE 1 struct socket_wq peer_wq; }; -#define unix_sk(__sk) ((struct unix_sock *)__sk) + +static inline struct unix_sock *unix_sk(struct sock *sk) +{ + return (struct unix_sock *)sk; +} #define peer_wait peer_wq.wait From 9bf31c538f5f370013cadbb18a40398b0c00d9e2 Mon Sep 17 00:00:00 2001 From: Aaron Conole Date: Sat, 26 Sep 2015 18:50:43 -0400 Subject: [PATCH 1210/2091] af_unix: return data from multiple SKBs on recv() with MSG_PEEK flag [ Upstream commit 9f389e35674f5b086edd70ed524ca0f287259725 ] AF_UNIX sockets now return multiple skbs from recv() when MSG_PEEK flag is set. This is referenced in kernel bugzilla #12323 @ https://bugzilla.kernel.org/show_bug.cgi?id=12323 As described both in the BZ and lkml thread @ http://lkml.org/lkml/2008/1/8/444 calling recv() with MSG_PEEK on an AF_UNIX socket only reads a single skb, where the desired effect is to return as much skb data has been queued, until hitting the recv buffer size (whichever comes first). The modified MSG_PEEK path will now move to the next skb in the tree and jump to the again: label, rather than following the natural loop structure. This requires duplicating some of the loop head actions. This was tested using the python socketpair python code attached to the bugzilla issue. Signed-off-by: Aaron Conole Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/unix/af_unix.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 06430598cf512..56501541784d8 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2046,8 +2046,20 @@ static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, if (UNIXCB(skb).fp) scm.fp = scm_fp_dup(UNIXCB(skb).fp); - sk_peek_offset_fwd(sk, chunk); + if (skip) { + sk_peek_offset_fwd(sk, chunk); + skip -= chunk; + } + + if (UNIXCB(skb).fp) + break; + last = skb; + unix_state_lock(sk); + skb = skb_peek_next(skb, &sk->sk_receive_queue); + if (skb) + goto again; + unix_state_unlock(sk); break; } } while (size); From e09e88967de067453d19bdf102808ce3cfb266d4 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Fri, 2 Oct 2015 00:05:36 +0300 Subject: [PATCH 1211/2091] net/unix: fix logic about sk_peek_offset [ Upstream commit e9193d60d363e4dff75ff6d43a48f22be26d59c7 ] Now send with MSG_PEEK can return data from multiple SKBs. Unfortunately we take into account the peek offset for each skb, that is wrong. We need to apply the peek offset only once. In addition, the peek offset should be used only if MSG_PEEK is set. Cc: "David S. 
Miller" (maintainer:NETWORKING Cc: Eric Dumazet (commit_signer:1/14=7%) Cc: Aaron Conole Fixes: 9f389e35674f ("af_unix: return data from multiple SKBs on recv() with MSG_PEEK flag") Signed-off-by: Andrey Vagin Tested-by: Aaron Conole Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/unix/af_unix.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 56501541784d8..76e66695621cc 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1938,6 +1938,11 @@ static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, goto out; } + if (flags & MSG_PEEK) + skip = sk_peek_offset(sk, flags); + else + skip = 0; + do { int chunk; struct sk_buff *skb, *last; @@ -1984,7 +1989,6 @@ static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, break; } - skip = sk_peek_offset(sk, flags); while (skip >= unix_skb_len(skb)) { skip -= unix_skb_len(skb); last = skb; @@ -2046,14 +2050,12 @@ static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, if (UNIXCB(skb).fp) scm.fp = scm_fp_dup(UNIXCB(skb).fp); - if (skip) { - sk_peek_offset_fwd(sk, chunk); - skip -= chunk; - } + sk_peek_offset_fwd(sk, chunk); if (UNIXCB(skb).fp) break; + skip = 0; last = skb; unix_state_lock(sk); skb = skb_peek_next(skb, &sk->sk_receive_queue); From d96105a5fefafefc110e964a75889e2dda8a8434 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 29 Sep 2015 09:56:53 +0200 Subject: [PATCH 1212/2091] drm: Fix locking for sysfs dpms file commit 621bd0f6982badd6483acb191eb7b6226a578328 upstream. With atomic drivers we need to make sure that (at least in general) property reads hold the right locks. But the legacy dpms property is special and can be read locklessly. Since userspace loves to just randomly look at that all the time (like with "status") do that. To make it clear that we play tricks use the READ_ONCE compiler barrier (and also for paranoia). Note that there's not really anything bad going on since even with the new atomic paths we eventually end up not chasing any pointers (and hence possibly freed memory and other fun stuff). The locking WARNING has been added in commit 88a48e297b3a3bac6022c03babfb038f1a886cea Author: Rob Clark Date: Thu Dec 18 16:01:50 2014 -0500 drm: add atomic properties but since drivers are converting not everyone will have seen this from the start. Jens reported this and submitted a patch to just grab the mode_config.connection_mutex, but we can do a bit better. v2: Remove unused variables I failed to git add for real. 
Reference: http://mid.gmane.org/20150928194822.GA3930@kernel.dk Reported-by: Jens Axboe Tested-by: Jens Axboe Cc: Rob Clark Signed-off-by: Daniel Vetter Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_sysfs.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c index eb7e61078a5b6..92586b0af3ab5 100644 --- a/drivers/gpu/drm/drm_sysfs.c +++ b/drivers/gpu/drm/drm_sysfs.c @@ -235,18 +235,12 @@ static ssize_t dpms_show(struct device *device, char *buf) { struct drm_connector *connector = to_drm_connector(device); - struct drm_device *dev = connector->dev; - uint64_t dpms_status; - int ret; + int dpms; - ret = drm_object_property_get_value(&connector->base, - dev->mode_config.dpms_property, - &dpms_status); - if (ret) - return 0; + dpms = READ_ONCE(connector->dpms); return snprintf(buf, PAGE_SIZE, "%s\n", - drm_get_dpms_name((int)dpms_status)); + drm_get_dpms_name(dpms)); } static ssize_t enabled_show(struct device *device, From 6ebf06eb1dec3ed5603107d0b7c192e3a515479a Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Mon, 5 Oct 2015 10:08:51 -0500 Subject: [PATCH 1213/2091] crypto: sparc - initialize blkcipher.ivsize commit a66d7f724a96d6fd279bfbd2ee488def6b081bea upstream. Some of the crypto algorithms write to the initialization vector, but no space has been allocated for it. This clobbers adjacent memory. Signed-off-by: Dave Kleikamp Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- arch/sparc/crypto/aes_glue.c | 2 ++ arch/sparc/crypto/camellia_glue.c | 1 + arch/sparc/crypto/des_glue.c | 2 ++ 3 files changed, 5 insertions(+) diff --git a/arch/sparc/crypto/aes_glue.c b/arch/sparc/crypto/aes_glue.c index 2e48eb8813ffa..c90930de76ba8 100644 --- a/arch/sparc/crypto/aes_glue.c +++ b/arch/sparc/crypto/aes_glue.c @@ -433,6 +433,7 @@ static struct crypto_alg algs[] = { { .blkcipher = { .min_keysize = AES_MIN_KEY_SIZE, .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, .setkey = aes_set_key, .encrypt = cbc_encrypt, .decrypt = cbc_decrypt, @@ -452,6 +453,7 @@ static struct crypto_alg algs[] = { { .blkcipher = { .min_keysize = AES_MIN_KEY_SIZE, .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, .setkey = aes_set_key, .encrypt = ctr_crypt, .decrypt = ctr_crypt, diff --git a/arch/sparc/crypto/camellia_glue.c b/arch/sparc/crypto/camellia_glue.c index 6bf2479a12fbe..561a84d93cf68 100644 --- a/arch/sparc/crypto/camellia_glue.c +++ b/arch/sparc/crypto/camellia_glue.c @@ -274,6 +274,7 @@ static struct crypto_alg algs[] = { { .blkcipher = { .min_keysize = CAMELLIA_MIN_KEY_SIZE, .max_keysize = CAMELLIA_MAX_KEY_SIZE, + .ivsize = CAMELLIA_BLOCK_SIZE, .setkey = camellia_set_key, .encrypt = cbc_encrypt, .decrypt = cbc_decrypt, diff --git a/arch/sparc/crypto/des_glue.c b/arch/sparc/crypto/des_glue.c index dd6a34fa6e19d..61af794aa2d31 100644 --- a/arch/sparc/crypto/des_glue.c +++ b/arch/sparc/crypto/des_glue.c @@ -429,6 +429,7 @@ static struct crypto_alg algs[] = { { .blkcipher = { .min_keysize = DES_KEY_SIZE, .max_keysize = DES_KEY_SIZE, + .ivsize = DES_BLOCK_SIZE, .setkey = des_set_key, .encrypt = cbc_encrypt, .decrypt = cbc_decrypt, @@ -485,6 +486,7 @@ static struct crypto_alg algs[] = { { .blkcipher = { .min_keysize = DES3_EDE_KEY_SIZE, .max_keysize = DES3_EDE_KEY_SIZE, + .ivsize = DES3_EDE_BLOCK_SIZE, .setkey = des3_ede_set_key, .encrypt = cbc3_encrypt, .decrypt = cbc3_decrypt, From 3ae2c7951214fd5926101e817432feabb7f72feb Mon Sep 17 00:00:00 2001 From: 
Russell King Date: Fri, 9 Oct 2015 20:43:33 +0100 Subject: [PATCH 1214/2091] crypto: ahash - ensure statesize is non-zero commit 8996eafdcbad149ac0f772fb1649fbb75c482a6a upstream. Unlike shash algorithms, ahash drivers must implement export and import as their descriptors may contain hardware state and cannot be exported as is. Unfortunately some ahash drivers did not provide them and end up causing crashes with algif_hash. This patch adds a check to prevent these drivers from registering ahash algorithms until they are fixed. Signed-off-by: Russell King Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/ahash.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crypto/ahash.c b/crypto/ahash.c index 8acb886032ae7..9c1dc8d6106a8 100644 --- a/crypto/ahash.c +++ b/crypto/ahash.c @@ -544,7 +544,8 @@ static int ahash_prepare_alg(struct ahash_alg *alg) struct crypto_alg *base = &alg->halg.base; if (alg->halg.digestsize > PAGE_SIZE / 8 || - alg->halg.statesize > PAGE_SIZE / 8) + alg->halg.statesize > PAGE_SIZE / 8 || + alg->halg.statesize == 0) return -EINVAL; base->cra_type = &crypto_ahash_type; From 2105f9aec785cbc4679132abb44645bb3c59a93e Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Thu, 15 Oct 2015 15:28:29 -0700 Subject: [PATCH 1215/2091] memcg: convert threshold to bytes commit 424cdc14138088ada1b0e407a2195b2783c6e5ef upstream. page_counter_memparse() returns pages for the threshold, while mem_cgroup_usage() returns bytes for memory usage. Convert the threshold to bytes. Fixes: 3e32cb2e0a12b6915 ("memcg: rename cgroup_event to mem_cgroup_event"). Signed-off-by: Shaohua Li Cc: Johannes Weiner Acked-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/memcontrol.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index a04225d372ba3..68dea90334cb9 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3677,6 +3677,7 @@ static int __mem_cgroup_usage_register_event(struct mem_cgroup *memcg, ret = page_counter_memparse(args, "-1", &threshold); if (ret) return ret; + threshold <<= PAGE_SHIFT; mutex_lock(&memcg->thresholds_lock); From 2382a147ed804d66fbfd1c1c97b88bc5f15c49db Mon Sep 17 00:00:00 2001 From: David Sterba Date: Mon, 12 Oct 2015 16:55:54 +0200 Subject: [PATCH 1216/2091] btrfs: check unsupported filters in balance arguments commit 8eb934591f8bf584969454a658f629cd06e59f3a upstream. We don't verify that all the balance filter arguments supplemented by the flags are actually known to the kernel. Thus we let it silently pass and do nothing. At the moment this means only the 'limit' filter, but we're going to add a few more soon so it's better to have that fixed. Also in older stable kernels so that it works with newer userspace tools. 
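For context, the check being added is the usual "reject what you do not understand" validation, so unknown balance filters fail with -EINVAL instead of being silently ignored. A schematic sketch of that pattern, with flag names invented for illustration rather than the real BTRFS_BALANCE_ARGS_* definitions:

    #include <errno.h>

    #define ARG_PROFILES    (1ULL << 0)
    #define ARG_USAGE       (1ULL << 1)
    #define ARG_LIMIT       (1ULL << 5)
    #define KNOWN_ARGS_MASK (ARG_PROFILES | ARG_USAGE | ARG_LIMIT)

    /* Returns 0 only if userspace asked for filters this kernel implements. */
    static int validate_balance_args(unsigned long long flags)
    {
            if (flags & ~KNOWN_ARGS_MASK)
                    return -EINVAL;         /* unknown filter bit set */
            return 0;
    }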
Signed-off-by: David Sterba Signed-off-by: Chris Mason Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/ioctl.c | 5 +++++ fs/btrfs/volumes.h | 8 ++++++++ 2 files changed, 13 insertions(+) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 37d456a9a3b8c..af3dd3c55ef18 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -4492,6 +4492,11 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg) bctl->flags |= BTRFS_BALANCE_TYPE_MASK; } + if (bctl->flags & ~(BTRFS_BALANCE_ARGS_MASK | BTRFS_BALANCE_TYPE_MASK)) { + ret = -EINVAL; + goto out_bargs; + } + do_balance: /* * Ownership of bctl and mutually_exclusive_operation_running diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index ebc31331a8374..e1cc5b45069af 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -372,6 +372,14 @@ struct map_lookup { #define BTRFS_BALANCE_ARGS_VRANGE (1ULL << 4) #define BTRFS_BALANCE_ARGS_LIMIT (1ULL << 5) +#define BTRFS_BALANCE_ARGS_MASK \ + (BTRFS_BALANCE_ARGS_PROFILES | \ + BTRFS_BALANCE_ARGS_USAGE | \ + BTRFS_BALANCE_ARGS_DEVID | \ + BTRFS_BALANCE_ARGS_DRANGE | \ + BTRFS_BALANCE_ARGS_VRANGE | \ + BTRFS_BALANCE_ARGS_LIMIT) + /* * Profile changing flags. When SOFT is set we won't relocate chunk if * it already has the target profile (even though it may be From 6780e0d1b043cad12625e0cb293b8d2ce5e39973 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 13 Oct 2015 14:06:48 -0400 Subject: [PATCH 1217/2091] btrfs: fix use after free iterating extrefs commit dc6c5fb3b514221f2e9d21ee626a9d95d3418dff upstream. The code for btrfs inode-resolve has never worked properly for files with enough hard links to trigger extrefs. It was trying to get the leaf out of a path after freeing the path: btrfs_release_path(path); leaf = path->nodes[0]; item_size = btrfs_item_size_nr(leaf, slot); The fix here is to use the extent buffer we cloned just a little higher up to avoid deadlocks caused by using the leaf in the path. Signed-off-by: Chris Mason cc: Mark Fasheh Reviewed-by: Filipe Manana Reviewed-by: Mark Fasheh Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/backref.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 614aaa1969bdf..723470850b946 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -1786,7 +1786,6 @@ static int iterate_inode_extrefs(u64 inum, struct btrfs_root *fs_root, int found = 0; struct extent_buffer *eb; struct btrfs_inode_extref *extref; - struct extent_buffer *leaf; u32 item_size; u32 cur_offset; unsigned long ptr; @@ -1814,9 +1813,8 @@ static int iterate_inode_extrefs(u64 inum, struct btrfs_root *fs_root, btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); btrfs_release_path(path); - leaf = path->nodes[0]; - item_size = btrfs_item_size_nr(leaf, slot); - ptr = btrfs_item_ptr_offset(leaf, slot); + item_size = btrfs_item_size_nr(eb, slot); + ptr = btrfs_item_ptr_offset(eb, slot); cur_offset = 0; while (cur_offset < item_size) { @@ -1830,7 +1828,7 @@ static int iterate_inode_extrefs(u64 inum, struct btrfs_root *fs_root, if (ret) break; - cur_offset += btrfs_inode_extref_name_len(leaf, extref); + cur_offset += btrfs_inode_extref_name_len(eb, extref); cur_offset += sizeof(*extref); } btrfs_tree_read_unlock_blocking(eb); From a20f5eac880c215ef9aeaa147aa991e0f42dd8b0 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 8 Oct 2015 11:11:17 +0100 Subject: [PATCH 1218/2091] arm64: errata: use KBUILD_CFLAGS_MODULE for erratum #843419 commit b6dd8e0719c0d2d01429639a11b7bc2677de240c upstream. 
Commit df057cc7b4fa ("arm64: errata: add module build workaround for erratum #843419") sets CFLAGS_MODULE to ensure that the large memory model is used by the compiler when building kernel modules. However, CFLAGS_MODULE is an environment variable and intended to be overridden on the command line, which appears to be the case with the Ubuntu kernel packaging system, so use KBUILD_CFLAGS_MODULE instead. Cc: Ard Biesheuvel Fixes: df057cc7b4fa ("arm64: errata: add module build workaround for erratum #843419") Reported-by: Dann Frazier Tested-by: Dann Frazier Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 81151663ef385..3258174e61526 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -31,7 +31,7 @@ endif CHECKFLAGS += -D__aarch64__ ifeq ($(CONFIG_ARM64_ERRATUM_843419), y) -CFLAGS_MODULE += -mcmodel=large +KBUILD_CFLAGS_MODULE += -mcmodel=large endif # Default value From 271759afb5a49f1d1e5a3cef5c0fdec867f9bba4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 9 Oct 2015 15:03:26 +0200 Subject: [PATCH 1219/2091] nfsd/blocklayout: accept any minlength commit 8c3ad9cb7343dc5f61b8cf3cdbe1016c5e7c2c8b upstream. Recent Linux clients have started to send GETLAYOUT requests with minlength less than blocksize. Servers aren't really allowed to impose this kind of restriction on layouts; see RFC 5661 section 18.43.3 for details. This has been observed to cause indefinite hangs on fsx runs on some clients. Signed-off-by: Christoph Hellwig Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/blocklayout.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c index cdefaa331a071..c29d9421bd5e1 100644 --- a/fs/nfsd/blocklayout.c +++ b/fs/nfsd/blocklayout.c @@ -56,14 +56,6 @@ nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp, u32 device_generation = 0; int error; - /* - * We do not attempt to support I/O smaller than the fs block size, - * or not aligned to it. - */ - if (args->lg_minlength < block_size) { - dprintk("pnfsd: I/O too small\n"); - goto out_layoutunavailable; - } if (seg->offset & (block_size - 1)) { dprintk("pnfsd: I/O misaligned\n"); goto out_layoutunavailable; From b6af04a2106867e49fa1351bf7ab7426a8c2b63c Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 21 Sep 2015 14:26:54 +0200 Subject: [PATCH 1220/2091] mfd: max77843: Fix max77843_chg_init() return on error commit 1b52e50f2a402a266f1ba2281f0a57e87637a047 upstream. If i2c_new_dummy() fails in max77843_chg_init(), an PTR_ERR(NULL) is returned which is 0. So the function was wrongly returning a success value instead of an error code. 
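The underlying pitfall: i2c_new_dummy() signals failure with a plain NULL rather than an ERR_PTR value, and PTR_ERR() is essentially a cast of the pointer to long, so PTR_ERR(NULL) evaluates to 0. A userspace re-creation of the helper (an approximation for illustration, not the include/linux/err.h implementation) makes that obvious:

    #include <stdio.h>

    /* Illustrative stand-in for the kernel's PTR_ERR(). */
    static inline long PTR_ERR(const void *ptr)
    {
            return (long)ptr;
    }

    int main(void)
    {
            void *client = NULL;    /* what i2c_new_dummy() returns on failure */

            /* Prints 0, i.e. "success" - hence the switch to a fixed -ENODEV. */
            printf("PTR_ERR(NULL) = %ld\n", PTR_ERR(client));
            return 0;
    }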
Fixes: c7f585fe46d8 ("mfd: max77843: Add max77843 MFD driver core driver") Signed-off-by: Javier Martinez Canillas Reviewed-by: Krzysztof Kozlowski Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/mfd/max77843.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mfd/max77843.c b/drivers/mfd/max77843.c index a354ac677ec70..1074a0d68680a 100644 --- a/drivers/mfd/max77843.c +++ b/drivers/mfd/max77843.c @@ -79,7 +79,7 @@ static int max77843_chg_init(struct max77843 *max77843) if (!max77843->i2c_chg) { dev_err(&max77843->i2c->dev, "Cannot allocate I2C device for Charger\n"); - return PTR_ERR(max77843->i2c_chg); + return -ENODEV; } i2c_set_clientdata(max77843->i2c_chg, max77843); From 4689c5051cc7caf95d51916d191fa575931c5adc Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 9 Oct 2015 10:39:25 +0100 Subject: [PATCH 1221/2091] i2c: rcar: enable RuntimePM before registering to the core commit 4f7effddf4549d57114289f273710f077c4c330a upstream. The core may register clients attached to this master which may use functionality from the master. So, RuntimePM must be enabled before, otherwise this will fail. While here, move drvdata, too. Reported-by: Geert Uytterhoeven Signed-off-by: Wolfram Sang Signed-off-by: Wolfram Sang Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-rcar.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/busses/i2c-rcar.c b/drivers/i2c/busses/i2c-rcar.c index 5a84bea5b8451..d9d022cdfff00 100644 --- a/drivers/i2c/busses/i2c-rcar.c +++ b/drivers/i2c/busses/i2c-rcar.c @@ -688,15 +688,16 @@ static int rcar_i2c_probe(struct platform_device *pdev) return ret; } + pm_runtime_enable(dev); + platform_set_drvdata(pdev, priv); + ret = i2c_add_numbered_adapter(adap); if (ret < 0) { dev_err(dev, "reg adap failed: %d\n", ret); + pm_runtime_disable(dev); return ret; } - pm_runtime_enable(dev); - platform_set_drvdata(pdev, priv); - dev_info(dev, "probed\n"); return 0; From d07e907e6a7a6d887f9641915529f8415195d468 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sat, 10 Oct 2015 08:24:23 +0100 Subject: [PATCH 1222/2091] i2c: s3c2410: enable RuntimePM before registering to the core commit eadd709f5d2e8aebb1b7bf49460e97a68d81a9b0 upstream. The core may register clients attached to this master which may use functionality from the master. So, RuntimePM must be enabled before, otherwise this will fail. While here, move drvdata, too.
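The same ordering rule drives this patch, the rcar change above, and the designware-platdrv change further down: anything a child client may rely on, drvdata and runtime PM included, must be in place before the adapter is registered, and must be unwound if registration fails. A minimal sketch of the shape, using a hypothetical example_i2c driver rather than any of the real ones:

    #include <linux/i2c.h>
    #include <linux/platform_device.h>
    #include <linux/pm_runtime.h>
    #include <linux/slab.h>

    /* Hypothetical bus-master private data, for illustration only. */
    struct example_i2c {
            struct i2c_adapter adap;
    };

    static int example_i2c_probe(struct platform_device *pdev)
    {
            struct example_i2c *priv;
            int ret;

            priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
            if (!priv)
                    return -ENOMEM;

            /* Clients may be probed from within adapter registration, so
             * drvdata and runtime PM must already be set up here. */
            platform_set_drvdata(pdev, priv);
            pm_runtime_enable(&pdev->dev);

            ret = i2c_add_numbered_adapter(&priv->adap);
            if (ret < 0) {
                    pm_runtime_disable(&pdev->dev); /* undo on failure */
                    return ret;
            }

            return 0;
    }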
Signed-off-by: Wolfram Sang Tested-by: Krzysztof Kozlowski Acked-by: Kukjin Kim Signed-off-by: Wolfram Sang Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-s3c2410.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/busses/i2c-s3c2410.c b/drivers/i2c/busses/i2c-s3c2410.c index 297e9c9ac9432..424794271703a 100644 --- a/drivers/i2c/busses/i2c-s3c2410.c +++ b/drivers/i2c/busses/i2c-s3c2410.c @@ -1243,17 +1243,19 @@ static int s3c24xx_i2c_probe(struct platform_device *pdev) i2c->adap.nr = i2c->pdata->bus_num; i2c->adap.dev.of_node = pdev->dev.of_node; + platform_set_drvdata(pdev, i2c); + + pm_runtime_enable(&pdev->dev); + ret = i2c_add_numbered_adapter(&i2c->adap); if (ret < 0) { dev_err(&pdev->dev, "failed to add bus to i2c core\n"); + pm_runtime_disable(&pdev->dev); s3c24xx_i2c_deregister_cpufreq(i2c); clk_unprepare(i2c->clk); return ret; } - platform_set_drvdata(pdev, i2c); - - pm_runtime_enable(&pdev->dev); pm_runtime_enable(&i2c->adap.dev); dev_info(&pdev->dev, "%s: S3C I2C adapter\n", dev_name(&i2c->adap.dev)); From 67e6df4ba36ac3f15a229a7aab69349e6b82cf10 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Thu, 24 Sep 2015 12:06:54 +0300 Subject: [PATCH 1223/2091] i2c: designware: Do not use parameters from ACPI on Dell Inspiron 7348 commit 56d4b8a24cef5d66f0d10ac778a520d3c2c68a48 upstream. ACPI SSCN/FMCN methods were originally added because then the platform can provide the most accurate HCNT/LCNT values to the driver. However, this seems not to be true for Dell Inspiron 7348 where using these causes the touchpad to fail in boot: i2c_hid i2c-DLL0675:00: failed to retrieve report from device. i2c_designware INT3433:00: i2c_dw_handle_tx_abort: lost arbitration i2c_hid i2c-DLL0675:00: failed to retrieve report from device. i2c_designware INT3433:00: controller timed out The values received from ACPI are (in fast mode): HCNT: 72 LCNT: 160 this translates to the following timings (input clock is 100MHz on Broadwell): tHIGH: 720 ns (spec min 600 ns) tLOW: 1600 ns (spec min 1300 ns) Bus period: 2920 ns (assuming 300 ns tf and tr) Bus speed: 342.5 kHz Both tHIGH and tLOW are within the I2C specification. The calculated values when ACPI parameters are not used are (in fast mode): HCNT: 87 LCNT: 159 which translates to: tHIGH: 870 ns (spec min 600 ns) tLOW: 1590 ns (spec min 1300 ns) Bus period 3060 ns (assuming 300 ns tf and tr) Bus speed 326.8 kHz These values are also within the I2C specification. Since both ACPI and calculated values meet the I2C specification timing requirements it is hard to say why the touchpad does not function properly with the ACPI values except that the bus speed is higher in this case (but still well below the max 400kHz). Solve this by adding a DMI quirk to the driver that disables using ACPI parameters on this particular machine.
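The arithmetic quoted above is easy to re-check. The sketch below uses the same simplified model as the commit message (10 ns per count at the 100 MHz input clock, bus period approximated as tHIGH + tLOW + tr + tf with 300 ns rise/fall times); it is not how the driver itself derives the counts:

    #include <stdio.h>

    static void show(const char *label, int hcnt, int lcnt)
    {
            double thigh = hcnt * 10.0;     /* ns, 10 ns per count at 100 MHz */
            double tlow = lcnt * 10.0;
            double period = thigh + tlow + 300.0 + 300.0;   /* + tr + tf */

            printf("%s: tHIGH=%4.0f ns tLOW=%4.0f ns speed=%.1f kHz\n",
                   label, thigh, tlow, 1e6 / period);
    }

    int main(void)
    {
            show("ACPI      ", 72, 160);    /* ~342.5 kHz */
            show("calculated", 87, 159);    /* ~326.8 kHz */
            return 0;
    }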
Reported-by: Pavel Roskin Signed-off-by: Mika Westerberg Tested-by: Pavel Roskin Signed-off-by: Wolfram Sang Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-designware-platdrv.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c index 0a80e4aabaed9..834ddace0cee5 100644 --- a/drivers/i2c/busses/i2c-designware-platdrv.c +++ b/drivers/i2c/busses/i2c-designware-platdrv.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -51,6 +52,22 @@ static u32 i2c_dw_get_clk_rate_khz(struct dw_i2c_dev *dev) } #ifdef CONFIG_ACPI +/* + * The HCNT/LCNT information coming from ACPI should be the most accurate + * for given platform. However, some systems get it wrong. On such systems + * we get better results by calculating those based on the input clock. + */ +static const struct dmi_system_id dw_i2c_no_acpi_params[] = { + { + .ident = "Dell Inspiron 7348", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 7348"), + }, + }, + { } +}; + static void dw_i2c_acpi_params(struct platform_device *pdev, char method[], u16 *hcnt, u16 *lcnt, u32 *sda_hold) { @@ -58,6 +75,9 @@ static void dw_i2c_acpi_params(struct platform_device *pdev, char method[], acpi_handle handle = ACPI_HANDLE(&pdev->dev); union acpi_object *obj; + if (dmi_check_system(dw_i2c_no_acpi_params)) + return; + if (ACPI_FAILURE(acpi_evaluate_object(handle, method, NULL, &buf))) return; From 90e210c2de3e70d92f1b77b707714f0ad5bd4540 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 9 Oct 2015 10:39:24 +0100 Subject: [PATCH 1224/2091] i2c: designware-platdrv: enable RuntimePM before registering to the core commit 36d48fb5766aee9717e429f772046696b215282d upstream. The core may register clients attached to this master which may use funtionality from the master. So, RuntimePM must be enabled before, otherwise this will fail. Signed-off-by: Wolfram Sang Signed-off-by: Wolfram Sang Acked-by: Mika Westerberg Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-designware-platdrv.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c index 834ddace0cee5..3f7d4876937e7 100644 --- a/drivers/i2c/busses/i2c-designware-platdrv.c +++ b/drivers/i2c/busses/i2c-designware-platdrv.c @@ -273,12 +273,6 @@ static int dw_i2c_probe(struct platform_device *pdev) adap->dev.parent = &pdev->dev; adap->dev.of_node = pdev->dev.of_node; - r = i2c_add_numbered_adapter(adap); - if (r) { - dev_err(&pdev->dev, "failure adding adapter\n"); - return r; - } - if (dev->pm_runtime_disabled) { pm_runtime_forbid(&pdev->dev); } else { @@ -288,6 +282,13 @@ static int dw_i2c_probe(struct platform_device *pdev) pm_runtime_enable(&pdev->dev); } + r = i2c_add_numbered_adapter(adap); + if (r) { + dev_err(&pdev->dev, "failure adding adapter\n"); + pm_runtime_disable(&pdev->dev); + return r; + } + return 0; } From 65825ff6388e1905ea128cb5341822840ddda62e Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 30 Sep 2015 09:05:30 -0700 Subject: [PATCH 1225/2091] workqueue: make sure delayed work run in local cpu commit 874bbfe600a660cba9c776b3957b1ce393151b76 upstream. My system keeps crashing with below message. vmstat_update() schedules a delayed work in current cpu and expects the work runs in the cpu. 
schedule_delayed_work() is expected to make delayed work run in local cpu. The problem is timer can be migrated with NO_HZ. __queue_work() queues work in timer handler, which could run in a different cpu other than where the delayed work is scheduled. The end result is the delayed work runs in different cpu. The patch makes __queue_delayed_work records local cpu earlier. Where the timer runs doesn't change where the work runs with the change. [ 28.010131] ------------[ cut here ]------------ [ 28.010609] kernel BUG at ../mm/vmstat.c:1392! [ 28.011099] invalid opcode: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC KASAN [ 28.011860] Modules linked in: [ 28.012245] CPU: 0 PID: 289 Comm: kworker/0:3 Tainted: G W4.3.0-rc3+ #634 [ 28.013065] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.7.5-20140709_153802- 04/01/2014 [ 28.014160] Workqueue: events vmstat_update [ 28.014571] task: ffff880117682580 ti: ffff8800ba428000 task.ti: ffff8800ba428000 [ 28.015445] RIP: 0010:[] []vmstat_update+0x31/0x80 [ 28.016282] RSP: 0018:ffff8800ba42fd80 EFLAGS: 00010297 [ 28.016812] RAX: 0000000000000000 RBX: ffff88011a858dc0 RCX:0000000000000000 [ 28.017585] RDX: ffff880117682580 RSI: ffffffff81f14d8c RDI:ffffffff81f4df8d [ 28.018366] RBP: ffff8800ba42fd90 R08: 0000000000000001 R09:0000000000000000 [ 28.019169] R10: 0000000000000000 R11: 0000000000000121 R12:ffff8800baa9f640 [ 28.019947] R13: ffff88011a81e340 R14: ffff88011a823700 R15:0000000000000000 [ 28.020071] FS: 0000000000000000(0000) GS:ffff88011a800000(0000)knlGS:0000000000000000 [ 28.020071] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [ 28.020071] CR2: 00007ff6144b01d0 CR3: 00000000b8e93000 CR4:00000000000006f0 [ 28.020071] Stack: [ 28.020071] ffff88011a858dc0 ffff8800baa9f640 ffff8800ba42fe00ffffffff8106bd88 [ 28.020071] ffffffff8106bd0b 0000000000000096 0000000000000000ffffffff82f9b1e8 [ 28.020071] ffffffff829f0b10 0000000000000000 ffffffff81f18460ffff88011a81e340 [ 28.020071] Call Trace: [ 28.020071] [] process_one_work+0x1c8/0x540 [ 28.020071] [] ? process_one_work+0x14b/0x540 [ 28.020071] [] worker_thread+0x114/0x460 [ 28.020071] [] ? process_one_work+0x540/0x540 [ 28.020071] [] kthread+0xf8/0x110 [ 28.020071] [] ?kthread_create_on_node+0x200/0x200 [ 28.020071] [] ret_from_fork+0x3f/0x70 [ 28.020071] [] ?kthread_create_on_node+0x200/0x200 Signed-off-by: Shaohua Li Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- kernel/workqueue.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 586ad91300b0f..5c01664c26e25 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1451,13 +1451,13 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq, timer_stats_timer_set_start_info(&dwork->timer); dwork->wq = wq; + /* timer isn't guaranteed to run in this cpu, record earlier */ + if (cpu == WORK_CPU_UNBOUND) + cpu = raw_smp_processor_id(); dwork->cpu = cpu; timer->expires = jiffies + delay; - if (unlikely(cpu != WORK_CPU_UNBOUND)) - add_timer_on(timer, cpu); - else - add_timer(timer); + add_timer_on(timer, cpu); } /** From db531487d7f87820533fa37be4e52439034075d1 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Fri, 2 Oct 2015 14:03:19 +1000 Subject: [PATCH 1226/2091] drm/nouveau/fbcon: take runpm reference when userspace has an open fd commit f231976c2e8964ceaa9250e57d27c35ff03825c2 upstream. We need to do this in order to prevent accesses to the device while it's powered down. 
Userspace may have an mmap of the fb, and there's no good way (that I know of) to prevent it from touching the device otherwise. This fixes some nasty races between runpm and plymouth on some systems, which result in the GPU getting very upset and hanging the boot. Signed-off-by: Ben Skeggs Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nouveau_fbcon.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c index 6751553abe4af..567791b27d6df 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c +++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c @@ -178,8 +178,30 @@ nouveau_fbcon_sync(struct fb_info *info) return 0; } +static int +nouveau_fbcon_open(struct fb_info *info, int user) +{ + struct nouveau_fbdev *fbcon = info->par; + struct nouveau_drm *drm = nouveau_drm(fbcon->dev); + int ret = pm_runtime_get_sync(drm->dev->dev); + if (ret < 0 && ret != -EACCES) + return ret; + return 0; +} + +static int +nouveau_fbcon_release(struct fb_info *info, int user) +{ + struct nouveau_fbdev *fbcon = info->par; + struct nouveau_drm *drm = nouveau_drm(fbcon->dev); + pm_runtime_put(drm->dev->dev); + return 0; +} + static struct fb_ops nouveau_fbcon_ops = { .owner = THIS_MODULE, + .fb_open = nouveau_fbcon_open, + .fb_release = nouveau_fbcon_release, .fb_check_var = drm_fb_helper_check_var, .fb_set_par = drm_fb_helper_set_par, .fb_fillrect = nouveau_fbcon_fillrect, @@ -195,6 +217,8 @@ static struct fb_ops nouveau_fbcon_ops = { static struct fb_ops nouveau_fbcon_sw_ops = { .owner = THIS_MODULE, + .fb_open = nouveau_fbcon_open, + .fb_release = nouveau_fbcon_release, .fb_check_var = drm_fb_helper_check_var, .fb_set_par = drm_fb_helper_set_par, .fb_fillrect = cfb_fillrect, From 510035b39ccbbbc0e5d9018796d60c009c3becba Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 14 Oct 2015 18:51:17 +1000 Subject: [PATCH 1227/2091] drm/dp/mst: make mst i2c transfer code more robust. commit ae491542cbbbcca0ec8938c37d4079a985e58440 upstream. This zeroes the msg so no random stack data ends up getting sent, it also limits the function to not accepting > 4 i2c msgs. 
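The zeroing matters because the request lives on the stack and is only partially filled in before being encoded into a sideband message; without the memset, the untouched members and any padding carry over whatever the stack last held. A generic illustration of the pattern, with types and field names invented rather than the drm_dp_* ones:

    #include <string.h>

    struct sideband_request {
            int type;
            int num_transactions;
            unsigned char payload[16];      /* only partially used */
    };

    static void build_request(struct sideband_request *req, int type, int n)
    {
            /* Zero first so unused fields and padding never leak stack data. */
            memset(req, 0, sizeof(*req));

            req->type = type;
            req->num_transactions = n;
    }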
Reviewed-by: Daniel Vetter Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_dp_mst_topology.c | 3 ++- include/drm/drm_dp_mst_helper.h | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index 7f467fdc9107a..2a2eb96caedaf 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -2766,12 +2766,13 @@ static int drm_dp_mst_i2c_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs if (msgs[num - 1].flags & I2C_M_RD) reading = true; - if (!reading) { + if (!reading || (num - 1 > DP_REMOTE_I2C_READ_MAX_TRANSACTIONS)) { DRM_DEBUG_KMS("Unsupported I2C transaction for MST device\n"); ret = -EIO; goto out; } + memset(&msg, 0, sizeof(msg)); msg.req_type = DP_REMOTE_I2C_READ; msg.u.i2c_read.num_transactions = num - 1; msg.u.i2c_read.port_number = port->port_num; diff --git a/include/drm/drm_dp_mst_helper.h b/include/drm/drm_dp_mst_helper.h index 86d0b25ed0547..a89f505c856bc 100644 --- a/include/drm/drm_dp_mst_helper.h +++ b/include/drm/drm_dp_mst_helper.h @@ -253,6 +253,7 @@ struct drm_dp_remote_dpcd_write { u8 *bytes; }; +#define DP_REMOTE_I2C_READ_MAX_TRANSACTIONS 4 struct drm_dp_remote_i2c_read { u8 num_transactions; u8 port_number; @@ -262,7 +263,7 @@ struct drm_dp_remote_i2c_read { u8 *bytes; u8 no_stop_bit; u8 i2c_transaction_delay; - } transactions[4]; + } transactions[DP_REMOTE_I2C_READ_MAX_TRANSACTIONS]; u8 read_i2c_device_id; u8 num_bytes_read; }; From 17c5761b02dc7c815193d1a66a8a51c860e58e4d Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 15 Oct 2015 09:04:21 +1000 Subject: [PATCH 1228/2091] drm/radeon: attach tile property to mst connector commit bc8c131ccdd62d4ed4f33c6b50f92907e7c32dee upstream. This allows tiled monitors to work with radeon once mst is enabled. Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/radeon_dp_mst.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/radeon/radeon_dp_mst.c b/drivers/gpu/drm/radeon/radeon_dp_mst.c index 257b10be5cda9..42986130cc63a 100644 --- a/drivers/gpu/drm/radeon/radeon_dp_mst.c +++ b/drivers/gpu/drm/radeon/radeon_dp_mst.c @@ -283,6 +283,7 @@ static struct drm_connector *radeon_dp_add_mst_connector(struct drm_dp_mst_topol radeon_connector->mst_encoder = radeon_dp_create_fake_mst_encoder(master); drm_object_attach_property(&connector->base, dev->mode_config.path_property, 0); + drm_object_attach_property(&connector->base, dev->mode_config.tile_property, 0); drm_mode_connector_set_path_property(connector, pathprop); drm_reinit_primary_mode_group(dev); From 4e4887f08f2ece9e7f1975af4c3dc797b1ad557b Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 30 Sep 2015 16:45:52 -0400 Subject: [PATCH 1229/2091] drm/radeon: add pm sysfs files late commit 51a4726b04e880fdd9b4e0e58b13f70b0a68a7f5 upstream. They were added relatively early in the driver init process which meant that in some cases the driver was not finished initializing before external tools tried to use them which could result in a crash depending on the timing. 
Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/radeon_display.c | 14 +----- drivers/gpu/drm/radeon/radeon_pm.c | 63 +++++++++++++++---------- 2 files changed, 40 insertions(+), 37 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index d2e9e9efc159c..6743174acdbcd 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -1633,18 +1633,8 @@ int radeon_modeset_init(struct radeon_device *rdev) radeon_fbdev_init(rdev); drm_kms_helper_poll_init(rdev->ddev); - if (rdev->pm.dpm_enabled) { - /* do dpm late init */ - ret = radeon_pm_late_init(rdev); - if (ret) { - rdev->pm.dpm_enabled = false; - DRM_ERROR("radeon_pm_late_init failed, disabling dpm\n"); - } - /* set the dpm state for PX since there won't be - * a modeset to call this. - */ - radeon_pm_compute_clocks(rdev); - } + /* do pm late init */ + ret = radeon_pm_late_init(rdev); return 0; } diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c index c1ba83a8dd8c9..948c33105801d 100644 --- a/drivers/gpu/drm/radeon/radeon_pm.c +++ b/drivers/gpu/drm/radeon/radeon_pm.c @@ -1331,14 +1331,6 @@ static int radeon_pm_init_old(struct radeon_device *rdev) INIT_DELAYED_WORK(&rdev->pm.dynpm_idle_work, radeon_dynpm_idle_work_handler); if (rdev->pm.num_power_states > 1) { - /* where's the best place to put these? */ - ret = device_create_file(rdev->dev, &dev_attr_power_profile); - if (ret) - DRM_ERROR("failed to create device file for power profile\n"); - ret = device_create_file(rdev->dev, &dev_attr_power_method); - if (ret) - DRM_ERROR("failed to create device file for power method\n"); - if (radeon_debugfs_pm_init(rdev)) { DRM_ERROR("Failed to register debugfs file for PM!\n"); } @@ -1396,20 +1388,6 @@ static int radeon_pm_init_dpm(struct radeon_device *rdev) goto dpm_failed; rdev->pm.dpm_enabled = true; - ret = device_create_file(rdev->dev, &dev_attr_power_dpm_state); - if (ret) - DRM_ERROR("failed to create device file for dpm state\n"); - ret = device_create_file(rdev->dev, &dev_attr_power_dpm_force_performance_level); - if (ret) - DRM_ERROR("failed to create device file for dpm state\n"); - /* XXX: these are noops for dpm but are here for backwards compat */ - ret = device_create_file(rdev->dev, &dev_attr_power_profile); - if (ret) - DRM_ERROR("failed to create device file for power profile\n"); - ret = device_create_file(rdev->dev, &dev_attr_power_method); - if (ret) - DRM_ERROR("failed to create device file for power method\n"); - if (radeon_debugfs_pm_init(rdev)) { DRM_ERROR("Failed to register debugfs file for dpm!\n"); } @@ -1550,9 +1528,44 @@ int radeon_pm_late_init(struct radeon_device *rdev) int ret = 0; if (rdev->pm.pm_method == PM_METHOD_DPM) { - mutex_lock(&rdev->pm.mutex); - ret = radeon_dpm_late_enable(rdev); - mutex_unlock(&rdev->pm.mutex); + if (rdev->pm.dpm_enabled) { + ret = device_create_file(rdev->dev, &dev_attr_power_dpm_state); + if (ret) + DRM_ERROR("failed to create device file for dpm state\n"); + ret = device_create_file(rdev->dev, &dev_attr_power_dpm_force_performance_level); + if (ret) + DRM_ERROR("failed to create device file for dpm state\n"); + /* XXX: these are noops for dpm but are here for backwards compat */ + ret = device_create_file(rdev->dev, &dev_attr_power_profile); + if (ret) + DRM_ERROR("failed to create device file for power profile\n"); + ret = device_create_file(rdev->dev, &dev_attr_power_method); + if (ret) + DRM_ERROR("failed to 
create device file for power method\n"); + + mutex_lock(&rdev->pm.mutex); + ret = radeon_dpm_late_enable(rdev); + mutex_unlock(&rdev->pm.mutex); + if (ret) { + rdev->pm.dpm_enabled = false; + DRM_ERROR("radeon_pm_late_init failed, disabling dpm\n"); + } else { + /* set the dpm state for PX since there won't be + * a modeset to call this. + */ + radeon_pm_compute_clocks(rdev); + } + } + } else { + if (rdev->pm.num_power_states > 1) { + /* where's the best place to put these? */ + ret = device_create_file(rdev->dev, &dev_attr_power_profile); + if (ret) + DRM_ERROR("failed to create device file for power profile\n"); + ret = device_create_file(rdev->dev, &dev_attr_power_method); + if (ret) + DRM_ERROR("failed to create device file for power method\n"); + } } return ret; } From 015ec5d44756f18dc887fb9b55288cccb1a659ef Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Tue, 13 Oct 2015 12:04:28 -0400 Subject: [PATCH 1230/2091] dm thin: fix missing pool reference count decrement in pool_ctr error path commit ba30670f4d5292c4e7f7980bbd5071f7c4794cdd upstream. Fixes: ac8c3f3df ("dm thin: generate event when metadata threshold passed") Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-thin.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index e22e6c892b8a0..7073b22d4cb42 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -2959,7 +2959,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) metadata_low_callback, pool); if (r) - goto out_free_pt; + goto out_flags_changed; pt->callbacks.congested_fn = pool_is_congested; dm_table_add_target_callbacks(ti->table, &pt->callbacks); From dd703751ffe7818e43c85f4fea509c2868dce937 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 31 Aug 2015 15:21:39 +0300 Subject: [PATCH 1231/2091] rbd: fix double free on rbd_dev->header_name commit 3ebe138ac642a195c7f2efdb918f464734421fd6 upstream. If rbd_dev_image_probe() in rbd_dev_probe_parent() fails, header_name is freed twice: once in rbd_dev_probe_parent() and then in its caller rbd_dev_image_probe() (rbd_dev_image_probe() is called recursively to handle parent images). rbd_dev_probe_parent() is responsible for probing the parent, so it shouldn't muck with clone's fields. Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder Signed-off-by: Greg Kroah-Hartman --- drivers/block/rbd.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 010ce0b1f517f..fe8f1e4b4c7c2 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -5174,7 +5174,6 @@ static int rbd_dev_probe_parent(struct rbd_device *rbd_dev) out_err: if (parent) { rbd_dev_unparent(rbd_dev); - kfree(rbd_dev->header_name); rbd_dev_destroy(parent); } else { rbd_put_client(rbdc); From 3905f7abd0410acdd497661e84c7f6ef672c1e04 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 12 May 2015 16:41:48 +0200 Subject: [PATCH 1232/2091] sched/preempt: Rename PREEMPT_CHECK_OFFSET to PREEMPT_DISABLE_OFFSET commit 90b62b5129d5cb50f62f40e684de7a1961e57197 upstream. "CHECK" suggests it's only used as a comparison mask. But now it's used further as a config-conditional preempt disabler offset. Lets disambiguate this name. 
Signed-off-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1431441711-29753-4-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar Signed-off-by: Mike Galbraith Signed-off-by: Greg Kroah-Hartman --- include/linux/preempt_mask.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/preempt_mask.h b/include/linux/preempt_mask.h index dbeec4d4a3bea..1f654ee836b7d 100644 --- a/include/linux/preempt_mask.h +++ b/include/linux/preempt_mask.h @@ -72,9 +72,9 @@ #define in_nmi() (preempt_count() & NMI_MASK) #if defined(CONFIG_PREEMPT_COUNT) -# define PREEMPT_CHECK_OFFSET 1 +# define PREEMPT_DISABLE_OFFSET 1 #else -# define PREEMPT_CHECK_OFFSET 0 +# define PREEMPT_DISABLE_OFFSET 0 #endif /* @@ -90,7 +90,7 @@ * * Work as expected. */ -#define SOFTIRQ_LOCK_OFFSET (SOFTIRQ_DISABLE_OFFSET + PREEMPT_CHECK_OFFSET) +#define SOFTIRQ_LOCK_OFFSET (SOFTIRQ_DISABLE_OFFSET + PREEMPT_DISABLE_OFFSET) /* * Are we running in atomic context? WARNING: this macro cannot @@ -106,7 +106,7 @@ * (used by the scheduler, *after* releasing the kernel lock) */ #define in_atomic_preempt_off() \ - ((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_CHECK_OFFSET) + ((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_DISABLE_OFFSET) #ifdef CONFIG_PREEMPT_COUNT # define preemptible() (preempt_count() == 0 && !irqs_disabled()) From 98197d3de58a62785be3e421864d6145955f197d Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Wed, 15 Jul 2015 12:52:04 +0300 Subject: [PATCH 1233/2091] sched/preempt: Fix cond_resched_lock() and cond_resched_softirq() commit fe32d3cd5e8eb0f82e459763374aa80797023403 upstream. These functions check should_resched() before unlocking spinlock/bh-enable: preempt_count always non-zero => should_resched() always returns false. cond_resched_lock() worked iff spin_needbreak is set. This patch adds argument "preempt_offset" to should_resched(). preempt_count offset constants for that: PREEMPT_DISABLE_OFFSET - offset after preempt_disable() PREEMPT_LOCK_OFFSET - offset after spin_lock() SOFTIRQ_DISABLE_OFFSET - offset after local_bh_distable() SOFTIRQ_LOCK_OFFSET - offset after spin_lock_bh() Signed-off-by: Konstantin Khlebnikov Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Graf Cc: Boris Ostrovsky Cc: David Vrabel Cc: Linus Torvalds Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: bdb438065890 ("sched: Extract the basic add/sub preempt_count modifiers") Link: http://lkml.kernel.org/r/20150715095204.12246.98268.stgit@buzz Signed-off-by: Ingo Molnar Signed-off-by: Mike Galbraith Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/preempt.h | 4 ++-- include/asm-generic/preempt.h | 5 +++-- include/linux/preempt.h | 5 +++-- include/linux/preempt_mask.h | 14 +++++++++++--- include/linux/sched.h | 6 ------ kernel/sched/core.c | 6 +++--- 6 files changed, 22 insertions(+), 18 deletions(-) diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h index 8f32718425339..67b6cd00a44f8 100644 --- a/arch/x86/include/asm/preempt.h +++ b/arch/x86/include/asm/preempt.h @@ -90,9 +90,9 @@ static __always_inline bool __preempt_count_dec_and_test(void) /* * Returns true when we need to resched and can (barring IRQ state). 
*/ -static __always_inline bool should_resched(void) +static __always_inline bool should_resched(int preempt_offset) { - return unlikely(!raw_cpu_read_4(__preempt_count)); + return unlikely(raw_cpu_read_4(__preempt_count) == preempt_offset); } #ifdef CONFIG_PREEMPT diff --git a/include/asm-generic/preempt.h b/include/asm-generic/preempt.h index eb6f9e6c30756..b6a53e8e526a5 100644 --- a/include/asm-generic/preempt.h +++ b/include/asm-generic/preempt.h @@ -71,9 +71,10 @@ static __always_inline bool __preempt_count_dec_and_test(void) /* * Returns true when we need to resched and can (barring IRQ state). */ -static __always_inline bool should_resched(void) +static __always_inline bool should_resched(int preempt_offset) { - return unlikely(!preempt_count() && tif_need_resched()); + return unlikely(preempt_count() == preempt_offset && + tif_need_resched()); } #ifdef CONFIG_PREEMPT diff --git a/include/linux/preempt.h b/include/linux/preempt.h index de83b4eb16428..8cd6725c5758c 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -20,7 +20,8 @@ #if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER) extern void preempt_count_add(int val); extern void preempt_count_sub(int val); -#define preempt_count_dec_and_test() ({ preempt_count_sub(1); should_resched(); }) +#define preempt_count_dec_and_test() \ + ({ preempt_count_sub(1); should_resched(0); }) #else #define preempt_count_add(val) __preempt_count_add(val) #define preempt_count_sub(val) __preempt_count_sub(val) @@ -59,7 +60,7 @@ do { \ #define preempt_check_resched() \ do { \ - if (should_resched()) \ + if (should_resched(0)) \ __preempt_schedule(); \ } while (0) diff --git a/include/linux/preempt_mask.h b/include/linux/preempt_mask.h index 1f654ee836b7d..5cb25f17331a3 100644 --- a/include/linux/preempt_mask.h +++ b/include/linux/preempt_mask.h @@ -71,12 +71,20 @@ */ #define in_nmi() (preempt_count() & NMI_MASK) +/* + * The preempt_count offset after preempt_disable(); + */ #if defined(CONFIG_PREEMPT_COUNT) -# define PREEMPT_DISABLE_OFFSET 1 +# define PREEMPT_DISABLE_OFFSET PREEMPT_OFFSET #else -# define PREEMPT_DISABLE_OFFSET 0 +# define PREEMPT_DISABLE_OFFSET 0 #endif +/* + * The preempt_count offset after spin_lock() + */ +#define PREEMPT_LOCK_OFFSET PREEMPT_DISABLE_OFFSET + /* * The preempt_count offset needed for things like: * @@ -90,7 +98,7 @@ * * Work as expected. */ -#define SOFTIRQ_LOCK_OFFSET (SOFTIRQ_DISABLE_OFFSET + PREEMPT_DISABLE_OFFSET) +#define SOFTIRQ_LOCK_OFFSET (SOFTIRQ_DISABLE_OFFSET + PREEMPT_LOCK_OFFSET) /* * Are we running in atomic context? 
WARNING: this macro cannot diff --git a/include/linux/sched.h b/include/linux/sched.h index 26a2e6122734f..61f4f2d5c882b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2834,12 +2834,6 @@ extern int _cond_resched(void); extern int __cond_resched_lock(spinlock_t *lock); -#ifdef CONFIG_PREEMPT_COUNT -#define PREEMPT_LOCK_OFFSET PREEMPT_OFFSET -#else -#define PREEMPT_LOCK_OFFSET 0 -#endif - #define cond_resched_lock(lock) ({ \ ___might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET);\ __cond_resched_lock(lock); \ diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 8476206a1e19d..4d870eb6086ba 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4232,7 +4232,7 @@ SYSCALL_DEFINE0(sched_yield) int __sched _cond_resched(void) { - if (should_resched()) { + if (should_resched(0)) { preempt_schedule_common(); return 1; } @@ -4250,7 +4250,7 @@ EXPORT_SYMBOL(_cond_resched); */ int __cond_resched_lock(spinlock_t *lock) { - int resched = should_resched(); + int resched = should_resched(PREEMPT_LOCK_OFFSET); int ret = 0; lockdep_assert_held(lock); @@ -4272,7 +4272,7 @@ int __sched __cond_resched_softirq(void) { BUG_ON(!in_softirq()); - if (should_resched()) { + if (should_resched(SOFTIRQ_DISABLE_OFFSET)) { local_bh_enable(); preempt_schedule_common(); local_bh_disable(); From e44ddb67769172af7ef7d2b37e6a65df56317698 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 23 Sep 2015 16:35:09 +0200 Subject: [PATCH 1234/2091] pinctrl: imx25: ensure that a pin with id i is at position i in the info array MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 9911a2d5e9d14e39692b751929a92cb5a1d9d0e0 upstream. The code in pinctrl-imx.c only works correctly if in the imx_pinctrl_soc_info passed to imx_pinctrl_probe we have: info->pins[i].number = i conf_reg(info->pins[i]) = 4 * i (which conf_reg(pin) being the offset of the pin's configuration register). When the imx25 specific part was introduced in b4a87c9b966f ("pinctrl: pinctrl-imx: add imx25 pinctrl driver") we had: info->pins[i].number = i + 1 conf_reg(info->pins[i]) = 4 * i . Commit 34027ca2bbc6 ("pinctrl: imx25: fix numbering for pins") tried to fix that but made the situation: info->pins[i-1].number = i conf_reg(info->pins[i-1]) = 4 * i which is hardly better but fixed the error seen back then. 
So insert another reserved entry in the array to finally yield: info->pins[i].number = i conf_reg(info->pins[i]) = 4 * i Fixes: 34027ca2bbc6 ("pinctrl: imx25: fix numbering for pins") Signed-off-by: Uwe Kleine-König Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/freescale/pinctrl-imx25.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/freescale/pinctrl-imx25.c b/drivers/pinctrl/freescale/pinctrl-imx25.c index faf635654312a..293ed4381cc0e 100644 --- a/drivers/pinctrl/freescale/pinctrl-imx25.c +++ b/drivers/pinctrl/freescale/pinctrl-imx25.c @@ -26,7 +26,8 @@ #include "pinctrl-imx.h" enum imx25_pads { - MX25_PAD_RESERVE0 = 1, + MX25_PAD_RESERVE0 = 0, + MX25_PAD_RESERVE1 = 1, MX25_PAD_A10 = 2, MX25_PAD_A13 = 3, MX25_PAD_A14 = 4, @@ -169,6 +170,7 @@ enum imx25_pads { /* Pad names for the pinmux subsystem */ static const struct pinctrl_pin_desc imx25_pinctrl_pads[] = { IMX_PINCTRL_PIN(MX25_PAD_RESERVE0), + IMX_PINCTRL_PIN(MX25_PAD_RESERVE1), IMX_PINCTRL_PIN(MX25_PAD_A10), IMX_PINCTRL_PIN(MX25_PAD_A13), IMX_PINCTRL_PIN(MX25_PAD_A14), From ae7d500538b57f329614db03ed2e46d1e54d8192 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Thu, 1 Oct 2015 13:06:07 +0100 Subject: [PATCH 1235/2091] arm64: Fix THP protection change logic commit 1a541b4e3cd6f5795022514114854b3e1345f24e upstream. 6910fa1 ("arm64: enable PTE type bit in the mask for pte_modify") fixes a problem whereby a large block of PROT_NONE mapped memory is incorrectly mapped as block descriptors when mprotect is called. Unfortunately, a subtle bug was introduced by this fix to the THP logic. If one mmaps a large block of memory, then faults it such that it is collapsed into THPs; resulting calls to mprotect on this area of memory will lead to incorrect table descriptors being written instead of block descriptors. This is because pmd_modify calls pte_modify which is now allowed to modify the type of the page table entry. This patch reverts commit 6910fa16dbe142f6a0fd0fd7c249f9883ff7fc8a, and fixes the problem it was trying to address by adjusting PAGE_NONE to represent a table entry. Thus no change in pte type is required when moving from PROT_NONE to a different protection. Fixes: 6910fa16dbe1 ("arm64: enable PTE type bit in the mask for pte_modify") Cc: # 4.0+ Cc: Feng Kan Reported-by: Ganapatrao Kulkarni Tested-by: Ganapatrao Kulkarni Reviewed-by: Catalin Marinas [SteveC: backported 1a541b4e3cd6f5795022514114854b3e1345f24e to 4.1 and 4.2 stable. Just one minor fix to second part to allow patch to apply cleanly, no logic changed.] 
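For intuition, the effect of narrowing the pte_modify() mask can be reproduced in a few lines of userspace C. The bit positions below are simplified stand-ins rather than the kernel's arm64 definitions; the point is only that keeping the table bit out of the modifiable mask stops mprotect() from rewriting a block (huge page) descriptor as a table descriptor:

  #include <stdio.h>
  #include <stdint.h>

  /* Illustrative stand-ins for the arm64 descriptor type bits. */
  #define PTE_VALID      (1ULL << 0)
  #define PTE_TABLE_BIT  (1ULL << 1)   /* at PMD level: set = table, clear = block */
  #define PTE_TYPE_MASK  (3ULL << 0)

  static uint64_t pte_modify(uint64_t pte, uint64_t newprot, uint64_t mask)
  {
          /* replace only the bits covered by 'mask' */
          return (pte & ~mask) | (newprot & mask);
  }

  int main(void)
  {
          uint64_t thp_block = PTE_VALID;                  /* huge-page block entry, type 01 */
          uint64_t newprot   = PTE_VALID | PTE_TABLE_BIT;  /* PAGE_* protections carry the   */
                                                           /* PTE "page" type, which is 11   */

          uint64_t with_old_mask = pte_modify(thp_block, newprot, PTE_TYPE_MASK);
          uint64_t with_new_mask = pte_modify(thp_block, newprot, PTE_VALID);

          printf("old mask -> type bits 0x%llx (binary 11: rewritten as a table descriptor)\n",
                 (unsigned long long)(with_old_mask & PTE_TYPE_MASK));
          printf("new mask -> type bits 0x%llx (binary 01: block descriptor preserved)\n",
                 (unsigned long long)(with_new_mask & PTE_TYPE_MASK));
          return 0;
  }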
Signed-off-by: Steve Capper Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/pgtable.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 56283f8a675c5..cf73194227681 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -80,7 +80,7 @@ extern void __pgd_error(const char *file, int line, unsigned long val); #define PAGE_S2 __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY) #define PAGE_S2_DEVICE __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_UXN) -#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_TYPE_MASK) | PTE_PROT_NONE | PTE_PXN | PTE_UXN) +#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_PXN | PTE_UXN) #define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE) #define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE) #define PAGE_COPY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) @@ -460,7 +460,7 @@ static inline pud_t *pud_offset(pgd_t *pgd, unsigned long addr) static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { const pteval_t mask = PTE_USER | PTE_PXN | PTE_UXN | PTE_RDONLY | - PTE_PROT_NONE | PTE_WRITE | PTE_TYPE_MASK; + PTE_PROT_NONE | PTE_VALID | PTE_WRITE; pte_val(pte) = (pte_val(pte) & ~mask) | (pgprot_val(newprot) & mask); return pte; } From 23a0f8cd351ed6908c24273a174c8c2bd807134f Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Mon, 28 Sep 2015 16:46:06 -0500 Subject: [PATCH 1236/2091] svcrdma: handle rdma read with a non-zero initial page offset commit c91aed9896946721bb30705ea2904edb3725dd61 upstream. The server rdma_read_chunk_lcl() and rdma_read_chunk_frmr() functions were not taking into account the initial page_offset when determining the rdma read length. This resulted in a read who's starting address and length exceeded the base/bounds of the frmr. The server gets an async error from the rdma device and kills the connection, and the client then reconnects and resends. This repeats indefinitely, and the application hangs. Most work loads don't tickle this bug apparently, but one test hit it every time: building the linux kernel on a 16 core node with 'make -j 16 O=/mnt/0' where /mnt/0 is a ramdisk mounted via NFSRDMA. This bug seems to only be tripped with devices having small fastreg page list depths. I didn't see it with mlx4, for instance. Fixes: 0bf4828983df ('svcrdma: refactor marshalling logic') Signed-off-by: Steve Wise Tested-by: Chuck Lever Signed-off-by: J. 
Bruce Fields Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index f9f13a32ddb82..2873b8d65608e 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -146,7 +146,8 @@ int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt, ctxt->read_hdr = head; pages_needed = min_t(int, pages_needed, rdma_read_max_sge(xprt, pages_needed)); - read = min_t(int, pages_needed << PAGE_SHIFT, rs_length); + read = min_t(int, (pages_needed << PAGE_SHIFT) - *page_offset, + rs_length); for (pno = 0; pno < pages_needed; pno++) { int len = min_t(int, rs_length, PAGE_SIZE - pg_off); @@ -245,7 +246,8 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, ctxt->direction = DMA_FROM_DEVICE; ctxt->frmr = frmr; pages_needed = min_t(int, pages_needed, xprt->sc_frmr_pg_list_len); - read = min_t(int, pages_needed << PAGE_SHIFT, rs_length); + read = min_t(int, (pages_needed << PAGE_SHIFT) - *page_offset, + rs_length); frmr->kva = page_address(rqstp->rq_arg.pages[pg_no]); frmr->direction = DMA_FROM_DEVICE; From 0bdb53e1b4b3d99acb7579cf68ffdaee9ebb4e4f Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 11 Jul 2015 06:43:02 -0400 Subject: [PATCH 1237/2091] locks: have flock_lock_file take an inode pointer instead of a filp commit bcd7f78d078ff6197715c1ed070c92aca57ec12c upstream. ...and rename it to better describe how it works. In order to fix a use-after-free in NFS, we need to be able to remove locks from an inode after the filp associated with them may have already been freed. flock_lock_file already only dereferences the filp to get to the inode, so just change it so the callers do that. All of the callers already pass in a lock request that has the fl_file set properly, so we don't need to pass it in individually. With that change it now only dereferences the filp to get to the inode, so just push that out to the callers. Signed-off-by: Jeff Layton Reviewed-by: "J. Bruce Fields" Tested-by: "J. Bruce Fields" Cc: William Dauchy Signed-off-by: Greg Kroah-Hartman --- fs/locks.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/locks.c b/fs/locks.c index 653faabb07f46..4366b7c54e6da 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -862,12 +862,11 @@ static int posix_locks_deadlock(struct file_lock *caller_fl, * whether or not a lock was successfully freed by testing the return * value for -ENOENT. 
*/ -static int flock_lock_file(struct file *filp, struct file_lock *request) +static int flock_lock_inode(struct inode *inode, struct file_lock *request) { struct file_lock *new_fl = NULL; struct file_lock *fl; struct file_lock_context *ctx; - struct inode *inode = file_inode(filp); int error = 0; bool found = false; LIST_HEAD(dispose); @@ -890,7 +889,7 @@ static int flock_lock_file(struct file *filp, struct file_lock *request) goto find_conflict; list_for_each_entry(fl, &ctx->flc_flock, fl_list) { - if (filp != fl->fl_file) + if (request->fl_file != fl->fl_file) continue; if (request->fl_type == fl->fl_type) goto out; @@ -1862,7 +1861,7 @@ int flock_lock_file_wait(struct file *filp, struct file_lock *fl) int error; might_sleep(); for (;;) { - error = flock_lock_file(filp, fl); + error = flock_lock_inode(file_inode(filp), fl); if (error != FILE_LOCK_DEFERRED) break; error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); @@ -2401,7 +2400,8 @@ locks_remove_flock(struct file *filp) .fl_type = F_UNLCK, .fl_end = OFFSET_MAX, }; - struct file_lock_context *flctx = file_inode(filp)->i_flctx; + struct inode *inode = file_inode(filp); + struct file_lock_context *flctx = inode->i_flctx; if (list_empty(&flctx->flc_flock)) return; @@ -2409,7 +2409,7 @@ locks_remove_flock(struct file *filp) if (filp->f_op->flock) filp->f_op->flock(filp, F_SETLKW, &fl); else - flock_lock_file(filp, &fl); + flock_lock_inode(inode, &fl); if (fl.fl_ops && fl.fl_ops->fl_release_private) fl.fl_ops->fl_release_private(&fl); From b2540f146402c1cf28ea5a84ec5bb1f4c332e59e Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 11 Jul 2015 06:43:02 -0400 Subject: [PATCH 1238/2091] locks: new helpers - flock_lock_inode_wait and posix_lock_inode_wait commit 29d01b22eaa18d8b46091d3c98c6001c49f78e4a upstream. Allow callers to pass in an inode instead of a filp. Signed-off-by: Jeff Layton Reviewed-by: "J. Bruce Fields" Tested-by: "J. Bruce Fields" Cc: William Dauchy Signed-off-by: Greg Kroah-Hartman --- fs/locks.c | 50 +++++++++++++++++++++++++++++++++++----------- include/linux/fs.h | 14 +++++++++++++ 2 files changed, 52 insertions(+), 12 deletions(-) diff --git a/fs/locks.c b/fs/locks.c index 4366b7c54e6da..ba268a503c1bc 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1163,20 +1163,19 @@ int posix_lock_file(struct file *filp, struct file_lock *fl, EXPORT_SYMBOL(posix_lock_file); /** - * posix_lock_file_wait - Apply a POSIX-style lock to a file - * @filp: The file to apply the lock to + * posix_lock_inode_wait - Apply a POSIX-style lock to a file + * @inode: inode of file to which lock request should be applied * @fl: The lock to be applied * - * Add a POSIX style lock to a file. - * We merge adjacent & overlapping locks whenever possible. - * POSIX locks are sorted by owner task, then by starting address + * Variant of posix_lock_file_wait that does not take a filp, and so can be + * used after the filp has already been torn down. 
*/ -int posix_lock_file_wait(struct file *filp, struct file_lock *fl) +int posix_lock_inode_wait(struct inode *inode, struct file_lock *fl) { int error; might_sleep (); for (;;) { - error = posix_lock_file(filp, fl, NULL); + error = __posix_lock_file(inode, fl, NULL); if (error != FILE_LOCK_DEFERRED) break; error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); @@ -1188,6 +1187,21 @@ int posix_lock_file_wait(struct file *filp, struct file_lock *fl) } return error; } +EXPORT_SYMBOL(posix_lock_inode_wait); + +/** + * posix_lock_file_wait - Apply a POSIX-style lock to a file + * @filp: The file to apply the lock to + * @fl: The lock to be applied + * + * Add a POSIX style lock to a file. + * We merge adjacent & overlapping locks whenever possible. + * POSIX locks are sorted by owner task, then by starting address + */ +int posix_lock_file_wait(struct file *filp, struct file_lock *fl) +{ + return posix_lock_inode_wait(file_inode(filp), fl); +} EXPORT_SYMBOL(posix_lock_file_wait); /** @@ -1850,18 +1864,18 @@ int fcntl_setlease(unsigned int fd, struct file *filp, long arg) } /** - * flock_lock_file_wait - Apply a FLOCK-style lock to a file - * @filp: The file to apply the lock to + * flock_lock_inode_wait - Apply a FLOCK-style lock to a file + * @inode: inode of the file to apply to * @fl: The lock to be applied * - * Add a FLOCK style lock to a file. + * Apply a FLOCK style lock request to an inode. */ -int flock_lock_file_wait(struct file *filp, struct file_lock *fl) +int flock_lock_inode_wait(struct inode *inode, struct file_lock *fl) { int error; might_sleep(); for (;;) { - error = flock_lock_inode(file_inode(filp), fl); + error = flock_lock_inode(inode, fl); if (error != FILE_LOCK_DEFERRED) break; error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); @@ -1873,7 +1887,19 @@ int flock_lock_file_wait(struct file *filp, struct file_lock *fl) } return error; } +EXPORT_SYMBOL(flock_lock_inode_wait); +/** + * flock_lock_file_wait - Apply a FLOCK-style lock to a file + * @filp: The file to apply the lock to + * @fl: The lock to be applied + * + * Add a FLOCK style lock to a file. 
+ */ +int flock_lock_file_wait(struct file *filp, struct file_lock *fl) +{ + return flock_lock_inode_wait(file_inode(filp), fl); +} EXPORT_SYMBOL(flock_lock_file_wait); /** diff --git a/include/linux/fs.h b/include/linux/fs.h index f93192333b37f..fed7dea96a156 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1036,11 +1036,13 @@ extern void locks_remove_file(struct file *); extern void locks_release_private(struct file_lock *); extern void posix_test_lock(struct file *, struct file_lock *); extern int posix_lock_file(struct file *, struct file_lock *, struct file_lock *); +extern int posix_lock_inode_wait(struct inode *, struct file_lock *); extern int posix_lock_file_wait(struct file *, struct file_lock *); extern int posix_unblock_lock(struct file_lock *); extern int vfs_test_lock(struct file *, struct file_lock *); extern int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *); extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl); +extern int flock_lock_inode_wait(struct inode *inode, struct file_lock *fl); extern int flock_lock_file_wait(struct file *filp, struct file_lock *fl); extern int __break_lease(struct inode *inode, unsigned int flags, unsigned int type); extern void lease_get_mtime(struct inode *, struct timespec *time); @@ -1127,6 +1129,12 @@ static inline int posix_lock_file(struct file *filp, struct file_lock *fl, return -ENOLCK; } +static inline int posix_lock_inode_wait(struct inode *inode, + struct file_lock *fl) +{ + return -ENOLCK; +} + static inline int posix_lock_file_wait(struct file *filp, struct file_lock *fl) { return -ENOLCK; @@ -1153,6 +1161,12 @@ static inline int vfs_cancel_lock(struct file *filp, struct file_lock *fl) return 0; } +static inline int flock_lock_inode_wait(struct inode *inode, + struct file_lock *request) +{ + return -ENOLCK; +} + static inline int flock_lock_file_wait(struct file *filp, struct file_lock *request) { From c7fc0d83869f71e89bdc7cb4ee65ff02ac66159f Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 11 Jul 2015 06:43:03 -0400 Subject: [PATCH 1239/2091] locks: inline posix_lock_file_wait and flock_lock_file_wait commit ee296d7c5709440f8abd36b5b65c6b3e388538d9 upstream. They just call file_inode and then the corresponding *_inode_file_wait function. Just make them static inlines instead. Signed-off-by: Jeff Layton Cc: William Dauchy Signed-off-by: Greg Kroah-Hartman --- fs/locks.c | 28 ---------------------------- include/linux/fs.h | 32 ++++++++++++++------------------ 2 files changed, 14 insertions(+), 46 deletions(-) diff --git a/fs/locks.c b/fs/locks.c index ba268a503c1bc..d3d558ba4da79 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1189,21 +1189,6 @@ int posix_lock_inode_wait(struct inode *inode, struct file_lock *fl) } EXPORT_SYMBOL(posix_lock_inode_wait); -/** - * posix_lock_file_wait - Apply a POSIX-style lock to a file - * @filp: The file to apply the lock to - * @fl: The lock to be applied - * - * Add a POSIX style lock to a file. - * We merge adjacent & overlapping locks whenever possible. 
- * POSIX locks are sorted by owner task, then by starting address - */ -int posix_lock_file_wait(struct file *filp, struct file_lock *fl) -{ - return posix_lock_inode_wait(file_inode(filp), fl); -} -EXPORT_SYMBOL(posix_lock_file_wait); - /** * locks_mandatory_locked - Check for an active lock * @file: the file to check @@ -1889,19 +1874,6 @@ int flock_lock_inode_wait(struct inode *inode, struct file_lock *fl) } EXPORT_SYMBOL(flock_lock_inode_wait); -/** - * flock_lock_file_wait - Apply a FLOCK-style lock to a file - * @filp: The file to apply the lock to - * @fl: The lock to be applied - * - * Add a FLOCK style lock to a file. - */ -int flock_lock_file_wait(struct file *filp, struct file_lock *fl) -{ - return flock_lock_inode_wait(file_inode(filp), fl); -} -EXPORT_SYMBOL(flock_lock_file_wait); - /** * sys_flock: - flock() system call. * @fd: the file descriptor to lock. diff --git a/include/linux/fs.h b/include/linux/fs.h index fed7dea96a156..fdc369fa69e8d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1037,13 +1037,11 @@ extern void locks_release_private(struct file_lock *); extern void posix_test_lock(struct file *, struct file_lock *); extern int posix_lock_file(struct file *, struct file_lock *, struct file_lock *); extern int posix_lock_inode_wait(struct inode *, struct file_lock *); -extern int posix_lock_file_wait(struct file *, struct file_lock *); extern int posix_unblock_lock(struct file_lock *); extern int vfs_test_lock(struct file *, struct file_lock *); extern int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *); extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl); extern int flock_lock_inode_wait(struct inode *inode, struct file_lock *fl); -extern int flock_lock_file_wait(struct file *filp, struct file_lock *fl); extern int __break_lease(struct inode *inode, unsigned int flags, unsigned int type); extern void lease_get_mtime(struct inode *, struct timespec *time); extern int generic_setlease(struct file *, long, struct file_lock **, void **priv); @@ -1135,11 +1133,6 @@ static inline int posix_lock_inode_wait(struct inode *inode, return -ENOLCK; } -static inline int posix_lock_file_wait(struct file *filp, struct file_lock *fl) -{ - return -ENOLCK; -} - static inline int posix_unblock_lock(struct file_lock *waiter) { return -ENOENT; @@ -1167,12 +1160,6 @@ static inline int flock_lock_inode_wait(struct inode *inode, return -ENOLCK; } -static inline int flock_lock_file_wait(struct file *filp, - struct file_lock *request) -{ - return -ENOLCK; -} - static inline int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) { return 0; @@ -1206,6 +1193,20 @@ static inline void show_fd_locks(struct seq_file *f, struct file *filp, struct files_struct *files) {} #endif /* !CONFIG_FILE_LOCKING */ +static inline struct inode *file_inode(const struct file *f) +{ + return f->f_inode; +} + +static inline int posix_lock_file_wait(struct file *filp, struct file_lock *fl) +{ + return posix_lock_inode_wait(file_inode(filp), fl); +} + +static inline int flock_lock_file_wait(struct file *filp, struct file_lock *fl) +{ + return flock_lock_inode_wait(file_inode(filp), fl); +} struct fasync_struct { spinlock_t fa_lock; @@ -2005,11 +2006,6 @@ extern void ihold(struct inode * inode); extern void iput(struct inode *); extern int generic_update_time(struct inode *, struct timespec *, int); -static inline struct inode *file_inode(const struct file *f) -{ - return f->f_inode; -} - /* /sys/fs */ extern struct kobject *fs_kobj; 
From 41c4e0825b9d43a5d43a90c050ac9de4387865ba Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 11 Jul 2015 06:43:03 -0400 Subject: [PATCH 1240/2091] nfs4: have do_vfs_lock take an inode pointer commit 83bfff23e9ed19f37c4ef0bba84e75bd88e5cf21 upstream. Now that we have file locking helpers that can deal with an inode instead of a filp, we can change the NFSv4 locking code to use that instead. This should fix the case where we have a filp that is closed while flock or OFD locks are set on it, and the task is signaled so that it doesn't wait for the LOCKU reply to come in before the filp is freed. At that point we can end up with a use-after-free with the current code, which relies on dereferencing the fl_file in the lock request. Signed-off-by: Jeff Layton Reviewed-by: "J. Bruce Fields" Tested-by: "J. Bruce Fields" Cc: William Dauchy Signed-off-by: Greg Kroah-Hartman --- fs/nfs/nfs4proc.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c245874d7e9d0..8f393fcc313b1 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5367,15 +5367,15 @@ static int nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock * return err; } -static int do_vfs_lock(struct file *file, struct file_lock *fl) +static int do_vfs_lock(struct inode *inode, struct file_lock *fl) { int res = 0; switch (fl->fl_flags & (FL_POSIX|FL_FLOCK)) { case FL_POSIX: - res = posix_lock_file_wait(file, fl); + res = posix_lock_inode_wait(inode, fl); break; case FL_FLOCK: - res = flock_lock_file_wait(file, fl); + res = flock_lock_inode_wait(inode, fl); break; default: BUG(); @@ -5435,7 +5435,7 @@ static void nfs4_locku_done(struct rpc_task *task, void *data) switch (task->tk_status) { case 0: renew_lease(calldata->server, calldata->timestamp); - do_vfs_lock(calldata->fl.fl_file, &calldata->fl); + do_vfs_lock(calldata->lsp->ls_state->inode, &calldata->fl); if (nfs4_update_lock_stateid(calldata->lsp, &calldata->res.stateid)) break; @@ -5543,7 +5543,7 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock * mutex_lock(&sp->so_delegreturn_mutex); /* Exclude nfs4_reclaim_open_stateid() - note nesting! */ down_read(&nfsi->rwsem); - if (do_vfs_lock(request->fl_file, request) == -ENOENT) { + if (do_vfs_lock(inode, request) == -ENOENT) { up_read(&nfsi->rwsem); mutex_unlock(&sp->so_delegreturn_mutex); goto out; @@ -5684,7 +5684,7 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata) data->timestamp); if (data->arg.new_lock) { data->fl.fl_flags &= ~(FL_SLEEP | FL_ACCESS); - if (do_vfs_lock(data->fl.fl_file, &data->fl) < 0) { + if (do_vfs_lock(lsp->ls_state->inode, &data->fl) < 0) { rpc_restart_call_prepare(task); break; } @@ -5926,7 +5926,7 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock if (status != 0) goto out; request->fl_flags |= FL_ACCESS; - status = do_vfs_lock(request->fl_file, request); + status = do_vfs_lock(state->inode, request); if (status < 0) goto out; down_read(&nfsi->rwsem); @@ -5934,7 +5934,7 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock /* Yes: cache locks! */ /* ...but avoid races with delegation recall... 
*/ request->fl_flags = fl_flags & ~FL_SLEEP; - status = do_vfs_lock(request->fl_file, request); + status = do_vfs_lock(state->inode, request); up_read(&nfsi->rwsem); goto out; } From d906a24a7ab60868a191c25e437f92f48948b5ca Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Wed, 15 Jul 2015 12:52:01 +0300 Subject: [PATCH 1241/2091] sched/preempt, xen: Use need_resched() instead of should_resched() commit 0fa2f5cb2b0ecd8d56baa51f35f09aab234eb0bf upstream. This code is used only when CONFIG_PREEMPT=n and only in non-atomic context: xen_in_preemptible_hcall is set only in privcmd_ioctl_hypercall(). Thus preempt_count is zero and should_resched() is equal to need_resched(). Signed-off-by: Konstantin Khlebnikov Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Graf Cc: Boris Ostrovsky Cc: David Vrabel Cc: Linus Torvalds Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20150715095201.12246.49283.stgit@buzz Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- drivers/xen/preempt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/preempt.c b/drivers/xen/preempt.c index a1800c150839a..08cb419eb4e63 100644 --- a/drivers/xen/preempt.c +++ b/drivers/xen/preempt.c @@ -31,7 +31,7 @@ EXPORT_SYMBOL_GPL(xen_in_preemptible_hcall); asmlinkage __visible void xen_maybe_preempt_hcall(void) { if (unlikely(__this_cpu_read(xen_in_preemptible_hcall) - && should_resched())) { + && need_resched())) { /* * Clear flag as we may be rescheduled on a different * cpu. From 57293d164ce7c9cc91dd27050f6622edabbe0c02 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Wed, 15 Jul 2015 12:52:03 +0300 Subject: [PATCH 1242/2091] sched/preempt, powerpc, kvm: Use need_resched() instead of should_resched() commit c56dadf39761a6157239cac39e3988998c994f98 upstream. Function should_resched() is equal to (!preempt_count() && need_resched()). In preemptive kernel preempt_count here is non-zero because of vc->lock. 
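The argument in this and the related preempt patches can be checked with a toy userspace model of the preempt_count bookkeeping (names reused for readability only; this is not kernel code, and CONFIG_PREEMPT_COUNT=y is assumed):

  #include <stdio.h>
  #include <stdbool.h>

  #define PREEMPT_LOCK_OFFSET 1

  static int  preempt_count;                  /* raised by spin_lock()           */
  static bool tif_need_resched = true;        /* pretend the resched flag is set */

  static bool old_should_resched(void)        /* pre-patch semantics             */
  {
          return preempt_count == 0 && tif_need_resched;
  }

  static bool should_resched(int preempt_offset)   /* post-patch semantics       */
  {
          return preempt_count == preempt_offset && tif_need_resched;
  }

  int main(void)
  {
          preempt_count += PREEMPT_LOCK_OFFSET;     /* spin_lock(&vc->lock)      */

          printf("old should_resched():                    %d (never true under the lock)\n",
                 old_should_resched());
          printf("new should_resched(PREEMPT_LOCK_OFFSET): %d\n",
                 should_resched(PREEMPT_LOCK_OFFSET));
          return 0;
  }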
Signed-off-by: Konstantin Khlebnikov Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Graf Cc: Boris Ostrovsky Cc: David Vrabel Cc: Linus Torvalds Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20150715095203.12246.72922.stgit@buzz Signed-off-by: Ingo Molnar Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kvm/book3s_hv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index df81caab73833..f1e0e5522e3a6 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2178,7 +2178,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) vc->runner = vcpu; if (n_ceded == vc->n_runnable) { kvmppc_vcore_blocked(vc); - } else if (should_resched()) { + } else if (need_resched()) { vc->vcore_state = VCORE_PREEMPT; /* Let something else run */ cond_resched_lock(&vc->lock); From 10f9e3bce7f3ab7ab4d09a9b78c7208c9a1455f7 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 27 Oct 2015 09:52:28 +0900 Subject: [PATCH 1243/2091] Linux 4.1.12 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index c7d877b1c248e..2320f1911404d 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 1 -SUBLEVEL = 11 +SUBLEVEL = 12 EXTRAVERSION = NAME = Series 4800 From 1b4fe74c9f9af831cef9b9ec27dfdb22802838ef Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Thu, 24 Sep 2015 16:59:46 +0200 Subject: [PATCH 1244/2091] ath9k: declare required extra tx headroom commit 029cd0370241641eb70235d205aa0b90c84dce44 upstream. ath9k inserts padding between the 802.11 header and the data area (to align it). Since it didn't declare this extra required headroom, this led to some nasty issues like randomly dropped packets in some setups. Signed-off-by: Felix Fietkau Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath9k/init.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/ath/ath9k/init.c b/drivers/net/wireless/ath/ath9k/init.c index f8d11efa7b0f1..46a389c20bfc1 100644 --- a/drivers/net/wireless/ath/ath9k/init.c +++ b/drivers/net/wireless/ath/ath9k/init.c @@ -874,6 +874,7 @@ static void ath9k_set_hw_capab(struct ath_softc *sc, struct ieee80211_hw *hw) hw->max_rate_tries = 10; hw->sta_data_size = sizeof(struct ath_node); hw->vif_data_size = sizeof(struct ath_vif); + hw->extra_tx_headroom = 4; hw->wiphy->available_antennas_rx = BIT(ah->caps.max_rxchains) - 1; hw->wiphy->available_antennas_tx = BIT(ah->caps.max_txchains) - 1; From f45118528da06f294cc4aa6867dc431b9fe54001 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 15 Sep 2015 14:36:09 +0200 Subject: [PATCH 1245/2091] iwlwifi: dvm: fix D3 firmware PN programming commit 5bd166872d8f99f156fac191299d24f828bb2348 upstream. The code to send the RX PN data (for each TID) to the firmware has a devastating bug: it overwrites the data for TID 0 with all the TID data, leaving the remaining TIDs zeroed. This will allow replays to actually be accepted by the firmware, which could allow waking up the system. 
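A self-contained sketch of the indexing mistake, with structure names simplified and no driver code involved, shows why only TID 0 ends up programmed:

  #include <stdio.h>
  #include <stdint.h>

  #define NUM_TIDS 8

  struct aes_sc { uint64_t pn; };

  /* Pack a 6-byte CCMP PN (pn[0] most significant) the same way the driver
   * does before handing it to the firmware. */
  static uint64_t pack_pn(const uint8_t *pn)
  {
          return  (uint64_t)pn[5]        | ((uint64_t)pn[4] << 8)  |
                 ((uint64_t)pn[3] << 16) | ((uint64_t)pn[2] << 24) |
                 ((uint64_t)pn[1] << 32) | ((uint64_t)pn[0] << 40);
  }

  int main(void)
  {
          struct aes_sc buggy[NUM_TIDS] = {0}, fixed[NUM_TIDS] = {0};
          struct aes_sc *aes_sc;
          uint8_t pn[6] = {0};
          int i;

          for (i = 0; i < NUM_TIDS; i++) {
                  pn[5] = (uint8_t)(i + 1);          /* distinct PN per TID       */

                  aes_sc = buggy;
                  aes_sc->pn = pack_pn(pn);          /* bug: always writes TID 0  */

                  fixed[i].pn = pack_pn(pn);         /* fix: one entry per TID    */
          }

          for (i = 0; i < NUM_TIDS; i++)
                  printf("TID %d: buggy=%llu fixed=%llu\n", i,
                         (unsigned long long)buggy[i].pn,
                         (unsigned long long)fixed[i].pn);
          return 0;
  }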
Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/iwlwifi/dvm/lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/iwlwifi/dvm/lib.c b/drivers/net/wireless/iwlwifi/dvm/lib.c index 1d2223df5cb01..e7d3566c714bc 100644 --- a/drivers/net/wireless/iwlwifi/dvm/lib.c +++ b/drivers/net/wireless/iwlwifi/dvm/lib.c @@ -1022,7 +1022,7 @@ static void iwlagn_wowlan_program_keys(struct ieee80211_hw *hw, u8 *pn = seq.ccmp.pn; ieee80211_get_key_rx_seq(key, i, &seq); - aes_sc->pn = cpu_to_le64( + aes_sc[i].pn = cpu_to_le64( (u64)pn[5] | ((u64)pn[4] << 8) | ((u64)pn[3] << 16) | From 02fe6ec79d4ddd0624406a49af1939c99191f6d0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 22 Sep 2015 10:47:27 +0200 Subject: [PATCH 1246/2091] iwlwifi: fix firmware filename for 3160 commit b5a48134f8af08f5243328f8a0b05fc5ae7cf343 upstream. The MODULE_FIRMWARE() for 3160 should be using the 7260 version as it's done in the device configuration struct instead of referencing IWL3160_UCODE_API_OK which doesn't even exist. Reported-by: Hauke Mehrtens Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/iwlwifi/iwl-7000.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/iwlwifi/iwl-7000.c b/drivers/net/wireless/iwlwifi/iwl-7000.c index 74ad278116be3..fd83e30eaf00b 100644 --- a/drivers/net/wireless/iwlwifi/iwl-7000.c +++ b/drivers/net/wireless/iwlwifi/iwl-7000.c @@ -325,6 +325,6 @@ const struct iwl_cfg iwl7265d_n_cfg = { }; MODULE_FIRMWARE(IWL7260_MODULE_FIRMWARE(IWL7260_UCODE_API_OK)); -MODULE_FIRMWARE(IWL3160_MODULE_FIRMWARE(IWL3160_UCODE_API_OK)); +MODULE_FIRMWARE(IWL3160_MODULE_FIRMWARE(IWL7260_UCODE_API_OK)); MODULE_FIRMWARE(IWL7265_MODULE_FIRMWARE(IWL7260_UCODE_API_OK)); MODULE_FIRMWARE(IWL7265D_MODULE_FIRMWARE(IWL7260_UCODE_API_OK)); From 0f4e2e7765c5cd0f0fd84a943364e6a44b84b706 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Fri, 2 Oct 2015 11:44:30 -0500 Subject: [PATCH 1247/2091] rtlwifi: rtl8821ae: Fix system lockups on boot commit 54328e64047a54b8fc2362c2e1f0fa16c90f739f upstream. In commit 1277fa2ab2f9 ("rtlwifi: Remove the clear interrupt routine from all drivers"), the code that cleared all interrupt enable bits before setting them was removed for all PCI drivers. This fixed an issue that caused TX to be blocked for 3-5 seconds. On some RTL8821AE units, this change causes soft lockups to occur on boot. For that reason, the portion of the earlier commit that applied to rtl8821ae is reverted. Kernels 4.1 and newer are affected. See http://marc.info/?l=linux-wireless&m=144373370103285&w=2 and https://bugzilla.opensuse.org/show_bug.cgi?id=944978 for two cases where this regression affected user systems. Note that this bug does not appear on any of the developer's setups. 
For those users whose systems are affected by the TX blockage, but do not lock up on boot, a module parameter is added to disable the interrupt clear Fixes: 1277fa2ab2f9 ("rtlwifi: Remove the clear interrupt routine from all drivers") Signed-off-by: Larry Finger Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/rtlwifi/pci.h | 2 ++ drivers/net/wireless/rtlwifi/rtl8821ae/hw.c | 17 +++++++++++++++++ drivers/net/wireless/rtlwifi/rtl8821ae/sw.c | 5 +++++ drivers/net/wireless/rtlwifi/wifi.h | 3 +++ 4 files changed, 27 insertions(+) diff --git a/drivers/net/wireless/rtlwifi/pci.h b/drivers/net/wireless/rtlwifi/pci.h index d4567d12e07eb..5da6703942d9d 100644 --- a/drivers/net/wireless/rtlwifi/pci.h +++ b/drivers/net/wireless/rtlwifi/pci.h @@ -247,6 +247,8 @@ struct rtl_pci { /* MSI support */ bool msi_support; bool using_msi; + /* interrupt clear before set */ + bool int_clear; }; struct mp_adapter { diff --git a/drivers/net/wireless/rtlwifi/rtl8821ae/hw.c b/drivers/net/wireless/rtlwifi/rtl8821ae/hw.c index 3fa2fb7c8e4e9..76e52dfb2be5d 100644 --- a/drivers/net/wireless/rtlwifi/rtl8821ae/hw.c +++ b/drivers/net/wireless/rtlwifi/rtl8821ae/hw.c @@ -2253,11 +2253,28 @@ void rtl8821ae_set_qos(struct ieee80211_hw *hw, int aci) } } +static void rtl8821ae_clear_interrupt(struct ieee80211_hw *hw) +{ + struct rtl_priv *rtlpriv = rtl_priv(hw); + u32 tmp = rtl_read_dword(rtlpriv, REG_HISR); + + rtl_write_dword(rtlpriv, REG_HISR, tmp); + + tmp = rtl_read_dword(rtlpriv, REG_HISRE); + rtl_write_dword(rtlpriv, REG_HISRE, tmp); + + tmp = rtl_read_dword(rtlpriv, REG_HSISR); + rtl_write_dword(rtlpriv, REG_HSISR, tmp); +} + void rtl8821ae_enable_interrupt(struct ieee80211_hw *hw) { struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); + if (!rtlpci->int_clear) + rtl8821ae_clear_interrupt(hw);/*clear it here first*/ + rtl_write_dword(rtlpriv, REG_HIMR, rtlpci->irq_mask[0] & 0xFFFFFFFF); rtl_write_dword(rtlpriv, REG_HIMRE, rtlpci->irq_mask[1] & 0xFFFFFFFF); rtlpci->irq_enabled = true; diff --git a/drivers/net/wireless/rtlwifi/rtl8821ae/sw.c b/drivers/net/wireless/rtlwifi/rtl8821ae/sw.c index a4988121e1ab6..8ee141a55bc5c 100644 --- a/drivers/net/wireless/rtlwifi/rtl8821ae/sw.c +++ b/drivers/net/wireless/rtlwifi/rtl8821ae/sw.c @@ -96,6 +96,7 @@ int rtl8821ae_init_sw_vars(struct ieee80211_hw *hw) rtl8821ae_bt_reg_init(hw); rtlpci->msi_support = rtlpriv->cfg->mod_params->msi_support; + rtlpci->int_clear = rtlpriv->cfg->mod_params->int_clear; rtlpriv->btcoexist.btc_ops = rtl_btc_get_ops_pointer(); rtlpriv->dm.dm_initialgain_enable = 1; @@ -167,6 +168,7 @@ int rtl8821ae_init_sw_vars(struct ieee80211_hw *hw) rtlpriv->psc.swctrl_lps = rtlpriv->cfg->mod_params->swctrl_lps; rtlpriv->psc.fwctrl_lps = rtlpriv->cfg->mod_params->fwctrl_lps; rtlpci->msi_support = rtlpriv->cfg->mod_params->msi_support; + rtlpci->msi_support = rtlpriv->cfg->mod_params->int_clear; if (rtlpriv->cfg->mod_params->disable_watchdog) pr_info("watchdog disabled\n"); rtlpriv->psc.reg_fwctrl_lps = 3; @@ -308,6 +310,7 @@ static struct rtl_mod_params rtl8821ae_mod_params = { .swctrl_lps = false, .fwctrl_lps = true, .msi_support = true, + .int_clear = true, .debug = DBG_EMERG, .disable_watchdog = 0, }; @@ -437,6 +440,7 @@ module_param_named(fwlps, rtl8821ae_mod_params.fwctrl_lps, bool, 0444); module_param_named(msi, rtl8821ae_mod_params.msi_support, bool, 0444); module_param_named(disable_watchdog, rtl8821ae_mod_params.disable_watchdog, bool, 0444); +module_param_named(int_clear, 
rtl8821ae_mod_params.int_clear, bool, 0444); MODULE_PARM_DESC(swenc, "Set to 1 for software crypto (default 0)\n"); MODULE_PARM_DESC(ips, "Set to 0 to not use link power save (default 1)\n"); MODULE_PARM_DESC(swlps, "Set to 1 to use SW control power save (default 0)\n"); @@ -444,6 +448,7 @@ MODULE_PARM_DESC(fwlps, "Set to 1 to use FW control power save (default 1)\n"); MODULE_PARM_DESC(msi, "Set to 1 to use MSI interrupts mode (default 1)\n"); MODULE_PARM_DESC(debug, "Set debug level (0-5) (default 0)"); MODULE_PARM_DESC(disable_watchdog, "Set to 1 to disable the watchdog (default 0)\n"); +MODULE_PARM_DESC(int_clear, "Set to 1 to disable interrupt clear before set (default 0)\n"); static SIMPLE_DEV_PM_OPS(rtlwifi_pm_ops, rtl_pci_suspend, rtl_pci_resume); diff --git a/drivers/net/wireless/rtlwifi/wifi.h b/drivers/net/wireless/rtlwifi/wifi.h index 51572912c53dc..f1fa8100f2881 100644 --- a/drivers/net/wireless/rtlwifi/wifi.h +++ b/drivers/net/wireless/rtlwifi/wifi.h @@ -2233,6 +2233,9 @@ struct rtl_mod_params { /* default 0: 1 means disable */ bool disable_watchdog; + + /* default 0: 1 means do not disable interrupts */ + bool int_clear; }; struct rtl_hal_usbint_cfg { From a55c440ef8c233c97dd1b0ebe095b96f2a93e917 Mon Sep 17 00:00:00 2001 From: Avraham Stern Date: Mon, 31 Aug 2015 11:08:27 +0300 Subject: [PATCH 1248/2091] iwlwifi: mvm: clear csa countdown when AP is stopped commit e9cb0327b26dd7ba43a3b7a05b4b62219decf42d upstream. The csa_countdown flag was not cleared when the AP is stopped. As a result, if the AP was stopped after csa_countdown had started, all the folowing channel switch commands would fail. Fix that by clearing the csa_countdown flag when the AP is stopped. Signed-off-by: Avraham Stern Signed-off-by: Luca Coelho Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/iwlwifi/mvm/mac80211.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/iwlwifi/mvm/mac80211.c index 60c138a9bf4ff..9779c1e5688c3 100644 --- a/drivers/net/wireless/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/iwlwifi/mvm/mac80211.c @@ -2277,6 +2277,7 @@ static void iwl_mvm_stop_ap_ibss(struct ieee80211_hw *hw, iwl_mvm_remove_time_event(mvm, mvmvif, &mvmvif->time_event_data); RCU_INIT_POINTER(mvm->csa_vif, NULL); + mvmvif->csa_countdown = false; } if (rcu_access_pointer(mvm->csa_tx_blocked_vif) == vif) { From 940bea34f5034f94321984e9546f4f4b064d9ded Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 15 Sep 2015 14:36:09 +0200 Subject: [PATCH 1249/2091] iwlwifi: mvm: fix D3 firmware PN programming commit 2cf5eb3ab7bb7f2e3a70edcef236cd62c87db030 upstream. The code to send the RX PN data (for each TID) to the firmware has a devastating bug: it overwrites the data for TID 0 with all the TID data, leaving the remaining TIDs zeroed. This will allow replays to actually be accepted by the firmware, which could allow waking up the system. 
Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/iwlwifi/mvm/d3.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/mvm/d3.c b/drivers/net/wireless/iwlwifi/mvm/d3.c index 4310cf102d78e..89d6a6100c881 100644 --- a/drivers/net/wireless/iwlwifi/mvm/d3.c +++ b/drivers/net/wireless/iwlwifi/mvm/d3.c @@ -298,12 +298,12 @@ static void iwl_mvm_wowlan_program_keys(struct ieee80211_hw *hw, u8 *pn = seq.ccmp.pn; ieee80211_get_key_rx_seq(key, i, &seq); - aes_sc->pn = cpu_to_le64((u64)pn[5] | - ((u64)pn[4] << 8) | - ((u64)pn[3] << 16) | - ((u64)pn[2] << 24) | - ((u64)pn[1] << 32) | - ((u64)pn[0] << 40)); + aes_sc[i].pn = cpu_to_le64((u64)pn[5] | + ((u64)pn[4] << 8) | + ((u64)pn[3] << 16) | + ((u64)pn[2] << 24) | + ((u64)pn[1] << 32) | + ((u64)pn[0] << 40)); } data->use_rsc_tsc = true; break; From 4508582e6a831d8256fe6305967d0a8cf13f9758 Mon Sep 17 00:00:00 2001 From: Arik Nemtsov Date: Wed, 30 Sep 2015 11:19:55 +0300 Subject: [PATCH 1250/2091] iwlwifi: mvm: init card correctly on ctkill exit check commit 1a3fe0b2b6778b7866e2b3f5c9a299d5e9bbd89c upstream. During the CT-kill exit flow, the card is powered up and partially initialized to check if the temperature is already low enough. Unfortunately the init bails early because the CT-kill flag is set. Make the code bail early only for HW RF-kill, as was intended by the author. CT-kill is self-imposed and is not really RF-kill. Fixes: 31b8b343e019 ("iwlwifi: fix RFkill while calibrating") Signed-off-by: Arik Nemtsov Signed-off-by: Luca Coelho Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/iwlwifi/mvm/fw.c | 4 ++-- drivers/net/wireless/iwlwifi/mvm/mvm.h | 5 +++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/mvm/fw.c b/drivers/net/wireless/iwlwifi/mvm/fw.c index df869633f4dd9..1e1c77a597601 100644 --- a/drivers/net/wireless/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/iwlwifi/mvm/fw.c @@ -364,7 +364,7 @@ int iwl_run_init_mvm_ucode(struct iwl_mvm *mvm, bool read_nvm) * abort after reading the nvm in case RF Kill is on, we will complete * the init seq later when RF kill will switch to off */ - if (iwl_mvm_is_radio_killed(mvm)) { + if (iwl_mvm_is_radio_hw_killed(mvm)) { IWL_DEBUG_RF_KILL(mvm, "jump over all phy activities due to RF kill\n"); iwl_remove_notification(&mvm->notif_wait, &calib_wait); @@ -397,7 +397,7 @@ int iwl_run_init_mvm_ucode(struct iwl_mvm *mvm, bool read_nvm) ret = iwl_wait_notification(&mvm->notif_wait, &calib_wait, MVM_UCODE_CALIB_TIMEOUT); - if (ret && iwl_mvm_is_radio_killed(mvm)) { + if (ret && iwl_mvm_is_radio_hw_killed(mvm)) { IWL_DEBUG_RF_KILL(mvm, "RFKILL while calibrating.\n"); ret = 1; } diff --git a/drivers/net/wireless/iwlwifi/mvm/mvm.h b/drivers/net/wireless/iwlwifi/mvm/mvm.h index 6af21daaaaef9..83273adfabdd8 100644 --- a/drivers/net/wireless/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/iwlwifi/mvm/mvm.h @@ -870,6 +870,11 @@ static inline bool iwl_mvm_is_radio_killed(struct iwl_mvm *mvm) test_bit(IWL_MVM_STATUS_HW_CTKILL, &mvm->status); } +static inline bool iwl_mvm_is_radio_hw_killed(struct iwl_mvm *mvm) +{ + return test_bit(IWL_MVM_STATUS_HW_RFKILL, &mvm->status); +} + /* Must be called with rcu_read_lock() held and it can only be * released when mvmsta is not needed anymore. 
*/ From c35a6567e963b129a0258494ef690e29c6fb44ad Mon Sep 17 00:00:00 2001 From: Andrei Otcheretianski Date: Wed, 30 Sep 2015 12:26:23 +0200 Subject: [PATCH 1251/2091] iwlwifi: mvm: flush fw_dump_wk when mvm fails to start commit dbf73d4a8bb8f4e1d1f3edd3be825692279e2ef3 upstream. FW dump may be triggered when running init ucode, for example due to a sysassert. In this case fw_dump_wk may run after mvm is freed, resulting in a kernel panic. Fix it by flushing the work. Fixes: 01b988a708af ("iwlwifi: mvm: allow to collect debug data when restart is disabled") Signed-off-by: Andrei Otcheretianski Signed-off-by: Luca Coelho Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/iwlwifi/mvm/ops.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/iwlwifi/mvm/ops.c b/drivers/net/wireless/iwlwifi/mvm/ops.c index 2ea01238754eb..8d4f287dca3bc 100644 --- a/drivers/net/wireless/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/iwlwifi/mvm/ops.c @@ -589,6 +589,7 @@ iwl_op_mode_mvm_start(struct iwl_trans *trans, const struct iwl_cfg *cfg, ieee80211_unregister_hw(mvm->hw); iwl_mvm_leds_exit(mvm); out_free: + flush_delayed_work(&mvm->fw_dump_wk); iwl_phy_db_free(mvm->phy_db); kfree(mvm->scan_cmd); if (!cfg->no_power_up_nic_in_init || !mvm->nvm_file_name) From ebf5cf189b7dfc2e9c563f3e027d4d9719af2c8d Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Tue, 22 Sep 2015 09:44:39 +0300 Subject: [PATCH 1252/2091] iwlwifi: pci: add a few more PCI subvendor IDs for the 7265 series commit f08f625876476b6c4a87834dc86e3b927f4697d2 upstream. Add 3 new subdevice IDs for the 0x095A device ID and 2 for the 0x095B device ID. Reported-by: Jeremy Signed-off-by: Luca Coelho Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/iwlwifi/pcie/drv.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/wireless/iwlwifi/pcie/drv.c b/drivers/net/wireless/iwlwifi/pcie/drv.c index b185697349223..8b16949a9cb98 100644 --- a/drivers/net/wireless/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/iwlwifi/pcie/drv.c @@ -412,6 +412,11 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x095A, 0x5590, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095B, 0x5290, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095A, 0x5490, iwl7265_2ac_cfg)}, + {IWL_PCI_DEVICE(0x095A, 0x5F10, iwl7265_2ac_cfg)}, + {IWL_PCI_DEVICE(0x095B, 0x5212, iwl7265_2ac_cfg)}, + {IWL_PCI_DEVICE(0x095B, 0x520A, iwl7265_2ac_cfg)}, + {IWL_PCI_DEVICE(0x095A, 0x9000, iwl7265_2ac_cfg)}, + {IWL_PCI_DEVICE(0x095A, 0x9400, iwl7265_2ac_cfg)}, /* 8000 Series */ {IWL_PCI_DEVICE(0x24F3, 0x0010, iwl8260_2ac_cfg)}, From 1fe434c0efcc3aecc851649511b9d43e7480b79e Mon Sep 17 00:00:00 2001 From: Christian Zander Date: Wed, 10 Jun 2015 09:41:45 -0700 Subject: [PATCH 1253/2091] iommu/vt-d: fix range computation when making room for large pages commit ba2374fd2bf379f933773811fdb06cb6a5445f41 upstream. In preparation for the installation of a large page, any small page tables that may still exist in the target IOV address range are removed. However, if a scatter/gather list entry is large enough to fit more than one large page, the address space for any subsequent large pages is not cleared of conflicting small page tables. 
This can cause legitimate mapping requests to fail with errors of the form below, potentially followed by a series of IOMMU faults: ERROR: DMA PTE for vPFN 0xfde00 already set (to 7f83a4003 not 7e9e00083) In this example, a 4MiB scatter/gather list entry resulted in the successful installation of a large page @ vPFN 0xfdc00, followed by a failed attempt to install another large page @ vPFN 0xfde00, due to the presence of a pointer to a small page table @ 0x7f83a4000. To address this problem, compute the number of large pages that fit into a given scatter/gather list entry, and use it to derive the last vPFN covered by the large page(s). Signed-off-by: Christian Zander Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/intel-iommu.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index c23427951ec1c..8b0178db6a045 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2033,15 +2033,19 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, return -ENOMEM; /* It is large page*/ if (largepage_lvl > 1) { + unsigned long nr_superpages, end_pfn; + pteval |= DMA_PTE_LARGE_PAGE; lvl_pages = lvl_to_nr_pages(largepage_lvl); + + nr_superpages = sg_res / lvl_pages; + end_pfn = iov_pfn + nr_superpages * lvl_pages - 1; + /* * Ensure that old small page tables are - * removed to make room for superpage, - * if they exist. + * removed to make room for superpage(s). */ - dma_pte_free_pagetable(domain, iov_pfn, - iov_pfn + lvl_pages - 1); + dma_pte_free_pagetable(domain, iov_pfn, end_pfn); } else { pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE; } From 75deee3f94d33c9858bbc2e7d67f43eaca84f5dc Mon Sep 17 00:00:00 2001 From: Jay Cornwall Date: Wed, 16 Sep 2015 14:10:03 -0500 Subject: [PATCH 1254/2091] iommu/amd: Fix BUG when faulting a PROT_NONE VMA commit d14f6fced5f9360edca5a1325ddb7077aab1203b upstream. handle_mm_fault indirectly triggers a BUG in do_numa_page when given a VMA without read/write/execute access. Check this condition in do_fault. do_fault -> handle_mm_fault -> handle_pte_fault -> do_numa_page mm/memory.c 3147 static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, .... 3159 /* A PROT_NONE fault should not end up here */ 3160 BUG_ON(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))); Signed-off-by: Jay Cornwall Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/amd_iommu_v2.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c index 3465faf1809e4..45087c3e5c572 100644 --- a/drivers/iommu/amd_iommu_v2.c +++ b/drivers/iommu/amd_iommu_v2.c @@ -508,6 +508,13 @@ static void do_fault(struct work_struct *work) goto out; } + if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))) { + /* handle_mm_fault would BUG_ON() */ + up_read(&mm->mmap_sem); + handle_fault_error(fault); + goto out; + } + ret = handle_mm_fault(mm, vma, address, write); if (ret & VM_FAULT_ERROR) { /* failed to service fault */ From 11d9bb923f7f15361313e6f52bcd6dac0ebbe117 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 20 Oct 2015 14:59:36 +0200 Subject: [PATCH 1255/2091] iommu/amd: Don't clear DTE flags when modifying it commit cbf3ccd09d683abf1cacd36e3640872ee912d99b upstream. During device assignment/deassignment the flags in the DTE get lost, which might cause spurious faults, for example when the device tries to access the system management range. 
Fix this by not clearing the flags with the rest of the DTE. Reported-by: G. Richard Bellamy Tested-by: G. Richard Bellamy Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/amd_iommu.c | 4 ++-- drivers/iommu/amd_iommu_types.h | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index ca9f4edbb9409..f0fd5352f8ef9 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -2099,8 +2099,8 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats) static void clear_dte_entry(u16 devid) { /* remove entry from the device table seen by the hardware */ - amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV; - amd_iommu_dev_table[devid].data[1] = 0; + amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV; + amd_iommu_dev_table[devid].data[1] &= DTE_FLAG_MASK; amd_iommu_apply_erratum_63(devid); } diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index 05030e523771a..cbfd0f4c46082 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -295,6 +295,7 @@ #define IOMMU_PTE_IR (1ULL << 61) #define IOMMU_PTE_IW (1ULL << 62) +#define DTE_FLAG_MASK (0x3ffULL << 32) #define DTE_FLAG_IOTLB (0x01UL << 32) #define DTE_FLAG_GV (0x01ULL << 55) #define DTE_GLX_SHIFT (56) From 29589707855ec7ed432a614ee91e3fab38643233 Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Fri, 16 Oct 2015 15:53:29 +0530 Subject: [PATCH 1256/2091] powerpc/rtas: Validate rtas.entry before calling enter_rtas() commit 8832317f662c06f5c06e638f57bfe89a71c9b266 upstream. Currently we do not validate rtas.entry before calling enter_rtas(). This leads to a kernel oops when user space calls rtas system call on a powernv platform (see below). This patch adds code to validate rtas.entry before making enter_rtas() call. Oops: Exception in kernel mode, sig: 4 [#1] SMP NR_CPUS=1024 NUMA PowerNV task: c000000004294b80 ti: c0000007e1a78000 task.ti: c0000007e1a78000 NIP: 0000000000000000 LR: 0000000000009c14 CTR: c000000000423140 REGS: c0000007e1a7b920 TRAP: 0e40 Not tainted (3.18.17-340.el7_1.pkvm3_1_0.2400.1.ppc64le) MSR: 1000000000081000 CR: 00000000 XER: 00000000 CFAR: c000000000009c0c SOFTE: 0 NIP [0000000000000000] (null) LR [0000000000009c14] 0x9c14 Call Trace: [c0000007e1a7bba0] [c00000000041a7f4] avc_has_perm_noaudit+0x54/0x110 (unreliable) [c0000007e1a7bd80] [c00000000002ddc0] ppc_rtas+0x150/0x2d0 [c0000007e1a7be30] [c000000000009358] syscall_exit+0x0/0x98 Fixes: 55190f88789a ("powerpc: Add skeleton PowerNV platform") Reported-by: NAGESWARA R. 
SASTRY Signed-off-by: Vasant Hegde [mpe: Reword change log, trim oops, and add stable + fixes] Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/rtas.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index caffb10e7aa3e..5607693f35cf0 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -1041,6 +1041,9 @@ asmlinkage int ppc_rtas(struct rtas_args __user *uargs) if (!capable(CAP_SYS_ADMIN)) return -EPERM; + if (!rtas.entry) + return -EINVAL; + if (copy_from_user(&args, uargs, 3 * sizeof(u32)) != 0) return -EFAULT; From d347b289bf2767fe6d2c87589c38cc194a6a07c5 Mon Sep 17 00:00:00 2001 From: Adam Richter Date: Fri, 16 Oct 2015 03:33:02 -0700 Subject: [PATCH 1257/2091] drm: fix mutex leak in drm_dp_get_mst_branch_device commit 30730c7f5943b3beace1e29f7f1476e05de3da14 upstream. In Linux 4.3-rc5, there is an error case in drm_dp_get_branch_device that returns without releasing mgr->lock, resulting a spew of kernel messages about a kernel work function possibly having leaked a mutex and presumably more serious adverse consequences later. This patch changes the error to "goto out" to unlock the mutex before returning. [airlied: grabbed from drm-next as it fixes something we've seen] Signed-off-by: Adam J. Richter Signed-off-by: Daniel Vetter Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_dp_mst_topology.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index 2a2eb96caedaf..109e776345d39 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -1179,17 +1179,18 @@ static struct drm_dp_mst_branch *drm_dp_get_mst_branch_device(struct drm_dp_mst_ list_for_each_entry(port, &mstb->ports, next) { if (port->port_num == port_num) { - if (!port->mstb) { + mstb = port->mstb; + if (!mstb) { DRM_ERROR("failed to lookup MSTB with lct %d, rad %02x\n", lct, rad[0]); - return NULL; + goto out; } - mstb = port->mstb; break; } } } kref_get(&mstb->kref); +out: mutex_unlock(&mgr->lock); return mstb; } From f7832ff91dd51ad2a43bd37846f9cef15b8c03e6 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 29 Sep 2015 21:10:10 -0300 Subject: [PATCH 1258/2091] si2157: Bounds check firmware commit a828d72df216c36e9c40b6c24dc4b17b6f7b5a76 upstream. When reading the firmware and sending commands, the length must be bounds checked to avoid overrunning the size of the command buffer and smashing the stack if the firmware is not in the expected format. Add the proper check. 
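The check added below follows the usual pattern for length-prefixed firmware records; a stripped-down userspace version, with an assumed buffer size standing in for the driver's SI2157_ARGLEN, looks like this:

  #include <stdio.h>
  #include <string.h>
  #include <stdint.h>

  #define ARGLEN 30   /* stand-in for the driver's command buffer size */

  struct cmd { uint8_t args[ARGLEN]; size_t wlen; };

  /* Each firmware record starts with a length byte followed by the command
   * payload; an oversized length must be rejected before the memcpy. */
  static int load_record(struct cmd *cmd, const uint8_t *rec)
  {
          uint8_t len = rec[0];

          if (len > ARGLEN)
                  return -1;              /* would smash cmd->args otherwise */

          memcpy(cmd->args, rec + 1, len);
          cmd->wlen = len;
          return 0;
  }

  int main(void)
  {
          struct cmd cmd;
          uint8_t good[17] = { 8 }, bad[17] = { 200 };

          printf("good record: %d\n", load_record(&cmd, good));
          printf("bad record:  %d\n", load_record(&cmd, bad));
          return 0;
  }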
Signed-off-by: Laura Abbott Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/tuners/si2157.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/media/tuners/si2157.c b/drivers/media/tuners/si2157.c index d74ae26621ca5..c5dbba5b5bc94 100644 --- a/drivers/media/tuners/si2157.c +++ b/drivers/media/tuners/si2157.c @@ -165,6 +165,10 @@ static int si2157_init(struct dvb_frontend *fe) for (remaining = fw->size; remaining > 0; remaining -= 17) { len = fw->data[fw->size - remaining]; + if (len > SI2157_ARGLEN) { + dev_err(&client->dev, "Bad firmware length\n"); + goto err_release_firmware; + } memcpy(cmd.args, &fw->data[(fw->size - remaining) + 1], len); cmd.wlen = len; cmd.rlen = 1; From 76d6f9644b3c9029e920b315c7187605e7acf260 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 29 Sep 2015 21:10:09 -0300 Subject: [PATCH 1259/2091] si2168: Bounds check firmware commit 47810b4341ac9d2f558894bc5995e6fa2a1298f9 upstream. When reading the firmware and sending commands, the length must be bounds checked to avoid overrunning the size of the command buffer and smashing the stack if the firmware is not in the expected format: si2168 11-0064: found a 'Silicon Labs Si2168-B40' si2168 11-0064: downloading firmware from file 'dvb-demod-si2168-b40-01.fw' si2168 11-0064: firmware download failed -95 Kernel panic - not syncing: stack-protector: Kernel stack is corrupted in: ffffffffa085708f Add the proper check. Reported-by: Stuart Auchterlonie Reviewed-by: Antti Palosaari Signed-off-by: Laura Abbott Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/dvb-frontends/si2168.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/media/dvb-frontends/si2168.c b/drivers/media/dvb-frontends/si2168.c index 5db588ebfc24e..391e98395b411 100644 --- a/drivers/media/dvb-frontends/si2168.c +++ b/drivers/media/dvb-frontends/si2168.c @@ -457,6 +457,10 @@ static int si2168_init(struct dvb_frontend *fe) /* firmware is in the new format */ for (remaining = fw->size; remaining > 0; remaining -= 17) { len = fw->data[fw->size - remaining]; + if (len > SI2168_ARGLEN) { + ret = -EINVAL; + break; + } memcpy(cmd.args, &fw->data[(fw->size - remaining) + 1], len); cmd.wlen = len; cmd.rlen = 1; From f4778084b715e649d4b095151cfe30c9067c2a59 Mon Sep 17 00:00:00 2001 From: Antti Palosaari Date: Tue, 6 Oct 2015 00:22:23 -0300 Subject: [PATCH 1260/2091] rtl28xxu: fix control message flaws commit d18ca5b7ceca0e9674cb4bb2ed476b0fcbb23ba2 upstream. Add lock to prevent concurrent access for control message as control message function uses shared buffer. Without the lock there may be remote control polling which messes the buffer causing IO errors. Increase buffer size and add check for maximum supported message length. 
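Both ingredients of the fix, serializing access to the shared buffer and rejecting oversized requests, can be sketched in plain userspace C, with a pthread mutex standing in for the device's USB mutex (illustrative only, not the driver code):

  #include <errno.h>
  #include <pthread.h>
  #include <stdio.h>
  #include <string.h>

  /* Minimal model of per-device state with one shared transfer buffer. */
  struct dev_state {
          pthread_mutex_t usb_mutex;
          unsigned char buf[128];
  };

  static int ctrl_msg(struct dev_state *d, const void *data, size_t size)
  {
          int ret = 0;

          pthread_mutex_lock(&d->usb_mutex);

          if (size > sizeof(d->buf)) {            /* reject oversized messages  */
                  ret = -EINVAL;
                  goto unlock;
          }
          memcpy(d->buf, data, size);             /* safe: buffer is owned here */
          /* ... submit d->buf to the hardware ... */

  unlock:
          pthread_mutex_unlock(&d->usb_mutex);
          return ret;
  }

  int main(void)
  {
          struct dev_state d = { .usb_mutex = PTHREAD_MUTEX_INITIALIZER };
          char payload[16] = "tuner command";

          printf("small message: %d\n", ctrl_msg(&d, payload, sizeof(payload)));
          printf("huge message:  %d\n", ctrl_msg(&d, payload, 4096));
          return 0;
  }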
Link: https://bugzilla.kernel.org/show_bug.cgi?id=103391 Fixes: c56222a6b25c ("[media] rtl28xxu: move usb buffers to state") Signed-off-by: Antti Palosaari Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/dvb-usb-v2/rtl28xxu.c | 15 +++++++++++++-- drivers/media/usb/dvb-usb-v2/rtl28xxu.h | 2 +- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/media/usb/dvb-usb-v2/rtl28xxu.c b/drivers/media/usb/dvb-usb-v2/rtl28xxu.c index 895441fe90f7c..e862554952c12 100644 --- a/drivers/media/usb/dvb-usb-v2/rtl28xxu.c +++ b/drivers/media/usb/dvb-usb-v2/rtl28xxu.c @@ -34,6 +34,14 @@ static int rtl28xxu_ctrl_msg(struct dvb_usb_device *d, struct rtl28xxu_req *req) unsigned int pipe; u8 requesttype; + mutex_lock(&d->usb_mutex); + + if (req->size > sizeof(dev->buf)) { + dev_err(&d->intf->dev, "too large message %u\n", req->size); + ret = -EINVAL; + goto err_mutex_unlock; + } + if (req->index & CMD_WR_FLAG) { /* write */ memcpy(dev->buf, req->data, req->size); @@ -50,14 +58,17 @@ static int rtl28xxu_ctrl_msg(struct dvb_usb_device *d, struct rtl28xxu_req *req) dvb_usb_dbg_usb_control_msg(d->udev, 0, requesttype, req->value, req->index, dev->buf, req->size); if (ret < 0) - goto err; + goto err_mutex_unlock; /* read request, copy returned data to return buf */ if (requesttype == (USB_TYPE_VENDOR | USB_DIR_IN)) memcpy(req->data, dev->buf, req->size); + mutex_unlock(&d->usb_mutex); + return 0; -err: +err_mutex_unlock: + mutex_unlock(&d->usb_mutex); dev_dbg(&d->intf->dev, "failed=%d\n", ret); return ret; } diff --git a/drivers/media/usb/dvb-usb-v2/rtl28xxu.h b/drivers/media/usb/dvb-usb-v2/rtl28xxu.h index 1b5d7ffb685e0..1bdeda05d3325 100644 --- a/drivers/media/usb/dvb-usb-v2/rtl28xxu.h +++ b/drivers/media/usb/dvb-usb-v2/rtl28xxu.h @@ -69,7 +69,7 @@ struct rtl28xxu_dev { - u8 buf[28]; + u8 buf[128]; u8 chip_id; u8 tuner; char *tuner_name; From 7f075aa594aab632cb6e04859679f72eb9ff8407 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 12 Oct 2015 15:22:31 +0200 Subject: [PATCH 1261/2091] KVM: arm: use GIC support unconditionally commit 4a5d69b73948d0e03cd38d77dc11edb2e707165f upstream. The vgic code on ARM is built for all configurations that enable KVM, but the parent_data field that it references is only present when CONFIG_IRQ_DOMAIN_HIERARCHY is set: virt/kvm/arm/vgic.c: In function 'kvm_vgic_map_phys_irq': virt/kvm/arm/vgic.c:1781:13: error: 'struct irq_data' has no member named 'parent_data' This flag is implied by the GIC driver, and indeed the VGIC code only makes sense if a GIC is present. This changes the CONFIG_KVM symbol to always select GIC, which avoids the issue. Fixes: 662d9715840 ("arm/arm64: KVM: Kill CONFIG_KVM_ARM_{VGIC,TIMER}") Signed-off-by: Arnd Bergmann Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall Signed-off-by: Greg Kroah-Hartman --- arch/arm/kvm/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig index f1f79d1043096..60c1a0f4d67ae 100644 --- a/arch/arm/kvm/Kconfig +++ b/arch/arm/kvm/Kconfig @@ -21,6 +21,7 @@ config KVM depends on MMU && OF select PREEMPT_NOTIFIERS select ANON_INODES + select ARM_GIC select HAVE_KVM_CPU_RELAX_INTERCEPT select HAVE_KVM_ARCH_TLB_FLUSH_ALL select KVM_MMIO From 85cdc32aea927682cfbaf21e1503a024c4356e51 Mon Sep 17 00:00:00 2001 From: David Henningsson Date: Tue, 13 Oct 2015 10:10:18 +0200 Subject: [PATCH 1262/2091] ALSA: hda - Fix inverted internal mic on Lenovo G50-80 commit e8d65a8d985271a102f07c7456da5b86c19ffe16 upstream. 
Add the appropriate quirk to indicate the Lenovo G50-80 has a stereo mic input where one channel has reverse polarity. Alsa-info available at: https://launchpadlibrarian.net/220846272/AlsaInfo.txt BugLink: https://bugs.launchpad.net/bugs/1504778 Signed-off-by: David Henningsson Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_conexant.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index 06cc9d57ba3da..488f4c7be33eb 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -819,6 +819,7 @@ static const struct snd_pci_quirk cxt5066_fixups[] = { SND_PCI_QUIRK(0x17aa, 0x21da, "Lenovo X220", CXT_PINCFG_LENOVO_TP410), SND_PCI_QUIRK(0x17aa, 0x21db, "Lenovo X220-tablet", CXT_PINCFG_LENOVO_TP410), SND_PCI_QUIRK(0x17aa, 0x38af, "Lenovo IdeaPad Z560", CXT_FIXUP_MUTE_LED_EAPD), + SND_PCI_QUIRK(0x17aa, 0x390b, "Lenovo G50-80", CXT_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x17aa, 0x3975, "Lenovo U300s", CXT_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x17aa, 0x3977, "Lenovo IdeaPad U310", CXT_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x17aa, 0x397b, "Lenovo S205", CXT_FIXUP_STEREO_DMIC), From 4d721de616dd7c1b994a66252de98cca798dc8b5 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 20 Oct 2015 16:23:55 +0200 Subject: [PATCH 1263/2091] ALSA: hda - Fix deadlock at error in building PCM commit d289619a219dd01e255d7b5e30f9171b25efea48 upstream. The HDA codec driver issues snd_hda_codec_reset() at the error path of PCM build. This was needed in the earlier code base, but the recent rewrite to use the standard bus binding made this a deadlock: modprobe D 0000000000000005 0 720 716 0x00000080 Call Trace: [] schedule+0x3e/0x90 [] schedule_preempt_disabled+0x15/0x20 [] __mutex_lock_slowpath+0xb5/0x120 [] mutex_lock+0x1b/0x30 [] device_release_driver+0x1b/0x30 [] bus_remove_device+0x105/0x180 [] device_del+0x139/0x260 [] snd_hdac_device_unregister+0x25/0x30 [snd_hda_core] [] snd_hda_codec_reset+0x2a/0x70 [snd_hda_codec] [] snd_hda_codec_build_pcms+0x18b/0x1b0 [snd_hda_codec] [] hda_codec_driver_probe+0xbe/0x140 [snd_hda_codec] [] driver_probe_device+0x1f4/0x460 [] __driver_attach+0x90/0xa0 [] bus_for_each_dev+0x64/0xa0 [] driver_attach+0x1e/0x20 [] bus_add_driver+0x1eb/0x280 [] driver_register+0x60/0xe0 [] __hda_codec_driver_register+0x5a/0x60 [snd_hda_codec] [] realtek_driver_init+0x1e/0x1000 [snd_hda_codec_realtek] [] do_one_initcall+0xb3/0x200 [] do_init_module+0x60/0x1f8 [] load_module+0x1653/0x1bd0 [] SYSC_finit_module+0x98/0xc0 [] SyS_finit_module+0xe/0x10 [] entry_SYSCALL_64_fastpath+0x16/0x75 The simple fix is just to remove this call, since we don't need to think about unbinding at there any longer. 
Bugzilla: https://bugzilla.suse.com/show_bug.cgi?id=948758 Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_codec.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index 36e8f12366371..57197bef5f5b9 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -3833,10 +3833,8 @@ int snd_hda_codec_build_pcms(struct hda_codec *codec) return -EINVAL; err = snd_hda_codec_parse_pcms(codec); - if (err < 0) { - snd_hda_codec_reset(codec); + if (err < 0) return err; - } /* attach a new PCM streams */ list_for_each_entry(cpcm, &codec->pcm_list_head, list) { From a895124c5b62b4b50ba8d56ac2ffd06f2f4fda20 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Wed, 14 Oct 2015 13:31:24 +0100 Subject: [PATCH 1264/2091] ASoC: Add info callback for SX_TLV controls commit 34198710f55b5f359f43e67d9a08fe5aadfbca1b upstream. SX_TLV controls are intended for situations where the register behind the control has some non-zero value indicating the minimum gain and then gains increasing from there and eventually overflowing through zero. Currently every CODEC implementing these controls specifies the minimum as the non-zero value for the minimum and the maximum as the number of gain settings available. This means when the info callback subtracts the minimum value from the maximum value to calculate the number of gain levels available it is actually under reporting the available levels. This patch fixes this issue by adding a new snd_soc_info_volsw_sx callback that does not subtract the minimum value. Fixes: 1d99f2436d0d ("ASoC: core: Rework SOC_DOUBLE_R_SX_TLV add SOC_SINGLE_SX_TLV") Signed-off-by: Charles Keepax Acked-by: Brian Austin Tested-by: Brian Austin Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- include/sound/soc.h | 6 ++++-- sound/soc/soc-ops.c | 28 ++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/include/sound/soc.h b/include/sound/soc.h index f6226914acfee..8d948aa9c5c92 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -85,7 +85,7 @@ .access = SNDRV_CTL_ELEM_ACCESS_TLV_READ | \ SNDRV_CTL_ELEM_ACCESS_READWRITE, \ .tlv.p = (tlv_array),\ - .info = snd_soc_info_volsw, \ + .info = snd_soc_info_volsw_sx, \ .get = snd_soc_get_volsw_sx,\ .put = snd_soc_put_volsw_sx, \ .private_value = (unsigned long)&(struct soc_mixer_control) \ @@ -155,7 +155,7 @@ .access = SNDRV_CTL_ELEM_ACCESS_TLV_READ | \ SNDRV_CTL_ELEM_ACCESS_READWRITE, \ .tlv.p = (tlv_array), \ - .info = snd_soc_info_volsw, \ + .info = snd_soc_info_volsw_sx, \ .get = snd_soc_get_volsw_sx, \ .put = snd_soc_put_volsw_sx, \ .private_value = (unsigned long)&(struct soc_mixer_control) \ @@ -563,6 +563,8 @@ int snd_soc_put_enum_double(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol); int snd_soc_info_volsw(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo); +int snd_soc_info_volsw_sx(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_info *uinfo); #define snd_soc_info_bool_ext snd_ctl_boolean_mono_info int snd_soc_get_volsw(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol); diff --git a/sound/soc/soc-ops.c b/sound/soc/soc-ops.c index 100d92b5b77ef..05977ae1ff2a3 100644 --- a/sound/soc/soc-ops.c +++ b/sound/soc/soc-ops.c @@ -206,6 +206,34 @@ int snd_soc_info_volsw(struct snd_kcontrol *kcontrol, } EXPORT_SYMBOL_GPL(snd_soc_info_volsw); +/** + * snd_soc_info_volsw_sx - Mixer info callback for SX TLV controls + * 
@kcontrol: mixer control + * @uinfo: control element information + * + * Callback to provide information about a single mixer control, or a double + * mixer control that spans 2 registers of the SX TLV type. SX TLV controls + * have a range that represents both positive and negative values either side + * of zero but without a sign bit. + * + * Returns 0 for success. + */ +int snd_soc_info_volsw_sx(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_info *uinfo) +{ + struct soc_mixer_control *mc = + (struct soc_mixer_control *)kcontrol->private_value; + + snd_soc_info_volsw(kcontrol, uinfo); + /* Max represents the number of levels in an SX control not the + * maximum value, so add the minimum value back on + */ + uinfo->value.integer.max += mc->min; + + return 0; +} +EXPORT_SYMBOL_GPL(snd_soc_info_volsw_sx); + /** * snd_soc_get_volsw - single mixer get callback * @kcontrol: mixer control From d4fde24fbd0c536122bce56dce7973a4b74d3c05 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 20 Oct 2015 10:25:58 +0100 Subject: [PATCH 1265/2091] ASoC: wm8904: Correct number of EQ registers commit 97aff2c03a1e4d343266adadb52313613efb027f upstream. There are 24 EQ registers not 25, I suspect this bug came about because the registers start at EQ1 not zero. The bug is relatively harmless as the extra register written is an unused one. Signed-off-by: Charles Keepax Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- include/sound/wm8904.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/sound/wm8904.h b/include/sound/wm8904.h index 898be3a8db9ae..6d8f8fba33414 100644 --- a/include/sound/wm8904.h +++ b/include/sound/wm8904.h @@ -119,7 +119,7 @@ #define WM8904_MIC_REGS 2 #define WM8904_GPIO_REGS 4 #define WM8904_DRC_REGS 4 -#define WM8904_EQ_REGS 25 +#define WM8904_EQ_REGS 24 /** * DRC configurations are specified with a label and a set of register From bd69119dc8db8a1768c29e0e6b96a7501f63328a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C5=91v=C3=A1g=C3=B3=2C=20Zolt=C3=A1n?= Date: Mon, 12 Oct 2015 15:13:56 +0100 Subject: [PATCH 1266/2091] x86/efi: Fix multiple GOP device support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 8a53554e12e98d1759205afd7b8e9e2ea0936f48 upstream. When multiple GOP devices exists, but none of them implements ConOut, the code should just choose the first GOP (according to the comments). But currently 'fb_base' will refer to the last GOP, while other parameters to the first GOP, which will likely result in a garbled display. I can reliably reproduce this bug using my ASRock Z87M Extreme4 motherboard with CSM and integrated GPU disabled, and two PCIe video cards (NVidia GT640 and GTX980), booting from efi-stub (booting from grub works fine). On the primary display the ASRock logo remains and on the secondary screen it is garbled up completely. 
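The underlying mistake is a loop that probes every candidate device directly into the variable holding the final answer, so the device probed last wins even when an earlier one was chosen. A minimal sketch of the corrected shape (invented structures, not the EFI stub code): probe into a local, commit only when the candidate is actually selected.

  #include <stdbool.h>
  #include <stddef.h>
  #include <stdint.h>

  struct gop_candidate {
      uint64_t fb_base;
      bool     has_conout;
  };

  /* Choose the first candidate, preferring one that implements ConOut. */
  static bool pick_fb_base(const struct gop_candidate *c, size_t n,
                           uint64_t *fb_base)
  {
      bool found = false;

      for (size_t i = 0; i < n; i++) {
          uint64_t current_fb_base = c[i].fb_base;   /* probe into a local */

          if (!found || c[i].has_conout) {
              *fb_base = current_fb_base;            /* commit only here */
              found = true;
          }
          if (c[i].has_conout)
              break;                                 /* best match, stop */
      }
      return found;
  }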
Signed-off-by: Kővágó, Zoltán Signed-off-by: Matt Fleming Cc: Linus Torvalds Cc: Matthew Garrett Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1444659236-24837-2-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/eboot.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index 0cdc154a22b50..4c3f76b425c10 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -667,6 +667,7 @@ setup_gop32(struct screen_info *si, efi_guid_t *proto, bool conout_found = false; void *dummy = NULL; u32 h = handles[i]; + u32 current_fb_base; status = efi_call_early(handle_protocol, h, proto, (void **)&gop32); @@ -678,7 +679,7 @@ setup_gop32(struct screen_info *si, efi_guid_t *proto, if (status == EFI_SUCCESS) conout_found = true; - status = __gop_query32(gop32, &info, &size, &fb_base); + status = __gop_query32(gop32, &info, &size, ¤t_fb_base); if (status == EFI_SUCCESS && (!first_gop || conout_found)) { /* * Systems that use the UEFI Console Splitter may @@ -692,6 +693,7 @@ setup_gop32(struct screen_info *si, efi_guid_t *proto, pixel_format = info->pixel_format; pixel_info = info->pixel_information; pixels_per_scan_line = info->pixels_per_scan_line; + fb_base = current_fb_base; /* * Once we've found a GOP supporting ConOut, @@ -770,6 +772,7 @@ setup_gop64(struct screen_info *si, efi_guid_t *proto, bool conout_found = false; void *dummy = NULL; u64 h = handles[i]; + u32 current_fb_base; status = efi_call_early(handle_protocol, h, proto, (void **)&gop64); @@ -781,7 +784,7 @@ setup_gop64(struct screen_info *si, efi_guid_t *proto, if (status == EFI_SUCCESS) conout_found = true; - status = __gop_query64(gop64, &info, &size, &fb_base); + status = __gop_query64(gop64, &info, &size, ¤t_fb_base); if (status == EFI_SUCCESS && (!first_gop || conout_found)) { /* * Systems that use the UEFI Console Splitter may @@ -795,6 +798,7 @@ setup_gop64(struct screen_info *si, efi_guid_t *proto, pixel_format = info->pixel_format; pixel_info = info->pixel_information; pixels_per_scan_line = info->pixels_per_scan_line; + fb_base = current_fb_base; /* * Once we've found a GOP supporting ConOut, From 6c0da28df5dac10672efe955eb89051a850008eb Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 22 Oct 2015 13:32:21 -0700 Subject: [PATCH 1267/2091] mm: make sendfile(2) killable commit 296291cdd1629c308114504b850dc343eabc2782 upstream. Currently a simple program below issues a sendfile(2) system call which takes about 62 days to complete in my test KVM instance. int fd; off_t off = 0; fd = open("file", O_RDWR | O_TRUNC | O_SYNC | O_CREAT, 0644); ftruncate(fd, 2); lseek(fd, 0, SEEK_END); sendfile(fd, fd, &off, 0xfffffff); Now you should not ask kernel to do a stupid stuff like copying 256MB in 2-byte chunks and call fsync(2) after each chunk but if you do, sysadmin should have a way to stop you. We actually do have a check for fatal_signal_pending() in generic_perform_write() which triggers in this path however because we always succeed in writing something before the check is done, we return value > 0 from generic_perform_write() and thus the information about signal gets lost. Fix the problem by doing the signal check before writing anything. That way generic_perform_write() returns -EINTR, the error gets propagated up and the sendfile loop terminates early. 
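The placement of the check is the whole fix: it has to run before any bytes are copied in an iteration, otherwise the call always reports partial progress and the caller never sees -EINTR. A self-contained sketch of the corrected loop (the two helpers are stand-ins, not kernel functions):

  #include <errno.h>
  #include <stdbool.h>
  #include <stddef.h>

  static volatile bool fatal_signal;     /* stand-in for fatal_signal_pending() */

  static long write_chunk(size_t pos, size_t len)
  {
      (void)pos;
      return (long)(len < 2 ? len : 2);  /* pretend 2 bytes were written */
  }

  /* Returns bytes written, or a negative error if nothing was written. */
  static long perform_write(size_t pos, size_t total)
  {
      long written = 0;

      while ((size_t)written < total) {
          long n;

          /* Check *before* copying anything: placed after a successful
           * chunk, the call would report progress and -EINTR would be lost. */
          if (fatal_signal)
              return written ? written : -EINTR;

          n = write_chunk(pos + written, total - written);
          if (n < 0)
              return written ? written : n;
          written += n;
      }
      return written;
  }

A caller looping on the return value then stops as soon as one call makes no progress and returns -EINTR, which is what lets the sendfile loop above terminate.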
Signed-off-by: Jan Kara Reported-by: Dmitry Vyukov Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/filemap.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/mm/filemap.c b/mm/filemap.c index 6bf5e42d560a4..1ffef05f1c1fd 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2461,6 +2461,11 @@ ssize_t generic_perform_write(struct file *file, break; } + if (fatal_signal_pending(current)) { + status = -EINTR; + break; + } + status = a_ops->write_begin(file, mapping, pos, bytes, flags, &page, &fsdata); if (unlikely(status < 0)) @@ -2498,10 +2503,6 @@ ssize_t generic_perform_write(struct file *file, written += copied; balance_dirty_pages_ratelimited(mapping); - if (fatal_signal_pending(current)) { - status = -EINTR; - break; - } } while (iov_iter_count(i)); return written ? written : status; From 65bda5f8ba4552e6f16a2c9d94f2b367a4456875 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 22 Oct 2015 13:32:27 -0700 Subject: [PATCH 1268/2091] fault-inject: fix inverted interval/probability values in printk commit bb387002693ed28b2bb0408c5dec65521b71e5f1 upstream. interval displays the probability and vice versa. Fixes: 6adc4a22f20bb ("fault-inject: add ratelimit option") Acked-by: Akinobu Mita Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- lib/fault-inject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/fault-inject.c b/lib/fault-inject.c index f1cdeb024d172..6a823a53e357b 100644 --- a/lib/fault-inject.c +++ b/lib/fault-inject.c @@ -44,7 +44,7 @@ static void fail_dump(struct fault_attr *attr) printk(KERN_NOTICE "FAULT_INJECTION: forcing a failure.\n" "name %pd, interval %lu, probability %lu, " "space %d, times %d\n", attr->dname, - attr->probability, attr->interval, + attr->interval, attr->probability, atomic_read(&attr->space), atomic_read(&attr->times)); if (attr->verbose > 1) From 930a50a45963c8da0a27fbee73ae462d8ea54aca Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Tue, 20 Oct 2015 01:15:39 -0400 Subject: [PATCH 1269/2091] drm/nouveau/gem: return only valid domain when there's only one commit 2a6c521bb41ce862e43db46f52e7681d33e8d771 upstream. On nv50+, we restrict the valid domains to just the one where the buffer was originally created. However after the buffer is evicted to system memory, we might move it back to a different domain that was not originally valid. When sharing the buffer and retrieving its GEM_INFO data, we still want the domain that will be valid for this buffer in a pushbuf, not the one where it currently happens to be. This resolves fdo#92504 and several others. These are due to suspend evicting all buffers, making it more likely that they temporarily end up in the wrong place. 
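The is_power_of_2() test is just a compact way of asking whether exactly one domain bit is set; if so, that single valid placement is reported regardless of where the buffer happens to sit right now. A standalone sketch of the idea (the flag values are made up):

  #include <stdbool.h>
  #include <stdint.h>

  #define DOMAIN_VRAM 0x1u
  #define DOMAIN_GART 0x2u

  static bool single_bit_set(uint32_t mask)
  {
      return mask != 0 && (mask & (mask - 1)) == 0;
  }

  /* Report where the buffer may legally be placed, not where eviction or
   * suspend happens to have left it at this moment. */
  static uint32_t report_domain(uint32_t valid_domains, uint32_t current_domain)
  {
      if (single_bit_set(valid_domains))
          return valid_domains;
      return current_domain;
  }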
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=92504 Signed-off-by: Ilia Mirkin Signed-off-by: Ben Skeggs Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nouveau_gem.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index 0e690bf19fc9c..58c959265b1a1 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -227,11 +227,12 @@ nouveau_gem_info(struct drm_file *file_priv, struct drm_gem_object *gem, struct nouveau_bo *nvbo = nouveau_gem_object(gem); struct nvkm_vma *vma; - if (nvbo->bo.mem.mem_type == TTM_PL_TT) + if (is_power_of_2(nvbo->valid_domains)) + rep->domain = nvbo->valid_domains; + else if (nvbo->bo.mem.mem_type == TTM_PL_TT) rep->domain = NOUVEAU_GEM_DOMAIN_GART; else rep->domain = NOUVEAU_GEM_DOMAIN_VRAM; - rep->offset = nvbo->bo.offset; if (cli->vm) { vma = nouveau_bo_vma_find(nvbo, cli->vm); From 269b571f0d6ac7b2da7f9d4d7ea56ccff1ea998d Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 19 Oct 2015 09:30:42 -0400 Subject: [PATCH 1270/2091] drm/radeon/dpm: don't add pwm attributes if DPM is disabled commit 2a7d44f47f53fa1be677f44c73d78b1bcf9c05d9 upstream. PWM fan control is only available with DPM. If DPM disabled, don't expose the PWM fan controls to avoid a crash. Bug: https://bugs.freedesktop.org/show_bug.cgi?id=92524 Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/radeon_pm.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c index 948c33105801d..6dbdebba44f20 100644 --- a/drivers/gpu/drm/radeon/radeon_pm.c +++ b/drivers/gpu/drm/radeon/radeon_pm.c @@ -720,10 +720,14 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, struct radeon_device *rdev = dev_get_drvdata(dev); umode_t effective_mode = attr->mode; - /* Skip limit attributes if DPM is not enabled */ + /* Skip attributes if DPM is not enabled */ if (rdev->pm.pm_method != PM_METHOD_DPM && (attr == &sensor_dev_attr_temp1_crit.dev_attr.attr || - attr == &sensor_dev_attr_temp1_crit_hyst.dev_attr.attr)) + attr == &sensor_dev_attr_temp1_crit_hyst.dev_attr.attr || + attr == &sensor_dev_attr_pwm1.dev_attr.attr || + attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr || + attr == &sensor_dev_attr_pwm1_max.dev_attr.attr || + attr == &sensor_dev_attr_pwm1_min.dev_attr.attr)) return 0; /* Skip fan attributes if fan is not present */ From e634bc30d00ee9830d762dca0cb7707346e721c3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 21 Aug 2015 16:08:41 +0100 Subject: [PATCH 1271/2091] drm/i915: Flush pipecontrol post-sync writes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 40a24488f5250d63341e74b9994159afc4589606 upstream. In order to flush the results from in-batch pipecontrol writes (used for example in glQuery) before declaring the batch complete (and so declaring the query results coherent), we need to set the FlushEnable bit in our flushing pipecontrol. The FlushEnable bit "waits until all previous writes of immediate data from post-sync circles are complete before executing the next command". I get GPU hangs on byt without flushing these writes (running ue4). piglit has examples where the flush is required for correct rendering. 
Signed-off-by: Chris Wilson Reviewed-by: Ville Syrjälä Acked-by: Daniel Vetter Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_lrc.c | 1 + drivers/gpu/drm/i915/intel_ringbuffer.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 9ab7c1c758aeb..72f1bb8b0499b 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1298,6 +1298,7 @@ static int gen8_emit_flush_render(struct intel_ringbuffer *ringbuf, if (flush_domains) { flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; + flags |= PIPE_CONTROL_FLUSH_ENABLE; } if (invalidate_domains) { diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 005b5e04de4d7..b7e20dee64c47 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -342,6 +342,7 @@ gen7_render_ring_flush(struct intel_engine_cs *ring, if (flush_domains) { flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; + flags |= PIPE_CONTROL_FLUSH_ENABLE; } if (invalidate_domains) { flags |= PIPE_CONTROL_TLB_INVALIDATE; @@ -412,6 +413,7 @@ gen8_render_ring_flush(struct intel_engine_cs *ring, if (flush_domains) { flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; + flags |= PIPE_CONTROL_FLUSH_ENABLE; } if (invalidate_domains) { flags |= PIPE_CONTROL_TLB_INVALIDATE; From 4ba1aed6217900eda26bbe55ded2f927910024d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 7 Oct 2015 22:08:24 +0300 Subject: [PATCH 1272/2091] drm/i915: Restore lost DPLL register write on gen2-4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 8e7a65aa70bcc1235a44e40ae0da5056525fe081 upstream. We accidentally lost the initial DPLL register write in 1c4e02746147 drm/i915: Fix DVO 2x clock enable on 830M The "three times for luck" hack probably saved us from a total disaster. But anyway, bring the initial write back so that the code actually makes some sense. Reported-and-tested-by: Nick Bowler References: http://mid.gmane.org/CAN_QmVyMaArxYgEcVVsGvsMo7-6ohZr8HmF5VhkkL4i9KOmrhw@mail.gmail.com Cc: Nick Bowler Signed-off-by: Ville Syrjälä Reviewed-by: Daniel Vetter Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_display.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index f208bbc6d58e2..20ce62949bb98 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1699,6 +1699,8 @@ static void i9xx_enable_pll(struct intel_crtc *crtc) I915_READ(DPLL(!crtc->pipe)) | DPLL_DVO_2X_MODE); } + I915_WRITE(reg, dpll); + /* Wait for the clocks to stabilize. */ POSTING_READ(reg); udelay(150); From 377bc0b544cae329a385a16320e178f00f6dfcde Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 13 Oct 2015 14:22:26 +0100 Subject: [PATCH 1273/2091] drm/i915: Deny wrapping an userptr into a framebuffer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit cc917ab43541db3ff66d0136042686d40a1b4c9a upstream. Pinning a userptr onto the hardware raises interesting questions about the lifetime of such a surface as the framebuffer extends that life beyond the client's address space. 
That is the hardware will need to keep scanning out from the backing storage even after the client wants to remap its address space. As the hardware pins the backing storage, the userptr becomes invalid and this raises a WARN when the clients tries to unmap its address space. The situation can be even more complicated when the buffer is passed between processes, between a client and display server, where the lifetime and hardware access is even more confusing. Deny it. Signed-off-by: Chris Wilson Cc: Daniel Vetter Cc: Tvrtko Ursulin Cc: Michał Winiarski Reviewed-by: Tvrtko Ursulin Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_gem_userptr.c | 5 ++++- drivers/gpu/drm/i915/intel_display.c | 5 +++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index 1719078c763ac..ce175d05260bc 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -776,7 +776,10 @@ static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = { * Also note, that the object created here is not currently a "first class" * object, in that several ioctls are banned. These are the CPU access * ioctls: mmap(), pwrite and pread. In practice, you are expected to use - * direct access via your pointer rather than use those ioctls. + * direct access via your pointer rather than use those ioctls. Another + * restriction is that we do not allow userptr surfaces to be pinned to the + * hardware and so we reject any attempt to create a framebuffer out of a + * userptr. * * If you think this is a good interface to use to pass GPU memory between * drivers, please use dma-buf instead. In fact, wherever possible use diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 20ce62949bb98..7b27a114b030c 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -13214,6 +13214,11 @@ static int intel_user_framebuffer_create_handle(struct drm_framebuffer *fb, struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb); struct drm_i915_gem_object *obj = intel_fb->obj; + if (obj->userptr.mm) { + DRM_DEBUG("attempting to use a userptr for a framebuffer, denied\n"); + return -EINVAL; + } + return drm_gem_handle_create(file, &obj->base, handle); } From 2753f24e284e0fc9f2dbb61be84e1bfea05da1ee Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 23 Oct 2015 10:38:52 -0400 Subject: [PATCH 1274/2091] drm/radeon: don't try to recreate sysfs entries on resume commit 49abb26651167c892393cd9f2ad23df429645ed9 upstream. 
Fixes a harmless error message caused by: 51a4726b04e880fdd9b4e0e58b13f70b0a68a7f5 Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/radeon.h | 1 + drivers/gpu/drm/radeon/radeon_pm.c | 35 ++++++++++++++++++------------ 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 46eb0fa75a614..91c3f60f8bac4 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1656,6 +1656,7 @@ struct radeon_pm { u8 fan_max_rpm; /* dpm */ bool dpm_enabled; + bool sysfs_initialized; struct radeon_dpm dpm; }; diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c index 6dbdebba44f20..91764320c56f6 100644 --- a/drivers/gpu/drm/radeon/radeon_pm.c +++ b/drivers/gpu/drm/radeon/radeon_pm.c @@ -1533,19 +1533,23 @@ int radeon_pm_late_init(struct radeon_device *rdev) if (rdev->pm.pm_method == PM_METHOD_DPM) { if (rdev->pm.dpm_enabled) { - ret = device_create_file(rdev->dev, &dev_attr_power_dpm_state); - if (ret) - DRM_ERROR("failed to create device file for dpm state\n"); - ret = device_create_file(rdev->dev, &dev_attr_power_dpm_force_performance_level); - if (ret) - DRM_ERROR("failed to create device file for dpm state\n"); - /* XXX: these are noops for dpm but are here for backwards compat */ - ret = device_create_file(rdev->dev, &dev_attr_power_profile); - if (ret) - DRM_ERROR("failed to create device file for power profile\n"); - ret = device_create_file(rdev->dev, &dev_attr_power_method); - if (ret) - DRM_ERROR("failed to create device file for power method\n"); + if (!rdev->pm.sysfs_initialized) { + ret = device_create_file(rdev->dev, &dev_attr_power_dpm_state); + if (ret) + DRM_ERROR("failed to create device file for dpm state\n"); + ret = device_create_file(rdev->dev, &dev_attr_power_dpm_force_performance_level); + if (ret) + DRM_ERROR("failed to create device file for dpm state\n"); + /* XXX: these are noops for dpm but are here for backwards compat */ + ret = device_create_file(rdev->dev, &dev_attr_power_profile); + if (ret) + DRM_ERROR("failed to create device file for power profile\n"); + ret = device_create_file(rdev->dev, &dev_attr_power_method); + if (ret) + DRM_ERROR("failed to create device file for power method\n"); + if (!ret) + rdev->pm.sysfs_initialized = true; + } mutex_lock(&rdev->pm.mutex); ret = radeon_dpm_late_enable(rdev); @@ -1561,7 +1565,8 @@ int radeon_pm_late_init(struct radeon_device *rdev) } } } else { - if (rdev->pm.num_power_states > 1) { + if ((rdev->pm.num_power_states > 1) && + (!rdev->pm.sysfs_initialized)) { /* where's the best place to put these? */ ret = device_create_file(rdev->dev, &dev_attr_power_profile); if (ret) @@ -1569,6 +1574,8 @@ int radeon_pm_late_init(struct radeon_device *rdev) ret = device_create_file(rdev->dev, &dev_attr_power_method); if (ret) DRM_ERROR("failed to create device file for power method\n"); + if (!ret) + rdev->pm.sysfs_initialized = true; } } return ret; From bf1289f0ad3cb1a9fb8d765078aefe94ece856d2 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 27 Oct 2015 10:56:44 -0400 Subject: [PATCH 1275/2091] drm/radeon: fix dpms when driver backlight control is disabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit ae93580ee59c02395c1711d3e6b90546b8137b86 upstream. If driver backlight control is disabled, either by driver parameter or default per-asic setting, revert to the old behavior. 
Fixes a regression in commit: 4281f46ef839050d2ef60348f661eb463c21cc2e Reviewed-by: Michel Dänzer Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/atombios_encoders.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c index b4ff4c134fbb4..ab5d9c5649b18 100644 --- a/drivers/gpu/drm/radeon/atombios_encoders.c +++ b/drivers/gpu/drm/radeon/atombios_encoders.c @@ -1624,9 +1624,14 @@ radeon_atom_encoder_dpms_avivo(struct drm_encoder *encoder, int mode) } else atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args); if (radeon_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) { - struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv; + if (rdev->mode_info.bl_encoder) { + struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv; - atombios_set_backlight_level(radeon_encoder, dig->backlight_level); + atombios_set_backlight_level(radeon_encoder, dig->backlight_level); + } else { + args.ucAction = ATOM_LCD_BLON; + atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args); + } } break; case DRM_MODE_DPMS_STANDBY: @@ -1706,8 +1711,13 @@ radeon_atom_encoder_dpms_dig(struct drm_encoder *encoder, int mode) if (ASIC_IS_DCE4(rdev)) atombios_dig_encoder_setup(encoder, ATOM_ENCODER_CMD_DP_VIDEO_ON, 0); } - if (radeon_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) - atombios_set_backlight_level(radeon_encoder, dig->backlight_level); + if (radeon_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) { + if (rdev->mode_info.bl_encoder) + atombios_set_backlight_level(radeon_encoder, dig->backlight_level); + else + atombios_dig_transmitter_setup(encoder, + ATOM_TRANSMITTER_ACTION_LCD_BLON, 0, 0); + } if (ext_encoder) atombios_external_encoder_setup(encoder, ext_encoder, ATOM_ENABLE); break; From c5989318286f04f54bfc2549198856b4546d01a5 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 28 Oct 2015 14:26:32 -0400 Subject: [PATCH 1276/2091] drm/radeon: move bl encoder assignment into bl init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 4cee6a9057d5e13911f0cb6e143d11dc1a3245dd upstream. So that the bl encoder will be null if the GPU does not control the backlight. 
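Together with the previous patch this makes the pointer itself the capability test: it is assigned only on the backlight init paths that actually succeed, so the DPMS code can check it for NULL and otherwise fall back to the firmware method. A tiny sketch of that pattern (invented types, not the radeon structures):

  #include <stdbool.h>
  #include <stddef.h>

  struct backlight { int level; };

  struct mode_info {
      /* non-NULL only when the driver really controls the backlight */
      struct backlight *bl_encoder;
  };

  static void backlight_init(struct mode_info *mi, struct backlight *bl,
                             bool driver_controls_bl)
  {
      if (!driver_controls_bl)
          return;                        /* mi->bl_encoder stays NULL */
      mi->bl_encoder = bl;
  }

  static void dpms_on(struct mode_info *mi)
  {
      if (mi->bl_encoder)
          mi->bl_encoder->level = 100;   /* driver-side backlight control */
      /* else: fall back to the firmware/table method */
  }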
Reviewed-by: Michel Dänzer Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/atombios_encoders.c | 1 + drivers/gpu/drm/radeon/radeon_encoders.c | 1 - drivers/gpu/drm/radeon/radeon_legacy_encoders.c | 1 + 3 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c index ab5d9c5649b18..5be50ef2b30ef 100644 --- a/drivers/gpu/drm/radeon/atombios_encoders.c +++ b/drivers/gpu/drm/radeon/atombios_encoders.c @@ -237,6 +237,7 @@ void radeon_atom_backlight_init(struct radeon_encoder *radeon_encoder, backlight_update_status(bd); DRM_INFO("radeon atom DIG backlight initialized\n"); + rdev->mode_info.bl_encoder = radeon_encoder; return; diff --git a/drivers/gpu/drm/radeon/radeon_encoders.c b/drivers/gpu/drm/radeon/radeon_encoders.c index ef99917f000d9..c6ee80216cf4a 100644 --- a/drivers/gpu/drm/radeon/radeon_encoders.c +++ b/drivers/gpu/drm/radeon/radeon_encoders.c @@ -194,7 +194,6 @@ static void radeon_encoder_add_backlight(struct radeon_encoder *radeon_encoder, radeon_atom_backlight_init(radeon_encoder, connector); else radeon_legacy_backlight_init(radeon_encoder, connector); - rdev->mode_info.bl_encoder = radeon_encoder; } } diff --git a/drivers/gpu/drm/radeon/radeon_legacy_encoders.c b/drivers/gpu/drm/radeon/radeon_legacy_encoders.c index 45715307db717..30de43366eae8 100644 --- a/drivers/gpu/drm/radeon/radeon_legacy_encoders.c +++ b/drivers/gpu/drm/radeon/radeon_legacy_encoders.c @@ -441,6 +441,7 @@ void radeon_legacy_backlight_init(struct radeon_encoder *radeon_encoder, backlight_update_status(bd); DRM_INFO("radeon legacy LVDS backlight initialized\n"); + rdev->mode_info.bl_encoder = radeon_encoder; return; From b13d8c95f5e0de81892045ec4d8633ab9fc025fd Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Wed, 7 Oct 2015 13:10:54 +0200 Subject: [PATCH 1277/2091] iio: mxs-lradc: Fix temperature offset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit b94e22805a2224061bb263a82b72e09544a5fbb3 upstream. 0° Kelvin is actually −273.15°C, not -272.15°C. Fix the temperature offset. Also improve the comment explaining the calculation. Reported-by: Janusz Użycki Signed-off-by: Alexandre Belloni Acked-by: Stefan Wahren Acked-by: Marek Vasut Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/staging/iio/adc/mxs-lradc.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/staging/iio/adc/mxs-lradc.c b/drivers/staging/iio/adc/mxs-lradc.c index d7c5223f1c3e7..2931ea9b75d13 100644 --- a/drivers/staging/iio/adc/mxs-lradc.c +++ b/drivers/staging/iio/adc/mxs-lradc.c @@ -919,11 +919,12 @@ static int mxs_lradc_read_raw(struct iio_dev *iio_dev, case IIO_CHAN_INFO_OFFSET: if (chan->type == IIO_TEMP) { /* The calculated value from the ADC is in Kelvin, we - * want Celsius for hwmon so the offset is - * -272.15 * scale + * want Celsius for hwmon so the offset is -273.15 + * The offset is applied before scaling so it is + * actually -213.15 * 4 / 1.012 = -1079.644268 */ - *val = -1075; - *val2 = 691699; + *val = -1079; + *val2 = 644268; return IIO_VAL_INT_PLUS_MICRO; } From 49ffed3bf81c2da2ba1958df661d2f0580a2335c Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 2 Sep 2015 21:02:58 +0200 Subject: [PATCH 1278/2091] iio: st_accel: fix interrupt handling on LIS3LV02 commit 61fd56309165d4790f99462d893b099f0b07312a upstream. 
This accelerometer accidentally either emits a DRDY signal or an IRQ signal. Accidentally I activated the IRQ signal as I thought it was analogous to the interrupt generator on other ST accelerometers. This was wrong. After this patch generic_buffer gives a nice stream of accelerometer readings. Fixes: 3acddf74f807778f "iio: st-sensors: add support for lis3lv02d accelerometer" Cc: Denis CIOCCA Signed-off-by: Linus Walleij Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/accel/st_accel_core.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/iio/accel/st_accel_core.c b/drivers/iio/accel/st_accel_core.c index 211b13271c615..2ae7150442fc8 100644 --- a/drivers/iio/accel/st_accel_core.c +++ b/drivers/iio/accel/st_accel_core.c @@ -149,8 +149,6 @@ #define ST_ACCEL_4_BDU_MASK 0x40 #define ST_ACCEL_4_DRDY_IRQ_ADDR 0x21 #define ST_ACCEL_4_DRDY_IRQ_INT1_MASK 0x04 -#define ST_ACCEL_4_IG1_EN_ADDR 0x21 -#define ST_ACCEL_4_IG1_EN_MASK 0x08 #define ST_ACCEL_4_MULTIREAD_BIT true static const struct iio_chan_spec st_accel_12bit_channels[] = { @@ -446,10 +444,6 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = { .drdy_irq = { .addr = ST_ACCEL_4_DRDY_IRQ_ADDR, .mask_int1 = ST_ACCEL_4_DRDY_IRQ_INT1_MASK, - .ig1 = { - .en_addr = ST_ACCEL_4_IG1_EN_ADDR, - .en_mask = ST_ACCEL_4_IG1_EN_MASK, - }, }, .multi_read_bit = ST_ACCEL_4_MULTIREAD_BIT, .bootime = 2, /* guess */ From d3f3bbe554198c39f2e8e69b92c87444ae4273c4 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 8 Aug 2015 22:16:42 +0300 Subject: [PATCH 1279/2091] iio: accel: sca3000: memory corruption in sca3000_read_first_n_hw_rb() commit eda7d0f38aaf50dbb2a2de15e8db386c4f6f65fc upstream. "num_read" is in byte units but we are write u16s so we end up write twice as much as intended. Signed-off-by: Dan Carpenter Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/staging/iio/accel/sca3000_ring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/iio/accel/sca3000_ring.c b/drivers/staging/iio/accel/sca3000_ring.c index 8589eade1057e..de65a8730d888 100644 --- a/drivers/staging/iio/accel/sca3000_ring.c +++ b/drivers/staging/iio/accel/sca3000_ring.c @@ -116,7 +116,7 @@ static int sca3000_read_first_n_hw_rb(struct iio_buffer *r, if (ret) goto error_ret; - for (i = 0; i < num_read; i++) + for (i = 0; i < num_read / sizeof(u16); i++) *(((u16 *)rx) + i) = be16_to_cpup((__be16 *)rx + i); if (copy_to_user(buf, rx, num_read)) From 3c38392d5697f28d62a2a910c2ec7aa700d0b2ad Mon Sep 17 00:00:00 2001 From: Ronny Hegewald Date: Thu, 15 Oct 2015 18:50:46 +0000 Subject: [PATCH 1280/2091] rbd: require stable pages if message data CRCs are enabled commit bae818ee1577c27356093901a0ea48f672eda514 upstream. rbd requires stable pages, as it performs a crc of the page data before they are send to the OSDs. But since kernel 3.9 (patch 1d1d1a767206fbe5d4c69493b7e6d2a8d08cc0a0 "mm: only enforce stable page writes if the backing device requires it") it is not assumed anymore that block devices require stable pages. This patch sets the necessary flag to get stable pages back for rbd. In a ceph installation that provides multiple ext4 formatted rbd devices "bad crc" messages appeared regularly (ca 1 message every 1-2 minutes on every OSD that provided the data for the rbd) in the OSD-logs before this patch. After this patch this messages are pretty much gone (only ca 1-2 / month / OSD). 
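Why stable pages matter when CRCs are enabled can be shown in a few lines: the checksum is computed when the write is queued, so any change to the page before the data leaves the host makes the receiver's recomputed CRC disagree. A standalone demonstration using zlib's crc32() (build with -lz; this is only an analogy, not ceph code):

  #include <stdio.h>
  #include <string.h>
  #include <zlib.h>                       /* crc32() */

  #define PAGE_SZ 4096

  int main(void)
  {
      static unsigned char page[PAGE_SZ];
      unsigned long sent_crc, seen_crc;

      memset(page, 0xaa, sizeof(page));
      sent_crc = crc32(0L, page, sizeof(page));  /* CRC taken at submit time */

      page[0] = 0xbb;                            /* page redirtied "in flight" */
      seen_crc = crc32(0L, page, sizeof(page));  /* what the receiver computes */

      printf("crc match: %s\n",
             sent_crc == seen_crc ? "yes" : "no (\"bad crc\")");
      return 0;
  }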
Signed-off-by: Ronny Hegewald [idryomov@gmail.com: require stable pages only in crc case, changelog] [idryomov@gmail.com: backport to 3.18-4.2: context] Signed-off-by: Ilya Dryomov Signed-off-by: Greg Kroah-Hartman --- drivers/block/rbd.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index fe8f1e4b4c7c2..4b4d9fce7387e 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -3797,6 +3797,9 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) q->limits.discard_zeroes_data = 1; blk_queue_merge_bvec(q, rbd_merge_bvec); + if (!ceph_test_opt(rbd_dev->rbd_client->client, NOCRC)) + q->backing_dev_info.capabilities |= BDI_CAP_STABLE_WRITES; + disk->queue = q; q->queuedata = rbd_dev; From 10f560cde4edf75af70eb55c0e55b3a04fcb6b31 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Sun, 11 Oct 2015 19:38:00 +0200 Subject: [PATCH 1281/2091] rbd: don't leak parent_spec in rbd_dev_probe_parent() commit 1f2c6651f69c14d0d3a9cfbda44ea101b02160ba upstream. Currently we leak parent_spec and trigger a "parent reference underflow" warning if rbd_dev_create() in rbd_dev_probe_parent() fails. The problem is we take the !parent out_err branch and that only drops refcounts; parent_spec that would've been freed had we called rbd_dev_unparent() remains and triggers rbd_warn() in rbd_dev_parent_put() - at that point we have parent_spec != NULL and parent_ref == 0, so counter ends up being -1 after the decrement. Redo rbd_dev_probe_parent() to fix this. Signed-off-by: Ilya Dryomov [idryomov@gmail.com: backport to < 4.2: rbd_dev->opts] Reviewed-by: Alex Elder Signed-off-by: Greg Kroah-Hartman --- drivers/block/rbd.c | 35 +++++++++++++++-------------------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 4b4d9fce7387e..02c1ee2bcce71 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -5148,41 +5148,36 @@ static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev) static int rbd_dev_probe_parent(struct rbd_device *rbd_dev) { struct rbd_device *parent = NULL; - struct rbd_spec *parent_spec; - struct rbd_client *rbdc; int ret; if (!rbd_dev->parent_spec) return 0; - /* - * We need to pass a reference to the client and the parent - * spec when creating the parent rbd_dev. Images related by - * parent/child relationships always share both. - */ - parent_spec = rbd_spec_get(rbd_dev->parent_spec); - rbdc = __rbd_get_client(rbd_dev->rbd_client); - ret = -ENOMEM; - parent = rbd_dev_create(rbdc, parent_spec); - if (!parent) + parent = rbd_dev_create(rbd_dev->rbd_client, rbd_dev->parent_spec); + if (!parent) { + ret = -ENOMEM; goto out_err; + } + + /* + * Images related by parent/child relationships always share + * rbd_client and spec/parent_spec, so bump their refcounts. + */ + __rbd_get_client(rbd_dev->rbd_client); + rbd_spec_get(rbd_dev->parent_spec); ret = rbd_dev_image_probe(parent, false); if (ret < 0) goto out_err; + rbd_dev->parent = parent; atomic_set(&rbd_dev->parent_ref, 1); - return 0; + out_err: - if (parent) { - rbd_dev_unparent(rbd_dev); + rbd_dev_unparent(rbd_dev); + if (parent) rbd_dev_destroy(parent); - } else { - rbd_put_client(rbdc); - rbd_spec_put(parent_spec); - } - return ret; } From 1e335cfce3d1fe07b754f2efd0781e8d58f1e675 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Sun, 11 Oct 2015 19:38:00 +0200 Subject: [PATCH 1282/2091] rbd: prevent kernel stack blow up on rbd map commit 6d69bb536bac0d403d83db1ca841444981b280cd upstream. 
Mapping an image with a long parent chain (e.g. image foo, whose parent is bar, whose parent is baz, etc) currently leads to a kernel stack overflow, due to the following recursion in the reply path: rbd_osd_req_callback() rbd_obj_request_complete() rbd_img_obj_callback() rbd_img_parent_read_callback() rbd_obj_request_complete() ... Limit the parent chain to 16 images, which is ~5K worth of stack. When the above recursion is eliminated, this limit can be lifted. Fixes: http://tracker.ceph.com/issues/12538 Signed-off-by: Ilya Dryomov Reviewed-by: Josh Durgin [idryomov@gmail.com: backport to 4.1: rbd_dev->opts] Signed-off-by: Greg Kroah-Hartman --- drivers/block/rbd.c | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 02c1ee2bcce71..1ec6441fe2a5a 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -96,6 +96,8 @@ static int atomic_dec_return_safe(atomic_t *v) #define RBD_MINORS_PER_MAJOR 256 #define RBD_SINGLE_MAJOR_PART_SHIFT 4 +#define RBD_MAX_PARENT_CHAIN_LEN 16 + #define RBD_SNAP_DEV_NAME_PREFIX "snap_" #define RBD_MAX_SNAP_NAME_LEN \ (NAME_MAX - (sizeof (RBD_SNAP_DEV_NAME_PREFIX) - 1)) @@ -425,7 +427,7 @@ static ssize_t rbd_add_single_major(struct bus_type *bus, const char *buf, size_t count); static ssize_t rbd_remove_single_major(struct bus_type *bus, const char *buf, size_t count); -static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping); +static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth); static void rbd_spec_put(struct rbd_spec *spec); static int rbd_dev_id_to_minor(int dev_id) @@ -5145,7 +5147,12 @@ static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev) return ret; } -static int rbd_dev_probe_parent(struct rbd_device *rbd_dev) +/* + * @depth is rbd_dev_image_probe() -> rbd_dev_probe_parent() -> + * rbd_dev_image_probe() recursion depth, which means it's also the + * length of the already discovered part of the parent chain. + */ +static int rbd_dev_probe_parent(struct rbd_device *rbd_dev, int depth) { struct rbd_device *parent = NULL; int ret; @@ -5153,6 +5160,12 @@ static int rbd_dev_probe_parent(struct rbd_device *rbd_dev) if (!rbd_dev->parent_spec) return 0; + if (++depth > RBD_MAX_PARENT_CHAIN_LEN) { + pr_info("parent chain is too long (%d)\n", depth); + ret = -EINVAL; + goto out_err; + } + parent = rbd_dev_create(rbd_dev->rbd_client, rbd_dev->parent_spec); if (!parent) { ret = -ENOMEM; @@ -5166,7 +5179,7 @@ static int rbd_dev_probe_parent(struct rbd_device *rbd_dev) __rbd_get_client(rbd_dev->rbd_client); rbd_spec_get(rbd_dev->parent_spec); - ret = rbd_dev_image_probe(parent, false); + ret = rbd_dev_image_probe(parent, depth); if (ret < 0) goto out_err; @@ -5295,7 +5308,7 @@ static void rbd_dev_image_release(struct rbd_device *rbd_dev) * parent), initiate a watch on its header object before using that * object to get detailed information about the rbd image. 
*/ -static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping) +static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth) { int ret; @@ -5313,7 +5326,7 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping) if (ret) goto err_out_format; - if (mapping) { + if (!depth) { ret = rbd_dev_header_watch_sync(rbd_dev); if (ret) { if (ret == -ENOENT) @@ -5334,7 +5347,7 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping) * Otherwise this is a parent image, identified by pool, image * and snap ids - need to fill in names for those ids. */ - if (mapping) + if (!depth) ret = rbd_spec_fill_snap_id(rbd_dev); else ret = rbd_spec_fill_names(rbd_dev); @@ -5356,12 +5369,12 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping) * Need to warn users if this image is the one being * mapped and has a parent. */ - if (mapping && rbd_dev->parent_spec) + if (!depth && rbd_dev->parent_spec) rbd_warn(rbd_dev, "WARNING: kernel layering is EXPERIMENTAL!"); } - ret = rbd_dev_probe_parent(rbd_dev); + ret = rbd_dev_probe_parent(rbd_dev, depth); if (ret) goto err_out_probe; @@ -5372,7 +5385,7 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping) err_out_probe: rbd_dev_unprobe(rbd_dev); err_out_watch: - if (mapping) + if (!depth) rbd_dev_header_unwatch_sync(rbd_dev); out_header_name: kfree(rbd_dev->header_name); @@ -5437,7 +5450,7 @@ static ssize_t do_rbd_add(struct bus_type *bus, rbdc = NULL; /* rbd_dev now owns this */ spec = NULL; /* rbd_dev now owns this */ - rc = rbd_dev_image_probe(rbd_dev, true); + rc = rbd_dev_image_probe(rbd_dev, 0); if (rc < 0) goto err_out_rbd_dev; From 17a85a76a2cfa579060efcd7c25e0daa134c2746 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sat, 3 Oct 2015 13:03:47 -0700 Subject: [PATCH 1283/2091] ARM: orion: Fix DSA platform device after mvmdio conversion commit d836ace65ee98d7079bc3c5afdbcc0e27dca20a3 upstream. DSA expects the host_dev pointer to be the device structure associated with the MDIO bus controller driver. First commit breaking that was c3a07134e6aa ("mv643xx_eth: convert to use the Marvell Orion MDIO driver"), and then, it got completely under the radar for a while. Reported-by: Frans van de Wiel Fixes: c3a07134e6aa ("mv643xx_eth: convert to use the Marvell Orion MDIO driver") Signed-off-by: Florian Fainelli Signed-off-by: Gregory CLEMENT Signed-off-by: Greg Kroah-Hartman --- arch/arm/plat-orion/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/plat-orion/common.c b/arch/arm/plat-orion/common.c index f5b00f41c4f6d..b8b6e22f99875 100644 --- a/arch/arm/plat-orion/common.c +++ b/arch/arm/plat-orion/common.c @@ -499,7 +499,7 @@ void __init orion_ge00_switch_init(struct dsa_platform_data *d, int irq) d->netdev = &orion_ge00.dev; for (i = 0; i < d->nr_chips; i++) - d->chip[i].host_dev = &orion_ge00_shared.dev; + d->chip[i].host_dev = &orion_ge_mvmdio.dev; orion_switch_device.dev.platform_data = d; platform_device_register(&orion_switch_device); From af50e1769ca3edf7239890059e7877e7513d1098 Mon Sep 17 00:00:00 2001 From: Marcin Wojtas Date: Thu, 15 Oct 2015 03:17:08 +0200 Subject: [PATCH 1284/2091] ARM: mvebu: correct a385-db-ap compatible string commit db347f1a5304d68c68c52f19971924b1e5842f3c upstream. This commit enables standby support on Armada 385 DB-AP board, because the PM initalization routine requires "marvell,armada380" compatible string for all Armada 38x-based platforms. 
Beside the compatible "marvell,armada38x" was wrong and should be fixed in the stable kernels too. [gregory.clement@free-electrons.com: add information, about the fixes] Fixes: e5ee12817e9ea ("ARM: mvebu: Add Armada 385 Access Point Development Board support") Signed-off-by: Marcin Wojtas Signed-off-by: Gregory CLEMENT Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/armada-385-db-ap.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/armada-385-db-ap.dts b/arch/arm/boot/dts/armada-385-db-ap.dts index 7219ac3a3d900..9f730e8e9f872 100644 --- a/arch/arm/boot/dts/armada-385-db-ap.dts +++ b/arch/arm/boot/dts/armada-385-db-ap.dts @@ -46,7 +46,7 @@ / { model = "Marvell Armada 385 Access Point Development Board"; - compatible = "marvell,a385-db-ap", "marvell,armada385", "marvell,armada38x"; + compatible = "marvell,a385-db-ap", "marvell,armada385", "marvell,armada380"; chosen { stdout-path = "serial1:115200n8"; From cad2d6d21eb4f335a03f43da2deaa50a327294b1 Mon Sep 17 00:00:00 2001 From: Alim Akhtar Date: Tue, 13 Oct 2015 04:32:53 +0900 Subject: [PATCH 1285/2091] ARM: dts: Fix audio card detection on Peach boards commit b8bb9baad27e455c467e8fac47eebadbe765c18f upstream. Since commit 2fad972d45c4 ("ARM: dts: Add mclk entry for Peach boards"), sound card detection is broken on peach boards and gives below errors: [ 3.630457] max98090 7-0010: MAX98091 REVID=0x51 [ 3.634233] max98090 7-0010: use default 2.8v micbias [ 3.640985] snow-audio sound: HiFi <-> 3830000.i2s mapping ok [ 3.645307] max98090 7-0010: Invalid master clock frequency [ 3.650824] snow-audio sound: ASoC: Peach-Pi-I2S-MAX98091 late_probe() failed: -22 [ 3.658914] snow-audio sound: snd_soc_register_card failed (-22) [ 3.664366] snow-audio: probe of sound failed with error -22 This patch adds missing assigned-clocks and assigned-clock-parents for pmu_system_controller node which is used as "mclk" for audio codec. Fixes: 2fad972d45c4 ("ARM: dts: Add mclk entry for Peach boards") Signed-off-by: Alim Akhtar Reviewed-by: Krzysztof Kozlowski Signed-off-by: Kukjin Kim Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/exynos5420-peach-pit.dts | 5 +++++ arch/arm/boot/dts/exynos5800-peach-pi.dts | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/arch/arm/boot/dts/exynos5420-peach-pit.dts b/arch/arm/boot/dts/exynos5420-peach-pit.dts index 146e71118a72b..a0ec8bff83ddf 100644 --- a/arch/arm/boot/dts/exynos5420-peach-pit.dts +++ b/arch/arm/boot/dts/exynos5420-peach-pit.dts @@ -915,6 +915,11 @@ }; }; +&pmu_system_controller { + assigned-clocks = <&pmu_system_controller 0>; + assigned-clock-parents = <&clock CLK_FIN_PLL>; +}; + &rtc { status = "okay"; clocks = <&clock CLK_RTC>, <&max77802 MAX77802_CLK_32K_AP>; diff --git a/arch/arm/boot/dts/exynos5800-peach-pi.dts b/arch/arm/boot/dts/exynos5800-peach-pi.dts index 02eb8b15374f3..1171f347878a7 100644 --- a/arch/arm/boot/dts/exynos5800-peach-pi.dts +++ b/arch/arm/boot/dts/exynos5800-peach-pi.dts @@ -878,6 +878,11 @@ }; }; +&pmu_system_controller { + assigned-clocks = <&pmu_system_controller 0>; + assigned-clock-parents = <&clock CLK_FIN_PLL>; +}; + &rtc { status = "okay"; clocks = <&clock CLK_RTC>, <&max77802 MAX77802_CLK_32K_AP>; From 9e21e90e845f80a15ef2b3c9cb9793567e11af27 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Fri, 25 Sep 2015 16:02:03 +0300 Subject: [PATCH 1286/2091] ARM: dts: am57xx-beagle-x15: set VDD_SD to always-on commit 7e381ec6a36aa44f15fc1a76e6efb9e2cd942e61 upstream. LDO1 regulator (VDD_SD) is connected to SoC's vddshv8. 
vddshv8 needs to be kept always powered (see commit 5a0f93c6576a ("ARM: dts: Add am57xx-beagle-x15"), but at the moment VDD_SD is enabled/disabled depending on whether an SD card is inserted or not. This patch sets LDO1 regulator to always-on. This patch has a side effect of fixing another issue, HDMI DDC not working when SD card is not inserted: Why this happens is that the tpd12s015 (HDMI level shifter/ESD protection chip) has LS_OE GPIO input, which needs to be enabled for the HDMI DDC to work. LS_OE comes from gpio6_28. The pin that provides gpio6_28 is powered by vddshv8, and vddshv8 comes from VDD_SD. So when SD card is not inserted, VDD_SD is disabled, and LS_OE stays off. The proper fix for the HDMI DDC issue would be to maybe have the pinctrl framework manage the pin specific power. Apparently this fixes also a third issue (copy paste from Kishon's patch): ldo1_reg in addition to being connected to the io lines is also connected to the card detect line. On card removal, omap_hsmmc driver does a regulator_disable causing card detect line to be pulled down. This raises a card insertion interrupt and once the MMC core detects there is no card inserted, it does a regulator disable which again raises a card insertion interrupt. This happens in a loop causing infinite MMC interrupts. Fixes: 5a0f93c6576a ("ARM: dts: Add am57xx-beagle-x15") Cc: Kishon Vijay Abraham I Signed-off-by: Tomi Valkeinen Reported-by: Louis McCarthy Acked-by: Nishanth Menon Signed-off-by: Tony Lindgren Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/am57xx-beagle-x15.dts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/am57xx-beagle-x15.dts b/arch/arm/boot/dts/am57xx-beagle-x15.dts index c9df40e5cd3b2..e8397879d0a7e 100644 --- a/arch/arm/boot/dts/am57xx-beagle-x15.dts +++ b/arch/arm/boot/dts/am57xx-beagle-x15.dts @@ -354,11 +354,12 @@ /* SMPS9 unused */ ldo1_reg: ldo1 { - /* VDD_SD */ + /* VDD_SD / VDDSHV8 */ regulator-name = "ldo1"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <3300000>; regulator-boot-on; + regulator-always-on; }; ldo2_reg: ldo2 { From ec51ec401be08a6216f9a29249f733fcd0e74a17 Mon Sep 17 00:00:00 2001 From: Timo Sigurdsson Date: Tue, 4 Aug 2015 23:08:01 +0200 Subject: [PATCH 1287/2091] ARM: dts: sunxi: Raise minimum CPU voltage for sun7i-a20 to meet SoC specifications commit eaeef1ad9b6ea6df1d1220c254d9563da60cb9d1 upstream. sun7i-a20.dtsi contains a cpufreq operating point at 0.9 volts. The minimum CPU voltage for the Allwinner A20 SoC, however, is 1.0 volts. Thus, raise the voltage for the lowest operating point to 1.0 volts in order to stay within the SoC specifications. It is an undervolted setting that isn't stable across all SoCs and boards out there. Fixes: d96b7161916f ("ARM: dts: sun7i: Add cpu clock reference and operating points to dtsi") Signed-off-by: Timo Sigurdsson Acked-by: Iain Paton Signed-off-by: Maxime Ripard Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/sun7i-a20.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/sun7i-a20.dtsi b/arch/arm/boot/dts/sun7i-a20.dtsi index 2b4847c7cbd41..fa36571b755ab 100644 --- a/arch/arm/boot/dts/sun7i-a20.dtsi +++ b/arch/arm/boot/dts/sun7i-a20.dtsi @@ -111,7 +111,7 @@ 720000 1200000 528000 1100000 312000 1000000 - 144000 900000 + 144000 1000000 >; #cooling-cells = <2>; cooling-min-level = <0>; From 5c11cecb4671b380f0802c1fbe8833aad9d914d2 Mon Sep 17 00:00:00 2001 From: "H. 
Nikolaus Schaller" Date: Fri, 16 Oct 2015 22:19:06 +0100 Subject: [PATCH 1288/2091] ARM: 8445/1: fix vdsomunge not to depend on glibc specific byteswap.h commit 8a603f91cc4848ab1a0458bc065aa9f64322e123 upstream. If the host toolchain is not glibc based then the arm kernel build fails with HOSTCC arch/arm/vdso/vdsomunge arch/arm/vdso/vdsomunge.c:48:22: fatal error: byteswap.h: No such file or directory Observed: with omap2plus_defconfig and compile on Mac OS X with arm ELF cross-compiler. Reason: byteswap.h is a glibc only header. Solution: replace by private byte-swapping macros (taken from arch/mips/boot/elf2ecoff.c and kindly improved by Russell King) Tested to compile on Mac OS X 10.9.5 host. Signed-off-by: H. Nikolaus Schaller Signed-off-by: Nathan Lynch Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/vdso/vdsomunge.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/arch/arm/vdso/vdsomunge.c b/arch/arm/vdso/vdsomunge.c index aedec81d11988..0cebd98cd88c3 100644 --- a/arch/arm/vdso/vdsomunge.c +++ b/arch/arm/vdso/vdsomunge.c @@ -45,7 +45,6 @@ * it does. */ -#include #include #include #include @@ -59,6 +58,16 @@ #include #include +#define swab16(x) \ + ((((x) & 0x00ff) << 8) | \ + (((x) & 0xff00) >> 8)) + +#define swab32(x) \ + ((((x) & 0x000000ff) << 24) | \ + (((x) & 0x0000ff00) << 8) | \ + (((x) & 0x00ff0000) >> 8) | \ + (((x) & 0xff000000) << 24)) + #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ #define HOST_ORDER ELFDATA2LSB #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ @@ -104,17 +113,17 @@ static void cleanup(void) static Elf32_Word read_elf_word(Elf32_Word word, bool swap) { - return swap ? bswap_32(word) : word; + return swap ? swab32(word) : word; } static Elf32_Half read_elf_half(Elf32_Half half, bool swap) { - return swap ? bswap_16(half) : half; + return swap ? swab16(half) : half; } static void write_elf_word(Elf32_Word val, Elf32_Word *dst, bool swap) { - *dst = swap ? bswap_32(val) : val; + *dst = swap ? swab32(val) : val; } int main(int argc, char **argv) From 62f362095ba8f7cc31c8f7a48e07bd9c871a6524 Mon Sep 17 00:00:00 2001 From: "H. Nikolaus Schaller" Date: Wed, 28 Oct 2015 19:00:26 +0100 Subject: [PATCH 1289/2091] ARM: 8449/1: fix bug in vdsomunge swab32 macro commit 38850d786a799c3ff2de0dc1980902c3263698dc upstream. Commit 8a603f91cc48 ("ARM: 8445/1: fix vdsomunge not to depend on glibc specific byteswap.h") unfortunately introduced a bug created but not found during discussion and patch simplification. Reported-by: Efraim Yawitz Signed-off-by: H. Nikolaus Schaller Fixes: 8a603f91cc48 ("ARM: 8445/1: fix vdsomunge not to depend on glibc specific byteswap.h") Signed-off-by: Nathan Lynch Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/vdso/vdsomunge.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/vdso/vdsomunge.c b/arch/arm/vdso/vdsomunge.c index 0cebd98cd88c3..f6455273b2f87 100644 --- a/arch/arm/vdso/vdsomunge.c +++ b/arch/arm/vdso/vdsomunge.c @@ -66,7 +66,7 @@ ((((x) & 0x000000ff) << 24) | \ (((x) & 0x0000ff00) << 8) | \ (((x) & 0x00ff0000) >> 8) | \ - (((x) & 0xff000000) << 24)) + (((x) & 0xff000000) >> 24)) #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ #define HOST_ORDER ELFDATA2LSB From c538a266b0214f8fcf76b93d374549493c526ae1 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 28 Oct 2015 16:56:13 +0000 Subject: [PATCH 1290/2091] Revert "ARM64: unwind: Fix PC calculation" commit 9702970c7bd3e2d6fecb642a190269131d4ac16c upstream. 
This reverts commit e306dfd06fcb44d21c80acb8e5a88d55f3d1cf63. With this patch applied, we were the only architecture making this sort of adjustment to the PC calculation in the unwinder. This causes problems for ftrace, where the PC values are matched against the contents of the stack frames in the callchain and fail to match any records after the address adjustment. Whilst there has been some effort to change ftrace to workaround this, those patches are not yet ready for mainline and, since we're the odd architecture in this regard, let's just step in line with other architectures (like arch/arm/) for now. Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/stacktrace.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 407991bf79f51..ccb6078ed9f20 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -48,11 +48,7 @@ int notrace unwind_frame(struct stackframe *frame) frame->sp = fp + 0x10; frame->fp = *(unsigned long *)(fp); - /* - * -4 here because we care about the PC at time of bl, - * not where the return will go. - */ - frame->pc = *(unsigned long *)(fp + 8) - 4; + frame->pc = *(unsigned long *)(fp + 8); return 0; } From f043d45591b6a6fd4ae2888a5dbbb53f8e50153b Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Wed, 21 Oct 2015 18:36:49 +0100 Subject: [PATCH 1291/2091] dm btree remove: fix a bug when rebalancing nodes after removal commit 2871c69e025e8bc507651d5a9cf81a8a7da9d24b upstream. Commit 4c7e309340ff ("dm btree remove: fix bug in redistribute3") wasn't a complete fix for redistribute3(). The redistribute3 function takes 3 btree nodes and shares out the entries evenly between them. If the three nodes in total contained (MAX_ENTRIES * 3) - 1 entries between them then this was erroneously getting rebalanced as (MAX_ENTRIES - 1) on the left and right, and (MAX_ENTRIES + 1) in the center. Fix this issue by being more careful about calculating the target number of entries for the left and right nodes. 
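To make the arithmetic concrete, here is a small standalone illustration (not part of the patch; MAX_ENTRIES below is an arbitrary placeholder, the real limit depends on the node layout) of the old and the corrected target calculation:

  #include <stdio.h>

  #define MAX_ENTRIES 126u

  int main(void)
  {
          unsigned nr_left = MAX_ENTRIES, nr_center = MAX_ENTRIES, nr_right = MAX_ENTRIES - 1;
          unsigned total = nr_left + nr_center + nr_right;   /* (MAX_ENTRIES * 3) - 1 */

          /* old calculation: one shared target, the remainder piles up in the center */
          unsigned target = total / 3;                       /* MAX_ENTRIES - 1 */
          unsigned old_center = total - 2 * target;          /* MAX_ENTRIES + 1: overflows the node */

          /* corrected calculation: right gets total / 3, left absorbs the remainder */
          unsigned target_right = total / 3;
          unsigned remainder = (target_right * 3) != total;
          unsigned target_left = target_right + remainder;
          unsigned new_center = total - target_left - target_right;

          printf("old split: %u / %u / %u\n", target, old_center, target);
          printf("new split: %u / %u / %u\n", target_left, new_center, target_right);
          return 0;
  }

With 126 as the maximum this prints 125 / 127 / 125 for the old code and 126 / 126 / 125 for the new one, so no node is pushed past its maximum.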
Unit tested in userspace using this program: https://github.com/jthornber/redistribute3-test/blob/master/redistribute3_t.c Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/persistent-data/dm-btree-remove.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/md/persistent-data/dm-btree-remove.c b/drivers/md/persistent-data/dm-btree-remove.c index 7c0d75547ccf5..92cd09f3c69b5 100644 --- a/drivers/md/persistent-data/dm-btree-remove.c +++ b/drivers/md/persistent-data/dm-btree-remove.c @@ -301,11 +301,16 @@ static void redistribute3(struct dm_btree_info *info, struct btree_node *parent, { int s; uint32_t max_entries = le32_to_cpu(left->header.max_entries); - unsigned target = (nr_left + nr_center + nr_right) / 3; - BUG_ON(target > max_entries); + unsigned total = nr_left + nr_center + nr_right; + unsigned target_right = total / 3; + unsigned remainder = (target_right * 3) != total; + unsigned target_left = target_right + remainder; + + BUG_ON(target_left > max_entries); + BUG_ON(target_right > max_entries); if (nr_left < nr_right) { - s = nr_left - target; + s = nr_left - target_left; if (s < 0 && nr_center < -s) { /* not enough in central node */ @@ -316,10 +321,10 @@ static void redistribute3(struct dm_btree_info *info, struct btree_node *parent, } else shift(left, center, s); - shift(center, right, target - nr_right); + shift(center, right, target_right - nr_right); } else { - s = target - nr_right; + s = target_right - nr_right; if (s > 0 && nr_center < s) { /* not enough in central node */ shift(center, right, nr_center); @@ -329,7 +334,7 @@ static void redistribute3(struct dm_btree_info *info, struct btree_node *parent, } else shift(center, right, s); - shift(left, center, nr_left - target); + shift(left, center, nr_left - target_left); } *key_ptr(parent, c->index) = center->keys[0]; From 8104ee7b8f8bcfad0114cdad62dd3b4aea5fcf74 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 22 Oct 2015 10:56:40 -0400 Subject: [PATCH 1292/2091] dm btree: fix leak of bufio-backed block in btree_split_beneath error path commit 4dcb8b57df3593dcb20481d9d6cf79d1dc1534be upstream. btree_split_beneath()'s error path had an outstanding FIXME that speaks directly to the potential for _not_ cleaning up a previously allocated bufio-backed block. Fix this by releasing the previously allocated bufio block using unlock_block(). Reported-by: Mikulas Patocka Signed-off-by: Mike Snitzer Acked-by: Joe Thornber Signed-off-by: Greg Kroah-Hartman --- drivers/md/persistent-data/dm-btree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c index c7726cebc4950..d6e47033b5e0d 100644 --- a/drivers/md/persistent-data/dm-btree.c +++ b/drivers/md/persistent-data/dm-btree.c @@ -523,7 +523,7 @@ static int btree_split_beneath(struct shadow_spine *s, uint64_t key) r = new_block(s->info, &right); if (r < 0) { - /* FIXME: put left */ + unlock_block(s->info, left); return r; } From 2eed4a75bda13e54bd19d00f2c6ed00611d82327 Mon Sep 17 00:00:00 2001 From: Frederic Danis Date: Fri, 9 Oct 2015 17:14:56 +0200 Subject: [PATCH 1293/2091] Revert "serial: 8250_dma: don't bother DMA with small transfers" commit f967fc8f165fadb72166f2bd4785094b3ca21307 upstream. This reverts commit 9119fba0cfeda6d415c9f068df66838a104b87cb. This commit prevents from sending "big" file using Bluetooth. 
When sending a lot of data quickly through the Bluetooth interface, and after a variable amount of data sent, transfer fails with error: kernel: [ 415.247453] Bluetooth: hci0 hardware error 0x00 Found on T100TA. After reverting this commit, send works fine for any file size. Signed-off-by: Frederic Danis Fixes: 9119fba0cfed (serial: 8250_dma: don't bother DMA with small transfers) Reviewed-by: Heikki Krogerus Acked-by: Andy Shevchenko Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_dma.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/tty/serial/8250/8250_dma.c b/drivers/tty/serial/8250/8250_dma.c index 21d01a491405a..e508939daea3f 100644 --- a/drivers/tty/serial/8250/8250_dma.c +++ b/drivers/tty/serial/8250/8250_dma.c @@ -80,10 +80,6 @@ int serial8250_tx_dma(struct uart_8250_port *p) return 0; dma->tx_size = CIRC_CNT_TO_END(xmit->head, xmit->tail, UART_XMIT_SIZE); - if (dma->tx_size < p->port.fifosize) { - ret = -EINVAL; - goto err; - } desc = dmaengine_prep_slave_single(dma->txchan, dma->tx_addr + xmit->tail, From 34a251c6eecaaf5e863ed366089e7ac9a5b78986 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Thu, 22 Oct 2015 14:24:24 +0200 Subject: [PATCH 1294/2091] USB: qcserial: add Sierra Wireless MC74xx/EM74xx MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f504ab1888026d15b5be8f9c262bf4ae9cacd177 upstream. New device IDs shamelessly lifted from the vendor driver. Signed-off-by: Bjørn Mork Acked-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/qcserial.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c index ebcec8cda8584..f49d262e926ba 100644 --- a/drivers/usb/serial/qcserial.c +++ b/drivers/usb/serial/qcserial.c @@ -153,6 +153,8 @@ static const struct usb_device_id id_table[] = { {DEVICE_SWI(0x1199, 0x9056)}, /* Sierra Wireless Modem */ {DEVICE_SWI(0x1199, 0x9060)}, /* Sierra Wireless Modem */ {DEVICE_SWI(0x1199, 0x9061)}, /* Sierra Wireless Modem */ + {DEVICE_SWI(0x1199, 0x9070)}, /* Sierra Wireless MC74xx/EM74xx */ + {DEVICE_SWI(0x1199, 0x9071)}, /* Sierra Wireless MC74xx/EM74xx */ {DEVICE_SWI(0x413c, 0x81a2)}, /* Dell Wireless 5806 Gobi(TM) 4G LTE Mobile Broadband Card */ {DEVICE_SWI(0x413c, 0x81a3)}, /* Dell Wireless 5570 HSPA+ (42Mbps) Mobile Broadband Card */ {DEVICE_SWI(0x413c, 0x81a4)}, /* Dell Wireless 5570e HSPA+ (42Mbps) Mobile Broadband Card */ From 783a9cc58acb0fd2d1c34bd620d524f664c47edf Mon Sep 17 00:00:00 2001 From: Hezi Shahmoon Date: Tue, 20 Oct 2015 16:32:24 +0200 Subject: [PATCH 1295/2091] i2c: mv64xxx: really allow I2C offloading commit 0729a04977d497cf66234fd7f900ddcec3ef1c52 upstream. Commit 00d8689b85a7 ("i2c: mv64xxx: rework offload support to fix several problems") completely reworked the offload support, but left a debugging-related "return false" at the beginning of the mv64xxx_i2c_can_offload() function. This has the unfortunate consequence that offloading is in fact never used, which wasn't really the intention. This commit fixes that problem by removing the bogus "return false". Fixes: 00d8689b85a7 ("i2c: mv64xxx: rework offload support to fix several problems") Signed-off-by: Hezi Shahmoon [Thomas: reworked commit log and title.] 
Signed-off-by: Thomas Petazzoni Signed-off-by: Wolfram Sang Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-mv64xxx.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-mv64xxx.c b/drivers/i2c/busses/i2c-mv64xxx.c index 30059c1df2a3b..5801227b97ab0 100644 --- a/drivers/i2c/busses/i2c-mv64xxx.c +++ b/drivers/i2c/busses/i2c-mv64xxx.c @@ -669,8 +669,6 @@ mv64xxx_i2c_can_offload(struct mv64xxx_i2c_data *drv_data) struct i2c_msg *msgs = drv_data->msgs; int num = drv_data->num_msgs; - return false; - if (!drv_data->offload_enabled) return false; From 25713cfc86030924312dec2ba82667f2fb94856b Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Mon, 12 Oct 2015 11:30:12 +0300 Subject: [PATCH 1296/2091] xhci: handle no ping response error properly commit 3b4739b8951d650becbcd855d7d6f18ac98a9a85 upstream. If a host fails to wake up a isochronous SuperSpeed device from U1/U2 in time for a isoch transfer it will generate a "No ping response error" Host will then move to the next transfer descriptor. Handle this case in the same way as missed service errors, tag the current TD as skipped and handle it on the next transfer event. Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index ad975a2975ca4..41d7a05f8af4a 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2239,6 +2239,7 @@ static int handle_tx_event(struct xhci_hcd *xhci, u32 trb_comp_code; int ret = 0; int td_num = 0; + bool handling_skipped_tds = false; slot_id = TRB_TO_SLOT_ID(le32_to_cpu(event->flags)); xdev = xhci->devs[slot_id]; @@ -2372,6 +2373,10 @@ static int handle_tx_event(struct xhci_hcd *xhci, ep->skip = true; xhci_dbg(xhci, "Miss service interval error, set skip flag\n"); goto cleanup; + case COMP_PING_ERR: + ep->skip = true; + xhci_dbg(xhci, "No Ping response error, Skip one Isoc TD\n"); + goto cleanup; default: if (xhci_is_vendor_info_code(xhci, trb_comp_code)) { status = 0; @@ -2508,13 +2513,18 @@ static int handle_tx_event(struct xhci_hcd *xhci, ep, &status); cleanup: + + + handling_skipped_tds = ep->skip && + trb_comp_code != COMP_MISSED_INT && + trb_comp_code != COMP_PING_ERR; + /* - * Do not update event ring dequeue pointer if ep->skip is set. - * Will roll back to continue process missed tds. + * Do not update event ring dequeue pointer if we're in a loop + * processing missed tds. */ - if (trb_comp_code == COMP_MISSED_INT || !ep->skip) { + if (!handling_skipped_tds) inc_deq(xhci, xhci->event_ring); - } if (ret) { urb = td->urb; @@ -2549,7 +2559,7 @@ static int handle_tx_event(struct xhci_hcd *xhci, * Process them as short transfer until reach the td pointed by * the event. */ - } while (ep->skip && trb_comp_code != COMP_MISSED_INT); + } while (handling_skipped_tds); return 0; } From 01d369942901be5e54715047d0b88936ced31197 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Mon, 12 Oct 2015 11:30:13 +0300 Subject: [PATCH 1297/2091] xhci: Add spurious wakeup quirk for LynxPoint-LP controllers commit fd7cd061adcf5f7503515ba52b6a724642a839c8 upstream. We received several reports of systems rebooting and powering on after an attempted shutdown. Testing showed that setting XHCI_SPURIOUS_WAKEUP quirk in addition to the XHCI_SPURIOUS_REBOOT quirk allowed the system to shutdown as expected for LynxPoint-LP xHCI controllers. Set the quirk back. 
Note that the quirk was originally introduced for LynxPoint and LynxPoint-LP just for this same reason. See: commit 638298dc66ea ("xhci: Fix spurious wakeups after S5 on Haswell") It was later limited to only concern HP machines as it caused regression on some machines, see both bug and commit: Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=66171 commit 6962d914f317 ("xhci: Limit the spurious wakeup fix only to HP machines") Later it was discovered that the powering on after shutdown was limited to LynxPoint-LP (Haswell-ULT) and that some non-LP HP machine suffered from spontaneous resume from S3 (which should not be related to the SPURIOUS_WAKEUP quirk at all). An attempt to fix this then removed the SPURIOUS_WAKEUP flag usage completely. commit b45abacde3d5 ("xhci: no switching back on non-ULT Haswell") Current understanding is that LynxPoint-LP (Haswell ULT) machines need the SPURIOUS_WAKEUP quirk, otherwise they will restart, and plain Lynxpoint (Haswell) machines may _not_ have the quirk set otherwise they again will restart. Signed-off-by: Laura Abbott Cc: Takashi Iwai Cc: Oliver Neukum [Added more history to commit message -Mathias] Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 2af32e26fafc3..7e5c90eebb9c0 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -135,6 +135,7 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) if (pdev->vendor == PCI_VENDOR_ID_INTEL && pdev->device == PCI_DEVICE_ID_INTEL_LYNXPOINT_LP_XHCI) { xhci->quirks |= XHCI_SPURIOUS_REBOOT; + xhci->quirks |= XHCI_SPURIOUS_WAKEUP; } if (pdev->vendor == PCI_VENDOR_ID_INTEL && (pdev->device == PCI_DEVICE_ID_INTEL_SUNRISEPOINT_LP_XHCI || From 89d8953e76da25abcc74e0934267f70977e79582 Mon Sep 17 00:00:00 2001 From: Cathy Avery Date: Fri, 2 Oct 2015 09:35:01 -0400 Subject: [PATCH 1298/2091] xen-blkfront: check for null drvdata in blkback_changed (XenbusStateClosing) commit a54c8f0f2d7df525ff997e2afe71866a1a013064 upstream. xen-blkfront will crash if the check to talk_to_blkback() in blkback_changed()(XenbusStateInitWait) returns an error. The driver data is freed and info is set to NULL. Later during the close process via talk_to_blkback's call to xenbus_dev_fatal() the null pointer is passed to and dereference in blkfront_closing. Signed-off-by: Cathy Avery Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Greg Kroah-Hartman --- drivers/block/xen-blkfront.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 89c7371ab2dcc..42ef86c409b69 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -1925,7 +1925,8 @@ static void blkback_changed(struct xenbus_device *dev, break; /* Missed the backend's Closing state -- fallthrough */ case XenbusStateClosing: - blkfront_closing(info); + if (info) + blkfront_closing(info); break; } } From 310b5f12920a7d329cf7258f8488d10d4f65b4e1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 20 Aug 2015 10:34:59 +0930 Subject: [PATCH 1299/2091] module: Fix locking in symbol_put_addr() commit 275d7d44d802ef271a42dc87ac091a495ba72fc5 upstream. 
Poma (on the way to another bug) reported an assertion triggering: [] module_assert_mutex_or_preempt+0x49/0x90 [] __module_address+0x32/0x150 [] __module_text_address+0x16/0x70 [] symbol_put_addr+0x29/0x40 [] dvb_frontend_detach+0x7d/0x90 [dvb_core] Laura Abbott produced a patch which lead us to inspect symbol_put_addr(). This function has a comment claiming it doesn't need to disable preemption around the module lookup because it holds a reference to the module it wants to find, which therefore cannot go away. This is wrong (and a false optimization too, preempt_disable() is really rather cheap, and I doubt any of this is on uber critical paths, otherwise it would've retained a pointer to the actual module anyway and avoided the second lookup). While its true that the module cannot go away while we hold a reference on it, the data structure we do the lookup in very much _CAN_ change while we do the lookup. Therefore fix the comment and add the required preempt_disable(). Reported-by: poma Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Rusty Russell Fixes: a6e6abd575fc ("module: remove module_text_address()") Signed-off-by: Greg Kroah-Hartman --- kernel/module.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kernel/module.c b/kernel/module.c index cfc9e843a9240..3b9ff966edb93 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -906,11 +906,15 @@ void symbol_put_addr(void *addr) if (core_kernel_text(a)) return; - /* module_text_address is safe here: we're supposed to have reference - * to module from symbol_get, so it can't go away. */ + /* + * Even though we hold a reference on the module; we still need to + * disable preemption in order to safely traverse the data structure. + */ + preempt_disable(); modaddr = __module_text_address(a); BUG_ON(!modaddr); module_put(modaddr); + preempt_enable(); } EXPORT_SYMBOL_GPL(symbol_put_addr); From ca9ba892ebc734af45a9b61d9bc4169f73f8e379 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Wed, 7 Oct 2015 11:03:28 -0500 Subject: [PATCH 1300/2091] PCI: Prevent out of bounds access in numa_node override commit 1266963170f576d4d08e6310b6963e26d3ff9d1e upstream. 63692df103e9 ("PCI: Allow numa_node override via sysfs") didn't check that the numa node provided by userspace is valid. Passing a node number too high would attempt to access invalid memory and trigger a kernel panic. Fixes: 63692df103e9 ("PCI: Allow numa_node override via sysfs") Signed-off-by: Sasha Levin Signed-off-by: Bjorn Helgaas Signed-off-by: Greg Kroah-Hartman --- drivers/pci/pci-sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index 312f23a8429cd..92618686604cb 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -216,7 +216,7 @@ static ssize_t numa_node_store(struct device *dev, if (ret) return ret; - if (!node_online(node)) + if (node >= MAX_NUMNODES || !node_online(node)) return -EINVAL; add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); From a03bd0e033ca7b1d5394ebf17cd6e2f6a3395478 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Mon, 24 Aug 2015 15:57:18 +0300 Subject: [PATCH 1301/2091] ovl: free stack of paths in ovl_fill_super commit 0f95502ad84874b3c05fc7cdd9d4d9d5cddf7859 upstream. This fixes small memory leak after mount. Kmemleak report: unreferenced object 0xffff88003683fe00 (size 16): comm "mount", pid 2029, jiffies 4294909563 (age 33.380s) hex dump (first 16 bytes): 20 27 1f bb 00 88 ff ff 40 4b 0f 36 02 88 ff ff '......@K.6.... 
backtrace: [] create_object+0x124/0x2c0 [] kmemleak_alloc+0x7b/0xc0 [] __kmalloc+0x106/0x340 [] ovl_fill_super+0x389/0x9a0 [overlay] [] mount_nodev+0x54/0xa0 [] ovl_mount+0x18/0x20 [overlay] [] mount_fs+0x43/0x170 [] vfs_kern_mount+0x74/0x170 [] do_mount+0x22d/0xdf0 [] SyS_mount+0x7b/0xc0 [] entry_SYSCALL_64_fastpath+0x12/0x76 [] 0xffffffffffffffff Signed-off-by: Konstantin Khlebnikov Signed-off-by: Miklos Szeredi Fixes: a78d9f0d5d5c ("ovl: support multiple lower layers") Signed-off-by: Greg Kroah-Hartman --- fs/overlayfs/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 33f2d27a6792f..865b8b1bec0c5 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -981,6 +981,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) oe->lowerstack[i].dentry = stack[i].dentry; oe->lowerstack[i].mnt = ufs->lower_mnt[i]; } + kfree(stack); root_dentry->d_fsdata = oe; From 5c418f1bde8deab2ab636d64d4e8465bc22b0bb8 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Mon, 24 Aug 2015 15:57:19 +0300 Subject: [PATCH 1302/2091] ovl: free lower_mnt array in ovl_put_super commit 5ffdbe8bf1e485026e1c7e4714d2841553cf0b40 upstream. This fixes memory leak after umount. Kmemleak report: unreferenced object 0xffff8800ba791010 (size 8): comm "mount", pid 2394, jiffies 4294996294 (age 53.920s) hex dump (first 8 bytes): 20 1c 13 02 00 88 ff ff ....... backtrace: [] create_object+0x124/0x2c0 [] kmemleak_alloc+0x7b/0xc0 [] __kmalloc+0x106/0x340 [] ovl_fill_super+0x55c/0x9b0 [overlay] [] mount_nodev+0x54/0xa0 [] ovl_mount+0x18/0x20 [overlay] [] mount_fs+0x43/0x170 [] vfs_kern_mount+0x74/0x170 [] do_mount+0x22d/0xdf0 [] SyS_mount+0x7b/0xc0 [] entry_SYSCALL_64_fastpath+0x12/0x76 [] 0xffffffffffffffff Signed-off-by: Konstantin Khlebnikov Signed-off-by: Miklos Szeredi Fixes: dd662667e6d3 ("ovl: add mutli-layer infrastructure") Signed-off-by: Greg Kroah-Hartman --- fs/overlayfs/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 865b8b1bec0c5..d74af7f78fec3 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -474,6 +474,7 @@ static void ovl_put_super(struct super_block *sb) mntput(ufs->upper_mnt); for (i = 0; i < ufs->numlower; i++) mntput(ufs->lower_mnt[i]); + kfree(ufs->lower_mnt); kfree(ufs->config.lowerdir); kfree(ufs->config.upperdir); From aa637cda1d17943103bde4263252e5215a1f2805 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 18 Sep 2015 11:45:12 +0100 Subject: [PATCH 1303/2091] ovl: use O_LARGEFILE in ovl_copy_up() commit 0480334fa60488d12ae101a02d7d9e1a3d03d7dd upstream. Open the lower file with O_LARGEFILE in ovl_copy_up(). Pass O_LARGEFILE unconditionally in ovl_copy_up_data() as it's purely for catching 32-bit userspace dealing with a file large enough that it'll be mishandled if the application isn't aware that there might be an integer overflow. Inside the kernel, there shouldn't be any problems. 
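For reference, the 32-bit failure mode this guards against is easy to see from plain userspace; a minimal sketch (unrelated to the overlayfs code itself) which, built as a 32-bit binary without -D_FILE_OFFSET_BITS=64, gets EOVERFLOW for files larger than 2 GiB if O_LARGEFILE is dropped from the open:

  #define _GNU_SOURCE
  #include <errno.h>
  #include <fcntl.h>
  #include <stdio.h>
  #include <string.h>
  #include <unistd.h>

  int main(int argc, char **argv)
  {
          int fd;

          if (argc < 2) {
                  fprintf(stderr, "usage: %s <file>\n", argv[0]);
                  return 1;
          }

          /* without O_LARGEFILE a 32-bit build fails with EOVERFLOW on a >2 GiB file */
          fd = open(argv[1], O_RDONLY | O_LARGEFILE);
          if (fd < 0) {
                  printf("open: %s\n", strerror(errno));
                  return 1;
          }
          printf("open succeeded\n");
          close(fd);
          return 0;
  }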
Reported-by: Ulrich Obergfell Signed-off-by: David Howells Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/overlayfs/copy_up.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 84d693d374284..b1990ac8fa097 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -81,11 +81,11 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len) if (len == 0) return 0; - old_file = ovl_path_open(old, O_RDONLY); + old_file = ovl_path_open(old, O_LARGEFILE | O_RDONLY); if (IS_ERR(old_file)) return PTR_ERR(old_file); - new_file = ovl_path_open(new, O_WRONLY); + new_file = ovl_path_open(new, O_LARGEFILE | O_WRONLY); if (IS_ERR(new_file)) { error = PTR_ERR(new_file); goto out_fput; From 7fd58acc9f6f751aebcee8288d020d959d815445 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 18 Sep 2015 11:45:22 +0100 Subject: [PATCH 1304/2091] ovl: fix dentry reference leak commit ab79efab0a0ba01a74df782eb7fa44b044dae8b5 upstream. In ovl_copy_up_locked(), newdentry is leaked if the function exits through out_cleanup as this just to out after calling ovl_cleanup() - which doesn't actually release the ref on newdentry. The out_cleanup segment should instead exit through out2 as certainly newdentry leaks - and possibly upper does also, though this isn't caught given the catch of newdentry. Without this fix, something like the following is seen: BUG: Dentry ffff880023e9eb20{i=f861,n=#ffff880023e82d90} still in use (1) [unmount of tmpfs tmpfs] BUG: Dentry ffff880023ece640{i=0,n=bigfile} still in use (1) [unmount of tmpfs tmpfs] when unmounting the upper layer after an error occurred in copyup. An error can be induced by creating a big file in a lower layer with something like: dd if=/dev/zero of=/lower/a/bigfile bs=65536 count=1 seek=$((0xf000)) to create a large file (4.1G). Overlay an upper layer that is too small (on tmpfs might do) and then induce a copy up by opening it writably. Reported-by: Ulrich Obergfell Signed-off-by: David Howells Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/overlayfs/copy_up.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index b1990ac8fa097..871fcb67be974 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -267,7 +267,7 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, out_cleanup: ovl_cleanup(wdir, newdentry); - goto out; + goto out2; } /* From 250f9c620c988485bea9499146a5c6a58f5b0e2e Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 26 Oct 2015 01:50:28 -0700 Subject: [PATCH 1305/2091] Input: alps - only the Dell Latitude D420/430/620/630 have separate stick button bits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 195562194aad3a0a3915941077f283bcc6347b9b upstream. commit 92bac83dd79e ("Input: alps - non interleaved V2 dualpoint has separate stick button bits") assumes that all alps v2 non-interleaved dual point setups have the separate stick button bits. Later we limited this to Dell laptops only because of reports that this broke things on non Dell laptops. Now it turns out that this breaks things on the Dell Latitude D600 too. So it seems that only the Dell Latitude D420/430/620/630, which all share the same touchpad / stick combo, have these separate bits. 
This patch limits the checking of the separate bits to only these models fixing regressions with other models. Reported-and-tested-by: Larry Finger Tested-by: Hans de Goede Signed-off-by: Hans de Goede Acked-By: Pali Rohár Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/alps.c | 48 +++++++++++++++++++++++++++++++++----- 1 file changed, 42 insertions(+), 6 deletions(-) diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c index bc7eed67998aa..4b9e31a5b3f86 100644 --- a/drivers/input/mouse/alps.c +++ b/drivers/input/mouse/alps.c @@ -100,7 +100,7 @@ static const struct alps_nibble_commands alps_v6_nibble_commands[] = { #define ALPS_FOUR_BUTTONS 0x40 /* 4 direction button present */ #define ALPS_PS2_INTERLEAVED 0x80 /* 3-byte PS/2 packet interleaved with 6-byte ALPS packet */ -#define ALPS_DELL 0x100 /* device is a Dell laptop */ +#define ALPS_STICK_BITS 0x100 /* separate stick button bits */ #define ALPS_BUTTONPAD 0x200 /* device is a clickpad */ static const struct alps_model_info alps_model_data[] = { @@ -159,6 +159,43 @@ static const struct alps_protocol_info alps_v8_protocol_data = { ALPS_PROTO_V8, 0x18, 0x18, 0 }; +/* + * Some v2 models report the stick buttons in separate bits + */ +static const struct dmi_system_id alps_dmi_has_separate_stick_buttons[] = { +#if defined(CONFIG_DMI) && defined(CONFIG_X86) + { + /* Extrapolated from other entries */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Latitude D420"), + }, + }, + { + /* Reported-by: Hans de Bruin */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Latitude D430"), + }, + }, + { + /* Reported-by: Hans de Goede */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Latitude D620"), + }, + }, + { + /* Extrapolated from other entries */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Latitude D630"), + }, + }, +#endif + { } +}; + static void alps_set_abs_params_st(struct alps_data *priv, struct input_dev *dev1); static void alps_set_abs_params_mt(struct alps_data *priv, @@ -253,9 +290,8 @@ static void alps_process_packet_v1_v2(struct psmouse *psmouse) return; } - /* Dell non interleaved V2 dualpoint has separate stick button bits */ - if (priv->proto_version == ALPS_PROTO_V2 && - priv->flags == (ALPS_DELL | ALPS_PASS | ALPS_DUALPOINT)) { + /* Some models have separate stick button bits */ + if (priv->flags & ALPS_STICK_BITS) { left |= packet[0] & 1; right |= packet[0] & 2; middle |= packet[0] & 4; @@ -2544,8 +2580,6 @@ static int alps_set_protocol(struct psmouse *psmouse, priv->byte0 = protocol->byte0; priv->mask0 = protocol->mask0; priv->flags = protocol->flags; - if (dmi_name_in_vendors("Dell")) - priv->flags |= ALPS_DELL; priv->x_max = 2000; priv->y_max = 1400; @@ -2560,6 +2594,8 @@ static int alps_set_protocol(struct psmouse *psmouse, priv->set_abs_params = alps_set_abs_params_st; priv->x_max = 1023; priv->y_max = 767; + if (dmi_check_system(alps_dmi_has_separate_stick_buttons)) + priv->flags |= ALPS_STICK_BITS; break; case ALPS_PROTO_V3: From 4f277ccd28e4525d8c7bbe2de27f2710de8d4368 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 19 Oct 2015 18:23:57 +0800 Subject: [PATCH 1306/2091] crypto: api - Only abort operations on fatal signal commit 3fc89adb9fa4beff31374a4bf50b3d099d88ae83 upstream. Currently a number of Crypto API operations may fail when a signal occurs. 
This causes nasty problems as the caller of those operations are often not in a good position to restart the operation. In fact there is currently no need for those operations to be interrupted by user signals at all. All we need is for them to be killable. This patch replaces the relevant calls of signal_pending with fatal_signal_pending, and wait_for_completion_interruptible with wait_for_completion_killable, respectively. Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/ablkcipher.c | 2 +- crypto/algapi.c | 2 +- crypto/api.c | 6 +++--- crypto/crypto_user.c | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/crypto/ablkcipher.c b/crypto/ablkcipher.c index db201bca15819..523dd10e17512 100644 --- a/crypto/ablkcipher.c +++ b/crypto/ablkcipher.c @@ -698,7 +698,7 @@ struct crypto_ablkcipher *crypto_alloc_ablkcipher(const char *alg_name, err: if (err != -EAGAIN) break; - if (signal_pending(current)) { + if (fatal_signal_pending(current)) { err = -EINTR; break; } diff --git a/crypto/algapi.c b/crypto/algapi.c index d2627a3d4ed8b..dda720c6ab08d 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -337,7 +337,7 @@ static void crypto_wait_for_test(struct crypto_larval *larval) crypto_alg_tested(larval->alg.cra_driver_name, 0); } - err = wait_for_completion_interruptible(&larval->completion); + err = wait_for_completion_killable(&larval->completion); WARN_ON(err); out: diff --git a/crypto/api.c b/crypto/api.c index afe4610afc4b9..bbc147cb5dec8 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -172,7 +172,7 @@ static struct crypto_alg *crypto_larval_wait(struct crypto_alg *alg) struct crypto_larval *larval = (void *)alg; long timeout; - timeout = wait_for_completion_interruptible_timeout( + timeout = wait_for_completion_killable_timeout( &larval->completion, 60 * HZ); alg = larval->adult; @@ -445,7 +445,7 @@ struct crypto_tfm *crypto_alloc_base(const char *alg_name, u32 type, u32 mask) err: if (err != -EAGAIN) break; - if (signal_pending(current)) { + if (fatal_signal_pending(current)) { err = -EINTR; break; } @@ -562,7 +562,7 @@ void *crypto_alloc_tfm(const char *alg_name, err: if (err != -EAGAIN) break; - if (signal_pending(current)) { + if (fatal_signal_pending(current)) { err = -EINTR; break; } diff --git a/crypto/crypto_user.c b/crypto/crypto_user.c index 41dfe762b7fba..edf2e3ea17407 100644 --- a/crypto/crypto_user.c +++ b/crypto/crypto_user.c @@ -381,7 +381,7 @@ static struct crypto_alg *crypto_user_aead_alg(const char *name, u32 type, err = PTR_ERR(alg); if (err != -EAGAIN) break; - if (signal_pending(current)) { + if (fatal_signal_pending(current)) { err = -EINTR; break; } From 26b70a8981b22f03f677dd569b5c8191ade40848 Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Tue, 20 Oct 2015 12:09:12 -0400 Subject: [PATCH 1307/2091] md/raid1: submit_bio_wait() returns 0 on success commit 203d27b0226a05202438ddb39ef0ef1acb14a759 upstream. This was introduced with 9e882242c6193ae6f416f2d8d8db0d9126bd996b which changed the return value of submit_bio_wait() to return != 0 on error, but didn't update the caller accordingly. 
Fixes: 9e882242c6 ("block: Add submit_bio_wait(), remove from md") Reported-by: Bill Kuzeja Signed-off-by: Jes Sorensen Signed-off-by: NeilBrown Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 5ce3cd5c4e1d9..bff6c1c7fecba 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -2248,7 +2248,7 @@ static int narrow_write_error(struct r1bio *r1_bio, int i) bio_trim(wbio, sector - r1_bio->sector, sectors); wbio->bi_iter.bi_sector += rdev->data_offset; wbio->bi_bdev = rdev->bdev; - if (submit_bio_wait(WRITE, wbio) == 0) + if (submit_bio_wait(WRITE, wbio) < 0) /* failure! */ ok = rdev_set_badblocks(rdev, sector, sectors, 0) From e73ccce0672d430409dd021735d658bdcd78656e Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Tue, 20 Oct 2015 12:09:13 -0400 Subject: [PATCH 1308/2091] md/raid10: submit_bio_wait() returns 0 on success commit 681ab4696062f5aa939c9e04d058732306a97176 upstream. This was introduced with 9e882242c6193ae6f416f2d8d8db0d9126bd996b which changed the return value of submit_bio_wait() to return != 0 on error, but didn't update the caller accordingly. Fixes: 9e882242c6 ("block: Add submit_bio_wait(), remove from md") Reported-by: Bill Kuzeja Signed-off-by: Jes Sorensen Signed-off-by: NeilBrown Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid10.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index fe0122771642b..adfc83a0f023c 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -2590,7 +2590,7 @@ static int narrow_write_error(struct r10bio *r10_bio, int i) choose_data_offset(r10_bio, rdev) + (sector - r10_bio->sector)); wbio->bi_bdev = rdev->bdev; - if (submit_bio_wait(WRITE, wbio) == 0) + if (submit_bio_wait(WRITE, wbio) < 0) /* Failure! */ ok = rdev_set_badblocks(rdev, sector, sectors, 0) From a89e9e234a84bf9a67586f57f4e027c880439383 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Sat, 31 Oct 2015 10:53:50 +1100 Subject: [PATCH 1309/2091] md/raid5: fix locking in handle_stripe_clean_event() commit b8a9d66d043ffac116100775a469f05f5158c16f upstream. After commit 566c09c53455 ("raid5: relieve lock contention in get_active_stripe()") __find_stripe() is called under conf->hash_locks + hash. But handle_stripe_clean_event() calls remove_hash() under conf->device_lock. Under some cirscumstances the hash chain can be circuited, and we get an infinite loop with disabled interrupts and locked hash lock in __find_stripe(). This leads to hard lockup on multiple CPUs and following system crash. I was able to reproduce this behavior on raid6 over 6 ssd disks. The devices_handle_discard_safely option should be set to enable trim support. The following script was used: for i in `seq 1 32`; do dd if=/dev/zero of=large$i bs=10M count=100 & done neilb: original was against a 3.x kernel. I forward-ported to 4.3-rc. This verison is suitable for any kernel since Commit: 59fc630b8b5f ("RAID5: batch adjacent full stripe write") (v4.1+). I'll post a version for earlier kernels to stable. 
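The invariant the fix restores (removal must take the same per-bucket lock that lookups take, not an unrelated lock) can be sketched generically; this is illustrative only, not the md/raid5 data structures, and all names are made up:

  #include <linux/list.h>
  #include <linux/spinlock.h>

  #define NR_LOCKS 8

  struct table {
          spinlock_t        hash_locks[NR_LOCKS];  /* one lock covers a group of chains */
          struct hlist_head chains[256];
  };

  struct item {
          struct hlist_node node;
          int               hash;
  };

  static struct item *table_find(struct table *t, int hash)
  {
          struct item *it;

          spin_lock_irq(&t->hash_locks[hash % NR_LOCKS]);
          hlist_for_each_entry(it, &t->chains[hash & 255], node)
                  if (it->hash == hash)
                          break;
          spin_unlock_irq(&t->hash_locks[hash % NR_LOCKS]);
          return it;                               /* NULL when not found */
  }

  static void table_remove(struct table *t, struct item *it)
  {
          spinlock_t *lock = &t->hash_locks[it->hash % NR_LOCKS];

          /*
           * Unlinking under any other lock (the bug above used the global
           * device lock) lets a concurrent table_find() walk the chain
           * while it is being rewritten.
           */
          spin_lock_irq(lock);
          hlist_del_init(&it->node);
          spin_unlock_irq(lock);
  }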
Signed-off-by: Roman Gushchin Fixes: 566c09c53455 ("raid5: relieve lock contention in get_active_stripe()") Signed-off-by: NeilBrown Cc: Shaohua Li Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid5.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 23af6772f1469..0d767e31f455d 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3494,6 +3494,7 @@ static void handle_stripe_clean_event(struct r5conf *conf, } if (!discard_pending && test_bit(R5_Discard, &sh->dev[sh->pd_idx].flags)) { + int hash; clear_bit(R5_Discard, &sh->dev[sh->pd_idx].flags); clear_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags); if (sh->qd_idx >= 0) { @@ -3507,16 +3508,17 @@ static void handle_stripe_clean_event(struct r5conf *conf, * no updated data, so remove it from hash list and the stripe * will be reinitialized */ - spin_lock_irq(&conf->device_lock); unhash: + hash = sh->hash_lock_index; + spin_lock_irq(conf->hash_locks + hash); remove_hash(sh); + spin_unlock_irq(conf->hash_locks + hash); if (head_sh->batch_head) { sh = list_first_entry(&sh->batch_list, struct stripe_head, batch_list); if (sh != head_sh) goto unhash; } - spin_unlock_irq(&conf->device_lock); sh = head_sh; if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state)) From bd4009424ef161c0d07ca48ce00bf2d81658ca21 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Sat, 31 Oct 2015 11:00:56 +1100 Subject: [PATCH 1310/2091] Revert "md: allow a partially recovered device to be hot-added to an array." commit d01552a76d71f9879af448e9142389ee9be6e95b upstream. This reverts commit 7eb418851f3278de67126ea0c427641ab4792c57. This commit is poorly justified, I can find not discusison in email, and it clearly causes a problem. If a device which is being recovered fails and is subsequently re-added to an array, there could easily have been changes to the array *before* the point where the recovery was up to. So the recovery must start again from the beginning. If a spare is being recovered and fails, then when it is re-added we really should do a bitmap-based recovery up to the recovery-offset, and then a full recovery from there. Before this reversion, we only did the "full recovery from there" which is not corect. After this reversion with will do a full recovery from the start, which is safer but not ideal. It will be left to a future patch to arrange the two different styles of recovery. Reported-and-tested-by: Nate Dailey Signed-off-by: NeilBrown Fixes: 7eb418851f32 ("md: allow a partially recovered device to be hot-added to an array.") Signed-off-by: Greg Kroah-Hartman --- drivers/md/md.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index e8c44fcb1ad14..78c1f77e79035 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -8013,8 +8013,7 @@ static int remove_and_add_spares(struct mddev *mddev, !test_bit(Bitmap_sync, &rdev->flags))) continue; - if (rdev->saved_raid_disk < 0) - rdev->recovery_offset = 0; + rdev->recovery_offset = 0; if (mddev->pers-> hot_add_disk(mddev, rdev) == 0) { if (sysfs_link_rdev(mddev, rdev)) From e4b49886889e2e3feb166b9718cbcae61c001a34 Mon Sep 17 00:00:00 2001 From: Seth Jennings Date: Wed, 5 Aug 2015 13:16:01 -0500 Subject: [PATCH 1311/2091] EDAC, sb_edac: Fix TAD presence check for sbridge_mci_bind_devs() commit 2900ea609616c2651dec65312beeb2a6e536bc50 upstream. 
In commit 7d375bffa524 ("sb_edac: Fix support for systems with two home agents per socket") NUM_CHANNELS was changed to 8 and the channel space was renumerated to handle EN, EP, and EX configurations. The *_mci_bind_devs() functions - except for sbridge_mci_bind_devs() - got a new device presence check in the form of saw_chan_mask. However, sbridge_mci_bind_devs() still uses the NUM_CHANNELS for loop. With the increase in NUM_CHANNELS, this loop fails at index 4 since SB only has 4 TADs. This results in the following error on SB machines: EDAC sbridge: Some needed devices are missing EDAC sbridge: Couldn't find mci handler EDAC sbridge: Couldn't find mci handle This patch adapts the saw_chan_mask logic for sbridge_mci_bind_devs() as well. After this patch: EDAC MC0: Giving out device to module sbridge_edac.c controller Sandy Bridge Socket#0: DEV 0000:3f:0e.0 (POLLED) EDAC MC1: Giving out device to module sbridge_edac.c controller Sandy Bridge Socket#1: DEV 0000:7f:0e.0 (POLLED) Signed-off-by: Seth Jennings Acked-by: Aristeu Rozanski Acked-by: Tony Luck Tested-by: Borislav Petkov Cc: Mauro Carvalho Chehab Cc: linux-edac Link: http://lkml.kernel.org/r/1438798561-10180-1-git-send-email-sjenning@redhat.com Signed-off-by: Borislav Petkov Signed-off-by: Greg Kroah-Hartman --- drivers/edac/sb_edac.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index 1acf57ba4c86b..cd6b9c72c8ac5 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -1608,6 +1608,7 @@ static int sbridge_mci_bind_devs(struct mem_ctl_info *mci, { struct sbridge_pvt *pvt = mci->pvt_info; struct pci_dev *pdev; + u8 saw_chan_mask = 0; int i; for (i = 0; i < sbridge_dev->n_devs; i++) { @@ -1641,6 +1642,7 @@ static int sbridge_mci_bind_devs(struct mem_ctl_info *mci, { int id = pdev->device - PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD0; pvt->pci_tad[id] = pdev; + saw_chan_mask |= 1 << id; } break; case PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_DDRIO: @@ -1661,10 +1663,8 @@ static int sbridge_mci_bind_devs(struct mem_ctl_info *mci, !pvt-> pci_tad || !pvt->pci_ras || !pvt->pci_ta) goto enodev; - for (i = 0; i < NUM_CHANNELS; i++) { - if (!pvt->pci_tad[i]) - goto enodev; - } + if (saw_chan_mask != 0x0f) + goto enodev; return 0; enodev: From f37c8c0acecff7edaec3bdd5ea84765e2058edb6 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Sun, 25 Oct 2015 16:39:12 +0100 Subject: [PATCH 1312/2091] irqchip/tegra: Propagate IRQ type setting to parent commit 209da39154837ec1b69fb34f438041939911e4b4 upstream. The LIC doesn't deal with the different types of interrupts itself but needs to forward calls to set the appropriate type to its parent IRQ controller. Without this fix all IRQs routed through the LIC will stay at the initial EDGE type, while most of them should actually be level triggered. 
Fixes: 1eec582158e2 "irqchip: tegra: Add Tegra210 support" Signed-off-by: Lucas Stach Cc: Stephen Warren Cc: Thierry Reding Cc: Alexandre Courbot Cc: Jason Cooper Cc: Marc Zyngier Link: http://lkml.kernel.org/r/1445787552-13062-1-git-send-email-dev@lynxeye.de Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- drivers/irqchip/irq-tegra.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/irqchip/irq-tegra.c b/drivers/irqchip/irq-tegra.c index f67bbd80433e8..ab5353a96a820 100644 --- a/drivers/irqchip/irq-tegra.c +++ b/drivers/irqchip/irq-tegra.c @@ -215,6 +215,7 @@ static struct irq_chip tegra_ictlr_chip = { .irq_unmask = tegra_unmask, .irq_retrigger = tegra_retrigger, .irq_set_wake = tegra_set_wake, + .irq_set_type = irq_chip_set_type_parent, .flags = IRQCHIP_MASK_ON_SUSPEND, #ifdef CONFIG_SMP .irq_set_affinity = irq_chip_set_affinity_parent, From e12243dc14b81bc5c4e6e82b0b9ff9911a060a30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C4=81vis=20Mos=C4=81ns?= Date: Fri, 21 Aug 2015 07:29:22 +0300 Subject: [PATCH 1313/2091] mvsas: Fix NULL pointer dereference in mvs_slot_task_free MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 2280521719e81919283b82902ac24058f87dfc1b upstream. When pci_pool_alloc fails in mvs_task_prep then task->lldd_task stays NULL but it's later used in mvs_abort_task as slot which is passed to mvs_slot_task_free causing NULL pointer dereference. Just return from mvs_slot_task_free when passed with NULL slot. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=101891 Signed-off-by: Dāvis Mosāns Reviewed-by: Tomas Henzl Reviewed-by: Johannes Thumshirn Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/mvsas/mv_sas.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/mvsas/mv_sas.c b/drivers/scsi/mvsas/mv_sas.c index 454536c49315d..9c780740fb829 100644 --- a/drivers/scsi/mvsas/mv_sas.c +++ b/drivers/scsi/mvsas/mv_sas.c @@ -887,6 +887,8 @@ static void mvs_slot_free(struct mvs_info *mvi, u32 rx_desc) static void mvs_slot_task_free(struct mvs_info *mvi, struct sas_task *task, struct mvs_slot_info *slot, u32 slot_idx) { + if (!slot) + return; if (!slot->task) return; if (!sas_protocol_ata(task->task_proto)) From d35e462164987724e4a1d20a55351e52181464aa Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 3 Mar 2015 09:52:20 +0100 Subject: [PATCH 1314/2091] MFD/OF: document MFD devices and handle simple-mfd commit 22869a9eca4ea5b534538d160b68c7aef44e378a upstream. This defines a new compatible option for MFD devices "simple-mfd" that will make the OF core spawn child devices for all subnodes of that MFD device. It is optional but handy for things like syscon and possibly other simpler MFD devices. Since there was no file to put the documentation in, I took this opportunity to make a small writeup on MFD devices and add the compatible definition there. 
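For contrast with "simple-mfd", a nexus driver that does have to probe the hardware before it knows which children exist would create them itself; a rough sketch only, with the driver and compatible names invented for illustration:

  #include <linux/module.h>
  #include <linux/of_platform.h>
  #include <linux/platform_device.h>

  static int foo_mfd_probe(struct platform_device *pdev)
  {
          /* ... read ID/feature registers, decide which functions exist ... */

          /* then create platform devices for the DT child nodes explicitly */
          return of_platform_populate(pdev->dev.of_node, NULL, NULL, &pdev->dev);
  }

  static const struct of_device_id foo_mfd_of_match[] = {
          { .compatible = "vendor,foo-mfd" },      /* invented compatible */
          { }
  };

  static struct platform_driver foo_mfd_driver = {
          .driver = {
                  .name           = "foo-mfd",
                  .of_match_table = foo_mfd_of_match,
          },
          .probe = foo_mfd_probe,
  };
  module_platform_driver(foo_mfd_driver);

With "simple-mfd" in the compatible list the OF core performs this population automatically, which is the case the new binding text below describes.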
Suggested-by: Lee Jones Acked-by: Lee Jones Acked-by: Antoine Tenart Acked-by: Alexandre Belloni Cc: Arnd Bergmann Cc: Devicetree Cc: Rob Herring Cc: Benjamin Herrenschmidt Cc: Grant Likely Cc: Pawel Moll Cc: Mark Rutland Cc: Ian Campbell Cc: Kumar Gala Signed-off-by: Linus Walleij Cc: Henrik Juul Pedersen Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/mfd/mfd.txt | 41 +++++++++++++++++++ drivers/of/platform.c | 1 + 2 files changed, 42 insertions(+) create mode 100644 Documentation/devicetree/bindings/mfd/mfd.txt diff --git a/Documentation/devicetree/bindings/mfd/mfd.txt b/Documentation/devicetree/bindings/mfd/mfd.txt new file mode 100644 index 0000000000000..af9d6931a1a25 --- /dev/null +++ b/Documentation/devicetree/bindings/mfd/mfd.txt @@ -0,0 +1,41 @@ +Multi-Function Devices (MFD) + +These devices comprise a nexus for heterogeneous hardware blocks containing +more than one non-unique yet varying hardware functionality. + +A typical MFD can be: + +- A mixed signal ASIC on an external bus, sometimes a PMIC (Power Management + Integrated Circuit) that is manufactured in a lower technology node (rough + silicon) that handles analog drivers for things like audio amplifiers, LED + drivers, level shifters, PHY (physical interfaces to things like USB or + ethernet), regulators etc. + +- A range of memory registers containing "miscellaneous system registers" also + known as a system controller "syscon" or any other memory range containing a + mix of unrelated hardware devices. + +Optional properties: + +- compatible : "simple-mfd" - this signifies that the operating system should + consider all subnodes of the MFD device as separate devices akin to how + "simple-bus" inidicates when to see subnodes as children for a simple + memory-mapped bus. For more complex devices, when the nexus driver has to + probe registers to figure out what child devices exist etc, this should not + be used. In the latter case the child devices will be determined by the + operating system. + +Example: + +foo@1000 { + compatible = "syscon", "simple-mfd"; + reg = <0x01000 0x1000>; + + led@08.0 { + compatible = "register-bit-led"; + offset = <0x08>; + mask = <0x01>; + label = "myled"; + default-state = "on"; + }; +}; diff --git a/drivers/of/platform.c b/drivers/of/platform.c index a01f57c9e34ea..ddf8e42c9367d 100644 --- a/drivers/of/platform.c +++ b/drivers/of/platform.c @@ -25,6 +25,7 @@ const struct of_device_id of_default_bus_match_table[] = { { .compatible = "simple-bus", }, + { .compatible = "simple-mfd", }, #ifdef CONFIG_ARM_AMBA { .compatible = "arm,amba-bus", }, #endif /* CONFIG_ARM_AMBA */ From ee03d02ebc5dd0ee414fc47eb3992e8d3aa7396e Mon Sep 17 00:00:00 2001 From: Christian Engelmayer Date: Wed, 21 Oct 2015 00:50:06 +0200 Subject: [PATCH 1315/2091] btrfs: fix possible leak in btrfs_ioctl_balance() commit 0f89abf56abbd0e1c6e3cef9813e6d9f05383c1e upstream. Commit 8eb934591f8b ("btrfs: check unsupported filters in balance arguments") adds a jump to exit label out_bargs in case the argument check fails. At this point in addition to the bargs memory, the memory for struct btrfs_balance_control has already been allocated. Ownership of bctl is passed to btrfs_balance() in the good case, thus the memory is not freed due to the introduced jump. Make sure that the memory gets freed in any case as necessary. Detected by Coverity CID 1328378. 
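The idiom used by the fix, clearing the local pointer once ownership has been handed over so that one cleanup label can free it unconditionally, in a generic self-contained sketch (not the btrfs code):

  #include <stdlib.h>

  struct ctl { int flags; };

  /* takes over ownership of ctl, like btrfs_balance() above */
  static int do_balance(struct ctl *ctl)
  {
          free(ctl);
          return 0;
  }

  static int ioctl_balance(int flags)
  {
          struct ctl *ctl;
          int ret;

          ctl = malloc(sizeof(*ctl));
          if (!ctl)
                  return -1;

          ctl->flags = flags;
          if (ctl->flags < 0) {            /* stand-in for the argument check */
                  ret = -1;
                  goto out;                /* ctl is still ours, freed below */
          }

          ret = do_balance(ctl);
          ctl = NULL;                      /* ownership transferred, free below is a no-op */
  out:
          free(ctl);                       /* free(NULL) is safe, like kfree(NULL) */
          return ret;
  }

  int main(void)
  {
          ioctl_balance(1);                /* good path: do_balance() frees ctl */
          ioctl_balance(-1);               /* error path: the cleanup label frees ctl */
          return 0;
  }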
Signed-off-by: Christian Engelmayer Reviewed-by: David Sterba Signed-off-by: Chris Mason Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/ioctl.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index af3dd3c55ef18..8b2c82ce36b3a 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -4494,7 +4494,7 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg) if (bctl->flags & ~(BTRFS_BALANCE_ARGS_MASK | BTRFS_BALANCE_TYPE_MASK)) { ret = -EINVAL; - goto out_bargs; + goto out_bctl; } do_balance: @@ -4508,12 +4508,15 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg) need_unlock = false; ret = btrfs_balance(bctl, bargs); + bctl = NULL; if (arg) { if (copy_to_user(arg, bargs, sizeof(*bargs))) ret = -EFAULT; } +out_bctl: + kfree(bctl); out_bargs: kfree(bargs); out_unlock: From 5a52c0e04133b418583244918b3fda8bd3b87d43 Mon Sep 17 00:00:00 2001 From: Doron Tsur Date: Sun, 11 Oct 2015 15:58:17 +0300 Subject: [PATCH 1316/2091] IB/cm: Fix rb-tree duplicate free and use-after-free commit 0ca81a2840f77855bbad1b9f172c545c4dc9e6a4 upstream. ib_send_cm_sidr_rep could sometimes erase the node from the sidr (depending on errors in the process). Since ib_send_cm_sidr_rep is called both from cm_sidr_req_handler and cm_destroy_id, cm_id_priv could be either erased from the rb_tree twice or not erased at all. Fixing that by making sure it's erased only once before freeing cm_id_priv. Fixes: a977049dacde ('[PATCH] IB: Add the kernel CM implementation') Signed-off-by: Doron Tsur Signed-off-by: Matan Barak Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/cm.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 0271608a51c40..0962b6821ce1f 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -859,6 +859,11 @@ static void cm_destroy_id(struct ib_cm_id *cm_id, int err) case IB_CM_SIDR_REQ_RCVD: spin_unlock_irq(&cm_id_priv->lock); cm_reject_sidr_req(cm_id_priv, IB_SIDR_REJECT); + spin_lock_irq(&cm.lock); + if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node)) + rb_erase(&cm_id_priv->sidr_id_node, + &cm.remote_sidr_table); + spin_unlock_irq(&cm.lock); break; case IB_CM_REQ_SENT: case IB_CM_MRA_REQ_RCVD: @@ -3098,7 +3103,10 @@ int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id, spin_unlock_irqrestore(&cm_id_priv->lock, flags); spin_lock_irqsave(&cm.lock, flags); - rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table); + if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node)) { + rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table); + RB_CLEAR_NODE(&cm_id_priv->sidr_id_node); + } spin_unlock_irqrestore(&cm.lock, flags); return 0; From 82696471b9c72e4b996e009bb6b4ee432c175582 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 15 Oct 2015 12:34:21 -0700 Subject: [PATCH 1317/2091] cpufreq: intel_pstate: Fix divide by zero on Knights Landing (KNL) commit 8e601a9f97a00bab031980de34f9a81891c1f82f upstream. This is a workaround for KNL platform, where in some cases MPERF counter will not have updated value before next read of MSR_IA32_MPERF. In this case divide by zero will occur. This change ignores current sample for busy calculation in this case. Fixes: b34ef932d79a (intel_pstate: Knights Landing support) Signed-off-by: Srinivas Pandruvada Acked-by: Kristen Carlson Accardi Signed-off-by: Rafael J. 
Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/intel_pstate.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index e8d16997c5cbc..1ee2ab58e37d6 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -761,6 +761,11 @@ static inline void intel_pstate_sample(struct cpudata *cpu) local_irq_save(flags); rdmsrl(MSR_IA32_APERF, aperf); rdmsrl(MSR_IA32_MPERF, mperf); + if (cpu->prev_mperf == mperf) { + local_irq_restore(flags); + return; + } + local_irq_restore(flags); cpu->last_sample_time = cpu->sample.time; From fb1de80c1b15980a0e5109d00fd2ffad30563de3 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Thu, 18 Jun 2015 15:41:32 +0100 Subject: [PATCH 1318/2091] arm64: kernel: rename __cpu_suspend to keep it aligned with arm commit af391b15f7b56ce19f52862d36595637dd42b575 upstream. This patch renames __cpu_suspend to cpu_suspend so that it's aligned with ARM32. It also removes the redundant wrapper created. This is in preparation to implement generic PSCI system suspend using the cpu_{suspend,resume} which now has the same interface on both ARM and ARM64. Cc: Mark Rutland Reviewed-by: Lorenzo Pieralisi Reviewed-by: Ashwin Chaugule Signed-off-by: Sudeep Holla Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/cpuidle.h | 8 ++------ arch/arm64/include/asm/suspend.h | 2 +- arch/arm64/kernel/cpuidle.c | 4 ++-- arch/arm64/kernel/psci.c | 2 +- arch/arm64/kernel/suspend.c | 6 +++--- 5 files changed, 9 insertions(+), 13 deletions(-) diff --git a/arch/arm64/include/asm/cpuidle.h b/arch/arm64/include/asm/cpuidle.h index 141b2fcabaa67..0f74f05d662a7 100644 --- a/arch/arm64/include/asm/cpuidle.h +++ b/arch/arm64/include/asm/cpuidle.h @@ -5,20 +5,16 @@ #ifdef CONFIG_CPU_IDLE extern int arm_cpuidle_init(unsigned int cpu); -extern int cpu_suspend(unsigned long arg); +extern int arm_cpuidle_suspend(int index); #else static inline int arm_cpuidle_init(unsigned int cpu) { return -EOPNOTSUPP; } -static inline int cpu_suspend(unsigned long arg) +static inline int arm_cpuidle_suspend(int index) { return -EOPNOTSUPP; } #endif -static inline int arm_cpuidle_suspend(int index) -{ - return cpu_suspend(index); -} #endif diff --git a/arch/arm64/include/asm/suspend.h b/arch/arm64/include/asm/suspend.h index 003802f589633..59a5b0f1e81c3 100644 --- a/arch/arm64/include/asm/suspend.h +++ b/arch/arm64/include/asm/suspend.h @@ -21,6 +21,6 @@ struct sleep_save_sp { phys_addr_t save_ptr_stash_phys; }; -extern int __cpu_suspend(unsigned long arg, int (*fn)(unsigned long)); +extern int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)); extern void cpu_resume(void); #endif diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c index a78143a5c99ff..2bbd0fee084fe 100644 --- a/arch/arm64/kernel/cpuidle.c +++ b/arch/arm64/kernel/cpuidle.c @@ -37,7 +37,7 @@ int arm_cpuidle_init(unsigned int cpu) * Return: 0 on success, -EOPNOTSUPP if CPU suspend hook not initialized, CPU * operations back-end error code otherwise. 
*/ -int cpu_suspend(unsigned long arg) +int arm_cpuidle_suspend(int index) { int cpu = smp_processor_id(); @@ -47,5 +47,5 @@ int cpu_suspend(unsigned long arg) */ if (!cpu_ops[cpu] || !cpu_ops[cpu]->cpu_suspend) return -EOPNOTSUPP; - return cpu_ops[cpu]->cpu_suspend(arg); + return cpu_ops[cpu]->cpu_suspend(index); } diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index ea18cb53921e8..24d4733b7e3c6 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -546,7 +546,7 @@ static int __maybe_unused cpu_psci_cpu_suspend(unsigned long index) if (state[index - 1].type == PSCI_POWER_STATE_TYPE_STANDBY) ret = psci_ops.cpu_suspend(state[index - 1], 0); else - ret = __cpu_suspend(index, psci_suspend_finisher); + ret = cpu_suspend(index, psci_suspend_finisher); return ret; } diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index d7daf45ae7a25..400e1265e205a 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -51,13 +51,13 @@ void __init cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *)) } /* - * __cpu_suspend + * cpu_suspend * * arg: argument to pass to the finisher function * fn: finisher function pointer * */ -int __cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) +int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) { struct mm_struct *mm = current->active_mm; int ret; @@ -82,7 +82,7 @@ int __cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) * We are resuming from reset with TTBR0_EL1 set to the * idmap to enable the MMU; restore the active_mm mappings in * TTBR0_EL1 unless the active_mm == &init_mm, in which case - * the thread entered __cpu_suspend with TTBR0_EL1 set to + * the thread entered cpu_suspend with TTBR0_EL1 set to * reserved TTBR0 page tables and should be restored as such. */ if (mm == &init_mm) From dbf2a8c020446889a5214c3f91fd69442cd2cd86 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Tue, 27 Oct 2015 17:29:10 +0000 Subject: [PATCH 1319/2091] arm64: kernel: fix tcr_el1.t0sz restore on systems with extended idmap commit e13d918a19a7b6cba62b32884f5e336e764c2cc6 upstream. Commit dd006da21646 ("arm64: mm: increase VA range of identity map") introduced a mechanism to extend the virtual memory map range to support arm64 systems with system RAM located at very high offset, where the identity mapping used to enable/disable the MMU requires additional translation levels to map the physical memory at an equal virtual offset. The kernel detects at boot time the tcr_el1.t0sz value required by the identity mapping and sets-up the tcr_el1.t0sz register field accordingly, any time the identity map is required in the kernel (ie when enabling the MMU). After enabling the MMU, in the cold boot path the kernel resets the tcr_el1.t0sz to its default value (ie the actual configuration value for the system virtual address space) so that after enabling the MMU the memory space translated by ttbr0_el1 is restored as expected. 
Commit dd006da21646 ("arm64: mm: increase VA range of identity map") also added code to set-up the tcr_el1.t0sz value when the kernel resumes from low-power states with the MMU off through cpu_resume() in order to effectively use the identity mapping to enable the MMU but failed to add the code required to restore the tcr_el1.t0sz to its default value, when the core returns to the kernel with the MMU enabled, so that the kernel might end up running with a tcr_el1.t0sz value set-up for the identity mapping which can be lower than the value required by the actual virtual address space, resulting in an erroneous set-up. This patch adds code in the resume path that restores the tcr_el1.t0sz default value upon core resume, mirroring the cold boot path behaviour and therefore fixing the issue. Cc: Catalin Marinas Fixes: dd006da21646 ("arm64: mm: increase VA range of identity map") Acked-by: Ard Biesheuvel Signed-off-by: Lorenzo Pieralisi Signed-off-by: James Morse Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/suspend.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index 400e1265e205a..53f1f8dccf6c0 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -80,17 +80,21 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) if (ret == 0) { /* * We are resuming from reset with TTBR0_EL1 set to the - * idmap to enable the MMU; restore the active_mm mappings in - * TTBR0_EL1 unless the active_mm == &init_mm, in which case - * the thread entered cpu_suspend with TTBR0_EL1 set to - * reserved TTBR0 page tables and should be restored as such. + * idmap to enable the MMU; set the TTBR0 to the reserved + * page tables to prevent speculative TLB allocations, flush + * the local tlb and set the default tcr_el1.t0sz so that + * the TTBR0 address space set-up is properly restored. + * If the current active_mm != &init_mm we entered cpu_suspend + * with mappings in TTBR0 that must be restored, so we switch + * them back to complete the address space configuration + * restoration before returning. */ - if (mm == &init_mm) - cpu_set_reserved_ttbr0(); - else - cpu_switch_mm(mm->pgd, mm); - + cpu_set_reserved_ttbr0(); flush_tlb_all(); + cpu_set_default_tcr_t0sz(); + + if (mm != &init_mm) + cpu_switch_mm(mm->pgd, mm); /* * Restore per-cpu offset before any kernel From 599f528a90026dd61533aa5145e64cc0ac3625ae Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 15 Oct 2015 13:55:53 +0100 Subject: [PATCH 1320/2091] arm64: compat: fix stxr failure case in SWP emulation commit 589cb22bbedacf325951014c07a35a2b01ca57f6 upstream. If the STXR instruction fails in the SWP emulation code, we leave *data overwritten with the loaded value, therefore corrupting the data written by a subsequent, successful attempt. This patch re-jigs the code so that we only write back to *data once we know that the update has happened.
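As an illustration of that pattern (this sketch is not part of the patch): keep the loaded value in a scratch variable and copy it out to the caller only once the store is known to have succeeded. The user_swp32() name and the use of GCC/Clang __atomic built-ins in place of the LDXR/STXR pair are assumptions made for this user-space sketch only.

#include <stdint.h>
#include <stdio.h>

/* Atomically replace *addr with newval and return the old value through
 * *data -- but commit to *data only once the exchange has succeeded, so
 * a failed attempt never clobbers the caller's buffer. */
static int user_swp32(uint32_t *addr, uint32_t newval, uint32_t *data)
{
	uint32_t old = __atomic_load_n(addr, __ATOMIC_RELAXED);

	/* On failure the built-in refreshes 'old' with the current value,
	 * mirroring a retried load-exclusive. */
	while (!__atomic_compare_exchange_n(addr, &old, newval, 0,
					    __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
		;

	*data = old;	/* write back only after the update has happened */
	return 0;
}

int main(void)
{
	uint32_t mem = 1, old;

	user_swp32(&mem, 2, &old);
	printf("old=%u new=%u\n", (unsigned)old, (unsigned)mem);	/* old=1 new=2 */
	return 0;
}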
Fixes: bd35a4adc413 ("arm64: Port SWP/SWPB emulation support from arm") Reported-by: Shengjiu Wang Reported-by: Vladimir Murzin Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/armv8_deprecated.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index 7922c2e710cad..7ac3920b1356a 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -279,22 +279,24 @@ static void register_insn_emulation_sysctl(struct ctl_table *table) */ #define __user_swpX_asm(data, addr, res, temp, B) \ __asm__ __volatile__( \ - " mov %w2, %w1\n" \ - "0: ldxr"B" %w1, [%3]\n" \ - "1: stxr"B" %w0, %w2, [%3]\n" \ + "0: ldxr"B" %w2, [%3]\n" \ + "1: stxr"B" %w0, %w1, [%3]\n" \ " cbz %w0, 2f\n" \ " mov %w0, %w4\n" \ + " b 3f\n" \ "2:\n" \ + " mov %w1, %w2\n" \ + "3:\n" \ " .pushsection .fixup,\"ax\"\n" \ " .align 2\n" \ - "3: mov %w0, %w5\n" \ - " b 2b\n" \ + "4: mov %w0, %w5\n" \ + " b 3b\n" \ " .popsection" \ " .pushsection __ex_table,\"a\"\n" \ " .align 3\n" \ - " .quad 0b, 3b\n" \ - " .quad 1b, 3b\n" \ - " .popsection" \ + " .quad 0b, 4b\n" \ + " .quad 1b, 4b\n" \ + " .popsection\n" \ : "=&r" (res), "+r" (data), "=&r" (temp) \ : "r" (addr), "i" (-EAGAIN), "i" (-EFAULT) \ : "memory") From a1638a11d9fb9457a8a5a9cc1a00e59ca56c8fe3 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 15 Oct 2015 13:38:48 -0600 Subject: [PATCH 1321/2091] NVMe: Fix memory leak on retried commands commit 0dfc70c33409afc232ef0b9ec210535dfbf9bc61 upstream. Resources are reallocated for requeued commands, so unmap and release the iod for the failed command. It's a pretty bad memory leak and causes a kernel hang if you remove a drive because of a busy dma pool. You'll get messages spewing like this: nvme 0000:xx:xx.x: dma_pool_destroy prp list 256, ffff880420dec000 busy and lock up pci and the driver since removal never completes while holding a lock. Signed-off-by: Keith Busch Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/block/nvme-core.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 683dff272562b..04c0e8f3183c3 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -590,6 +590,7 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx, struct nvme_iod *iod = ctx; struct request *req = iod_get_private(iod); struct nvme_cmd_info *cmd_rq = blk_mq_rq_to_pdu(req); + bool requeue = false; u16 status = le16_to_cpup(&cqe->status) >> 1; @@ -598,12 +599,13 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx, && (jiffies - req->start_time) < req->timeout) { unsigned long flags; + requeue = true; blk_mq_requeue_request(req); spin_lock_irqsave(req->q->queue_lock, flags); if (!blk_queue_stopped(req->q)) blk_mq_kick_requeue_list(req->q); spin_unlock_irqrestore(req->q->queue_lock, flags); - return; + goto release_iod; } req->errors = nvme_error_status(status); } else @@ -613,7 +615,7 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx, dev_warn(&nvmeq->dev->pci_dev->dev, "completing aborted command with status:%04x\n", status); - + release_iod: if (iod->nents) { dma_unmap_sg(&nvmeq->dev->pci_dev->dev, iod->sg, iod->nents, rq_data_dir(req) ? 
DMA_TO_DEVICE : DMA_FROM_DEVICE); @@ -626,7 +628,8 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx, } nvme_free_iod(nvmeq->dev, iod); - blk_mq_complete_request(req); + if (likely(!requeue)) + blk_mq_complete_request(req); } /* length is in bytes. gfp flags indicates whether we may sleep. */ From 435d5d7f3a0dd62dc1465c314e338f159fb7d43a Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Mon, 14 Sep 2015 01:13:11 -0700 Subject: [PATCH 1322/2091] drm/vmwgfx: Fix up user_dmabuf refcounting commit 54c12bc374408faddbff75dbf1a6167c19af39c4 upstream. If user space calls unreference on a user_dmabuf it will typically kill the struct ttm_base_object member which is responsible for the user-space visibility. However the dmabuf part may still be alive and refcounted. In some situations, like for shared guest-backed surface referencing/opening, the driver may try to reference the struct ttm_base_object member again, causing an immediate kernel warning and a later kernel NULL pointer dereference. Fix this by always maintaining a reference on the struct ttm_base_object member, in situations where it might subsequently be referenced. Signed-off-by: Thomas Hellstrom Reviewed-by: Brian Paul Reviewed-by: Sinclair Yeh Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 3 +++ drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 6 +++-- drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c | 6 +++-- drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c | 2 +- drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 29 +++++++++++++++++------- drivers/gpu/drm/vmwgfx/vmwgfx_shader.c | 2 +- drivers/gpu/drm/vmwgfx/vmwgfx_surface.c | 12 +++++++--- 7 files changed, 43 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 620bb5cf617c9..15a8d7746fd2c 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -1458,6 +1458,9 @@ static void __exit vmwgfx_exit(void) drm_pci_exit(&driver, &vmw_pci_driver); } +MODULE_INFO(vmw_patch, "ed7d78b2"); +MODULE_INFO(vmw_patch, "54c12bc3"); + module_init(vmwgfx_init); module_exit(vmwgfx_exit); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index d26a6daa9719a..d8896ed41b9eb 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -636,7 +636,8 @@ extern int vmw_user_dmabuf_alloc(struct vmw_private *dev_priv, uint32_t size, bool shareable, uint32_t *handle, - struct vmw_dma_buffer **p_dma_buf); + struct vmw_dma_buffer **p_dma_buf, + struct ttm_base_object **p_base); extern int vmw_user_dmabuf_reference(struct ttm_object_file *tfile, struct vmw_dma_buffer *dma_buf, uint32_t *handle); @@ -650,7 +651,8 @@ extern uint32_t vmw_dmabuf_validate_node(struct ttm_buffer_object *bo, uint32_t cur_validate_node); extern void vmw_dmabuf_validate_clear(struct ttm_buffer_object *bo); extern int vmw_user_dmabuf_lookup(struct ttm_object_file *tfile, - uint32_t id, struct vmw_dma_buffer **out); + uint32_t id, struct vmw_dma_buffer **out, + struct ttm_base_object **base); extern int vmw_stream_claim_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); extern int vmw_stream_unref_ioctl(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index 97ad3bcb99a75..aee1c6ccc52d8 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -887,7 +887,8 @@ static int vmw_translate_mob_ptr(struct vmw_private 
*dev_priv, struct vmw_relocation *reloc; int ret; - ret = vmw_user_dmabuf_lookup(sw_context->fp->tfile, handle, &vmw_bo); + ret = vmw_user_dmabuf_lookup(sw_context->fp->tfile, handle, &vmw_bo, + NULL); if (unlikely(ret != 0)) { DRM_ERROR("Could not find or use MOB buffer.\n"); ret = -EINVAL; @@ -949,7 +950,8 @@ static int vmw_translate_guest_ptr(struct vmw_private *dev_priv, struct vmw_relocation *reloc; int ret; - ret = vmw_user_dmabuf_lookup(sw_context->fp->tfile, handle, &vmw_bo); + ret = vmw_user_dmabuf_lookup(sw_context->fp->tfile, handle, &vmw_bo, + NULL); if (unlikely(ret != 0)) { DRM_ERROR("Could not find or use GMR region.\n"); ret = -EINVAL; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c b/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c index 87e39f68e9d07..e1898982b44af 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c @@ -484,7 +484,7 @@ int vmw_overlay_ioctl(struct drm_device *dev, void *data, goto out_unlock; } - ret = vmw_user_dmabuf_lookup(tfile, arg->handle, &buf); + ret = vmw_user_dmabuf_lookup(tfile, arg->handle, &buf, NULL); if (ret) goto out_unlock; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c index 210ef15b1d091..c5b4c47e86d63 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c @@ -356,7 +356,7 @@ int vmw_user_lookup_handle(struct vmw_private *dev_priv, } *out_surf = NULL; - ret = vmw_user_dmabuf_lookup(tfile, handle, out_buf); + ret = vmw_user_dmabuf_lookup(tfile, handle, out_buf, NULL); return ret; } @@ -483,7 +483,8 @@ int vmw_user_dmabuf_alloc(struct vmw_private *dev_priv, uint32_t size, bool shareable, uint32_t *handle, - struct vmw_dma_buffer **p_dma_buf) + struct vmw_dma_buffer **p_dma_buf, + struct ttm_base_object **p_base) { struct vmw_user_dma_buffer *user_bo; struct ttm_buffer_object *tmp; @@ -517,6 +518,10 @@ int vmw_user_dmabuf_alloc(struct vmw_private *dev_priv, } *p_dma_buf = &user_bo->dma; + if (p_base) { + *p_base = &user_bo->prime.base; + kref_get(&(*p_base)->refcount); + } *handle = user_bo->prime.base.hash.key; out_no_base_object: @@ -633,6 +638,7 @@ int vmw_user_dmabuf_synccpu_ioctl(struct drm_device *dev, void *data, struct vmw_dma_buffer *dma_buf; struct vmw_user_dma_buffer *user_bo; struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile; + struct ttm_base_object *buffer_base; int ret; if ((arg->flags & (drm_vmw_synccpu_read | drm_vmw_synccpu_write)) == 0 @@ -645,7 +651,8 @@ int vmw_user_dmabuf_synccpu_ioctl(struct drm_device *dev, void *data, switch (arg->op) { case drm_vmw_synccpu_grab: - ret = vmw_user_dmabuf_lookup(tfile, arg->handle, &dma_buf); + ret = vmw_user_dmabuf_lookup(tfile, arg->handle, &dma_buf, + &buffer_base); if (unlikely(ret != 0)) return ret; @@ -653,6 +660,7 @@ int vmw_user_dmabuf_synccpu_ioctl(struct drm_device *dev, void *data, dma); ret = vmw_user_dmabuf_synccpu_grab(user_bo, tfile, arg->flags); vmw_dmabuf_unreference(&dma_buf); + ttm_base_object_unref(&buffer_base); if (unlikely(ret != 0 && ret != -ERESTARTSYS && ret != -EBUSY)) { DRM_ERROR("Failed synccpu grab on handle 0x%08x.\n", @@ -694,7 +702,8 @@ int vmw_dmabuf_alloc_ioctl(struct drm_device *dev, void *data, return ret; ret = vmw_user_dmabuf_alloc(dev_priv, vmw_fpriv(file_priv)->tfile, - req->size, false, &handle, &dma_buf); + req->size, false, &handle, &dma_buf, + NULL); if (unlikely(ret != 0)) goto out_no_dmabuf; @@ -723,7 +732,8 @@ int vmw_dmabuf_unref_ioctl(struct drm_device *dev, void *data, } int 
vmw_user_dmabuf_lookup(struct ttm_object_file *tfile, - uint32_t handle, struct vmw_dma_buffer **out) + uint32_t handle, struct vmw_dma_buffer **out, + struct ttm_base_object **p_base) { struct vmw_user_dma_buffer *vmw_user_bo; struct ttm_base_object *base; @@ -745,7 +755,10 @@ int vmw_user_dmabuf_lookup(struct ttm_object_file *tfile, vmw_user_bo = container_of(base, struct vmw_user_dma_buffer, prime.base); (void)ttm_bo_reference(&vmw_user_bo->dma.base); - ttm_base_object_unref(&base); + if (p_base) + *p_base = base; + else + ttm_base_object_unref(&base); *out = &vmw_user_bo->dma; return 0; @@ -1006,7 +1019,7 @@ int vmw_dumb_create(struct drm_file *file_priv, ret = vmw_user_dmabuf_alloc(dev_priv, vmw_fpriv(file_priv)->tfile, args->size, false, &args->handle, - &dma_buf); + &dma_buf, NULL); if (unlikely(ret != 0)) goto out_no_dmabuf; @@ -1034,7 +1047,7 @@ int vmw_dumb_map_offset(struct drm_file *file_priv, struct vmw_dma_buffer *out_buf; int ret; - ret = vmw_user_dmabuf_lookup(tfile, handle, &out_buf); + ret = vmw_user_dmabuf_lookup(tfile, handle, &out_buf, NULL); if (ret != 0) return -EINVAL; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c b/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c index 6a4584a43aa6c..d2751ada19b1e 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c @@ -470,7 +470,7 @@ int vmw_shader_define_ioctl(struct drm_device *dev, void *data, if (arg->buffer_handle != SVGA3D_INVALID_ID) { ret = vmw_user_dmabuf_lookup(tfile, arg->buffer_handle, - &buffer); + &buffer, NULL); if (unlikely(ret != 0)) { DRM_ERROR("Could not find buffer for shader " "creation.\n"); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c index 4ecdbf3e59da2..17a4107639b2a 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c @@ -43,6 +43,7 @@ struct vmw_user_surface { struct vmw_surface srf; uint32_t size; struct drm_master *master; + struct ttm_base_object *backup_base; }; /** @@ -652,6 +653,8 @@ static void vmw_user_surface_base_release(struct ttm_base_object **p_base) struct vmw_resource *res = &user_srf->srf.res; *p_base = NULL; + if (user_srf->backup_base) + ttm_base_object_unref(&user_srf->backup_base); vmw_resource_unreference(&res); } @@ -846,7 +849,8 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data, res->backup_size, true, &backup_handle, - &res->backup); + &res->backup, + &user_srf->backup_base); if (unlikely(ret != 0)) { vmw_resource_unreference(&res); goto out_unlock; @@ -1309,7 +1313,8 @@ int vmw_gb_surface_define_ioctl(struct drm_device *dev, void *data, if (req->buffer_handle != SVGA3D_INVALID_ID) { ret = vmw_user_dmabuf_lookup(tfile, req->buffer_handle, - &res->backup); + &res->backup, + &user_srf->backup_base); } else if (req->drm_surface_flags & drm_vmw_surface_flag_create_buffer) ret = vmw_user_dmabuf_alloc(dev_priv, tfile, @@ -1317,7 +1322,8 @@ int vmw_gb_surface_define_ioctl(struct drm_device *dev, void *data, req->drm_surface_flags & drm_vmw_surface_flag_shareable, &backup_handle, - &res->backup); + &res->backup, + &user_srf->backup_base); if (unlikely(ret != 0)) { vmw_resource_unreference(&res); From dc14f050d2a7bd60b32179f842a72e376cfd467b Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Thu, 22 Oct 2015 13:32:19 -0700 Subject: [PATCH 1323/2091] thp: use is_zero_pfn() only after pte_present() check commit 47aee4d8e314384807e98b67ade07f6da476aa75 upstream. 
Use is_zero_pfn() on pteval only after pte_present() check on pteval (It might be a better idea to introduce is_zero_pte() which checks pte_present() first). Otherwise when working on a swap or migration entry and if pte_pfn's result is equal to zero_pfn by chance, we lose user's data in __collapse_huge_page_copy(). So if you're unlucky, the application segfaults and finally you could see the below message on exit: BUG: Bad rss-counter state mm:ffff88007f099300 idx:2 val:3 Fixes: ca0984caa823 ("mm: incorporate zero pages into transparent huge pages") Signed-off-by: Minchan Kim Reviewed-by: Andrea Arcangeli Acked-by: Kirill A. Shutemov Cc: Mel Gorman Acked-by: Vlastimil Babka Cc: Hugh Dickins Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/huge_memory.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 078832cf36365..8e792ec5e84c6 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2137,7 +2137,8 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, for (_pte = pte; _pte < pte+HPAGE_PMD_NR; _pte++, address += PAGE_SIZE) { pte_t pteval = *_pte; - if (pte_none(pteval) || is_zero_pfn(pte_pfn(pteval))) { + if (pte_none(pteval) || (pte_present(pteval) && + is_zero_pfn(pte_pfn(pteval)))) { if (++none_or_zero <= khugepaged_max_ptes_none) continue; else From 7b4744eee459a9c521e538907ee822318e9b303e Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Tue, 4 Aug 2015 15:03:14 +0300 Subject: [PATCH 1324/2091] pinctrl: baytrail: Serialize all register access commit 39ce8150a079e3ae6ed9abf26d7918a558ef7c19 upstream. There is a hardware issue in Intel Baytrail where concurrent GPIO register access might result in reads of 0xffffffff and writes might get dropped completely. Prevent this from happening by taking the serializing lock in all places where it is possible that more than one thread might be accessing the hardware concurrently. Signed-off-by: Mika Westerberg Signed-off-by: Linus Walleij Cc: Lucas De Marchi Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/intel/pinctrl-baytrail.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/pinctrl/intel/pinctrl-baytrail.c b/drivers/pinctrl/intel/pinctrl-baytrail.c index 2062c224e32fb..7ca46f418f616 100644 --- a/drivers/pinctrl/intel/pinctrl-baytrail.c +++ b/drivers/pinctrl/intel/pinctrl-baytrail.c @@ -201,6 +201,9 @@ static int byt_gpio_request(struct gpio_chip *chip, unsigned offset) struct byt_gpio *vg = to_byt_gpio(chip); void __iomem *reg = byt_gpio_reg(chip, offset, BYT_CONF0_REG); u32 value, gpio_mux; + unsigned long flags; + + spin_lock_irqsave(&vg->lock, flags); /* * In most cases, func pin mux 000 means GPIO function.
@@ -214,18 +217,16 @@ static int byt_gpio_request(struct gpio_chip *chip, unsigned offset) value = readl(reg) & BYT_PIN_MUX; gpio_mux = byt_get_gpio_mux(vg, offset); if (WARN_ON(gpio_mux != value)) { - unsigned long flags; - - spin_lock_irqsave(&vg->lock, flags); value = readl(reg) & ~BYT_PIN_MUX; value |= gpio_mux; writel(value, reg); - spin_unlock_irqrestore(&vg->lock, flags); dev_warn(&vg->pdev->dev, "pin %u forcibly re-configured as GPIO\n", offset); } + spin_unlock_irqrestore(&vg->lock, flags); + pm_runtime_get(&vg->pdev->dev); return 0; @@ -277,7 +278,15 @@ static int byt_irq_type(struct irq_data *d, unsigned type) static int byt_gpio_get(struct gpio_chip *chip, unsigned offset) { void __iomem *reg = byt_gpio_reg(chip, offset, BYT_VAL_REG); - return readl(reg) & BYT_LEVEL; + struct byt_gpio *vg = to_byt_gpio(chip); + unsigned long flags; + u32 val; + + spin_lock_irqsave(&vg->lock, flags); + val = readl(reg); + spin_unlock_irqrestore(&vg->lock, flags); + + return val & BYT_LEVEL; } static void byt_gpio_set(struct gpio_chip *chip, unsigned offset, int value) @@ -450,8 +459,10 @@ static void byt_irq_ack(struct irq_data *d) unsigned offset = irqd_to_hwirq(d); void __iomem *reg; + spin_lock(&vg->lock); reg = byt_gpio_reg(&vg->chip, offset, BYT_INT_STAT_REG); writel(BIT(offset % 32), reg); + spin_unlock(&vg->lock); } static void byt_irq_unmask(struct irq_data *d) From 709adbf08f54b646159c3524144dfb09af59b8cb Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 17 Aug 2015 16:03:17 +0300 Subject: [PATCH 1325/2091] pinctrl: baytrail: Use raw_spinlock for locking commit 78e1c896932df5b8bcdff7bf5417d8e72a4d0d6b upstream. The Intel Baytrail pinctrl driver implements irqchip callbacks which are called with desc->lock raw_spinlock held. In mainline this is fine because spinlock resolves to raw_spinlock. However, running the same code in -rt we get: BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:917 in_atomic(): 1, irqs_disabled(): 1, pid: 0, name: swapper/0 Preemption disabled at:[] cpu_startup_entry+0x17f/0x480 CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.1.5-rt5 #13 ... Call Trace: [] dump_stack+0x4a/0x61 [] ___might_sleep+0xe7/0x170 [] rt_spin_lock+0x1f/0x50 [] byt_gpio_clear_triggering+0x38/0x60 [] byt_irq_mask+0x11/0x20 [] handle_level_irq+0x83/0x150 [] generic_handle_irq+0x27/0x40 [] byt_gpio_irq_handler+0x7f/0xc0 [] handle_irq+0xaa/0x190 ... This is because in -rt spinlocks are preemptible so taking the driver private spinlock in irqchip callbacks causes might_sleep() to trigger. In order to keep -rt happy but at the same time make sure that register accesses get serialized, convert the driver to use raw_spinlock instead. Also shorten the critical section a bit in few places. 
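As an aside on the two Baytrail locking changes (this sketch comes from neither commit): the requirement they both serve is that every read-modify-write of a shared register happens under one lock, and on -rt that lock must be a genuinely spinning, non-sleeping one, which is what raw_spinlock_t provides. A rough user-space analogue with POSIX spinlocks, using invented names throughout, looks like this:

#include <pthread.h>
#include <stdint.h>

/* Stand-in for a memory-mapped configuration register and its lock. */
static uint32_t fake_conf0_reg;
static pthread_spinlock_t reg_lock;

/* Every path that touches the register takes the same lock, so a
 * read-modify-write can never interleave with another access. */
static void fake_reg_update(uint32_t set, uint32_t clear)
{
	pthread_spin_lock(&reg_lock);
	fake_conf0_reg = (fake_conf0_reg & ~clear) | set;
	pthread_spin_unlock(&reg_lock);
}

int main(void)
{
	/* The lock is only shared between threads of this process. */
	if (pthread_spin_init(&reg_lock, PTHREAD_PROCESS_PRIVATE))
		return 1;

	fake_reg_update(0x1, 0x2);
	return 0;
}

In the driver itself the same role is played by raw_spin_lock_irqsave()/raw_spin_unlock_irqrestore() on vg->lock, which additionally keeps the critical section safe against local interrupts.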
Suggested-by: Linus Walleij Signed-off-by: Mika Westerberg Signed-off-by: Linus Walleij Cc: Lucas De Marchi Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/intel/pinctrl-baytrail.c | 50 ++++++++++++------------ 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/drivers/pinctrl/intel/pinctrl-baytrail.c b/drivers/pinctrl/intel/pinctrl-baytrail.c index 7ca46f418f616..b2602210784dc 100644 --- a/drivers/pinctrl/intel/pinctrl-baytrail.c +++ b/drivers/pinctrl/intel/pinctrl-baytrail.c @@ -146,7 +146,7 @@ struct byt_gpio_pin_context { struct byt_gpio { struct gpio_chip chip; struct platform_device *pdev; - spinlock_t lock; + raw_spinlock_t lock; void __iomem *reg_base; struct pinctrl_gpio_range *range; struct byt_gpio_pin_context *saved_context; @@ -174,11 +174,11 @@ static void byt_gpio_clear_triggering(struct byt_gpio *vg, unsigned offset) unsigned long flags; u32 value; - spin_lock_irqsave(&vg->lock, flags); + raw_spin_lock_irqsave(&vg->lock, flags); value = readl(reg); value &= ~(BYT_TRIG_POS | BYT_TRIG_NEG | BYT_TRIG_LVL); writel(value, reg); - spin_unlock_irqrestore(&vg->lock, flags); + raw_spin_unlock_irqrestore(&vg->lock, flags); } static u32 byt_get_gpio_mux(struct byt_gpio *vg, unsigned offset) @@ -203,7 +203,7 @@ static int byt_gpio_request(struct gpio_chip *chip, unsigned offset) u32 value, gpio_mux; unsigned long flags; - spin_lock_irqsave(&vg->lock, flags); + raw_spin_lock_irqsave(&vg->lock, flags); /* * In most cases, func pin mux 000 means GPIO function. @@ -225,7 +225,7 @@ static int byt_gpio_request(struct gpio_chip *chip, unsigned offset) "pin %u forcibly re-configured as GPIO\n", offset); } - spin_unlock_irqrestore(&vg->lock, flags); + raw_spin_unlock_irqrestore(&vg->lock, flags); pm_runtime_get(&vg->pdev->dev); @@ -251,7 +251,7 @@ static int byt_irq_type(struct irq_data *d, unsigned type) if (offset >= vg->chip.ngpio) return -EINVAL; - spin_lock_irqsave(&vg->lock, flags); + raw_spin_lock_irqsave(&vg->lock, flags); value = readl(reg); WARN(value & BYT_DIRECT_IRQ_EN, @@ -270,7 +270,7 @@ static int byt_irq_type(struct irq_data *d, unsigned type) else if (type & IRQ_TYPE_LEVEL_MASK) __irq_set_handler_locked(d->irq, handle_level_irq); - spin_unlock_irqrestore(&vg->lock, flags); + raw_spin_unlock_irqrestore(&vg->lock, flags); return 0; } @@ -282,9 +282,9 @@ static int byt_gpio_get(struct gpio_chip *chip, unsigned offset) unsigned long flags; u32 val; - spin_lock_irqsave(&vg->lock, flags); + raw_spin_lock_irqsave(&vg->lock, flags); val = readl(reg); - spin_unlock_irqrestore(&vg->lock, flags); + raw_spin_unlock_irqrestore(&vg->lock, flags); return val & BYT_LEVEL; } @@ -296,7 +296,7 @@ static void byt_gpio_set(struct gpio_chip *chip, unsigned offset, int value) unsigned long flags; u32 old_val; - spin_lock_irqsave(&vg->lock, flags); + raw_spin_lock_irqsave(&vg->lock, flags); old_val = readl(reg); @@ -305,7 +305,7 @@ static void byt_gpio_set(struct gpio_chip *chip, unsigned offset, int value) else writel(old_val & ~BYT_LEVEL, reg); - spin_unlock_irqrestore(&vg->lock, flags); + raw_spin_unlock_irqrestore(&vg->lock, flags); } static int byt_gpio_direction_input(struct gpio_chip *chip, unsigned offset) @@ -315,13 +315,13 @@ static int byt_gpio_direction_input(struct gpio_chip *chip, unsigned offset) unsigned long flags; u32 value; - spin_lock_irqsave(&vg->lock, flags); + raw_spin_lock_irqsave(&vg->lock, flags); value = readl(reg) | BYT_DIR_MASK; value &= ~BYT_INPUT_EN; /* active low */ writel(value, reg); - spin_unlock_irqrestore(&vg->lock, flags); + 
raw_spin_unlock_irqrestore(&vg->lock, flags); return 0; } @@ -335,7 +335,7 @@ static int byt_gpio_direction_output(struct gpio_chip *chip, unsigned long flags; u32 reg_val; - spin_lock_irqsave(&vg->lock, flags); + raw_spin_lock_irqsave(&vg->lock, flags); /* * Before making any direction modifications, do a check if gpio @@ -354,7 +354,7 @@ static int byt_gpio_direction_output(struct gpio_chip *chip, else writel(reg_val & ~BYT_LEVEL, reg); - spin_unlock_irqrestore(&vg->lock, flags); + raw_spin_unlock_irqrestore(&vg->lock, flags); return 0; } @@ -363,18 +363,19 @@ static void byt_gpio_dbg_show(struct seq_file *s, struct gpio_chip *chip) { struct byt_gpio *vg = to_byt_gpio(chip); int i; - unsigned long flags; u32 conf0, val, offs; - spin_lock_irqsave(&vg->lock, flags); - for (i = 0; i < vg->chip.ngpio; i++) { const char *pull_str = NULL; const char *pull = NULL; + unsigned long flags; const char *label; offs = vg->range->pins[i] * 16; + + raw_spin_lock_irqsave(&vg->lock, flags); conf0 = readl(vg->reg_base + offs + BYT_CONF0_REG); val = readl(vg->reg_base + offs + BYT_VAL_REG); + raw_spin_unlock_irqrestore(&vg->lock, flags); label = gpiochip_is_requested(chip, i); if (!label) @@ -427,7 +428,6 @@ static void byt_gpio_dbg_show(struct seq_file *s, struct gpio_chip *chip) seq_puts(s, "\n"); } - spin_unlock_irqrestore(&vg->lock, flags); } static void byt_gpio_irq_handler(unsigned irq, struct irq_desc *desc) @@ -459,10 +459,10 @@ static void byt_irq_ack(struct irq_data *d) unsigned offset = irqd_to_hwirq(d); void __iomem *reg; - spin_lock(&vg->lock); + raw_spin_lock(&vg->lock); reg = byt_gpio_reg(&vg->chip, offset, BYT_INT_STAT_REG); writel(BIT(offset % 32), reg); - spin_unlock(&vg->lock); + raw_spin_unlock(&vg->lock); } static void byt_irq_unmask(struct irq_data *d) @@ -474,9 +474,9 @@ static void byt_irq_unmask(struct irq_data *d) void __iomem *reg; u32 value; - spin_lock_irqsave(&vg->lock, flags); - reg = byt_gpio_reg(&vg->chip, offset, BYT_CONF0_REG); + + raw_spin_lock_irqsave(&vg->lock, flags); value = readl(reg); switch (irqd_get_trigger_type(d)) { @@ -497,7 +497,7 @@ static void byt_irq_unmask(struct irq_data *d) writel(value, reg); - spin_unlock_irqrestore(&vg->lock, flags); + raw_spin_unlock_irqrestore(&vg->lock, flags); } static void byt_irq_mask(struct irq_data *d) @@ -589,7 +589,7 @@ static int byt_gpio_probe(struct platform_device *pdev) if (IS_ERR(vg->reg_base)) return PTR_ERR(vg->reg_base); - spin_lock_init(&vg->lock); + raw_spin_lock_init(&vg->lock); gc = &vg->chip; gc->label = dev_name(&pdev->dev); From cdfdd2ea3f8a5e80f7a350ee08b2d6fef0c6db0f Mon Sep 17 00:00:00 2001 From: Soeren Grunewald Date: Thu, 11 Jun 2015 09:25:04 +0200 Subject: [PATCH 1326/2091] serial: 8250_pci: Add support for 12 port Exar boards commit be32c0cf0462c36f482b5ddcff1d8371be1e183c upstream. The Exar XR17V358 can also be combined with a XR17V354 chip to act as a single 12 port chip. This works the same way as the combining two XR17V358 chips. But the reported device id then is 0x4358. 
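A note on the numbers in this entry (an aside, not from the commit): the base_baud of 7812500 reused for the new 12-port board follows from the XR17V35x internal reference clock divided by the standard 16x UART oversampling factor. The 125 MHz clock value below is an assumption carried over from the existing XR17V35x board entries rather than something stated in this patch.

#include <stdio.h>

int main(void)
{
	/* Assumed XR17V35x reference clock and the usual 16x divisor. */
	unsigned long osc_hz = 125000000UL;

	printf("base_baud = %lu\n", osc_hz / 16);	/* prints 7812500 */
	return 0;
}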
Signed-off-by: Soeren Grunewald Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_pci.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index 9373cca121d33..eb8adc2e68c15 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -1998,6 +1998,7 @@ pci_wch_ch38x_setup(struct serial_private *priv, #define PCIE_DEVICE_ID_WCH_CH382_2S1P 0x3250 #define PCIE_DEVICE_ID_WCH_CH384_4S 0x3470 +#define PCI_DEVICE_ID_EXAR_XR17V4358 0x4358 #define PCI_DEVICE_ID_EXAR_XR17V8358 0x8358 #define PCI_VENDOR_ID_PERICOM 0x12D8 @@ -2513,6 +2514,13 @@ static struct pci_serial_quirk pci_serial_quirks[] __refdata = { .subdevice = PCI_ANY_ID, .setup = pci_xr17v35x_setup, }, + { + .vendor = PCI_VENDOR_ID_EXAR, + .device = PCI_DEVICE_ID_EXAR_XR17V4358, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .setup = pci_xr17v35x_setup, + }, { .vendor = PCI_VENDOR_ID_EXAR, .device = PCI_DEVICE_ID_EXAR_XR17V8358, @@ -2999,6 +3007,7 @@ enum pci_board_num_t { pbn_exar_XR17V352, pbn_exar_XR17V354, pbn_exar_XR17V358, + pbn_exar_XR17V4358, pbn_exar_XR17V8358, pbn_exar_ibm_saturn, pbn_pasemi_1682M, @@ -3690,6 +3699,14 @@ static struct pciserial_board pci_boards[] = { .reg_shift = 0, .first_offset = 0, }, + [pbn_exar_XR17V4358] = { + .flags = FL_BASE0, + .num_ports = 12, + .base_baud = 7812500, + .uart_offset = 0x400, + .reg_shift = 0, + .first_offset = 0, + }, [pbn_exar_XR17V8358] = { .flags = FL_BASE0, .num_ports = 16, @@ -5133,6 +5150,10 @@ static struct pci_device_id serial_pci_tbl[] = { PCI_ANY_ID, PCI_ANY_ID, 0, 0, pbn_exar_XR17V358 }, + { PCI_VENDOR_ID_EXAR, PCI_DEVICE_ID_EXAR_XR17V4358, + PCI_ANY_ID, PCI_ANY_ID, + 0, + 0, pbn_exar_XR17V4358 }, { PCI_VENDOR_ID_EXAR, PCI_DEVICE_ID_EXAR_XR17V8358, PCI_ANY_ID, PCI_ANY_ID, 0, From e4338aeff6aab3640c2925844f4e5e53746cd3d9 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 6 Nov 2015 11:07:07 -0800 Subject: [PATCH 1327/2091] xen: fix backport of previous kexec patch Fixes the backport of 0b34a166f291d255755be46e43ed5497cdd194f2 upstream Commit 0b34a166f291d255755be46e43ed5497cdd194f2 "x86/xen: Support kexec/kdump in HVM guests by doing a soft reset" has been added to the 4.2-stable tree" needed to correct the CONFIG variable, as CONFIG_KEXEC_CORE only showed up in 4.3. 
Reported-by: David Vrabel Reported-by: Luis Henriques Signed-off-by: Greg Kroah-Hartman --- arch/x86/xen/enlighten.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 0cc657160cb69..a10ed8915bf49 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -33,7 +33,7 @@ #include #include -#ifdef CONFIG_KEXEC_CORE +#ifdef CONFIG_KEXEC #include #endif @@ -1802,7 +1802,7 @@ static struct notifier_block xen_hvm_cpu_notifier = { .notifier_call = xen_hvm_cpu_notify, }; -#ifdef CONFIG_KEXEC_CORE +#ifdef CONFIG_KEXEC static void xen_hvm_shutdown(void) { native_machine_shutdown(); @@ -1836,7 +1836,7 @@ static void __init xen_hvm_guest_init(void) x86_init.irqs.intr_init = xen_init_IRQ; xen_hvm_init_time_ops(); xen_hvm_init_mmu_ops(); -#ifdef CONFIG_KEXEC_CORE +#ifdef CONFIG_KEXEC machine_ops.shutdown = xen_hvm_shutdown; machine_ops.crash_shutdown = xen_hvm_crash_shutdown; #endif From 50eda1546d87542d21eb5a69caf4f046a4bb416e Mon Sep 17 00:00:00 2001 From: Dong Aisheng Date: Wed, 22 Jul 2015 20:53:02 +0800 Subject: [PATCH 1328/2091] dts: imx6: fix sd card gpio polarity specified in device tree commit 89c1a8cf63f8c69dfddb6e377c04df8b25ab1c88 upstream. cd-gpios polarity should be changed to GPIO_ACTIVE_LOW and wp-gpios should be changed to GPIO_ACTIVE_HIGH. Otherwise, the SD may not work properly due to wrong polarity inversion specified in DT after switch to common parsing function mmc_of_parse(). Signed-off-by: Dong Aisheng Acked-by: Shawn Guo Signed-off-by: Ulf Hansson Signed-off-by: Fabio Estevam Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/imx6dl-riotboard.dts | 8 ++++---- arch/arm/boot/dts/imx6q-arm2.dts | 5 +++-- arch/arm/boot/dts/imx6q-gk802.dts | 3 ++- arch/arm/boot/dts/imx6q-tbs2910.dts | 4 ++-- arch/arm/boot/dts/imx6qdl-aristainetos.dtsi | 4 ++-- arch/arm/boot/dts/imx6qdl-cubox-i.dtsi | 2 +- arch/arm/boot/dts/imx6qdl-dfi-fs700-m60.dtsi | 4 +++- arch/arm/boot/dts/imx6qdl-gw52xx.dtsi | 2 +- arch/arm/boot/dts/imx6qdl-gw53xx.dtsi | 2 +- arch/arm/boot/dts/imx6qdl-gw54xx.dtsi | 2 +- arch/arm/boot/dts/imx6qdl-hummingboard.dtsi | 3 ++- arch/arm/boot/dts/imx6qdl-nitrogen6x.dtsi | 4 ++-- arch/arm/boot/dts/imx6qdl-phytec-pfla02.dtsi | 8 ++++---- arch/arm/boot/dts/imx6qdl-rex.dtsi | 4 ++-- arch/arm/boot/dts/imx6qdl-sabreauto.dtsi | 4 ++-- arch/arm/boot/dts/imx6qdl-sabrelite.dtsi | 6 +++--- arch/arm/boot/dts/imx6qdl-sabresd.dtsi | 8 ++++---- arch/arm/boot/dts/imx6qdl-tx6.dtsi | 4 ++-- arch/arm/boot/dts/imx6qdl-wandboard.dtsi | 6 ++++-- arch/arm/boot/dts/imx6sl-evk.dts | 10 +++++----- arch/arm/boot/dts/imx6sx-sabreauto.dts | 4 ++-- arch/arm/boot/dts/imx6sx-sdb.dtsi | 4 ++-- 22 files changed, 54 insertions(+), 47 deletions(-) diff --git a/arch/arm/boot/dts/imx6dl-riotboard.dts b/arch/arm/boot/dts/imx6dl-riotboard.dts index 43cb3fd76be76..5111f5170d534 100644 --- a/arch/arm/boot/dts/imx6dl-riotboard.dts +++ b/arch/arm/boot/dts/imx6dl-riotboard.dts @@ -305,8 +305,8 @@ &usdhc2 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usdhc2>; - cd-gpios = <&gpio1 4 0>; - wp-gpios = <&gpio1 2 0>; + cd-gpios = <&gpio1 4 GPIO_ACTIVE_LOW>; + wp-gpios = <&gpio1 2 GPIO_ACTIVE_HIGH>; vmmc-supply = <®_3p3v>; status = "okay"; }; @@ -314,8 +314,8 @@ &usdhc3 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usdhc3>; - cd-gpios = <&gpio7 0 0>; - wp-gpios = <&gpio7 1 0>; + cd-gpios = <&gpio7 0 GPIO_ACTIVE_LOW>; + wp-gpios = <&gpio7 1 GPIO_ACTIVE_HIGH>; vmmc-supply = <®_3p3v>; status = "okay"; }; diff --git 
a/arch/arm/boot/dts/imx6q-arm2.dts b/arch/arm/boot/dts/imx6q-arm2.dts index 78df05e9d1ce6..d6515f7a56c42 100644 --- a/arch/arm/boot/dts/imx6q-arm2.dts +++ b/arch/arm/boot/dts/imx6q-arm2.dts @@ -11,6 +11,7 @@ */ /dts-v1/; +#include #include "imx6q.dtsi" / { @@ -196,8 +197,8 @@ }; &usdhc3 { - cd-gpios = <&gpio6 11 0>; - wp-gpios = <&gpio6 14 0>; + cd-gpios = <&gpio6 11 GPIO_ACTIVE_LOW>; + wp-gpios = <&gpio6 14 GPIO_ACTIVE_HIGH>; vmmc-supply = <®_3p3v>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usdhc3 diff --git a/arch/arm/boot/dts/imx6q-gk802.dts b/arch/arm/boot/dts/imx6q-gk802.dts index 703539cf36d30..00bd63e63d0cd 100644 --- a/arch/arm/boot/dts/imx6q-gk802.dts +++ b/arch/arm/boot/dts/imx6q-gk802.dts @@ -7,6 +7,7 @@ */ /dts-v1/; +#include #include "imx6q.dtsi" / { @@ -161,7 +162,7 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usdhc3>; bus-width = <4>; - cd-gpios = <&gpio6 11 0>; + cd-gpios = <&gpio6 11 GPIO_ACTIVE_LOW>; vmmc-supply = <®_3p3v>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6q-tbs2910.dts b/arch/arm/boot/dts/imx6q-tbs2910.dts index a43abfa21e33b..5645d52850a7e 100644 --- a/arch/arm/boot/dts/imx6q-tbs2910.dts +++ b/arch/arm/boot/dts/imx6q-tbs2910.dts @@ -251,7 +251,7 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usdhc2>; bus-width = <4>; - cd-gpios = <&gpio2 2 GPIO_ACTIVE_HIGH>; + cd-gpios = <&gpio2 2 GPIO_ACTIVE_LOW>; vmmc-supply = <®_3p3v>; status = "okay"; }; @@ -260,7 +260,7 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usdhc3>; bus-width = <4>; - cd-gpios = <&gpio2 0 GPIO_ACTIVE_HIGH>; + cd-gpios = <&gpio2 0 GPIO_ACTIVE_LOW>; wp-gpios = <&gpio2 1 GPIO_ACTIVE_HIGH>; vmmc-supply = <®_3p3v>; status = "okay"; diff --git a/arch/arm/boot/dts/imx6qdl-aristainetos.dtsi b/arch/arm/boot/dts/imx6qdl-aristainetos.dtsi index e6d9195a1da7b..f4d6ae564ead2 100644 --- a/arch/arm/boot/dts/imx6qdl-aristainetos.dtsi +++ b/arch/arm/boot/dts/imx6qdl-aristainetos.dtsi @@ -173,7 +173,7 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usdhc1>; vmmc-supply = <®_3p3v>; - cd-gpios = <&gpio4 7 GPIO_ACTIVE_HIGH>; + cd-gpios = <&gpio4 7 GPIO_ACTIVE_LOW>; status = "okay"; }; @@ -181,7 +181,7 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usdhc2>; vmmc-supply = <®_3p3v>; - cd-gpios = <&gpio4 8 GPIO_ACTIVE_HIGH>; + cd-gpios = <&gpio4 8 GPIO_ACTIVE_LOW>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6qdl-cubox-i.dtsi b/arch/arm/boot/dts/imx6qdl-cubox-i.dtsi index d033bb1820602..6a846e0ef5054 100644 --- a/arch/arm/boot/dts/imx6qdl-cubox-i.dtsi +++ b/arch/arm/boot/dts/imx6qdl-cubox-i.dtsi @@ -259,6 +259,6 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_cubox_i_usdhc2_aux &pinctrl_cubox_i_usdhc2>; vmmc-supply = <®_3p3v>; - cd-gpios = <&gpio1 4 0>; + cd-gpios = <&gpio1 4 GPIO_ACTIVE_LOW>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6qdl-dfi-fs700-m60.dtsi b/arch/arm/boot/dts/imx6qdl-dfi-fs700-m60.dtsi index 2c253d6d20bd1..45e7c39e80d58 100644 --- a/arch/arm/boot/dts/imx6qdl-dfi-fs700-m60.dtsi +++ b/arch/arm/boot/dts/imx6qdl-dfi-fs700-m60.dtsi @@ -1,3 +1,5 @@ +#include + / { regulators { compatible = "simple-bus"; @@ -181,7 +183,7 @@ &usdhc2 { /* module slot */ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usdhc2>; - cd-gpios = <&gpio2 2 0>; + cd-gpios = <&gpio2 2 GPIO_ACTIVE_LOW>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6qdl-gw52xx.dtsi b/arch/arm/boot/dts/imx6qdl-gw52xx.dtsi index b5756c21ea1d5..4493f6e993301 100644 --- a/arch/arm/boot/dts/imx6qdl-gw52xx.dtsi +++ b/arch/arm/boot/dts/imx6qdl-gw52xx.dtsi @@ -318,7 +318,7 @@ &usdhc3 
{ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usdhc3>; - cd-gpios = <&gpio7 0 GPIO_ACTIVE_HIGH>; + cd-gpios = <&gpio7 0 GPIO_ACTIVE_LOW>; vmmc-supply = <®_3p3v>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6qdl-gw53xx.dtsi b/arch/arm/boot/dts/imx6qdl-gw53xx.dtsi index 86f03c1b147c6..a857d1294609a 100644 --- a/arch/arm/boot/dts/imx6qdl-gw53xx.dtsi +++ b/arch/arm/boot/dts/imx6qdl-gw53xx.dtsi @@ -324,7 +324,7 @@ &usdhc3 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usdhc3>; - cd-gpios = <&gpio7 0 GPIO_ACTIVE_HIGH>; + cd-gpios = <&gpio7 0 GPIO_ACTIVE_LOW>; vmmc-supply = <®_3p3v>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6qdl-gw54xx.dtsi b/arch/arm/boot/dts/imx6qdl-gw54xx.dtsi index 4a8d97f477592..1afe3385e2d28 100644 --- a/arch/arm/boot/dts/imx6qdl-gw54xx.dtsi +++ b/arch/arm/boot/dts/imx6qdl-gw54xx.dtsi @@ -417,7 +417,7 @@ &usdhc3 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usdhc3>; - cd-gpios = <&gpio7 0 GPIO_ACTIVE_HIGH>; + cd-gpios = <&gpio7 0 GPIO_ACTIVE_LOW>; vmmc-supply = <®_3p3v>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6qdl-hummingboard.dtsi b/arch/arm/boot/dts/imx6qdl-hummingboard.dtsi index 151a3db2aea95..c6833d2b4ff5c 100644 --- a/arch/arm/boot/dts/imx6qdl-hummingboard.dtsi +++ b/arch/arm/boot/dts/imx6qdl-hummingboard.dtsi @@ -41,6 +41,7 @@ */ #include "imx6qdl-microsom.dtsi" #include "imx6qdl-microsom-ar8035.dtsi" +#include / { chosen { @@ -288,6 +289,6 @@ &pinctrl_hummingboard_usdhc2 >; vmmc-supply = <®_3p3v>; - cd-gpios = <&gpio1 4 0>; + cd-gpios = <&gpio1 4 GPIO_ACTIVE_LOW>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6qdl-nitrogen6x.dtsi b/arch/arm/boot/dts/imx6qdl-nitrogen6x.dtsi index 08218120e770a..64e0b6178bf43 100644 --- a/arch/arm/boot/dts/imx6qdl-nitrogen6x.dtsi +++ b/arch/arm/boot/dts/imx6qdl-nitrogen6x.dtsi @@ -449,7 +449,7 @@ &usdhc3 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usdhc3>; - cd-gpios = <&gpio7 0 0>; + cd-gpios = <&gpio7 0 GPIO_ACTIVE_LOW>; vmmc-supply = <®_3p3v>; status = "okay"; }; @@ -457,7 +457,7 @@ &usdhc4 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usdhc4>; - cd-gpios = <&gpio2 6 0>; + cd-gpios = <&gpio2 6 GPIO_ACTIVE_LOW>; vmmc-supply = <®_3p3v>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6qdl-phytec-pfla02.dtsi b/arch/arm/boot/dts/imx6qdl-phytec-pfla02.dtsi index 1ce6133b67f5c..9e6ecd99b472d 100644 --- a/arch/arm/boot/dts/imx6qdl-phytec-pfla02.dtsi +++ b/arch/arm/boot/dts/imx6qdl-phytec-pfla02.dtsi @@ -409,8 +409,8 @@ &usdhc2 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usdhc2>; - cd-gpios = <&gpio1 4 0>; - wp-gpios = <&gpio1 2 0>; + cd-gpios = <&gpio1 4 GPIO_ACTIVE_LOW>; + wp-gpios = <&gpio1 2 GPIO_ACTIVE_HIGH>; status = "disabled"; }; @@ -418,7 +418,7 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usdhc3 &pinctrl_usdhc3_cdwp>; - cd-gpios = <&gpio1 27 0>; - wp-gpios = <&gpio1 29 0>; + cd-gpios = <&gpio1 27 GPIO_ACTIVE_LOW>; + wp-gpios = <&gpio1 29 GPIO_ACTIVE_HIGH>; status = "disabled"; }; diff --git a/arch/arm/boot/dts/imx6qdl-rex.dtsi b/arch/arm/boot/dts/imx6qdl-rex.dtsi index 394a4ace351af..a503562438888 100644 --- a/arch/arm/boot/dts/imx6qdl-rex.dtsi +++ b/arch/arm/boot/dts/imx6qdl-rex.dtsi @@ -340,7 +340,7 @@ pinctrl-0 = <&pinctrl_usdhc2>; bus-width = <4>; cd-gpios = <&gpio2 2 GPIO_ACTIVE_LOW>; - wp-gpios = <&gpio2 3 GPIO_ACTIVE_LOW>; + wp-gpios = <&gpio2 3 GPIO_ACTIVE_HIGH>; status = "okay"; }; @@ -349,6 +349,6 @@ pinctrl-0 = <&pinctrl_usdhc3>; bus-width = <4>; cd-gpios = <&gpio2 0 GPIO_ACTIVE_LOW>; - wp-gpios = <&gpio2 1 GPIO_ACTIVE_LOW>; + 
wp-gpios = <&gpio2 1 GPIO_ACTIVE_HIGH>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi b/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi index 3b24b12651b2b..e329ca5c33227 100644 --- a/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi +++ b/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi @@ -467,8 +467,8 @@ pinctrl-0 = <&pinctrl_usdhc3>; pinctrl-1 = <&pinctrl_usdhc3_100mhz>; pinctrl-2 = <&pinctrl_usdhc3_200mhz>; - cd-gpios = <&gpio6 15 0>; - wp-gpios = <&gpio1 13 0>; + cd-gpios = <&gpio6 15 GPIO_ACTIVE_LOW>; + wp-gpios = <&gpio1 13 GPIO_ACTIVE_HIGH>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6qdl-sabrelite.dtsi b/arch/arm/boot/dts/imx6qdl-sabrelite.dtsi index 0b28a9d5241e5..1e27485e42931 100644 --- a/arch/arm/boot/dts/imx6qdl-sabrelite.dtsi +++ b/arch/arm/boot/dts/imx6qdl-sabrelite.dtsi @@ -444,8 +444,8 @@ &usdhc3 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usdhc3>; - cd-gpios = <&gpio7 0 0>; - wp-gpios = <&gpio7 1 0>; + cd-gpios = <&gpio7 0 GPIO_ACTIVE_LOW>; + wp-gpios = <&gpio7 1 GPIO_ACTIVE_HIGH>; vmmc-supply = <®_3p3v>; status = "okay"; }; @@ -453,7 +453,7 @@ &usdhc4 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usdhc4>; - cd-gpios = <&gpio2 6 0>; + cd-gpios = <&gpio2 6 GPIO_ACTIVE_LOW>; vmmc-supply = <®_3p3v>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6qdl-sabresd.dtsi b/arch/arm/boot/dts/imx6qdl-sabresd.dtsi index a626e6dd8022c..944eb81cb2b8c 100644 --- a/arch/arm/boot/dts/imx6qdl-sabresd.dtsi +++ b/arch/arm/boot/dts/imx6qdl-sabresd.dtsi @@ -562,8 +562,8 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usdhc2>; bus-width = <8>; - cd-gpios = <&gpio2 2 0>; - wp-gpios = <&gpio2 3 0>; + cd-gpios = <&gpio2 2 GPIO_ACTIVE_LOW>; + wp-gpios = <&gpio2 3 GPIO_ACTIVE_HIGH>; status = "okay"; }; @@ -571,8 +571,8 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usdhc3>; bus-width = <8>; - cd-gpios = <&gpio2 0 0>; - wp-gpios = <&gpio2 1 0>; + cd-gpios = <&gpio2 0 GPIO_ACTIVE_LOW>; + wp-gpios = <&gpio2 1 GPIO_ACTIVE_HIGH>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6qdl-tx6.dtsi b/arch/arm/boot/dts/imx6qdl-tx6.dtsi index f02b80b41d4fb..da08de324e9eb 100644 --- a/arch/arm/boot/dts/imx6qdl-tx6.dtsi +++ b/arch/arm/boot/dts/imx6qdl-tx6.dtsi @@ -680,7 +680,7 @@ pinctrl-0 = <&pinctrl_usdhc1>; bus-width = <4>; no-1-8-v; - cd-gpios = <&gpio7 2 0>; + cd-gpios = <&gpio7 2 GPIO_ACTIVE_LOW>; fsl,wp-controller; status = "okay"; }; @@ -690,7 +690,7 @@ pinctrl-0 = <&pinctrl_usdhc2>; bus-width = <4>; no-1-8-v; - cd-gpios = <&gpio7 3 0>; + cd-gpios = <&gpio7 3 GPIO_ACTIVE_LOW>; fsl,wp-controller; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6qdl-wandboard.dtsi b/arch/arm/boot/dts/imx6qdl-wandboard.dtsi index 5fb091675582e..9e096d811beda 100644 --- a/arch/arm/boot/dts/imx6qdl-wandboard.dtsi +++ b/arch/arm/boot/dts/imx6qdl-wandboard.dtsi @@ -9,6 +9,8 @@ * */ +#include + / { regulators { compatible = "simple-bus"; @@ -250,13 +252,13 @@ &usdhc1 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usdhc1>; - cd-gpios = <&gpio1 2 0>; + cd-gpios = <&gpio1 2 GPIO_ACTIVE_LOW>; status = "okay"; }; &usdhc3 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usdhc3>; - cd-gpios = <&gpio3 9 0>; + cd-gpios = <&gpio3 9 GPIO_ACTIVE_LOW>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6sl-evk.dts b/arch/arm/boot/dts/imx6sl-evk.dts index 945887d3fdb35..b84dff2e94ea1 100644 --- a/arch/arm/boot/dts/imx6sl-evk.dts +++ b/arch/arm/boot/dts/imx6sl-evk.dts @@ -617,8 +617,8 @@ pinctrl-1 = <&pinctrl_usdhc1_100mhz>; pinctrl-2 = <&pinctrl_usdhc1_200mhz>; bus-width = <8>; - 
cd-gpios = <&gpio4 7 0>; - wp-gpios = <&gpio4 6 0>; + cd-gpios = <&gpio4 7 GPIO_ACTIVE_LOW>; + wp-gpios = <&gpio4 6 GPIO_ACTIVE_HIGH>; status = "okay"; }; @@ -627,8 +627,8 @@ pinctrl-0 = <&pinctrl_usdhc2>; pinctrl-1 = <&pinctrl_usdhc2_100mhz>; pinctrl-2 = <&pinctrl_usdhc2_200mhz>; - cd-gpios = <&gpio5 0 0>; - wp-gpios = <&gpio4 29 0>; + cd-gpios = <&gpio5 0 GPIO_ACTIVE_LOW>; + wp-gpios = <&gpio4 29 GPIO_ACTIVE_HIGH>; status = "okay"; }; @@ -637,6 +637,6 @@ pinctrl-0 = <&pinctrl_usdhc3>; pinctrl-1 = <&pinctrl_usdhc3_100mhz>; pinctrl-2 = <&pinctrl_usdhc3_200mhz>; - cd-gpios = <&gpio3 22 0>; + cd-gpios = <&gpio3 22 GPIO_ACTIVE_LOW>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6sx-sabreauto.dts b/arch/arm/boot/dts/imx6sx-sabreauto.dts index e3c0b63c22056..115f3fd789718 100644 --- a/arch/arm/boot/dts/imx6sx-sabreauto.dts +++ b/arch/arm/boot/dts/imx6sx-sabreauto.dts @@ -49,7 +49,7 @@ pinctrl-1 = <&pinctrl_usdhc3_100mhz>; pinctrl-2 = <&pinctrl_usdhc3_200mhz>; bus-width = <8>; - cd-gpios = <&gpio7 10 GPIO_ACTIVE_HIGH>; + cd-gpios = <&gpio7 10 GPIO_ACTIVE_LOW>; wp-gpios = <&gpio3 19 GPIO_ACTIVE_HIGH>; keep-power-in-suspend; enable-sdio-wakeup; @@ -61,7 +61,7 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usdhc4>; bus-width = <8>; - cd-gpios = <&gpio7 11 GPIO_ACTIVE_HIGH>; + cd-gpios = <&gpio7 11 GPIO_ACTIVE_LOW>; no-1-8-v; keep-power-in-suspend; enable-sdio-wakup; diff --git a/arch/arm/boot/dts/imx6sx-sdb.dtsi b/arch/arm/boot/dts/imx6sx-sdb.dtsi index cef04cef3a807..ac88c3467078e 100644 --- a/arch/arm/boot/dts/imx6sx-sdb.dtsi +++ b/arch/arm/boot/dts/imx6sx-sdb.dtsi @@ -293,7 +293,7 @@ pinctrl-1 = <&pinctrl_usdhc3_100mhz>; pinctrl-2 = <&pinctrl_usdhc3_200mhz>; bus-width = <8>; - cd-gpios = <&gpio2 10 GPIO_ACTIVE_HIGH>; + cd-gpios = <&gpio2 10 GPIO_ACTIVE_LOW>; wp-gpios = <&gpio2 15 GPIO_ACTIVE_HIGH>; keep-power-in-suspend; enable-sdio-wakeup; @@ -304,7 +304,7 @@ &usdhc4 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usdhc4>; - cd-gpios = <&gpio6 21 GPIO_ACTIVE_HIGH>; + cd-gpios = <&gpio6 21 GPIO_ACTIVE_LOW>; wp-gpios = <&gpio6 20 GPIO_ACTIVE_HIGH>; status = "okay"; }; From 1f2ce4a2e7aea3a2123b17aff62a80553df31e21 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 9 Nov 2015 14:34:10 -0800 Subject: [PATCH 1329/2091] Linux 4.1.13 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 2320f1911404d..d5d229db61d56 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 1 -SUBLEVEL = 12 +SUBLEVEL = 13 EXTRAVERSION = NAME = Series 4800 From 1c98797fc8ff4cf4122a98cd7365c25c598c090e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 19 Oct 2015 13:16:49 +0300 Subject: [PATCH 1330/2091] irda: precedence bug in irlmp_seq_hb_idx() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 50010c20597d14667eff0fdb628309986f195230 ] This is decrementing the pointer, instead of the value stored in the pointer. KASan detects it as an out of bounds reference. Reported-by: "Berry Cheng 程君(成淼)" Signed-off-by: Dan Carpenter Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/irda/irlmp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/irda/irlmp.c b/net/irda/irlmp.c index a26c401ef4a44..43964594aa12d 100644 --- a/net/irda/irlmp.c +++ b/net/irda/irlmp.c @@ -1839,7 +1839,7 @@ static void *irlmp_seq_hb_idx(struct irlmp_iter_state *iter, loff_t *off) for (element = hashbin_get_first(iter->hashbin); element != NULL; element = hashbin_get_next(iter->hashbin)) { - if (!off || *off-- == 0) { + if (!off || (*off)-- == 0) { /* NB: hashbin left locked */ return element; } From d45ed6c1ff20d3640a31f03816ca2d48fb7d6f22 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Mon, 19 Oct 2015 11:33:00 -0400 Subject: [PATCH 1331/2091] tipc: allow non-linear first fragment buffer [ Upstream commit 45c8b7b175ceb2d542e0fe15247377bf3bce29ec ] The current code for message reassembly is erroneously assuming that the first arriving fragment buffer is always linear, and then goes ahead resetting the fragment list of that buffer in anticipation of more arriving fragments. However, if the buffer already happens to be non-linear, we will inadvertently drop the already attached fragment list, and later on trigger a BUG() in __pskb_pull_tail(). We see this happen when running fragmented TIPC multicast across UDP, something made possible since commit d0f91938bede ("tipc: add ip/udp media type"). We fix this by not resetting the fragment list when the buffer is non-linear, and by initializing our private fragment list tail pointer to the tail of the existing fragment list. Fixes: commit d0f91938bede ("tipc: add ip/udp media type") Signed-off-by: Jon Maloy Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/tipc/msg.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/net/tipc/msg.c b/net/tipc/msg.c index c3e96e8154188..e9333147d6f18 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -121,7 +121,7 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) { struct sk_buff *head = *headbuf; struct sk_buff *frag = *buf; - struct sk_buff *tail; + struct sk_buff *tail = NULL; struct tipc_msg *msg; u32 fragid; int delta; @@ -141,9 +141,15 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) if (unlikely(skb_unclone(frag, GFP_ATOMIC))) goto err; head = *headbuf = frag; - skb_frag_list_init(head); - TIPC_SKB_CB(head)->tail = NULL; *buf = NULL; + TIPC_SKB_CB(head)->tail = NULL; + if (skb_is_nonlinear(head)) { + skb_walk_frags(head, tail) { + TIPC_SKB_CB(head)->tail = tail; + } + } else { + skb_frag_list_init(head); + } return 0; } From f06dd3b4658175f5c9e9331d35e31abe33ac00c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Thu, 22 Oct 2015 14:15:58 +0200 Subject: [PATCH 1332/2091] qmi_wwan: add Sierra Wireless MC74xx/EM74xx MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 0db65fcfcded76fe4f74e3ca9f4e2baf67b683ef ] New device IDs shamelessly lifted from the vendor driver. Signed-off-by: Bjørn Mork Signed-off-by: David S.
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/qmi_wwan.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index f603f362504bc..4e0470d396a38 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -764,6 +764,10 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x1199, 0x9056, 8)}, /* Sierra Wireless Modem */ {QMI_FIXED_INTF(0x1199, 0x9057, 8)}, {QMI_FIXED_INTF(0x1199, 0x9061, 8)}, /* Sierra Wireless Modem */ + {QMI_FIXED_INTF(0x1199, 0x9070, 8)}, /* Sierra Wireless MC74xx/EM74xx */ + {QMI_FIXED_INTF(0x1199, 0x9070, 10)}, /* Sierra Wireless MC74xx/EM74xx */ + {QMI_FIXED_INTF(0x1199, 0x9071, 8)}, /* Sierra Wireless MC74xx/EM74xx */ + {QMI_FIXED_INTF(0x1199, 0x9071, 10)}, /* Sierra Wireless MC74xx/EM74xx */ {QMI_FIXED_INTF(0x1bbb, 0x011e, 4)}, /* Telekom Speedstick LTE II (Alcatel One Touch L100V LTE) */ {QMI_FIXED_INTF(0x1bbb, 0x0203, 2)}, /* Alcatel L800MA */ {QMI_FIXED_INTF(0x2357, 0x0201, 4)}, /* TP-LINK HSUPA Modem MA180 */ From 728109e9d679faf2d50378109fc9b8fd1a8c3ae7 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 23 Oct 2015 00:57:05 -0400 Subject: [PATCH 1333/2091] macvtap: unbreak receiving of gro skb with frag list [ Upstream commit f23d538bc24a83c16127c2eb82c9cf1adc2b5149 ] We don't have fraglist support in TAP_FEATURES. This will lead software segmentation of gro skb with frag list. Fixes by having frag list support in TAP_FEATURES. With this patch single session of netperf receiving were restored from about 5Gb/s to about 12Gb/s on mlx4. Fixes a567dd6252 ("macvtap: simplify usage of tap_features") Cc: Vlad Yasevich Cc: Michael S. Tsirkin Signed-off-by: Jason Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/macvtap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 58858c5589db7..4dba5fbc735e3 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -82,7 +82,7 @@ static const struct proto_ops macvtap_socket_ops; #define TUN_OFFLOADS (NETIF_F_HW_CSUM | NETIF_F_TSO_ECN | NETIF_F_TSO | \ NETIF_F_TSO6 | NETIF_F_UFO) #define RX_OFFLOADS (NETIF_F_GRO | NETIF_F_LRO) -#define TAP_FEATURES (NETIF_F_GSO | NETIF_F_SG) +#define TAP_FEATURES (NETIF_F_GSO | NETIF_F_SG | NETIF_F_FRAGLIST) static struct macvlan_dev *macvtap_get_vlan_rcu(const struct net_device *dev) { From b4158226f16fb48eb25ad8882888e45b4465bd83 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Thu, 22 Oct 2015 16:57:10 +0200 Subject: [PATCH 1334/2091] ppp: fix pppoe_dev deletion condition in pppoe_release() [ Upstream commit 1acea4f6ce1b1c0941438aca75dd2e5c6b09db60 ] We can't rely on PPPOX_ZOMBIE to decide whether to clear po->pppoe_dev. PPPOX_ZOMBIE can be set by pppoe_disc_rcv() even when po->pppoe_dev is NULL. So we have no guarantee that (sk->sk_state & PPPOX_ZOMBIE) implies (po->pppoe_dev != NULL). Since we're releasing a PPPoE socket, we want to release the pppoe_dev if it exists and reset sk_state to PPPOX_DEAD, no matter the previous value of sk_state. So we can just check for po->pppoe_dev and avoid any assumption on sk->sk_state. Fixes: 2b018d57ff18 ("pppoe: drop PPPOX_ZOMBIEs in pppoe_release") Signed-off-by: Guillaume Nault Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ppp/pppoe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index db2c3cdf2c40d..ab33262ed8260 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -589,7 +589,7 @@ static int pppoe_release(struct socket *sock) po = pppox_sk(sk); - if (sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND | PPPOX_ZOMBIE)) { + if (po->pppoe_dev) { dev_put(po->pppoe_dev); po->pppoe_dev = NULL; } From 199bcc1dc5bd32e70e104a42c56857c1062714f7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 24 Oct 2015 05:47:44 -0700 Subject: [PATCH 1335/2091] ipv6: gre: support SIT encapsulation [ Upstream commit 7e3b6e7423d5f994257c1de88e06b509673fdbcf ] gre_gso_segment() chokes if SIT frames were aggregated by GRO engine. Fixes: 61c1db7fae21e ("ipv6: sit: add GSO/TSO support") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/gre_offload.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index 5aa46d4b44efb..5a8ee32825508 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -36,7 +36,8 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, SKB_GSO_TCP_ECN | SKB_GSO_GRE | SKB_GSO_GRE_CSUM | - SKB_GSO_IPIP))) + SKB_GSO_IPIP | + SKB_GSO_SIT))) goto out; if (!skb->encapsulation) From a8cf2fa6a557924d9d3d3ed67bd4afeeac37b91b Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 27 Oct 2015 15:06:45 -0700 Subject: [PATCH 1336/2091] fib_trie: leaf_walk_rcu should not compute key if key is less than pn->key [ Upstream commit c2229fe1430d4e1c70e36520229dd64a87802b20 ] We were computing the child index in cases where the key value we were looking for was actually less than the base key of the tnode. As a result we were getting incorrect index values that would cause us to skip over some children. To fix this I have added a test that will force us to use child index 0 if the key we are looking for is less than the key of the current tnode. Fixes: 8be33e955cb9 ("fib_trie: Fib walk rcu should take a tnode and key instead of a trie and a leaf") Reported-by: Brian Rak Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/fib_trie.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 0ca933db1b41b..93b8029848192 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1547,7 +1547,7 @@ static struct key_vector *leaf_walk_rcu(struct key_vector **tn, t_key key) do { /* record parent and next child index */ pn = n; - cindex = key ? get_index(key, pn) : 0; + cindex = (key > pn->key) ? get_index(key, pn) : 0; if (cindex >> pn->bits) break; From 7d5b34f8f87376c530b006195e398a58364451af Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Mon, 26 Oct 2015 12:46:37 -0400 Subject: [PATCH 1337/2091] RDS-TCP: Recover correctly from pskb_pull()/pksb_trim() failure in rds_tcp_data_recv [ Upstream commit 8ce675ff39b9958d1c10f86cf58e357efaafc856 ] Either of pskb_pull() or pskb_trim() may fail under low memory conditions. If rds_tcp_data_recv() ignores such failures, the application will receive corrupted data because the skb has not been correctly carved to the RDS datagram size. 
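Both helpers signal failure differently -- pskb_pull() returns NULL while pskb_trim() returns a non-zero error code -- so the combined check looks roughly like the sketch below (illustrative only, with a hypothetical helper name rather than the actual rds_tcp code):

	#include <linux/skbuff.h>

	/* Sketch: carve an skb clone down to a single datagram, treating the
	 * pull/trim helpers as allocations that may fail under memory pressure.
	 */
	static int carve_clone(struct sk_buff *clone, unsigned int offset,
			       unsigned int len)
	{
		if (!pskb_pull(clone, offset) ||	/* NULL on failure */
		    pskb_trim(clone, len)) {		/* non-zero on failure */
			kfree_skb(clone);
			return -ENOMEM;
		}
		return 0;
	}
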
Avoid this by handling pskb_pull/pskb_trim failure in the same manner as the skb_clone failure: bail out of rds_tcp_data_recv(), and retry via the deferred call to rds_send_worker() that gets set up on ENOMEM from rds_tcp_read_sock() Signed-off-by: Sowmini Varadhan Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/rds/tcp_recv.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c index fbc5ef88bc0e6..27a992154804c 100644 --- a/net/rds/tcp_recv.c +++ b/net/rds/tcp_recv.c @@ -214,8 +214,15 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb, } to_copy = min(tc->t_tinc_data_rem, left); - pskb_pull(clone, offset); - pskb_trim(clone, to_copy); + if (!pskb_pull(clone, offset) || + pskb_trim(clone, to_copy)) { + pr_warn("rds_tcp_data_recv: pull/trim failed " + "left %zu data_rem %zu skb_len %d\n", + left, tc->t_tinc_data_rem, skb->len); + kfree_skb(clone); + desc->error = -ENOMEM; + goto out; + } skb_queue_tail(&tinc->ti_skb_list, clone); rdsdebug("skb %p data %p len %d off %u to_copy %zu -> " From bf346548c66e5ba077a709ccfa11d512e14b8498 Mon Sep 17 00:00:00 2001 From: Carol L Soto Date: Tue, 27 Oct 2015 17:36:20 +0200 Subject: [PATCH 1338/2091] net/mlx4: Copy/set only sizeof struct mlx4_eqe bytes [ Upstream commit c02b05011fadf8e409e41910217ca689f2fc9d91 ] When doing memcpy/memset of EQEs, we should use sizeof struct mlx4_eqe as the base size and not caps.eqe_size which could be bigger. If caps.eqe_size is bigger than the struct mlx4_eqe then we corrupt data in the master context. When using a 64 byte stride, the memcpy copied over 63 bytes to the slave_eq structure. This resulted in copying over the entire eqe of interest, including its ownership bit -- and also 31 bytes of garbage into the next WQE in the slave EQ -- which did NOT include the ownership bit (and therefore had no impact). However, once the stride is increased to 128, we are overwriting the ownership bits of *three* eqes in the slave_eq struct. This results in an incorrect ownership bit for those eqes, which causes the eq to seem to be full. The issue therefore surfaced only once 128-byte EQEs started being used in SRIOV and (overarchitectures that have 128/256 byte cache-lines such as PPC) - e.g after commit 77507aa249ae "net/mlx4_core: Enable CQE/EQE stride support". Fixes: 08ff32352d6f ('mlx4: 64-byte CQE/EQE support') Signed-off-by: Carol L Soto Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx4/cmd.c | 2 +- drivers/net/ethernet/mellanox/mlx4/eq.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 529ef0594b902..3756e45d8cec6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -2382,7 +2382,7 @@ int mlx4_multi_func_init(struct mlx4_dev *dev) } } - memset(&priv->mfunc.master.cmd_eqe, 0, dev->caps.eqe_size); + memset(&priv->mfunc.master.cmd_eqe, 0, sizeof(struct mlx4_eqe)); priv->mfunc.master.cmd_eqe.type = MLX4_EVENT_TYPE_CMD; INIT_WORK(&priv->mfunc.master.comm_work, mlx4_master_comm_channel); diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index 983b1d51244df..337811d208bd0 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -185,7 +185,7 @@ static void slave_event(struct mlx4_dev *dev, u8 slave, struct mlx4_eqe *eqe) return; } - memcpy(s_eqe, eqe, dev->caps.eqe_size - 1); + memcpy(s_eqe, eqe, sizeof(struct mlx4_eqe) - 1); s_eqe->slave_id = slave; /* ensure all information is written before setting the ownersip bit */ dma_wmb(); From ef3ab7c8ef69a799b6c6156a2ae564b0114a76b9 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Wed, 28 Oct 2015 13:09:53 -0400 Subject: [PATCH 1339/2091] tipc: linearize arriving NAME_DISTR and LINK_PROTO buffers [ Upstream commit 5cbb28a4bf65c7e4daa6c25b651fed8eb888c620 ] Testing of the new UDP bearer has revealed that reception of NAME_DISTRIBUTOR, LINK_PROTOCOL/RESET and LINK_PROTOCOL/ACTIVATE message buffers is not prepared for the case that those may be non-linear. We now linearize all such buffers before they are delivered up to the generic reception layer. In order for the commit to apply cleanly to 'net' and 'stable', we do the change in the function tipc_udp_recv() for now. Later, we will post a commit to 'net-next' moving the linearization to generic code, in tipc_named_rcv() and tipc_link_proto_rcv(). Fixes: commit d0f91938bede ("tipc: add ip/udp media type") Signed-off-by: Jon Maloy Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/tipc/udp_media.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index 66deebc66aa10..f8dfee5072c0e 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -48,6 +48,7 @@ #include #include "core.h" #include "bearer.h" +#include "msg.h" /* IANA assigned UDP port */ #define UDP_PORT_DEFAULT 6118 @@ -216,6 +217,10 @@ static int tipc_udp_recv(struct sock *sk, struct sk_buff *skb) { struct udp_bearer *ub; struct tipc_bearer *b; + int usr = msg_user(buf_msg(skb)); + + if ((usr == LINK_PROTOCOL) || (usr == NAME_DISTRIBUTOR)) + skb_linearize(skb); ub = rcu_dereference_sk_user_data(sk); if (!ub) { From dd963c7aeb1f8059ea931afab2aa4ba5f09eb496 Mon Sep 17 00:00:00 2001 From: Phil Reid Date: Fri, 30 Oct 2015 16:43:55 +0800 Subject: [PATCH 1340/2091] stmmac: Correctly report PTP capabilities. [ Upstream commit e6dbe1eb2db0d7a14991c06278dd3030c45fb825 ] priv->hwts_*_en indicate if timestamping is enabled/disabled at run time. But priv->dma_cap.time_stamp and priv->dma_cap.atime_stamp indicates HW is support for PTPv1/PTPv2. Signed-off-by: Phil Reid Acked-by: Richard Cochran Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index 771cda2a48b2a..2e51b816a7e81 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -721,10 +721,13 @@ static int stmmac_get_ts_info(struct net_device *dev, { struct stmmac_priv *priv = netdev_priv(dev); - if ((priv->hwts_tx_en) && (priv->hwts_rx_en)) { + if ((priv->dma_cap.time_stamp || priv->dma_cap.atime_stamp)) { - info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE | + info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_TX_HARDWARE | + SOF_TIMESTAMPING_RX_SOFTWARE | SOF_TIMESTAMPING_RX_HARDWARE | + SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_RAW_HARDWARE; if (priv->ptp_clock) From dcbca575d68df4cc48dca7f6cd0b4887b5495a5c Mon Sep 17 00:00:00 2001 From: Ani Sinha Date: Fri, 30 Oct 2015 16:54:31 -0700 Subject: [PATCH 1341/2091] ipmr: fix possible race resulting from improper usage of IP_INC_STATS_BH() in preemptible context. [ Upstream commit 44f49dd8b5a606870a1f21101522a0f9c4414784 ] Fixes the following kernel BUG : BUG: using __this_cpu_add() in preemptible [00000000] code: bash/2758 caller is __this_cpu_preempt_check+0x13/0x15 CPU: 0 PID: 2758 Comm: bash Tainted: P O 3.18.19 #2 ffffffff8170eaca ffff880110d1b788 ffffffff81482b2a 0000000000000000 0000000000000000 ffff880110d1b7b8 ffffffff812010ae ffff880007cab800 ffff88001a060800 ffff88013a899108 ffff880108b84240 ffff880110d1b7c8 Call Trace: [] dump_stack+0x52/0x80 [] check_preemption_disabled+0xce/0xe1 [] __this_cpu_preempt_check+0x13/0x15 [] ipmr_queue_xmit+0x647/0x70c [] ip_mr_forward+0x32f/0x34e [] ip_mroute_setsockopt+0xe03/0x108c [] ? get_parent_ip+0x11/0x42 [] ? pollwake+0x4d/0x51 [] ? default_wake_function+0x0/0xf [] ? get_parent_ip+0x11/0x42 [] ? __wake_up_common+0x45/0x77 [] ? _raw_spin_unlock_irqrestore+0x1d/0x32 [] ? __wake_up_sync_key+0x4a/0x53 [] ? sock_def_readable+0x71/0x75 [] do_ip_setsockopt+0x9d/0xb55 [] ? unix_seqpacket_sendmsg+0x3f/0x41 [] ? sock_sendmsg+0x6d/0x86 [] ? sockfd_lookup_light+0x12/0x5d [] ? SyS_sendto+0xf3/0x11b [] ? new_sync_read+0x82/0xaa [] compat_ip_setsockopt+0x3b/0x99 [] compat_raw_setsockopt+0x11/0x32 [] compat_sock_common_setsockopt+0x18/0x1f [] compat_SyS_setsockopt+0x1a9/0x1cf [] compat_SyS_socketcall+0x180/0x1e3 [] cstar_dispatch+0x7/0x1e Signed-off-by: Ani Sinha Acked-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ipmr.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 3a2c0162c3bad..df28693f32e13 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1683,8 +1683,8 @@ static inline int ipmr_forward_finish(struct sock *sk, struct sk_buff *skb) { struct ip_options *opt = &(IPCB(skb)->opt); - IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS); - IP_ADD_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTOCTETS, skb->len); + IP_INC_STATS(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS); + IP_ADD_STATS(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTOCTETS, skb->len); if (unlikely(opt->optlen)) ip_forward_options(skb); @@ -1746,7 +1746,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, * to blackhole. 
*/ - IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS); + IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); ip_rt_put(rt); goto out_free; } From 44fec2292efaa6765c49e27deb102c9def8f745d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 2 Nov 2015 17:08:19 -0800 Subject: [PATCH 1342/2091] sit: fix sit0 percpu double allocations [ Upstream commit 4ece9009774596ee3df0acba65a324b7ea79387c ] sit0 device allocates its percpu storage twice : - One time in ipip6_tunnel_init() - One time in ipip6_fb_tunnel_init() Thus we leak 48 bytes per possible cpu per network namespace dismantle. ipip6_fb_tunnel_init() can be much simpler and does not return an error, and should be called after register_netdev() Note that ipip6_tunnel_clone_6rd() also needs to be called after register_netdev() (calling ipip6_tunnel_init()) Fixes: ebe084aafb7e ("sit: Use ipip6_tunnel_init as the ndo_init function.") Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Cc: Steffen Klassert Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/sit.c | 26 ++++---------------------- 1 file changed, 4 insertions(+), 22 deletions(-) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index ac35a28599be5..85c4b2fff504d 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1394,34 +1394,20 @@ static int ipip6_tunnel_init(struct net_device *dev) return 0; } -static int __net_init ipip6_fb_tunnel_init(struct net_device *dev) +static void __net_init ipip6_fb_tunnel_init(struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); struct iphdr *iph = &tunnel->parms.iph; struct net *net = dev_net(dev); struct sit_net *sitn = net_generic(net, sit_net_id); - tunnel->dev = dev; - tunnel->net = dev_net(dev); - iph->version = 4; iph->protocol = IPPROTO_IPV6; iph->ihl = 5; iph->ttl = 64; - dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); - if (!dev->tstats) - return -ENOMEM; - - tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst); - if (!tunnel->dst_cache) { - free_percpu(dev->tstats); - return -ENOMEM; - } - dev_hold(dev); rcu_assign_pointer(sitn->tunnels_wc[0], tunnel); - return 0; } static int ipip6_validate(struct nlattr *tb[], struct nlattr *data[]) @@ -1831,23 +1817,19 @@ static int __net_init sit_init_net(struct net *net) */ sitn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL; - err = ipip6_fb_tunnel_init(sitn->fb_tunnel_dev); - if (err) - goto err_dev_free; - - ipip6_tunnel_clone_6rd(sitn->fb_tunnel_dev, sitn); err = register_netdev(sitn->fb_tunnel_dev); if (err) goto err_reg_dev; + ipip6_tunnel_clone_6rd(sitn->fb_tunnel_dev, sitn); + ipip6_fb_tunnel_init(sitn->fb_tunnel_dev); + t = netdev_priv(sitn->fb_tunnel_dev); strcpy(t->parms.name, sitn->fb_tunnel_dev->name); return 0; err_reg_dev: - dev_put(sitn->fb_tunnel_dev); -err_dev_free: ipip6_dev_free(sitn->fb_tunnel_dev); err_alloc_dev: return err; From 20975f42a7933b489c0ef463e913c9742cfe2235 Mon Sep 17 00:00:00 2001 From: Martin Habets Date: Mon, 2 Nov 2015 12:51:31 +0000 Subject: [PATCH 1343/2091] sfc: push partner queue for skb->xmit_more [ Upstream commit b2663a4f30e85ec606b806f5135413e6d5c78d1e ] When the IP stack passes SKBs the sfc driver puts them in 2 different TX queues (called partners), one for checksummed and one for not checksummed. If the SKB has xmit_more set the driver will delay pushing the work to the NIC. When later it does decide to push the buffers this patch ensures it also pushes the partner queue, if that also has any delayed work. 
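In simplified form, the xmit_more contract that the driver's transmit paths follow is the following (a sketch with hypothetical driver types and helpers, not the sfc code itself):

	#include <linux/netdevice.h>
	#include <linux/skbuff.h>

	/* Sketch: queue the descriptor unconditionally, but only write the
	 * hardware doorbell when the stack says no more packets are coming or
	 * the queue has been stopped; anything deferred here must be flushed
	 * by a later transmit.
	 */
	static netdev_tx_t sketch_xmit(struct sk_buff *skb, struct my_tx_queue *txq)
	{
		my_queue_descriptor(txq, skb);			/* hypothetical */
		if (!skb->xmit_more || netif_xmit_stopped(txq->core_txq))
			my_ring_doorbell(txq);			/* hypothetical */
		else
			txq->doorbell_pending = true;		/* hypothetical */
		return NETDEV_TX_OK;
	}
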
Before this fix the work in the partner queue would be left for a long time and cause a netdev watchdog. Fixes: 70b33fb ("sfc: add support for skb->xmit_more") Reported-by: Jianlin Shi Signed-off-by: Martin Habets Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/sfc/ef10.c | 4 +++- drivers/net/ethernet/sfc/farch.c | 4 +++- drivers/net/ethernet/sfc/net_driver.h | 2 ++ drivers/net/ethernet/sfc/tx.c | 30 +++++++++++++++++++++++++-- 4 files changed, 36 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index fbb6cfa0f5f1d..feca46efa12f8 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -1344,7 +1344,9 @@ static void efx_ef10_tx_write(struct efx_tx_queue *tx_queue) unsigned int write_ptr; efx_qword_t *txd; - BUG_ON(tx_queue->write_count == tx_queue->insert_count); + tx_queue->xmit_more_available = false; + if (unlikely(tx_queue->write_count == tx_queue->insert_count)) + return; do { write_ptr = tx_queue->write_count & tx_queue->ptr_mask; diff --git a/drivers/net/ethernet/sfc/farch.c b/drivers/net/ethernet/sfc/farch.c index bb89e96a125ea..6d4e0047a31d5 100644 --- a/drivers/net/ethernet/sfc/farch.c +++ b/drivers/net/ethernet/sfc/farch.c @@ -319,7 +319,9 @@ void efx_farch_tx_write(struct efx_tx_queue *tx_queue) unsigned write_ptr; unsigned old_write_count = tx_queue->write_count; - BUG_ON(tx_queue->write_count == tx_queue->insert_count); + tx_queue->xmit_more_available = false; + if (unlikely(tx_queue->write_count == tx_queue->insert_count)) + return; do { write_ptr = tx_queue->write_count & tx_queue->ptr_mask; diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index 325dd94bca465..0bdef4a074dd3 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -218,6 +218,7 @@ struct efx_tx_buffer { * @tso_packets: Number of packets via the TSO xmit path * @pushes: Number of times the TX push feature has been used * @pio_packets: Number of times the TX PIO feature has been used + * @xmit_more_available: Are any packets waiting to be pushed to the NIC * @empty_read_count: If the completion path has seen the queue as empty * and the transmission path has not yet checked this, the value of * @read_count bitwise-added to %EFX_EMPTY_COUNT_VALID; otherwise 0. @@ -250,6 +251,7 @@ struct efx_tx_queue { unsigned int tso_packets; unsigned int pushes; unsigned int pio_packets; + bool xmit_more_available; /* Statistics to supplement MAC stats */ unsigned long tx_packets; diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c index aaf2987512b5d..e70edc3dea7e0 100644 --- a/drivers/net/ethernet/sfc/tx.c +++ b/drivers/net/ethernet/sfc/tx.c @@ -431,8 +431,20 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb) efx_tx_maybe_stop_queue(tx_queue); /* Pass off to hardware */ - if (!skb->xmit_more || netif_xmit_stopped(tx_queue->core_txq)) + if (!skb->xmit_more || netif_xmit_stopped(tx_queue->core_txq)) { + struct efx_tx_queue *txq2 = efx_tx_queue_partner(tx_queue); + + /* There could be packets left on the partner queue if those + * SKBs had skb->xmit_more set. If we do not push those they + * could be left for a long time and cause a netdev watchdog. 
+ */ + if (txq2->xmit_more_available) + efx_nic_push_buffers(txq2); + efx_nic_push_buffers(tx_queue); + } else { + tx_queue->xmit_more_available = skb->xmit_more; + } tx_queue->tx_packets++; @@ -721,6 +733,7 @@ void efx_init_tx_queue(struct efx_tx_queue *tx_queue) tx_queue->read_count = 0; tx_queue->old_read_count = 0; tx_queue->empty_read_count = 0 | EFX_EMPTY_COUNT_VALID; + tx_queue->xmit_more_available = false; /* Set up TX descriptor ring */ efx_nic_init_tx(tx_queue); @@ -746,6 +759,7 @@ void efx_fini_tx_queue(struct efx_tx_queue *tx_queue) ++tx_queue->read_count; } + tx_queue->xmit_more_available = false; netdev_tx_reset_queue(tx_queue->core_txq); } @@ -1301,8 +1315,20 @@ static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue, efx_tx_maybe_stop_queue(tx_queue); /* Pass off to hardware */ - if (!skb->xmit_more || netif_xmit_stopped(tx_queue->core_txq)) + if (!skb->xmit_more || netif_xmit_stopped(tx_queue->core_txq)) { + struct efx_tx_queue *txq2 = efx_tx_queue_partner(tx_queue); + + /* There could be packets left on the partner queue if those + * SKBs had skb->xmit_more set. If we do not push those they + * could be left for a long time and cause a netdev watchdog. + */ + if (txq2->xmit_more_available) + efx_nic_push_buffers(txq2); + efx_nic_push_buffers(tx_queue); + } else { + tx_queue->xmit_more_available = skb->xmit_more; + } tx_queue->tso_bursts++; return NETDEV_TX_OK; From 4db9971168297eca9f8ba93e1d25e8fbc26944ba Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 2 Nov 2015 07:50:07 -0800 Subject: [PATCH 1344/2091] net: avoid NULL deref in inet_ctl_sock_destroy() [ Upstream commit 8fa677d2706d325d71dab91bf6e6512c05214e37 ] Under low memory conditions, tcp_sk_init() and icmp_sk_init() can both iterate on all possible cpus and call inet_ctl_sock_destroy(), with eventual NULL pointer. Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/inet_common.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/net/inet_common.h b/include/net/inet_common.h index 4a92423eefa50..82669da2540c9 100644 --- a/include/net/inet_common.h +++ b/include/net/inet_common.h @@ -41,7 +41,8 @@ int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, static inline void inet_ctl_sock_destroy(struct sock *sk) { - sk_release_kernel(sk); + if (sk) + sk_release_kernel(sk); } #endif From 174888f27748daf5901ca7d181a8cea01dd2cdcc Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Wed, 4 Nov 2015 14:47:53 +0100 Subject: [PATCH 1345/2091] ipv6: clean up dev_snmp6 proc entry when we fail to initialize inet6_dev [ Upstream commit 2a189f9e57650e9f310ddf4aad75d66c1233a064 ] In ipv6_add_dev, when addrconf_sysctl_register fails, we do not clean up the dev_snmp6 entry that we have already registered for this device. Call snmp6_unregister_dev in this case. Fixes: a317a2f19da7d ("ipv6: fail early when creating netdev named all or default") Reported-by: Dmitry Vyukov Signed-off-by: Sabrina Dubroca Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/addrconf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 37b70e82bff8e..fd3aa6148dd18 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -411,6 +411,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) if (err) { ipv6_mc_destroy_dev(ndev); del_timer(&ndev->regen_timer); + snmp6_unregister_dev(ndev); goto err_release; } /* protected by rtnl_lock */ From b4e98bdeb671acafd85072f745eaf85cafe452a1 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Tue, 3 Nov 2015 14:32:57 -0800 Subject: [PATCH 1346/2091] ipv4: disable BH when changing ip local port range [ Upstream commit 4ee3bd4a8c7463cdef0b82ebc33fc94a9170a7e0 ] This fixes the following lockdep warning: [ INFO: inconsistent lock state ] 4.3.0-rc7+ #1197 Not tainted --------------------------------- inconsistent {IN-SOFTIRQ-R} -> {SOFTIRQ-ON-W} usage. sysctl/1019 [HC0[0]:SC0[0]:HE1:SE1] takes: (&(&net->ipv4.ip_local_ports.lock)->seqcount){+.+-..}, at: [] ipv4_local_port_range+0xb4/0x12a {IN-SOFTIRQ-R} state was registered at: [] __lock_acquire+0x2f6/0xdf0 [] lock_acquire+0x11c/0x1a4 [] inet_get_local_port_range+0x4e/0xae [] udp_flow_src_port.constprop.40+0x23/0x116 [] vxlan_xmit_one+0x219/0xa6a [] vxlan_xmit+0xa6b/0xaa5 [] dev_hard_start_xmit+0x2ae/0x465 [] __dev_queue_xmit+0x531/0x633 [] dev_queue_xmit_sk+0x13/0x15 [] neigh_resolve_output+0x12f/0x14d [] ip6_finish_output2+0x344/0x39f [] ip6_finish_output+0x88/0x8e [] ip6_output+0x91/0xe5 [] dst_output_sk+0x47/0x4c [] NF_HOOK_THRESH.constprop.30+0x38/0x82 [] mld_sendpack+0x189/0x266 [] mld_ifc_timer_expire+0x1ef/0x223 [] call_timer_fn+0xfb/0x28c [] run_timer_softirq+0x1c7/0x1f1 Fixes: b8f1a55639e6 ("udp: Add function to make source port for UDP tunnels") Cc: Tom Herbert Signed-off-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/sysctl_net_ipv4.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index c3852a7ff3c76..f0e8297359680 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -45,10 +45,10 @@ static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX }; /* Update system visible IP port range */ static void set_local_port_range(struct net *net, int range[2]) { - write_seqlock(&net->ipv4.ip_local_ports.lock); + write_seqlock_bh(&net->ipv4.ip_local_ports.lock); net->ipv4.ip_local_ports.range[0] = range[0]; net->ipv4.ip_local_ports.range[1] = range[1]; - write_sequnlock(&net->ipv4.ip_local_ports.lock); + write_sequnlock_bh(&net->ipv4.ip_local_ports.lock); } /* Validate changes from /proc interface. */ From 97c28b72a5ceb80e193910cbed068b44c951bd94 Mon Sep 17 00:00:00 2001 From: Francesco Ruggeri Date: Thu, 5 Nov 2015 08:16:14 -0800 Subject: [PATCH 1347/2091] packet: race condition in packet_bind [ Upstream commit 30f7ea1c2b5f5fb7462c5ae44fe2e40cb2d6a474 ] There is a race conditions between packet_notifier and packet_bind{_spkt}. It happens if packet_notifier(NETDEV_UNREGISTER) executes between the time packet_bind{_spkt} takes a reference on the new netdevice and the time packet_do_bind sets po->ifindex. In this case the notification can be missed. If this happens during a dev_change_net_namespace this can result in the netdevice to be moved to the new namespace while the packet_sock in the old namespace still holds a reference on it. 
When the netdevice is later deleted in the new namespace the deletion hangs since the packet_sock is not found in the new namespace' &net->packet.sklist. It can be reproduced with the script below. This patch makes packet_do_bind check again for the presence of the netdevice in the packet_sock's namespace after the synchronize_net in unregister_prot_hook. More in general it also uses the rcu lock for the duration of the bind to stop dev_change_net_namespace/rollback_registered_many from going past the synchronize_net following unlist_netdevice, so that no NETDEV_UNREGISTER notifications can happen on the new netdevice while the bind is executing. In order to do this some code from packet_bind{_spkt} is consolidated into packet_do_dev. import socket, os, time, sys proto=7 realDev='em1' vlanId=400 if len(sys.argv) > 1: vlanId=int(sys.argv[1]) dev='vlan%d' % vlanId os.system('taskset -p 0x10 %d' % os.getpid()) s = socket.socket(socket.PF_PACKET, socket.SOCK_RAW, proto) os.system('ip link add link %s name %s type vlan id %d' % (realDev, dev, vlanId)) os.system('ip netns add dummy') pid=os.fork() if pid == 0: # dev should be moved while packet_do_bind is in synchronize net os.system('taskset -p 0x20000 %d' % os.getpid()) os.system('ip link set %s netns dummy' % dev) os.system('ip netns exec dummy ip link del %s' % dev) s.close() sys.exit(0) time.sleep(.004) try: s.bind(('%s' % dev, proto+1)) except: print 'Could not bind socket' s.close() os.system('ip netns del dummy') sys.exit(0) os.waitpid(pid, 0) s.close() os.system('ip netns del dummy') sys.exit(0) Signed-off-by: Francesco Ruggeri Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/packet/af_packet.c | 80 ++++++++++++++++++++++++++---------------- 1 file changed, 49 insertions(+), 31 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index e1ea5d43b01ef..686e601874010 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2686,22 +2686,40 @@ static int packet_release(struct socket *sock) * Attach a packet hook. */ -static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 proto) +static int packet_do_bind(struct sock *sk, const char *name, int ifindex, + __be16 proto) { struct packet_sock *po = pkt_sk(sk); struct net_device *dev_curr; __be16 proto_curr; bool need_rehook; + struct net_device *dev = NULL; + int ret = 0; + bool unlisted = false; - if (po->fanout) { - if (dev) - dev_put(dev); - + if (po->fanout) return -EINVAL; - } lock_sock(sk); spin_lock(&po->bind_lock); + rcu_read_lock(); + + if (name) { + dev = dev_get_by_name_rcu(sock_net(sk), name); + if (!dev) { + ret = -ENODEV; + goto out_unlock; + } + } else if (ifindex) { + dev = dev_get_by_index_rcu(sock_net(sk), ifindex); + if (!dev) { + ret = -ENODEV; + goto out_unlock; + } + } + + if (dev) + dev_hold(dev); proto_curr = po->prot_hook.type; dev_curr = po->prot_hook.dev; @@ -2709,14 +2727,29 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 proto) need_rehook = proto_curr != proto || dev_curr != dev; if (need_rehook) { - unregister_prot_hook(sk, true); + if (po->running) { + rcu_read_unlock(); + __unregister_prot_hook(sk, true); + rcu_read_lock(); + dev_curr = po->prot_hook.dev; + if (dev) + unlisted = !dev_get_by_index_rcu(sock_net(sk), + dev->ifindex); + } po->num = proto; po->prot_hook.type = proto; - po->prot_hook.dev = dev; - po->ifindex = dev ? 
dev->ifindex : 0; - packet_cached_dev_assign(po, dev); + if (unlikely(unlisted)) { + dev_put(dev); + po->prot_hook.dev = NULL; + po->ifindex = -1; + packet_cached_dev_reset(po); + } else { + po->prot_hook.dev = dev; + po->ifindex = dev ? dev->ifindex : 0; + packet_cached_dev_assign(po, dev); + } } if (dev_curr) dev_put(dev_curr); @@ -2724,7 +2757,7 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 proto) if (proto == 0 || !need_rehook) goto out_unlock; - if (!dev || (dev->flags & IFF_UP)) { + if (!unlisted && (!dev || (dev->flags & IFF_UP))) { register_prot_hook(sk); } else { sk->sk_err = ENETDOWN; @@ -2733,9 +2766,10 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 proto) } out_unlock: + rcu_read_unlock(); spin_unlock(&po->bind_lock); release_sock(sk); - return 0; + return ret; } /* @@ -2747,8 +2781,6 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, { struct sock *sk = sock->sk; char name[15]; - struct net_device *dev; - int err = -ENODEV; /* * Check legality @@ -2758,19 +2790,13 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, return -EINVAL; strlcpy(name, uaddr->sa_data, sizeof(name)); - dev = dev_get_by_name(sock_net(sk), name); - if (dev) - err = packet_do_bind(sk, dev, pkt_sk(sk)->num); - return err; + return packet_do_bind(sk, name, 0, pkt_sk(sk)->num); } static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr; struct sock *sk = sock->sk; - struct net_device *dev = NULL; - int err; - /* * Check legality @@ -2781,16 +2807,8 @@ static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len if (sll->sll_family != AF_PACKET) return -EINVAL; - if (sll->sll_ifindex) { - err = -ENODEV; - dev = dev_get_by_index(sock_net(sk), sll->sll_ifindex); - if (dev == NULL) - goto out; - } - err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num); - -out: - return err; + return packet_do_bind(sk, NULL, sll->sll_ifindex, + sll->sll_protocol ? : pkt_sk(sk)->num); } static struct proto packet_proto = { From e6fac8c7fff3d816729ad16327837ba3216d48c9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 9 Nov 2015 17:51:23 -0800 Subject: [PATCH 1348/2091] net: fix a race in dst_release() [ Upstream commit d69bbf88c8d0b367cf3e3a052f6daadf630ee566 ] Only cpu seeing dst refcount going to 0 can safely dereference dst->flags. Otherwise an other cpu might already have freed the dst. Fixes: 27b75c95f10d ("net: avoid RCU for NOCACHE dst") Reported-by: Greg Thelen Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/core/dst.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/dst.c b/net/core/dst.c index e956ce6d13782..f8db4032d45a4 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -285,7 +285,7 @@ void dst_release(struct dst_entry *dst) newrefcnt = atomic_dec_return(&dst->__refcnt); WARN_ON(newrefcnt < 0); - if (unlikely(dst->flags & DST_NOCACHE) && !newrefcnt) + if (!newrefcnt && unlikely(dst->flags & DST_NOCACHE)) call_rcu(&dst->rcu_head, dst_destroy_rcu); } } From 152964690b41b91049d00eb8aea1d25880cd13f0 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 5 Aug 2015 10:34:04 +0800 Subject: [PATCH 1349/2091] virtio-net: drop NETIF_F_FRAGLIST [ Upstream commit 48900cb6af4282fa0fb6ff4d72a81aa3dadb5c39 ] virtio declares support for NETIF_F_FRAGLIST, but assumes that there are at most MAX_SKB_FRAGS + 2 fragments which isn't always true with a fraglist. A longer fraglist in the skb will make the call to skb_to_sgvec overflow the sg array, leading to memory corruption. Drop NETIF_F_FRAGLIST so we only get what we can handle. Cc: Michael S. Tsirkin Signed-off-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/virtio_net.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 7fbca37a1adff..237f8e5e493dd 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1756,9 +1756,9 @@ static int virtnet_probe(struct virtio_device *vdev) /* Do we support "hardware" checksums? */ if (virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) { /* This opens up the world of extra features. */ - dev->hw_features |= NETIF_F_HW_CSUM|NETIF_F_SG|NETIF_F_FRAGLIST; + dev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_SG; if (csum) - dev->features |= NETIF_F_HW_CSUM|NETIF_F_SG|NETIF_F_FRAGLIST; + dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG; if (virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) { dev->hw_features |= NETIF_F_TSO | NETIF_F_UFO From dd5efc80af05cfd7ae77bb3229a35cc9bc09615d Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Tue, 8 Sep 2015 10:53:40 -0400 Subject: [PATCH 1350/2091] RDS: verify the underlying transport exists before creating a connection [ Upstream commit 74e98eb085889b0d2d4908f59f6e00026063014f ] There was no verification that an underlying transport exists when creating a connection, this would cause dereferencing a NULL ptr. 
It might happen on sockets that weren't properly bound before attempting to send a message, which will cause a NULL ptr deref: [135546.047719] kasan: GPF could be caused by NULL-ptr deref or user memory accessgeneral protection fault: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC KASAN [135546.051270] Modules linked in: [135546.051781] CPU: 4 PID: 15650 Comm: trinity-c4 Not tainted 4.2.0-next-20150902-sasha-00041-gbaa1222-dirty #2527 [135546.053217] task: ffff8800835bc000 ti: ffff8800bc708000 task.ti: ffff8800bc708000 [135546.054291] RIP: __rds_conn_create (net/rds/connection.c:194) [135546.055666] RSP: 0018:ffff8800bc70fab0 EFLAGS: 00010202 [135546.056457] RAX: dffffc0000000000 RBX: 0000000000000f2c RCX: ffff8800835bc000 [135546.057494] RDX: 0000000000000007 RSI: ffff8800835bccd8 RDI: 0000000000000038 [135546.058530] RBP: ffff8800bc70fb18 R08: 0000000000000001 R09: 0000000000000000 [135546.059556] R10: ffffed014d7a3a23 R11: ffffed014d7a3a21 R12: 0000000000000000 [135546.060614] R13: 0000000000000001 R14: ffff8801ec3d0000 R15: 0000000000000000 [135546.061668] FS: 00007faad4ffb700(0000) GS:ffff880252000000(0000) knlGS:0000000000000000 [135546.062836] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [135546.063682] CR2: 000000000000846a CR3: 000000009d137000 CR4: 00000000000006a0 [135546.064723] Stack: [135546.065048] ffffffffafe2055c ffffffffafe23fc1 ffffed00493097bf ffff8801ec3d0008 [135546.066247] 0000000000000000 00000000000000d0 0000000000000000 ac194a24c0586342 [135546.067438] 1ffff100178e1f78 ffff880320581b00 ffff8800bc70fdd0 ffff880320581b00 [135546.068629] Call Trace: [135546.069028] ? __rds_conn_create (include/linux/rcupdate.h:856 net/rds/connection.c:134) [135546.069989] ? rds_message_copy_from_user (net/rds/message.c:298) [135546.071021] rds_conn_create_outgoing (net/rds/connection.c:278) [135546.071981] rds_sendmsg (net/rds/send.c:1058) [135546.072858] ? perf_trace_lock (include/trace/events/lock.h:38) [135546.073744] ? lockdep_init (kernel/locking/lockdep.c:3298) [135546.074577] ? rds_send_drop_to (net/rds/send.c:976) [135546.075508] ? __might_fault (./arch/x86/include/asm/current.h:14 mm/memory.c:3795) [135546.076349] ? __might_fault (mm/memory.c:3795) [135546.077179] ? rds_send_drop_to (net/rds/send.c:976) [135546.078114] sock_sendmsg (net/socket.c:611 net/socket.c:620) [135546.078856] SYSC_sendto (net/socket.c:1657) [135546.079596] ? SYSC_connect (net/socket.c:1628) [135546.080510] ? trace_dump_stack (kernel/trace/trace.c:1926) [135546.081397] ? ring_buffer_unlock_commit (kernel/trace/ring_buffer.c:2479 kernel/trace/ring_buffer.c:2558 kernel/trace/ring_buffer.c:2674) [135546.082390] ? trace_buffer_unlock_commit (kernel/trace/trace.c:1749) [135546.083410] ? trace_event_raw_event_sys_enter (include/trace/events/syscalls.h:16) [135546.084481] ? do_audit_syscall_entry (include/trace/events/syscalls.h:16) [135546.085438] ? trace_buffer_unlock_commit (kernel/trace/trace.c:1749) [135546.085515] rds_ib_laddr_check(): addr 36.74.25.172 ret -99 node type -1 Acked-by: Santosh Shilimkar Signed-off-by: Sasha Levin Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/rds/connection.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/rds/connection.c b/net/rds/connection.c index da6da57e5f36b..9d66705f9d414 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -187,6 +187,12 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr, } } + if (trans == NULL) { + kmem_cache_free(rds_conn_slab, conn); + conn = ERR_PTR(-ENODEV); + goto out; + } + conn->c_trans = trans; ret = trans->conn_alloc(conn, gfp); From 19c61dfda03fe9d8290366f55d765a83ce459394 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Fri, 28 Aug 2015 09:41:39 +0100 Subject: [PATCH 1351/2091] ARM: 8426/1: dma-mapping: add missing range check in dma_mmap() commit 371f0f085f629fc0f66695f572373ca4445a67ad upstream. dma_mmap() function in IOMMU-based dma-mapping implementation lacked a check for valid range of mmap parameters (offset and buffer size), what might have caused access beyond the allocated buffer. This patch fixes this issue. Signed-off-by: Marek Szyprowski Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/mm/dma-mapping.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 6e4b9ff22ef3c..5bbfe56c5bc65 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -1395,12 +1395,17 @@ static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma, unsigned long uaddr = vma->vm_start; unsigned long usize = vma->vm_end - vma->vm_start; struct page **pages = __iommu_get_pages(cpu_addr, attrs); + unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; + unsigned long off = vma->vm_pgoff; vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot); if (!pages) return -ENXIO; + if (off >= nr_pages || (usize >> PAGE_SHIFT) > nr_pages - off) + return -ENXIO; + do { int ret = vm_insert_page(vma, uaddr, *pages++); if (ret) { From c1c6e3d704033ec56c158c25af29246892d2aed9 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Fri, 28 Aug 2015 09:42:09 +0100 Subject: [PATCH 1352/2091] ARM: 8427/1: dma-mapping: add support for offset parameter in dma_mmap() commit 7e31210349e9e03a9a4dff31ab5f2bc83e8e84f5 upstream. IOMMU-based dma_mmap() implementation lacked proper support for offset parameter used in mmap call (it always assumed that mapping starts from offset zero). This patch adds support for offset parameter to IOMMU-based implementation. Signed-off-by: Marek Szyprowski Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/mm/dma-mapping.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 5bbfe56c5bc65..64d7486262e51 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -1406,6 +1406,8 @@ static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma, if (off >= nr_pages || (usize >> PAGE_SHIFT) > nr_pages - off) return -ENXIO; + pages += off; + do { int ret = vm_insert_page(vma, uaddr, *pages++); if (ret) { From ad13ada66d59097dae89f5941892440f84eeea1d Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Wed, 14 Oct 2015 14:42:43 +0300 Subject: [PATCH 1353/2091] ARM: common: edma: Fix channel parameter for irq callbacks commit 696d8b70c09dd421c4d037fab04341e5b30585cf upstream. In case when the interrupt happened for the second eDMA the channel number was incorrectly passed to the client driver. 
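The EDMA_CTLR_CHAN() value used in the fix packs the controller number into the upper 16 bits of the reported channel, so a callback from the second controller is no longer indistinguishable from one on the first; roughly (illustrative definitions -- see the EDMA platform header of this era for the real ones):

	#define EDMA_CTLR_CHAN(ctlr, chan)	(((ctlr) << 16) | (chan))
	#define EDMA_CTLR(i)			((i) >> 16)
	#define EDMA_CHAN_SLOT(i)		((i) & 0xffff)

	/* A completion on channel 3 of controller 1 is reported as 0x10003
	 * rather than plain 3, and the client decodes it with EDMA_CTLR()
	 * and EDMA_CHAN_SLOT().
	 */
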
Signed-off-by: Peter Ujfalusi Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- arch/arm/common/edma.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm/common/edma.c b/arch/arm/common/edma.c index 5662a872689b3..30613204da154 100644 --- a/arch/arm/common/edma.c +++ b/arch/arm/common/edma.c @@ -406,7 +406,8 @@ static irqreturn_t dma_irq_handler(int irq, void *data) BIT(slot)); if (edma_cc[ctlr]->intr_data[channel].callback) edma_cc[ctlr]->intr_data[channel].callback( - channel, EDMA_DMA_COMPLETE, + EDMA_CTLR_CHAN(ctlr, channel), + EDMA_DMA_COMPLETE, edma_cc[ctlr]->intr_data[channel].data); } } while (sh_ipr); @@ -460,7 +461,8 @@ static irqreturn_t dma_ccerr_handler(int irq, void *data) if (edma_cc[ctlr]->intr_data[k]. callback) { edma_cc[ctlr]->intr_data[k]. - callback(k, + callback( + EDMA_CTLR_CHAN(ctlr, k), EDMA_DMA_CC_ERROR, edma_cc[ctlr]->intr_data [k].data); From 3ef017078d1f2791228828db8fb0bcf1502fd2b8 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Wed, 16 Sep 2015 09:35:06 +0800 Subject: [PATCH 1354/2091] ARM: dts: imx27.dtsi: change the clock information for usb commit facf47ee6b4d07d43c3bfd6f0762f1b28f64703a upstream. For imx27, it needs three clocks to let the controller work, the old code is wrong, and usbmisc has not included clock handling code any more. Without this patch, it will cause below data abort when accessing usbmisc registers. usbcore: registered new interface driver usb-storage Unhandled fault: external abort on non-linefetch (0x008) at 0xf4424600 pgd = c0004000 [f4424600] *pgd=10000452(bad) Internal error: : 8 [#1] PREEMPT ARM Modules linked in: CPU: 0 PID: 1 Comm: swapper Not tainted 4.1.0-next-20150701-dirty #3089 Hardware name: Freescale i.MX27 (Device Tree Support) task: c7832b60 ti: c783e000 task.ti: c783e000 PC is at usbmisc_imx27_init+0x4c/0xbc LR is at usbmisc_imx27_init+0x40/0xbc pc : [] lr : [] psr: 60000093 sp : c783fe08 ip : 00000000 fp : 00000000 r10: c0576434 r9 : 0000009c r8 : c7a773a0 r7 : 01000000 r6 : 60000013 r5 : c7a776f0 r4 : c7a773f0 r3 : f4424600 r2 : 00000000 r1 : 00000001 r0 : 00000001 Flags: nZCv IRQs off FIQs on Mode SVC_32 ISA ARM Segment kernel Control: 0005317f Table: a0004000 DAC: 00000017 Process swapper (pid: 1, stack limit = 0xc783e190) Stack: (0xc783fe08 to 0xc7840000) Signed-off-by: Peter Chen Reported-by: Fabio Estevam Tested-by: Fabio Estevam Acked-by: Shawn Guo Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/imx27.dtsi | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/imx27.dtsi b/arch/arm/boot/dts/imx27.dtsi index bc215e4b75fd5..6a87233d0b194 100644 --- a/arch/arm/boot/dts/imx27.dtsi +++ b/arch/arm/boot/dts/imx27.dtsi @@ -477,7 +477,10 @@ compatible = "fsl,imx27-usb"; reg = <0x10024000 0x200>; interrupts = <56>; - clocks = <&clks IMX27_CLK_USB_IPG_GATE>; + clocks = <&clks IMX27_CLK_USB_IPG_GATE>, + <&clks IMX27_CLK_USB_AHB_GATE>, + <&clks IMX27_CLK_USB_DIV>; + clock-names = "ipg", "ahb", "per"; fsl,usbmisc = <&usbmisc 0>; status = "disabled"; }; @@ -486,7 +489,10 @@ compatible = "fsl,imx27-usb"; reg = <0x10024200 0x200>; interrupts = <54>; - clocks = <&clks IMX27_CLK_USB_IPG_GATE>; + clocks = <&clks IMX27_CLK_USB_IPG_GATE>, + <&clks IMX27_CLK_USB_AHB_GATE>, + <&clks IMX27_CLK_USB_DIV>; + clock-names = "ipg", "ahb", "per"; fsl,usbmisc = <&usbmisc 1>; dr_mode = "host"; status = "disabled"; @@ -496,7 +502,10 @@ compatible = "fsl,imx27-usb"; reg = <0x10024400 0x200>; interrupts = <55>; - clocks = <&clks 
IMX27_CLK_USB_IPG_GATE>; + clocks = <&clks IMX27_CLK_USB_IPG_GATE>, + <&clks IMX27_CLK_USB_AHB_GATE>, + <&clks IMX27_CLK_USB_DIV>; + clock-names = "ipg", "ahb", "per"; fsl,usbmisc = <&usbmisc 2>; dr_mode = "host"; status = "disabled"; @@ -506,7 +515,6 @@ #index-cells = <1>; compatible = "fsl,imx27-usbmisc"; reg = <0x10024600 0x200>; - clocks = <&clks IMX27_CLK_USB_AHB_GATE>; }; sahara2: sahara@10025000 { From 4a5cc0ec03eafaa174b11ec570a7330dca850ebf Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Tue, 30 Jun 2015 17:15:50 +0300 Subject: [PATCH 1355/2091] ARM: tegra: paz00: use con_id's to refer GPIO's in gpiod_lookup table commit e77b675f8786f38d40fc1562e1275875daf67fef upstream. Commit 72daceb9a10a ("net: rfkill: gpio: Add default GPIO driver mappings for ACPI") removed possibility to request GPIO by table index for non-ACPI platforms without changing its users. As result "shutdown" GPIO request will fail if request for "reset" GPIO succeeded or "reset" will be requested instead of "shutdown" if "reset" wasn't defined. Fix it by making gpiod_lookup_table use con_id's instead of indexes. Signed-off-by: Dmitry Osipenko Fixes: 72daceb (net: rfkill: gpio: Add default GPIO driver mappings for ACPI) Acked-by: Alexandre Courbot Reviewed-by: Marc Dietrich Tested-by: Marc Dietrich Signed-off-by: Thierry Reding Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-tegra/board-paz00.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-tegra/board-paz00.c b/arch/arm/mach-tegra/board-paz00.c index fbe74c6806f3b..49d1110cff534 100644 --- a/arch/arm/mach-tegra/board-paz00.c +++ b/arch/arm/mach-tegra/board-paz00.c @@ -39,8 +39,8 @@ static struct platform_device wifi_rfkill_device = { static struct gpiod_lookup_table wifi_gpio_lookup = { .dev_id = "rfkill_gpio", .table = { - GPIO_LOOKUP_IDX("tegra-gpio", 25, NULL, 0, 0), - GPIO_LOOKUP_IDX("tegra-gpio", 85, NULL, 1, 0), + GPIO_LOOKUP("tegra-gpio", 25, "reset", 0), + GPIO_LOOKUP("tegra-gpio", 85, "shutdown", 0), { }, }, }; From 8f62287e6ecac096ba276f1905c7661ba8e59318 Mon Sep 17 00:00:00 2001 From: Holger Busse Date: Wed, 26 Aug 2015 10:45:45 +0200 Subject: [PATCH 1356/2091] ARM: at91/dt: corrections to i2c1 declaration to sama5d4 commit d1a9c24ad16ab2b26f1574bc3f2c165a7beff5df upstream. Correcting the dma declaration for i2c1 dma. 
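(Returning to the rfkill GPIO lookup change above: the point of the con_id strings is that the consumer then requests each line by name instead of by table position. On the consumer side that looks roughly like the sketch below -- hypothetical device pointer and function name, not the actual rfkill-gpio code.)

	#include <linux/err.h>
	#include <linux/gpio/consumer.h>

	/* Sketch: with "reset"/"shutdown" con_ids in the lookup table, each
	 * GPIO is requested by name.
	 */
	static int sketch_get_rfkill_gpios(struct device *dev)
	{
		struct gpio_desc *reset, *shutdown;

		reset = gpiod_get(dev, "reset", GPIOD_OUT_LOW);
		if (IS_ERR(reset))
			return PTR_ERR(reset);
		shutdown = gpiod_get(dev, "shutdown", GPIOD_OUT_LOW);
		if (IS_ERR(shutdown))
			return PTR_ERR(shutdown);
		return 0;
	}
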
Signed-off-by: Holger Busse Signed-off-by: Nicolas Ferre Fixes: 4cc7cdf35c5f ("ARM: at91/dt: add i2c1 declaration to sama5d4") Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/sama5d4.dtsi | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/sama5d4.dtsi b/arch/arm/boot/dts/sama5d4.dtsi index a5f5f4090af6d..9cf0ab62db7d2 100644 --- a/arch/arm/boot/dts/sama5d4.dtsi +++ b/arch/arm/boot/dts/sama5d4.dtsi @@ -918,11 +918,11 @@ reg = <0xf8018000 0x4000>; interrupts = <33 IRQ_TYPE_LEVEL_HIGH 6>; dmas = <&dma1 - (AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1)) - AT91_XDMAC_DT_PERID(4)>, + (AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1) + | AT91_XDMAC_DT_PERID(4))>, <&dma1 - (AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1)) - AT91_XDMAC_DT_PERID(5)>; + (AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1) + | AT91_XDMAC_DT_PERID(5))>; dma-names = "tx", "rx"; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_i2c1>; From 3b389e9e73e0c5cc15d6e002129fc8b85aa700b6 Mon Sep 17 00:00:00 2001 From: Patrick Doyle Date: Fri, 16 Oct 2015 12:39:05 +0200 Subject: [PATCH 1357/2091] ARM: at91: pm: at91_pm_suspend_in_sram() must be 8-byte aligned commit 5fcf8d1a0e84792b2bc44922c5d833dab96a9c1e upstream. fncpy() requires that the source and the destination are both 8-byte aligned. Signed-off-by: Patrick Doyle Signed-off-by: Alexandre Belloni Acked-by: Nicolas Ferre Fixes: d94e688cae56 ("ARM: at91/pm: move the copying the sram function to the sram initialization phase") Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-at91/pm_suspend.S | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/mach-at91/pm_suspend.S b/arch/arm/mach-at91/pm_suspend.S index bd22b2c8a0519..d3161c7ee1fd0 100644 --- a/arch/arm/mach-at91/pm_suspend.S +++ b/arch/arm/mach-at91/pm_suspend.S @@ -81,6 +81,8 @@ tmp2 .req r5 * @r2: base address of second SDRAM Controller or 0 if not present * @r3: pm information */ +/* at91_pm_suspend_in_sram must be 8-byte aligned per the requirements of fncpy() */ + .align 3 ENTRY(at91_pm_suspend_in_sram) /* Save registers on stack */ stmfd sp!, {r4 - r12, lr} From 6bafb3e3e48827d634624ee25395a396aa675279 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Fri, 16 Oct 2015 12:32:32 -0700 Subject: [PATCH 1358/2091] ARM: dts: Fix WLAN regression on omap5-uevm commit 0efc898a9bea7a2e8e583c6efab0e19dc7093078 upstream. Commit 99f84cae43df ("ARM: dts: add wl12xx/wl18xx bindings") added device tree bindings for the TI WLAN SDIO on many omap variants. I recall wondering how come omap5-uevm did not have the WLAN added and this issue has been bugging me for a while now, and I finally tracked it down to a bad pinmux regression, and a missing deferred probe handling for the 32k clock from palmas that's requested by twl6040. Basically 392adaf796b9 ("ARM: dts: omap5-evm: Add mcspi data") added pin muxing for mcspi4 that conflicts with the onboard WLAN. While some omap5-uevm don't have WLAN populated, the pins are not reused for other devices. And as the SDIO bus should be probed, let's try to enable WLAN by default. Let's fix the regression and add the WLAN configuration as done for the other boards in 99f84cae43df ("ARM: dts: add wl12xx/wl18xx bindings"). And let's use the new MMC pwrseq for the 32k clock as suggested by Javier Martinez Canillas . Note that without a related deferred probe fix for twl6040, the 32k clock is not initialized if palmas-clk is a module and twl6040 is built-in. 
Let's also use the generic "non-removable" instead of the legacy "ti,non-removable" property while at it. And finally, note that omap5 seems to require WAKEUP_EN for the WLAN GPIO interrupt. Fixes: 392adaf796b9 ("ARM: dts: omap5-evm: Add mcspi data") Cc: Sourav Poddar Signed-off-by: Tony Lindgren Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/omap5-uevm.dts | 66 ++++++++++++++++++++++++++------ 1 file changed, 55 insertions(+), 11 deletions(-) diff --git a/arch/arm/boot/dts/omap5-uevm.dts b/arch/arm/boot/dts/omap5-uevm.dts index 1b958e92d6742..2e7c1364cb005 100644 --- a/arch/arm/boot/dts/omap5-uevm.dts +++ b/arch/arm/boot/dts/omap5-uevm.dts @@ -31,6 +31,24 @@ regulator-max-microvolt = <3000000>; }; + mmc3_pwrseq: sdhci0_pwrseq { + compatible = "mmc-pwrseq-simple"; + clocks = <&clk32kgaudio>; + clock-names = "ext_clock"; + }; + + vmmcsdio_fixed: fixedregulator-mmcsdio { + compatible = "regulator-fixed"; + regulator-name = "vmmcsdio_fixed"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + gpio = <&gpio5 12 GPIO_ACTIVE_HIGH>; /* gpio140 WLAN_EN */ + enable-active-high; + startup-delay-us = <70000>; + pinctrl-names = "default"; + pinctrl-0 = <&wlan_pins>; + }; + /* HS USB Host PHY on PORT 2 */ hsusb2_phy: hsusb2_phy { compatible = "usb-nop-xceiv"; @@ -197,12 +215,20 @@ >; }; - mcspi4_pins: pinmux_mcspi4_pins { + mmc3_pins: pinmux_mmc3_pins { + pinctrl-single,pins = < + OMAP5_IOPAD(0x01a4, PIN_INPUT_PULLUP | MUX_MODE0) /* wlsdio_clk */ + OMAP5_IOPAD(0x01a6, PIN_INPUT_PULLUP | MUX_MODE0) /* wlsdio_cmd */ + OMAP5_IOPAD(0x01a8, PIN_INPUT_PULLUP | MUX_MODE0) /* wlsdio_data0 */ + OMAP5_IOPAD(0x01aa, PIN_INPUT_PULLUP | MUX_MODE0) /* wlsdio_data1 */ + OMAP5_IOPAD(0x01ac, PIN_INPUT_PULLUP | MUX_MODE0) /* wlsdio_data2 */ + OMAP5_IOPAD(0x01ae, PIN_INPUT_PULLUP | MUX_MODE0) /* wlsdio_data3 */ + >; + }; + + wlan_pins: pinmux_wlan_pins { pinctrl-single,pins = < - 0x164 (PIN_INPUT | MUX_MODE1) /* mcspi4_clk */ - 0x168 (PIN_INPUT | MUX_MODE1) /* mcspi4_simo */ - 0x16a (PIN_INPUT | MUX_MODE1) /* mcspi4_somi */ - 0x16c (PIN_INPUT | MUX_MODE1) /* mcspi4_cs0 */ + OMAP5_IOPAD(0x1bc, PIN_OUTPUT | MUX_MODE6) /* mcspi1_clk.gpio5_140 */ >; }; @@ -276,6 +302,12 @@ 0x1A (PIN_OUTPUT | MUX_MODE0) /* fref_clk1_out, USB hub clk */ >; }; + + wlcore_irq_pin: pinmux_wlcore_irq_pin { + pinctrl-single,pins = < + OMAP5_IOPAD(0x040, WAKEUP_EN | PIN_INPUT_PULLUP | MUX_MODE6) /* llia_wakereqin.gpio1_wk14 */ + >; + }; }; &mmc1 { @@ -290,8 +322,25 @@ }; &mmc3 { + vmmc-supply = <&vmmcsdio_fixed>; + mmc-pwrseq = <&mmc3_pwrseq>; bus-width = <4>; - ti,non-removable; + non-removable; + cap-power-off-card; + pinctrl-names = "default"; + pinctrl-0 = <&mmc3_pins &wlcore_irq_pin>; + interrupts-extended = <&gic GIC_SPI 94 IRQ_TYPE_LEVEL_HIGH + &omap5_pmx_core 0x168>; + + #address-cells = <1>; + #size-cells = <0>; + wlcore: wlcore@2 { + compatible = "ti,wl1271"; + reg = <2>; + interrupt-parent = <&gpio1>; + interrupts = <14 IRQ_TYPE_LEVEL_HIGH>; /* gpio 14 */ + ref-clock-frequency = <26000000>; + }; }; &mmc4 { @@ -591,11 +640,6 @@ pinctrl-0 = <&mcspi3_pins>; }; -&mcspi4 { - pinctrl-names = "default"; - pinctrl-0 = <&mcspi4_pins>; -}; - &uart1 { pinctrl-names = "default"; pinctrl-0 = <&uart1_pins>; From 18f6d80c7e3bd107b346ebbba3268efc727bc9a3 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 12 Oct 2015 15:46:08 +0200 Subject: [PATCH 1359/2091] ARM: pxa: remove incorrect __init annotation on pxa27x_set_pwrmode commit 54c09889bff6d99c8733eed4a26c9391b177c88b upstream. 
The z2 machine calls pxa27x_set_pwrmode() in order to power off the machine, but this function gets discarded early at boot because it is marked __init, as pointed out by kbuild: WARNING: vmlinux.o(.text+0x145c4): Section mismatch in reference from the function z2_power_off() to the function .init.text:pxa27x_set_pwrmode() The function z2_power_off() references the function __init pxa27x_set_pwrmode(). This is often because z2_power_off lacks a __init annotation or the annotation of pxa27x_set_pwrmode is wrong. This removes the __init section modifier to fix rebooting and the build error. Signed-off-by: Arnd Bergmann Fixes: ba4a90a6d86a ("ARM: pxa/z2: fix building error of pxa27x_cpu_suspend() no longer available") Signed-off-by: Robert Jarzmik Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-pxa/include/mach/pxa27x.h | 2 +- arch/arm/mach-pxa/pxa27x.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-pxa/include/mach/pxa27x.h b/arch/arm/mach-pxa/include/mach/pxa27x.h index 599b925a657c4..1a4291936c582 100644 --- a/arch/arm/mach-pxa/include/mach/pxa27x.h +++ b/arch/arm/mach-pxa/include/mach/pxa27x.h @@ -19,7 +19,7 @@ #define ARB_CORE_PARK (1<<24) /* Be parked with core when idle */ #define ARB_LOCK_FLAG (1<<23) /* Only Locking masters gain access to the bus */ -extern int __init pxa27x_set_pwrmode(unsigned int mode); +extern int pxa27x_set_pwrmode(unsigned int mode); extern void pxa27x_cpu_pm_enter(suspend_state_t state); #endif /* __MACH_PXA27x_H */ diff --git a/arch/arm/mach-pxa/pxa27x.c b/arch/arm/mach-pxa/pxa27x.c index af423a48c2e3b..782e6b98dd9a7 100644 --- a/arch/arm/mach-pxa/pxa27x.c +++ b/arch/arm/mach-pxa/pxa27x.c @@ -251,7 +251,7 @@ static struct clk_lookup pxa27x_clkregs[] = { */ static unsigned int pwrmode = PWRMODE_SLEEP; -int __init pxa27x_set_pwrmode(unsigned int mode) +int pxa27x_set_pwrmode(unsigned int mode) { switch (mode) { case PWRMODE_SLEEP: From 86006fae34f3e49755cc78c8147e01ac55cd7226 Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Sun, 25 Oct 2015 23:21:42 +0100 Subject: [PATCH 1360/2091] MIPS: lantiq: add clk_round_rate() commit 4e7d30dba493b60a80e9b590add1b4402265cc83 upstream. This adds a basic implementation of clk_round_rate() The clk_round_rate() function is called by multiple drivers and subsystems now and the lantiq clk driver is supposed to export this, but doesn't do so, this causes linking problems like this one: ERROR: "clk_round_rate" [drivers/media/v4l2-core/videodev.ko] undefined! 
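The consumer pattern that trips over the missing export is the usual round-then-set sequence; a generic sketch (hypothetical function name, not tied to any particular driver):

	#include <linux/clk.h>
	#include <linux/err.h>

	/* Sketch: ask the clock framework what rate is actually achievable
	 * before programming it.
	 */
	static int sketch_set_rate(struct device *dev, unsigned long want)
	{
		struct clk *clk = devm_clk_get(dev, NULL);
		long rounded;

		if (IS_ERR(clk))
			return PTR_ERR(clk);
		rounded = clk_round_rate(clk, want);
		if (rounded <= 0)
			return -EINVAL;
		return clk_set_rate(clk, rounded);
	}
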
Signed-off-by: Hauke Mehrtens Acked-by: John Crispin Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/11358/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/lantiq/clk.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/arch/mips/lantiq/clk.c b/arch/mips/lantiq/clk.c index 3fc2e6d70c779..a0706fd4ce0a0 100644 --- a/arch/mips/lantiq/clk.c +++ b/arch/mips/lantiq/clk.c @@ -99,6 +99,23 @@ int clk_set_rate(struct clk *clk, unsigned long rate) } EXPORT_SYMBOL(clk_set_rate); +long clk_round_rate(struct clk *clk, unsigned long rate) +{ + if (unlikely(!clk_good(clk))) + return 0; + if (clk->rates && *clk->rates) { + unsigned long *r = clk->rates; + + while (*r && (*r != rate)) + r++; + if (!*r) { + return clk->rate; + } + } + return rate; +} +EXPORT_SYMBOL(clk_round_rate); + int clk_enable(struct clk *clk) { if (unlikely(!clk_good(clk))) From 331066063527962063ca866bafd8e2c9b95950c0 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Wed, 11 Nov 2015 14:21:18 +0000 Subject: [PATCH 1361/2091] MIPS: KVM: Fix ASID restoration logic commit 002374f371bd02df864cce1fe85d90dc5b292837 upstream. ASID restoration on guest resume should determine the guest execution mode based on the guest Status register rather than bit 30 of the guest PC. Fix the two places in locore.S that do this, loading the guest status from the cop0 area. Note, this assembly is specific to the trap & emulate implementation of KVM, so it doesn't need to check the supervisor bit as that mode is not implemented in the guest. Fixes: b680f70fc111 ("KVM/MIPS32: Entry point for trampolining to...") Signed-off-by: James Hogan Cc: Ralf Baechle Cc: Paolo Bonzini Cc: Gleb Natapov Cc: linux-mips@linux-mips.org Cc: kvm@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/mips/kvm/locore.S | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/arch/mips/kvm/locore.S b/arch/mips/kvm/locore.S index c567240386a0f..d1ee95a7f7dd8 100644 --- a/arch/mips/kvm/locore.S +++ b/arch/mips/kvm/locore.S @@ -165,9 +165,11 @@ FEXPORT(__kvm_mips_vcpu_run) FEXPORT(__kvm_mips_load_asid) /* Set the ASID for the Guest Kernel */ - INT_SLL t0, t0, 1 /* with kseg0 @ 0x40000000, kernel */ - /* addresses shift to 0x80000000 */ - bltz t0, 1f /* If kernel */ + PTR_L t0, VCPU_COP0(k1) + LONG_L t0, COP0_STATUS(t0) + andi t0, KSU_USER | ST0_ERL | ST0_EXL + xori t0, KSU_USER + bnez t0, 1f /* If kernel */ INT_ADDIU t1, k1, VCPU_GUEST_KERNEL_ASID /* (BD) */ INT_ADDIU t1, k1, VCPU_GUEST_USER_ASID /* else user */ 1: @@ -482,9 +484,11 @@ __kvm_mips_return_to_guest: mtc0 t0, CP0_EPC /* Set the ASID for the Guest Kernel */ - INT_SLL t0, t0, 1 /* with kseg0 @ 0x40000000, kernel */ - /* addresses shift to 0x80000000 */ - bltz t0, 1f /* If kernel */ + PTR_L t0, VCPU_COP0(k1) + LONG_L t0, COP0_STATUS(t0) + andi t0, KSU_USER | ST0_ERL | ST0_EXL + xori t0, KSU_USER + bnez t0, 1f /* If kernel */ INT_ADDIU t1, k1, VCPU_GUEST_KERNEL_ASID /* (BD) */ INT_ADDIU t1, k1, VCPU_GUEST_USER_ASID /* else user */ 1: From 99caef3188117f9f097a103dff1c1e2a224e6b92 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Wed, 11 Nov 2015 14:21:19 +0000 Subject: [PATCH 1362/2091] MIPS: KVM: Fix CACHE immediate offset sign extension commit c5c2a3b998f1ff5a586f9d37e154070b8d550d17 upstream. The immediate field of the CACHE instruction is signed, so ensure that it gets sign extended by casting it to an int16_t rather than just masking the low 16 bits. 
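The difference is easy to demonstrate with a negative offset; a self-contained illustration in plain user-space C (illustrative values, not kernel code):

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint32_t inst  = 0x0000fffc;		/* low 16 bits encode -4 */
		int32_t  wrong = inst & 0xffff;		/* 65532: sign bit lost */
		int32_t  right = (int16_t)inst;		/* -4: cast sign-extends */

		printf("%d %d\n", wrong, right);	/* prints "65532 -4" */
		return 0;
	}
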
Fixes: e685c689f3a8 ("KVM/MIPS32: Privileged instruction/target branch emulation.") Signed-off-by: James Hogan Cc: Ralf Baechle Cc: Paolo Bonzini Cc: Gleb Natapov Cc: linux-mips@linux-mips.org Cc: kvm@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/mips/kvm/emulate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c index d5fa3eaf39a10..41b1b090f56f6 100644 --- a/arch/mips/kvm/emulate.c +++ b/arch/mips/kvm/emulate.c @@ -1581,7 +1581,7 @@ enum emulation_result kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc, base = (inst >> 21) & 0x1f; op_inst = (inst >> 16) & 0x1f; - offset = inst & 0xffff; + offset = (int16_t)inst; cache = (inst >> 16) & 0x3; op = (inst >> 18) & 0x7; From 6294fbb5243d83ecb0be7c2a0336eefad70d5e18 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Wed, 11 Nov 2015 14:21:20 +0000 Subject: [PATCH 1363/2091] MIPS: KVM: Uninit VCPU in vcpu_create error path commit 585bb8f9a5e592f2ce7abbe5ed3112d5438d2754 upstream. If either of the memory allocations in kvm_arch_vcpu_create() fail, the vcpu which has been allocated and kvm_vcpu_init'd doesn't get uninit'd in the error handling path. Add a call to kvm_vcpu_uninit() to fix this. Fixes: 669e846e6c4e ("KVM/MIPS32: MIPS arch specific APIs for KVM") Signed-off-by: James Hogan Cc: Ralf Baechle Cc: Paolo Bonzini Cc: Gleb Natapov Cc: linux-mips@linux-mips.org Cc: kvm@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/mips/kvm/mips.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 52f205ae12815..22ee0afc7d5dc 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -277,7 +277,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) if (!gebase) { err = -ENOMEM; - goto out_free_cpu; + goto out_uninit_cpu; } kvm_debug("Allocated %d bytes for KVM Exception Handlers @ %p\n", ALIGN(size, PAGE_SIZE), gebase); @@ -341,6 +341,9 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) out_free_gebase: kfree(gebase); +out_uninit_cpu: + kvm_vcpu_uninit(vcpu); + out_free_cpu: kfree(vcpu); From d5f45ba87a1d13f68a480a75e1ef3b4c801d2283 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Thu, 8 Oct 2015 20:23:33 +0200 Subject: [PATCH 1364/2091] kvm: x86: set KVM_REQ_EVENT when updating IRR MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit c77f3fab441c3e466b4c3601a475fc31ce156b06 upstream. After moving PIR to IRR, the interrupt needs to be delivered manually. Reported-by: Paolo Bonzini Signed-off-by: Radim Krčmář Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/lapic.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 67d07e0514368..7dd9a8d3911ae 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -339,6 +339,8 @@ void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir) struct kvm_lapic *apic = vcpu->arch.apic; __kvm_apic_update_irr(pir, apic->regs); + + kvm_make_request(KVM_REQ_EVENT, vcpu); } EXPORT_SYMBOL_GPL(kvm_apic_update_irr); From b62c38079ebaa36c0ccd77647fd1fdd46315bc98 Mon Sep 17 00:00:00 2001 From: Eric Northup Date: Tue, 3 Nov 2015 18:03:53 +0100 Subject: [PATCH 1365/2091] KVM: x86: work around infinite loop in microcode when #AC is delivered commit 54a20552e1eae07aa240fa370a0293e006b5faed upstream. 
It was found that a guest can DoS a host by triggering an infinite stream of "alignment check" (#AC) exceptions. This causes the microcode to enter an infinite loop where the core never receives another interrupt. The host kernel panics pretty quickly due to the effects (CVE-2015-5307). Signed-off-by: Eric Northup Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/uapi/asm/svm.h | 1 + arch/x86/kvm/svm.c | 8 ++++++++ arch/x86/kvm/vmx.c | 5 ++++- 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h index b5d7640abc5d6..8a4add8e46393 100644 --- a/arch/x86/include/uapi/asm/svm.h +++ b/arch/x86/include/uapi/asm/svm.h @@ -100,6 +100,7 @@ { SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" }, \ { SVM_EXIT_EXCP_BASE + PF_VECTOR, "PF excp" }, \ { SVM_EXIT_EXCP_BASE + NM_VECTOR, "NM excp" }, \ + { SVM_EXIT_EXCP_BASE + AC_VECTOR, "AC excp" }, \ { SVM_EXIT_EXCP_BASE + MC_VECTOR, "MC excp" }, \ { SVM_EXIT_INTR, "interrupt" }, \ { SVM_EXIT_NMI, "nmi" }, \ diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 7858cd9acfe4a..454ccb082e182 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1105,6 +1105,7 @@ static void init_vmcb(struct vcpu_svm *svm) set_exception_intercept(svm, PF_VECTOR); set_exception_intercept(svm, UD_VECTOR); set_exception_intercept(svm, MC_VECTOR); + set_exception_intercept(svm, AC_VECTOR); set_intercept(svm, INTERCEPT_INTR); set_intercept(svm, INTERCEPT_NMI); @@ -1791,6 +1792,12 @@ static int ud_interception(struct vcpu_svm *svm) return 1; } +static int ac_interception(struct vcpu_svm *svm) +{ + kvm_queue_exception_e(&svm->vcpu, AC_VECTOR, 0); + return 1; +} + static void svm_fpu_activate(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -3361,6 +3368,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception, [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception, + [SVM_EXIT_EXCP_BASE + AC_VECTOR] = ac_interception, [SVM_EXIT_INTR] = intr_interception, [SVM_EXIT_NMI] = nmi_interception, [SVM_EXIT_SMI] = nop_on_interception, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index bc3041e1abbc8..a243854c35d55 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1567,7 +1567,7 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) u32 eb; eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) | - (1u << NM_VECTOR) | (1u << DB_VECTOR); + (1u << NM_VECTOR) | (1u << DB_VECTOR) | (1u << AC_VECTOR); if ((vcpu->guest_debug & (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) == (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) @@ -5127,6 +5127,9 @@ static int handle_exception(struct kvm_vcpu *vcpu) return handle_rmode_exception(vcpu, ex_no, error_code); switch (ex_no) { + case AC_VECTOR: + kvm_queue_exception_e(vcpu, AC_VECTOR, error_code); + return 1; case DB_VECTOR: dr6 = vmcs_readl(EXIT_QUALIFICATION); if (!(vcpu->guest_debug & From f53827c8129b9d26765f42c38fb91aa3ab753be2 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 14 Oct 2015 13:30:45 +0200 Subject: [PATCH 1366/2091] x86/setup: Extend low identity map to cover whole kernel range commit f5f3497cad8c8416a74b9aaceb127908755d020a upstream. On 32-bit systems, the initial_page_table is reused by efi_call_phys_prolog as an identity map to call SetVirtualAddressMap. efi_call_phys_prolog takes care of converting the current CPU's GDT to a physical address too. 
For PAE kernels the identity mapping is achieved by aliasing the first PDPE for the kernel memory mapping into the first PDPE of initial_page_table. This makes the EFI stub's trick "just work". However, for non-PAE kernels there is no guarantee that the identity mapping in the initial_page_table extends as far as the GDT; in this case, accesses to the GDT will cause a page fault (which quickly becomes a triple fault). Fix this by copying the kernel mappings from swapper_pg_dir to initial_page_table twice, both at PAGE_OFFSET and at identity mapping. For some reason, this is only reproducible with QEMU's dynamic translation mode, and not for example with KVM. However, even under KVM one can clearly see that the page table is bogus: $ qemu-system-i386 -pflash OVMF.fd -M q35 vmlinuz0 -s -S -daemonize $ gdb (gdb) target remote localhost:1234 (gdb) hb *0x02858f6f Hardware assisted breakpoint 1 at 0x2858f6f (gdb) c Continuing. Breakpoint 1, 0x02858f6f in ?? () (gdb) monitor info registers ... GDT= 0724e000 000000ff IDT= fffbb000 000007ff CR0=0005003b CR2=ff896000 CR3=032b7000 CR4=00000690 ... The page directory is sane: (gdb) x/4wx 0x32b7000 0x32b7000: 0x03398063 0x03399063 0x0339a063 0x0339b063 (gdb) x/4wx 0x3398000 0x3398000: 0x00000163 0x00001163 0x00002163 0x00003163 (gdb) x/4wx 0x3399000 0x3399000: 0x00400003 0x00401003 0x00402003 0x00403003 but our particular page directory entry is empty: (gdb) x/1wx 0x32b7000 + (0x724e000 >> 22) * 4 0x32b7070: 0x00000000 [ It appears that you can skate past this issue if you don't receive any interrupts while the bogus GDT pointer is loaded, or if you avoid reloading the segment registers in general. Andy Lutomirski provides some additional insight: "AFAICT it's entirely permissible for the GDTR and/or LDT descriptor to point to unmapped memory. Any attempt to use them (segment loads, interrupts, IRET, etc) will try to access that memory as if the access came from CPL 0 and, if the access fails, will generate a valid page fault with CR2 pointing into the GDT or LDT." Up until commit 23a0d4e8fa6d ("efi: Disable interrupts around EFI calls, not in the epilog/prolog calls") interrupts were disabled around the prolog and epilog calls, and the functional GDT was re-installed before interrupts were re-enabled. Which explains why no one has hit this issue until now. ] Signed-off-by: Paolo Bonzini Reported-by: Laszlo Ersek Cc: Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Andy Lutomirski Signed-off-by: Matt Fleming [ Updated changelog. ] Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/setup.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index d74ac33290ae3..34308dd73420d 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1194,6 +1194,14 @@ void __init setup_arch(char **cmdline_p) clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY, swapper_pg_dir + KERNEL_PGD_BOUNDARY, KERNEL_PGD_PTRS); + + /* + * sync back low identity map too. It is used for example + * in the 32-bit EFI stub. + */ + clone_pgd_range(initial_page_table, + swapper_pg_dir + KERNEL_PGD_BOUNDARY, + KERNEL_PGD_PTRS); #endif tboot_probe(); From e4438be9feaa0f9edb212bbd39148e42f7fb68e1 Mon Sep 17 00:00:00 2001 From: Krzysztof Mazur Date: Fri, 6 Nov 2015 14:18:36 +0100 Subject: [PATCH 1367/2091] x86/setup: Fix low identity map for >= 2GB kernel range commit 68accac392d859d24adcf1be3a90e41f978bd54c upstream. The commit f5f3497cad8c extended the low identity mapping. 
However, if the kernel uses more than 2 GB (VMSPLIT_2G_OPT or VMSPLIT_1G memory split), the normal memory mapping is overwritten by the low identity mapping, causing a crash. To avoid this overwrite, limit the low identity map to cover only memory before the kernel range (PAGE_OFFSET). Fixes: f5f3497cad8c "x86/setup: Extend low identity map to cover whole kernel range" Signed-off-by: Krzysztof Mazur Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Laszlo Ersek Cc: Matt Fleming Cc: Paolo Bonzini Link: http://lkml.kernel.org/r/1446815916-22105-1-git-send-email-krzysiek@podlesie.net Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 34308dd73420d..1473a02e6ccb2 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1201,7 +1201,7 @@ void __init setup_arch(char **cmdline_p) */ clone_pgd_range(initial_page_table, swapper_pg_dir + KERNEL_PGD_BOUNDARY, - KERNEL_PGD_PTRS); + min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY)); #endif tboot_probe(); From fe1b81c583f0690ce66ebd8caf0e4b4fc27d00d1 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 5 Nov 2015 16:57:56 +0100 Subject: [PATCH 1368/2091] x86/cpu: Call verify_cpu() after having entered long mode too commit 04633df0c43d710e5f696b06539c100898678235 upstream. When we get loaded by a 64-bit bootloader, the kernel entry point is startup_64 in head_64.S. We don't trust any and all bootloaders because some will fiddle with CPU configuration, so we go ahead and massage each CPU into sanity again. For example, some Dell BIOSes have this XD disable feature which sets IA32_MISC_ENABLE[34] and disables NX. This might be some dumb workaround for other OSes but Linux sure doesn't need it. A similar thing is present in the Surface 3 firmware - see https://bugzilla.kernel.org/show_bug.cgi?id=106051 - which sets this bit only on the BSP: # rdmsr -a 0x1a0 400850089 850089 850089 850089 I know, right?! There's not even an off switch in there. So fix all those cases by sanitizing the 64-bit entry point too. For that, make verify_cpu() callable in 64-bit mode also. Requested-and-debugged-by: "H. Peter Anvin" Reported-and-tested-by: Bastien Nocera Signed-off-by: Borislav Petkov Cc: Matt Fleming Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1446739076-21303-1-git-send-email-bp@alien8.de Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/head_64.S | 8 ++++++++ arch/x86/kernel/verify_cpu.S | 12 +++++++----- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 7e5da2cbe59ed..174fa035a09a1 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -65,6 +65,9 @@ startup_64: * tables and then reload them. */ + /* Sanitize CPU configuration */ + call verify_cpu + /* * Compute the delta between the address I am compiled to run at and the * address I am actually running at. @@ -174,6 +177,9 @@ ENTRY(secondary_startup_64) * after the boot processor executes this code. */ + /* Sanitize CPU configuration */ + call verify_cpu + movq $(init_level4_pgt - __START_KERNEL_map), %rax 1: @@ -288,6 +294,8 @@ ENTRY(secondary_startup_64) pushq %rax # target address in negative space lretq +#include "verify_cpu.S" + #ifdef CONFIG_HOTPLUG_CPU /* * Boot CPU0 entry point. It's called from play_dead().
Everything has been set diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S index b9242bacbe59a..4cf401f581e7e 100644 --- a/arch/x86/kernel/verify_cpu.S +++ b/arch/x86/kernel/verify_cpu.S @@ -34,10 +34,11 @@ #include verify_cpu: - pushfl # Save caller passed flags - pushl $0 # Kill any dangerous flags - popfl + pushf # Save caller passed flags + push $0 # Kill any dangerous flags + popf +#ifndef __x86_64__ pushfl # standard way to check for cpuid popl %eax movl %eax,%ebx @@ -48,6 +49,7 @@ verify_cpu: popl %eax cmpl %eax,%ebx jz verify_cpu_no_longmode # cpu has no cpuid +#endif movl $0x0,%eax # See if cpuid 1 is implemented cpuid @@ -130,10 +132,10 @@ verify_cpu_sse_test: jmp verify_cpu_sse_test # try again verify_cpu_no_longmode: - popfl # Restore caller passed flags + popf # Restore caller passed flags movl $1,%eax ret verify_cpu_sse_ok: - popfl # Restore caller passed flags + popf # Restore caller passed flags xorl %eax, %eax ret From d6f199d3189a8cc1d8915fae52aa12dd0caec1ea Mon Sep 17 00:00:00 2001 From: Andrew Cooper Date: Wed, 3 Jun 2015 10:31:14 +0100 Subject: [PATCH 1369/2091] x86/cpu: Fix SMAP check in PVOPS environments commit 581b7f158fe0383b492acd1ce3fb4e99d4e57808 upstream. There appears to be no formal statement of what pv_irq_ops.save_fl() is supposed to return precisely. Native returns the full flags, while lguest and Xen only return the Interrupt Flag, and both have comments by the implementations stating that only the Interrupt Flag is looked at. This may have been true when initially implemented, but no longer is. To make matters worse, the Xen PVOP leaves the upper bits undefined, making the BUG_ON() undefined behaviour. Experimentally, this now trips for 32bit PV guests on Broadwell hardware. The BUG_ON() is consistent for an individual build, but not consistent for all builds. It has also been a sitting timebomb since SMAP support was introduced. Use native_save_fl() instead, which will obtain an accurate view of the AC flag. Signed-off-by: Andrew Cooper Reviewed-by: David Vrabel Tested-by: Rusty Russell Cc: Rusty Russell Cc: Konrad Rzeszutek Wilk Cc: Boris Ostrovsky Cc: Cc: Xen-devel Link: http://lkml.kernel.org/r/1433323874-6927-1-git-send-email-andrew.cooper3@citrix.com Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/common.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 205e0f3df501b..5732326ec1260 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -291,10 +291,9 @@ __setup("nosmap", setup_disable_smap); static __always_inline void setup_smap(struct cpuinfo_x86 *c) { - unsigned long eflags; + unsigned long eflags = native_save_fl(); /* This should have been cleared long ago */ - raw_local_save_flags(eflags); BUG_ON(eflags & X86_EFLAGS_AC); if (cpu_has(c, X86_FEATURE_SMAP)) { From 64e2fe7afcb55788bc888290b1ab60ebcfa0a714 Mon Sep 17 00:00:00 2001 From: Andrei Otcheretianski Date: Sun, 25 Oct 2015 10:59:38 +0200 Subject: [PATCH 1370/2091] mac80211: Fix local deauth while associating commit a64cba3c5330704a034bd3179270b8d04daf6987 upstream. Local request to deauthenticate wasn't handled while associating, thus the association could continue even when the user space required to disconnect. 
Signed-off-by: Andrei Otcheretianski Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/mlme.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 26053bf2faa8f..425ae97bb29b5 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -4946,6 +4946,25 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, return 0; } + if (ifmgd->assoc_data && + ether_addr_equal(ifmgd->assoc_data->bss->bssid, req->bssid)) { + sdata_info(sdata, + "aborting association with %pM by local choice (Reason: %u=%s)\n", + req->bssid, req->reason_code, + ieee80211_get_reason_code_string(req->reason_code)); + + drv_mgd_prepare_tx(sdata->local, sdata); + ieee80211_send_deauth_disassoc(sdata, req->bssid, + IEEE80211_STYPE_DEAUTH, + req->reason_code, tx, + frame_buf); + ieee80211_destroy_assoc_data(sdata, false); + ieee80211_report_disconnect(sdata, frame_buf, + sizeof(frame_buf), true, + req->reason_code); + return 0; + } + if (ifmgd->associated && ether_addr_equal(ifmgd->associated->bssid, req->bssid)) { sdata_info(sdata, From f1a112cce43b4901894ac01380261b2fac10a69e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 28 Aug 2015 10:52:53 +0200 Subject: [PATCH 1371/2091] mac80211: fix driver RSSI event calculations commit 8ec6d97871f37e4743678ea4a455bd59580aa0f4 upstream. The ifmgd->ave_beacon_signal value cannot be taken as is for comparisons, it must be divided by 16 since it's represented like that for better accuracy of the EWMA calculations. This would lead to invalid driver RSSI events. Fix the used value. Fixes: 615f7b9bb1f8 ("mac80211: add driver RSSI threshold events") Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/mlme.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 425ae97bb29b5..a93906103f8b0 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -3340,7 +3340,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, if (ifmgd->rssi_min_thold != ifmgd->rssi_max_thold && ifmgd->count_beacon_signal >= IEEE80211_SIGNAL_AVE_MIN_COUNT) { - int sig = ifmgd->ave_beacon_signal; + int sig = ifmgd->ave_beacon_signal / 16; int last_sig = ifmgd->last_ave_beacon_signal; struct ieee80211_event event = { .type = RSSI_EVENT, From b45a2ff53cf39b31f69c7b6e34ffabe5f8c723c3 Mon Sep 17 00:00:00 2001 From: Arik Nemtsov Date: Sun, 25 Oct 2015 10:59:41 +0200 Subject: [PATCH 1372/2091] mac80211: allow null chandef in tracing commit 254d3dfe445f94a764e399ca12e04365ac9413ed upstream. In TDLS channel-switch operations the chandef can sometimes be NULL. Avoid an oops in the trace code for these cases and just print a chandef full of zeros. Fixes: a7a6bdd0670fe ("mac80211: introduce TDLS channel switch ops") Signed-off-by: Arik Nemtsov Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/trace.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index 4c2e7690226a8..ab19f3c2104dc 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -33,11 +33,11 @@ __field(u32, chan_width) \ __field(u32, center_freq1) \ __field(u32, center_freq2) -#define CHANDEF_ASSIGN(c) \ - __entry->control_freq = (c)->chan ?
(c)->chan->center_freq : 0; \ - __entry->chan_width = (c)->width; \ - __entry->center_freq1 = (c)->center_freq1; \ - __entry->center_freq2 = (c)->center_freq2; +#define CHANDEF_ASSIGN(c) \ + __entry->control_freq = (c) ? ((c)->chan ? (c)->chan->center_freq : 0) : 0; \ + __entry->chan_width = (c) ? (c)->width : 0; \ + __entry->center_freq1 = (c) ? (c)->center_freq1 : 0; \ + __entry->center_freq2 = (c) ? (c)->center_freq2 : 0; #define CHANDEF_PR_FMT " control:%d MHz width:%d center: %d/%d MHz" #define CHANDEF_PR_ARG __entry->control_freq, __entry->chan_width, \ __entry->center_freq1, __entry->center_freq2 From 9da175204e4b721d1b7850337b2c1c76bdc95493 Mon Sep 17 00:00:00 2001 From: "Janusz.Dziedzic@tieto.com" Date: Tue, 27 Oct 2015 08:35:11 +0100 Subject: [PATCH 1373/2091] mac80211: fix divide by zero when NOA update commit 519ee6918b91abdc4bc9720deae17599a109eb40 upstream. In case of one shot NOA the interval can be 0, catch that instead of potentially (depending on the driver) crashing like this: divide error: 0000 [#1] SMP [...] Call Trace: [] ieee80211_extend_absent_time+0x6c/0xb0 [mac80211] [] ieee80211_update_p2p_noa+0xb7/0xe0 [mac80211] [] ath9k_p2p_ps_timer+0x170/0x190 [ath9k] [] ath_gen_timer_isr+0xc8/0xf0 [ath9k_hw] [] ath9k_tasklet+0x296/0x2f0 [ath9k] [] tasklet_action+0xe5/0xf0 [...] Signed-off-by: Janusz Dziedzic Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/util.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/mac80211/util.c b/net/mac80211/util.c index b864ebc6ab8fb..67fec9ba97fce 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2984,6 +2984,13 @@ ieee80211_extend_noa_desc(struct ieee80211_noa_data *data, u32 tsf, int i) if (end > 0) return false; + /* One shot NOA */ + if (data->count[i] == 1) + return false; + + if (data->desc[i].interval == 0) + return false; + /* End time is in the past, check for repetitions */ skip = DIV_ROUND_UP(-end, data->desc[i].interval); if (data->count[i] < 255) { From 95457aee1b0e4cd37cf75823b2e13df16b20729e Mon Sep 17 00:00:00 2001 From: Ola Olsson Date: Thu, 29 Oct 2015 07:04:58 +0100 Subject: [PATCH 1374/2091] nl80211: Fix potential memory leak from parse_acl_data commit 4baf6bea37247e59f1971e8009d13aeda95edba2 upstream. If parse_acl_data succeeds but the subsequent parsing of smps attributes fails, there will be a memory leak due to early returns. Fix that by moving the ACL parsing later. 
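[ The underlying bug shape is the generic allocate-before-validate ordering problem; a small stand-alone sketch with made-up helpers (this is not the nl80211 code) shows why moving the allocation after the checks is sufficient. ]

#include <stdlib.h>

struct acl { int n; };

static struct acl *parse_acl(void) { return malloc(sizeof(struct acl)); }
static int parse_smps(int valid) { return valid ? 0 : -1; }

/* Buggy ordering: 'acl' leaks whenever the later check bails out early. */
static int start_ap_buggy(int smps_valid)
{
        struct acl *acl = parse_acl();

        if (!acl)
                return -1;
        if (parse_smps(smps_valid))
                return -1;      /* early return, acl is never freed */
        free(acl);
        return 0;
}

/* Fixed ordering: allocate only after every check that can fail. */
static int start_ap_fixed(int smps_valid)
{
        struct acl *acl;

        if (parse_smps(smps_valid))
                return -1;
        acl = parse_acl();
        if (!acl)
                return -1;
        free(acl);
        return 0;
}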
Fixes: 18998c381b19b ("cfg80211: allow requesting SMPS mode on ap start") Signed-off-by: Ola Olsson Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/wireless/nl80211.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index dd78445c7d506..04b6f3f6ee0b4 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3407,12 +3407,6 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) wdev->iftype)) return -EINVAL; - if (info->attrs[NL80211_ATTR_ACL_POLICY]) { - params.acl = parse_acl_data(&rdev->wiphy, info); - if (IS_ERR(params.acl)) - return PTR_ERR(params.acl); - } - if (info->attrs[NL80211_ATTR_SMPS_MODE]) { params.smps_mode = nla_get_u8(info->attrs[NL80211_ATTR_SMPS_MODE]); @@ -3436,6 +3430,12 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) params.smps_mode = NL80211_SMPS_OFF; } + if (info->attrs[NL80211_ATTR_ACL_POLICY]) { + params.acl = parse_acl_data(&rdev->wiphy, info); + if (IS_ERR(params.acl)) + return PTR_ERR(params.acl); + } + wdev_lock(wdev); err = rdev_start_ap(rdev, dev, ¶ms); if (!err) { From 2854c585c3a4ab7d3a527758f39ff7dae00bd951 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Sun, 25 Oct 2015 22:54:20 +0100 Subject: [PATCH 1375/2091] NFC: nci: Fix incorrect data chaining when sending data commit 500c4ef02277eaadbfe20537f963b6221f6ac007 upstream. When sending HCI data over NCI, cmd information should be present only on the first packet. Each packet shall be specifically allocated and sent to the NCI layer. Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz Signed-off-by: Greg Kroah-Hartman --- net/nfc/nci/hci.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c index b33fed6d15841..32a7558bf96b6 100644 --- a/net/nfc/nci/hci.c +++ b/net/nfc/nci/hci.c @@ -146,18 +146,18 @@ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe, if (!conn_info) return -EPROTO; - skb = nci_skb_alloc(ndev, 2 + conn_info->max_pkt_payload_len + + i = 0; + skb = nci_skb_alloc(ndev, conn_info->max_pkt_payload_len + NCI_DATA_HDR_SIZE, GFP_KERNEL); if (!skb) return -ENOMEM; - skb_reserve(skb, 2 + NCI_DATA_HDR_SIZE); + skb_reserve(skb, NCI_DATA_HDR_SIZE + 2); *skb_push(skb, 1) = data_type; - i = 0; - len = conn_info->max_pkt_payload_len; - do { + len = conn_info->max_pkt_payload_len; + /* If last packet add NCI_HFP_NO_CHAINING */ if (i + conn_info->max_pkt_payload_len - (skb->len + 1) >= data_len) { @@ -177,9 +177,15 @@ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe, return r; i += len; + if (i < data_len) { - skb_trim(skb, 0); - skb_pull(skb, len); + skb = nci_skb_alloc(ndev, + conn_info->max_pkt_payload_len + + NCI_DATA_HDR_SIZE, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + skb_reserve(skb, NCI_DATA_HDR_SIZE + 1); } } while (i < data_len); From 8e5c139309481f323ecf4a73db09e7b56ea4e08b Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Sun, 25 Oct 2015 22:54:21 +0100 Subject: [PATCH 1376/2091] NFC: nci: Fix improper management of HCI return code commit d8cd37ed2fc871c66b4c79c59f651dc2cdf7091c upstream. When sending HCI data over NCI, HCI return code is part of the NCI data. In order to get correctly the HCI return code, we assume the NCI communication is successful and extract the return code for the nci_hci functions return code. This is done because nci_to_errno does not match hci return code value. 
Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz Signed-off-by: Greg Kroah-Hartman --- net/nfc/nci/hci.c | 66 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 49 insertions(+), 17 deletions(-) diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c index 32a7558bf96b6..e0909da10d796 100644 --- a/net/nfc/nci/hci.c +++ b/net/nfc/nci/hci.c @@ -101,6 +101,20 @@ struct nci_hcp_packet { #define NCI_HCP_MSG_GET_CMD(header) (header & 0x3f) #define NCI_HCP_MSG_GET_PIPE(header) (header & 0x7f) +static int nci_hci_result_to_errno(u8 result) +{ + switch (result) { + case NCI_HCI_ANY_OK: + return 0; + case NCI_HCI_ANY_E_REG_PAR_UNKNOWN: + return -EOPNOTSUPP; + case NCI_HCI_ANY_E_TIMEOUT: + return -ETIME; + default: + return -1; + } +} + /* HCI core */ static void nci_hci_reset_pipes(struct nci_hci_dev *hdev) { @@ -218,7 +232,8 @@ int nci_hci_send_cmd(struct nci_dev *ndev, u8 gate, u8 cmd, const u8 *param, size_t param_len, struct sk_buff **skb) { - struct nci_conn_info *conn_info; + struct nci_hcp_message *message; + struct nci_conn_info *conn_info; struct nci_data data; int r; u8 pipe = ndev->hci_dev->gate2pipe[gate]; @@ -238,9 +253,15 @@ int nci_hci_send_cmd(struct nci_dev *ndev, u8 gate, u8 cmd, r = nci_request(ndev, nci_hci_send_data_req, (unsigned long)&data, msecs_to_jiffies(NCI_DATA_TIMEOUT)); - - if (r == NCI_STATUS_OK && skb) - *skb = conn_info->rx_skb; + if (r == NCI_STATUS_OK) { + message = (struct nci_hcp_message *)conn_info->rx_skb->data; + r = nci_hci_result_to_errno( + NCI_HCP_MSG_GET_CMD(message->header)); + skb_pull(conn_info->rx_skb, NCI_HCI_HCP_MESSAGE_HEADER_LEN); + + if (!r && skb) + *skb = conn_info->rx_skb; + } return r; } @@ -334,9 +355,6 @@ static void nci_hci_resp_received(struct nci_dev *ndev, u8 pipe, struct nci_conn_info *conn_info; u8 status = result; - if (result != NCI_HCI_ANY_OK) - goto exit; - conn_info = ndev->hci_dev->conn_info; if (!conn_info) { status = NCI_STATUS_REJECTED; @@ -346,7 +364,7 @@ static void nci_hci_resp_received(struct nci_dev *ndev, u8 pipe, conn_info->rx_skb = skb; exit: - nci_req_complete(ndev, status); + nci_req_complete(ndev, NCI_STATUS_OK); } /* Receive hcp message for pipe, with type and cmd. 
@@ -401,7 +419,7 @@ void nci_hci_data_received_cb(void *context, { struct nci_dev *ndev = (struct nci_dev *)context; struct nci_hcp_packet *packet; - u8 pipe, type, instruction; + u8 pipe, type; struct sk_buff *hcp_skb; struct sk_buff *frag_skb; int msg_len; @@ -440,7 +458,7 @@ void nci_hci_data_received_cb(void *context, *skb_put(hcp_skb, NCI_HCI_HCP_PACKET_HEADER_LEN) = pipe; skb_queue_walk(&ndev->hci_dev->rx_hcp_frags, frag_skb) { - msg_len = frag_skb->len - NCI_HCI_HCP_PACKET_HEADER_LEN; + msg_len = frag_skb->len - NCI_HCI_HCP_PACKET_HEADER_LEN; memcpy(skb_put(hcp_skb, msg_len), frag_skb->data + NCI_HCI_HCP_PACKET_HEADER_LEN, msg_len); } @@ -458,11 +476,10 @@ void nci_hci_data_received_cb(void *context, packet = (struct nci_hcp_packet *)hcp_skb->data; type = NCI_HCP_MSG_GET_TYPE(packet->message.header); if (type == NCI_HCI_HCP_RESPONSE) { - pipe = packet->header; - instruction = NCI_HCP_MSG_GET_CMD(packet->message.header); - skb_pull(hcp_skb, NCI_HCI_HCP_PACKET_HEADER_LEN + - NCI_HCI_HCP_MESSAGE_HEADER_LEN); - nci_hci_hcp_message_rx(ndev, pipe, type, instruction, hcp_skb); + pipe = NCI_HCP_MSG_GET_PIPE(packet->header); + skb_pull(hcp_skb, NCI_HCI_HCP_PACKET_HEADER_LEN); + nci_hci_hcp_message_rx(ndev, pipe, type, + NCI_STATUS_OK, hcp_skb); } else { skb_queue_tail(&ndev->hci_dev->msg_rx_queue, hcp_skb); schedule_work(&ndev->hci_dev->msg_rx_work); @@ -494,6 +511,7 @@ EXPORT_SYMBOL(nci_hci_open_pipe); int nci_hci_set_param(struct nci_dev *ndev, u8 gate, u8 idx, const u8 *param, size_t param_len) { + struct nci_hcp_message *message; struct nci_conn_info *conn_info; struct nci_data data; int r; @@ -526,6 +544,12 @@ int nci_hci_set_param(struct nci_dev *ndev, u8 gate, u8 idx, r = nci_request(ndev, nci_hci_send_data_req, (unsigned long)&data, msecs_to_jiffies(NCI_DATA_TIMEOUT)); + if (r == NCI_STATUS_OK) { + message = (struct nci_hcp_message *)conn_info->rx_skb->data; + r = nci_hci_result_to_errno( + NCI_HCP_MSG_GET_CMD(message->header)); + skb_pull(conn_info->rx_skb, NCI_HCI_HCP_MESSAGE_HEADER_LEN); + } kfree(tmp); return r; @@ -535,6 +559,7 @@ EXPORT_SYMBOL(nci_hci_set_param); int nci_hci_get_param(struct nci_dev *ndev, u8 gate, u8 idx, struct sk_buff **skb) { + struct nci_hcp_message *message; struct nci_conn_info *conn_info; struct nci_data data; int r; @@ -559,8 +584,15 @@ int nci_hci_get_param(struct nci_dev *ndev, u8 gate, u8 idx, r = nci_request(ndev, nci_hci_send_data_req, (unsigned long)&data, msecs_to_jiffies(NCI_DATA_TIMEOUT)); - if (r == NCI_STATUS_OK) - *skb = conn_info->rx_skb; + if (r == NCI_STATUS_OK) { + message = (struct nci_hcp_message *)conn_info->rx_skb->data; + r = nci_hci_result_to_errno( + NCI_HCP_MSG_GET_CMD(message->header)); + skb_pull(conn_info->rx_skb, NCI_HCI_HCP_MESSAGE_HEADER_LEN); + + if (!r && skb) + *skb = conn_info->rx_skb; + } return r; } From 345e7449e059f9f6f12606dfb00c7788ee8b1cb6 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Sun, 25 Oct 2015 22:54:22 +0100 Subject: [PATCH 1377/2091] NFC: nci: extract pipe value using NCI_HCP_MSG_GET_PIPE commit e65917b6d54f8b47d8293ea96adfa604fd46cf0d upstream. When receiving data in nci_hci_msg_rx_work, extract pipe value using NCI_HCP_MSG_GET_PIPE macro. 
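[ For reference, the HCP packet header byte carries the pipe identifier in its low seven bits and uses the top bit as a chaining/fragment flag, which is what the mask strips; a tiny stand-alone illustration with a made-up header value: ]

#include <stdint.h>
#include <stdio.h>

#define NCI_HCP_MSG_GET_PIPE(header) (header & 0x7f)

int main(void)
{
        uint8_t header = 0x81;  /* chaining bit set, pipe 1 (example value) */

        /* Raw byte is 0x81; masking recovers the real pipe id, 0x01. */
        printf("pipe=%d\n", NCI_HCP_MSG_GET_PIPE(header));
        return 0;
}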
Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz Signed-off-by: Greg Kroah-Hartman --- net/nfc/nci/hci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c index e0909da10d796..91ecbd1c2ec17 100644 --- a/net/nfc/nci/hci.c +++ b/net/nfc/nci/hci.c @@ -402,7 +402,7 @@ static void nci_hci_msg_rx_work(struct work_struct *work) u8 pipe, type, instruction; while ((skb = skb_dequeue(&hdev->msg_rx_queue)) != NULL) { - pipe = skb->data[0]; + pipe = NCI_HCP_MSG_GET_PIPE(skb->data[0]); skb_pull(skb, NCI_HCI_HCP_PACKET_HEADER_LEN); message = (struct nci_hcp_message *)skb->data; type = NCI_HCP_MSG_GET_TYPE(message->header); @@ -439,7 +439,7 @@ void nci_hci_data_received_cb(void *context, /* it's the last fragment. Does it need re-aggregation? */ if (skb_queue_len(&ndev->hci_dev->rx_hcp_frags)) { - pipe = packet->header & NCI_HCI_FRAGMENT; + pipe = NCI_HCP_MSG_GET_PIPE(packet->header); skb_queue_tail(&ndev->hci_dev->rx_hcp_frags, skb); msg_len = 0; From 964e8570ae6e2b61a39b669f769042cf861a8da3 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Wed, 21 Oct 2015 19:55:32 +0300 Subject: [PATCH 1378/2091] iwlwifi: pcie: fix (again) prepare card flow commit 03a19cbb91994212be72ce15ac3406fa9f8ba079 upstream. The hardware bug in the commit mentioned below forces us not to re-enable the clock gating in the Host Cluster. The impact on the power consumption is minimal and it allows the WAKE_ME interrupt to propagate. Fixes: c9fdec9f3970 ("iwlwifi: pcie: fix prepare card flow") Signed-off-by: Emmanuel Grumbach Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/iwlwifi/pcie/trans.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/pcie/trans.c b/drivers/net/wireless/iwlwifi/pcie/trans.c index 699a4802835fa..1de80a8e357a0 100644 --- a/drivers/net/wireless/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/iwlwifi/pcie/trans.c @@ -572,10 +572,8 @@ static int iwl_pcie_prepare_card_hw(struct iwl_trans *trans) do { ret = iwl_pcie_set_hw_ready(trans); - if (ret >= 0) { - ret = 0; - goto out; - } + if (ret >= 0) + return 0; usleep_range(200, 1000); t += 200; @@ -585,10 +583,6 @@ static int iwl_pcie_prepare_card_hw(struct iwl_trans *trans) IWL_ERR(trans, "Couldn't prepare the card\n"); -out: - iwl_clear_bit(trans, CSR_DBG_LINK_PWR_MGMT_REG, - CSR_RESET_LINK_PWR_MGMT_DISABLED); - return ret; } From 158b78a4a0c7c967682bb15d95dd7c1585b76d9b Mon Sep 17 00:00:00 2001 From: Oren Givon Date: Wed, 28 Oct 2015 12:32:20 +0200 Subject: [PATCH 1379/2091] iwlwifi: Add new PCI IDs for the 8260 series commit 4ab75944c4b324c1f5f01dbd4c4d122d2b9da187 upstream. Add some new PCI IDs for the 8260 series which were missing. 
The following sub-system IDs were added: 0x0130, 0x1130, 0x0132, 0x1132, 0x1150, 0x8110, 0x9110, 0x8130, 0x9130, 0x8132, 0x9132, 0x8150, 0x9150, 0x0044, 0x0930 Signed-off-by: Oren Givon Signed-off-by: Emmanuel Grumbach Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/iwlwifi/pcie/drv.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/iwlwifi/pcie/drv.c b/drivers/net/wireless/iwlwifi/pcie/drv.c index 8b16949a9cb98..88bf80a942b4f 100644 --- a/drivers/net/wireless/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/iwlwifi/pcie/drv.c @@ -421,14 +421,21 @@ static const struct pci_device_id iwl_hw_card_ids[] = { /* 8000 Series */ {IWL_PCI_DEVICE(0x24F3, 0x0010, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x1010, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x0130, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x1130, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x0132, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x1132, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x0110, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x01F0, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x0012, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x1012, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x1110, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x0050, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x0250, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x1050, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x0150, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x1150, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F4, 0x0030, iwl8260_2ac_cfg)}, - {IWL_PCI_DEVICE(0x24F4, 0x1130, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F4, 0x1030, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0xC010, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0xD010, iwl8260_2ac_cfg)}, @@ -437,18 +444,28 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x24F3, 0xC050, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0xD050, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x8010, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x8110, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x9010, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x9110, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F4, 0x8030, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F4, 0x9030, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x8130, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x9130, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x8132, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x9132, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x8050, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x8150, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x9050, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x9150, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x0004, iwl8260_2n_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x0044, iwl8260_2n_cfg)}, {IWL_PCI_DEVICE(0x24F5, 0x0010, iwl4165_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F6, 0x0030, iwl4165_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x0810, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x0910, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x0850, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x0950, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x0930, iwl8260_2ac_cfg)}, #endif /* CONFIG_IWLMVM */ {0} From b4a0135c37a55136dda22928bcdd50344537eca6 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Fri, 25 Sep 2015 18:09:35 +0200 Subject: [PATCH 1380/2091] net: mvneta: Fix CPU_MAP registers initialisation commit 2502d0ef272da7058ef303b849a2c8dc324c2e2e upstream. 
The CPU_MAP register is duplicated for each CPUs at different addresses, each instance being at a different address. However, the code so far was using CONFIG_NR_CPUS to initialise the CPU_MAP registers for each registers, while the SoCs embed at most 4 CPUs. This is especially an issue with multi_v7_defconfig, where CONFIG_NR_CPUS is currently set to 16, resulting in writes to registers that are not CPU_MAP. Fixes: c5aff18204da ("net: mvneta: driver for Marvell Armada 370/XP network unit") Signed-off-by: Maxime Ripard Signed-off-by: Gregory CLEMENT Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/marvell/mvneta.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 4d608f0117cd9..e07afc673d7a8 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -949,7 +949,7 @@ static void mvneta_defaults_set(struct mvneta_port *pp) /* Set CPU queue access map - all CPUs have access to all RX * queues and to all TX queues */ - for (cpu = 0; cpu < CONFIG_NR_CPUS; cpu++) + for_each_present_cpu(cpu) mvreg_write(pp, MVNETA_CPU_MAP(cpu), (MVNETA_CPU_RXQ_ACCESS_ALL_MASK | MVNETA_CPU_TXQ_ACCESS_ALL_MASK)); From 669b3319d0817b4f10db614b7ab68624d24be9d9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 30 Sep 2015 15:59:17 +0200 Subject: [PATCH 1381/2091] fs/proc, core/debug: Don't expose absolute kernel addresses via wchan commit b2f73922d119686323f14fbbe46587f863852328 upstream. So the /proc/PID/stat 'wchan' field (the 30th field, which contains the absolute kernel address of the kernel function a task is blocked in) leaks absolute kernel addresses to unprivileged user-space: seq_put_decimal_ull(m, ' ', wchan); The absolute address might also leak via /proc/PID/wchan as well, if KALLSYMS is turned off or if the symbol lookup fails for some reason: static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { unsigned long wchan; char symname[KSYM_NAME_LEN]; wchan = get_wchan(task); if (lookup_symbol_name(wchan, symname) < 0) { if (!ptrace_may_access(task, PTRACE_MODE_READ)) return 0; seq_printf(m, "%lu", wchan); } else { seq_printf(m, "%s", symname); } return 0; } This isn't ideal, because for example it trivially leaks the KASLR offset to any local attacker: fomalhaut:~> printf "%016lx\n" $(cat /proc/$$/stat | cut -d' ' -f35) ffffffff8123b380 Most real-life uses of wchan are symbolic: ps -eo pid:10,tid:10,wchan:30,comm and procps uses /proc/PID/wchan, not the absolute address in /proc/PID/stat: triton:~/tip> strace -f ps -eo pid:10,tid:10,wchan:30,comm 2>&1 | grep wchan | tail -1 open("/proc/30833/wchan", O_RDONLY) = 6 There's one compatibility quirk here: procps relies on whether the absolute value is non-zero - and we can provide that functionality by outputing "0" or "1" depending on whether the task is blocked (whether there's a wchan address). These days there appears to be very little legitimate reason user-space would be interested in the absolute address. The absolute address is mostly historic: from the days when we didn't have kallsyms and user-space procps had to do the decoding itself via the System.map. So this patch sets all numeric output to "0" or "1" and keeps only symbolic output, in /proc/PID/wchan. ( The absolute sleep address can generally still be profiled via perf, by tasks with sufficient privileges. 
) Reviewed-by: Thomas Gleixner Acked-by: Kees Cook Acked-by: Linus Torvalds Cc: Al Viro Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Denys Vlasenko Cc: Dmitry Vyukov Cc: Kostya Serebryany Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Peter Zijlstra Cc: Sasha Levin Cc: kasan-dev Cc: linux-kernel@vger.kernel.org Link: http://lkml.kernel.org/r/20150930135917.GA3285@gmail.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- Documentation/filesystems/proc.txt | 5 +++-- fs/proc/array.c | 16 ++++++++++++++-- fs/proc/base.c | 9 +++------ 3 files changed, 20 insertions(+), 10 deletions(-) diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index c3b6b301d8b00..749b7bae0c00e 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -140,7 +140,8 @@ Table 1-1: Process specific entries in /proc stat Process status statm Process memory status information status Process status in human readable form - wchan If CONFIG_KALLSYMS is set, a pre-decoded wchan + wchan Present with CONFIG_KALLSYMS=y: it shows the kernel function + symbol the task is blocked in - or "0" if not blocked. pagemap Page table stack Report full stack trace, enable via CONFIG_STACKTRACE smaps a extension based on maps, showing the memory consumption of @@ -309,7 +310,7 @@ Table 1-4: Contents of the stat files (as of 2.6.30-rc7) blocked bitmap of blocked signals sigign bitmap of ignored signals sigcatch bitmap of caught signals - wchan address where process went to sleep + 0 (place holder, used to be the wchan address, use /proc/PID/wchan instead) 0 (place holder) 0 (place holder) exit_signal signal to send to parent thread on exit diff --git a/fs/proc/array.c b/fs/proc/array.c index fd02a9ebfc30e..70f9c4cba31f6 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -364,7 +364,7 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task, int whole) { - unsigned long vsize, eip, esp, wchan = ~0UL; + unsigned long vsize, eip, esp, wchan = 0; int priority, nice; int tty_pgrp = -1, tty_nr = 0; sigset_t sigign, sigcatch; @@ -496,7 +496,19 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, seq_put_decimal_ull(m, ' ', task->blocked.sig[0] & 0x7fffffffUL); seq_put_decimal_ull(m, ' ', sigign.sig[0] & 0x7fffffffUL); seq_put_decimal_ull(m, ' ', sigcatch.sig[0] & 0x7fffffffUL); - seq_put_decimal_ull(m, ' ', wchan); + + /* + * We used to output the absolute kernel address, but that's an + * information leak - so instead we show a 0/1 flag here, to signal + * to user-space whether there's a wchan field in /proc/PID/wchan. + * + * This works with older implementations of procps as well. 
+ */ + if (wchan) + seq_puts(m, " 1"); + else + seq_puts(m, " 0"); + seq_put_decimal_ull(m, ' ', 0); seq_put_decimal_ull(m, ' ', 0); seq_put_decimal_ll(m, ' ', task->exit_signal); diff --git a/fs/proc/base.c b/fs/proc/base.c index 093ca14f57015..fcdeb1eb39211 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -238,13 +238,10 @@ static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns, wchan = get_wchan(task); - if (lookup_symbol_name(wchan, symname) < 0) { - if (!ptrace_may_access(task, PTRACE_MODE_READ)) - return 0; - seq_printf(m, "%lu", wchan); - } else { + if (wchan && ptrace_may_access(task, PTRACE_MODE_READ) && !lookup_symbol_name(wchan, symname)) seq_printf(m, "%s", symname); - } + else + seq_putc(m, '0'); return 0; } From 3b5167fa51be69e5770c34aebb21bc42c7f76f59 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 23 Oct 2015 11:36:01 +0200 Subject: [PATCH 1382/2091] clk: versatile-icst: fix memory leak commit 7bdccef34fc67d3fce6778a018601dd41e43c5ce upstream. A static code checker found a memory leak in the Versatile ICST code. Fix it. Fixes: a183da637c52 "clk: versatile: respect parent rate in ICST clock" Reported-by: Stephen Boyd Signed-off-by: Linus Walleij Signed-off-by: Stephen Boyd Signed-off-by: Greg Kroah-Hartman --- drivers/clk/versatile/clk-icst.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/clk/versatile/clk-icst.c b/drivers/clk/versatile/clk-icst.c index bc96f103bd7ca..9064636a867f2 100644 --- a/drivers/clk/versatile/clk-icst.c +++ b/drivers/clk/versatile/clk-icst.c @@ -156,8 +156,10 @@ struct clk *icst_clk_register(struct device *dev, icst->lockreg = base + desc->lock_offset; clk = clk_register(dev, &icst->hw); - if (IS_ERR(clk)) + if (IS_ERR(clk)) { + kfree(pclone); kfree(icst); + } return clk; } From 7c306d85f13d6d2504a44a39bb420e992a45d072 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Fri, 18 Sep 2015 09:29:04 -0700 Subject: [PATCH 1383/2091] mfd: twl6040: Fix deferred probe handling for clk32k commit 75c08f17ec87c2d742487bb87408d6feebc526bd upstream. Commit 68bab8662f49 ("mfd: twl6040: Optional clk32k clock handling") added clock handling for the 32k clock from palmas-clk. However, that patch did not consider a typical situation where twl6040 is built-in, and palmas-clk is a loadable module like we have in omap2plus_defconfig. If palmas-clk is not loaded before twl6040 probes, we will get a "clk32k is not handled" warning during booting. This means that any drivers relying on this clock will mysteriously fail, including omap5-uevm WLAN and audio. Note that for WLAN, we probably should also eventually get the clk32kgaudio for MMC3 directly as that's shared between audio and WLAN SDIO at least for omap5-uevm. It seems the WLAN chip cannot get it as otherwise MMC3 won't get properly probed. 
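[ The fix below is an instance of the usual optional-resource probe-deferral idiom: a provider that is merely not bound yet must be reported as -EPROBE_DEFER rather than being treated as "clock absent". A minimal sketch of that idiom; the function name is invented and this shows the general pattern, not the twl6040 code verbatim. ]

static int example_probe(struct i2c_client *client)
{
        struct clk *clk32k = devm_clk_get(&client->dev, "clk32k");

        if (IS_ERR(clk32k)) {
                /* Provider not bound yet: ask the driver core to retry later. */
                if (PTR_ERR(clk32k) == -EPROBE_DEFER)
                        return -EPROBE_DEFER;
                /* Clock genuinely not described: carry on without it. */
                clk32k = NULL;
        }

        /* ... the rest of probe touches clk32k only when it is non-NULL ... */
        return 0;
}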
Fixes: 68bab8662f49 ("mfd: twl6040: Optional clk32k clock handling") Signed-off-by: Tony Lindgren Reviewed-by: Felipe Balbi Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/mfd/twl6040.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/mfd/twl6040.c b/drivers/mfd/twl6040.c index c5265c1262c50..6aacd205a774b 100644 --- a/drivers/mfd/twl6040.c +++ b/drivers/mfd/twl6040.c @@ -647,6 +647,8 @@ static int twl6040_probe(struct i2c_client *client, twl6040->clk32k = devm_clk_get(&client->dev, "clk32k"); if (IS_ERR(twl6040->clk32k)) { + if (PTR_ERR(twl6040->clk32k) == -EPROBE_DEFER) + return -EPROBE_DEFER; dev_info(&client->dev, "clk32k is not handled\n"); twl6040->clk32k = NULL; } From b0b69c7d6e11c481e5e597f5641dd883cdfcf89c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 21 Sep 2015 19:19:53 +0300 Subject: [PATCH 1384/2091] mwifiex: fix mwifiex_rdeeprom_read() commit 1f9c6e1bc1ba5f8a10fcd6e99d170954d7c6d382 upstream. There were several bugs here. 1) The done label was in the wrong place so we didn't copy any information out when there was no command given. 2) We were using PAGE_SIZE as the size of the buffer instead of "PAGE_SIZE - pos". 3) snprintf() returns the number of characters that would have been printed if there were enough space. If there was not enough space (and we had fixed the memory corruption bug #2) then it would result in an information leak when we do simple_read_from_buffer(). I've changed it to use scnprintf() instead. I also removed the initialization at the start of the function, because I thought it made the code a little more clear. Fixes: 5e6e3a92b9a4 ('wireless: mwifiex: initial commit for Marvell mwifiex driver') Signed-off-by: Dan Carpenter Acked-by: Amitkumar Karwar Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/mwifiex/debugfs.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/mwifiex/debugfs.c b/drivers/net/wireless/mwifiex/debugfs.c index 1fb329dc67445..24e48bddf186a 100644 --- a/drivers/net/wireless/mwifiex/debugfs.c +++ b/drivers/net/wireless/mwifiex/debugfs.c @@ -593,7 +593,7 @@ mwifiex_rdeeprom_read(struct file *file, char __user *ubuf, (struct mwifiex_private *) file->private_data; unsigned long addr = get_zeroed_page(GFP_KERNEL); char *buf = (char *) addr; - int pos = 0, ret = 0, i; + int pos, ret, i; u8 value[MAX_EEPROM_DATA]; if (!buf) @@ -601,7 +601,7 @@ mwifiex_rdeeprom_read(struct file *file, char __user *ubuf, if (saved_offset == -1) { /* No command has been given */ - pos += snprintf(buf, PAGE_SIZE, "0"); + pos = snprintf(buf, PAGE_SIZE, "0"); goto done; } @@ -610,17 +610,17 @@ mwifiex_rdeeprom_read(struct file *file, char __user *ubuf, (u16) saved_bytes, value); if (ret) { ret = -EINVAL; - goto done; + goto out_free; } - pos += snprintf(buf, PAGE_SIZE, "%d %d ", saved_offset, saved_bytes); + pos = snprintf(buf, PAGE_SIZE, "%d %d ", saved_offset, saved_bytes); for (i = 0; i < saved_bytes; i++) - pos += snprintf(buf + strlen(buf), PAGE_SIZE, "%d ", value[i]); - - ret = simple_read_from_buffer(ubuf, count, ppos, buf, pos); + pos += scnprintf(buf + pos, PAGE_SIZE - pos, "%d ", value[i]); done: + ret = simple_read_from_buffer(ubuf, count, ppos, buf, pos); +out_free: free_page(addr); return ret; } From adb2ed85a10effa1d22ee849468f38663e5bd3bf Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Sun, 18 Oct 2015 22:14:48 -0500 Subject: [PATCH 1385/2091] staging: rtl8712: Add device ID for Sitecom WLA2100 commit 
1e6e63283691a2a9048a35d9c6c59cf0abd342e4 upstream. This adds the USB ID for the Sitecom WLA2100. The Windows 10 inf file was checked to verify that the addition is correct. Reported-by: Frans van de Wiel Signed-off-by: Larry Finger Cc: Frans van de Wiel Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8712/usb_intf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/rtl8712/usb_intf.c b/drivers/staging/rtl8712/usb_intf.c index f8b5b332e7c3d..943a0e2045329 100644 --- a/drivers/staging/rtl8712/usb_intf.c +++ b/drivers/staging/rtl8712/usb_intf.c @@ -144,6 +144,7 @@ static struct usb_device_id rtl871x_usb_id_tbl[] = { {USB_DEVICE(0x0DF6, 0x0058)}, {USB_DEVICE(0x0DF6, 0x0049)}, {USB_DEVICE(0x0DF6, 0x004C)}, + {USB_DEVICE(0x0DF6, 0x006C)}, {USB_DEVICE(0x0DF6, 0x0064)}, /* Skyworth */ {USB_DEVICE(0x14b2, 0x3300)}, From 4fb2de67b1600cd3c085c150ba23a1440ae85a77 Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Mon, 7 Sep 2015 12:05:41 +0200 Subject: [PATCH 1386/2091] Bluetooth: hidp: fix device disconnect on idle timeout commit 660f0fc07d21114549c1862e67e78b1cf0c90c29 upstream. The HIDP specs define an idle-timeout which automatically disconnects a device. This has always been implemented in the HIDP layer and forced a synchronous shutdown of the hidp-scheduler. This works just fine, but lacks a forced disconnect on the underlying l2cap channels. This has been broken since: commit 5205185d461d5902325e457ca80bd421127b7308 Author: David Herrmann Date: Sat Apr 6 20:28:47 2013 +0200 Bluetooth: hidp: remove old session-management The old session-management always forced an l2cap error on the ctrl/intr channels when shutting down. The new session-management skips this, as we don't want to enforce channel policy on the caller. In other words, if user-space removes an HIDP device, the underlying channels (which are *owned* and *referenced* by user-space) are still left active. User-space needs to call shutdown(2) or close(2) to release them. Unfortunately, this does not work with idle-timeouts. There is no way to signal user-space that the HIDP layer has been stopped. The API simply does not support any event-passing except for poll(2). Hence, we restore old behavior and force EUNATCH on the sockets if the HIDP layer is disconnected due to idle-timeouts (behavior of explicit disconnects remains unmodified). User-space can still call getsockopt(..., SO_ERROR, ...) ..to retrieve the EUNATCH error and clear sk_err. Hence, the channels can still be re-used (which nobody does so far, though). Therefore, the API still supports the new behavior, but with this patch it's also compatible to the old implicit channel shutdown. Reported-by: Mark Haun Reported-by: Luiz Augusto von Dentz Signed-off-by: David Herrmann Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/hidp/core.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 9070dfd6b4adc..4a0015e16d4f3 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -401,6 +401,20 @@ static void hidp_idle_timeout(unsigned long arg) { struct hidp_session *session = (struct hidp_session *) arg; + /* The HIDP user-space API only contains calls to add and remove + * devices. There is no way to forward events of any kind. Therefore, + * we have to forcefully disconnect a device on idle-timeouts. This is + * unfortunate and weird API design, but it is spec-compliant and + * required for backwards-compatibility. 
Hence, on idle-timeout, we + * signal driver-detach events, so poll() will be woken up with an + * error-condition on both sockets. + */ + + session->intr_sock->sk->sk_err = EUNATCH; + session->ctrl_sock->sk->sk_err = EUNATCH; + wake_up_interruptible(sk_sleep(session->intr_sock->sk)); + wake_up_interruptible(sk_sleep(session->ctrl_sock->sk)); + hidp_session_terminate(session); } From d53162cb4dc127247c05c720b0245370f231a047 Mon Sep 17 00:00:00 2001 From: Dmitry Tunin Date: Mon, 5 Oct 2015 19:29:33 +0300 Subject: [PATCH 1387/2091] Bluetooth: ath3k: Add new AR3012 0930:021c id commit cd355ff071cd37e7197eccf9216770b2b29369f7 upstream. This adapter works with the existing linux-firmware. T: Bus=01 Lev=01 Prnt=01 Port=03 Cnt=02 Dev#= 3 Spd=12 MxCh= 0 D: Ver= 1.10 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=0930 ProdID=021c Rev=00.01 C: #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=100mA I: If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb I: If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb BugLink: https://bugs.launchpad.net/bugs/1502781 Signed-off-by: Dmitry Tunin Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/ath3k.c | 2 ++ drivers/bluetooth/btusb.c | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c index e527a3e13939c..60c15f8bc09d0 100644 --- a/drivers/bluetooth/ath3k.c +++ b/drivers/bluetooth/ath3k.c @@ -93,6 +93,7 @@ static const struct usb_device_id ath3k_table[] = { { USB_DEVICE(0x04CA, 0x300f) }, { USB_DEVICE(0x04CA, 0x3010) }, { USB_DEVICE(0x0930, 0x0219) }, + { USB_DEVICE(0x0930, 0x021c) }, { USB_DEVICE(0x0930, 0x0220) }, { USB_DEVICE(0x0930, 0x0227) }, { USB_DEVICE(0x0b05, 0x17d0) }, @@ -153,6 +154,7 @@ static const struct usb_device_id ath3k_blist_tbl[] = { { USB_DEVICE(0x04ca, 0x300f), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3010), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0930, 0x0219), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x0930, 0x021c), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0930, 0x0220), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0930, 0x0227), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0b05, 0x17d0), .driver_info = BTUSB_ATH3012 }, diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index c655015392248..245e1f21f9e57 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -191,6 +191,7 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x04ca, 0x300f), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3010), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0930, 0x0219), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x0930, 0x021c), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0930, 0x0220), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0930, 0x0227), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0b05, 0x17d0), .driver_info = BTUSB_ATH3012 }, From a1475c6b60a079251d440140bb51fb6ddf1a5a6e Mon Sep 17 00:00:00 2001 From: Dmitry Tunin Date: Fri, 16 Oct 2015 11:45:26 +0300 Subject: [PATCH 1388/2091] Bluetooth: ath3k: Add support of AR3012 0cf3:817b device commit 18e0afab8ce3f1230ce3fef52b2e73374fd9c0e7 upstream. 
T: Bus=04 Lev=02 Prnt=02 Port=04 Cnt=01 Dev#= 3 Spd=12 MxCh= 0 D: Ver= 1.10 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=0cf3 ProdID=817b Rev=00.02 C: #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=100mA I: If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb I: If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb BugLink: https://bugs.launchpad.net/bugs/1506615 Signed-off-by: Dmitry Tunin Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/ath3k.c | 2 ++ drivers/bluetooth/btusb.c | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c index 60c15f8bc09d0..fa893c3ec4087 100644 --- a/drivers/bluetooth/ath3k.c +++ b/drivers/bluetooth/ath3k.c @@ -105,6 +105,7 @@ static const struct usb_device_id ath3k_table[] = { { USB_DEVICE(0x0CF3, 0x311F) }, { USB_DEVICE(0x0cf3, 0x3121) }, { USB_DEVICE(0x0CF3, 0x817a) }, + { USB_DEVICE(0x0CF3, 0x817b) }, { USB_DEVICE(0x0cf3, 0xe003) }, { USB_DEVICE(0x0CF3, 0xE004) }, { USB_DEVICE(0x0CF3, 0xE005) }, @@ -166,6 +167,7 @@ static const struct usb_device_id ath3k_blist_tbl[] = { { USB_DEVICE(0x0cf3, 0x311F), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x3121), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0CF3, 0x817a), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x0CF3, 0x817b), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0xe004), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0xe005), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0xe006), .driver_info = BTUSB_ATH3012 }, diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 245e1f21f9e57..7bf87d9bfd7d5 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -203,6 +203,7 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x0cf3, 0x311f), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x3121), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x817a), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x0cf3, 0x817b), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0xe003), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0xe004), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0xe005), .driver_info = BTUSB_ATH3012 }, From d4815c10674240ee535ddcb19c408ba13102d9e6 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Mon, 19 Oct 2015 10:51:47 +0300 Subject: [PATCH 1389/2091] Bluetooth: Fix removing connection parameters when unpairing commit a6ad2a6b9cc1d9d791aee5462cfb8528f366f1d4 upstream. The commit 89cbb0638e9b7 introduced support for deferred connection parameter removal when unpairing by removing them only once an existing connection gets disconnected. However, it failed to address the scenario when we're *not* connected and do an unpair operation. What makes things worse is that most user space BlueZ versions will first issue a disconnect request and only then unpair, meaning the buggy code will be triggered every time. This effectively causes the kernel to resume scanning and reconnect to a device for which we've removed all keys and GATT database information. This patch fixes the issue by adding the missing call to the hci_conn_params_del() function to a branch which handles the case of no existing connection. 
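To make the mgmt.c hunk below easier to follow: the addr_type computation is hoisted above the connection lookup because the new else-branch needs it, and parameter removal now happens immediately when no LE connection exists. A rough sketch of the resulting flow (illustration only, not the literal unpair_device() code):

	addr_type = (cp->addr.type == BDADDR_LE_PUBLIC) ?
			ADDR_LE_DEV_PUBLIC : ADDR_LE_DEV_RANDOM;

	conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &cp->addr.bdaddr);
	if (conn) {
		/* still connected: parameters are removed later, once the
		 * disconnect completes (the deferred path added earlier) */
	} else {
		/* not connected: remove the parameters right away, otherwise
		 * the controller keeps scanning for and reconnecting to a
		 * device whose keys were just thrown away */
		hci_conn_params_del(hdev, &cp->addr.bdaddr, addr_type);
	}

	hci_remove_irk(hdev, &cp->addr.bdaddr, addr_type);
	err = hci_remove_ltk(hdev, &cp->addr.bdaddr, addr_type);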
Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/mgmt.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 7fd87e7135b52..58d60cbbc33f8 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2962,6 +2962,11 @@ static int unpair_device(struct sock *sk, struct hci_dev *hdev, void *data, } else { u8 addr_type; + if (cp->addr.type == BDADDR_LE_PUBLIC) + addr_type = ADDR_LE_DEV_PUBLIC; + else + addr_type = ADDR_LE_DEV_RANDOM; + conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &cp->addr.bdaddr); if (conn) { @@ -2977,13 +2982,10 @@ static int unpair_device(struct sock *sk, struct hci_dev *hdev, void *data, */ if (!cp->disconnect) conn = NULL; + } else { + hci_conn_params_del(hdev, &cp->addr.bdaddr, addr_type); } - if (cp->addr.type == BDADDR_LE_PUBLIC) - addr_type = ADDR_LE_DEV_PUBLIC; - else - addr_type = ADDR_LE_DEV_RANDOM; - hci_remove_irk(hdev, &cp->addr.bdaddr, addr_type); err = hci_remove_ltk(hdev, &cp->addr.bdaddr, addr_type); From 85f576c6806ee1a73be10b67b57a2a8f33a5611c Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Fri, 30 Oct 2015 13:48:19 +0100 Subject: [PATCH 1390/2091] can: Use correct type in sizeof() in nla_put() commit 562b103a21974c2f9cd67514d110f918bb3e1796 upstream. The sizeof() is invoked on an incorrect variable, likely due to some copy-paste error, and this might result in memory corruption. Fix this. Signed-off-by: Marek Vasut Cc: Wolfgang Grandegger Cc: netdev@vger.kernel.org Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index aede704605c66..141c2a42d7eda 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -915,7 +915,7 @@ static int can_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put(skb, IFLA_CAN_BITTIMING_CONST, sizeof(*priv->bittiming_const), priv->bittiming_const)) || - nla_put(skb, IFLA_CAN_CLOCK, sizeof(cm), &priv->clock) || + nla_put(skb, IFLA_CAN_CLOCK, sizeof(priv->clock), &priv->clock) || nla_put_u32(skb, IFLA_CAN_STATE, state) || nla_put(skb, IFLA_CAN_CTRLMODE, sizeof(cm), &cm) || nla_put_u32(skb, IFLA_CAN_RESTART_MS, priv->restart_ms) || From 39851b0e4e62065a24ef2d96c7e32f6e4a5bc581 Mon Sep 17 00:00:00 2001 From: Mirza Krak Date: Tue, 10 Nov 2015 14:59:34 +0100 Subject: [PATCH 1391/2091] can: sja1000: clear interrupts on start commit 7cecd9ab80f43972c056dc068338f7bcc407b71c upstream. According to SJA1000 data sheet error-warning (EI) interrupt is not cleared by setting the controller in to reset-mode. Then if we have the following case: - system is suspended (echo mem > /sys/power/state) and SJA1000 is left in operating state - A bus error condition occurs which activates EI interrupt, system is still suspended which means EI interrupt will be not be handled nor cleared. If the above two events occur, on resume there is no way to return the SJA1000 to operating state, except to cycle power to it. By simply reading the IR register on start we will clear any previous conditions that could be present. 
Signed-off-by: Mirza Krak Reported-by: Christian Magnusson Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/sja1000/sja1000.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/can/sja1000/sja1000.c b/drivers/net/can/sja1000/sja1000.c index 32bd7f451aa42..0c048e261ee63 100644 --- a/drivers/net/can/sja1000/sja1000.c +++ b/drivers/net/can/sja1000/sja1000.c @@ -218,6 +218,9 @@ static void sja1000_start(struct net_device *dev) priv->write_reg(priv, SJA1000_RXERR, 0x0); priv->read_reg(priv, SJA1000_ECC); + /* clear interrupt flags */ + priv->read_reg(priv, SJA1000_IR); + /* leave reset mode */ set_normal_mode(dev); } From a95468b014b7389e858b847449bb56cb7c4857a4 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 22 Oct 2015 15:41:52 +0100 Subject: [PATCH 1392/2091] arm64: Fix compat register mappings commit 5accd17d0eb523350c9ef754d655e379c9bb93b3 upstream. For reasons not entirely apparent, but now enshrined in history, the architectural mapping of AArch32 banked registers to AArch64 registers actually orders SP_ and LR_ backwards compared to the intuitive r13/r14 order, for all modes except FIQ. Fix the compat__ macros accordingly, in the hope of avoiding subtle bugs with KVM and AArch32 guests. Signed-off-by: Robin Murphy Acked-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/ptrace.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index d6dd9fdbc3bee..d4264bb0a409b 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -83,14 +83,14 @@ #define compat_sp regs[13] #define compat_lr regs[14] #define compat_sp_hyp regs[15] -#define compat_sp_irq regs[16] -#define compat_lr_irq regs[17] -#define compat_sp_svc regs[18] -#define compat_lr_svc regs[19] -#define compat_sp_abt regs[20] -#define compat_lr_abt regs[21] -#define compat_sp_und regs[22] -#define compat_lr_und regs[23] +#define compat_lr_irq regs[16] +#define compat_sp_irq regs[17] +#define compat_lr_svc regs[18] +#define compat_sp_svc regs[19] +#define compat_lr_abt regs[20] +#define compat_sp_abt regs[21] +#define compat_lr_und regs[22] +#define compat_sp_und regs[23] #define compat_r8_fiq regs[24] #define compat_r9_fiq regs[25] #define compat_r10_fiq regs[26] From d3a80fbec20ffa27f71295d24ce456934ad8c7a7 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 26 Oct 2015 21:42:33 +0000 Subject: [PATCH 1393/2091] arm64: page-align sections for DEBUG_RODATA commit cb083816ab5ac3d10a9417527f07fc5962cc3808 upstream. A kernel built with DEBUG_RO_DATA && !CONFIG_DEBUG_ALIGN_RODATA doesn't have .text aligned to a page boundary, though fixup_executable works at page-granularity thanks to its use of create_mapping. If .text is not page-aligned, the first page it exists in may be marked non-executable, leading to failures when an attempt is made to execute code in said page. This patch upgrades ALIGN_DEBUG_RO and ALIGN_DEBUG_RO_MIN to force page alignment for DEBUG_RO_DATA && !CONFIG_DEBUG_ALIGN_RODATA kernels, ensuring that all sections with specific RWX permission requirements are mapped with the correct permissions. 
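The vmlinux.lds.S hunk quoted below has lost the text around its shift expressions in this copy. Going by the description above and the 4-insertions/1-deletion diffstat, the macro block most likely ends up as follows (a reconstruction assuming the arm64 SECTION_SHIFT/PAGE_SHIFT constants, not a verbatim quote of the patch):

#if defined(CONFIG_DEBUG_ALIGN_RODATA)
#define ALIGN_DEBUG_RO			. = ALIGN(1 << SECTION_SHIFT);
#define ALIGN_DEBUG_RO_MIN(min)		ALIGN_DEBUG_RO
#elif defined(CONFIG_DEBUG_RODATA)
#define ALIGN_DEBUG_RO			. = ALIGN(1 << PAGE_SHIFT);
#define ALIGN_DEBUG_RO_MIN(min)		ALIGN_DEBUG_RO
#else
#define ALIGN_DEBUG_RO
#define ALIGN_DEBUG_RO_MIN(min)		. = ALIGN(min);
#endif

The net effect is that a CONFIG_DEBUG_RODATA-only build still gets page alignment for the permission-sensitive sections, so the page-granular fixup never flips permissions on a page that straddles .text.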
Signed-off-by: Mark Rutland Reported-by: Jeremy Linton Reviewed-by: Laura Abbott Acked-by: Ard Biesheuvel Cc: Suzuki Poulose Cc: Will Deacon Fixes: da141706aea52c1a ("arm64: add better page protections to arm64") Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/vmlinux.lds.S | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index a2c29865c3fe5..aff07bcad8827 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -54,9 +54,12 @@ PECOFF_FILE_ALIGNMENT = 0x200; #define PECOFF_EDATA_PADDING #endif -#ifdef CONFIG_DEBUG_ALIGN_RODATA +#if defined(CONFIG_DEBUG_ALIGN_RODATA) #define ALIGN_DEBUG_RO . = ALIGN(1< Date: Tue, 3 Nov 2015 11:51:33 +0530 Subject: [PATCH 1394/2091] ath10k: fix invalid NSS for 4x4 devices commit f680f70adbeab28b35f849016b964dd645db6237 upstream. The number of spatial streams that are derived from chain mask for 4x4 devices is using wrong bitmask and conditional check. This is affecting downlink throughput for QCA99x0 devices. Earlier cfg_tx_chainmask is not filled by default until user configured it and so get_nss_from_chainmask never be called. This issue is exposed by recent commit 166de3f1895d ("ath10k: remove supported chain mask"). By default maximum supported chain mask is filled in cfg_tx_chainmask. Fixes: 5572a95b4b ("ath10k: apply chainmask settings to vdev on creation") Signed-off-by: Rajkumar Manoharan Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath10k/mac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index 5e021b0b3f9e2..1734cc50ded81 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -3183,7 +3183,7 @@ static int ath10k_config(struct ieee80211_hw *hw, u32 changed) static u32 get_nss_from_chainmask(u16 chain_mask) { - if ((chain_mask & 0x15) == 0x15) + if ((chain_mask & 0xf) == 0xf) return 4; else if ((chain_mask & 0x7) == 0x7) return 3; From 3ba8381fd8653d7438cdd3fe3e7fc4d9004b7c27 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 26 Oct 2015 08:41:29 +0100 Subject: [PATCH 1395/2091] KVM: s390: SCA must not cross page boundaries commit c5c2c393468576bad6d10b2b5fefff8cd25df3f4 upstream. We seemed to have missed a few corner cases in commit f6c137ff00a4 ("KVM: s390: randomize sca address"). The SCA has a maximum size of 2112 bytes. By setting the sca_offset to some unlucky numbers, we exceed the page. 0x7c0 (1984) -> Fits exactly 0x7d0 (2000) -> 16 bytes out 0x7e0 (2016) -> 32 bytes out 0x7f0 (2032) -> 48 bytes out One VCPU entry is 32 bytes long. For the last two cases, we actually write data to the other page. 1. The address of the VCPU. 2. Injection/delivery/clearing of SIGP externall calls via SIGP IF. Especially the 2. happens regularly. So this could produce two problems: 1. The guest losing/getting external calls. 2. Random memory overwrites in the host. So this problem happens on every 127 + 128 created VM with 64 VCPUs. 
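The arithmetic behind the unlucky offsets, and the bound the fix enforces, as a short worked example (illustrative; assumes the usual 4 KiB PAGE_SIZE and a 2112-byte struct sca_block, i.e. a 64-byte header plus 64 VCPU entries of 32 bytes each):

	/*
	 *   offset 0x7c0 (1984): 1984 + 2112 = 4096  -> fits exactly
	 *   offset 0x7d0 (2000): 2000 + 2112 = 4112  -> 16 bytes past the page
	 *   offset 0x7e0 (2016): 2016 + 2112 = 4128  -> 32 bytes past the page
	 *   offset 0x7f0 (2032): 2032 + 2112 = 4144  -> 48 bytes past the page
	 *
	 * hence the explicit wrap below instead of the old "& 0x7f0" mask:
	 */
	sca_offset += 16;
	if (sca_offset + sizeof(struct sca_block) > PAGE_SIZE)
		sca_offset = 0;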
Acked-by: Christian Borntraeger Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger Signed-off-by: Greg Kroah-Hartman --- arch/s390/kvm/kvm-s390.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 8cd8e7b288c5d..b1aa06b3fe7f4 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1031,7 +1031,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (!kvm->arch.sca) goto out_err; spin_lock(&kvm_lock); - sca_offset = (sca_offset + 16) & 0x7f0; + sca_offset += 16; + if (sca_offset + sizeof(struct sca_block) > PAGE_SIZE) + sca_offset = 0; kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset); spin_unlock(&kvm_lock); From af2bcce2e22c7d59816e890d67ab1af91fe0d95a Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 5 Nov 2015 09:03:50 +0100 Subject: [PATCH 1396/2091] KVM: Provide function for VCPU lookup by id commit db27a7a37aa0b1f8b373f8b0fb72a2ccaafb85b7 upstream. Let's provide a function to lookup a VCPU by id. Reviewed-by: Christian Borntraeger Reviewed-by: Dominik Dingel Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger [split patch from refactoring patch] Signed-off-by: Greg Kroah-Hartman --- include/linux/kvm_host.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ad45054309a0f..29a57a5b7cee0 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -423,6 +423,17 @@ static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i) (vcpup = kvm_get_vcpu(kvm, idx)) != NULL; \ idx++) +static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id) +{ + struct kvm_vcpu *vcpu; + int i; + + kvm_for_each_vcpu(i, vcpu, kvm) + if (vcpu->vcpu_id == id) + return vcpu; + return NULL; +} + #define kvm_for_each_memslot(memslot, slots) \ for (memslot = &slots->memslots[0]; \ memslot < slots->memslots + KVM_MEM_SLOTS_NUM && memslot->npages;\ From 22b017c37086e870866aaba204534c41a4a7e816 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 5 Nov 2015 09:06:06 +0100 Subject: [PATCH 1397/2091] KVM: s390: fix wrong lookup of VCPUs by array index commit 152e9f65d66f0a3891efc3869440becc0e7ff53f upstream. For now, VCPUs were always created sequentially with incrementing VCPU ids. Therefore, the index in the VCPUs array matched the id. As sequential creation might change with cpu hotplug, let's use the correct lookup function to find a VCPU by id, not array index. Let's also use kvm_lookup_vcpu() for validation of the sending VCPU on external call injection. 
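A small usage illustration of why the by-id helper matters once VCPU ids are no longer dense (the id values are hypothetical; the call pattern mirrors the sigp handling in the hunks below):

	/*
	 * With CPU hotplug a guest could end up with vcpu_id values
	 * { 0, 2, 5 } stored at array indexes { 0, 1, 2 }.  Then:
	 *
	 *   kvm_get_vcpu(kvm, 5)       - plain array indexing: out of range
	 *                                or the wrong VCPU entirely
	 *   kvm_get_vcpu_by_id(kvm, 5) - walks the VCPUs and returns the one
	 *                                whose vcpu_id == 5, or NULL
	 */
	struct kvm_vcpu *dst_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, cpu_addr);

	if (!dst_vcpu)
		return SIGP_CC_NOT_OPERATIONAL;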
Reviewed-by: Christian Borntraeger Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger Signed-off-by: Greg Kroah-Hartman --- arch/s390/kvm/interrupt.c | 3 +-- arch/s390/kvm/sigp.c | 8 ++------ 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index b745a109bfc15..5d2667effedd8 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -1054,8 +1054,7 @@ static int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) src_id, 0, 2); /* sending vcpu invalid */ - if (src_id >= KVM_MAX_VCPUS || - kvm_get_vcpu(vcpu->kvm, src_id) == NULL) + if (kvm_get_vcpu_by_id(vcpu->kvm, src_id) == NULL) return -EINVAL; if (sclp_has_sigpif()) diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index 72e58bd2bee78..7171056fc24dd 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -294,12 +294,8 @@ static int handle_sigp_dst(struct kvm_vcpu *vcpu, u8 order_code, u16 cpu_addr, u32 parameter, u64 *status_reg) { int rc; - struct kvm_vcpu *dst_vcpu; + struct kvm_vcpu *dst_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, cpu_addr); - if (cpu_addr >= KVM_MAX_VCPUS) - return SIGP_CC_NOT_OPERATIONAL; - - dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); if (!dst_vcpu) return SIGP_CC_NOT_OPERATIONAL; @@ -481,7 +477,7 @@ int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu) trace_kvm_s390_handle_sigp_pei(vcpu, order_code, cpu_addr); if (order_code == SIGP_EXTERNAL_CALL) { - dest_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); + dest_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, cpu_addr); BUG_ON(dest_vcpu == NULL); kvm_s390_vcpu_wakeup(dest_vcpu); From 1c122e95e73cfafcea67c821240ff828cc57c6cf Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 5 Nov 2015 09:38:15 +0100 Subject: [PATCH 1398/2091] KVM: s390: avoid memory overwrites on emergency signal injection commit b85de33a1a3433487b6a721cfdce25ec8673e622 upstream. Commit 383d0b050106 ("KVM: s390: handle pending local interrupts via bitmap") introduced a possible memory overwrite from user space. User space could pass an invalid emergency signal code (sending VCPU) and therefore exceed the bitmap. Let's take care of this case and check that the id is in the valid range. Reviewed-by: Dominik Dingel Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger Signed-off-by: Greg Kroah-Hartman --- arch/s390/kvm/interrupt.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 5d2667effedd8..3dbba9a2bb0f7 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -1133,6 +1133,10 @@ static int __inject_sigp_emergency(struct kvm_vcpu *vcpu, trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY, irq->u.emerg.code, 0, 2); + /* sending vcpu invalid */ + if (kvm_get_vcpu_by_id(vcpu->kvm, irq->u.emerg.code) == NULL) + return -EINVAL; + set_bit(irq->u.emerg.code, li->sigp_emerg_pending); set_bit(IRQ_PEND_EXT_EMERGENCY, &li->pending_irqs); atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); From 6ea81ff2449fe9f22c5fdaca530a17a7b0f4b25a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 31 Aug 2015 19:48:28 +0300 Subject: [PATCH 1399/2091] Revert "usb: dwc3: gadget: drop unnecessary loop when cleaning up TRBs" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d115d7050a0d2c4967532f18c9cb522fea6b7280 upstream. This reverts commit 8f2c9544aba636134303105ecb164190a39dece4. 
As it breaks g_ether on my Baytrail FFRD8 device. Everything starts out fine, but after a bit of data has been transferred it just stops flowing. Note that I do get a bunch of these "NOHZ: local_softirq_pending 08" when booting the machine, but I'm not really sure if they're related to this problem. Cc: Felipe Balbi Cc: Greg Kroah-Hartman Cc: linux-usb@vger.kernel.org Cc: stable@vger.kernel.org Signed-off-by: Ville Syrjälä Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 37 +++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 333a7c0078fcf..f0a30ce0d2d6f 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1859,27 +1859,32 @@ static int dwc3_cleanup_done_reqs(struct dwc3 *dwc, struct dwc3_ep *dep, unsigned int i; int ret; - req = next_request(&dep->req_queued); - if (!req) { - WARN_ON_ONCE(1); - return 1; - } - i = 0; do { - slot = req->start_slot + i; - if ((slot == DWC3_TRB_NUM - 1) && + req = next_request(&dep->req_queued); + if (!req) { + WARN_ON_ONCE(1); + return 1; + } + i = 0; + do { + slot = req->start_slot + i; + if ((slot == DWC3_TRB_NUM - 1) && usb_endpoint_xfer_isoc(dep->endpoint.desc)) - slot++; - slot %= DWC3_TRB_NUM; - trb = &dep->trb_pool[slot]; + slot++; + slot %= DWC3_TRB_NUM; + trb = &dep->trb_pool[slot]; + + ret = __dwc3_cleanup_done_trbs(dwc, dep, req, trb, + event, status); + if (ret) + break; + } while (++i < req->request.num_mapped_sgs); + + dwc3_gadget_giveback(dep, req, status); - ret = __dwc3_cleanup_done_trbs(dwc, dep, req, trb, - event, status); if (ret) break; - } while (++i < req->request.num_mapped_sgs); - - dwc3_gadget_giveback(dep, req, status); + } while (1); if (usb_endpoint_xfer_isoc(dep->endpoint.desc) && list_empty(&dep->req_queued)) { From 7ea7903e91418e5adde88c40fb65ba8047e77f67 Mon Sep 17 00:00:00 2001 From: Douglas Gilbert Date: Mon, 16 Nov 2015 19:22:08 +0100 Subject: [PATCH 1400/2091] usb: gadget: atmel_usba_udc: Expose correct device speed commit d134c48d889ddceadf4c990e6f3df16b816ed5d4 upstream. Following changes that appeared in lk 4.0.0, the gadget udc driver for some ARM based Atmel SoCs (e.g. at91sam9x5 and sama5d3 families) incorrectly deduced full-speed USB link speed even when the hardware had negotiated a high-speed link. The fix is to make sure that the UDPHS Interrupt Enable Register value does not mask the SPEED bit in the Interrupt Status Register. For a mass storage gadget this problem lead to failures when the host had a USB 3 port with the xhci_hcd driver. If the host was a USB 2 port using the ehci_hcd driver then the mass storage gadget worked (but probably at a lower speed than it should have). 
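A bit-level illustration of the masking bug (the bit positions are made up for the example; only the shape of the expression matters):

	/*
	 * Suppose INT_STA reads 0x00008200, where bit 15 is the negotiated
	 * high-speed flag and bit 9 is an enabled endpoint interrupt, while
	 * int_enb only has bit 9 set -- the speed flag is a status bit, not
	 * an enabled interrupt source, so it never appears in INT_ENB:
	 *
	 *   INT_STA & int_enb                      = 0x0200  (speed bit lost)
	 *   INT_STA & (int_enb | USBA_HIGH_SPEED)  = 0x8200  (speed bit kept)
	 *
	 * With the speed bit visible again, the IRQ handler can report the
	 * negotiated high-speed link instead of defaulting to full speed.
	 */
	status = usba_readl(udc, INT_STA) & (int_enb | USBA_HIGH_SPEED);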
Signed-off-by: Douglas Gilbert Reviewed-by: Boris Brezillon Fixes: 9870d895ad87 ("usb: atmel_usba_udc: Mask status with enabled irqs") Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/atmel_usba_udc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/udc/atmel_usba_udc.c b/drivers/usb/gadget/udc/atmel_usba_udc.c index 351d48550c332..d6ca3697d3c8d 100644 --- a/drivers/usb/gadget/udc/atmel_usba_udc.c +++ b/drivers/usb/gadget/udc/atmel_usba_udc.c @@ -1634,7 +1634,7 @@ static irqreturn_t usba_udc_irq(int irq, void *devid) spin_lock(&udc->lock); int_enb = usba_int_enb_get(udc); - status = usba_readl(udc, INT_STA) & int_enb; + status = usba_readl(udc, INT_STA) & (int_enb | USBA_HIGH_SPEED); DBG(DBG_INT, "irq, status=%#08x\n", status); if (status & USBA_DET_SUSPEND) { From d23ccdaf7fce2f728db596f32b991b048be6f694 Mon Sep 17 00:00:00 2001 From: Ben McCauley Date: Mon, 16 Nov 2015 10:47:24 -0600 Subject: [PATCH 1401/2091] usb: dwc3: gadget: let us set lower max_speed commit b9e51b2b1fda19143f48d182ed7a2943f21e1ae4 upstream. In some SoCs, dwc3 is implemented as a USB2.0 only core, meaning that it can't ever achieve SuperSpeed. Currect driver always sets gadget.max_speed to USB_SPEED_SUPER unconditionally. This can causes issues to some Host stacks where the host will issue a GetBOS() request and we will reply with a BOS containing Superspeed Capability Descriptor. At least Windows seems to be upset by this fact and prints a warning that we should connect $this device to another port. [ balbi@ti.com : rewrote entire commit, including source code comment to make a lot clearer what the problem is ] Signed-off-by: Ben McCauley Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index f0a30ce0d2d6f..6fbf461d523c6 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -2714,11 +2714,33 @@ int dwc3_gadget_init(struct dwc3 *dwc) } dwc->gadget.ops = &dwc3_gadget_ops; - dwc->gadget.max_speed = USB_SPEED_SUPER; dwc->gadget.speed = USB_SPEED_UNKNOWN; dwc->gadget.sg_supported = true; dwc->gadget.name = "dwc3-gadget"; + /* + * FIXME We might be setting max_speed to revision < DWC3_REVISION_220A) + dwc3_trace(trace_dwc3_gadget, + "Changing max_speed on rev %08x\n", + dwc->revision); + + dwc->gadget.max_speed = dwc->maximum_speed; + /* * Per databook, DWC3 needs buffer size to be aligned to MaxPacketSize * on ep out. From 13ba3569a6693d05c345590cee38ac3593f36640 Mon Sep 17 00:00:00 2001 From: Li Jun Date: Fri, 12 Dec 2014 09:11:42 +0800 Subject: [PATCH 1402/2091] usb: chipidea: otg: gadget module load and unload support commit 85da852df66e5e0d3aba761b0fece7c958ff0685 upstream. This patch is to support load and unload gadget driver in full OTG mode. 
Signed-off-by: Li Jun Signed-off-by: Peter Chen Tested-by: Jiada Wang Signed-off-by: Greg Kroah-Hartman --- drivers/usb/chipidea/udc.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/usb/chipidea/udc.c b/drivers/usb/chipidea/udc.c index 6e53c24fa1cb2..92937c14f8189 100644 --- a/drivers/usb/chipidea/udc.c +++ b/drivers/usb/chipidea/udc.c @@ -1730,6 +1730,22 @@ static int ci_udc_start(struct usb_gadget *gadget, return retval; } +static void ci_udc_stop_for_otg_fsm(struct ci_hdrc *ci) +{ + if (!ci_otg_is_fsm_mode(ci)) + return; + + mutex_lock(&ci->fsm.lock); + if (ci->fsm.otg->state == OTG_STATE_A_PERIPHERAL) { + ci->fsm.a_bidl_adis_tmout = 1; + ci_hdrc_otg_fsm_start(ci); + } else if (ci->fsm.otg->state == OTG_STATE_B_PERIPHERAL) { + ci->fsm.protocol = PROTO_UNDEF; + ci->fsm.otg->state = OTG_STATE_UNDEFINED; + } + mutex_unlock(&ci->fsm.lock); +} + /** * ci_udc_stop: unregister a gadget driver */ @@ -1754,6 +1770,7 @@ static int ci_udc_stop(struct usb_gadget *gadget) ci->driver = NULL; spin_unlock_irqrestore(&ci->lock, flags); + ci_udc_stop_for_otg_fsm(ci); return 0; } From 443635f85070a57047c5fdc3b24e5454bda71052 Mon Sep 17 00:00:00 2001 From: John Youn Date: Fri, 7 Aug 2015 11:04:14 -0700 Subject: [PATCH 1403/2091] usb: dwc3: pci: Add the Synopsys HAPS AXI Product ID commit 41adc59caece02aa2e988a0e8f9fe8e6f426f82e upstream. This ID is for the Synopsys DWC_usb3 core with AXI interface on PCIe HAPS platform. This core has the debug registers mapped at a separate BAR in order to support enhanced hibernation. Signed-off-by: John Youn Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-pci.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c index b773fb53d6a7c..746ff78fa4ed2 100644 --- a/drivers/usb/dwc3/dwc3-pci.c +++ b/drivers/usb/dwc3/dwc3-pci.c @@ -25,6 +25,7 @@ #include "platform_data.h" #define PCI_DEVICE_ID_SYNOPSYS_HAPSUSB3 0xabcd +#define PCI_DEVICE_ID_SYNOPSYS_HAPSUSB3_AXI 0xabce #define PCI_DEVICE_ID_INTEL_BYT 0x0f37 #define PCI_DEVICE_ID_INTEL_MRFLD 0x119e #define PCI_DEVICE_ID_INTEL_BSW 0x22B7 @@ -136,6 +137,10 @@ static const struct pci_device_id dwc3_pci_id_table[] = { PCI_DEVICE(PCI_VENDOR_ID_SYNOPSYS, PCI_DEVICE_ID_SYNOPSYS_HAPSUSB3), }, + { + PCI_DEVICE(PCI_VENDOR_ID_SYNOPSYS, + PCI_DEVICE_ID_SYNOPSYS_HAPSUSB3_AXI), + }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BSW), }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BYT), }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MRFLD), }, From 9426f37e18d0841732906aa0d478c135d439a513 Mon Sep 17 00:00:00 2001 From: John Youn Date: Fri, 7 Aug 2015 11:47:25 -0700 Subject: [PATCH 1404/2091] usb: dwc3: pci: Add the PCI Product ID for Synopsys USB 3.1 commit e8095a25364a30216ad40dbe8893ed5c3c235949 upstream. This adds the PCI product ID for the Synopsys USB 3.1 IP core (DWC_usb31) on a HAPS-based PCI development platform. 
Signed-off-by: John Youn Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-pci.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c index 746ff78fa4ed2..4df2e32c2233a 100644 --- a/drivers/usb/dwc3/dwc3-pci.c +++ b/drivers/usb/dwc3/dwc3-pci.c @@ -26,6 +26,7 @@ #define PCI_DEVICE_ID_SYNOPSYS_HAPSUSB3 0xabcd #define PCI_DEVICE_ID_SYNOPSYS_HAPSUSB3_AXI 0xabce +#define PCI_DEVICE_ID_SYNOPSYS_HAPSUSB31 0xabcf #define PCI_DEVICE_ID_INTEL_BYT 0x0f37 #define PCI_DEVICE_ID_INTEL_MRFLD 0x119e #define PCI_DEVICE_ID_INTEL_BSW 0x22B7 @@ -141,6 +142,10 @@ static const struct pci_device_id dwc3_pci_id_table[] = { PCI_DEVICE(PCI_VENDOR_ID_SYNOPSYS, PCI_DEVICE_ID_SYNOPSYS_HAPSUSB3_AXI), }, + { + PCI_DEVICE(PCI_VENDOR_ID_SYNOPSYS, + PCI_DEVICE_ID_SYNOPSYS_HAPSUSB31), + }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BSW), }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BYT), }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MRFLD), }, From ee7683c9fe19be73f9833c8e9ede856fc10dec6e Mon Sep 17 00:00:00 2001 From: John Youn Date: Fri, 4 Sep 2015 19:15:10 -0700 Subject: [PATCH 1405/2091] usb: dwc3: Support Synopsys USB 3.1 IP commit 690fb3718a70c66004342f6f5e2e8a5f95b977db upstream. This patch allows the dwc3 driver to run on the new Synopsys USB 3.1 IP core, albeit in USB 3.0 mode only. The Synopsys USB 3.1 IP (DWC_usb31) retains mostly the same register interface and programming model as the existing USB 3.0 controller IP (DWC_usb3). However the GSNPSID and version numbers are different. Add checking for the new ID to pass driver probe. Also, since the DWC_usb31 version number is lower in value than the full GSNPSID of the DWC_usb3 IP, we set the high bit to identify DWC_usb31 and to ensure the values are higher. Finally, add a documentation note about the revision numbering scheme. Any future revision checks (for STARS, workarounds, and new features) should take into consideration how it applies to both the 3.1/3.0 IP. 
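A worked example of the version-ordering trick described above (constants taken from the hunks that follow; the comparison values are only meant to show the ordering):

	/*
	 *   DWC_usb3  GSNPSID:         0x5533280a  ("U3" + hex revision 2.80a)
	 *   DWC_usb31 DWC3_VER_NUMBER: 0x3131302a  (version 1.10a)
	 *
	 * Compared raw, every 3.1 core would look "older" than every 3.0
	 * core (0x3131302a < 0x5533173a), so bit 31 is ORed in for 3.1:
	 *
	 *   0x3131302a | 0x80000000 = 0xb131302a  >  0x5533280a
	 *
	 * which keeps existing "dwc->revision >= DWC3_REVISION_xxx" checks
	 * meaningful across both IPs.
	 */
	dwc->revision = dwc3_readl(dwc->regs, DWC3_VER_NUMBER);
	dwc->revision |= DWC3_REVISION_IS_DWC31;

(DWC3_REVISION_IS_DWC31 is the same bit-31 flag that the new DWC3_USB31_REVISION_110A constant in the core.h hunk is built from.)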
Signed-off-by: John Youn Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/core.c | 10 ++++++++-- drivers/usb/dwc3/core.h | 18 ++++++++++++++++++ 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 2bbab3d86fffe..8e9518fe77638 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -445,12 +445,18 @@ static int dwc3_core_init(struct dwc3 *dwc) reg = dwc3_readl(dwc->regs, DWC3_GSNPSID); /* This should read as U3 followed by revision number */ - if ((reg & DWC3_GSNPSID_MASK) != 0x55330000) { + if ((reg & DWC3_GSNPSID_MASK) == 0x55330000) { + /* Detected DWC_usb3 IP */ + dwc->revision = reg; + } else if ((reg & DWC3_GSNPSID_MASK) == 0x33310000) { + /* Detected DWC_usb31 IP */ + dwc->revision = dwc3_readl(dwc->regs, DWC3_VER_NUMBER); + dwc->revision |= DWC3_REVISION_IS_DWC31; + } else { dev_err(dwc->dev, "this is not a DesignWare USB3 DRD Core\n"); ret = -ENODEV; goto err0; } - dwc->revision = reg; /* * Write Linux Version Code to our GUID register so it's easy to figure diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index c0eafa6fd4031..173edd4ca20eb 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -107,6 +107,9 @@ #define DWC3_GPRTBIMAP_FS0 0xc188 #define DWC3_GPRTBIMAP_FS1 0xc18c +#define DWC3_VER_NUMBER 0xc1a0 +#define DWC3_VER_TYPE 0xc1a4 + #define DWC3_GUSB2PHYCFG(n) (0xc200 + (n * 0x04)) #define DWC3_GUSB2I2CCTL(n) (0xc240 + (n * 0x04)) @@ -752,6 +755,14 @@ struct dwc3 { u32 num_event_buffers; u32 u1u2; u32 maximum_speed; + + /* + * All 3.1 IP version constants are greater than the 3.0 IP + * version constants. This works for most version checks in + * dwc3. However, in the future, this may not apply as + * features may be developed on newer versions of the 3.0 IP + * that are not in the 3.1 IP. + */ u32 revision; #define DWC3_REVISION_173A 0x5533173a @@ -774,6 +785,13 @@ struct dwc3 { #define DWC3_REVISION_270A 0x5533270a #define DWC3_REVISION_280A 0x5533280a +/* + * NOTICE: we're using bit 31 as a "is usb 3.1" flag. This is really + * just so dwc31 revisions are always larger than dwc3. + */ +#define DWC3_REVISION_IS_DWC31 0x80000000 +#define DWC3_USB31_REVISION_110A (0x3131302a | DWC3_REVISION_IS_USB31) + enum dwc3_ep0_next ep0_next_event; enum dwc3_ep0_state ep0state; enum dwc3_link_state link_state; From 697fdbae08daee5004883a928618dcd086d25d72 Mon Sep 17 00:00:00 2001 From: John Youn Date: Sat, 26 Sep 2015 00:11:15 -0700 Subject: [PATCH 1406/2091] usb: dwc3: pci: Add platform data for Synopsys HAPS commit bb7f3d6d323a56b9c3b3e727380d1395a7f10107 upstream. Add platform data and set usb3_lpm_capable and has_lpm_erratum. 
Signed-off-by: John Youn Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-pci.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c index 4df2e32c2233a..830f020230c46 100644 --- a/drivers/usb/dwc3/dwc3-pci.c +++ b/drivers/usb/dwc3/dwc3-pci.c @@ -67,6 +67,21 @@ static int dwc3_pci_quirks(struct pci_dev *pdev) sizeof(pdata)); } + if (pdev->vendor == PCI_VENDOR_ID_SYNOPSYS && + (pdev->device == PCI_DEVICE_ID_SYNOPSYS_HAPSUSB3 || + pdev->device == PCI_DEVICE_ID_SYNOPSYS_HAPSUSB3_AXI || + pdev->device == PCI_DEVICE_ID_SYNOPSYS_HAPSUSB31)) { + + struct dwc3_platform_data pdata; + + memset(&pdata, 0, sizeof(pdata)); + pdata.usb3_lpm_capable = true; + pdata.has_lpm_erratum = true; + + return platform_device_add_data(pci_get_drvdata(pdev), &pdata, + sizeof(pdata)); + } + return 0; } From fca6d6f19873f5070f191046134cc2645308b385 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Wed, 16 Sep 2015 09:40:51 +0800 Subject: [PATCH 1407/2091] usb: chipidea: imx: refine clock operations to adapt for all platforms commit ae3e57ae26cdcc85728bb566f999bcb9a7cc6954 upstream. Some i.mx platforms need three clocks to let controller work, but others only need one, refine clock operation to adapt for all platforms, it fixes a regression found at i.mx27. Signed-off-by: Peter Chen Tested-by: Fabio Estevam Signed-off-by: Greg Kroah-Hartman --- drivers/usb/chipidea/ci_hdrc_imx.c | 131 +++++++++++++++++++++++++---- 1 file changed, 113 insertions(+), 18 deletions(-) diff --git a/drivers/usb/chipidea/ci_hdrc_imx.c b/drivers/usb/chipidea/ci_hdrc_imx.c index fa774323ebda3..846ceb91ec14e 100644 --- a/drivers/usb/chipidea/ci_hdrc_imx.c +++ b/drivers/usb/chipidea/ci_hdrc_imx.c @@ -68,6 +68,12 @@ struct ci_hdrc_imx_data { struct imx_usbmisc_data *usbmisc_data; bool supports_runtime_pm; bool in_lpm; + /* SoC before i.mx6 (except imx23/imx28) needs three clks */ + bool need_three_clks; + struct clk *clk_ipg; + struct clk *clk_ahb; + struct clk *clk_per; + /* --------------------------------- */ }; /* Common functions shared by usbmisc drivers */ @@ -119,6 +125,102 @@ static struct imx_usbmisc_data *usbmisc_get_init_data(struct device *dev) } /* End of common functions shared by usbmisc drivers*/ +static int imx_get_clks(struct device *dev) +{ + struct ci_hdrc_imx_data *data = dev_get_drvdata(dev); + int ret = 0; + + data->clk_ipg = devm_clk_get(dev, "ipg"); + if (IS_ERR(data->clk_ipg)) { + /* If the platform only needs one clocks */ + data->clk = devm_clk_get(dev, NULL); + if (IS_ERR(data->clk)) { + ret = PTR_ERR(data->clk); + dev_err(dev, + "Failed to get clks, err=%ld,%ld\n", + PTR_ERR(data->clk), PTR_ERR(data->clk_ipg)); + return ret; + } + return ret; + } + + data->clk_ahb = devm_clk_get(dev, "ahb"); + if (IS_ERR(data->clk_ahb)) { + ret = PTR_ERR(data->clk_ahb); + dev_err(dev, + "Failed to get ahb clock, err=%d\n", ret); + return ret; + } + + data->clk_per = devm_clk_get(dev, "per"); + if (IS_ERR(data->clk_per)) { + ret = PTR_ERR(data->clk_per); + dev_err(dev, + "Failed to get per clock, err=%d\n", ret); + return ret; + } + + data->need_three_clks = true; + return ret; +} + +static int imx_prepare_enable_clks(struct device *dev) +{ + struct ci_hdrc_imx_data *data = dev_get_drvdata(dev); + int ret = 0; + + if (data->need_three_clks) { + ret = clk_prepare_enable(data->clk_ipg); + if (ret) { + dev_err(dev, + "Failed to prepare/enable ipg clk, err=%d\n", + ret); + return ret; + } + + ret = 
clk_prepare_enable(data->clk_ahb); + if (ret) { + dev_err(dev, + "Failed to prepare/enable ahb clk, err=%d\n", + ret); + clk_disable_unprepare(data->clk_ipg); + return ret; + } + + ret = clk_prepare_enable(data->clk_per); + if (ret) { + dev_err(dev, + "Failed to prepare/enable per clk, err=%d\n", + ret); + clk_disable_unprepare(data->clk_ahb); + clk_disable_unprepare(data->clk_ipg); + return ret; + } + } else { + ret = clk_prepare_enable(data->clk); + if (ret) { + dev_err(dev, + "Failed to prepare/enable clk, err=%d\n", + ret); + return ret; + } + } + + return ret; +} + +static void imx_disable_unprepare_clks(struct device *dev) +{ + struct ci_hdrc_imx_data *data = dev_get_drvdata(dev); + + if (data->need_three_clks) { + clk_disable_unprepare(data->clk_per); + clk_disable_unprepare(data->clk_ahb); + clk_disable_unprepare(data->clk_ipg); + } else { + clk_disable_unprepare(data->clk); + } +} static int ci_hdrc_imx_probe(struct platform_device *pdev) { @@ -137,23 +239,18 @@ static int ci_hdrc_imx_probe(struct platform_device *pdev) if (!data) return -ENOMEM; + platform_set_drvdata(pdev, data); data->usbmisc_data = usbmisc_get_init_data(&pdev->dev); if (IS_ERR(data->usbmisc_data)) return PTR_ERR(data->usbmisc_data); - data->clk = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(data->clk)) { - dev_err(&pdev->dev, - "Failed to get clock, err=%ld\n", PTR_ERR(data->clk)); - return PTR_ERR(data->clk); - } + ret = imx_get_clks(&pdev->dev); + if (ret) + return ret; - ret = clk_prepare_enable(data->clk); - if (ret) { - dev_err(&pdev->dev, - "Failed to prepare or enable clock, err=%d\n", ret); + ret = imx_prepare_enable_clks(&pdev->dev); + if (ret) return ret; - } data->phy = devm_usb_get_phy_by_phandle(&pdev->dev, "fsl,usbphy", 0); if (IS_ERR(data->phy)) { @@ -196,8 +293,6 @@ static int ci_hdrc_imx_probe(struct platform_device *pdev) goto disable_device; } - platform_set_drvdata(pdev, data); - if (data->supports_runtime_pm) { pm_runtime_set_active(&pdev->dev); pm_runtime_enable(&pdev->dev); @@ -210,7 +305,7 @@ static int ci_hdrc_imx_probe(struct platform_device *pdev) disable_device: ci_hdrc_remove_device(data->ci_pdev); err_clk: - clk_disable_unprepare(data->clk); + imx_disable_unprepare_clks(&pdev->dev); return ret; } @@ -224,7 +319,7 @@ static int ci_hdrc_imx_remove(struct platform_device *pdev) pm_runtime_put_noidle(&pdev->dev); } ci_hdrc_remove_device(data->ci_pdev); - clk_disable_unprepare(data->clk); + imx_disable_unprepare_clks(&pdev->dev); return 0; } @@ -236,7 +331,7 @@ static int imx_controller_suspend(struct device *dev) dev_dbg(dev, "at %s\n", __func__); - clk_disable_unprepare(data->clk); + imx_disable_unprepare_clks(dev); data->in_lpm = true; return 0; @@ -254,7 +349,7 @@ static int imx_controller_resume(struct device *dev) return 0; } - ret = clk_prepare_enable(data->clk); + ret = imx_prepare_enable_clks(dev); if (ret) return ret; @@ -269,7 +364,7 @@ static int imx_controller_resume(struct device *dev) return 0; clk_disable: - clk_disable_unprepare(data->clk); + imx_disable_unprepare_clks(dev); return ret; } From 3577eb6d02c3f5bf751f60f125e34c2197605c65 Mon Sep 17 00:00:00 2001 From: Jurgen Kramer Date: Mon, 9 Nov 2015 12:13:55 +0100 Subject: [PATCH 1408/2091] ALSA: usb: Add native DSD support for Aune X1S commit 16771c7c704769c5f3d70c024630b6e5b3eafa67 upstream. 
This patch adds native DSD support for the Aune X1S 32BIT/384 DSD DAC Signed-off-by: Jurgen Kramer Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 00ebc0ca008e0..528fa6e48293f 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1271,6 +1271,7 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip, case USB_ID(0x20b1, 0x000a): /* Gustard DAC-X20U */ case USB_ID(0x20b1, 0x2009): /* DIYINHK DSD DXD 384kHz USB to I2S/DSD */ case USB_ID(0x20b1, 0x2023): /* JLsounds I2SoverUSB */ + case USB_ID(0x20b1, 0x3023): /* Aune X1S 32BIT/384 DSD DAC */ if (fp->altsetting == 3) return SNDRV_PCM_FMTBIT_DSD_U32_BE; break; From 81f136ae6cac5c2fd92136bd530cd69a42dbddfa Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Sun, 23 Aug 2015 15:01:08 +0200 Subject: [PATCH 1409/2091] usb: ehci-orion: fix probe for !GENERIC_PHY commit db1319e166c5e872c4be54eac4e47454133708cf upstream. Commit d445913ce0ab7f ("usb: ehci-orion: add optional PHY support") added support for optional phys, but devm_phy_optional_get returns -ENOSYS if GENERIC_PHY is not enabled. This causes probe failures, even when there are no phys specified: [ 1.443365] orion-ehci f1058000.usb: init f1058000.usb fail, -38 [ 1.449403] orion-ehci: probe of f1058000.usb failed with error -38 Similar to dwc3, treat -ENOSYS as no phy. Fixes: d445913ce0ab7f ("usb: ehci-orion: add optional PHY support") Signed-off-by: Jonas Gorski Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ehci-orion.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/ehci-orion.c b/drivers/usb/host/ehci-orion.c index bfcbb9aa8816b..ee8d5faa01947 100644 --- a/drivers/usb/host/ehci-orion.c +++ b/drivers/usb/host/ehci-orion.c @@ -224,7 +224,8 @@ static int ehci_orion_drv_probe(struct platform_device *pdev) priv->phy = devm_phy_optional_get(&pdev->dev, "usb"); if (IS_ERR(priv->phy)) { err = PTR_ERR(priv->phy); - goto err_phy_get; + if (err != -ENOSYS) + goto err_phy_get; } else { err = phy_init(priv->phy); if (err) From 058b97489fc05b9aa19456c83084104e6bfbfc72 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 2 Nov 2015 10:27:00 +0100 Subject: [PATCH 1410/2091] usblp: do not set TASK_INTERRUPTIBLE before lock commit 19cd80a214821f4b558560ebd76bfb2c38b4f3d8 upstream. It is not permitted to set task state before lock. usblp_wwait sets the state to TASK_INTERRUPTIBLE and calls mutex_lock_interruptible. Upon return from that function, the state will be TASK_RUNNING again. This is clearly a bug and a warning is generated with LOCKDEP too: WARNING: CPU: 1 PID: 5109 at kernel/sched/core.c:7404 __might_sleep+0x7d/0x90() do not call blocking ops when !TASK_RUNNING; state=1 set at [] usblp_wwait+0xa0/0x310 [usblp] Modules linked in: ... CPU: 1 PID: 5109 Comm: captmon Tainted: G W 4.2.5-0.gef2823b-default #1 Hardware name: LENOVO 23252SG/23252SG, BIOS G2ET33WW (1.13 ) 07/24/2012 ffffffff81a4edce ffff880236ec7ba8 ffffffff81716651 0000000000000000 ffff880236ec7bf8 ffff880236ec7be8 ffffffff8106e146 0000000000000282 ffffffff81a50119 000000000000028b 0000000000000000 ffff8802dab7c508 Call Trace: ... [] warn_slowpath_fmt+0x46/0x50 [] __might_sleep+0x7d/0x90 [] mutex_lock_interruptible_nested+0x2f/0x4b0 [] usblp_wwait+0xcc/0x310 [usblp] [] usblp_write+0x72/0x350 [usblp] [] __vfs_write+0x28/0xf0 ... 
Commit 7f477358e2384c54b190cc3b6ce28277050a041b (usblp: Implement the ENOSPC convention) moved the set prior locking. So move it back after the lock. Signed-off-by: Jiri Slaby Fixes: 7f477358e2 ("usblp: Implement the ENOSPC convention") Acked-By: Pete Zaitcev Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/usblp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/class/usblp.c b/drivers/usb/class/usblp.c index 0924ee40a9661..b9adc2ec49ddb 100644 --- a/drivers/usb/class/usblp.c +++ b/drivers/usb/class/usblp.c @@ -869,11 +869,11 @@ static int usblp_wwait(struct usblp *usblp, int nonblock) add_wait_queue(&usblp->wwait, &waita); for (;;) { - set_current_state(TASK_INTERRUPTIBLE); if (mutex_lock_interruptible(&usblp->mut)) { rc = -EINTR; break; } + set_current_state(TASK_INTERRUPTIBLE); rc = usblp_wtest(usblp, nonblock); mutex_unlock(&usblp->mut); if (rc <= 0) From 93414949d3eba5e70ee931c389508ce09cc56607 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20=C5=A0tetiar?= Date: Tue, 3 Nov 2015 11:25:28 +0100 Subject: [PATCH 1411/2091] USB: qcserial: Add support for Quectel EC20 Mini PCIe module MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 9d5b5ed796d7afd7e8d2ac4b4fb77c6a49463f4b upstream. It seems like this device has same vendor and product IDs as G2K devices, but it has different number of interfaces(4 vs 5) and also different interface layout which makes it currently unusable: usbcore: registered new interface driver qcserial usbserial: USB Serial support registered for Qualcomm USB modem usb 2-1.2: unknown number of interfaces: 5 lsusb output: Bus 002 Device 003: ID 05c6:9215 Qualcomm, Inc. Acer Gobi 2000 Wireless Device Descriptor: bLength 18 bDescriptorType 1 bcdUSB 2.00 bDeviceClass 0 (Defined at Interface level) bDeviceSubClass 0 bDeviceProtocol 0 bMaxPacketSize0 64 idVendor 0x05c6 Qualcomm, Inc. 
idProduct 0x9215 Acer Gobi 2000 Wireless Modem bcdDevice 2.32 iManufacturer 1 Quectel iProduct 2 Quectel LTE Module iSerial 0 bNumConfigurations 1 Configuration Descriptor: bLength 9 bDescriptorType 2 wTotalLength 209 bNumInterfaces 5 bConfigurationValue 1 iConfiguration 0 bmAttributes 0xa0 (Bus Powered) Remote Wakeup MaxPower 500mA Signed-off-by: Petr Štetiar [johan: rename define and add comment ] Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/qcserial.c | 40 +++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c index f49d262e926ba..1d7a2a48552c8 100644 --- a/drivers/usb/serial/qcserial.c +++ b/drivers/usb/serial/qcserial.c @@ -22,6 +22,8 @@ #define DRIVER_AUTHOR "Qualcomm Inc" #define DRIVER_DESC "Qualcomm USB Serial driver" +#define QUECTEL_EC20_PID 0x9215 + /* standard device layouts supported by this driver */ enum qcserial_layouts { QCSERIAL_G2K = 0, /* Gobi 2000 */ @@ -169,6 +171,38 @@ static const struct usb_device_id id_table[] = { }; MODULE_DEVICE_TABLE(usb, id_table); +static int handle_quectel_ec20(struct device *dev, int ifnum) +{ + int altsetting = 0; + + /* + * Quectel EC20 Mini PCIe LTE module layout: + * 0: DM/DIAG (use libqcdm from ModemManager for communication) + * 1: NMEA + * 2: AT-capable modem port + * 3: Modem interface + * 4: NDIS + */ + switch (ifnum) { + case 0: + dev_dbg(dev, "Quectel EC20 DM/DIAG interface found\n"); + break; + case 1: + dev_dbg(dev, "Quectel EC20 NMEA GPS interface found\n"); + break; + case 2: + case 3: + dev_dbg(dev, "Quectel EC20 Modem port found\n"); + break; + case 4: + /* Don't claim the QMI/net interface */ + altsetting = -1; + break; + } + + return altsetting; +} + static int qcprobe(struct usb_serial *serial, const struct usb_device_id *id) { struct usb_host_interface *intf = serial->interface->cur_altsetting; @@ -237,6 +271,12 @@ static int qcprobe(struct usb_serial *serial, const struct usb_device_id *id) altsetting = -1; break; case QCSERIAL_G2K: + /* handle non-standard layouts */ + if (nintf == 5 && id->idProduct == QUECTEL_EC20_PID) { + altsetting = handle_quectel_ec20(dev, ifnum); + goto done; + } + /* * Gobi 2K+ USB layout: * 0: QMI/net From 794ffe19dbb74f207e593cb38b3f0c9db5cbb736 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Mon, 16 Nov 2015 13:15:46 +0100 Subject: [PATCH 1412/2091] USB: qcserial: Fix support for HP lt4112 LTE/HSPA+ Gobi 4G Modem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 59536da34513c594af2a6fd35ba65ea45b6960a1 upstream. The DEVICE_HWI type was added under the faulty assumption that Huawei devices based on Qualcomm chipsets and firmware use the static USB interface numbering known from Gobi devices. But this model does not apply to Huawei devices like the HP branded lt4112 (Huawei me906e). Huawei firmwares will dynamically assign interface numbers. Functions are renumbered when the firmware is reconfigured. Fix by changing the DEVICE_HWI type to use a simplified version of Huawei's subclass + protocol scheme: Blacklisting known network interface combinations and assuming the rest are serial. 
Reported-and-tested-by: Muri Nicanor Tested-by: Martin Hauke Fixes: e7181d005e84 ("USB: qcserial: Add support for HP lt4112 LTE/HSPA+ Gobi 4G Modem") Signed-off-by: Bjørn Mork Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/qcserial.c | 54 ++++++++++++++++++++++------------- 1 file changed, 34 insertions(+), 20 deletions(-) diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c index 1d7a2a48552c8..514fa91cf74e4 100644 --- a/drivers/usb/serial/qcserial.c +++ b/drivers/usb/serial/qcserial.c @@ -212,6 +212,10 @@ static int qcprobe(struct usb_serial *serial, const struct usb_device_id *id) __u8 ifnum; int altsetting = -1; + /* we only support vendor specific functions */ + if (intf->desc.bInterfaceClass != USB_CLASS_VENDOR_SPEC) + goto done; + nintf = serial->dev->actconfig->desc.bNumInterfaces; dev_dbg(dev, "Num Interfaces = %d\n", nintf); ifnum = intf->desc.bInterfaceNumber; @@ -337,29 +341,39 @@ static int qcprobe(struct usb_serial *serial, const struct usb_device_id *id) break; case QCSERIAL_HWI: /* - * Huawei layout: - * 0: AT-capable modem port - * 1: DM/DIAG - * 2: AT-capable modem port - * 3: CCID-compatible PCSC interface - * 4: QMI/net - * 5: NMEA + * Huawei devices map functions by subclass + protocol + * instead of interface numbers. The protocol identify + * a specific function, while the subclass indicate a + * specific firmware source + * + * This is a blacklist of functions known to be + * non-serial. The rest are assumed to be serial and + * will be handled by this driver */ - switch (ifnum) { - case 0: - case 2: - dev_dbg(dev, "Modem port found\n"); - break; - case 1: - dev_dbg(dev, "DM/DIAG interface found\n"); - break; - case 5: - dev_dbg(dev, "NMEA GPS interface found\n"); - break; - default: - /* don't claim any unsupported interface */ + switch (intf->desc.bInterfaceProtocol) { + /* QMI combined (qmi_wwan) */ + case 0x07: + case 0x37: + case 0x67: + /* QMI data (qmi_wwan) */ + case 0x08: + case 0x38: + case 0x68: + /* QMI control (qmi_wwan) */ + case 0x09: + case 0x39: + case 0x69: + /* NCM like (huawei_cdc_ncm) */ + case 0x16: + case 0x46: + case 0x76: altsetting = -1; break; + default: + dev_dbg(dev, "Huawei type serial port found (%02x/%02x/%02x)\n", + intf->desc.bInterfaceClass, + intf->desc.bInterfaceSubClass, + intf->desc.bInterfaceProtocol); } break; default: From 6418b4c85b6f2e1f8c1162f0ba2c30c74655c35c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 23 Oct 2015 09:53:50 +0200 Subject: [PATCH 1413/2091] usb: musb: core: fix order of arguments to ulpi write callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 705e63d2b29c8bbf091119084544d353bda70393 upstream. There is a bit of a mess in the order of arguments to the ulpi write callback. There is int ulpi_write(struct ulpi *ulpi, u8 addr, u8 val) in drivers/usb/common/ulpi.c; struct usb_phy_io_ops { ... int (*write)(struct usb_phy *x, u32 val, u32 reg); } in include/linux/usb/phy.h. The callback registered by the musb driver has to comply to the latter, but up to now had "offset" first which effectively made the function broken for correct users. So flip the order and while at it also switch to the parameter names of struct usb_phy_io_ops's write. 
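To see why the old order was broken, consider a caller that follows the struct usb_phy_io_ops convention quoted above (ULPI_FUNC_CTRL is used purely as an example register):

	/*
	 * The phy layer invokes the op as write(phy, val, reg), e.g.
	 *
	 *     phy->io_ops->write(phy, 0x45, ULPI_FUNC_CTRL);
	 *
	 * The old musb callback was declared (phy, offset, data), i.e. with
	 * the register number first, so 0x45 landed in MUSB_ULPI_REG_ADDR
	 * and the register number in MUSB_ULPI_REG_DATA -- address and data
	 * swapped on the wire.  The fix flips the parameters to match the
	 * ops signature:
	 *
	 *     static int musb_ulpi_write(struct usb_phy *phy, u32 val, u32 reg);
	 */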
Fixes: ffb865b1e460 ("usb: musb: add ulpi access operations") Signed-off-by: Uwe Kleine-König Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_core.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index 6dca3d794ced6..9f65d84773726 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -132,7 +132,7 @@ static inline struct musb *dev_to_musb(struct device *dev) /*-------------------------------------------------------------------------*/ #ifndef CONFIG_BLACKFIN -static int musb_ulpi_read(struct usb_phy *phy, u32 offset) +static int musb_ulpi_read(struct usb_phy *phy, u32 reg) { void __iomem *addr = phy->io_priv; int i = 0; @@ -151,7 +151,7 @@ static int musb_ulpi_read(struct usb_phy *phy, u32 offset) * ULPICarKitControlDisableUTMI after clearing POWER_SUSPENDM. */ - musb_writeb(addr, MUSB_ULPI_REG_ADDR, (u8)offset); + musb_writeb(addr, MUSB_ULPI_REG_ADDR, (u8)reg); musb_writeb(addr, MUSB_ULPI_REG_CONTROL, MUSB_ULPI_REG_REQ | MUSB_ULPI_RDN_WR); @@ -176,7 +176,7 @@ static int musb_ulpi_read(struct usb_phy *phy, u32 offset) return ret; } -static int musb_ulpi_write(struct usb_phy *phy, u32 offset, u32 data) +static int musb_ulpi_write(struct usb_phy *phy, u32 val, u32 reg) { void __iomem *addr = phy->io_priv; int i = 0; @@ -191,8 +191,8 @@ static int musb_ulpi_write(struct usb_phy *phy, u32 offset, u32 data) power &= ~MUSB_POWER_SUSPENDM; musb_writeb(addr, MUSB_POWER, power); - musb_writeb(addr, MUSB_ULPI_REG_ADDR, (u8)offset); - musb_writeb(addr, MUSB_ULPI_REG_DATA, (u8)data); + musb_writeb(addr, MUSB_ULPI_REG_ADDR, (u8)reg); + musb_writeb(addr, MUSB_ULPI_REG_DATA, (u8)val); musb_writeb(addr, MUSB_ULPI_REG_CONTROL, MUSB_ULPI_REG_REQ); while (!(musb_readb(addr, MUSB_ULPI_REG_CONTROL) From ea7f479eb3467ca2db0798a1daf3540493eca8a5 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 14 Nov 2015 16:49:30 +0000 Subject: [PATCH 1414/2091] USB: ti_usb_3410_5052: Add Honeywell HGI80 ID commit 1bcb49e663f88bccee35b8688e6a3da2bea31fd4 upstream. The Honeywell HGI80 is a wireless interface to the evohome connected thermostat. It uses a TI 3410 USB-serial port. 
Signed-off-by: David Woodhouse Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/ti_usb_3410_5052.c | 2 ++ drivers/usb/serial/ti_usb_3410_5052.h | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/drivers/usb/serial/ti_usb_3410_5052.c b/drivers/usb/serial/ti_usb_3410_5052.c index e9da41d9fe7fc..2694df2f4559b 100644 --- a/drivers/usb/serial/ti_usb_3410_5052.c +++ b/drivers/usb/serial/ti_usb_3410_5052.c @@ -159,6 +159,7 @@ static const struct usb_device_id ti_id_table_3410[] = { { USB_DEVICE(ABBOTT_VENDOR_ID, ABBOTT_STEREO_PLUG_ID) }, { USB_DEVICE(ABBOTT_VENDOR_ID, ABBOTT_STRIP_PORT_ID) }, { USB_DEVICE(TI_VENDOR_ID, FRI2_PRODUCT_ID) }, + { USB_DEVICE(HONEYWELL_VENDOR_ID, HONEYWELL_HGI80_PRODUCT_ID) }, { } /* terminator */ }; @@ -191,6 +192,7 @@ static const struct usb_device_id ti_id_table_combined[] = { { USB_DEVICE(ABBOTT_VENDOR_ID, ABBOTT_PRODUCT_ID) }, { USB_DEVICE(ABBOTT_VENDOR_ID, ABBOTT_STRIP_PORT_ID) }, { USB_DEVICE(TI_VENDOR_ID, FRI2_PRODUCT_ID) }, + { USB_DEVICE(HONEYWELL_VENDOR_ID, HONEYWELL_HGI80_PRODUCT_ID) }, { } /* terminator */ }; diff --git a/drivers/usb/serial/ti_usb_3410_5052.h b/drivers/usb/serial/ti_usb_3410_5052.h index 4a2423e84d558..98f35c656c02d 100644 --- a/drivers/usb/serial/ti_usb_3410_5052.h +++ b/drivers/usb/serial/ti_usb_3410_5052.h @@ -56,6 +56,10 @@ #define ABBOTT_PRODUCT_ID ABBOTT_STEREO_PLUG_ID #define ABBOTT_STRIP_PORT_ID 0x3420 +/* Honeywell vendor and product IDs */ +#define HONEYWELL_VENDOR_ID 0x10ac +#define HONEYWELL_HGI80_PRODUCT_ID 0x0102 /* Honeywell HGI80 */ + /* Commands */ #define TI_GET_VERSION 0x01 #define TI_GET_PORT_STATUS 0x02 From 31fe65f261093ab3b6ded26a9b53645c3309beb7 Mon Sep 17 00:00:00 2001 From: Aleksander Morgado Date: Wed, 11 Nov 2015 19:51:40 +0100 Subject: [PATCH 1415/2091] USB: serial: option: add support for Novatel MiFi USB620L MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit e07af133c3e2716db25e3e1e1d9f10c2088e9c1a upstream. Also known as Verizon U620L. The device is modeswitched from 1410:9020 to 1410:9022 by selecting the 4th USB configuration: $ sudo usb_modeswitch –v 0x1410 –p 0x9020 –u 4 This configuration provides a ECM interface as well as TTYs ('Enterprise Mode' according to the U620 Linux integration guide). 
Signed-off-by: Aleksander Morgado Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 7c8eb4c4c1752..2ee44355e1903 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -162,6 +162,7 @@ static void option_instat_callback(struct urb *urb); #define NOVATELWIRELESS_PRODUCT_HSPA_EMBEDDED_HIGHSPEED 0x9001 #define NOVATELWIRELESS_PRODUCT_E362 0x9010 #define NOVATELWIRELESS_PRODUCT_E371 0x9011 +#define NOVATELWIRELESS_PRODUCT_U620L 0x9022 #define NOVATELWIRELESS_PRODUCT_G2 0xA010 #define NOVATELWIRELESS_PRODUCT_MC551 0xB001 @@ -1060,6 +1061,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_MC551, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_E362, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_E371, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_U620L, 0xff, 0x00, 0x00) }, { USB_DEVICE(AMOI_VENDOR_ID, AMOI_PRODUCT_H01) }, { USB_DEVICE(AMOI_VENDOR_ID, AMOI_PRODUCT_H01A) }, From 52066c02fc214cd83d0354b98fb12dbe2199095f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Wed, 18 Nov 2015 21:12:33 +0100 Subject: [PATCH 1416/2091] USB: option: add XS Stick W100-2 from 4G Systems MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 638148e20c7f8f6e95017fdc13bce8549a6925e0 upstream. Thomas reports " 4gsystems sells two total different LTE-surfsticks under the same name. .. The newer version of XS Stick W100 is from "omega" .. Under windows the driver switches to the same ID, and uses MI03\6 for network and MI01\6 for modem. .. echo "1c9e 9b01" > /sys/bus/usb/drivers/qmi_wwan/new_id echo "1c9e 9b01" > /sys/bus/usb-serial/drivers/option1/new_id T: Bus=01 Lev=01 Prnt=01 Port=03 Cnt=01 Dev#= 4 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1c9e ProdID=9b01 Rev=02.32 S: Manufacturer=USB Modem S: Product=USB Modem S: SerialNumber= C: #Ifs= 5 Cfg#= 1 Atr=80 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=option I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=option I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan I: If#= 4 Alt= 0 #EPs= 2 Cls=08(stor.) Sub=06 Prot=50 Driver=usb-storage Now all important things are there: wwp0s29f7u2i3 (net), ttyUSB2 (at), cdc-wdm0 (qmi), ttyUSB1 (at) There is also ttyUSB0, but it is not usable, at least not for at. The device works well with qmi and ModemManager-NetworkManager. " Reported-by: Thomas Schäfer Signed-off-by: Bjørn Mork Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 2ee44355e1903..4021846139c93 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -358,6 +358,7 @@ static void option_instat_callback(struct urb *urb); /* This is the 4G XS Stick W14 a.k.a. 
Mobilcom Debitel Surf-Stick * * It seems to contain a Qualcomm QSC6240/6290 chipset */ #define FOUR_G_SYSTEMS_PRODUCT_W14 0x9603 +#define FOUR_G_SYSTEMS_PRODUCT_W100 0x9b01 /* iBall 3.5G connect wireless modem */ #define IBALL_3_5G_CONNECT 0x9605 @@ -523,6 +524,11 @@ static const struct option_blacklist_info four_g_w14_blacklist = { .sendsetup = BIT(0) | BIT(1), }; +static const struct option_blacklist_info four_g_w100_blacklist = { + .sendsetup = BIT(1) | BIT(2), + .reserved = BIT(3), +}; + static const struct option_blacklist_info alcatel_x200_blacklist = { .sendsetup = BIT(0) | BIT(1), .reserved = BIT(4), @@ -1655,6 +1661,9 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE(LONGCHEER_VENDOR_ID, FOUR_G_SYSTEMS_PRODUCT_W14), .driver_info = (kernel_ulong_t)&four_g_w14_blacklist }, + { USB_DEVICE(LONGCHEER_VENDOR_ID, FOUR_G_SYSTEMS_PRODUCT_W100), + .driver_info = (kernel_ulong_t)&four_g_w100_blacklist + }, { USB_DEVICE_INTERFACE_CLASS(LONGCHEER_VENDOR_ID, SPEEDUP_PRODUCT_SU9800, 0xff) }, { USB_DEVICE(LONGCHEER_VENDOR_ID, ZOOM_PRODUCT_4597) }, { USB_DEVICE(LONGCHEER_VENDOR_ID, IBALL_3_5G_CONNECT) }, From 237508eb5ac73c95b51229d9bbcf152da9354156 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Sun, 15 Nov 2015 22:37:44 +0100 Subject: [PATCH 1417/2091] ALSA: usb-audio: add packet size quirk for the Medeli DD305 commit 98d362becb6621bebdda7ed0eac7ad7ec6c37898 upstream. Signed-off-by: Clemens Ladisch Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/midi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/midi.c b/sound/usb/midi.c index 417ebb11cf489..4bed82c70256b 100644 --- a/sound/usb/midi.c +++ b/sound/usb/midi.c @@ -1341,6 +1341,7 @@ static int snd_usbmidi_out_endpoint_create(struct snd_usb_midi *umidi, * Various chips declare a packet size larger than 4 bytes, but * do not actually work with larger packets: */ + case USB_ID(0x0a67, 0x5011): /* Medeli DD305 */ case USB_ID(0x0a92, 0x1020): /* ESI M4U */ case USB_ID(0x1430, 0x474b): /* RedOctane GH MIDI INTERFACE */ case USB_ID(0x15ca, 0x0101): /* Textech USB Midi Cable */ From 96c9346aee41d4fb14fa0ab6fdd81b187f496311 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Sun, 15 Nov 2015 22:38:29 +0100 Subject: [PATCH 1418/2091] ALSA: usb-audio: prevent CH345 multiport output SysEx corruption commit 1ca8b201309d842642f221db7f02f71c0af5be2d upstream. The CH345 USB MIDI chip has two output ports. However, they are multiplexed through one pin, and the number of ports cannot be reduced even for hardware that implements only one connector, so for those devices, data sent to either port ends up on the same hardware output. This becomes a problem when both ports are used at the same time, as longer MIDI commands (such as SysEx messages) are likely to be interrupted by messages from the other port, and thus to get lost. It would not be possible for the driver to detect how many ports the device actually has, except that in practice, _all_ devices built with the CH345 have only one port. So we can just ignore the device's descriptors, and hardcode one output port. 
Signed-off-by: Clemens Ladisch Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/midi.c | 3 +++ sound/usb/quirks-table.h | 11 +++++++++++ sound/usb/quirks.c | 1 + sound/usb/usbaudio.h | 1 + 4 files changed, 16 insertions(+) diff --git a/sound/usb/midi.c b/sound/usb/midi.c index 4bed82c70256b..5010aadb2534f 100644 --- a/sound/usb/midi.c +++ b/sound/usb/midi.c @@ -2374,6 +2374,9 @@ int snd_usbmidi_create(struct snd_card *card, if (err < 0) break; + err = snd_usbmidi_detect_per_port_endpoints(umidi, endpoints); + break; + case QUIRK_MIDI_CH345: err = snd_usbmidi_detect_per_port_endpoints(umidi, endpoints); break; default: diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h index e4756651a52c8..ecc2a4ea014d2 100644 --- a/sound/usb/quirks-table.h +++ b/sound/usb/quirks-table.h @@ -2820,6 +2820,17 @@ YAMAHA_DEVICE(0x7010, "UB99"), .idProduct = 0x1020, }, +/* QinHeng devices */ +{ + USB_DEVICE(0x1a86, 0x752d), + .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) { + .vendor_name = "QinHeng", + .product_name = "CH345", + .ifnum = 1, + .type = QUIRK_MIDI_CH345 + } +}, + /* KeithMcMillen Stringport */ { USB_DEVICE(0x1f38, 0x0001), diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 528fa6e48293f..eef9b8e4b9498 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -535,6 +535,7 @@ int snd_usb_create_quirk(struct snd_usb_audio *chip, [QUIRK_MIDI_CME] = create_any_midi_quirk, [QUIRK_MIDI_AKAI] = create_any_midi_quirk, [QUIRK_MIDI_FTDI] = create_any_midi_quirk, + [QUIRK_MIDI_CH345] = create_any_midi_quirk, [QUIRK_AUDIO_STANDARD_INTERFACE] = create_standard_audio_quirk, [QUIRK_AUDIO_FIXED_ENDPOINT] = create_fixed_stream_quirk, [QUIRK_AUDIO_EDIROL_UAXX] = create_uaxx_quirk, diff --git a/sound/usb/usbaudio.h b/sound/usb/usbaudio.h index 91d0380431b4f..991aa84491cd0 100644 --- a/sound/usb/usbaudio.h +++ b/sound/usb/usbaudio.h @@ -94,6 +94,7 @@ enum quirk_type { QUIRK_MIDI_AKAI, QUIRK_MIDI_US122L, QUIRK_MIDI_FTDI, + QUIRK_MIDI_CH345, QUIRK_AUDIO_STANDARD_INTERFACE, QUIRK_AUDIO_FIXED_ENDPOINT, QUIRK_AUDIO_EDIROL_UAXX, From 4676cd7863ab38502bd7b93854a8924446372d78 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Sun, 15 Nov 2015 22:39:08 +0100 Subject: [PATCH 1419/2091] ALSA: usb-audio: work around CH345 input SysEx corruption commit a91e627e3f0ed820b11d86cdc04df38f65f33a70 upstream. One of the many faults of the QinHeng CH345 USB MIDI interface chip is that it does not handle received SysEx messages correctly -- every second event packet has a wrong code index number, which is the one from the last seen message, instead of 4. For example, the two messages "FE F0 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E F7" result in the following event packets: correct: CH345: 0F FE 00 00 0F FE 00 00 04 F0 01 02 04 F0 01 02 04 03 04 05 0F 03 04 05 04 06 07 08 04 06 07 08 04 09 0A 0B 0F 09 0A 0B 04 0C 0D 0E 04 0C 0D 0E 05 F7 00 00 05 F7 00 00 A class-compliant driver must interpret an event packet with CIN 15 as having a single data byte, so the other two bytes would be ignored. The message received by the host would then be missing two bytes out of six; in this example, "F0 01 02 03 06 07 08 09 0C 0D 0E F7". These corrupted SysEx event packages contain only data bytes, while the CH345 uses event packets with a correct CIN value only for messages with a status byte, so it is possible to distinguish between these two cases by checking for the presence of this status byte. 
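To make that detection rule concrete, here is a stand-alone user-space rendering of the heuristic, fed with the corrupted example packets quoted above. It mirrors the driver logic added later in this patch rather than being the driver code itself; the variable names and the table of standard USB MIDI payload lengths per Code Index Number are written out here for illustration.

/* Hedged sketch: the CH345 SysEx heuristic applied to the commit's example. */
#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>

/* Standard USB MIDI payload length for each CIN value */
static const uint8_t cin_length[] = {
	0, 0, 2, 3, 3, 1, 2, 3, 3, 3, 3, 3, 2, 2, 3, 1
};

int main(void)
{
	/* Event packets as produced by the CH345 (from the commit message) */
	const uint8_t buf[] = {
		0x0F, 0xFE, 0x00, 0x00,  0x04, 0xF0, 0x01, 0x02,
		0x0F, 0x03, 0x04, 0x05,  0x04, 0x06, 0x07, 0x08,
		0x0F, 0x09, 0x0A, 0x0B,  0x04, 0x0C, 0x0D, 0x0E,
		0x05, 0xF7, 0x00, 0x00,
	};
	bool in_sysex = false;
	uint8_t last_cin = 0;

	for (size_t i = 0; i + 3 < sizeof(buf); i += 4) {
		uint8_t cin = buf[i] & 0x0f;

		/* Inside a SysEx, a packet repeating the previous CIN but
		 * carrying no status byte is really a continuation (CIN 4). */
		if (in_sysex && cin == last_cin &&
		    (buf[i + 1 + (cin == 0x6)] & 0x80) == 0)
			cin = 0x4;

		for (uint8_t j = 0; j < cin_length[cin]; j++)
			printf("%02X ", buf[i + 1 + j]);

		in_sysex = (cin == 0x4);
		if (!in_sysex)
			last_cin = cin;
	}
	/* Prints: FE F0 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E F7 */
	printf("\n");
	return 0;
}

Running this recovers the original "FE F0 01 ... F7" stream, which is exactly what the workaround below does inside the driver.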
(Other bugs in the CH345's input handling, such as the corruption resulting from running status, cannot be worked around.) Signed-off-by: Clemens Ladisch Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/midi.c | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/sound/usb/midi.c b/sound/usb/midi.c index 5010aadb2534f..bec63e0d26056 100644 --- a/sound/usb/midi.c +++ b/sound/usb/midi.c @@ -174,6 +174,8 @@ struct snd_usb_midi_in_endpoint { u8 running_status_length; } ports[0x10]; u8 seen_f5; + bool in_sysex; + u8 last_cin; u8 error_resubmit; int current_port; }; @@ -467,6 +469,39 @@ static void snd_usbmidi_maudio_broken_running_status_input( } } +/* + * QinHeng CH345 is buggy: every second packet inside a SysEx has not CIN 4 + * but the previously seen CIN, but still with three data bytes. + */ +static void ch345_broken_sysex_input(struct snd_usb_midi_in_endpoint *ep, + uint8_t *buffer, int buffer_length) +{ + unsigned int i, cin, length; + + for (i = 0; i + 3 < buffer_length; i += 4) { + if (buffer[i] == 0 && i > 0) + break; + cin = buffer[i] & 0x0f; + if (ep->in_sysex && + cin == ep->last_cin && + (buffer[i + 1 + (cin == 0x6)] & 0x80) == 0) + cin = 0x4; +#if 0 + if (buffer[i + 1] == 0x90) { + /* + * Either a corrupted running status or a real note-on + * message; impossible to detect reliably. + */ + } +#endif + length = snd_usbmidi_cin_length[cin]; + snd_usbmidi_input_data(ep, 0, &buffer[i + 1], length); + ep->in_sysex = cin == 0x4; + if (!ep->in_sysex) + ep->last_cin = cin; + } +} + /* * CME protocol: like the standard protocol, but SysEx commands are sent as a * single USB packet preceded by a 0x0F byte. @@ -660,6 +695,12 @@ static struct usb_protocol_ops snd_usbmidi_cme_ops = { .output_packet = snd_usbmidi_output_standard_packet, }; +static struct usb_protocol_ops snd_usbmidi_ch345_broken_sysex_ops = { + .input = ch345_broken_sysex_input, + .output = snd_usbmidi_standard_output, + .output_packet = snd_usbmidi_output_standard_packet, +}; + /* * AKAI MPD16 protocol: * @@ -2377,6 +2418,7 @@ int snd_usbmidi_create(struct snd_card *card, err = snd_usbmidi_detect_per_port_endpoints(umidi, endpoints); break; case QUIRK_MIDI_CH345: + umidi->usb_protocol_ops = &snd_usbmidi_ch345_broken_sysex_ops; err = snd_usbmidi_detect_per_port_endpoints(umidi, endpoints); break; default: From 8d0fe5721d2725e58429a9b54149a20f9f590451 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sun, 8 Nov 2015 08:52:31 -0500 Subject: [PATCH 1420/2091] tty: audit: Fix audit source MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 6b2a3d628aa752f0ab825fc6d4d07b09e274d1c1 upstream. The data to audit/record is in the 'from' buffer (ie., the input read buffer). 
Fixes: 72586c6061ab ("n_tty: Fix auditing support for cannonical mode") Cc: Miloslav Trmač Signed-off-by: Peter Hurley Acked-by: Laura Abbott Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_tty.c | 2 +- drivers/tty/tty_audit.c | 2 +- include/linux/tty.h | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index 6b6c6606af5f9..e5edf45e9d4cd 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -169,7 +169,7 @@ static inline int tty_copy_to_user(struct tty_struct *tty, { struct n_tty_data *ldata = tty->disc_data; - tty_audit_add_data(tty, to, n, ldata->icanon); + tty_audit_add_data(tty, from, n, ldata->icanon); return copy_to_user(to, from, n); } diff --git a/drivers/tty/tty_audit.c b/drivers/tty/tty_audit.c index 90ca082935f63..3d245cd3d8e62 100644 --- a/drivers/tty/tty_audit.c +++ b/drivers/tty/tty_audit.c @@ -265,7 +265,7 @@ static struct tty_audit_buf *tty_audit_buf_get(struct tty_struct *tty, * * Audit @data of @size from @tty, if necessary. */ -void tty_audit_add_data(struct tty_struct *tty, unsigned char *data, +void tty_audit_add_data(struct tty_struct *tty, const void *data, size_t size, unsigned icanon) { struct tty_audit_buf *buf; diff --git a/include/linux/tty.h b/include/linux/tty.h index d76631f615c22..9580c09afdbe6 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -605,7 +605,7 @@ extern void n_tty_inherit_ops(struct tty_ldisc_ops *ops); /* tty_audit.c */ #ifdef CONFIG_AUDIT -extern void tty_audit_add_data(struct tty_struct *tty, unsigned char *data, +extern void tty_audit_add_data(struct tty_struct *tty, const void *data, size_t size, unsigned icanon); extern void tty_audit_exit(void); extern void tty_audit_fork(struct signal_struct *sig); @@ -613,8 +613,8 @@ extern void tty_audit_tiocsti(struct tty_struct *tty, char ch); extern void tty_audit_push(struct tty_struct *tty); extern int tty_audit_push_current(void); #else -static inline void tty_audit_add_data(struct tty_struct *tty, - unsigned char *data, size_t size, unsigned icanon) +static inline void tty_audit_add_data(struct tty_struct *tty, const void *data, + size_t size, unsigned icanon) { } static inline void tty_audit_tiocsti(struct tty_struct *tty, char ch) From ff4fcbdf43bc4ae760ef1e7782e233156ed90a75 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Wed, 11 Nov 2015 08:03:54 -0500 Subject: [PATCH 1421/2091] tty: Fix tty_send_xchar() lock order inversion commit ee0c1a65cf95230d5eb3d9de94fd2ead9a428c67 upstream. The correct lock order is atomic_write_lock => termios_rwsem, as established by tty_write() => n_tty_write(). 
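The inversion fixed here is easiest to see outside the tty layer. The toy program below is a generic illustration, not kernel code: two pthread mutexes stand in for atomic_write_lock and termios_rwsem, and both threads take them in the A-then-B order that the patch enforces. If one path instead took B before A, a thread holding A and another holding B could each block on the other's lock forever, which is the deadlock the lock-order rule prevents.

/* Hedged illustration of consistent lock ordering; names are stand-ins. */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock_a = PTHREAD_MUTEX_INITIALIZER; /* "atomic_write_lock" */
static pthread_mutex_t lock_b = PTHREAD_MUTEX_INITIALIZER; /* "termios_rwsem"     */

static void *writer(void *arg)
{
	const char *name = arg;

	pthread_mutex_lock(&lock_a);   /* always A ... */
	pthread_mutex_lock(&lock_b);   /* ... then B   */
	printf("%s: holding both locks\n", name);
	pthread_mutex_unlock(&lock_b);
	pthread_mutex_unlock(&lock_a);
	return NULL;
}

int main(void)
{
	pthread_t t1, t2;

	pthread_create(&t1, NULL, writer, "tty_write-like path");
	pthread_create(&t2, NULL, writer, "tcflow-like path");
	pthread_join(t1, NULL);
	pthread_join(t2, NULL);
	return 0;
}

The patch applies the same rule: tty_send_xchar() now takes termios_rwsem itself, after the write lock, instead of the ioctl path taking it first.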
Fixes: c274f6ef1c666 ("tty: Hold termios_rwsem for tcflow(TCIxxx)") Reported-and-Tested-by: Dmitry Vyukov Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_io.c | 4 ++++ drivers/tty/tty_ioctl.c | 4 ---- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 21837f14a403a..5a5c1ab5a3756 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -1287,18 +1287,22 @@ int tty_send_xchar(struct tty_struct *tty, char ch) int was_stopped = tty->stopped; if (tty->ops->send_xchar) { + down_read(&tty->termios_rwsem); tty->ops->send_xchar(tty, ch); + up_read(&tty->termios_rwsem); return 0; } if (tty_write_lock(tty, 0) < 0) return -ERESTARTSYS; + down_read(&tty->termios_rwsem); if (was_stopped) start_tty(tty); tty->ops->write(tty, &ch, 1); if (was_stopped) stop_tty(tty); + up_read(&tty->termios_rwsem); tty_write_unlock(tty); return 0; } diff --git a/drivers/tty/tty_ioctl.c b/drivers/tty/tty_ioctl.c index 8e53fe4696647..7bbf86b94716a 100644 --- a/drivers/tty/tty_ioctl.c +++ b/drivers/tty/tty_ioctl.c @@ -1144,16 +1144,12 @@ int n_tty_ioctl_helper(struct tty_struct *tty, struct file *file, spin_unlock_irq(&tty->flow_lock); break; case TCIOFF: - down_read(&tty->termios_rwsem); if (STOP_CHAR(tty) != __DISABLED_CHAR) retval = tty_send_xchar(tty, STOP_CHAR(tty)); - up_read(&tty->termios_rwsem); break; case TCION: - down_read(&tty->termios_rwsem); if (START_CHAR(tty) != __DISABLED_CHAR) retval = tty_send_xchar(tty, START_CHAR(tty)); - up_read(&tty->termios_rwsem); break; default: return -EINVAL; From 84390caa6331908d76aa63b8eda0e69f557dda06 Mon Sep 17 00:00:00 2001 From: Rajmohan Mani Date: Wed, 18 Nov 2015 10:48:20 +0200 Subject: [PATCH 1422/2091] xhci: Workaround to get Intel xHCI reset working more reliably commit a5964396190d0c40dd549c23848c282fffa5d1f2 upstream. Existing Intel xHCI controllers require a delay of 1 mS, after setting the CMD_RESET bit in command register, before accessing any HC registers. This allows the HC to complete the reset operation and be ready for HC register access. Without this delay, the subsequent HC register access, may result in a system hang, very rarely. Verified CherryView / Braswell platforms go through over 5000 warm reboot cycles (which was not possible without this patch), without any xHCI reset hang. Signed-off-by: Rajmohan Mani Tested-by: Joe Lawrence Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 26f62b2b33f84..1e6d7579709e7 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -175,6 +175,16 @@ int xhci_reset(struct xhci_hcd *xhci) command |= CMD_RESET; writel(command, &xhci->op_regs->command); + /* Existing Intel xHCI controllers require a delay of 1 mS, + * after setting the CMD_RESET bit, and before accessing any + * HC registers. This allows the HC to complete the + * reset operation and be ready for HC register access. + * Without this delay, the subsequent HC register access, + * may result in a system hang very rarely. 
+ */ + if (xhci->quirks & XHCI_INTEL_HOST) + udelay(1000); + ret = xhci_handshake(&xhci->op_regs->command, CMD_RESET, 0, 10 * 1000 * 1000); if (ret) From 5051ab9f8dbc131c01acc79c0a8b010e0651fbbe Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 27 Sep 2015 16:45:01 -0400 Subject: [PATCH 1423/2091] staging/lustre: use jiffies for lp_last_query times commit 9f088dba3cc267ea11ec0da318cd0175575b5f9b upstream. The recently introduced lnet_peer_set_alive() function uses get_seconds() to read the current time into a shared variable, but all other uses of that variable compare it to jiffies values. This changes the current use to jiffies as well for consistency. Signed-off-by: Arnd Bergmann Fixes: af3fa7c71bf ("staging/lustre/lnet: peer aliveness status and NI status") Cc: Liang Zhen Cc: James Simmons Cc: Isaac Huang Signed-off-by: Oleg Drokin Signed-off-by: Greg Kroah-Hartman --- drivers/staging/lustre/include/linux/lnet/lib-lnet.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/lustre/include/linux/lnet/lib-lnet.h b/drivers/staging/lustre/include/linux/lnet/lib-lnet.h index 0038d29a37fe1..a470e32c49c1a 100644 --- a/drivers/staging/lustre/include/linux/lnet/lib-lnet.h +++ b/drivers/staging/lustre/include/linux/lnet/lib-lnet.h @@ -874,7 +874,7 @@ void lnet_debug_peer(lnet_nid_t nid); static inline void lnet_peer_set_alive(lnet_peer_t *lp) { - lp->lp_last_alive = lp->lp_last_query = get_seconds(); + lp->lp_last_alive = lp->lp_last_query = jiffies; if (!lp->lp_alive) lnet_notify_locked(lp, 0, 1, lp->lp_last_alive); } From 3f11689e161c77fa0bcda8a5aa3b4829c7da7fab Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 6 Nov 2015 12:08:48 +0100 Subject: [PATCH 1424/2091] KVM: s390: enable SIMD only when no VCPUs were created commit 5967c17b118a2bd1dd1d554cc4eee16233e52bec upstream. We should never allow to enable/disable any facilities for the guest when other VCPUs were already created. kvm_arch_vcpu_(load|put) relies on SIMD not changing during runtime. If somebody would create and run VCPUs and then decides to enable SIMD, undefined behaviour could be possible (e.g. vector save area not being set up). Acked-by: Christian Borntraeger Acked-by: Cornelia Huck Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger Signed-off-by: Greg Kroah-Hartman --- arch/s390/kvm/kvm-s390.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index b1aa06b3fe7f4..c3805cf4b9823 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -283,12 +283,16 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) r = 0; break; case KVM_CAP_S390_VECTOR_REGISTERS: - if (MACHINE_HAS_VX) { + mutex_lock(&kvm->lock); + if (atomic_read(&kvm->online_vcpus)) { + r = -EBUSY; + } else if (MACHINE_HAS_VX) { set_kvm_facility(kvm->arch.model.fac->mask, 129); set_kvm_facility(kvm->arch.model.fac->list, 129); r = 0; } else r = -EINVAL; + mutex_unlock(&kvm->lock); break; case KVM_CAP_S390_USER_STSI: kvm->arch.user_stsi = 1; From a52ec6de6d1638e8c203d7188c55627f75371612 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 8 Dec 2015 14:13:19 +0800 Subject: [PATCH 1425/2091] netlink: Add missing goto statement to netlink_insert The backport of 1f770c0a09da855a2b51af6d19de97fb955eca85 ("netlink: Fix autobind race condition that leads to zero port ID") missed a goto statement, which causes netlink to break subtly. This was discovered by Stefan Priebe . 
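The "subtle" breakage is plain control flow: without the goto, the failed-insert path falls straight through into the code that marks the socket as hashed and visible. The self-contained toy below uses entirely hypothetical names (insert, mark-visible flag, failure switch) and is not the netlink code; it only shows how dropping one goto turns an error return into a half-completed success path.

/* Hedged sketch of the error-path pattern behind this fix; all names are
 * made up for illustration. */
#include <stdio.h>
#include <errno.h>

struct item { int visible; };

static int insert(struct item *it, int insert_fails)
{
	int err = insert_fails ? -EEXIST : 0;

	if (err) {
		if (err == -EEXIST)
			err = -EADDRINUSE;
		goto err;	/* the backport dropped this line ... */
	}

	it->visible = 1;	/* ... so failures fell through to here */
err:
	return err;
}

int main(void)
{
	struct item it = { 0 };
	int err;

	err = insert(&it, 1);
	printf("failed insert: err=%d visible=%d\n", err, it.visible); /* visible stays 0 */
	err = insert(&it, 0);
	printf("good insert:   err=%d visible=%d\n", err, it.visible); /* visible becomes 1 */
	return 0;
}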
Fixes: 4e2776241766 ("netlink: Fix autobind race condition that...") Reported-by: Stefan Priebe Reported-by: Philipp Hahn Signed-off-by: Herbert Xu Acked-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/netlink/af_netlink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index d139c43ac6e50..0d6038c87bef7 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1118,6 +1118,7 @@ static int netlink_insert(struct sock *sk, u32 portid) if (err == -EEXIST) err = -EADDRINUSE; sock_put(sk); + goto err; } /* We need to ensure that the socket is hashed and visible. */ From cb371265c2f1a0dd0cee03bd7fff413d671c53f0 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 9 Dec 2015 14:03:42 -0500 Subject: [PATCH 1426/2091] Linux 4.1.14 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index d5d229db61d56..091280d664527 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 1 -SUBLEVEL = 13 +SUBLEVEL = 14 EXTRAVERSION = NAME = Series 4800 From 5c77e26862ce604edea05b3442ed765e9756fe0f Mon Sep 17 00:00:00 2001 From: Rainer Weikusat Date: Fri, 20 Nov 2015 22:07:23 +0000 Subject: [PATCH 1427/2091] unix: avoid use-after-free in ep_remove_wait_queue [ Upstream commit 7d267278a9ece963d77eefec61630223fce08c6c ] Rainer Weikusat writes: An AF_UNIX datagram socket being the client in an n:1 association with some server socket is only allowed to send messages to the server if the receive queue of this socket contains at most sk_max_ack_backlog datagrams. This implies that prospective writers might be forced to go to sleep despite none of the message presently enqueued on the server receive queue were sent by them. In order to ensure that these will be woken up once space becomes again available, the present unix_dgram_poll routine does a second sock_poll_wait call with the peer_wait wait queue of the server socket as queue argument (unix_dgram_recvmsg does a wake up on this queue after a datagram was received). This is inherently problematic because the server socket is only guaranteed to remain alive for as long as the client still holds a reference to it. In case the connection is dissolved via connect or by the dead peer detection logic in unix_dgram_sendmsg, the server socket may be freed despite "the polling mechanism" (in particular, epoll) still has a pointer to the corresponding peer_wait queue. There's no way to forcibly deregister a wait queue with epoll. Based on an idea by Jason Baron, the patch below changes the code such that a wait_queue_t belonging to the client socket is enqueued on the peer_wait queue of the server whenever the peer receive queue full condition is detected by either a sendmsg or a poll. A wake up on the peer queue is then relayed to the ordinary wait queue of the client socket via wake function. The connection to the peer wait queue is again dissolved if either a wake up is about to be relayed or the client socket reconnects or a dead peer is detected or the client socket is itself closed. This enables removing the second sock_poll_wait from unix_dgram_poll, thus avoiding the use-after-free, while still ensuring that no blocked writer sleeps forever. Signed-off-by: Rainer Weikusat Fixes: ec0d215f9420 ("af_unix: fix 'poll for write'/connected DGRAM sockets") Reviewed-by: Jason Baron Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- include/net/af_unix.h | 1 + net/unix/af_unix.c | 183 +++++++++++++++++++++++++++++++++++++----- 2 files changed, 165 insertions(+), 19 deletions(-) diff --git a/include/net/af_unix.h b/include/net/af_unix.h index dfe4ddfbb43c5..e830c3dff61a1 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -63,6 +63,7 @@ struct unix_sock { #define UNIX_GC_CANDIDATE 0 #define UNIX_GC_MAYBE_CYCLE 1 struct socket_wq peer_wq; + wait_queue_t peer_wake; }; static inline struct unix_sock *unix_sk(struct sock *sk) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 76e66695621cc..1975fd8d1c100 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -316,6 +316,118 @@ static struct sock *unix_find_socket_byinode(struct inode *i) return s; } +/* Support code for asymmetrically connected dgram sockets + * + * If a datagram socket is connected to a socket not itself connected + * to the first socket (eg, /dev/log), clients may only enqueue more + * messages if the present receive queue of the server socket is not + * "too large". This means there's a second writeability condition + * poll and sendmsg need to test. The dgram recv code will do a wake + * up on the peer_wait wait queue of a socket upon reception of a + * datagram which needs to be propagated to sleeping would-be writers + * since these might not have sent anything so far. This can't be + * accomplished via poll_wait because the lifetime of the server + * socket might be less than that of its clients if these break their + * association with it or if the server socket is closed while clients + * are still connected to it and there's no way to inform "a polling + * implementation" that it should let go of a certain wait queue + * + * In order to propagate a wake up, a wait_queue_t of the client + * socket is enqueued on the peer_wait queue of the server socket + * whose wake function does a wake_up on the ordinary client socket + * wait queue. This connection is established whenever a write (or + * poll for write) hit the flow control condition and broken when the + * association to the server socket is dissolved or after a wake up + * was relayed. 
+ */ + +static int unix_dgram_peer_wake_relay(wait_queue_t *q, unsigned mode, int flags, + void *key) +{ + struct unix_sock *u; + wait_queue_head_t *u_sleep; + + u = container_of(q, struct unix_sock, peer_wake); + + __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait, + q); + u->peer_wake.private = NULL; + + /* relaying can only happen while the wq still exists */ + u_sleep = sk_sleep(&u->sk); + if (u_sleep) + wake_up_interruptible_poll(u_sleep, key); + + return 0; +} + +static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other) +{ + struct unix_sock *u, *u_other; + int rc; + + u = unix_sk(sk); + u_other = unix_sk(other); + rc = 0; + spin_lock(&u_other->peer_wait.lock); + + if (!u->peer_wake.private) { + u->peer_wake.private = other; + __add_wait_queue(&u_other->peer_wait, &u->peer_wake); + + rc = 1; + } + + spin_unlock(&u_other->peer_wait.lock); + return rc; +} + +static void unix_dgram_peer_wake_disconnect(struct sock *sk, + struct sock *other) +{ + struct unix_sock *u, *u_other; + + u = unix_sk(sk); + u_other = unix_sk(other); + spin_lock(&u_other->peer_wait.lock); + + if (u->peer_wake.private == other) { + __remove_wait_queue(&u_other->peer_wait, &u->peer_wake); + u->peer_wake.private = NULL; + } + + spin_unlock(&u_other->peer_wait.lock); +} + +static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk, + struct sock *other) +{ + unix_dgram_peer_wake_disconnect(sk, other); + wake_up_interruptible_poll(sk_sleep(sk), + POLLOUT | + POLLWRNORM | + POLLWRBAND); +} + +/* preconditions: + * - unix_peer(sk) == other + * - association is stable + */ +static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other) +{ + int connected; + + connected = unix_dgram_peer_wake_connect(sk, other); + + if (unix_recvq_full(other)) + return 1; + + if (connected) + unix_dgram_peer_wake_disconnect(sk, other); + + return 0; +} + static inline int unix_writable(struct sock *sk) { return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf; @@ -420,6 +532,8 @@ static void unix_release_sock(struct sock *sk, int embrion) skpair->sk_state_change(skpair); sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP); } + + unix_dgram_peer_wake_disconnect(sk, skpair); sock_put(skpair); /* It may now die */ unix_peer(sk) = NULL; } @@ -648,6 +762,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock) INIT_LIST_HEAD(&u->link); mutex_init(&u->readlock); /* single task reading lock */ init_waitqueue_head(&u->peer_wait); + init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay); unix_insert_socket(unix_sockets_unbound(sk), sk); out: if (sk == NULL) @@ -1015,6 +1130,8 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, if (unix_peer(sk)) { struct sock *old_peer = unix_peer(sk); unix_peer(sk) = other; + unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer); + unix_state_double_unlock(sk, other); if (other != old_peer) @@ -1453,6 +1570,7 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, struct scm_cookie scm; int max_level; int data_len = 0; + int sk_locked; wait_for_unix_gc(); err = scm_send(sock, msg, &scm, false); @@ -1532,12 +1650,14 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, goto out_free; } + sk_locked = 0; unix_state_lock(other); +restart_locked: err = -EPERM; if (!unix_may_send(sk, other)) goto out_unlock; - if (sock_flag(other, SOCK_DEAD)) { + if (unlikely(sock_flag(other, SOCK_DEAD))) { /* * Check with 1003.1g - what should * datagram error @@ -1545,10 +1665,14 @@ static 
int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, unix_state_unlock(other); sock_put(other); + if (!sk_locked) + unix_state_lock(sk); + err = 0; - unix_state_lock(sk); if (unix_peer(sk) == other) { unix_peer(sk) = NULL; + unix_dgram_peer_wake_disconnect_wakeup(sk, other); + unix_state_unlock(sk); unix_dgram_disconnected(sk, other); @@ -1574,21 +1698,38 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, goto out_unlock; } - if (unix_peer(other) != sk && unix_recvq_full(other)) { - if (!timeo) { - err = -EAGAIN; - goto out_unlock; + if (unlikely(unix_peer(other) != sk && unix_recvq_full(other))) { + if (timeo) { + timeo = unix_wait_for_peer(other, timeo); + + err = sock_intr_errno(timeo); + if (signal_pending(current)) + goto out_free; + + goto restart; } - timeo = unix_wait_for_peer(other, timeo); + if (!sk_locked) { + unix_state_unlock(other); + unix_state_double_lock(sk, other); + } - err = sock_intr_errno(timeo); - if (signal_pending(current)) - goto out_free; + if (unix_peer(sk) != other || + unix_dgram_peer_wake_me(sk, other)) { + err = -EAGAIN; + sk_locked = 1; + goto out_unlock; + } - goto restart; + if (!sk_locked) { + sk_locked = 1; + goto restart_locked; + } } + if (unlikely(sk_locked)) + unix_state_unlock(sk); + if (sock_flag(other, SOCK_RCVTSTAMP)) __net_timestamp(skb); maybe_add_creds(skb, sock, other); @@ -1602,6 +1743,8 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, return len; out_unlock: + if (sk_locked) + unix_state_unlock(sk); unix_state_unlock(other); out_free: kfree_skb(skb); @@ -2245,14 +2388,16 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, return mask; writable = unix_writable(sk); - other = unix_peer_get(sk); - if (other) { - if (unix_peer(other) != sk) { - sock_poll_wait(file, &unix_sk(other)->peer_wait, wait); - if (unix_recvq_full(other)) - writable = 0; - } - sock_put(other); + if (writable) { + unix_state_lock(sk); + + other = unix_peer(sk); + if (other && unix_peer(other) != sk && + unix_recvq_full(other) && + unix_dgram_peer_wake_me(sk, other)) + writable = 0; + + unix_state_unlock(sk); } if (writable) From 393914a470e4c90527b09e220c7d355c617c3aa3 Mon Sep 17 00:00:00 2001 From: Kamal Mostafa Date: Wed, 11 Nov 2015 14:24:27 -0800 Subject: [PATCH 1428/2091] tools/net: Use include/uapi with __EXPORTED_HEADERS__ [ Upstream commit d7475de58575c904818efa369c82e88c6648ce2e ] Use the local uapi headers to keep in sync with "recently" added #define's (e.g. SKF_AD_VLAN_TPID). Refactored CFLAGS, and bpf_asm doesn't need -I. Fixes: 3f356385e8a4 ("filter: bpf_asm: add minimal bpf asm tool") Signed-off-by: Kamal Mostafa Acked-by: Daniel Borkmann Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- tools/net/Makefile | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tools/net/Makefile b/tools/net/Makefile index ee577ea03ba50..ddf8880106524 100644 --- a/tools/net/Makefile +++ b/tools/net/Makefile @@ -4,6 +4,9 @@ CC = gcc LEX = flex YACC = bison +CFLAGS += -Wall -O2 +CFLAGS += -D__EXPORTED_HEADERS__ -I../../include/uapi -I../../include + %.yacc.c: %.y $(YACC) -o $@ -d $< @@ -12,15 +15,13 @@ YACC = bison all : bpf_jit_disasm bpf_dbg bpf_asm -bpf_jit_disasm : CFLAGS = -Wall -O2 -DPACKAGE='bpf_jit_disasm' +bpf_jit_disasm : CFLAGS += -DPACKAGE='bpf_jit_disasm' bpf_jit_disasm : LDLIBS = -lopcodes -lbfd -ldl bpf_jit_disasm : bpf_jit_disasm.o -bpf_dbg : CFLAGS = -Wall -O2 bpf_dbg : LDLIBS = -lreadline bpf_dbg : bpf_dbg.o -bpf_asm : CFLAGS = -Wall -O2 -I. bpf_asm : LDLIBS = bpf_asm : bpf_asm.o bpf_exp.yacc.o bpf_exp.lex.o bpf_exp.lex.o : bpf_exp.yacc.c From 01c0b875a4199a969a6c71807fceecc6d9c533e7 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 11 Nov 2015 23:25:40 +0100 Subject: [PATCH 1429/2091] packet: do skb_probe_transport_header when we actually have data [ Upstream commit efdfa2f7848f64517008136fb41f53c4a1faf93a ] In tpacket_fill_skb() commit c1aad275b029 ("packet: set transport header before doing xmit") and later on 40893fd0fd4e ("net: switch to use skb_probe_transport_header()") was probing for a transport header on the skb from a ring buffer slot, but at a time, where the skb has _not even_ been filled with data yet. So that call into the flow dissector is pretty useless. Lets do it after we've set up the skb frags. Fixes: c1aad275b029 ("packet: set transport header before doing xmit") Reported-by: Eric Dumazet Signed-off-by: Daniel Borkmann Acked-by: Jason Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/packet/af_packet.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 686e601874010..16a8b37da7c81 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2151,8 +2151,6 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, skb_reserve(skb, hlen); skb_reset_network_header(skb); - if (!packet_use_direct_xmit(po)) - skb_probe_transport_header(skb, 0); if (unlikely(po->tp_tx_has_off)) { int off_min, off_max, off; off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll); @@ -2232,6 +2230,9 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, len = ((to_write > len_max) ? len_max : to_write); } + if (!packet_use_direct_xmit(po)) + skb_probe_transport_header(skb, 0); + return tp_len; } From 302f6f05fec37e4a75d668b23251ed366b408f25 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 11 Nov 2015 23:25:41 +0100 Subject: [PATCH 1430/2091] packet: always probe for transport header [ Upstream commit 8fd6c80d9dd938ca338c70698533a7e304752846 ] We concluded that the skb_probe_transport_header() should better be called unconditionally. Avoiding the call into the flow dissector has also not really much to do with the direct xmit mode. While it seems that only virtio_net code makes use of GSO from non RX/TX ring packet socket paths, we should probe for a transport header nevertheless before they hit devices. Reference: http://thread.gmane.org/gmane.linux.network/386173/ Signed-off-by: Daniel Borkmann Acked-by: Jason Wang Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/packet/af_packet.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 16a8b37da7c81..313119c5d02ab 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2230,8 +2230,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, len = ((to_write > len_max) ? len_max : to_write); } - if (!packet_use_direct_xmit(po)) - skb_probe_transport_header(skb, 0); + skb_probe_transport_header(skb, 0); return tp_len; } @@ -2583,8 +2582,8 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) len += vnet_hdr_len; } - if (!packet_use_direct_xmit(po)) - skb_probe_transport_header(skb, reserve); + skb_probe_transport_header(skb, reserve); + if (unlikely(extra_len == 4)) skb->no_fcs = 1; From c59fd2eaa2b6c98233d1d264153335c202c4163d Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 11 Nov 2015 23:25:42 +0100 Subject: [PATCH 1431/2091] packet: only allow extra vlan len on ethernet devices [ Upstream commit 3c70c132488794e2489ab045559b0ce0afcf17de ] Packet sockets can be used by various net devices and are not really restricted to ARPHRD_ETHER device types. However, when currently checking for the extra 4 bytes that can be transmitted in VLAN case, our assumption is that we generally probe on ARPHRD_ETHER devices. Therefore, before looking into Ethernet header, check the device type first. This also fixes the issue where non-ARPHRD_ETHER devices could have no dev->hard_header_len in TX_RING SOCK_RAW case, and thus the check would test unfilled linear part of the skb (instead of non-linear). Fixes: 57f89bfa2140 ("network: Allow af_packet to transmit +4 bytes for VLAN packets.") Fixes: 52f1454f629f ("packet: allow to transmit +4 byte in TX_RING slot for VLAN case") Signed-off-by: Daniel Borkmann Acked-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/packet/af_packet.c | 60 ++++++++++++++++++------------------------ 1 file changed, 25 insertions(+), 35 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 313119c5d02ab..36ece6f721a0b 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1524,6 +1524,20 @@ static void fanout_release(struct sock *sk) mutex_unlock(&fanout_mutex); } +static bool packet_extra_vlan_len_allowed(const struct net_device *dev, + struct sk_buff *skb) +{ + /* Earlier code assumed this would be a VLAN pkt, double-check + * this now that we have the actual packet in hand. We can only + * do this check on Ethernet devices. + */ + if (unlikely(dev->type != ARPHRD_ETHER)) + return false; + + skb_reset_mac_header(skb); + return likely(eth_hdr(skb)->h_proto == htons(ETH_P_8021Q)); +} + static const struct proto_ops packet_ops; static const struct proto_ops packet_ops_spkt; @@ -1685,18 +1699,10 @@ static int packet_sendmsg_spkt(struct socket *sock, struct msghdr *msg, goto retry; } - if (len > (dev->mtu + dev->hard_header_len + extra_len)) { - /* Earlier code assumed this would be a VLAN pkt, - * double-check this now that we have the actual - * packet in hand. 
- */ - struct ethhdr *ehdr; - skb_reset_mac_header(skb); - ehdr = eth_hdr(skb); - if (ehdr->h_proto != htons(ETH_P_8021Q)) { - err = -EMSGSIZE; - goto out_unlock; - } + if (len > (dev->mtu + dev->hard_header_len + extra_len) && + !packet_extra_vlan_len_allowed(dev, skb)) { + err = -EMSGSIZE; + goto out_unlock; } skb->protocol = proto; @@ -2308,18 +2314,10 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto, addr, hlen); if (likely(tp_len >= 0) && - tp_len > dev->mtu + dev->hard_header_len) { - struct ethhdr *ehdr; - /* Earlier code assumed this would be a VLAN pkt, - * double-check this now that we have the actual - * packet in hand. - */ + tp_len > dev->mtu + dev->hard_header_len && + !packet_extra_vlan_len_allowed(dev, skb)) + tp_len = -EMSGSIZE; - skb_reset_mac_header(skb); - ehdr = eth_hdr(skb); - if (ehdr->h_proto != htons(ETH_P_8021Q)) - tp_len = -EMSGSIZE; - } if (unlikely(tp_len < 0)) { if (po->tp_loss) { __packet_set_status(po, ph, @@ -2540,18 +2538,10 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); - if (!gso_type && (len > dev->mtu + reserve + extra_len)) { - /* Earlier code assumed this would be a VLAN pkt, - * double-check this now that we have the actual - * packet in hand. - */ - struct ethhdr *ehdr; - skb_reset_mac_header(skb); - ehdr = eth_hdr(skb); - if (ehdr->h_proto != htons(ETH_P_8021Q)) { - err = -EMSGSIZE; - goto out_free; - } + if (!gso_type && (len > dev->mtu + reserve + extra_len) && + !packet_extra_vlan_len_allowed(dev, skb)) { + err = -EMSGSIZE; + goto out_free; } skb->protocol = proto; From 292d3287bbd5913f6c8a8989899a54c6e6aee3ca Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 11 Nov 2015 23:25:43 +0100 Subject: [PATCH 1432/2091] packet: infer protocol from ethernet header if unset [ Upstream commit c72219b75fde768efccf7666342282fab7f9e4e7 ] In case no struct sockaddr_ll has been passed to packet socket's sendmsg() when doing a TX_RING flush run, then skb->protocol is set to po->num instead, which is the protocol passed via socket(2)/bind(2). Applications only xmitting can go the path of allocating the socket as socket(PF_PACKET, , 0) and do a bind(2) on the TX_RING with sll_protocol of 0. That way, register_prot_hook() is neither called on creation nor on bind time, which saves cycles when there's no interest in capturing anyway. That leaves us however with po->num 0 instead and therefore the TX_RING flush run sets skb->protocol to 0 as well. Eric reported that this leads to problems when using tools like trafgen over bonding device. I.e. the bonding's hash function could invoke the kernel's flow dissector, which depends on skb->protocol being properly set. In the current situation, all the traffic is then directed to a single slave. Fix it up by inferring skb->protocol from the Ethernet header when not set and we have ARPHRD_ETHER device type. This is only done in case of SOCK_RAW and where we have a dev->hard_header_len length. In case of ARPHRD_ETHER devices, this is guaranteed to cover ETH_HLEN, and therefore being accessed on the skb after the skb_store_bits(). Reported-by: Eric Dumazet Signed-off-by: Daniel Borkmann Acked-by: Willem de Bruijn Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/packet/af_packet.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 36ece6f721a0b..2d83124212191 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2121,6 +2121,15 @@ static bool ll_header_truncated(const struct net_device *dev, int len) return false; } +static void tpacket_set_protocol(const struct net_device *dev, + struct sk_buff *skb) +{ + if (dev->type == ARPHRD_ETHER) { + skb_reset_mac_header(skb); + skb->protocol = eth_hdr(skb)->h_proto; + } +} + static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, void *frame, struct net_device *dev, int size_max, __be16 proto, unsigned char *addr, int hlen) @@ -2202,6 +2211,8 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, dev->hard_header_len); if (unlikely(err)) return err; + if (!skb->protocol) + tpacket_set_protocol(dev, skb); data += dev->hard_header_len; to_write -= dev->hard_header_len; From b89d1e92c7844931b2b257f156c4eed10f4b15af Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 11 Nov 2015 23:25:44 +0100 Subject: [PATCH 1433/2091] packet: fix tpacket_snd max frame len [ Upstream commit 5cfb4c8d05b4409c4044cb9c05b19705c1d9818b ] Since it's introduction in commit 69e3c75f4d54 ("net: TX_RING and packet mmap"), TX_RING could be used from SOCK_DGRAM and SOCK_RAW side. When used with SOCK_DGRAM only, the size_max > dev->mtu + reserve check should have reserve as 0, but currently, this is unconditionally set (in it's original form as dev->hard_header_len). I think this is not correct since tpacket_fill_skb() would then take dev->mtu and dev->hard_header_len into account for SOCK_DGRAM, the extra VLAN_HLEN could be possible in both cases. Presumably, the reserve code was copied from packet_snd(), but later on missed the check. Make it similar as we have it in packet_snd(). Fixes: 69e3c75f4d54 ("net: TX_RING and packet mmap") Signed-off-by: Daniel Borkmann Acked-by: Willem de Bruijn Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/packet/af_packet.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 2d83124212191..ebc39e66d7049 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2293,12 +2293,13 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) if (unlikely(!(dev->flags & IFF_UP))) goto out_put; - reserve = dev->hard_header_len + VLAN_HLEN; + if (po->sk.sk_socket->type == SOCK_RAW) + reserve = dev->hard_header_len; size_max = po->tx_ring.frame_size - (po->tp_hdrlen - sizeof(struct sockaddr_ll)); - if (size_max > dev->mtu + reserve) - size_max = dev->mtu + reserve; + if (size_max > dev->mtu + reserve + VLAN_HLEN) + size_max = dev->mtu + reserve + VLAN_HLEN; do { ph = packet_current_frame(po, &po->tx_ring, @@ -2325,7 +2326,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto, addr, hlen); if (likely(tp_len >= 0) && - tp_len > dev->mtu + dev->hard_header_len && + tp_len > dev->mtu + reserve && !packet_extra_vlan_len_allowed(dev, skb)) tp_len = -EMSGSIZE; From 32c3ff4e847888df0b54f8ccfb864d01108be38c Mon Sep 17 00:00:00 2001 From: lucien Date: Thu, 12 Nov 2015 13:07:07 +0800 Subject: [PATCH 1434/2091] sctp: translate host order to network order when setting a hmacid [ Upstream commit ed5a377d87dc4c87fb3e1f7f698cba38cd893103 ] now sctp auth cannot work well when setting a hmacid manually, which is caused by that we didn't use the network order for hmacid, so fix it by adding the transformation in sctp_auth_ep_set_hmacs. even we set hmacid with the network order in userspace, it still can't work, because of this condition in sctp_auth_ep_set_hmacs(): if (id > SCTP_AUTH_HMAC_ID_MAX) return -EOPNOTSUPP; so this wasn't working before and thus it won't break compatibility. Fixes: 65b07e5d0d09 ("[SCTP]: API updates to suport SCTP-AUTH extensions.") Signed-off-by: Xin Long Signed-off-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Acked-by: Vlad Yasevich Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/auth.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sctp/auth.c b/net/sctp/auth.c index 4f15b7d730e13..1543e39f47c33 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -809,8 +809,8 @@ int sctp_auth_ep_set_hmacs(struct sctp_endpoint *ep, if (!has_sha1) return -EINVAL; - memcpy(ep->auth_hmacs_list->hmac_ids, &hmacs->shmac_idents[0], - hmacs->shmac_num_idents * sizeof(__u16)); + for (i = 0; i < hmacs->shmac_num_idents; i++) + ep->auth_hmacs_list->hmac_ids[i] = htons(hmacs->shmac_idents[i]); ep->auth_hmacs_list->param_hdr.length = htons(sizeof(sctp_paramhdr_t) + hmacs->shmac_num_idents * sizeof(__u16)); return 0; From 6cb5485ed1ddeedb9c56af19ebcf69b484c36ca9 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 12 Nov 2015 17:35:58 +0100 Subject: [PATCH 1435/2091] ip_tunnel: disable preemption when updating per-cpu tstats [ Upstream commit b4fe85f9c9146f60457e9512fb6055e69e6a7a65 ] Drivers like vxlan use the recently introduced udp_tunnel_xmit_skb/udp_tunnel6_xmit_skb APIs. udp_tunnel6_xmit_skb makes use of ip6tunnel_xmit, and ip6tunnel_xmit, after sending the packet, updates the struct stats using the usual u64_stats_update_begin/end calls on this_cpu_ptr(dev->tstats). 
udp_tunnel_xmit_skb makes use of iptunnel_xmit, which doesn't touch tstats, so drivers like vxlan, immediately after, call iptunnel_xmit_stats, which does the same thing - calls u64_stats_update_begin/end on this_cpu_ptr(dev->tstats). While vxlan is probably fine (I don't know?), calling a similar function from, say, an unbound workqueue, on a fully preemptable kernel causes real issues: [ 188.434537] BUG: using smp_processor_id() in preemptible [00000000] code: kworker/u8:0/6 [ 188.435579] caller is debug_smp_processor_id+0x17/0x20 [ 188.435583] CPU: 0 PID: 6 Comm: kworker/u8:0 Not tainted 4.2.6 #2 [ 188.435607] Call Trace: [ 188.435611] [] dump_stack+0x4f/0x7b [ 188.435615] [] check_preemption_disabled+0x19d/0x1c0 [ 188.435619] [] debug_smp_processor_id+0x17/0x20 The solution would be to protect the whole this_cpu_ptr(dev->tstats)/u64_stats_update_begin/end blocks with disabling preemption and then reenabling it. Signed-off-by: Jason A. Donenfeld Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/ip6_tunnel.h | 3 ++- include/net/ip_tunnels.h | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index b8529aa1dae7a..b0f7445c0fdc7 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -83,11 +83,12 @@ static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb, err = ip6_local_out_sk(sk, skb); if (net_xmit_eval(err) == 0) { - struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats); + struct pcpu_sw_netstats *tstats = get_cpu_ptr(dev->tstats); u64_stats_update_begin(&tstats->syncp); tstats->tx_bytes += pkt_len; tstats->tx_packets++; u64_stats_update_end(&tstats->syncp); + put_cpu_ptr(tstats); } else { stats->tx_errors++; stats->tx_aborted_errors++; diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index d8214cb88bbcf..9c2897e56ee1a 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -207,12 +207,13 @@ static inline void iptunnel_xmit_stats(int err, struct pcpu_sw_netstats __percpu *stats) { if (err > 0) { - struct pcpu_sw_netstats *tstats = this_cpu_ptr(stats); + struct pcpu_sw_netstats *tstats = get_cpu_ptr(stats); u64_stats_update_begin(&tstats->syncp); tstats->tx_bytes += err; tstats->tx_packets++; u64_stats_update_end(&tstats->syncp); + put_cpu_ptr(tstats); } else if (err < 0) { err_stats->tx_errors++; err_stats->tx_aborted_errors++; From 97546faaba70ac9323e088ca57644d2a2179cdd6 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Mon, 16 Nov 2015 13:09:10 -0500 Subject: [PATCH 1436/2091] snmp: Remove duplicate OUTMCAST stat increment [ Upstream commit 41033f029e393a64e81966cbe34d66c6cf8a2e7e ] the OUTMCAST stat is double incremented, getting bumped once in the mcast code itself, and again in the common ip output path. Remove the mcast bump, as its not needed Validated by the reporter, with good results Signed-off-by: Neil Horman Reported-by: Claus Jensen CC: Claus Jensen CC: David Miller Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/mcast.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 083b2927fc67a..41e3b5ee8d0b6 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1651,7 +1651,6 @@ static void mld_sendpack(struct sk_buff *skb) if (!err) { ICMP6MSGOUT_INC_STATS(net, idev, ICMPV6_MLD2_REPORT); ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); - IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, payload_len); } else { IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); } @@ -2014,7 +2013,6 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) if (!err) { ICMP6MSGOUT_INC_STATS(net, idev, type); ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); - IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, full_len); } else IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); From 36c1943d828ebd990ea0b870b0a10bba85b7f26f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Wed, 18 Nov 2015 21:13:07 +0100 Subject: [PATCH 1437/2091] net: qmi_wwan: add XS Stick W100-2 from 4G Systems MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 68242a5a1e2edce39b069385cbafb82304eac0f1 ] Thomas reports " 4gsystems sells two total different LTE-surfsticks under the same name. .. The newer version of XS Stick W100 is from "omega" .. Under windows the driver switches to the same ID, and uses MI03\6 for network and MI01\6 for modem. .. echo "1c9e 9b01" > /sys/bus/usb/drivers/qmi_wwan/new_id echo "1c9e 9b01" > /sys/bus/usb-serial/drivers/option1/new_id T: Bus=01 Lev=01 Prnt=01 Port=03 Cnt=01 Dev#= 4 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1c9e ProdID=9b01 Rev=02.32 S: Manufacturer=USB Modem S: Product=USB Modem S: SerialNumber= C: #Ifs= 5 Cfg#= 1 Atr=80 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=option I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=option I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan I: If#= 4 Alt= 0 #EPs= 2 Cls=08(stor.) Sub=06 Prot=50 Driver=usb-storage Now all important things are there: wwp0s29f7u2i3 (net), ttyUSB2 (at), cdc-wdm0 (qmi), ttyUSB1 (at) There is also ttyUSB0, but it is not usable, at least not for at. The device works well with qmi and ModemManager-NetworkManager. " Reported-by: Thomas Schäfer Signed-off-by: Bjørn Mork Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 4e0470d396a38..71190dc1eacfe 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -774,6 +774,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x2357, 0x9000, 4)}, /* TP-LINK MA260 */ {QMI_FIXED_INTF(0x1bc7, 0x1200, 5)}, /* Telit LE920 */ {QMI_FIXED_INTF(0x1bc7, 0x1201, 2)}, /* Telit LE920 */ + {QMI_FIXED_INTF(0x1c9e, 0x9b01, 3)}, /* XS Stick W100-2 from 4G Systems */ {QMI_FIXED_INTF(0x0b3c, 0xc000, 4)}, /* Olivetti Olicard 100 */ {QMI_FIXED_INTF(0x0b3c, 0xc001, 4)}, /* Olivetti Olicard 120 */ {QMI_FIXED_INTF(0x0b3c, 0xc002, 4)}, /* Olivetti Olicard 140 */ From ec8769e1a662e06421d61b4dc52fe5235b7e4f99 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Nov 2015 12:40:13 -0800 Subject: [PATCH 1438/2091] tcp: md5: fix lockdep annotation [ Upstream commit 1b8e6a01e19f001e9f93b39c32387961c91ed3cc ] When a passive TCP is created, we eventually call tcp_md5_do_add() with sk pointing to the child. It is not owner by the user yet (we will add this socket into listener accept queue a bit later anyway) But we do own the spinlock, so amend the lockdep annotation to avoid following splat : [ 8451.090932] net/ipv4/tcp_ipv4.c:923 suspicious rcu_dereference_protected() usage! [ 8451.090932] [ 8451.090932] other info that might help us debug this: [ 8451.090932] [ 8451.090934] [ 8451.090934] rcu_scheduler_active = 1, debug_locks = 1 [ 8451.090936] 3 locks held by socket_sockopt_/214795: [ 8451.090936] #0: (rcu_read_lock){.+.+..}, at: [] __netif_receive_skb_core+0x151/0xe90 [ 8451.090947] #1: (rcu_read_lock){.+.+..}, at: [] ip_local_deliver_finish+0x43/0x2b0 [ 8451.090952] #2: (slock-AF_INET){+.-...}, at: [] sk_clone_lock+0x1c5/0x500 [ 8451.090958] [ 8451.090958] stack backtrace: [ 8451.090960] CPU: 7 PID: 214795 Comm: socket_sockopt_ [ 8451.091215] Call Trace: [ 8451.091216] [] dump_stack+0x55/0x76 [ 8451.091229] [] lockdep_rcu_suspicious+0xeb/0x110 [ 8451.091235] [] tcp_md5_do_add+0x1bf/0x1e0 [ 8451.091239] [] tcp_v4_syn_recv_sock+0x1f1/0x4c0 [ 8451.091242] [] ? tcp_v4_md5_hash_skb+0x167/0x190 [ 8451.091246] [] tcp_check_req+0x3c8/0x500 [ 8451.091249] [] ? tcp_v4_inbound_md5_hash+0x11e/0x190 [ 8451.091253] [] tcp_v4_rcv+0x3c0/0x9f0 [ 8451.091256] [] ? ip_local_deliver_finish+0x43/0x2b0 [ 8451.091260] [] ip_local_deliver_finish+0xb6/0x2b0 [ 8451.091263] [] ? ip_local_deliver_finish+0x43/0x2b0 [ 8451.091267] [] ip_local_deliver+0x48/0x80 [ 8451.091270] [] ip_rcv_finish+0x160/0x700 [ 8451.091273] [] ip_rcv+0x29e/0x3d0 [ 8451.091277] [] __netif_receive_skb_core+0xb47/0xe90 Fixes: a8afca0329988 ("tcp: md5: protects md5sig_info with RCU") Signed-off-by: Eric Dumazet Reported-by: Willem de Bruijn Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_ipv4.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 441ca6f389815..88203e755af87 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -922,7 +922,8 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, } md5sig = rcu_dereference_protected(tp->md5sig_info, - sock_owned_by_user(sk)); + sock_owned_by_user(sk) || + lockdep_is_held(&sk->sk_lock.slock)); if (!md5sig) { md5sig = kmalloc(sizeof(*md5sig), gfp); if (!md5sig) From 039abed64e076c4be1306febf3a723ecf096cc47 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Wed, 18 Nov 2015 18:17:30 -0800 Subject: [PATCH 1439/2091] tcp: disable Fast Open on timeouts after handshake [ Upstream commit 0e45f4da5981895e885dd72fe912a3f8e32bae73 ] Some middle-boxes black-hole the data after the Fast Open handshake (https://www.ietf.org/proceedings/94/slides/slides-94-tcpm-13.pdf). The exact reason is unknown. The work-around is to disable Fast Open temporarily after multiple recurring timeouts with few or no data delivered in the established state. Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Reported-by: Christoph Paasch Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_timer.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 8c65dc147d8bc..c8f97858d6f68 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -176,6 +176,18 @@ static int tcp_write_timeout(struct sock *sk) syn_set = true; } else { if (retransmits_timed_out(sk, sysctl_tcp_retries1, 0, 0)) { + /* Some middle-boxes may black-hole Fast Open _after_ + * the handshake. Therefore we conservatively disable + * Fast Open on this path on recurring timeouts with + * few or zero bytes acked after Fast Open. + */ + if (tp->syn_data_acked && + tp->bytes_acked <= tp->rx_opt.mss_clamp) { + tcp_fastopen_cache_set(sk, 0, NULL, true, 0); + if (icsk->icsk_retransmits == sysctl_tcp_retries1) + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPFASTOPENACTIVEFAIL); + } /* Black hole detection */ tcp_mtu_probing(icsk, sk); From ddb686ac700f589d138b39c5306d04299cb805a2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Nov 2015 21:03:33 -0800 Subject: [PATCH 1440/2091] tcp: fix potential huge kmalloc() calls in TCP_REPAIR [ Upstream commit 5d4c9bfbabdb1d497f21afd81501e5c54b0c85d9 ] tcp_send_rcvq() is used for re-injecting data into tcp receive queue. Problems : - No check against size is performed, allowed user to fool kernel in attempting very large memory allocations, eventually triggering OOM when memory is fragmented. - In case of fault during the copy we do not return correct errno. Lets use alloc_skb_with_frags() to cook optimal skbs. Fixes: 292e8d8c8538 ("tcp: Move rcvq sending to tcp_input.c") Fixes: c0e88ff0f256 ("tcp: Repair socket queues") Signed-off-by: Eric Dumazet Cc: Pavel Emelyanov Acked-by: Pavel Emelyanov Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_input.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c9ab964189a01..7b1fc955b6b6a 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4438,19 +4438,34 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size) { struct sk_buff *skb; + int err = -ENOMEM; + int data_len = 0; bool fragstolen; if (size == 0) return 0; - skb = alloc_skb(size, sk->sk_allocation); + if (size > PAGE_SIZE) { + int npages = min_t(size_t, size >> PAGE_SHIFT, MAX_SKB_FRAGS); + + data_len = npages << PAGE_SHIFT; + size = data_len + (size & ~PAGE_MASK); + } + skb = alloc_skb_with_frags(size - data_len, data_len, + PAGE_ALLOC_COSTLY_ORDER, + &err, sk->sk_allocation); if (!skb) goto err; + skb_put(skb, size - data_len); + skb->data_len = data_len; + skb->len = size; + if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) goto err_free; - if (memcpy_from_msg(skb_put(skb, size), msg, size)) + err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size); + if (err) goto err_free; TCP_SKB_CB(skb)->seq = tcp_sk(sk)->rcv_nxt; @@ -4466,7 +4481,8 @@ int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size) err_free: kfree_skb(skb); err: - return -ENOMEM; + return err; + } static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) From 967ade52834309a4bf19f2f16403407ba3e30041 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Nov 2015 08:18:14 -0800 Subject: [PATCH 1441/2091] tcp: initialize tp->copied_seq in case of cross SYN connection [ Upstream commit 142a2e7ece8d8ac0e818eb2c91f99ca894730e2a ] Dmitry provided a syzkaller (http://github.com/google/syzkaller) generated program that triggers the WARNING at net/ipv4/tcp.c:1729 in tcp_recvmsg() : WARN_ON(tp->copied_seq != tp->rcv_nxt && !(flags & (MSG_PEEK | MSG_TRUNC))); His program is specifically attempting a Cross SYN TCP exchange, that we support (for the pleasure of hackers ?), but it looks we lack proper tcp->copied_seq initialization. Thanks again Dmitry for your report and testings. Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Tested-by: Dmitry Vyukov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_input.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 7b1fc955b6b6a..87463c8148962 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5638,6 +5638,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, } tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1; + tp->copied_seq = tp->rcv_nxt; tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1; /* RFC1323: The window in SYN & SYN/ACK segments is From 8f1c1d107722db69c6bf2536982b8d65d53d087f Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 20 Nov 2015 00:11:56 +0100 Subject: [PATCH 1442/2091] net, scm: fix PaX detected msg_controllen overflow in scm_detach_fds [ Upstream commit 6900317f5eff0a7070c5936e5383f589e0de7a09 ] David and HacKurx reported a following/similar size overflow triggered in a grsecurity kernel, thanks to PaX's gcc size overflow plugin: (Already fixed in later grsecurity versions by Brad and PaX Team.) 
[ 1002.296137] PAX: size overflow detected in function scm_detach_fds net/core/scm.c:314 cicus.202_127 min, count: 4, decl: msg_controllen; num: 0; context: msghdr;
[ 1002.296145] CPU: 0 PID: 3685 Comm: scm_rights_recv Not tainted 4.2.3-grsec+ #7
[ 1002.296149] Hardware name: Apple Inc. MacBookAir5,1/Mac-66F35F19FE2A0D05, [...]
[ 1002.296153] ffffffff81c27366 0000000000000000 ffffffff81c27375 ffffc90007843aa8
[ 1002.296162] ffffffff818129ba 0000000000000000 ffffffff81c27366 ffffc90007843ad8
[ 1002.296169] ffffffff8121f838 fffffffffffffffc fffffffffffffffc ffffc90007843e60
[ 1002.296176] Call Trace:
[ 1002.296190] [] dump_stack+0x45/0x57
[ 1002.296200] [] report_size_overflow+0x38/0x60
[ 1002.296209] [] scm_detach_fds+0x2ce/0x300
[ 1002.296220] [] unix_stream_read_generic+0x609/0x930
[ 1002.296228] [] unix_stream_recvmsg+0x4f/0x60
[ 1002.296236] [] ? unix_set_peek_off+0x50/0x50
[ 1002.296243] [] sock_recvmsg+0x47/0x60
[ 1002.296248] [] ___sys_recvmsg+0xe2/0x1e0
[ 1002.296257] [] __sys_recvmsg+0x46/0x80
[ 1002.296263] [] SyS_recvmsg+0x2c/0x40
[ 1002.296271] [] entry_SYSCALL_64_fastpath+0x12/0x85

Further investigation showed that this can happen when an *odd* number of fds
are being passed over AF_UNIX sockets. In these cases CMSG_LEN(i * sizeof(int))
and CMSG_SPACE(i * sizeof(int)), where i is the number of successfully passed
fds, differ by 4 bytes due to the extra CMSG_ALIGN() padding in CMSG_SPACE()
to an 8 byte boundary on 64 bit. The padding is used to align subsequent cmsg
headers in the control buffer.

When the control buffer passed in from the receiver side *lacks* these 4 bytes
(e.g. due to buggy/wrong API usage), then msg->msg_controllen will overflow in
scm_detach_fds():

  int cmlen = CMSG_LEN(i * sizeof(int));   <--- cmlen w/o tail-padding
  err = put_user(SOL_SOCKET, &cm->cmsg_level);
  if (!err)
    err = put_user(SCM_RIGHTS, &cm->cmsg_type);
  if (!err)
    err = put_user(cmlen, &cm->cmsg_len);
  if (!err) {
    cmlen = CMSG_SPACE(i * sizeof(int));   <--- cmlen w/ 4 byte extra tail-padding
    msg->msg_control += cmlen;
    msg->msg_controllen -= cmlen;          <--- iff no tail-padding space here ...
  }                                             ... wrap-around

F.e. it will wrap to a length of 18446744073709551612 bytes in case the
receiver passed in msg->msg_controllen of 20 bytes, and the sender properly
transferred 1 fd to the receiver, so that its CMSG_LEN results in 20 bytes and
CMSG_SPACE in 24 bytes.

In case of MSG_CMSG_COMPAT (scm_detach_fds_compat()), I haven't seen an issue
in my tests as alignment seems always on 4 byte boundary. Same should be in
case of native 32 bit, where we end up with 4 byte boundaries as well.

In practice, passing msg->msg_controllen of 20 to recvmsg() while receiving a
single fd would mean that on successful return, msg->msg_controllen is being
set by the kernel to 24 bytes instead, thus more than the input buffer
advertised. It could f.e. become an issue if such application later on zeroes
or copies the control buffer based on the returned msg->msg_controllen
elsewhere.

Maximum number of fds we can send is a hard upper limit SCM_MAX_FD (253).

Going over the code, it seems like msg->msg_controllen is not being read after
scm_detach_fds() in scm_recv() anymore by the kernel, good!

Relevant recvmsg() handler are unix_dgram_recvmsg() (unix_seqpacket_recvmsg())
and unix_stream_recvmsg(). Both return back to their recvmsg() caller, and
___sys_recvmsg() places the updated length, that is, new msg_control - old
msg_control pointer into msg->msg_controllen (hence the 24 bytes seen in the
example).
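To make the receive-side contract concrete, here is a minimal userspace sketch;
it is illustrative only and not part of this patch, and the recv_one_fd()
helper plus the union-based buffer are assumptions of the example. The point is
that the control buffer must be sized with CMSG_SPACE(), which already includes
the trailing alignment padding, while the broken callers described above sized
it with CMSG_LEN() (20 bytes for a single fd on 64 bit) and so handed the
kernel a buffer 4 bytes too small:

  #include <string.h>
  #include <sys/socket.h>
  #include <sys/uio.h>

  /* Illustrative sketch (not kernel code): receive exactly one SCM_RIGHTS fd. */
  static int recv_one_fd(int sock)
  {
  	union {
  		/* CMSG_SPACE(4) == 24 on 64 bit; CMSG_LEN(4) is only 20 */
  		char buf[CMSG_SPACE(sizeof(int))];
  		struct cmsghdr align;	/* force cmsghdr alignment */
  	} u;
  	char data;
  	struct iovec iov = { .iov_base = &data, .iov_len = 1 };
  	struct msghdr msg = {
  		.msg_iov	= &iov,
  		.msg_iovlen	= 1,
  		.msg_control	= u.buf,
  		.msg_controllen	= sizeof(u.buf),	/* CMSG_SPACE, not CMSG_LEN */
  	};
  	struct cmsghdr *cmsg;
  	int fd = -1;

  	if (recvmsg(sock, &msg, 0) < 0)
  		return -1;

  	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
  		if (cmsg->cmsg_level == SOL_SOCKET &&
  		    cmsg->cmsg_type == SCM_RIGHTS) {
  			memcpy(&fd, CMSG_DATA(cmsg), sizeof(fd));
  			break;
  		}
  	}
  	return fd;
  }

With the buffer sized like this the returned msg_controllen can never exceed
what the caller advertised; the net/core/scm.c hunk further below additionally
clamps the advance, so that even an undersized buffer can no longer make
msg_controllen wrap around.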
Long time ago, Wei Yongjun fixed something related in commit 1ac70e7ad24a ("[NET]: Fix function put_cmsg() which may cause usr application memory overflow"). RFC3542, section 20.2. says: The fields shown as "XX" are possible padding, between the cmsghdr structure and the data, and between the data and the next cmsghdr structure, if required by the implementation. While sending an application may or may not include padding at the end of last ancillary data in msg_controllen and implementations must accept both as valid. On receiving a portable application must provide space for padding at the end of the last ancillary data as implementations may copy out the padding at the end of the control message buffer and include it in the received msg_controllen. When recvmsg() is called if msg_controllen is too small for all the ancillary data items including any trailing padding after the last item an implementation may set MSG_CTRUNC. Since we didn't place MSG_CTRUNC for already quite a long time, just do the same as in 1ac70e7ad24a to avoid an overflow. Btw, even man-page author got this wrong :/ See db939c9b26e9 ("cmsg.3: Fix error in SCM_RIGHTS code sample"). Some people must have copied this (?), thus it got triggered in the wild (reported several times during boot by David and HacKurx). No Fixes tag this time as pre 2002 (that is, pre history tree). Reported-by: David Sterba Reported-by: HacKurx Cc: PaX Team Cc: Emese Revfy Cc: Brad Spengler Cc: Wei Yongjun Cc: Eric Dumazet Reviewed-by: Hannes Frederic Sowa Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/scm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/core/scm.c b/net/core/scm.c index 3b6899b7d810d..8a1741b14302b 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -305,6 +305,8 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) err = put_user(cmlen, &cm->cmsg_len); if (!err) { cmlen = CMSG_SPACE(i*sizeof(int)); + if (msg->msg_controllen < cmlen) + cmlen = msg->msg_controllen; msg->msg_control += cmlen; msg->msg_controllen -= cmlen; } From 3621421a35c3070ab86741f2e7ac8da635f79980 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Fri, 20 Nov 2015 13:54:19 +0100 Subject: [PATCH 1443/2091] net: ipmr: fix static mfc/dev leaks on table destruction [ Upstream commit 0e615e9601a15efeeb8942cf7cd4dadba0c8c5a7 ] When destroying an mrt table the static mfc entries and the static devices are kept, which leads to devices that can never be destroyed (because of refcnt taken) and leaked memory, for example: unreferenced object 0xffff880034c144c0 (size 192): comm "mfc-broken", pid 4777, jiffies 4320349055 (age 46001.964s) hex dump (first 32 bytes): 98 53 f0 34 00 88 ff ff 98 53 f0 34 00 88 ff ff .S.4.....S.4.... ef 0a 0a 14 01 02 03 04 00 00 00 00 01 00 00 00 ................ backtrace: [] kmemleak_alloc+0x4e/0xb0 [] kmem_cache_alloc+0x190/0x300 [] ip_mroute_setsockopt+0x5cb/0x910 [] do_ip_setsockopt.isra.11+0x105/0xff0 [] ip_setsockopt+0x30/0xa0 [] raw_setsockopt+0x33/0x90 [] sock_common_setsockopt+0x14/0x20 [] SyS_setsockopt+0x71/0xc0 [] entry_SYSCALL_64_fastpath+0x16/0x7a [] 0xffffffffffffffff Make sure that everything is cleaned on netns destruction. Signed-off-by: Nikolay Aleksandrov Reviewed-by: Cong Wang Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ipmr.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index df28693f32e13..c3bfebd501ed1 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -134,7 +134,7 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm); static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc, int cmd); -static void mroute_clean_tables(struct mr_table *mrt); +static void mroute_clean_tables(struct mr_table *mrt, bool all); static void ipmr_expire_process(unsigned long arg); #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES @@ -351,7 +351,7 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id) static void ipmr_free_table(struct mr_table *mrt) { del_timer_sync(&mrt->ipmr_expire_timer); - mroute_clean_tables(mrt); + mroute_clean_tables(mrt, true); kfree(mrt); } @@ -1209,7 +1209,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, * Close the multicast socket, and clear the vif tables etc */ -static void mroute_clean_tables(struct mr_table *mrt) +static void mroute_clean_tables(struct mr_table *mrt, bool all) { int i; LIST_HEAD(list); @@ -1218,8 +1218,9 @@ static void mroute_clean_tables(struct mr_table *mrt) /* Shut down all active vif entries */ for (i = 0; i < mrt->maxvif; i++) { - if (!(mrt->vif_table[i].flags & VIFF_STATIC)) - vif_delete(mrt, i, 0, &list); + if (!all && (mrt->vif_table[i].flags & VIFF_STATIC)) + continue; + vif_delete(mrt, i, 0, &list); } unregister_netdevice_many(&list); @@ -1227,7 +1228,7 @@ static void mroute_clean_tables(struct mr_table *mrt) for (i = 0; i < MFC_LINES; i++) { list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) { - if (c->mfc_flags & MFC_STATIC) + if (!all && (c->mfc_flags & MFC_STATIC)) continue; list_del_rcu(&c->list); mroute_netlink_event(mrt, c, RTM_DELROUTE); @@ -1262,7 +1263,7 @@ static void mrtsock_destruct(struct sock *sk) NETCONFA_IFINDEX_ALL, net->ipv4.devconf_all); RCU_INIT_POINTER(mrt->mroute_sk, NULL); - mroute_clean_tables(mrt); + mroute_clean_tables(mrt, false); } } rtnl_unlock(); From 0c3c060bbdaaced093f3a2ab43b30a8038fa1476 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Fri, 20 Nov 2015 13:54:20 +0100 Subject: [PATCH 1444/2091] net: ip6mr: fix static mfc/dev leaks on table destruction [ Upstream commit 4c6980462f32b4f282c5d8e5f7ea8070e2937725 ] Similar to ipv4, when destroying an mrt table the static mfc entries and the static devices are kept, which leads to devices that can never be destroyed (because of refcnt taken) and leaked memory. Make sure that everything is cleaned up on netns destruction. Fixes: 8229efdaef1e ("netns: ip6mr: enable namespace support in ipv6 multicast forwarding code") CC: Benjamin Thery Signed-off-by: Nikolay Aleksandrov Reviewed-by: Cong Wang Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6mr.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 5f36266b1f5ef..a7aef4b52d659 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -118,7 +118,7 @@ static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc, int cmd); static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb); -static void mroute_clean_tables(struct mr6_table *mrt); +static void mroute_clean_tables(struct mr6_table *mrt, bool all); static void ipmr_expire_process(unsigned long arg); #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES @@ -335,7 +335,7 @@ static struct mr6_table *ip6mr_new_table(struct net *net, u32 id) static void ip6mr_free_table(struct mr6_table *mrt) { del_timer_sync(&mrt->ipmr_expire_timer); - mroute_clean_tables(mrt); + mroute_clean_tables(mrt, true); kfree(mrt); } @@ -1543,7 +1543,7 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt, * Close the multicast socket, and clear the vif tables etc */ -static void mroute_clean_tables(struct mr6_table *mrt) +static void mroute_clean_tables(struct mr6_table *mrt, bool all) { int i; LIST_HEAD(list); @@ -1553,8 +1553,9 @@ static void mroute_clean_tables(struct mr6_table *mrt) * Shut down all active vif entries */ for (i = 0; i < mrt->maxvif; i++) { - if (!(mrt->vif6_table[i].flags & VIFF_STATIC)) - mif6_delete(mrt, i, &list); + if (!all && (mrt->vif6_table[i].flags & VIFF_STATIC)) + continue; + mif6_delete(mrt, i, &list); } unregister_netdevice_many(&list); @@ -1563,7 +1564,7 @@ static void mroute_clean_tables(struct mr6_table *mrt) */ for (i = 0; i < MFC6_LINES; i++) { list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) { - if (c->mfc_flags & MFC_STATIC) + if (!all && (c->mfc_flags & MFC_STATIC)) continue; write_lock_bh(&mrt_lock); list_del(&c->list); @@ -1626,7 +1627,7 @@ int ip6mr_sk_done(struct sock *sk) net->ipv6.devconf_all); write_unlock_bh(&mrt_lock); - mroute_clean_tables(mrt); + mroute_clean_tables(mrt, false); err = 0; break; } From ed454cb60aad2600fd466c11c2b71fef86c746c5 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Sun, 22 Nov 2015 01:08:54 +0200 Subject: [PATCH 1445/2091] broadcom: fix PHY_ID_BCM5481 entry in the id table [ Upstream commit 3c25a860d17b7378822f35d8c9141db9507e3beb ] Commit fcb26ec5b18d ("broadcom: move all PHY_ID's to header") updated broadcom_tbl to use PHY_IDs, but incorrectly replaced 0x0143bca0 with PHY_ID_BCM5482 (making a duplicate entry, and completely omitting the original). Fix that. Fixes: fcb26ec5b18d ("broadcom: move all PHY_ID's to header") Signed-off-by: Aaro Koskinen Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/broadcom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index 9c71295f2fefb..85e640440bd9b 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -675,7 +675,7 @@ static struct mdio_device_id __maybe_unused broadcom_tbl[] = { { PHY_ID_BCM5461, 0xfffffff0 }, { PHY_ID_BCM54616S, 0xfffffff0 }, { PHY_ID_BCM5464, 0xfffffff0 }, - { PHY_ID_BCM5482, 0xfffffff0 }, + { PHY_ID_BCM5481, 0xfffffff0 }, { PHY_ID_BCM5482, 0xfffffff0 }, { PHY_ID_BCM50610, 0xfffffff0 }, { PHY_ID_BCM50610M, 0xfffffff0 }, From c9cc12699e4ca4dd3f74ea7050668a48fbbc9212 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Kube=C4=8Dek?= Date: Tue, 24 Nov 2015 15:07:11 +0100 Subject: [PATCH 1446/2091] ipv6: distinguish frag queues by device for multicast and link-local packets [ Upstream commit 264640fc2c5f4f913db5c73fa3eb1ead2c45e9d7 ] If a fragmented multicast packet is received on an ethernet device which has an active macvlan on top of it, each fragment is duplicated and received both on the underlying device and the macvlan. If some fragments for macvlan are processed before the whole packet for the underlying device is reassembled, the "overlapping fragments" test in ip6_frag_queue() discards the whole fragment queue. To resolve this, add device ifindex to the search key and require it to match reassembling multicast packets and packets to link-local addresses. Note: similar patch has been already submitted by Yoshifuji Hideaki in http://patchwork.ozlabs.org/patch/220979/ but got lost and forgotten for some reason. Signed-off-by: Michal Kubecek Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/ipv6.h | 1 + net/ipv6/netfilter/nf_conntrack_reasm.c | 5 +++-- net/ipv6/reassembly.c | 10 +++++++--- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/include/net/ipv6.h b/include/net/ipv6.h index eec8ad3c98432..ab8ebd319ca02 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -488,6 +488,7 @@ struct ip6_create_arg { u32 user; const struct in6_addr *src; const struct in6_addr *dst; + int iif; u8 ecn; }; diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 6f187c8d8a1bd..d235ed7f47ab5 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -190,7 +190,7 @@ static void nf_ct_frag6_expire(unsigned long data) /* Creation primitives. */ static inline struct frag_queue *fq_find(struct net *net, __be32 id, u32 user, struct in6_addr *src, - struct in6_addr *dst, u8 ecn) + struct in6_addr *dst, int iif, u8 ecn) { struct inet_frag_queue *q; struct ip6_create_arg arg; @@ -200,6 +200,7 @@ static inline struct frag_queue *fq_find(struct net *net, __be32 id, arg.user = user; arg.src = src; arg.dst = dst; + arg.iif = iif; arg.ecn = ecn; local_bh_disable(); @@ -603,7 +604,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user) fhdr = (struct frag_hdr *)skb_transport_header(clone); fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr, - ip6_frag_ecn(hdr)); + skb->dev ? 
skb->dev->ifindex : 0, ip6_frag_ecn(hdr)); if (fq == NULL) { pr_debug("Can't find and can't create new queue\n"); goto ret_orig; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 8ffa2c8cce774..9d1f6a28b2841 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -108,7 +108,10 @@ bool ip6_frag_match(const struct inet_frag_queue *q, const void *a) return fq->id == arg->id && fq->user == arg->user && ipv6_addr_equal(&fq->saddr, arg->src) && - ipv6_addr_equal(&fq->daddr, arg->dst); + ipv6_addr_equal(&fq->daddr, arg->dst) && + (arg->iif == fq->iif || + !(ipv6_addr_type(arg->dst) & (IPV6_ADDR_MULTICAST | + IPV6_ADDR_LINKLOCAL))); } EXPORT_SYMBOL(ip6_frag_match); @@ -180,7 +183,7 @@ static void ip6_frag_expire(unsigned long data) static struct frag_queue * fq_find(struct net *net, __be32 id, const struct in6_addr *src, - const struct in6_addr *dst, u8 ecn) + const struct in6_addr *dst, int iif, u8 ecn) { struct inet_frag_queue *q; struct ip6_create_arg arg; @@ -190,6 +193,7 @@ fq_find(struct net *net, __be32 id, const struct in6_addr *src, arg.user = IP6_DEFRAG_LOCAL_DELIVER; arg.src = src; arg.dst = dst; + arg.iif = iif; arg.ecn = ecn; hash = inet6_hash_frag(id, src, dst); @@ -551,7 +555,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb) } fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr, - ip6_frag_ecn(hdr)); + skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr)); if (fq) { int ret; From 28492d307272fa3090f026d559e0d3ea8328b4dd Mon Sep 17 00:00:00 2001 From: Quentin Casasnovas Date: Tue, 24 Nov 2015 17:13:21 -0500 Subject: [PATCH 1447/2091] RDS: fix race condition when sending a message on unbound socket [ Upstream commit 8c7188b23474cca017b3ef354c4a58456f68303a ] Sasha's found a NULL pointer dereference in the RDS connection code when sending a message to an apparently unbound socket. The problem is caused by the code checking if the socket is bound in rds_sendmsg(), which checks the rs_bound_addr field without taking a lock on the socket. This opens a race where rs_bound_addr is temporarily set but where the transport is not in rds_bind(), leading to a NULL pointer dereference when trying to dereference 'trans' in __rds_conn_create(). Vegard wrote a reproducer for this issue, so kindly ask him to share if you're interested. I cannot reproduce the NULL pointer dereference using Vegard's reproducer with this patch, whereas I could without. Complete earlier incomplete fix to CVE-2015-6937: 74e98eb08588 ("RDS: verify the underlying transport exists before creating a connection") Cc: David S. Miller Cc: stable@vger.kernel.org Reviewed-by: Vegard Nossum Reviewed-by: Sasha Levin Acked-by: Santosh Shilimkar Signed-off-by: Quentin Casasnovas Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/rds/connection.c | 6 ------ net/rds/send.c | 4 +++- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/net/rds/connection.c b/net/rds/connection.c index 9d66705f9d414..da6da57e5f36b 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -187,12 +187,6 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr, } } - if (trans == NULL) { - kmem_cache_free(rds_conn_slab, conn); - conn = ERR_PTR(-ENODEV); - goto out; - } - conn->c_trans = trans; ret = trans->conn_alloc(conn, gfp); diff --git a/net/rds/send.c b/net/rds/send.c index e9430f537f9c2..7b30c0f3180d1 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -986,11 +986,13 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) release_sock(sk); } - /* racing with another thread binding seems ok here */ + lock_sock(sk); if (daddr == 0 || rs->rs_bound_addr == 0) { + release_sock(sk); ret = -ENOTCONN; /* XXX not a great errno */ goto out; } + release_sock(sk); /* size of rm including all sgs */ ret = rds_rm_size(msg, payload_len); From 879e7d33bea095c7c3ed51fc19a58b6001e26797 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 30 Nov 2015 13:02:56 +0100 Subject: [PATCH 1448/2091] bpf, array: fix heap out-of-bounds access when updating elements [ Upstream commit fbca9d2d35c6ef1b323fae75cc9545005ba25097 ] During own review but also reported by Dmitry's syzkaller [1] it has been noticed that we trigger a heap out-of-bounds access on eBPF array maps when updating elements. This happens with each map whose map->value_size (specified during map creation time) is not multiple of 8 bytes. In array_map_alloc(), elem_size is round_up(attr->value_size, 8) and used to align array map slots for faster access. However, in function array_map_update_elem(), we update the element as ... memcpy(array->value + array->elem_size * index, value, array->elem_size); ... where we access 'value' out-of-bounds, since it was allocated from map_update_elem() from syscall side as kmalloc(map->value_size, GFP_USER) and later on copied through copy_from_user(value, uvalue, map->value_size). Thus, up to 7 bytes, we can access out-of-bounds. Same could happen from within an eBPF program, where in worst case we access beyond an eBPF program's designated stack. Since 1be7f75d1668 ("bpf: enable non-root eBPF programs") didn't hit an official release yet, it only affects priviledged users. In case of array_map_lookup_elem(), the verifier prevents eBPF programs from accessing beyond map->value_size through check_map_access(). Also from syscall side map_lookup_elem() only copies map->value_size back to user, so nothing could leak. [1] http://github.com/google/syzkaller Fixes: 28fbcfa08d8e ("bpf: add array type of eBPF maps") Reported-by: Dmitry Vyukov Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/arraymap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 8a6616583f38a..1c1b8ab340373 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -109,7 +109,7 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value, /* all elements already exist */ return -EEXIST; - memcpy(array->value + array->elem_size * index, value, array->elem_size); + memcpy(array->value + array->elem_size * index, value, map->value_size); return 0; } From 81ed463384847813faa59e692285fe775da7375f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 29 Nov 2015 19:37:57 -0800 Subject: [PATCH 1449/2091] ipv6: add complete rcu protection around np->opt [ Upstream commit 45f6fad84cc305103b28d73482b344d7f5b76f39 ] This patch addresses multiple problems : UDP/RAW sendmsg() need to get a stable struct ipv6_txoptions while socket is not locked : Other threads can change np->opt concurrently. Dmitry posted a syzkaller (http://github.com/google/syzkaller) program desmonstrating use-after-free. Starting with TCP/DCCP lockless listeners, tcp_v6_syn_recv_sock() and dccp_v6_request_recv_sock() also need to use RCU protection to dereference np->opt once (before calling ipv6_dup_options()) This patch adds full RCU protection to np->opt Reported-by: Dmitry Vyukov Signed-off-by: Eric Dumazet Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/ipv6.h | 2 +- include/net/ipv6.h | 21 +++++++++++++++++++- net/dccp/ipv6.c | 33 ++++++++++++++++++++------------ net/ipv6/af_inet6.c | 13 +++++++++---- net/ipv6/datagram.c | 4 +++- net/ipv6/exthdrs.c | 3 ++- net/ipv6/inet6_connection_sock.c | 11 ++++++++--- net/ipv6/ipv6_sockglue.c | 33 +++++++++++++++++++++----------- net/ipv6/raw.c | 8 ++++++-- net/ipv6/syncookies.c | 2 +- net/ipv6/tcp_ipv6.c | 28 ++++++++++++++++----------- net/ipv6/udp.c | 8 ++++++-- net/l2tp/l2tp_ip6.c | 8 ++++++-- 13 files changed, 122 insertions(+), 52 deletions(-) diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 82806c60aa427..e4b4649833227 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -224,7 +224,7 @@ struct ipv6_pinfo { struct ipv6_ac_socklist *ipv6_ac_list; struct ipv6_fl_socklist __rcu *ipv6_fl_list; - struct ipv6_txoptions *opt; + struct ipv6_txoptions __rcu *opt; struct sk_buff *pktoptions; struct sk_buff *rxpmtu; struct inet6_cork cork; diff --git a/include/net/ipv6.h b/include/net/ipv6.h index ab8ebd319ca02..df555ecd40026 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -205,6 +205,7 @@ extern rwlock_t ip6_ra_lock; */ struct ipv6_txoptions { + atomic_t refcnt; /* Length of this structure */ int tot_len; @@ -217,7 +218,7 @@ struct ipv6_txoptions { struct ipv6_opt_hdr *dst0opt; struct ipv6_rt_hdr *srcrt; /* Routing Header */ struct ipv6_opt_hdr *dst1opt; - + struct rcu_head rcu; /* Option buffer, as read by IPV6_PKTOPTIONS, starts here. 
*/ }; @@ -250,6 +251,24 @@ struct ipv6_fl_socklist { struct rcu_head rcu; }; +static inline struct ipv6_txoptions *txopt_get(const struct ipv6_pinfo *np) +{ + struct ipv6_txoptions *opt; + + rcu_read_lock(); + opt = rcu_dereference(np->opt); + if (opt && !atomic_inc_not_zero(&opt->refcnt)) + opt = NULL; + rcu_read_unlock(); + return opt; +} + +static inline void txopt_put(struct ipv6_txoptions *opt) +{ + if (opt && atomic_dec_and_test(&opt->refcnt)) + kfree_rcu(opt, rcu); +} + struct ip6_flowlabel *fl6_sock_lookup(struct sock *sk, __be32 label); struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions *opt_space, struct ip6_flowlabel *fl, diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 5165571f397aa..a0490508d2135 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -202,7 +202,9 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req) security_req_classify_flow(req, flowi6_to_flowi(&fl6)); - final_p = fl6_update_dst(&fl6, np->opt, &final); + rcu_read_lock(); + final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final); + rcu_read_unlock(); dst = ip6_dst_lookup_flow(sk, &fl6, final_p); if (IS_ERR(dst)) { @@ -219,7 +221,10 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req) &ireq->ir_v6_loc_addr, &ireq->ir_v6_rmt_addr); fl6.daddr = ireq->ir_v6_rmt_addr; - err = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass); + rcu_read_lock(); + err = ip6_xmit(sk, skb, &fl6, rcu_dereference(np->opt), + np->tclass); + rcu_read_unlock(); err = net_xmit_eval(err); } @@ -415,6 +420,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, { struct inet_request_sock *ireq = inet_rsk(req); struct ipv6_pinfo *newnp, *np = inet6_sk(sk); + struct ipv6_txoptions *opt; struct inet_sock *newinet; struct dccp6_sock *newdp6; struct sock *newsk; @@ -534,13 +540,15 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, * Yes, keeping reference count would be much more clever, but we make * one more one thing there: reattach optmem to newsk. 
*/ - if (np->opt != NULL) - newnp->opt = ipv6_dup_options(newsk, np->opt); - + opt = rcu_dereference(np->opt); + if (opt) { + opt = ipv6_dup_options(newsk, opt); + RCU_INIT_POINTER(newnp->opt, opt); + } inet_csk(newsk)->icsk_ext_hdr_len = 0; - if (newnp->opt != NULL) - inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen + - newnp->opt->opt_flen); + if (opt) + inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen + + opt->opt_flen; dccp_sync_mss(newsk, dst_mtu(dst)); @@ -793,6 +801,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, struct ipv6_pinfo *np = inet6_sk(sk); struct dccp_sock *dp = dccp_sk(sk); struct in6_addr *saddr = NULL, *final_p, final; + struct ipv6_txoptions *opt; struct flowi6 fl6; struct dst_entry *dst; int addr_type; @@ -892,7 +901,8 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, fl6.fl6_sport = inet->inet_sport; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - final_p = fl6_update_dst(&fl6, np->opt, &final); + opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk)); + final_p = fl6_update_dst(&fl6, opt, &final); dst = ip6_dst_lookup_flow(sk, &fl6, final_p); if (IS_ERR(dst)) { @@ -912,9 +922,8 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, __ip6_dst_store(sk, dst, NULL, NULL); icsk->icsk_ext_hdr_len = 0; - if (np->opt != NULL) - icsk->icsk_ext_hdr_len = (np->opt->opt_flen + - np->opt->opt_nflen); + if (opt) + icsk->icsk_ext_hdr_len = opt->opt_flen + opt->opt_nflen; inet->inet_dport = usin->sin6_port; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index eef63b394c5ab..2d044d2a2ccff 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -425,9 +425,11 @@ void inet6_destroy_sock(struct sock *sk) /* Free tx options */ - opt = xchg(&np->opt, NULL); - if (opt) - sock_kfree_s(sk, opt, opt->tot_len); + opt = xchg((__force struct ipv6_txoptions **)&np->opt, NULL); + if (opt) { + atomic_sub(opt->tot_len, &sk->sk_omem_alloc); + txopt_put(opt); + } } EXPORT_SYMBOL_GPL(inet6_destroy_sock); @@ -656,7 +658,10 @@ int inet6_sk_rebuild_header(struct sock *sk) fl6.fl6_sport = inet->inet_sport; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - final_p = fl6_update_dst(&fl6, np->opt, &final); + rcu_read_lock(); + final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), + &final); + rcu_read_unlock(); dst = ip6_dst_lookup_flow(sk, &fl6, final_p); if (IS_ERR(dst)) { diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index b10a88986a989..13ca4cf5616f8 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -167,8 +167,10 @@ static int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int a security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - opt = flowlabel ? flowlabel->opt : np->opt; + rcu_read_lock(); + opt = flowlabel ? 
flowlabel->opt : rcu_dereference(np->opt); final_p = fl6_update_dst(&fl6, opt, &final); + rcu_read_unlock(); dst = ip6_dst_lookup_flow(sk, &fl6, final_p); err = 0; diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index a7bbbe45570b2..adbd6958c3984 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -727,6 +727,7 @@ ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt) *((char **)&opt2->dst1opt) += dif; if (opt2->srcrt) *((char **)&opt2->srcrt) += dif; + atomic_set(&opt2->refcnt, 1); } return opt2; } @@ -790,7 +791,7 @@ ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt, return ERR_PTR(-ENOBUFS); memset(opt2, 0, tot_len); - + atomic_set(&opt2->refcnt, 1); opt2->tot_len = tot_len; p = (char *)(opt2 + 1); diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 6927f3fb5597f..9beed302eb36f 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -77,7 +77,9 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk, memset(fl6, 0, sizeof(*fl6)); fl6->flowi6_proto = IPPROTO_TCP; fl6->daddr = ireq->ir_v6_rmt_addr; - final_p = fl6_update_dst(fl6, np->opt, &final); + rcu_read_lock(); + final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final); + rcu_read_unlock(); fl6->saddr = ireq->ir_v6_loc_addr; fl6->flowi6_oif = ireq->ir_iif; fl6->flowi6_mark = ireq->ir_mark; @@ -207,7 +209,9 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk, fl6->fl6_dport = inet->inet_dport; security_sk_classify_flow(sk, flowi6_to_flowi(fl6)); - final_p = fl6_update_dst(fl6, np->opt, &final); + rcu_read_lock(); + final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final); + rcu_read_unlock(); dst = __inet6_csk_dst_check(sk, np->dst_cookie); if (!dst) { @@ -240,7 +244,8 @@ int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl_unused /* Restore final destination back after routing done */ fl6.daddr = sk->sk_v6_daddr; - res = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass); + res = ip6_xmit(sk, skb, &fl6, rcu_dereference(np->opt), + np->tclass); rcu_read_unlock(); return res; } diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 63e6956917c9c..4449ad1f81147 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -111,7 +111,8 @@ struct ipv6_txoptions *ipv6_update_options(struct sock *sk, icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie); } } - opt = xchg(&inet6_sk(sk)->opt, opt); + opt = xchg((__force struct ipv6_txoptions **)&inet6_sk(sk)->opt, + opt); sk_dst_reset(sk); return opt; @@ -231,9 +232,12 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, sk->sk_socket->ops = &inet_dgram_ops; sk->sk_family = PF_INET; } - opt = xchg(&np->opt, NULL); - if (opt) - sock_kfree_s(sk, opt, opt->tot_len); + opt = xchg((__force struct ipv6_txoptions **)&np->opt, + NULL); + if (opt) { + atomic_sub(opt->tot_len, &sk->sk_omem_alloc); + txopt_put(opt); + } pktopt = xchg(&np->pktoptions, NULL); kfree_skb(pktopt); @@ -403,7 +407,8 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, if (optname != IPV6_RTHDR && !ns_capable(net->user_ns, CAP_NET_RAW)) break; - opt = ipv6_renew_options(sk, np->opt, optname, + opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk)); + opt = ipv6_renew_options(sk, opt, optname, (struct ipv6_opt_hdr __user *)optval, optlen); if (IS_ERR(opt)) { @@ -432,8 +437,10 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, retv = 0; opt = ipv6_update_options(sk, opt); sticky_done: - if 
(opt) - sock_kfree_s(sk, opt, opt->tot_len); + if (opt) { + atomic_sub(opt->tot_len, &sk->sk_omem_alloc); + txopt_put(opt); + } break; } @@ -486,6 +493,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, break; memset(opt, 0, sizeof(*opt)); + atomic_set(&opt->refcnt, 1); opt->tot_len = sizeof(*opt) + optlen; retv = -EFAULT; if (copy_from_user(opt+1, optval, optlen)) @@ -502,8 +510,10 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, retv = 0; opt = ipv6_update_options(sk, opt); done: - if (opt) - sock_kfree_s(sk, opt, opt->tot_len); + if (opt) { + atomic_sub(opt->tot_len, &sk->sk_omem_alloc); + txopt_put(opt); + } break; } case IPV6_UNICAST_HOPS: @@ -1110,10 +1120,11 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, case IPV6_RTHDR: case IPV6_DSTOPTS: { + struct ipv6_txoptions *opt; lock_sock(sk); - len = ipv6_getsockopt_sticky(sk, np->opt, - optname, optval, len); + opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk)); + len = ipv6_getsockopt_sticky(sk, opt, optname, optval, len); release_sock(sk); /* check if ipv6_getsockopt_sticky() returns err code */ if (len < 0) diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 8072bd4139b75..2c639aee12cb6 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -731,6 +731,7 @@ static int raw6_getfrag(void *from, char *to, int offset, int len, int odd, static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { + struct ipv6_txoptions *opt_to_free = NULL; struct ipv6_txoptions opt_space; DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); struct in6_addr *daddr, *final_p, final; @@ -837,8 +838,10 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (!(opt->opt_nflen|opt->opt_flen)) opt = NULL; } - if (!opt) - opt = np->opt; + if (!opt) { + opt = txopt_get(np); + opt_to_free = opt; + } if (flowlabel) opt = fl6_merge_options(&opt_space, flowlabel, opt); opt = ipv6_fixup_options(&opt_space, opt); @@ -901,6 +904,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) dst_release(dst); out: fl6_sock_release(flowlabel); + txopt_put(opt_to_free); return err < 0 ? 
err : len; do_confirm: dst_confirm(dst); diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 21bc2eb53c57b..a4cf004f44d0b 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -242,7 +242,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_proto = IPPROTO_TCP; fl6.daddr = ireq->ir_v6_rmt_addr; - final_p = fl6_update_dst(&fl6, np->opt, &final); + final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final); fl6.saddr = ireq->ir_v6_loc_addr; fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.flowi6_mark = ireq->ir_mark; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index e541d68dba8bd..cfb27f56c62fd 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -121,6 +121,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, struct ipv6_pinfo *np = inet6_sk(sk); struct tcp_sock *tp = tcp_sk(sk); struct in6_addr *saddr = NULL, *final_p, final; + struct ipv6_txoptions *opt; struct rt6_info *rt; struct flowi6 fl6; struct dst_entry *dst; @@ -237,7 +238,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, fl6.fl6_dport = usin->sin6_port; fl6.fl6_sport = inet->inet_sport; - final_p = fl6_update_dst(&fl6, np->opt, &final); + opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk)); + final_p = fl6_update_dst(&fl6, opt, &final); security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); @@ -266,9 +268,9 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, tcp_fetch_timewait_stamp(sk, dst); icsk->icsk_ext_hdr_len = 0; - if (np->opt) - icsk->icsk_ext_hdr_len = (np->opt->opt_flen + - np->opt->opt_nflen); + if (opt) + icsk->icsk_ext_hdr_len = opt->opt_flen + + opt->opt_nflen; tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); @@ -464,7 +466,8 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst, fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts)); skb_set_queue_mapping(skb, queue_mapping); - err = ip6_xmit(sk, skb, fl6, np->opt, np->tclass); + err = ip6_xmit(sk, skb, fl6, rcu_dereference(np->opt), + np->tclass); err = net_xmit_eval(err); } @@ -994,6 +997,7 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, struct inet_request_sock *ireq; struct ipv6_pinfo *newnp, *np = inet6_sk(sk); struct tcp6_sock *newtcp6sk; + struct ipv6_txoptions *opt; struct inet_sock *newinet; struct tcp_sock *newtp; struct sock *newsk; @@ -1129,13 +1133,15 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, but we make one more one thing there: reattach optmem to newsk. 
*/ - if (np->opt) - newnp->opt = ipv6_dup_options(newsk, np->opt); - + opt = rcu_dereference(np->opt); + if (opt) { + opt = ipv6_dup_options(newsk, opt); + RCU_INIT_POINTER(newnp->opt, opt); + } inet_csk(newsk)->icsk_ext_hdr_len = 0; - if (newnp->opt) - inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen + - newnp->opt->opt_flen); + if (opt) + inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen + + opt->opt_flen; tcp_ca_openreq_child(newsk, dst); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index e51fc3eee6dbd..7333f3575fc54 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1107,6 +1107,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); struct in6_addr *daddr, *final_p, final; struct ipv6_txoptions *opt = NULL; + struct ipv6_txoptions *opt_to_free = NULL; struct ip6_flowlabel *flowlabel = NULL; struct flowi6 fl6; struct dst_entry *dst; @@ -1260,8 +1261,10 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) opt = NULL; connected = 0; } - if (!opt) - opt = np->opt; + if (!opt) { + opt = txopt_get(np); + opt_to_free = opt; + } if (flowlabel) opt = fl6_merge_options(&opt_space, flowlabel, opt); opt = ipv6_fixup_options(&opt_space, opt); @@ -1370,6 +1373,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) out: dst_release(dst); fl6_sock_release(flowlabel); + txopt_put(opt_to_free); if (!err) return len; /* diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index d1ded3777815e..0ce9da948ad7f 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -486,6 +486,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) DECLARE_SOCKADDR(struct sockaddr_l2tpip6 *, lsa, msg->msg_name); struct in6_addr *daddr, *final_p, final; struct ipv6_pinfo *np = inet6_sk(sk); + struct ipv6_txoptions *opt_to_free = NULL; struct ipv6_txoptions *opt = NULL; struct ip6_flowlabel *flowlabel = NULL; struct dst_entry *dst = NULL; @@ -575,8 +576,10 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) opt = NULL; } - if (opt == NULL) - opt = np->opt; + if (!opt) { + opt = txopt_get(np); + opt_to_free = opt; + } if (flowlabel) opt = fl6_merge_options(&opt_space, flowlabel, opt); opt = ipv6_fixup_options(&opt_space, opt); @@ -631,6 +634,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) dst_release(dst); out: fl6_sock_release(flowlabel); + txopt_put(opt_to_free); return err < 0 ? err : len; From e08ca7be16f178988ccf95f96edf6eff5d89bb0a Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Tue, 1 Dec 2015 01:14:48 +0300 Subject: [PATCH 1450/2091] net/neighbour: fix crash at dumping device-agnostic proxy entries [ Upstream commit 6adc5fd6a142c6e2c80574c1db0c7c17dedaa42e ] Proxy entries could have null pointer to net-device. Signed-off-by: Konstantin Khlebnikov Fixes: 84920c1420e2 ("net: Allow ipv6 proxies and arp proxies be shown with iproute2") Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/neighbour.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 2237c1b3cdd20..d6e8cfcb6f7cd 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2207,7 +2207,7 @@ static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn, ndm->ndm_pad2 = 0; ndm->ndm_flags = pn->flags | NTF_PROXY; ndm->ndm_type = RTN_UNICAST; - ndm->ndm_ifindex = pn->dev->ifindex; + ndm->ndm_ifindex = pn->dev ? 
pn->dev->ifindex : 0; ndm->ndm_state = NUD_NONE; if (nla_put(skb, NDA_DST, tbl->key_len, pn->key)) @@ -2282,7 +2282,7 @@ static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, if (h > s_h) s_idx = 0; for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) { - if (dev_net(n->dev) != net) + if (pneigh_net(n) != net) continue; if (idx < s_idx) goto next; From cbff65fb298f5b9d147eb39fd4fc540d57748f69 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 1 Dec 2015 07:20:07 -0800 Subject: [PATCH 1451/2091] ipv6: sctp: implement sctp_v6_destroy_sock() [ Upstream commit 602dd62dfbda3e63a2d6a3cbde953ebe82bf5087 ] Dmitry Vyukov reported a memory leak using IPV6 SCTP sockets. We need to call inet6_destroy_sock() to properly release inet6 specific fields. Reported-by: Dmitry Vyukov Signed-off-by: Eric Dumazet Acked-by: Daniel Borkmann Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/socket.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 5f6c4e61325b6..66d7960750509 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -7387,6 +7387,13 @@ struct proto sctp_prot = { #if IS_ENABLED(CONFIG_IPV6) +#include +static void sctp_v6_destroy_sock(struct sock *sk) +{ + sctp_destroy_sock(sk); + inet6_destroy_sock(sk); +} + struct proto sctpv6_prot = { .name = "SCTPv6", .owner = THIS_MODULE, @@ -7396,7 +7403,7 @@ struct proto sctpv6_prot = { .accept = sctp_accept, .ioctl = sctp_ioctl, .init = sctp_init_sock, - .destroy = sctp_destroy_sock, + .destroy = sctp_v6_destroy_sock, .shutdown = sctp_shutdown, .setsockopt = sctp_setsockopt, .getsockopt = sctp_getsockopt, From 3d817ad351a182e94172ce0a2bbe4a168995ff53 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 1 Dec 2015 20:08:51 -0800 Subject: [PATCH 1452/2091] net_sched: fix qdisc_tree_decrease_qlen() races [ Upstream commit 4eaf3b84f2881c9c028f1d5e76c52ab575fe3a66 ] qdisc_tree_decrease_qlen() suffers from two problems on multiqueue devices. One problem is that it updates sch->q.qlen and sch->qstats.drops on the mq/mqprio root qdisc, while it should not : Daniele reported underflows errors : [ 681.774821] PAX: sch->q.qlen: 0 n: 1 [ 681.774825] PAX: size overflow detected in function qdisc_tree_decrease_qlen net/sched/sch_api.c:769 cicus.693_49 min, count: 72, decl: qlen; num: 0; context: sk_buff_head; [ 681.774954] CPU: 2 PID: 19 Comm: ksoftirqd/2 Tainted: G O 4.2.6.201511282239-1-grsec #1 [ 681.774955] Hardware name: ASUSTeK COMPUTER INC. X302LJ/X302LJ, BIOS X302LJ.202 03/05/2015 [ 681.774956] ffffffffa9a04863 0000000000000000 0000000000000000 ffffffffa990ff7c [ 681.774959] ffffc90000d3bc38 ffffffffa95d2810 0000000000000007 ffffffffa991002b [ 681.774960] ffffc90000d3bc68 ffffffffa91a44f4 0000000000000001 0000000000000001 [ 681.774962] Call Trace: [ 681.774967] [] dump_stack+0x4c/0x7f [ 681.774970] [] report_size_overflow+0x34/0x50 [ 681.774972] [] qdisc_tree_decrease_qlen+0x152/0x160 [ 681.774976] [] fq_codel_dequeue+0x7b1/0x820 [sch_fq_codel] [ 681.774978] [] ? qdisc_peek_dequeued+0xa0/0xa0 [sch_fq_codel] [ 681.774980] [] __qdisc_run+0x4d/0x1d0 [ 681.774983] [] net_tx_action+0xc2/0x160 [ 681.774985] [] __do_softirq+0xf1/0x200 [ 681.774987] [] run_ksoftirqd+0x1e/0x30 [ 681.774989] [] smpboot_thread_fn+0x150/0x260 [ 681.774991] [] ? sort_range+0x40/0x40 [ 681.774992] [] kthread+0xe4/0x100 [ 681.774994] [] ? 
kthread_worker_fn+0x170/0x170 [ 681.774995] [] ret_from_fork+0x3e/0x70 mq/mqprio have their own ways to report qlen/drops by folding stats on all their queues, with appropriate locking. A second problem is that qdisc_tree_decrease_qlen() calls qdisc_lookup() without proper locking : concurrent qdisc updates could corrupt the list that qdisc_match_from_root() parses to find a qdisc given its handle. Fix first problem adding a TCQ_F_NOPARENT qdisc flag that qdisc_tree_decrease_qlen() can use to abort its tree traversal, as soon as it meets a mq/mqprio qdisc children. Second problem can be fixed by RCU protection. Qdisc are already freed after RCU grace period, so qdisc_list_add() and qdisc_list_del() simply have to use appropriate rcu list variants. A future patch will add a per struct netdev_queue list anchor, so that qdisc_tree_decrease_qlen() can have more efficient lookups. Reported-by: Daniele Fucini Signed-off-by: Eric Dumazet Cc: Cong Wang Cc: Jamal Hadi Salim Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/sch_generic.h | 3 +++ net/sched/sch_api.c | 27 ++++++++++++++++++--------- net/sched/sch_generic.c | 2 +- net/sched/sch_mq.c | 4 ++-- net/sched/sch_mqprio.c | 4 ++-- 5 files changed, 26 insertions(+), 14 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 6d778efcfdfd6..080b657ef8fb2 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -61,6 +61,9 @@ struct Qdisc { */ #define TCQ_F_WARN_NONWC (1 << 16) #define TCQ_F_CPUSTATS 0x20 /* run using percpu statistics */ +#define TCQ_F_NOPARENT 0x40 /* root of its hierarchy : + * qdisc_tree_decrease_qlen() should stop. + */ u32 limit; const struct Qdisc_ops *ops; struct qdisc_size_table __rcu *stab; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 1e1c89e51a118..d4b6f3682c14c 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -253,7 +253,8 @@ int qdisc_set_default(const char *name) } /* We know handle. Find qdisc among all qdisc's attached to device - (root qdisc, all its children, children of children etc.) + * (root qdisc, all its children, children of children etc.) 
+ * Note: caller either uses rtnl or rcu_read_lock() */ static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle) @@ -264,7 +265,7 @@ static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle) root->handle == handle) return root; - list_for_each_entry(q, &root->list, list) { + list_for_each_entry_rcu(q, &root->list, list) { if (q->handle == handle) return q; } @@ -277,15 +278,18 @@ void qdisc_list_add(struct Qdisc *q) struct Qdisc *root = qdisc_dev(q)->qdisc; WARN_ON_ONCE(root == &noop_qdisc); - list_add_tail(&q->list, &root->list); + ASSERT_RTNL(); + list_add_tail_rcu(&q->list, &root->list); } } EXPORT_SYMBOL(qdisc_list_add); void qdisc_list_del(struct Qdisc *q) { - if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) - list_del(&q->list); + if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) { + ASSERT_RTNL(); + list_del_rcu(&q->list); + } } EXPORT_SYMBOL(qdisc_list_del); @@ -750,14 +754,18 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n) if (n == 0) return; drops = max_t(int, n, 0); + rcu_read_lock(); while ((parentid = sch->parent)) { if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS)) - return; + break; + if (sch->flags & TCQ_F_NOPARENT) + break; + /* TODO: perform the search on a per txq basis */ sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid)); if (sch == NULL) { - WARN_ON(parentid != TC_H_ROOT); - return; + WARN_ON_ONCE(parentid != TC_H_ROOT); + break; } cops = sch->ops->cl_ops; if (cops->qlen_notify) { @@ -768,6 +776,7 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n) sch->q.qlen -= n; __qdisc_qstats_drop(sch, drops); } + rcu_read_unlock(); } EXPORT_SYMBOL(qdisc_tree_decrease_qlen); @@ -941,7 +950,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, } lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock); if (!netif_is_multiqueue(dev)) - sch->flags |= TCQ_F_ONETXQUEUE; + sch->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; } sch->handle = handle; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 6efca30894aad..b453270be3fd4 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -743,7 +743,7 @@ static void attach_one_default_qdisc(struct net_device *dev, return; } if (!netif_is_multiqueue(dev)) - qdisc->flags |= TCQ_F_ONETXQUEUE; + qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; } dev_queue->qdisc_sleeping = qdisc; } diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index f3cbaecd283af..3e82f047caaf4 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -63,7 +63,7 @@ static int mq_init(struct Qdisc *sch, struct nlattr *opt) if (qdisc == NULL) goto err; priv->qdiscs[ntx] = qdisc; - qdisc->flags |= TCQ_F_ONETXQUEUE; + qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; } sch->flags |= TCQ_F_MQROOT; @@ -156,7 +156,7 @@ static int mq_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new, *old = dev_graft_qdisc(dev_queue, new); if (new) - new->flags |= TCQ_F_ONETXQUEUE; + new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; if (dev->flags & IFF_UP) dev_activate(dev); return 0; diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index 3811a745452cf..ad70ecf57ce79 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -132,7 +132,7 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) goto err; } priv->qdiscs[i] = qdisc; - qdisc->flags |= TCQ_F_ONETXQUEUE; + qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; } /* If the mqprio options indicate that hardware should own @@ -209,7 +209,7 @@ static int 
mqprio_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new, *old = dev_graft_qdisc(dev_queue, new); if (new) - new->flags |= TCQ_F_ONETXQUEUE; + new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; if (dev->flags & IFF_UP) dev_activate(dev); From 83881c15e5cef225beec81a37e0906125e636d75 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 13 Oct 2015 15:15:00 +0100 Subject: [PATCH 1453/2091] Btrfs: fix file corruption and data loss after cloning inline extents commit 8039d87d9e473aeb740d4fdbd59b9d2f89b2ced9 upstream. Currently the clone ioctl allows to clone an inline extent from one file to another that already has other (non-inlined) extents. This is a problem because btrfs is not designed to deal with files having inline and regular extents, if a file has an inline extent then it must be the only extent in the file and must start at file offset 0. Having a file with an inline extent followed by regular extents results in EIO errors when doing reads or writes against the first 4K of the file. Also, the clone ioctl allows one to lose data if the source file consists of a single inline extent, with a size of N bytes, and the destination file consists of a single inline extent with a size of M bytes, where we have M > N. In this case the clone operation removes the inline extent from the destination file and then copies the inline extent from the source file into the destination file - we lose the M - N bytes from the destination file, a read operation will get the value 0x00 for any bytes in the the range [N, M] (the destination inode's i_size remained as M, that's why we can read past N bytes). So fix this by not allowing such destructive operations to happen and return errno EOPNOTSUPP to user space. Currently the fstest btrfs/035 tests the data loss case but it totally ignores this - i.e. expects the operation to succeed and does not check the we got data loss. The following test case for fstests exercises all these cases that result in file corruption and data loss: seq=`basename $0` seqres=$RESULT_DIR/$seq echo "QA output created by $seq" tmp=/tmp/$$ status=1 # failure is the default! trap "_cleanup; exit \$status" 0 1 2 3 15 _cleanup() { rm -f $tmp.* } # get standard environment, filters and checks . ./common/rc . ./common/filter # real QA test starts here _need_to_be_root _supported_fs btrfs _supported_os Linux _require_scratch _require_cloner _require_btrfs_fs_feature "no_holes" _require_btrfs_mkfs_feature "no-holes" rm -f $seqres.full test_cloning_inline_extents() { local mkfs_opts=$1 local mount_opts=$2 _scratch_mkfs $mkfs_opts >>$seqres.full 2>&1 _scratch_mount $mount_opts # File bar, the source for all the following clone operations, consists # of a single inline extent (50 bytes). $XFS_IO_PROG -f -c "pwrite -S 0xbb 0 50" $SCRATCH_MNT/bar \ | _filter_xfs_io # Test cloning into a file with an extent (non-inlined) where the # destination offset overlaps that extent. It should not be possible to # clone the inline extent from file bar into this file. $XFS_IO_PROG -f -c "pwrite -S 0xaa 0K 16K" $SCRATCH_MNT/foo \ | _filter_xfs_io $CLONER_PROG -s 0 -d 0 -l 0 $SCRATCH_MNT/bar $SCRATCH_MNT/foo # Doing IO against any range in the first 4K of the file should work. # Due to a past clone ioctl bug which allowed cloning the inline extent, # these operations resulted in EIO errors. echo "File foo data after clone operation:" # All bytes should have the value 0xaa (clone operation failed and did # not modify our file). 
od -t x1 $SCRATCH_MNT/foo $XFS_IO_PROG -c "pwrite -S 0xcc 0 100" $SCRATCH_MNT/foo | _filter_xfs_io # Test cloning the inline extent against a file which has a hole in its # first 4K followed by a non-inlined extent. It should not be possible # as well to clone the inline extent from file bar into this file. $XFS_IO_PROG -f -c "pwrite -S 0xdd 4K 12K" $SCRATCH_MNT/foo2 \ | _filter_xfs_io $CLONER_PROG -s 0 -d 0 -l 0 $SCRATCH_MNT/bar $SCRATCH_MNT/foo2 # Doing IO against any range in the first 4K of the file should work. # Due to a past clone ioctl bug which allowed cloning the inline extent, # these operations resulted in EIO errors. echo "File foo2 data after clone operation:" # All bytes should have the value 0x00 (clone operation failed and did # not modify our file). od -t x1 $SCRATCH_MNT/foo2 $XFS_IO_PROG -c "pwrite -S 0xee 0 90" $SCRATCH_MNT/foo2 | _filter_xfs_io # Test cloning the inline extent against a file which has a size of zero # but has a prealloc extent. It should not be possible as well to clone # the inline extent from file bar into this file. $XFS_IO_PROG -f -c "falloc -k 0 1M" $SCRATCH_MNT/foo3 | _filter_xfs_io $CLONER_PROG -s 0 -d 0 -l 0 $SCRATCH_MNT/bar $SCRATCH_MNT/foo3 # Doing IO against any range in the first 4K of the file should work. # Due to a past clone ioctl bug which allowed cloning the inline extent, # these operations resulted in EIO errors. echo "First 50 bytes of foo3 after clone operation:" # Should not be able to read any bytes, file has 0 bytes i_size (the # clone operation failed and did not modify our file). od -t x1 $SCRATCH_MNT/foo3 $XFS_IO_PROG -c "pwrite -S 0xff 0 90" $SCRATCH_MNT/foo3 | _filter_xfs_io # Test cloning the inline extent against a file which consists of a # single inline extent that has a size not greater than the size of # bar's inline extent (40 < 50). # It should be possible to do the extent cloning from bar to this file. $XFS_IO_PROG -f -c "pwrite -S 0x01 0 40" $SCRATCH_MNT/foo4 \ | _filter_xfs_io $CLONER_PROG -s 0 -d 0 -l 0 $SCRATCH_MNT/bar $SCRATCH_MNT/foo4 # Doing IO against any range in the first 4K of the file should work. echo "File foo4 data after clone operation:" # Must match file bar's content. od -t x1 $SCRATCH_MNT/foo4 $XFS_IO_PROG -c "pwrite -S 0x02 0 90" $SCRATCH_MNT/foo4 | _filter_xfs_io # Test cloning the inline extent against a file which consists of a # single inline extent that has a size greater than the size of bar's # inline extent (60 > 50). # It should not be possible to clone the inline extent from file bar # into this file. $XFS_IO_PROG -f -c "pwrite -S 0x03 0 60" $SCRATCH_MNT/foo5 \ | _filter_xfs_io $CLONER_PROG -s 0 -d 0 -l 0 $SCRATCH_MNT/bar $SCRATCH_MNT/foo5 # Reading the file should not fail. echo "File foo5 data after clone operation:" # Must have a size of 60 bytes, with all bytes having a value of 0x03 # (the clone operation failed and did not modify our file). od -t x1 $SCRATCH_MNT/foo5 # Test cloning the inline extent against a file which has no extents but # has a size greater than bar's inline extent (16K > 50). # It should not be possible to clone the inline extent from file bar # into this file. $XFS_IO_PROG -f -c "truncate 16K" $SCRATCH_MNT/foo6 | _filter_xfs_io $CLONER_PROG -s 0 -d 0 -l 0 $SCRATCH_MNT/bar $SCRATCH_MNT/foo6 # Reading the file should not fail. echo "File foo6 data after clone operation:" # Must have a size of 16K, with all bytes having a value of 0x00 (the # clone operation failed and did not modify our file). 
od -t x1 $SCRATCH_MNT/foo6 # Test cloning the inline extent against a file which has no extents but # has a size not greater than bar's inline extent (30 < 50). # It should be possible to clone the inline extent from file bar into # this file. $XFS_IO_PROG -f -c "truncate 30" $SCRATCH_MNT/foo7 | _filter_xfs_io $CLONER_PROG -s 0 -d 0 -l 0 $SCRATCH_MNT/bar $SCRATCH_MNT/foo7 # Reading the file should not fail. echo "File foo7 data after clone operation:" # Must have a size of 50 bytes, with all bytes having a value of 0xbb. od -t x1 $SCRATCH_MNT/foo7 # Test cloning the inline extent against a file which has a size not # greater than the size of bar's inline extent (20 < 50) but has # a prealloc extent that goes beyond the file's size. It should not be # possible to clone the inline extent from bar into this file. $XFS_IO_PROG -f -c "falloc -k 0 1M" \ -c "pwrite -S 0x88 0 20" \ $SCRATCH_MNT/foo8 | _filter_xfs_io $CLONER_PROG -s 0 -d 0 -l 0 $SCRATCH_MNT/bar $SCRATCH_MNT/foo8 echo "File foo8 data after clone operation:" # Must have a size of 20 bytes, with all bytes having a value of 0x88 # (the clone operation did not modify our file). od -t x1 $SCRATCH_MNT/foo8 _scratch_unmount } echo -e "\nTesting without compression and without the no-holes feature...\n" test_cloning_inline_extents echo -e "\nTesting with compression and without the no-holes feature...\n" test_cloning_inline_extents "" "-o compress" echo -e "\nTesting without compression and with the no-holes feature...\n" test_cloning_inline_extents "-O no-holes" "" echo -e "\nTesting with compression and with the no-holes feature...\n" test_cloning_inline_extents "-O no-holes" "-o compress" status=0 exit Signed-off-by: Filipe Manana Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/ioctl.c | 195 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 152 insertions(+), 43 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 8b2c82ce36b3a..87c720865ebf0 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3166,6 +3166,150 @@ static void clone_update_extent_map(struct inode *inode, &BTRFS_I(inode)->runtime_flags); } +/* + * Make sure we do not end up inserting an inline extent into a file that has + * already other (non-inline) extents. If a file has an inline extent it can + * not have any other extents and the (single) inline extent must start at the + * file offset 0. Failing to respect these rules will lead to file corruption, + * resulting in EIO errors on read/write operations, hitting BUG_ON's in mm, etc + * + * We can have extents that have been already written to disk or we can have + * dirty ranges still in delalloc, in which case the extent maps and items are + * created only when we run delalloc, and the delalloc ranges might fall outside + * the range we are currently locking in the inode's io tree. So we check the + * inode's i_size because of that (i_size updates are done while holding the + * i_mutex, which we are holding here). + * We also check to see if the inode has a size not greater than "datal" but has + * extents beyond it, due to an fallocate with FALLOC_FL_KEEP_SIZE (and we are + * protected against such concurrent fallocate calls by the i_mutex). + * + * If the file has no extents but a size greater than datal, do not allow the + * copy because we would need turn the inline extent into a non-inline one (even + * with NO_HOLES enabled). 
If we find our destination inode only has one inline + * extent, just overwrite it with the source inline extent if its size is less + * than the source extent's size, or we could copy the source inline extent's + * data into the destination inode's inline extent if the later is greater then + * the former. + */ +static int clone_copy_inline_extent(struct inode *src, + struct inode *dst, + struct btrfs_trans_handle *trans, + struct btrfs_path *path, + struct btrfs_key *new_key, + const u64 drop_start, + const u64 datal, + const u64 skip, + const u64 size, + char *inline_data) +{ + struct btrfs_root *root = BTRFS_I(dst)->root; + const u64 aligned_end = ALIGN(new_key->offset + datal, + root->sectorsize); + int ret; + struct btrfs_key key; + + if (new_key->offset > 0) + return -EOPNOTSUPP; + + key.objectid = btrfs_ino(dst); + key.type = BTRFS_EXTENT_DATA_KEY; + key.offset = 0; + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) { + return ret; + } else if (ret > 0) { + if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { + ret = btrfs_next_leaf(root, path); + if (ret < 0) + return ret; + else if (ret > 0) + goto copy_inline_extent; + } + btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); + if (key.objectid == btrfs_ino(dst) && + key.type == BTRFS_EXTENT_DATA_KEY) { + ASSERT(key.offset > 0); + return -EOPNOTSUPP; + } + } else if (i_size_read(dst) <= datal) { + struct btrfs_file_extent_item *ei; + u64 ext_len; + + /* + * If the file size is <= datal, make sure there are no other + * extents following (can happen do to an fallocate call with + * the flag FALLOC_FL_KEEP_SIZE). + */ + ei = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_file_extent_item); + /* + * If it's an inline extent, it can not have other extents + * following it. + */ + if (btrfs_file_extent_type(path->nodes[0], ei) == + BTRFS_FILE_EXTENT_INLINE) + goto copy_inline_extent; + + ext_len = btrfs_file_extent_num_bytes(path->nodes[0], ei); + if (ext_len > aligned_end) + return -EOPNOTSUPP; + + ret = btrfs_next_item(root, path); + if (ret < 0) { + return ret; + } else if (ret == 0) { + btrfs_item_key_to_cpu(path->nodes[0], &key, + path->slots[0]); + if (key.objectid == btrfs_ino(dst) && + key.type == BTRFS_EXTENT_DATA_KEY) + return -EOPNOTSUPP; + } + } + +copy_inline_extent: + /* + * We have no extent items, or we have an extent at offset 0 which may + * or may not be inlined. All these cases are dealt the same way. + */ + if (i_size_read(dst) > datal) { + /* + * If the destination inode has an inline extent... + * This would require copying the data from the source inline + * extent into the beginning of the destination's inline extent. + * But this is really complex, both extents can be compressed + * or just one of them, which would require decompressing and + * re-compressing data (which could increase the new compressed + * size, not allowing the compressed data to fit anymore in an + * inline extent). + * So just don't support this case for now (it should be rare, + * we are not really saving space when cloning inline extents). 
+ */ + return -EOPNOTSUPP; + } + + btrfs_release_path(path); + ret = btrfs_drop_extents(trans, root, dst, drop_start, aligned_end, 1); + if (ret) + return ret; + ret = btrfs_insert_empty_item(trans, root, path, new_key, size); + if (ret) + return ret; + + if (skip) { + const u32 start = btrfs_file_extent_calc_inline_size(0); + + memmove(inline_data + start, inline_data + start + skip, datal); + } + + write_extent_buffer(path->nodes[0], inline_data, + btrfs_item_ptr_offset(path->nodes[0], + path->slots[0]), + size); + inode_add_bytes(dst, datal); + + return 0; +} + /** * btrfs_clone() - clone a range from inode file to another * @@ -3432,21 +3576,6 @@ static int btrfs_clone(struct inode *src, struct inode *inode, } else if (type == BTRFS_FILE_EXTENT_INLINE) { u64 skip = 0; u64 trim = 0; - u64 aligned_end = 0; - - /* - * Don't copy an inline extent into an offset - * greater than zero. Having an inline extent - * at such an offset results in chaos as btrfs - * isn't prepared for such cases. Just skip - * this case for the same reasons as commented - * at btrfs_ioctl_clone(). - */ - if (last_dest_end > 0) { - ret = -EOPNOTSUPP; - btrfs_end_transaction(trans, root); - goto out; - } if (off > key.offset) { skip = off - key.offset; @@ -3464,42 +3593,22 @@ static int btrfs_clone(struct inode *src, struct inode *inode, size -= skip + trim; datal -= skip + trim; - aligned_end = ALIGN(new_key.offset + datal, - root->sectorsize); - ret = btrfs_drop_extents(trans, root, inode, - drop_start, - aligned_end, - 1); + ret = clone_copy_inline_extent(src, inode, + trans, path, + &new_key, + drop_start, + datal, + skip, size, buf); if (ret) { if (ret != -EOPNOTSUPP) btrfs_abort_transaction(trans, - root, ret); - btrfs_end_transaction(trans, root); - goto out; - } - - ret = btrfs_insert_empty_item(trans, root, path, - &new_key, size); - if (ret) { - btrfs_abort_transaction(trans, root, - ret); + root, + ret); btrfs_end_transaction(trans, root); goto out; } - - if (skip) { - u32 start = - btrfs_file_extent_calc_inline_size(0); - memmove(buf+start, buf+start+skip, - datal); - } - leaf = path->nodes[0]; slot = path->slots[0]; - write_extent_buffer(leaf, buf, - btrfs_item_ptr_offset(leaf, slot), - size); - inode_add_bytes(inode, datal); } /* If we have an implicit hole (NO_HOLES feature). */ From f1008f6d21ec52d533f7473e2e46218408fb4580 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 16 Oct 2015 12:34:25 +0100 Subject: [PATCH 1454/2091] Btrfs: fix truncation of compressed and inlined extents commit 0305cd5f7fca85dae392b9ba85b116896eb7c1c7 upstream. When truncating a file to a smaller size which consists of an inline extent that is compressed, we did not discard (or made unusable) the data between the new file size and the old file size, wasting metadata space and allowing for the truncated data to be leaked and the data corruption/loss mentioned below. We were also not correctly decrementing the number of bytes used by the inode, we were setting it to zero, giving a wrong report for callers of the stat(2) syscall. The fsck tool also reported an error about a mismatch between the nbytes of the file versus the real space used by the file. Now because we weren't discarding the truncated region of the file, it was possible for a caller of the clone ioctl to actually read the data that was truncated, allowing for a security breach without requiring root access to the system, using only standard filesystem operations. 
The scenario is the following: 1) User A creates a file which consists of an inline and compressed extent with a size of 2000 bytes - the file is not accessible to any other users (no read, write or execution permission for anyone else); 2) The user truncates the file to a size of 1000 bytes; 3) User A makes the file world readable; 4) User B creates a file consisting of an inline extent of 2000 bytes; 5) User B issues a clone operation from user A's file into its own file (using a length argument of 0, clone the whole range); 6) User B now gets to see the 1000 bytes that user A truncated from its file before it made its file world readbale. User B also lost the bytes in the range [1000, 2000[ bytes from its own file, but that might be ok if his/her intention was reading stale data from user A that was never supposed to be public. Note that this contrasts with the case where we truncate a file from 2000 bytes to 1000 bytes and then truncate it back from 1000 to 2000 bytes. In this case reading any byte from the range [1000, 2000[ will return a value of 0x00, instead of the original data. This problem exists since the clone ioctl was added and happens both with and without my recent data loss and file corruption fixes for the clone ioctl (patch "Btrfs: fix file corruption and data loss after cloning inline extents"). So fix this by truncating the compressed inline extents as we do for the non-compressed case, which involves decompressing, if the data isn't already in the page cache, compressing the truncated version of the extent, writing the compressed content into the inline extent and then truncate it. The following test case for fstests reproduces the problem. In order for the test to pass both this fix and my previous fix for the clone ioctl that forbids cloning a smaller inline extent into a larger one, which is titled "Btrfs: fix file corruption and data loss after cloning inline extents", are needed. Without that other fix the test fails in a different way that does not leak the truncated data, instead part of destination file gets replaced with zeroes (because the destination file has a larger inline extent than the source). seq=`basename $0` seqres=$RESULT_DIR/$seq echo "QA output created by $seq" tmp=/tmp/$$ status=1 # failure is the default! trap "_cleanup; exit \$status" 0 1 2 3 15 _cleanup() { rm -f $tmp.* } # get standard environment, filters and checks . ./common/rc . ./common/filter # real QA test starts here _need_to_be_root _supported_fs btrfs _supported_os Linux _require_scratch _require_cloner rm -f $seqres.full _scratch_mkfs >>$seqres.full 2>&1 _scratch_mount "-o compress" # Create our test files. File foo is going to be the source of a clone operation # and consists of a single inline extent with an uncompressed size of 512 bytes, # while file bar consists of a single inline extent with an uncompressed size of # 256 bytes. For our test's purpose, it's important that file bar has an inline # extent with a size smaller than foo's inline extent. $XFS_IO_PROG -f -c "pwrite -S 0xa1 0 128" \ -c "pwrite -S 0x2a 128 384" \ $SCRATCH_MNT/foo | _filter_xfs_io $XFS_IO_PROG -f -c "pwrite -S 0xbb 0 256" $SCRATCH_MNT/bar | _filter_xfs_io # Now durably persist all metadata and data. We do this to make sure that we get # on disk an inline extent with a size of 512 bytes for file foo. sync # Now truncate our file foo to a smaller size. 
Because it consists of a # compressed and inline extent, btrfs did not shrink the inline extent to the # new size (if the extent was not compressed, btrfs would shrink it to 128 # bytes), it only updates the inode's i_size to 128 bytes. $XFS_IO_PROG -c "truncate 128" $SCRATCH_MNT/foo # Now clone foo's inline extent into bar. # This clone operation should fail with errno EOPNOTSUPP because the source # file consists only of an inline extent and the file's size is smaller than # the inline extent of the destination (128 bytes < 256 bytes). However the # clone ioctl was not prepared to deal with a file that has a size smaller # than the size of its inline extent (something that happens only for compressed # inline extents), resulting in copying the full inline extent from the source # file into the destination file. # # Note that btrfs' clone operation for inline extents consists of removing the # inline extent from the destination inode and copy the inline extent from the # source inode into the destination inode, meaning that if the destination # inode's inline extent is larger (N bytes) than the source inode's inline # extent (M bytes), some bytes (N - M bytes) will be lost from the destination # file. Btrfs could copy the source inline extent's data into the destination's # inline extent so that we would not lose any data, but that's currently not # done due to the complexity that would be needed to deal with such cases # (specially when one or both extents are compressed), returning EOPNOTSUPP, as # it's normally not a very common case to clone very small files (only case # where we get inline extents) and copying inline extents does not save any # space (unlike for normal, non-inlined extents). $CLONER_PROG -s 0 -d 0 -l 0 $SCRATCH_MNT/foo $SCRATCH_MNT/bar # Now because the above clone operation used to succeed, and due to foo's inline # extent not being shinked by the truncate operation, our file bar got the whole # inline extent copied from foo, making us lose the last 128 bytes from bar # which got replaced by the bytes in range [128, 256[ from foo before foo was # truncated - in other words, data loss from bar and being able to read old and # stale data from foo that should not be possible to read anymore through normal # filesystem operations. Contrast with the case where we truncate a file from a # size N to a smaller size M, truncate it back to size N and then read the range # [M, N[, we should always get the value 0x00 for all the bytes in that range. # We expected the clone operation to fail with errno EOPNOTSUPP and therefore # not modify our file's bar data/metadata. So its content should be 256 bytes # long with all bytes having the value 0xbb. # # Without the btrfs bug fix, the clone operation succeeded and resulted in # leaking truncated data from foo, the bytes that belonged to its range # [128, 256[, and losing data from bar in that same range. 
So reading the # file gave us the following content: # # 0000000 a1 a1 a1 a1 a1 a1 a1 a1 a1 a1 a1 a1 a1 a1 a1 a1 # * # 0000200 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a # * # 0000400 echo "File bar's content after the clone operation:" od -t x1 $SCRATCH_MNT/bar # Also because the foo's inline extent was not shrunk by the truncate # operation, btrfs' fsck, which is run by the fstests framework everytime a # test completes, failed reporting the following error: # # root 5 inode 257 errors 400, nbytes wrong status=0 exit Signed-off-by: Filipe Manana Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 82 +++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 68 insertions(+), 14 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e3b39f0c46662..88680afd8cfb1 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4184,6 +4184,47 @@ static int truncate_space_check(struct btrfs_trans_handle *trans, } +static int truncate_inline_extent(struct inode *inode, + struct btrfs_path *path, + struct btrfs_key *found_key, + const u64 item_end, + const u64 new_size) +{ + struct extent_buffer *leaf = path->nodes[0]; + int slot = path->slots[0]; + struct btrfs_file_extent_item *fi; + u32 size = (u32)(new_size - found_key->offset); + struct btrfs_root *root = BTRFS_I(inode)->root; + + fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); + + if (btrfs_file_extent_compression(leaf, fi) != BTRFS_COMPRESS_NONE) { + loff_t offset = new_size; + loff_t page_end = ALIGN(offset, PAGE_CACHE_SIZE); + + /* + * Zero out the remaining of the last page of our inline extent, + * instead of directly truncating our inline extent here - that + * would be much more complex (decompressing all the data, then + * compressing the truncated data, which might be bigger than + * the size of the inline extent, resize the extent, etc). + * We release the path because to get the page we might need to + * read the extent item from disk (data not in the page cache). + */ + btrfs_release_path(path); + return btrfs_truncate_page(inode, offset, page_end - offset, 0); + } + + btrfs_set_file_extent_ram_bytes(leaf, fi, size); + size = btrfs_file_extent_calc_inline_size(size); + btrfs_truncate_item(root, path, size, 1); + + if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) + inode_sub_bytes(inode, item_end + 1 - new_size); + + return 0; +} + /* * this can truncate away extent items, csum items and directory items. * It starts at a high offset and removes keys until it can't find @@ -4378,27 +4419,40 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, * special encodings */ if (!del_item && - btrfs_file_extent_compression(leaf, fi) == 0 && btrfs_file_extent_encryption(leaf, fi) == 0 && btrfs_file_extent_other_encoding(leaf, fi) == 0) { - u32 size = new_size - found_key.offset; - - if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) - inode_sub_bytes(inode, item_end + 1 - - new_size); /* - * update the ram bytes to properly reflect - * the new size of our item + * Need to release path in order to truncate a + * compressed extent. So delete any accumulated + * extent items so far. 
*/ - btrfs_set_file_extent_ram_bytes(leaf, fi, size); - size = - btrfs_file_extent_calc_inline_size(size); - btrfs_truncate_item(root, path, size, 1); + if (btrfs_file_extent_compression(leaf, fi) != + BTRFS_COMPRESS_NONE && pending_del_nr) { + err = btrfs_del_items(trans, root, path, + pending_del_slot, + pending_del_nr); + if (err) { + btrfs_abort_transaction(trans, + root, + err); + goto error; + } + pending_del_nr = 0; + } + + err = truncate_inline_extent(inode, path, + &found_key, + item_end, + new_size); + if (err) { + btrfs_abort_transaction(trans, + root, err); + goto error; + } } else if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) { - inode_sub_bytes(inode, item_end + 1 - - found_key.offset); + inode_sub_bytes(inode, item_end + 1 - new_size); } } delete: From 51ac5edb612f7890bdce6b67d9c84697aaac96e3 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 6 Nov 2015 13:33:33 +0000 Subject: [PATCH 1455/2091] Btrfs: fix race leading to incorrect item deletion when dropping extents commit aeafbf8486c9e2bd53f5cc3c10c0b7fd7149d69c upstream. While running a stress test I got the following warning triggered: [191627.672810] ------------[ cut here ]------------ [191627.673949] WARNING: CPU: 8 PID: 8447 at fs/btrfs/file.c:779 __btrfs_drop_extents+0x391/0xa50 [btrfs]() (...) [191627.701485] Call Trace: [191627.702037] [] dump_stack+0x4f/0x7b [191627.702992] [] ? console_unlock+0x356/0x3a2 [191627.704091] [] warn_slowpath_common+0xa1/0xbb [191627.705380] [] ? __btrfs_drop_extents+0x391/0xa50 [btrfs] [191627.706637] [] warn_slowpath_null+0x1a/0x1c [191627.707789] [] __btrfs_drop_extents+0x391/0xa50 [btrfs] [191627.709155] [] ? cache_alloc_debugcheck_after.isra.32+0x171/0x1d0 [191627.712444] [] ? kmemleak_alloc_recursive.constprop.40+0x16/0x18 [191627.714162] [] insert_reserved_file_extent.constprop.40+0x83/0x24e [btrfs] [191627.715887] [] ? start_transaction+0x3bb/0x610 [btrfs] [191627.717287] [] btrfs_finish_ordered_io+0x273/0x4e2 [btrfs] [191627.728865] [] finish_ordered_fn+0x15/0x17 [btrfs] [191627.730045] [] normal_work_helper+0x14c/0x32c [btrfs] [191627.731256] [] btrfs_endio_write_helper+0x12/0x14 [btrfs] [191627.732661] [] process_one_work+0x24c/0x4ae [191627.733822] [] worker_thread+0x206/0x2c2 [191627.734857] [] ? process_scheduled_works+0x2f/0x2f [191627.736052] [] ? process_scheduled_works+0x2f/0x2f [191627.737349] [] kthread+0xef/0xf7 [191627.738267] [] ? time_hardirqs_on+0x15/0x28 [191627.739330] [] ? __kthread_parkme+0xad/0xad [191627.741976] [] ret_from_fork+0x42/0x70 [191627.743080] [] ? __kthread_parkme+0xad/0xad [191627.744206] ---[ end trace bbfddacb7aaada8d ]--- $ cat -n fs/btrfs/file.c 691 int __btrfs_drop_extents(struct btrfs_trans_handle *trans, (...) 758 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 759 if (key.objectid > ino || 760 key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end) 761 break; 762 763 fi = btrfs_item_ptr(leaf, path->slots[0], 764 struct btrfs_file_extent_item); 765 extent_type = btrfs_file_extent_type(leaf, fi); 766 767 if (extent_type == BTRFS_FILE_EXTENT_REG || 768 extent_type == BTRFS_FILE_EXTENT_PREALLOC) { (...) 774 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { (...) 778 } else { 779 WARN_ON(1); 780 extent_end = search_start; 781 } (...) 
This happened because the item we were processing did not match a file extent item (its key type != BTRFS_EXTENT_DATA_KEY), and even on this case we cast the item to a struct btrfs_file_extent_item pointer and then find a type field value that does not match any of the expected values (BTRFS_FILE_EXTENT_[REG|PREALLOC|INLINE]). This scenario happens due to a tiny time window where a race can happen as exemplified below. For example, consider the following scenario where we're using the NO_HOLES feature and we have the following two neighbour leafs: Leaf X (has N items) Leaf Y [ ... (257 INODE_ITEM 0) (257 INODE_REF 256) ] [ (257 EXTENT_DATA 8192), ... ] slot N - 2 slot N - 1 slot 0 Our inode 257 has an implicit hole in the range [0, 8K[ (implicit rather than explicit because NO_HOLES is enabled). Now if our inode has an ordered extent for the range [4K, 8K[ that is finishing, the following can happen: CPU 1 CPU 2 btrfs_finish_ordered_io() insert_reserved_file_extent() __btrfs_drop_extents() Searches for the key (257 EXTENT_DATA 4096) through btrfs_lookup_file_extent() Key not found and we get a path where path->nodes[0] == leaf X and path->slots[0] == N Because path->slots[0] is >= btrfs_header_nritems(leaf X), we call btrfs_next_leaf() btrfs_next_leaf() releases the path inserts key (257 INODE_REF 4096) at the end of leaf X, leaf X now has N + 1 keys, and the new key is at slot N btrfs_next_leaf() searches for key (257 INODE_REF 256), with path->keep_locks set to 1, because it was the last key it saw in leaf X finds it in leaf X again and notices it's no longer the last key of the leaf, so it returns 0 with path->nodes[0] == leaf X and path->slots[0] == N (which is now < btrfs_header_nritems(leaf X)), pointing to the new key (257 INODE_REF 4096) __btrfs_drop_extents() casts the item at path->nodes[0], slot path->slots[0], to a struct btrfs_file_extent_item - it does not skip keys for the target inode with a type less than BTRFS_EXTENT_DATA_KEY (BTRFS_INODE_REF_KEY < BTRFS_EXTENT_DATA_KEY) sees a bogus value for the type field triggering the WARN_ON in the trace shown above, and sets extent_end = search_start (4096) does the if-then-else logic to fixup 0 length extent items created by a past bug from hole punching: if (extent_end == key.offset && extent_end >= search_start) goto delete_extent_item; that evaluates to true and it ends up deleting the key pointed to by path->slots[0], (257 INODE_REF 4096), from leaf X The same could happen for example for a xattr that ends up having a key with an offset value that matches search_start (very unlikely but not impossible). So fix this by ensuring that keys smaller than BTRFS_EXTENT_DATA_KEY are skipped, never casted to struct btrfs_file_extent_item and never deleted by accident. Also protect against the unexpected case of getting a key for a lower inode number by skipping that key and issuing a warning. 
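The essence of the fix is to respect the (objectid, type, offset) ordering of btree keys: items belonging to the same inode that sort before BTRFS_EXTENT_DATA_KEY (inode refs, xattrs) can legitimately occupy the slot the search returned, and they must be stepped over instead of being cast to a file extent item. Below is a minimal user-space sketch of that skip logic; the function and type names (drop_extents, struct key) are illustrative stand-ins, and this is not the hunk applied below.

	#include <stdio.h>

	/* Simplified stand-ins for the kernel's key type ordering. */
	enum { INODE_REF_KEY = 12, XATTR_ITEM_KEY = 24, EXTENT_DATA_KEY = 108 };

	struct key {
		unsigned long long objectid;
		unsigned char type;
		unsigned long long offset;
	};

	/* Walk leaf items for inode `ino`, handling only EXTENT_DATA keys below `end`. */
	static void drop_extents(const struct key *items, int nritems,
				 unsigned long long ino, unsigned long long end)
	{
		for (int slot = 0; slot < nritems; slot++) {
			const struct key *k = &items[slot];

			if (k->objectid > ino)
				break;		/* past our inode, done */
			if (k->objectid < ino || k->type < EXTENT_DATA_KEY)
				continue;	/* e.g. a racing INODE_REF: skip it, never cast it */
			if (k->type > EXTENT_DATA_KEY || k->offset >= end)
				break;		/* beyond the range being dropped */

			printf("process extent item at offset %llu\n", k->offset);
		}
	}

	int main(void)
	{
		/* A leaf where an INODE_REF was inserted concurrently at offset 4096. */
		struct key leaf[] = {
			{ 257, INODE_REF_KEY,   256  },
			{ 257, INODE_REF_KEY,   4096 },	/* the racing item: must be skipped */
			{ 257, EXTENT_DATA_KEY, 8192 },
		};

		drop_extents(leaf, 3, 257, 16384);
		return 0;
	}
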
Signed-off-by: Filipe Manana Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/file.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index b072e17479aa8..2b0d84d32db45 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -756,8 +756,16 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans, } btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); - if (key.objectid > ino || - key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end) + + if (key.objectid > ino) + break; + if (WARN_ON_ONCE(key.objectid < ino) || + key.type < BTRFS_EXTENT_DATA_KEY) { + ASSERT(del_nr == 0); + path->slots[0]++; + goto next_slot; + } + if (key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end) break; fi = btrfs_item_ptr(leaf, path->slots[0], @@ -776,8 +784,8 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans, btrfs_file_extent_inline_len(leaf, path->slots[0], fi); } else { - WARN_ON(1); - extent_end = search_start; + /* can't happen */ + BUG(); } /* From d22b284391fc9bb00857b02de3dfe0ccc0ff73b9 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 9 Nov 2015 00:33:58 +0000 Subject: [PATCH 1456/2091] Btrfs: fix race leading to BUG_ON when running delalloc for nodatacow commit 1d512cb77bdbda80f0dd0620a3b260d697fd581d upstream. If we are using the NO_HOLES feature, we have a tiny time window when running delalloc for a nodatacow inode where we can race with a concurrent link or xattr add operation leading to a BUG_ON. This happens because at run_delalloc_nocow() we end up casting a leaf item of type BTRFS_INODE_[REF|EXTREF]_KEY or of type BTRFS_XATTR_ITEM_KEY to a file extent item (struct btrfs_file_extent_item) and then analyse its extent type field, which won't match any of the expected extent types (values BTRFS_FILE_EXTENT_[REG|PREALLOC|INLINE]) and therefore trigger an explicit BUG_ON(1). The following sequence diagram shows how the race happens when running a no-cow dellaloc range [4K, 8K[ for inode 257 and we have the following neighbour leafs: Leaf X (has N items) Leaf Y [ ... (257 INODE_ITEM 0) (257 INODE_REF 256) ] [ (257 EXTENT_DATA 8192), ... 
] slot N - 2 slot N - 1 slot 0 (Note the implicit hole for inode 257 regarding the [0, 8K[ range) CPU 1 CPU 2 run_dealloc_nocow() btrfs_lookup_file_extent() --> searches for a key with value (257 EXTENT_DATA 4096) in the fs/subvol tree --> returns us a path with path->nodes[0] == leaf X and path->slots[0] == N because path->slots[0] is >= btrfs_header_nritems(leaf X), it calls btrfs_next_leaf() btrfs_next_leaf() --> releases the path hard link added to our inode, with key (257 INODE_REF 500) added to the end of leaf X, so leaf X now has N + 1 keys --> searches for the key (257 INODE_REF 256), because it was the last key in leaf X before it released the path, with path->keep_locks set to 1 --> ends up at leaf X again and it verifies that the key (257 INODE_REF 256) is no longer the last key in the leaf, so it returns with path->nodes[0] == leaf X and path->slots[0] == N, pointing to the new item with key (257 INODE_REF 500) the loop iteration of run_dealloc_nocow() does not break out the loop and continues because the key referenced in the path at path->nodes[0] and path->slots[0] is for inode 257, its type is < BTRFS_EXTENT_DATA_KEY and its offset (500) is less then our delalloc range's end (8192) the item pointed by the path, an inode reference item, is (incorrectly) interpreted as a file extent item and we get an invalid extent type, leading to the BUG_ON(1): if (extent_type == BTRFS_FILE_EXTENT_REG || extent_type == BTRFS_FILE_EXTENT_PREALLOC) { (...) } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { (...) } else { BUG_ON(1) } The same can happen if a xattr is added concurrently and ends up having a key with an offset smaller then the delalloc's range end. So fix this by skipping keys with a type smaller than BTRFS_EXTENT_DATA_KEY. Signed-off-by: Filipe Manana Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 88680afd8cfb1..5136c73b3dce7 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1294,8 +1294,14 @@ static noinline int run_delalloc_nocow(struct inode *inode, num_bytes = 0; btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - if (found_key.objectid > ino || - found_key.type > BTRFS_EXTENT_DATA_KEY || + if (found_key.objectid > ino) + break; + if (WARN_ON_ONCE(found_key.objectid < ino) || + found_key.type < BTRFS_EXTENT_DATA_KEY) { + path->slots[0]++; + goto next_slot; + } + if (found_key.type > BTRFS_EXTENT_DATA_KEY || found_key.offset > end) break; From c39df1cf3663f94d5453942aaf4922e18122ca0b Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 9 Nov 2015 18:06:38 +0000 Subject: [PATCH 1457/2091] Btrfs: fix race when listing an inode's xattrs commit f1cd1f0b7d1b5d4aaa5711e8f4e4898b0045cb6d upstream. When listing a inode's xattrs we have a time window where we race against a concurrent operation for adding a new hard link for our inode that makes us not return any xattr to user space. In order for this to happen, the first xattr of our inode needs to be at slot 0 of a leaf and the previous leaf must still have room for an inode ref (or extref) item, and this can happen because an inode's listxattrs callback does not lock the inode's i_mutex (nor does the VFS does it for us), but adding a hard link to an inode makes the VFS lock the inode's i_mutex before calling the inode's link callback. If we have the following leafs: Leaf X (has N items) Leaf Y [ ... (257 INODE_ITEM 0) (257 INODE_REF 256) ] [ (257 XATTR_ITEM 12345), ... 
] slot N - 2 slot N - 1 slot 0 The race illustrated by the following sequence diagram is possible: CPU 1 CPU 2 btrfs_listxattr() searches for key (257 XATTR_ITEM 0) gets path with path->nodes[0] == leaf X and path->slots[0] == N because path->slots[0] is >= btrfs_header_nritems(leaf X), it calls btrfs_next_leaf() btrfs_next_leaf() releases the path adds key (257 INODE_REF 666) to the end of leaf X (slot N), and leaf X now has N + 1 items searches for the key (257 INODE_REF 256), with path->keep_locks == 1, because that is the last key it saw in leaf X before releasing the path ends up at leaf X again and it verifies that the key (257 INODE_REF 256) is no longer the last key in leaf X, so it returns with path->nodes[0] == leaf X and path->slots[0] == N, pointing to the new item with key (257 INODE_REF 666) btrfs_listxattr's loop iteration sees that the type of the key pointed by the path is different from the type BTRFS_XATTR_ITEM_KEY and so it breaks the loop and stops looking for more xattr items --> the application doesn't get any xattr listed for our inode So fix this by breaking the loop only if the key's type is greater than BTRFS_XATTR_ITEM_KEY and skip the current key if its type is smaller. Signed-off-by: Filipe Manana Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/xattr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 6f518c90e1c14..1fcd7b6e7564d 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -313,8 +313,10 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) /* check to make sure this item is what we want */ if (found_key.objectid != key.objectid) break; - if (found_key.type != BTRFS_XATTR_ITEM_KEY) + if (found_key.type > BTRFS_XATTR_ITEM_KEY) break; + if (found_key.type < BTRFS_XATTR_ITEM_KEY) + goto next; di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); if (verify_dir_item(root, leaf, di)) From 0330982c45ec56b0855c9288bf07d5c5edb12faf Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 27 Nov 2015 19:23:24 +0100 Subject: [PATCH 1458/2091] rbd: don't put snap_context twice in rbd_queue_workfn() commit 70b16db86f564977df074072143284aec2cb1162 upstream. Commit 4e752f0ab0e8 ("rbd: access snapshot context and mapping size safely") moved ceph_get_snap_context() out of rbd_img_request_create() and into rbd_queue_workfn(), adding a ceph_put_snap_context() to the error path in rbd_queue_workfn(). However, rbd_img_request_create() consumes a ref on snapc, so calling ceph_put_snap_context() after a successful rbd_img_request_create() leads to an extra put. Fix it. Signed-off-by: Ilya Dryomov Reviewed-by: Josh Durgin Signed-off-by: Greg Kroah-Hartman --- drivers/block/rbd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 1ec6441fe2a5a..09138ceba046b 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -3417,6 +3417,7 @@ static void rbd_queue_workfn(struct work_struct *work) goto err_rq; } img_request->rq = rq; + snapc = NULL; /* img_request consumes a ref */ if (op_type == OBJ_OP_DISCARD) result = rbd_img_request_fill(img_request, OBJ_REQUEST_NODATA, From b8a7a30104317fd37389b2e2b75cc6f3fa7aef2a Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 2 Oct 2015 23:54:58 -0400 Subject: [PATCH 1459/2091] ext4 crypto: fix memory leak in ext4_bio_write_page() commit 937d7b84dca58f2565715f2c8e52f14c3d65fb22 upstream. There are times when ext4_bio_write_page() is called even though we don't actually need to do any I/O. 
This happens when ext4_writepage() gets called by the jbd2 commit path when an inode needs to force its pages written out in order to provide data=ordered guarantees --- and a page is backed by an unwritten (e.g., uninitialized) block on disk, or if delayed allocation means the page's backing store hasn't been allocated yet. In that case, we need to skip the call to ext4_encrypt_page(), since in addition to wasting CPU, it leads to a bounce page and an ext4 crypto context getting leaked. Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/page-io.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 5765f88b39049..8082565c59a92 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -426,6 +426,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io, struct buffer_head *bh, *head; int ret = 0; int nr_submitted = 0; + int nr_to_submit = 0; blocksize = 1 << inode->i_blkbits; @@ -478,11 +479,13 @@ int ext4_bio_write_page(struct ext4_io_submit *io, unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr); } set_buffer_async_write(bh); + nr_to_submit++; } while ((bh = bh->b_this_page) != head); bh = head = page_buffers(page); - if (ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode)) { + if (ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode) && + nr_to_submit) { data_page = ext4_encrypt(inode, page); if (IS_ERR(data_page)) { ret = PTR_ERR(data_page); From 456dd91e06dd5194d263c0693811852308c4cc09 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Sat, 17 Oct 2015 22:57:06 -0400 Subject: [PATCH 1460/2091] ext4: fix potential use after free in __ext4_journal_stop commit 6934da9238da947628be83635e365df41064b09b upstream. There is a use-after-free possibility in __ext4_journal_stop() in the case that we free the handle in the first jbd2_journal_stop() because we're referencing handle->h_err afterwards. This was introduced in 9705acd63b125dee8b15c705216d7186daea4625 and it is wrong. Fix it by storing the handle->h_err value beforehand and avoid referencing potentially freed handle. Fixes: 9705acd63b125dee8b15c705216d7186daea4625 Signed-off-by: Lukas Czerner Reviewed-by: Andreas Dilger Signed-off-by: Greg Kroah-Hartman --- fs/ext4/ext4_jbd2.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index d418431818187..e770c1ee4613e 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -88,13 +88,13 @@ int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle) return 0; } + err = handle->h_err; if (!handle->h_transaction) { - err = jbd2_journal_stop(handle); - return handle->h_err ? handle->h_err : err; + rc = jbd2_journal_stop(handle); + return err ? err : rc; } sb = handle->h_transaction->t_journal->j_private; - err = handle->h_err; rc = jbd2_journal_stop(handle); if (!err) From 3e447b618b9d6f486ab441bc4bd4507628318209 Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Sun, 18 Oct 2015 17:02:56 -0400 Subject: [PATCH 1461/2091] ext4, jbd2: ensure entering into panic after recording an error in superblock commit 4327ba52afd03fc4b5afa0ee1d774c9c5b0e85c5 upstream. If a EXT4 filesystem utilizes JBD2 journaling and an error occurs, the journaling will be aborted first and the error number will be recorded into JBD2 superblock and, finally, the system will enter into the panic state in "errors=panic" option. 
But, in the rare case, this sequence is little twisted like the below figure and it will happen that the system enters into panic state, which means the system reset in mobile environment, before completion of recording an error in the journal superblock. In this case, e2fsck cannot recognize that the filesystem failure occurred in the previous run and the corruption wouldn't be fixed. Task A Task B ext4_handle_error() -> jbd2_journal_abort() -> __journal_abort_soft() -> __jbd2_journal_abort_hard() | -> journal->j_flags |= JBD2_ABORT; | | __ext4_abort() | -> jbd2_journal_abort() | | -> __journal_abort_soft() | | -> if (journal->j_flags & JBD2_ABORT) | | return; | -> panic() | -> jbd2_journal_update_sb_errno() Tested-by: Hobin Woo Signed-off-by: Daeho Jeong Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 12 ++++++++++-- fs/jbd2/journal.c | 6 +++++- include/linux/jbd2.h | 1 + 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index ff89971e3ee0f..8a3b9f14d1988 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -396,9 +396,13 @@ static void ext4_handle_error(struct super_block *sb) smp_wmb(); sb->s_flags |= MS_RDONLY; } - if (test_opt(sb, ERRORS_PANIC)) + if (test_opt(sb, ERRORS_PANIC)) { + if (EXT4_SB(sb)->s_journal && + !(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR)) + return; panic("EXT4-fs (device %s): panic forced after error\n", sb->s_id); + } } #define ext4_error_ratelimit(sb) \ @@ -587,8 +591,12 @@ void __ext4_abort(struct super_block *sb, const char *function, jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); save_error_info(sb, function, line); } - if (test_opt(sb, ERRORS_PANIC)) + if (test_opt(sb, ERRORS_PANIC)) { + if (EXT4_SB(sb)->s_journal && + !(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR)) + return; panic("EXT4-fs panic from previous error\n"); + } } void __ext4_msg(struct super_block *sb, diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 7003c09257601..0469f32918a5a 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -2086,8 +2086,12 @@ static void __journal_abort_soft (journal_t *journal, int errno) __jbd2_journal_abort_hard(journal); - if (errno) + if (errno) { jbd2_journal_update_sb_errno(journal); + write_lock(&journal->j_state_lock); + journal->j_flags |= JBD2_REC_ERR; + write_unlock(&journal->j_state_lock); + } } /** diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index eb1cebed3f36e..c90c9b70e5685 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1007,6 +1007,7 @@ struct journal_s #define JBD2_ABORT_ON_SYNCDATA_ERR 0x040 /* Abort the journal on file * data write error in ordered * mode */ +#define JBD2_REC_ERR 0x080 /* The errno in the sb has been recorded */ /* * Function declarations for the journaling transaction and buffer From 16693ba9dea8ec10ba94efa683e0530672aad27d Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Tue, 3 Nov 2015 01:46:21 +0100 Subject: [PATCH 1462/2091] firewire: ohci: fix JMicron JMB38x IT context discovery commit 100ceb66d5c40cc0c7018e06a9474302470be73c upstream. Reported by Clifford and Craig for JMicron OHCI-1394 + SDHCI combo controllers: Often or even most of the time, the controller is initialized with the message "added OHCI v1.10 device as card 0, 4 IR + 0 IT contexts, quirks 0x10". With 0 isochronous transmit DMA contexts (IT contexts), applications like audio output are impossible. 
However, OHCI-1394 demands that at least 4 IT contexts are implemented by the link layer controller, and indeed JMicron JMB38x do implement four of them. Only their IsoXmitIntMask register is unreliable at early access. With my own JMB381 single function controller I found: - I can reproduce the problem with a lower probability than Craig's. - If I put a loop around the section which clears and reads IsoXmitIntMask, then either the first or the second attempt will return the correct initial mask of 0x0000000f. I never encountered a case of needing more than a second attempt. - Consequently, if I put a dummy reg_read(...IsoXmitIntMaskSet) before the first write, the subsequent read will return the correct result. - If I merely ignore a wrong read result and force the known real result, later isochronous transmit DMA usage works just fine. So let's just fix this chip bug up by the latter method. Tested with JMB381 on kernel 3.13 and 4.3. Since OHCI-1394 generally requires 4 IT contexts at a minium, this workaround is simply applied whenever the initial read of IsoXmitIntMask returns 0, regardless whether it's a JMicron chip or not. I never heard of this issue together with any other chip though. I am not 100% sure that this fix works on the OHCI-1394 part of JMB380 and JMB388 combo controllers exactly the same as on the JMB381 single- function controller, but so far I haven't had a chance to let an owner of a combo chip run a patched kernel. Strangely enough, IsoRecvIntMask is always reported correctly, even though it is probed right before IsoXmitIntMask. Reported-by: Clifford Dunn Reported-by: Craig Moore Signed-off-by: Stefan Richter Signed-off-by: Greg Kroah-Hartman --- drivers/firewire/ohci.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c index f51d376d10ba6..c2f5117fd8cb0 100644 --- a/drivers/firewire/ohci.c +++ b/drivers/firewire/ohci.c @@ -3675,6 +3675,11 @@ static int pci_probe(struct pci_dev *dev, reg_write(ohci, OHCI1394_IsoXmitIntMaskSet, ~0); ohci->it_context_support = reg_read(ohci, OHCI1394_IsoXmitIntMaskSet); + /* JMicron JMB38x often shows 0 at first read, just ignore it */ + if (!ohci->it_context_support) { + ohci_notice(ohci, "overriding IsoXmitIntMask\n"); + ohci->it_context_support = 0xf; + } reg_write(ohci, OHCI1394_IsoXmitIntMaskClear, ~0); ohci->it_context_mask = ohci->it_context_support; ohci->n_it = hweight32(ohci->it_context_mask); From 35b2e295e669a6f6817f51a6f14d821e3f5f7ce0 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 17 Sep 2015 07:47:08 -0400 Subject: [PATCH 1463/2091] nfsd: serialize state seqid morphing operations commit 35a92fe8770ce54c5eb275cd76128645bea2d200 upstream. Andrew was seeing a race occur when an OPEN and OPEN_DOWNGRADE were running in parallel. The server would receive the OPEN_DOWNGRADE first and check its seqid, but then an OPEN would race in and bump it. The OPEN_DOWNGRADE would then complete and bump the seqid again. The result was that the OPEN_DOWNGRADE would be applied after the OPEN, even though it should have been rejected since the seqid changed. The only recourse we have here I think is to serialize operations that bump the seqid in a stateid, particularly when we're given a seqid in the call. To address this, we add a new rw_semaphore to the nfs4_ol_stateid struct. We do a down_write prior to checking the seqid after looking up the stateid to ensure that nothing else is going to bump it while we're operating on it. 
In the case of OPEN, we do a down_read, as the call doesn't contain a seqid. Those can run in parallel -- we just need to serialize them when there is a concurrent OPEN_DOWNGRADE or CLOSE. LOCK and LOCKU however always take the write lock as there is no opportunity for parallelizing those. Reported-and-Tested-by: Andrew W Elble Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 33 ++++++++++++++++++++++++++++----- fs/nfsd/state.h | 19 ++++++++++--------- 2 files changed, 38 insertions(+), 14 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 397798368b1ae..8c2c9d85cd4ef 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3351,6 +3351,7 @@ static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, stp->st_access_bmap = 0; stp->st_deny_bmap = 0; stp->st_openstp = NULL; + init_rwsem(&stp->st_rwsem); spin_lock(&oo->oo_owner.so_client->cl_lock); list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids); spin_lock(&fp->fi_lock); @@ -4181,15 +4182,20 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf */ if (stp) { /* Stateid was found, this is an OPEN upgrade */ + down_read(&stp->st_rwsem); status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open); - if (status) + if (status) { + up_read(&stp->st_rwsem); goto out; + } } else { stp = open->op_stp; open->op_stp = NULL; init_open_stateid(stp, fp, open); + down_read(&stp->st_rwsem); status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open); if (status) { + up_read(&stp->st_rwsem); release_open_stateid(stp); goto out; } @@ -4201,6 +4207,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf } update_stateid(&stp->st_stid.sc_stateid); memcpy(&open->op_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); + up_read(&stp->st_rwsem); if (nfsd4_has_session(&resp->cstate)) { if (open->op_deleg_want & NFS4_SHARE_WANT_NO_DELEG) { @@ -4777,10 +4784,13 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_ * revoked delegations are kept only for free_stateid. 
*/ return nfserr_bad_stateid; + down_write(&stp->st_rwsem); status = check_stateid_generation(stateid, &stp->st_stid.sc_stateid, nfsd4_has_session(cstate)); - if (status) - return status; - return nfs4_check_fh(current_fh, &stp->st_stid); + if (status == nfs_ok) + status = nfs4_check_fh(current_fh, &stp->st_stid); + if (status != nfs_ok) + up_write(&stp->st_rwsem); + return status; } /* @@ -4827,6 +4837,7 @@ static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cs return status; oo = openowner(stp->st_stateowner); if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) { + up_write(&stp->st_rwsem); nfs4_put_stid(&stp->st_stid); return nfserr_bad_stateid; } @@ -4857,11 +4868,14 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; oo = openowner(stp->st_stateowner); status = nfserr_bad_stateid; - if (oo->oo_flags & NFS4_OO_CONFIRMED) + if (oo->oo_flags & NFS4_OO_CONFIRMED) { + up_write(&stp->st_rwsem); goto put_stateid; + } oo->oo_flags |= NFS4_OO_CONFIRMED; update_stateid(&stp->st_stid.sc_stateid); memcpy(&oc->oc_resp_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); + up_write(&stp->st_rwsem); dprintk("NFSD: %s: success, seqid=%d stateid=" STATEID_FMT "\n", __func__, oc->oc_seqid, STATEID_VAL(&stp->st_stid.sc_stateid)); @@ -4940,6 +4954,7 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp, memcpy(&od->od_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); status = nfs_ok; put_stateid: + up_write(&stp->st_rwsem); nfs4_put_stid(&stp->st_stid); out: nfsd4_bump_seqid(cstate, status); @@ -4993,6 +5008,7 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; update_stateid(&stp->st_stid.sc_stateid); memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); + up_write(&stp->st_rwsem); nfsd4_close_open_stateid(stp); @@ -5223,6 +5239,7 @@ init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo, stp->st_access_bmap = 0; stp->st_deny_bmap = open_stp->st_deny_bmap; stp->st_openstp = open_stp; + init_rwsem(&stp->st_rwsem); list_add(&stp->st_locks, &open_stp->st_locks); list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids); spin_lock(&fp->fi_lock); @@ -5391,6 +5408,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, &open_stp, nn); if (status) goto out; + up_write(&open_stp->st_rwsem); open_sop = openowner(open_stp->st_stateowner); status = nfserr_bad_stateid; if (!same_clid(&open_sop->oo_owner.so_client->cl_clientid, @@ -5398,6 +5416,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; status = lookup_or_create_lock_state(cstate, open_stp, lock, &lock_stp, &new); + if (status == nfs_ok) + down_write(&lock_stp->st_rwsem); } else { status = nfs4_preprocess_seqid_op(cstate, lock->lk_old_lock_seqid, @@ -5503,6 +5523,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, seqid_mutating_err(ntohl(status))) lock_sop->lo_owner.so_seqid++; + up_write(&lock_stp->st_rwsem); + /* * If this is a new, never-before-used stateid, and we are * returning an error, then just go ahead and release it. @@ -5673,6 +5695,7 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, fput: fput(filp); put_stateid: + up_write(&stp->st_rwsem); nfs4_put_stid(&stp->st_stid); out: nfsd4_bump_seqid(cstate, status); diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index dbc4f85a50082..67685b6cfef30 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -533,15 +533,16 @@ struct nfs4_file { * Better suggestions welcome. 
*/ struct nfs4_ol_stateid { - struct nfs4_stid st_stid; /* must be first field */ - struct list_head st_perfile; - struct list_head st_perstateowner; - struct list_head st_locks; - struct nfs4_stateowner * st_stateowner; - struct nfs4_clnt_odstate * st_clnt_odstate; - unsigned char st_access_bmap; - unsigned char st_deny_bmap; - struct nfs4_ol_stateid * st_openstp; + struct nfs4_stid st_stid; + struct list_head st_perfile; + struct list_head st_perstateowner; + struct list_head st_locks; + struct nfs4_stateowner *st_stateowner; + struct nfs4_clnt_odstate *st_clnt_odstate; + unsigned char st_access_bmap; + unsigned char st_deny_bmap; + struct nfs4_ol_stateid *st_openstp; + struct rw_semaphore st_rwsem; }; static inline struct nfs4_ol_stateid *openlockstateid(struct nfs4_stid *s) From 3c10560e50f845881cdecfbc22fea7939720ad38 Mon Sep 17 00:00:00 2001 From: Andrew Elble Date: Thu, 15 Oct 2015 12:07:28 -0400 Subject: [PATCH 1464/2091] nfsd: eliminate sending duplicate and repeated delegations commit 34ed9872e745fa56f10e9bef2cf3d2336c6c8816 upstream. We've observed the nfsd server in a state where there are multiple delegations on the same nfs4_file for the same client. The nfs client does attempt to DELEGRETURN these when they are presented to it - but apparently under some (unknown) circumstances the client does not manage to return all of them. This leads to the eventual attempt to CB_RECALL more than one delegation with the same nfs filehandle to the same client. The first recall will succeed, but the next recall will fail with NFS4ERR_BADHANDLE. This leads to the server having delegations on cl_revoked that the client has no way to FREE or DELEGRETURN, with resulting inability to recover. The state manager on the server will continually assert SEQ4_STATUS_RECALLABLE_STATE_REVOKED, and the state manager on the client will be looping unable to satisfy the server. List discussion also reports a race between OPEN and DELEGRETURN that will be avoided by only sending the delegation once to the client. This is also logically in accordance with RFC5561 9.1.1 and 10.2. So, let's: 1.) Not hand out duplicate delegations. 2.) Only send them to the client once. RFC 5561: 9.1.1: "Delegations and layouts, on the other hand, are not associated with a specific owner but are associated with the client as a whole (identified by a client ID)." 10.2: "...the stateid for a delegation is associated with a client ID and may be used on behalf of all the open-owners for the given client. A delegation is made to the client as a whole and not to any specific process or thread of control within it." Reported-by: Eric Meddaugh Cc: Trond Myklebust Cc: Olga Kornievskaia Signed-off-by: Andrew Elble Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 94 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 84 insertions(+), 10 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 8c2c9d85cd4ef..bb6c324f1f3df 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -765,16 +765,68 @@ void nfs4_unhash_stid(struct nfs4_stid *s) s->sc_type = 0; } -static void +/** + * nfs4_get_existing_delegation - Discover if this delegation already exists + * @clp: a pointer to the nfs4_client we're granting a delegation to + * @fp: a pointer to the nfs4_file we're granting a delegation on + * + * Return: + * On success: NULL if an existing delegation was not found. + * + * On error: -EAGAIN if one was previously granted to this nfs4_client + * for this nfs4_file. 
+ * + */ + +static int +nfs4_get_existing_delegation(struct nfs4_client *clp, struct nfs4_file *fp) +{ + struct nfs4_delegation *searchdp = NULL; + struct nfs4_client *searchclp = NULL; + + lockdep_assert_held(&state_lock); + lockdep_assert_held(&fp->fi_lock); + + list_for_each_entry(searchdp, &fp->fi_delegations, dl_perfile) { + searchclp = searchdp->dl_stid.sc_client; + if (clp == searchclp) { + return -EAGAIN; + } + } + return 0; +} + +/** + * hash_delegation_locked - Add a delegation to the appropriate lists + * @dp: a pointer to the nfs4_delegation we are adding. + * @fp: a pointer to the nfs4_file we're granting a delegation on + * + * Return: + * On success: NULL if the delegation was successfully hashed. + * + * On error: -EAGAIN if one was previously granted to this + * nfs4_client for this nfs4_file. Delegation is not hashed. + * + */ + +static int hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp) { + int status; + struct nfs4_client *clp = dp->dl_stid.sc_client; + lockdep_assert_held(&state_lock); lockdep_assert_held(&fp->fi_lock); + status = nfs4_get_existing_delegation(clp, fp); + if (status) + return status; + ++fp->fi_delegees; atomic_inc(&dp->dl_stid.sc_count); dp->dl_stid.sc_type = NFS4_DELEG_STID; list_add(&dp->dl_perfile, &fp->fi_delegations); - list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations); + list_add(&dp->dl_perclnt, &clp->cl_delegations); + return 0; } static bool @@ -3941,6 +3993,18 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_file *fp, int flag) return fl; } +/** + * nfs4_setlease - Obtain a delegation by requesting lease from vfs layer + * @dp: a pointer to the nfs4_delegation we're adding. + * + * Return: + * On success: Return code will be 0 on success. + * + * On error: -EAGAIN if there was an existing delegation. + * nonzero if there is an error in other cases. 
+ * + */ + static int nfs4_setlease(struct nfs4_delegation *dp) { struct nfs4_file *fp = dp->dl_stid.sc_file; @@ -3972,16 +4036,19 @@ static int nfs4_setlease(struct nfs4_delegation *dp) goto out_unlock; /* Race breaker */ if (fp->fi_deleg_file) { - status = 0; - ++fp->fi_delegees; - hash_delegation_locked(dp, fp); + status = hash_delegation_locked(dp, fp); goto out_unlock; } fp->fi_deleg_file = filp; - fp->fi_delegees = 1; - hash_delegation_locked(dp, fp); + fp->fi_delegees = 0; + status = hash_delegation_locked(dp, fp); spin_unlock(&fp->fi_lock); spin_unlock(&state_lock); + if (status) { + /* Should never happen, this is a new fi_deleg_file */ + WARN_ON_ONCE(1); + goto out_fput; + } return 0; out_unlock: spin_unlock(&fp->fi_lock); @@ -4001,6 +4068,15 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh, if (fp->fi_had_conflict) return ERR_PTR(-EAGAIN); + spin_lock(&state_lock); + spin_lock(&fp->fi_lock); + status = nfs4_get_existing_delegation(clp, fp); + spin_unlock(&fp->fi_lock); + spin_unlock(&state_lock); + + if (status) + return ERR_PTR(status); + dp = alloc_init_deleg(clp, fh, odstate); if (!dp) return ERR_PTR(-ENOMEM); @@ -4019,9 +4095,7 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh, status = -EAGAIN; goto out_unlock; } - ++fp->fi_delegees; - hash_delegation_locked(dp, fp); - status = 0; + status = hash_delegation_locked(dp, fp); out_unlock: spin_unlock(&fp->fi_lock); spin_unlock(&state_lock); From 03ecddec2c94e41d31fc1e9b5545f6ba7ced9985 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 5 Nov 2015 00:01:51 +0100 Subject: [PATCH 1465/2091] debugfs: fix refcount imbalance in start_creating commit 0ee9608c89e81a1ccee52ecb58a7ff040e2522d9 upstream. In debugfs' start_creating(), we pin the file system to safely access its root. When we failed to create a file, we unpin the file system via failed_creating() to release the mount count and eventually the reference of the vfsmount. However, when we run into an error during lookup_one_len() when still in start_creating(), we only release the parent's mutex but not so the reference on the mount. Looks like it was done in the past, but after splitting portions of __create_file() into start_creating() and end_creating() via 190afd81e4a5 ("debugfs: split the beginning and the end of __create_file() off"), this seemed missed. Noticed during code review. Fixes: 190afd81e4a5 ("debugfs: split the beginning and the end of __create_file() off") Signed-off-by: Daniel Borkmann Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/debugfs/inode.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 12756040ca201..8bec8f1e4b310 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -276,8 +276,12 @@ static struct dentry *start_creating(const char *name, struct dentry *parent) dput(dentry); dentry = ERR_PTR(-EEXIST); } - if (IS_ERR(dentry)) + + if (IS_ERR(dentry)) { mutex_unlock(&d_inode(parent)->i_mutex); + simple_release_fs(&debugfs_mount, &debugfs_mount_count); + } + return dentry; } From 254cbeb139be712a9dbcfcaea997a3e3dfd9be52 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Fri, 20 Nov 2015 09:56:20 -0500 Subject: [PATCH 1466/2091] nfs4: start callback_ident at idr 1 commit c68a027c05709330fe5b2f50c50d5fa02124b5d8 upstream. If clp->cl_cb_ident is zero, then nfs_cb_idr_remove_locked() skips removing it when the nfs_client is freed. 
A decoding or server bug can then find and try to put that first nfs_client which would lead to a crash. Signed-off-by: Benjamin Coddington Fixes: d6870312659d ("nfs4client: convert to idr_alloc()") Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/nfs4client.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index e42be52a8c18d..5dea913baf46c 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -33,7 +33,7 @@ static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion) return ret; idr_preload(GFP_KERNEL); spin_lock(&nn->nfs_client_lock); - ret = idr_alloc(&nn->cb_ident_idr, clp, 0, 0, GFP_NOWAIT); + ret = idr_alloc(&nn->cb_ident_idr, clp, 1, 0, GFP_NOWAIT); if (ret >= 0) clp->cl_cb_ident = ret; spin_unlock(&nn->nfs_client_lock); From 2cc11ce906598181b1bd3d6c57311959e47446b4 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 25 Nov 2015 13:50:11 -0500 Subject: [PATCH 1467/2091] nfs: if we have no valid attrs, then don't declare the attribute cache valid commit c812012f9ca7cf89c9e1a1cd512e6c3b5be04b85 upstream. If we pass in an empty nfs_fattr struct to nfs_update_inode, it will (correctly) not update any of the attributes, but it then clears the NFS_INO_INVALID_ATTR flag, which indicates that the attributes are up to date. Don't clear the flag if the fattr struct has no valid attrs to apply. Reviewed-by: Steve French Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/inode.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 976ba792fbc69..7f22b6c6fb502 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1813,7 +1813,11 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) if ((long)fattr->gencount - (long)nfsi->attr_gencount > 0) nfsi->attr_gencount = fattr->gencount; } - invalid &= ~NFS_INO_INVALID_ATTR; + + /* Don't declare attrcache up to date if there were no attrs! */ + if (fattr->valid != 0) + invalid &= ~NFS_INO_INVALID_ATTR; + /* Don't invalidate the data if we were to blame */ if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) From 273ef13347658915da382a970326b1b1cbd3764a Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Fri, 20 Nov 2015 15:57:30 -0800 Subject: [PATCH 1468/2091] ocfs2: fix umask ignored issue commit 8f1eb48758aacf6c1ffce18179295adbf3bd7640 upstream. New created file's mode is not masked with umask, and this makes umask not work for ocfs2 volume. Fixes: 702e5bc ("ocfs2: use generic posix ACL infrastructure") Signed-off-by: Junxiao Bi Cc: Gang He Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/namei.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 176fe6afd94ec..4d5e0a573f4f2 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -365,6 +365,8 @@ static int ocfs2_mknod(struct inode *dir, mlog_errno(status); goto leave; } + /* update inode->i_mode after mask with "umask". */ + inode->i_mode = mode; handle = ocfs2_start_trans(osb, ocfs2_mknod_credits(osb->sb, S_ISDIR(mode), From 3b16e641542a19e5624858d66b2c6f78b21be29e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 30 Sep 2015 15:04:42 +0200 Subject: [PATCH 1469/2091] ceph: fix message length computation commit 777d738a5e58ba3b6f3932ab1543ce93703f4873 upstream. 
create_request_message() computes the maximum length of a message, but uses the wrong type for the time stamp: sizeof(struct timespec) may be 8 or 16 depending on the architecture, while sizeof(struct ceph_timespec) is always 8, and that is what gets put into the message. Found while auditing the uses of timespec for y2038 problems. Fixes: b8e69066d8af ("ceph: include time stamp in every MDS request") Signed-off-by: Arnd Bergmann Signed-off-by: Yan, Zheng Signed-off-by: Greg Kroah-Hartman --- fs/ceph/mds_client.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 84f37f34f9aa6..1e99b29650a96 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1905,7 +1905,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, len = sizeof(*head) + pathlen1 + pathlen2 + 2*(1 + sizeof(u32) + sizeof(u64)) + - sizeof(struct timespec); + sizeof(struct ceph_timespec); /* calculate (max) length for cap releases */ len += sizeof(struct ceph_mds_request_release) * From 296bf2e45cb117a0face3e0e5c94d371d8781e0e Mon Sep 17 00:00:00 2001 From: "Lu, Han" Date: Wed, 11 Nov 2015 16:54:27 +0800 Subject: [PATCH 1470/2091] ALSA: hda/hdmi - apply Skylake fix-ups to Broxton display codec commit e2656412f2a7343ecfd13eb74bac0a6e6e9c5aad upstream. Broxton and Skylake have the same behavior on display audio. So this patch applys Skylake fix-ups to Broxton. Signed-off-by: Lu, Han Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_hdmi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 225b78b4ef125..d02eccd51f6e8 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -48,8 +48,9 @@ MODULE_PARM_DESC(static_hdmi_pcm, "Don't restrict PCM parameters per ELD info"); #define is_haswell(codec) ((codec)->core.vendor_id == 0x80862807) #define is_broadwell(codec) ((codec)->core.vendor_id == 0x80862808) #define is_skylake(codec) ((codec)->core.vendor_id == 0x80862809) +#define is_broxton(codec) ((codec)->core.vendor_id == 0x8086280a) #define is_haswell_plus(codec) (is_haswell(codec) || is_broadwell(codec) \ - || is_skylake(codec)) + || is_skylake(codec) || is_broxton(codec)) #define is_valleyview(codec) ((codec)->core.vendor_id == 0x80862882) #define is_cherryview(codec) ((codec)->core.vendor_id == 0x80862883) From 07cc49f66973f49a391c91bf4b158fa0f2562ca8 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 14 Dec 2015 21:24:51 -0800 Subject: [PATCH 1471/2091] Linux 4.1.15 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 091280d664527..cf35f6bcffd87 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 1 -SUBLEVEL = 14 +SUBLEVEL = 15 EXTRAVERSION = NAME = Series 4800 From 4b675ad4441e6bb42291a6953843a1e487ba2752 Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Tue, 29 Sep 2015 00:32:19 +0300 Subject: [PATCH 1472/2091] tpm, tpm_tis: fix tpm_tis ACPI detection issue with TPM 2.0 commit 399235dc6e95400a1322a9999e92073bc572f0c8 upstream. Both for FIFO and CRB interface TCG has decided to use the same HID MSFT0101. They can be differentiated by looking at the start method from TPM2 ACPI table. This patches makes necessary fixes to tpm_tis and tpm_crb modules in order to correctly detect, which module should be used. For MSFT0101 we must use struct acpi_driver because struct pnp_driver has a 7 character limitation. 
It turned out that the root cause in b371616b8 was not correct for https://bugzilla.kernel.org/show_bug.cgi?id=98181. v2: * One fixup was missing from v1: is_tpm2_fifo -> is_fifo v3: * Use pnp_driver for existing HIDs and acpi_driver only for MSFT0101 in order ensure backwards compatibility. v4: * Check for FIFO before doing *anything* in crb_acpi_add(). * There was return immediately after acpi_bus_unregister_driver() in cleanup_tis(). This caused pnp_unregister_driver() not to be called. Reported-by: Michael Saunders Reported-by: Michael Marley Reported-by: Jethro Beekman Reported-by: Matthew Garrett Signed-off-by: Jarkko Sakkinen Tested-by: Michael Marley Tested-by: Mimi Zohar (on TPM 1.2) Reviewed-by: Peter Huewe Signed-off-by: Peter Huewe Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/tpm.h | 7 ++ drivers/char/tpm/tpm_crb.c | 32 +++---- drivers/char/tpm/tpm_tis.c | 192 +++++++++++++++++++++++++++++++------ 3 files changed, 181 insertions(+), 50 deletions(-) diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h index f8319a0860fd7..39be5acc9c480 100644 --- a/drivers/char/tpm/tpm.h +++ b/drivers/char/tpm/tpm.h @@ -115,6 +115,13 @@ enum tpm2_startup_types { TPM2_SU_STATE = 0x0001, }; +enum tpm2_start_method { + TPM2_START_ACPI = 2, + TPM2_START_FIFO = 6, + TPM2_START_CRB = 7, + TPM2_START_CRB_WITH_ACPI = 8, +}; + struct tpm_chip; struct tpm_vendor_specific { diff --git a/drivers/char/tpm/tpm_crb.c b/drivers/char/tpm/tpm_crb.c index 1267322595da0..2b971b3e5c1cf 100644 --- a/drivers/char/tpm/tpm_crb.c +++ b/drivers/char/tpm/tpm_crb.c @@ -34,12 +34,6 @@ enum crb_defaults { CRB_ACPI_START_INDEX = 1, }; -enum crb_start_method { - CRB_SM_ACPI_START = 2, - CRB_SM_CRB = 7, - CRB_SM_CRB_WITH_ACPI_START = 8, -}; - struct acpi_tpm2 { struct acpi_table_header hdr; u16 platform_class; @@ -220,12 +214,6 @@ static int crb_acpi_add(struct acpi_device *device) u64 pa; int rc; - chip = tpmm_chip_alloc(dev, &tpm_crb); - if (IS_ERR(chip)) - return PTR_ERR(chip); - - chip->flags = TPM_CHIP_FLAG_TPM2; - status = acpi_get_table(ACPI_SIG_TPM2, 1, (struct acpi_table_header **) &buf); if (ACPI_FAILURE(status)) { @@ -233,13 +221,15 @@ static int crb_acpi_add(struct acpi_device *device) return -ENODEV; } - /* At least some versions of AMI BIOS have a bug that TPM2 table has - * zero address for the control area and therefore we must fail. - */ - if (!buf->control_area_pa) { - dev_err(dev, "TPM2 ACPI table has a zero address for the control area\n"); - return -EINVAL; - } + /* Should the FIFO driver handle this? */ + if (buf->start_method == TPM2_START_FIFO) + return -ENODEV; + + chip = tpmm_chip_alloc(dev, &tpm_crb); + if (IS_ERR(chip)) + return PTR_ERR(chip); + + chip->flags = TPM_CHIP_FLAG_TPM2; if (buf->hdr.length < sizeof(struct acpi_tpm2)) { dev_err(dev, "TPM2 ACPI table has wrong size"); @@ -259,11 +249,11 @@ static int crb_acpi_add(struct acpi_device *device) * report only ACPI start but in practice seems to require both * ACPI start and CRB start. 
*/ - if (sm == CRB_SM_CRB || sm == CRB_SM_CRB_WITH_ACPI_START || + if (sm == TPM2_START_CRB || sm == TPM2_START_FIFO || !strcmp(acpi_device_hid(device), "MSFT0101")) priv->flags |= CRB_FL_CRB_START; - if (sm == CRB_SM_ACPI_START || sm == CRB_SM_CRB_WITH_ACPI_START) + if (sm == TPM2_START_ACPI || sm == TPM2_START_CRB_WITH_ACPI) priv->flags |= CRB_FL_ACPI_START; priv->cca = (struct crb_control_area __iomem *) diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c index f2dffa770b8e9..696ef1d56b4f5 100644 --- a/drivers/char/tpm/tpm_tis.c +++ b/drivers/char/tpm/tpm_tis.c @@ -1,6 +1,6 @@ /* * Copyright (C) 2005, 2006 IBM Corporation - * Copyright (C) 2014 Intel Corporation + * Copyright (C) 2014, 2015 Intel Corporation * * Authors: * Leendert van Doorn @@ -28,6 +28,7 @@ #include #include #include +#include #include "tpm.h" enum tis_access { @@ -65,6 +66,17 @@ enum tis_defaults { TIS_LONG_TIMEOUT = 2000, /* 2 sec */ }; +struct tpm_info { + unsigned long start; + unsigned long len; + unsigned int irq; +}; + +static struct tpm_info tis_default_info = { + .start = TIS_MEM_BASE, + .len = TIS_MEM_LEN, + .irq = 0, +}; /* Some timeout values are needed before it is known whether the chip is * TPM 1.0 or TPM 2.0. @@ -91,26 +103,54 @@ struct priv_data { }; #if defined(CONFIG_PNP) && defined(CONFIG_ACPI) -static int is_itpm(struct pnp_dev *dev) +static int has_hid(struct acpi_device *dev, const char *hid) { - struct acpi_device *acpi = pnp_acpi_device(dev); struct acpi_hardware_id *id; - if (!acpi) - return 0; - - list_for_each_entry(id, &acpi->pnp.ids, list) { - if (!strcmp("INTC0102", id->id)) + list_for_each_entry(id, &dev->pnp.ids, list) + if (!strcmp(hid, id->id)) return 1; - } return 0; } + +static inline int is_itpm(struct acpi_device *dev) +{ + return has_hid(dev, "INTC0102"); +} + +static inline int is_fifo(struct acpi_device *dev) +{ + struct acpi_table_tpm2 *tbl; + acpi_status st; + + /* TPM 1.2 FIFO */ + if (!has_hid(dev, "MSFT0101")) + return 1; + + st = acpi_get_table(ACPI_SIG_TPM2, 1, + (struct acpi_table_header **) &tbl); + if (ACPI_FAILURE(st)) { + dev_err(&dev->dev, "failed to get TPM2 ACPI table\n"); + return 0; + } + + if (le32_to_cpu(tbl->start_method) != TPM2_START_FIFO) + return 0; + + /* TPM 2.0 FIFO */ + return 1; +} #else -static inline int is_itpm(struct pnp_dev *dev) +static inline int is_itpm(struct acpi_device *dev) { return 0; } + +static inline int is_fifo(struct acpi_device *dev) +{ + return 1; +} #endif /* Before we attempt to access the TPM we must see that the valid bit is set. 
@@ -600,9 +640,8 @@ static void tpm_tis_remove(struct tpm_chip *chip) release_locality(chip, chip->vendor.locality, 1); } -static int tpm_tis_init(struct device *dev, acpi_handle acpi_dev_handle, - resource_size_t start, resource_size_t len, - unsigned int irq) +static int tpm_tis_init(struct device *dev, struct tpm_info *tpm_info, + acpi_handle acpi_dev_handle) { u32 vendor, intfcaps, intmask; int rc, i, irq_s, irq_e, probe; @@ -622,7 +661,7 @@ static int tpm_tis_init(struct device *dev, acpi_handle acpi_dev_handle, chip->acpi_dev_handle = acpi_dev_handle; #endif - chip->vendor.iobase = devm_ioremap(dev, start, len); + chip->vendor.iobase = devm_ioremap(dev, tpm_info->start, tpm_info->len); if (!chip->vendor.iobase) return -EIO; @@ -707,7 +746,7 @@ static int tpm_tis_init(struct device *dev, acpi_handle acpi_dev_handle, chip->vendor.iobase + TPM_INT_ENABLE(chip->vendor.locality)); if (interrupts) - chip->vendor.irq = irq; + chip->vendor.irq = tpm_info->irq; if (interrupts && !chip->vendor.irq) { irq_s = ioread8(chip->vendor.iobase + @@ -890,27 +929,27 @@ static SIMPLE_DEV_PM_OPS(tpm_tis_pm, tpm_pm_suspend, tpm_tis_resume); static int tpm_tis_pnp_init(struct pnp_dev *pnp_dev, const struct pnp_device_id *pnp_id) { - resource_size_t start, len; - unsigned int irq = 0; + struct tpm_info tpm_info = tis_default_info; acpi_handle acpi_dev_handle = NULL; - start = pnp_mem_start(pnp_dev, 0); - len = pnp_mem_len(pnp_dev, 0); + tpm_info.start = pnp_mem_start(pnp_dev, 0); + tpm_info.len = pnp_mem_len(pnp_dev, 0); if (pnp_irq_valid(pnp_dev, 0)) - irq = pnp_irq(pnp_dev, 0); + tpm_info.irq = pnp_irq(pnp_dev, 0); else interrupts = false; - if (is_itpm(pnp_dev)) - itpm = true; - #ifdef CONFIG_ACPI - if (pnp_acpi_device(pnp_dev)) + if (pnp_acpi_device(pnp_dev)) { + if (is_itpm(pnp_acpi_device(pnp_dev))) + itpm = true; + acpi_dev_handle = pnp_acpi_device(pnp_dev)->handle; + } #endif - return tpm_tis_init(&pnp_dev->dev, acpi_dev_handle, start, len, irq); + return tpm_tis_init(&pnp_dev->dev, &tpm_info, acpi_dev_handle); } static struct pnp_device_id tpm_pnp_tbl[] = { @@ -930,6 +969,7 @@ MODULE_DEVICE_TABLE(pnp, tpm_pnp_tbl); static void tpm_tis_pnp_remove(struct pnp_dev *dev) { struct tpm_chip *chip = pnp_get_drvdata(dev); + tpm_chip_unregister(chip); tpm_tis_remove(chip); } @@ -950,6 +990,79 @@ module_param_string(hid, tpm_pnp_tbl[TIS_HID_USR_IDX].id, MODULE_PARM_DESC(hid, "Set additional specific HID for this driver to probe"); #endif +#ifdef CONFIG_ACPI +static int tpm_check_resource(struct acpi_resource *ares, void *data) +{ + struct tpm_info *tpm_info = (struct tpm_info *) data; + struct resource res; + + if (acpi_dev_resource_interrupt(ares, 0, &res)) { + tpm_info->irq = res.start; + } else if (acpi_dev_resource_memory(ares, &res)) { + tpm_info->start = res.start; + tpm_info->len = resource_size(&res); + } + + return 1; +} + +static int tpm_tis_acpi_init(struct acpi_device *acpi_dev) +{ + struct list_head resources; + struct tpm_info tpm_info = tis_default_info; + int ret; + + if (!is_fifo(acpi_dev)) + return -ENODEV; + + INIT_LIST_HEAD(&resources); + ret = acpi_dev_get_resources(acpi_dev, &resources, tpm_check_resource, + &tpm_info); + if (ret < 0) + return ret; + + acpi_dev_free_resource_list(&resources); + + if (!tpm_info.irq) + interrupts = false; + + if (is_itpm(acpi_dev)) + itpm = true; + + return tpm_tis_init(&acpi_dev->dev, &tpm_info, acpi_dev->handle); +} + +static int tpm_tis_acpi_remove(struct acpi_device *dev) +{ + struct tpm_chip *chip = dev_get_drvdata(&dev->dev); + + 
tpm_chip_unregister(chip); + tpm_tis_remove(chip); + + return 0; +} + +static struct acpi_device_id tpm_acpi_tbl[] = { + {"MSFT0101", 0}, /* TPM 2.0 */ + /* Add new here */ + {"", 0}, /* User Specified */ + {"", 0} /* Terminator */ +}; +MODULE_DEVICE_TABLE(acpi, tpm_acpi_tbl); + +static struct acpi_driver tis_acpi_driver = { + .name = "tpm_tis", + .ids = tpm_acpi_tbl, + .ops = { + .add = tpm_tis_acpi_init, + .remove = tpm_tis_acpi_remove, + }, + .drv = { + .pm = &tpm_tis_pm, + }, +}; +#endif + static struct platform_driver tis_drv = { .driver = { .name = "tpm_tis", @@ -966,9 +1079,25 @@ static int __init init_tis(void) { int rc; #ifdef CONFIG_PNP - if (!force) - return pnp_register_driver(&tis_pnp_driver); + if (!force) { + rc = pnp_register_driver(&tis_pnp_driver); + if (rc) + return rc; + } +#endif +#ifdef CONFIG_ACPI + if (!force) { + rc = acpi_bus_register_driver(&tis_acpi_driver); + if (rc) { +#ifdef CONFIG_PNP + pnp_unregister_driver(&tis_pnp_driver); #endif + return rc; + } + } +#endif + if (!force) + return 0; rc = platform_driver_register(&tis_drv); if (rc < 0) @@ -978,7 +1107,7 @@ static int __init init_tis(void) rc = PTR_ERR(pdev); goto err_dev; } - rc = tpm_tis_init(&pdev->dev, NULL, TIS_MEM_BASE, TIS_MEM_LEN, 0); + rc = tpm_tis_init(&pdev->dev, &tis_default_info, NULL); if (rc) goto err_init; return 0; @@ -992,9 +1121,14 @@ static int __init init_tis(void) static void __exit cleanup_tis(void) { struct tpm_chip *chip; -#ifdef CONFIG_PNP +#if defined(CONFIG_PNP) || defined(CONFIG_ACPI) if (!force) { +#ifdef CONFIG_ACPI + acpi_bus_unregister_driver(&tis_acpi_driver); +#endif +#ifdef CONFIG_PNP pnp_unregister_driver(&tis_pnp_driver); +#endif return; } #endif From 4f2052f205a8e35943afb4179b2b50f759ab6da2 Mon Sep 17 00:00:00 2001 From: Azael Avalos Date: Wed, 4 Nov 2015 09:28:26 -0700 Subject: [PATCH 1473/2091] toshiba_acpi: Initialize hotkey_event_type variable commit d2f20619942fe4618160a7fa3dbdcbac335cff59 upstream. Commit 53147b6cabee5e8d1997b5682fcc0c3b72ddf9c2 ("toshiba_acpi: Fix hotkeys registration on some toshiba models") fixed an issue on some laptops regarding hotkeys registration, however, if failed to address the initialization of the hotkey_event_type variable, and thus, it can lead to potential unwanted effects as the variable is being checked. This patch initializes such variable to avoid such unwanted effects. Signed-off-by: Azael Avalos Signed-off-by: Darren Hart Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/toshiba_acpi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c index 93e54a0f471af..35882dd690a64 100644 --- a/drivers/platform/x86/toshiba_acpi.c +++ b/drivers/platform/x86/toshiba_acpi.c @@ -2764,6 +2764,7 @@ static int toshiba_acpi_add(struct acpi_device *acpi_dev) ret = toshiba_function_keys_get(dev, &special_functions); dev->kbd_function_keys_supported = !ret; + dev->hotkey_event_type = 0; if (toshiba_acpi_setup_keyboard(dev)) pr_info("Unable to activate hotkeys\n"); From 71cb018402c8363a4bd49b0b0057b32504ed9fd6 Mon Sep 17 00:00:00 2001 From: Jonas Jonsson Date: Sun, 22 Nov 2015 11:47:17 +0100 Subject: [PATCH 1474/2091] USB: cdc_acm: Ignore Infineon Flash Loader utility commit f33a7f72e5fc033daccbb8d4753d7c5c41a4d67b upstream. Some modems, such as the Telit UE910, are using an Infineon Flash Loader utility. It has two interfaces, 2/2/0 (Abstract Modem) and 10/0/0 (CDC Data). The latter can be used as a serial interface to upgrade the firmware of the modem. 
However, that isn't possible when the cdc-acm driver takes control of the device. The following is an explanation of the behaviour by Daniele Palmas during discussion on linux-usb. "This is what happens when the device is turned on (without modifying the drivers): [155492.352031] usb 1-3: new high-speed USB device number 27 using ehci-pci [155492.485429] usb 1-3: config 1 interface 0 altsetting 0 endpoint 0x81 has an invalid bInterval 255, changing to 11 [155492.485436] usb 1-3: New USB device found, idVendor=058b, idProduct=0041 [155492.485439] usb 1-3: New USB device strings: Mfr=0, Product=0, SerialNumber=0 [155492.485952] cdc_acm 1-3:1.0: ttyACM0: USB ACM device This is the flashing device that is caught by the cdc-acm driver. Once the ttyACM appears, the application starts sending a magic string (simple write on the file descriptor) to keep the device in flashing mode. If this magic string is not properly received in a certain time interval, the modem goes on in normal operative mode: [155493.748094] usb 1-3: USB disconnect, device number 27 [155494.916025] usb 1-3: new high-speed USB device number 28 using ehci-pci [155495.059978] usb 1-3: New USB device found, idVendor=1bc7, idProduct=0021 [155495.059983] usb 1-3: New USB device strings: Mfr=1, Product=2, SerialNumber=3 [155495.059986] usb 1-3: Product: 6 CDC-ACM + 1 CDC-ECM [155495.059989] usb 1-3: Manufacturer: Telit [155495.059992] usb 1-3: SerialNumber: 359658044004697 [155495.138958] cdc_acm 1-3:1.0: ttyACM0: USB ACM device [155495.140832] cdc_acm 1-3:1.2: ttyACM1: USB ACM device [155495.142827] cdc_acm 1-3:1.4: ttyACM2: USB ACM device [155495.144462] cdc_acm 1-3:1.6: ttyACM3: USB ACM device [155495.145967] cdc_acm 1-3:1.8: ttyACM4: USB ACM device [155495.147588] cdc_acm 1-3:1.10: ttyACM5: USB ACM device [155495.154322] cdc_ether 1-3:1.12 wwan0: register 'cdc_ether' at usb-0000:00:1a.7-3, Mobile Broadband Network Device, 00:00:11:12:13:14 Using the cdc-acm driver, the string, though being sent in the same way than using the usb-serial-simple driver (I can confirm that the data is passing properly since I used an hw usb sniffer), does not make the device to stay in flashing mode." Signed-off-by: Jonas Jonsson Tested-by: Daniele Palmas Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index a086e1d69bc70..0fe15aec7ed07 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -1848,6 +1848,11 @@ static const struct usb_device_id acm_ids[] = { }, #endif + /* Exclude Infineon Flash Loader utility */ + { USB_DEVICE(0x058b, 0x0041), + .driver_info = IGNORE_DEVICE, + }, + /* control interfaces without any protocol set */ { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM, USB_CDC_PROTO_NONE) }, From bbe2ca0d54e997ce7ece0b97230b1f1d88124e5d Mon Sep 17 00:00:00 2001 From: Jonas Jonsson Date: Sun, 22 Nov 2015 11:47:18 +0100 Subject: [PATCH 1475/2091] USB: serial: Another Infineon flash loader USB ID commit a0e80fbd56b4573de997c9a088a33abbc1121400 upstream. The flash loader has been seen on a Telit UE910 modem. The flash loader is a bit special, it presents both an ACM and CDC Data interface but only the latter is useful. Unless a magic string is sent to the device it will disappear and the regular modem device appears instead. 
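As a rough illustration only (the "/dev/ttyUSB0" node name and the magic string below are placeholders, not values taken from this patch or from the Infineon tool), the flashing utility keeps the loader in flashing mode with a plain write on the tty file descriptor, along these lines:

	/* Minimal userspace sketch: hold the flash loader in flashing mode.
	 * The device node and the magic string are assumptions; the real
	 * values are tool- and device-specific and are not given here.
	 */
	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		static const char magic[] = "MAGIC-PLACEHOLDER";
		int fd = open("/dev/ttyUSB0", O_RDWR | O_NOCTTY);

		if (fd < 0) {
			perror("open");
			return 1;
		}
		/* Must arrive within the loader's timeout window, otherwise
		 * the device drops off and re-enumerates as a regular modem.
		 */
		if (write(fd, magic, sizeof(magic) - 1) < 0)
			perror("write");
		close(fd);
		return 0;
	}

The sketch only shows why claiming the CDC Data interface with usb-serial-simple (while cdc-acm ignores the device) is expected to be enough for the existing flashing tools: they just need a tty to write to.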
Signed-off-by: Jonas Jonsson Tested-by: Daniele Palmas Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/usb-serial-simple.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/usb-serial-simple.c b/drivers/usb/serial/usb-serial-simple.c index 3658662898fcb..a204782ae530e 100644 --- a/drivers/usb/serial/usb-serial-simple.c +++ b/drivers/usb/serial/usb-serial-simple.c @@ -53,6 +53,7 @@ DEVICE(funsoft, FUNSOFT_IDS); /* Infineon Flashloader driver */ #define FLASHLOADER_IDS() \ + { USB_DEVICE_INTERFACE_CLASS(0x058b, 0x0041, USB_CLASS_CDC_DATA) }, \ { USB_DEVICE(0x8087, 0x0716) } DEVICE(flashloader, FLASHLOADER_IDS); From a7572562ef8dc36cb97e2d28fa9fd8f49ea6125f Mon Sep 17 00:00:00 2001 From: Dmitry Katsubo Date: Fri, 20 Nov 2015 01:30:44 +0100 Subject: [PATCH 1476/2091] usb-storage: Fix scsi-sd failure "Invalid field in cdb" for USB adapter JMicron commit 9fa62b1a31c96715aef34f25000e882ed4ac4876 upstream. The patch extends the family of SATA-to-USB JMicron adapters that need FUA to be disabled and applies the same policy for uas driver. See details in http://unix.stackexchange.com/questions/237204/ Signed-off-by: Dmitry Katsubo Tested-by: Dmitry Katsubo Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/uas.c | 4 ++++ drivers/usb/storage/unusual_devs.h | 2 +- drivers/usb/storage/unusual_uas.h | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c index 6d3122afeed33..75e4979e6c159 100644 --- a/drivers/usb/storage/uas.c +++ b/drivers/usb/storage/uas.c @@ -796,6 +796,10 @@ static int uas_slave_configure(struct scsi_device *sdev) if (devinfo->flags & US_FL_NO_REPORT_OPCODES) sdev->no_report_opcodes = 1; + /* A few buggy USB-ATA bridges don't understand FUA */ + if (devinfo->flags & US_FL_BROKEN_FUA) + sdev->broken_fua = 1; + scsi_change_queue_depth(sdev, devinfo->qdepth - 2); return 0; } diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index 87898ca2ed171..4095824c8c6da 100644 --- a/drivers/usb/storage/unusual_devs.h +++ b/drivers/usb/storage/unusual_devs.h @@ -1987,7 +1987,7 @@ UNUSUAL_DEV( 0x14cd, 0x6600, 0x0201, 0x0201, US_FL_IGNORE_RESIDUE ), /* Reported by Michael Büsch */ -UNUSUAL_DEV( 0x152d, 0x0567, 0x0114, 0x0114, +UNUSUAL_DEV( 0x152d, 0x0567, 0x0114, 0x0116, "JMicron", "USB to ATA/ATAPI Bridge", USB_SC_DEVICE, USB_PR_DEVICE, NULL, diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h index c85ea530085f1..ccc113e83d88e 100644 --- a/drivers/usb/storage/unusual_uas.h +++ b/drivers/usb/storage/unusual_uas.h @@ -132,7 +132,7 @@ UNUSUAL_DEV(0x152d, 0x0567, 0x0000, 0x9999, "JMicron", "JMS567", USB_SC_DEVICE, USB_PR_DEVICE, NULL, - US_FL_NO_REPORT_OPCODES), + US_FL_BROKEN_FUA | US_FL_NO_REPORT_OPCODES), /* Reported-by: Hans de Goede */ UNUSUAL_DEV(0x2109, 0x0711, 0x0000, 0x9999, From ee10f5fe09105cc5495f00156f9a7914bbfa1c32 Mon Sep 17 00:00:00 2001 From: Konstantin Shkolnyy Date: Tue, 10 Nov 2015 16:40:13 -0600 Subject: [PATCH 1477/2091] USB: cp210x: Remove CP2110 ID from compatibility list commit 7c90e610b60cd1ed6abafd806acfaedccbbe52d1 upstream. CP2110 ID (0x10c4, 0xea80) doesn't belong here because it's a HID and completely different from CP210x devices. 
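Purely as a sketch (the "/dev/hidraw0" node name is an assumption), the CP2110 is expected to be reached through the HID layer, for example via a hidraw node, rather than through a cp210x ttyUSB port:

	/* Minimal userspace sketch: check whether a hidraw node is a CP2110
	 * (vendor 0x10c4, product 0xea80). The node name is an assumption.
	 */
	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <unistd.h>
	#include <linux/hidraw.h>

	int main(void)
	{
		struct hidraw_devinfo info;
		int fd = open("/dev/hidraw0", O_RDONLY);

		if (fd < 0) {
			perror("open");
			return 1;
		}
		if (ioctl(fd, HIDIOCGRAWINFO, &info) == 0)
			printf("bus %u vendor 0x%04hx product 0x%04hx\n",
			       info.bustype, info.vendor, info.product);
		close(fd);
		return 0;
	}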
Signed-off-by: Konstantin Shkolnyy Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/cp210x.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index eac7ccaa3c859..7d4f51a32e66f 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -132,7 +132,6 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0xEA60) }, /* Silicon Labs factory default */ { USB_DEVICE(0x10C4, 0xEA61) }, /* Silicon Labs factory default */ { USB_DEVICE(0x10C4, 0xEA70) }, /* Silicon Labs factory default */ - { USB_DEVICE(0x10C4, 0xEA80) }, /* Silicon Labs factory default */ { USB_DEVICE(0x10C4, 0xEA71) }, /* Infinity GPS-MIC-1 Radio Monophone */ { USB_DEVICE(0x10C4, 0xF001) }, /* Elan Digital Systems USBscope50 */ { USB_DEVICE(0x10C4, 0xF002) }, /* Elan Digital Systems USBwave12 */ From 04db25a46b25ac5c29082a61f96067c8cd5984ad Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 10 Dec 2015 15:27:21 -0500 Subject: [PATCH 1478/2091] USB: add quirk for devices with broken LPM commit ad87e03213b552a5c33d5e1e7a19a73768397010 upstream. Some USB device / host controller combinations seem to have problems with Link Power Management. For example, Steinar found that his xHCI controller wouldn't handle bandwidth calculations correctly for two video cards simultaneously when LPM was enabled, even though the bus had plenty of bandwidth available. This patch introduces a new quirk flag for devices that should remain disabled for LPM, and creates quirk entries for Steinar's devices. Signed-off-by: Alan Stern Reported-by: Steinar H. Gunderson Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 7 ++++++- drivers/usb/core/quirks.c | 6 ++++++ include/linux/usb/quirks.h | 3 +++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 1e9a8c9aa531f..cd1fc3707ca17 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -124,6 +124,10 @@ struct usb_hub *usb_hub_to_struct_hub(struct usb_device *hdev) int usb_device_supports_lpm(struct usb_device *udev) { + /* Some devices have trouble with LPM */ + if (udev->quirks & USB_QUIRK_NO_LPM) + return 0; + /* USB 2.1 (and greater) devices indicate LPM support through * their USB 2.0 Extended Capabilities BOS descriptor. 
*/ @@ -4493,6 +4497,8 @@ hub_port_init (struct usb_hub *hub, struct usb_device *udev, int port1, goto fail; } + usb_detect_quirks(udev); + if (udev->wusb == 0 && le16_to_cpu(udev->descriptor.bcdUSB) >= 0x0201) { retval = usb_get_bos_descriptor(udev); if (!retval) { @@ -4691,7 +4697,6 @@ static void hub_port_connect(struct usb_hub *hub, int port1, u16 portstatus, if (status < 0) goto loop; - usb_detect_quirks(udev); if (udev->quirks & USB_QUIRK_DELAY_INIT) msleep(1000); diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index f5a381945db28..017c1de53aa56 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -199,6 +199,12 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x1a0a, 0x0200), .driver_info = USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL }, + /* Blackmagic Design Intensity Shuttle */ + { USB_DEVICE(0x1edb, 0xbd3b), .driver_info = USB_QUIRK_NO_LPM }, + + /* Blackmagic Design UltraStudio SDI */ + { USB_DEVICE(0x1edb, 0xbd4f), .driver_info = USB_QUIRK_NO_LPM }, + { } /* terminating entry must be last */ }; diff --git a/include/linux/usb/quirks.h b/include/linux/usb/quirks.h index 9948c874e3f1e..1d0043dc34e42 100644 --- a/include/linux/usb/quirks.h +++ b/include/linux/usb/quirks.h @@ -47,4 +47,7 @@ /* device generates spurious wakeup, ignore remote wakeup capability */ #define USB_QUIRK_IGNORE_REMOTE_WAKEUP BIT(9) +/* device can't handle Link Power Management */ +#define USB_QUIRK_NO_LPM BIT(10) + #endif /* __LINUX_USB_QUIRKS_H */ From de04cf3c618b358d65c2b30340b5f9b0740d8cc8 Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Sat, 21 Nov 2015 00:36:44 +0300 Subject: [PATCH 1479/2091] USB: whci-hcd: add check for dma mapping error commit f9fa1887dcf26bd346665a6ae3d3f53dec54cba1 upstream. qset_fill_page_list() do not check for dma mapping errors. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/whci/qset.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/host/whci/qset.c b/drivers/usb/host/whci/qset.c index dc31c425ce017..9f1c0538b2112 100644 --- a/drivers/usb/host/whci/qset.c +++ b/drivers/usb/host/whci/qset.c @@ -377,6 +377,10 @@ static int qset_fill_page_list(struct whc *whc, struct whc_std *std, gfp_t mem_f if (std->pl_virt == NULL) return -ENOMEM; std->dma_addr = dma_map_single(whc->wusbhc.dev, std->pl_virt, pl_len, DMA_TO_DEVICE); + if (dma_mapping_error(whc->wusbhc.dev, std->dma_addr)) { + kfree(std->pl_virt); + return -EFAULT; + } for (p = 0; p < std->num_pointers; p++) { std->pl_virt[p].buf_ptr = cpu_to_le64(dma_addr); From 21d6ff27bb30b461ab7a7cfb418d8c45b9ba6887 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Wed, 18 Nov 2015 17:06:00 -0600 Subject: [PATCH 1480/2091] usb: gadget: pxa27x: fix suspend callback commit 391e6dcb37857d5659b53def2f41e2f56850d33c upstream. pxa27x disconnects pullups on suspend but doesn't notify the gadget driver about it, so gadget driver can't disable the endpoints it was using. This causes problems on resume because gadget core will think endpoints are still enabled and just ignore the following usb_ep_enable(). Fix this problem by calling gadget_driver->disconnect(). 
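Schematically (this is not code from any in-tree gadget driver; the structure, field and endpoint names are made up), the notification matters because ->disconnect() is where a gadget driver releases its endpoints, so that the usb_ep_enable() calls it issues on resume start from a clean state:

	/* Hypothetical gadget driver callback: drop the endpoints on
	 * disconnect so that a later usb_ep_enable() at resume succeeds.
	 */
	#include <linux/usb/gadget.h>

	struct example_dev {
		struct usb_ep *in_ep;
		struct usb_ep *out_ep;
		bool configured;
	};

	static void example_gadget_disconnect(struct usb_gadget *gadget)
	{
		struct example_dev *dev = get_gadget_data(gadget);

		usb_ep_disable(dev->in_ep);
		usb_ep_disable(dev->out_ep);
		dev->configured = false;
	}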
Tested-by: Robert Jarzmik Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/pxa27x_udc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/gadget/udc/pxa27x_udc.c b/drivers/usb/gadget/udc/pxa27x_udc.c index b51226abade62..7a454708e948a 100644 --- a/drivers/usb/gadget/udc/pxa27x_udc.c +++ b/drivers/usb/gadget/udc/pxa27x_udc.c @@ -2535,6 +2535,9 @@ static int pxa_udc_suspend(struct platform_device *_dev, pm_message_t state) udc->pullup_resume = udc->pullup_on; dplus_pullup(udc, 0); + if (udc->driver) + udc->driver->disconnect(&udc->gadget); + return 0; } From 12ca1c84a29f68d215d60b1abc2960280176dac6 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 18 Nov 2015 17:18:40 +0100 Subject: [PATCH 1481/2091] usb: musb: USB_TI_CPPI41_DMA requires dmaengine support commit 183e53e8ddf4165c3763181682189362d6b403f7 upstream. The CPPI-4.1 driver selects TI_CPPI41, which is a dmaengine driver and that may not be available when CONFIG_DMADEVICES is not set: warning: (USB_TI_CPPI41_DMA) selects TI_CPPI41 which has unmet direct dependencies (DMADEVICES && ARCH_OMAP) This adds an extra dependency to avoid generating warnings in randconfig builds. Ideally we'd remove the 'select' statement, but that has the potential to break defconfig files. Signed-off-by: Arnd Bergmann Fixes: 411dd19c682d ("usb: musb: Kconfig: Select the DMA driver if DMA mode of MUSB is enabled") Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/musb/Kconfig b/drivers/usb/musb/Kconfig index 39db8b603627c..d1b9e0c7fb0d4 100644 --- a/drivers/usb/musb/Kconfig +++ b/drivers/usb/musb/Kconfig @@ -147,7 +147,7 @@ config USB_TI_CPPI_DMA config USB_TI_CPPI41_DMA bool 'TI CPPI 4.1 (AM335x)' - depends on ARCH_OMAP + depends on ARCH_OMAP && DMADEVICES select TI_CPPI41 config USB_TUSB_OMAP_DMA From c922602c0d2ae1f435d16b6124cfa1137ce83a7e Mon Sep 17 00:00:00 2001 From: Hans Yang Date: Tue, 1 Dec 2015 16:54:59 +0800 Subject: [PATCH 1482/2091] usb: core : hub: Fix BOS 'NULL pointer' kernel panic commit 464ad8c43a9ead98c2b0eaed86bea727f2ad106e upstream. When a USB 3.0 mass storage device is disconnected in transporting state, storage device driver may handle it as a transport error and reset the device by invoking usb_reset_and_verify_device() and following could happen: in usb_reset_and_verify_device(): udev->bos = NULL; For U1/U2 enabled devices, driver will disable LPM, and in some conditions: from usb_unlocked_disable_lpm() --> usb_disable_lpm() --> usb_enable_lpm() udev->bos->ss_cap->bU1devExitLat; And it causes 'NULL pointer' and 'kernel panic': [ 157.976257] Unable to handle kernel NULL pointer dereference at virtual address 00000010 ... [ 158.026400] PC is at usb_enable_link_state+0x34/0x2e0 [ 158.031442] LR is at usb_enable_lpm+0x98/0xac ... [ 158.137368] [] usb_enable_link_state+0x34/0x2e0 [ 158.143451] [] usb_enable_lpm+0x94/0xac [ 158.148840] [] usb_disable_lpm+0xa8/0xb4 ... [ 158.214954] Kernel panic - not syncing: Fatal exception This commit moves 'udev->bos = NULL' behind usb_unlocked_disable_lpm() to prevent from NULL pointer access. Issue can be reproduced by following setup: 1) A SS pen drive behind a SS hub connected to the host. 2) Transporting data between the pen drive and the host. 3) Abruptly disconnect hub and pen drive from host. 4) With a chance it crashes. 
Signed-off-by: Hans Yang Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index cd1fc3707ca17..d68c4a4db6827 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -5312,9 +5312,6 @@ static int usb_reset_and_verify_device(struct usb_device *udev) if (udev->usb2_hw_lpm_enabled == 1) usb_set_usb2_hardware_lpm(udev, 0); - bos = udev->bos; - udev->bos = NULL; - /* Disable LPM and LTM while we reset the device and reinstall the alt * settings. Device-initiated LPM settings, and system exit latency * settings are cleared when the device is reset, so we have to set @@ -5323,15 +5320,18 @@ static int usb_reset_and_verify_device(struct usb_device *udev) ret = usb_unlocked_disable_lpm(udev); if (ret) { dev_err(&udev->dev, "%s Failed to disable LPM\n.", __func__); - goto re_enumerate; + goto re_enumerate_no_bos; } ret = usb_disable_ltm(udev); if (ret) { dev_err(&udev->dev, "%s Failed to disable LTM\n.", __func__); - goto re_enumerate; + goto re_enumerate_no_bos; } + bos = udev->bos; + udev->bos = NULL; + for (i = 0; i < SET_CONFIG_TRIES; ++i) { /* ep0 maxpacket size may change; let the HCD know about it. @@ -5428,10 +5428,11 @@ static int usb_reset_and_verify_device(struct usb_device *udev) return 0; re_enumerate: - /* LPM state doesn't matter when we're about to destroy the device. */ - hub_port_logical_disconnect(parent_hub, port1); usb_release_bos_descriptor(udev); udev->bos = bos; +re_enumerate_no_bos: + /* LPM state doesn't matter when we're about to destroy the device. */ + hub_port_logical_disconnect(parent_hub, port1); return -ENODEV; } From 2518cb2956c7774dcbf5f361bdd9c126ec32b310 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Wed, 18 Nov 2015 02:01:21 +0000 Subject: [PATCH 1483/2091] usb: Use the USB_SS_MULT() macro to decode burst multiplier for log message commit 5377adb092664d336ac212499961cac5e8728794 upstream. usb_parse_ss_endpoint_companion() now decodes the burst multiplier correctly in order to check that it's <= 3, but still uses the wrong expression if warning that it's > 3. Fixes: ff30cbc8da42 ("usb: Use the USB_SS_MULT() macro to get the ...") Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/config.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index b9ddf0c1ffe59..894894f2ff935 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -115,7 +115,8 @@ static void usb_parse_ss_endpoint_companion(struct device *ddev, int cfgno, USB_SS_MULT(desc->bmAttributes) > 3) { dev_warn(ddev, "Isoc endpoint has Mult of %d in " "config %d interface %d altsetting %d ep %d: " - "setting to 3\n", desc->bmAttributes + 1, + "setting to 3\n", + USB_SS_MULT(desc->bmAttributes), cfgno, inum, asnum, ep->desc.bEndpointAddress); ep->ss_ep_comp.bmAttributes = 2; } From 2e7b09211aaadac86fe3ae8e0a94566bfb2ac83d Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Thu, 3 Dec 2015 16:49:32 +0100 Subject: [PATCH 1484/2091] pppoe: fix memory corruption in padt work structure [ Upstream commit fe53985aaac83d516b38358d4f39921d9942a0e2 ] pppoe_connect() mustn't touch the padt_work field of pppoe sockets because that work could be already pending. 
[ 21.473147] BUG: unable to handle kernel NULL pointer dereference at 00000004 [ 21.474523] IP: [] process_one_work+0x29/0x31c [ 21.475164] *pde = 00000000 [ 21.475513] Oops: 0000 [#1] SMP [ 21.475910] Modules linked in: pppoe pppox ppp_generic slhc crc32c_intel aesni_intel virtio_net xts aes_i586 lrw gf128mul ablk_helper cryptd evdev acpi_cpufreq processor serio_raw button ext4 crc16 mbcache jbd2 virtio_blk virtio_pci virtio_ring virtio [ 21.476168] CPU: 2 PID: 164 Comm: kworker/2:2 Not tainted 4.4.0-rc1 #1 [ 21.476168] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Debian-1.8.2-1 04/01/2014 [ 21.476168] task: f5f83c00 ti: f5e28000 task.ti: f5e28000 [ 21.476168] EIP: 0060:[] EFLAGS: 00010046 CPU: 2 [ 21.476168] EIP is at process_one_work+0x29/0x31c [ 21.484082] EAX: 00000000 EBX: f678b2a0 ECX: 00000004 EDX: 00000000 [ 21.484082] ESI: f6c69940 EDI: f5e29ef0 EBP: f5e29f0c ESP: f5e29edc [ 21.484082] DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068 [ 21.484082] CR0: 80050033 CR2: 000000a4 CR3: 317ad000 CR4: 00040690 [ 21.484082] Stack: [ 21.484082] 00000000 f6c69950 00000000 f6c69940 c0042338 f5e29f0c c1327945 00000000 [ 21.484082] 00000008 f678b2a0 f6c69940 f678b2b8 f5e29f30 c1043984 f5f83c00 f6c69970 [ 21.484082] f678b2a0 c10437d3 f6775e80 f678b2a0 c10437d3 f5e29fac c1047059 f5e29f74 [ 21.484082] Call Trace: [ 21.484082] [] ? _raw_spin_lock_irq+0x28/0x30 [ 21.484082] [] worker_thread+0x1b1/0x244 [ 21.484082] [] ? rescuer_thread+0x229/0x229 [ 21.484082] [] ? rescuer_thread+0x229/0x229 [ 21.484082] [] kthread+0x8f/0x94 [ 21.484082] [] ? _raw_spin_unlock_irq+0x22/0x26 [ 21.484082] [] ret_from_kernel_thread+0x21/0x38 [ 21.484082] [] ? kthread_parkme+0x19/0x19 [ 21.496082] Code: 5d c3 55 89 e5 57 56 53 89 c3 83 ec 24 89 d0 89 55 e0 8d 7d e4 e8 6c d8 ff ff b9 04 00 00 00 89 45 d8 8b 43 24 89 45 dc 8b 45 d8 <8b> 40 04 8b 80 e0 00 00 00 c1 e8 05 24 01 88 45 d7 8b 45 e0 8d [ 21.496082] EIP: [] process_one_work+0x29/0x31c SS:ESP 0068:f5e29edc [ 21.496082] CR2: 0000000000000004 [ 21.496082] ---[ end trace e362cc9cf10dae89 ]--- Reported-by: Andrew Fixes: 287f3a943fef ("pppoe: Use workqueue to die properly when a PADT is received") Signed-off-by: Guillaume Nault Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ppp/pppoe.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index ab33262ed8260..9c8fabed4444f 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -568,6 +568,9 @@ static int pppoe_create(struct net *net, struct socket *sock) sk->sk_family = PF_PPPOX; sk->sk_protocol = PX_PROTO_OE; + INIT_WORK(&pppox_sk(sk)->proto.pppoe.padt_work, + pppoe_unbind_sock_work); + return 0; } @@ -632,8 +635,6 @@ static int pppoe_connect(struct socket *sock, struct sockaddr *uservaddr, lock_sock(sk); - INIT_WORK(&po->proto.pppoe.padt_work, pppoe_unbind_sock_work); - error = -EINVAL; if (sp->sa_protocol != PX_PROTO_OE) goto end; @@ -663,8 +664,13 @@ static int pppoe_connect(struct socket *sock, struct sockaddr *uservaddr, po->pppoe_dev = NULL; } - memset(sk_pppox(po) + 1, 0, - sizeof(struct pppox_sock) - sizeof(struct sock)); + po->pppoe_ifindex = 0; + memset(&po->pppoe_pa, 0, sizeof(po->pppoe_pa)); + memset(&po->pppoe_relay, 0, sizeof(po->pppoe_relay)); + memset(&po->chan, 0, sizeof(po->chan)); + po->next = NULL; + po->num = 0; + sk->sk_state = PPPOX_NONE; } From 5bb18eb6e464694ca851bd9b86d64ab747ecbcbd Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 3 Dec 2015 17:21:50 +0100 Subject: [PATCH 1485/2091] gre6: allow to update all parameters via rtnl [ Upstream commit 6a61d4dbf4f54b5683e0f1e58d873cecca7cb977 ] Parameters were updated only if the kernel was unable to find the tunnel with the new parameters, ie only if core pamareters were updated (keys, addr, link, type). Now it's possible to update ttl, hoplimit, flowinfo and flags. Fixes: c12b395a4664 ("gre: Support GRE over IPv6") Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_gre.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 69f4f689f06af..76be7d311cc4e 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1553,13 +1553,11 @@ static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[], return -EEXIST; } else { t = nt; - - ip6gre_tunnel_unlink(ign, t); - ip6gre_tnl_change(t, &p, !tb[IFLA_MTU]); - ip6gre_tunnel_link(ign, t); - netdev_state_change(dev); } + ip6gre_tunnel_unlink(ign, t); + ip6gre_tnl_change(t, &p, !tb[IFLA_MTU]); + ip6gre_tunnel_link(ign, t); return 0; } From e4907d0a5567efbe2fe2e31d372a3e72d1f7f871 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Fri, 4 Dec 2015 09:50:00 +0100 Subject: [PATCH 1486/2091] atl1c: Improve driver not to do order 4 GFP_ATOMIC allocation [ Upstream commit f2a3771ae8aca879c32336c76ad05a017629bae2 ] atl1c driver is doing order-4 allocation with GFP_ATOMIC priority. That often breaks networking after resume. Switch to GFP_KERNEL. Still not ideal, but should be significantly better. atl1c_setup_ring_resources() is called from .open() function, and already uses GFP_KERNEL, so this change is safe. Signed-off-by: Pavel Machek Acked-by: Michal Hocko Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/atheros/atl1c/atl1c_main.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c index 932bd1862f7ad..6e9036a06515f 100644 --- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c +++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c @@ -1014,13 +1014,12 @@ static int atl1c_setup_ring_resources(struct atl1c_adapter *adapter) sizeof(struct atl1c_recv_ret_status) * rx_desc_count + 8 * 4; - ring_header->desc = pci_alloc_consistent(pdev, ring_header->size, - &ring_header->dma); + ring_header->desc = dma_zalloc_coherent(&pdev->dev, ring_header->size, + &ring_header->dma, GFP_KERNEL); if (unlikely(!ring_header->desc)) { - dev_err(&pdev->dev, "pci_alloc_consistend failed\n"); + dev_err(&pdev->dev, "could not get memory for DMA buffer\n"); goto err_nomem; } - memset(ring_header->desc, 0, ring_header->size); /* init TPD ring */ tpd_ring[0].dma = roundup(ring_header->dma, 8); From a7a97780df2c5d34f0ab7a9a748708dc5e98a087 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Fri, 4 Dec 2015 14:15:08 +0100 Subject: [PATCH 1487/2091] ipv6: keep existing flags when setting IFA_F_OPTIMISTIC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 9a1ec4612c9bfc94d4185e3459055a37a685e575 ] Commit 64236f3f3d74 ("ipv6: introduce IFA_F_STABLE_PRIVACY flag") failed to update the setting of the IFA_F_OPTIMISTIC flag, causing the IFA_F_STABLE_PRIVACY flag to be lost if IFA_F_OPTIMISTIC is set. Cc: Erik Kline Cc: Fernando Gont Cc: Lorenzo Colitti Cc: YOSHIFUJI Hideaki/吉藤英明 Cc: Hannes Frederic Sowa Fixes: 64236f3f3d74 ("ipv6: introduce IFA_F_STABLE_PRIVACY flag") Signed-off-by: Bjørn Mork Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/addrconf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index fd3aa6148dd18..9d067b349c8ce 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2384,7 +2384,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) #ifdef CONFIG_IPV6_OPTIMISTIC_DAD if (in6_dev->cnf.optimistic_dad && !net->ipv6.devconf_all->forwarding && sllao) - addr_flags = IFA_F_OPTIMISTIC; + addr_flags |= IFA_F_OPTIMISTIC; #endif /* Do not allow to create too much of autoconfigured From d599ae203c46fa6de7af4564c3fef8015f3012ca Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Fri, 4 Dec 2015 13:54:03 +0100 Subject: [PATCH 1488/2091] vxlan: fix incorrect RCO bit in VXLAN header [ Upstream commit c5fb8caaf91ea6a92920cf24db10cfc94d58de0f ] Commit 3511494ce2f3d ("vxlan: Group Policy extension") changed definition of VXLAN_HF_RCO from 0x00200000 to BIT(24). This is obviously incorrect. It's also in violation with the RFC draft. Fixes: 3511494ce2f3d ("vxlan: Group Policy extension") Cc: Thomas Graf Cc: Tom Herbert Signed-off-by: Jiri Benc Acked-by: Tom Herbert Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/vxlan.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 0082b5d33d7d3..7ef9272a405aa 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -78,7 +78,7 @@ struct vxlanhdr { }; /* VXLAN header flags. 
*/ -#define VXLAN_HF_RCO BIT(24) +#define VXLAN_HF_RCO BIT(21) #define VXLAN_HF_VNI BIT(27) #define VXLAN_HF_GBP BIT(31) From 497d78eab755c11adf31fbb3fdbaf27c16f758aa Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Fri, 4 Dec 2015 15:14:03 -0200 Subject: [PATCH 1489/2091] sctp: use the same clock as if sock source timestamps were on [ Upstream commit cb5e173ed7c03a0d4630ce68a95a186cce3cc872 ] SCTP echoes a cookie o INIT ACK chunks that contains a timestamp, for detecting stale cookies. This cookie is echoed back to the server by the client and then that timestamp is checked. Thing is, if the listening socket is using packet timestamping, the cookie is encoded with ktime_get() value and checked against ktime_get_real(), as done by __net_timestamp(). The fix is to sctp also use ktime_get_real(), so we can compare bananas with bananas later no matter if packet timestamping was enabled or not. Fixes: 52db882f3fc2 ("net: sctp: migrate cookie life from timeval to ktime") Signed-off-by: Marcelo Ricardo Leitner Acked-by: Vlad Yasevich Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/sm_make_chunk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 06320c8c1c866..83a07d4686440 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1652,7 +1652,7 @@ static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep, /* Set an expiration time for the cookie. */ cookie->c.expiration = ktime_add(asoc->cookie_life, - ktime_get()); + ktime_get_real()); /* Copy the peer's init packet. */ memcpy(&cookie->c.peer_init[0], init_chunk->chunk_hdr, @@ -1780,7 +1780,7 @@ struct sctp_association *sctp_unpack_cookie( if (sock_flag(ep->base.sk, SOCK_TIMESTAMP)) kt = skb_get_ktime(skb); else - kt = ktime_get(); + kt = ktime_get_real(); if (!asoc && ktime_before(bear_cookie->expiration, kt)) { /* From 2e2b937a300e338bbe6c131181a97af17a9eb15e Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Fri, 4 Dec 2015 15:14:04 -0200 Subject: [PATCH 1490/2091] sctp: update the netstamp_needed counter when copying sockets [ Upstream commit 01ce63c90170283a9855d1db4fe81934dddce648 ] Dmitry Vyukov reported that SCTP was triggering a WARN on socket destroy related to disabling sock timestamp. When SCTP accepts an association or peel one off, it copies sock flags but forgot to call net_enable_timestamp() if a packet timestamping flag was copied, leading to extra calls to net_disable_timestamp() whenever such clones were closed. The fix is to call net_enable_timestamp() whenever we copy a sock with that flag on, like tcp does. Reported-by: Dmitry Vyukov Signed-off-by: Marcelo Ricardo Leitner Acked-by: Vlad Yasevich Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- include/net/sock.h | 2 ++ net/core/sock.c | 2 -- net/sctp/socket.c | 3 +++ 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index ed01a012f8d58..ec468b0c4e990 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -722,6 +722,8 @@ enum sock_flags { SOCK_SELECT_ERR_QUEUE, /* Wake select on error queue */ }; +#define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)) + static inline void sock_copy_flags(struct sock *nsk, struct sock *osk) { nsk->sk_flags = osk->sk_flags; diff --git a/net/core/sock.c b/net/core/sock.c index dc30dc5bb1b89..6c5aab35c42cb 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -421,8 +421,6 @@ static void sock_warn_obsolete_bsdism(const char *name) } } -#define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)) - static void sock_disable_timestamp(struct sock *sk, unsigned long flags) { if (sk->sk_flags & flags) { diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 66d7960750509..3a1cb52665ae9 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -7207,6 +7207,9 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk, newinet->mc_ttl = 1; newinet->mc_index = 0; newinet->mc_list = NULL; + + if (newsk->sk_flags & SK_FLAGS_TIMESTAMP) + net_enable_timestamp(); } static inline void sctp_copy_descendant(struct sock *sk_to, From 9a03ee76fc125c1199a9e36fff031a7627a9fae1 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Fri, 4 Dec 2015 15:14:05 -0200 Subject: [PATCH 1491/2091] sctp: also copy sk_tsflags when copying the socket [ Upstream commit 50a5ffb1ef535e3c6989711c51b5d61b543a3b45 ] As we are keeping timestamps on when copying the socket, we also have to copy sk_tsflags. This is needed since b9f40e21ef42 ("net-timestamp: move timestamp flags out of sk_flags"). Signed-off-by: Marcelo Ricardo Leitner Acked-by: Vlad Yasevich Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/socket.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 3a1cb52665ae9..a63c2c87a0c6c 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -7175,6 +7175,7 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk, newsk->sk_type = sk->sk_type; newsk->sk_bound_dev_if = sk->sk_bound_dev_if; newsk->sk_flags = sk->sk_flags; + newsk->sk_tsflags = sk->sk_tsflags; newsk->sk_no_check_tx = sk->sk_no_check_tx; newsk->sk_no_check_rx = sk->sk_no_check_rx; newsk->sk_reuse = sk->sk_reuse; From b8a3d39fea6c2ba60c15d61989436a36b3a42064 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Fri, 4 Dec 2015 16:29:10 +0100 Subject: [PATCH 1492/2091] net: qca_spi: fix transmit queue timeout handling [ Upstream commit ed7d42e24effbd3681e909711a7a2119a85e9217 ] In case of a tx queue timeout every transmit is blocked until the QCA7000 resets himself and triggers a sync which makes the driver flushs the tx ring. So avoid this blocking situation by triggering the sync immediately after the timeout. Waking the queue doesn't make sense in this situation. Signed-off-by: Stefan Wahren Fixes: 291ab06ecf67 ("net: qualcomm: new Ethernet over SPI driver for QCA7000") Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/qualcomm/qca_spi.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c index 6af028d5f9bcb..97e4df9bf407b 100644 --- a/drivers/net/ethernet/qualcomm/qca_spi.c +++ b/drivers/net/ethernet/qualcomm/qca_spi.c @@ -736,9 +736,8 @@ qcaspi_netdev_tx_timeout(struct net_device *dev) netdev_info(qca->net_dev, "Transmit timeout at %ld, latency %ld\n", jiffies, jiffies - dev->trans_start); qca->net_dev->stats.tx_errors++; - /* wake the queue if there is room */ - if (qcaspi_tx_ring_has_space(&qca->txr)) - netif_wake_queue(dev); + /* Trigger tx queue flush and QCA7000 reset */ + qca->sync = QCASPI_SYNC_UNKNOWN; } static int From dbc2d32fe4c0bb6ab3028ad986c42577ea059b6e Mon Sep 17 00:00:00 2001 From: Peter Wu Date: Tue, 8 Dec 2015 12:17:42 +0100 Subject: [PATCH 1493/2091] r8152: fix lockup when runtime PM is enabled [ Upstream commit 90186af404ada5a47b875bf3c16d0b02bb023ea0 ] When an interface is brought up which was previously suspended (via runtime PM), it would hang. This happens because napi_disable is called before napi_enable. Solve this by avoiding napi_enable in the resume during open function (netif_running is true when open is called, IFF_UP is set after a successful open; netif_running is false when close is called, but IFF_UP is then still set). While at it, remove WORK_ENABLE check from rtl8152_open (introduced with the original change) because it cannot happen: - After this patch, runtime resume will not set it during rtl8152_open. - When link is up, rtl8152_open is not called. - When link is down during system/auto suspend/resume, it is not set. Fixes: 41cec84cf285 ("r8152: don't enable napi before rx ready") Link: https://lkml.kernel.org/r/20151205105912.GA1766@al Signed-off-by: Peter Wu Acked-by: Hayes Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/r8152.c | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index aafa1a1898e43..ce6fad1c43e64 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -3006,17 +3006,6 @@ static int rtl8152_open(struct net_device *netdev) mutex_lock(&tp->control); - /* The WORK_ENABLE may be set when autoresume occurs */ - if (test_bit(WORK_ENABLE, &tp->flags)) { - clear_bit(WORK_ENABLE, &tp->flags); - usb_kill_urb(tp->intr_urb); - cancel_delayed_work_sync(&tp->schedule); - - /* disable the tx/rx, if the workqueue has enabled them. */ - if (netif_carrier_ok(netdev)) - tp->rtl_ops.disable(tp); - } - tp->rtl_ops.up(tp); rtl8152_set_speed(tp, AUTONEG_ENABLE, @@ -3063,12 +3052,6 @@ static int rtl8152_close(struct net_device *netdev) } else { mutex_lock(&tp->control); - /* The autosuspend may have been enabled and wouldn't - * be disable when autoresume occurs, because the - * netif_running() would be false. 
- */ - rtl_runtime_suspend_enable(tp, false); - tp->rtl_ops.down(tp); mutex_unlock(&tp->control); @@ -3369,7 +3352,7 @@ static int rtl8152_resume(struct usb_interface *intf) netif_device_attach(tp->netdev); } - if (netif_running(tp->netdev)) { + if (netif_running(tp->netdev) && tp->netdev->flags & IFF_UP) { if (test_bit(SELECTIVE_SUSPEND, &tp->flags)) { rtl_runtime_suspend_enable(tp, false); clear_bit(SELECTIVE_SUSPEND, &tp->flags); @@ -3387,6 +3370,8 @@ static int rtl8152_resume(struct usb_interface *intf) } usb_submit_urb(tp->intr_urb, GFP_KERNEL); } else if (test_bit(SELECTIVE_SUSPEND, &tp->flags)) { + if (tp->netdev->flags & IFF_UP) + rtl_runtime_suspend_enable(tp, false); clear_bit(SELECTIVE_SUSPEND, &tp->flags); } From 683f40ec958e669b0d376a0d83c0213783ee7f99 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 Dec 2015 07:25:06 -0800 Subject: [PATCH 1494/2091] ipv6: sctp: clone options to avoid use after free [ Upstream commit 9470e24f35ab81574da54e69df90c1eb4a96b43f ] SCTP is lacking proper np->opt cloning at accept() time. TCP and DCCP use ipv6_dup_options() helper, do the same in SCTP. We might later factorize this code in a common helper to avoid future mistakes. Reported-by: Dmitry Vyukov Signed-off-by: Eric Dumazet Acked-by: Vlad Yasevich Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/ipv6.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 0e4198ee23708..3267a5cbb3e86 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -634,6 +634,7 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk, struct sock *newsk; struct ipv6_pinfo *newnp, *np = inet6_sk(sk); struct sctp6_sock *newsctp6sk; + struct ipv6_txoptions *opt; newsk = sk_alloc(sock_net(sk), PF_INET6, GFP_KERNEL, sk->sk_prot); if (!newsk) @@ -653,6 +654,13 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk, memcpy(newnp, np, sizeof(struct ipv6_pinfo)); + rcu_read_lock(); + opt = rcu_dereference(np->opt); + if (opt) + opt = ipv6_dup_options(newsk, opt); + RCU_INIT_POINTER(newnp->opt, opt); + rcu_read_unlock(); + /* Initialize sk's sport, dport, rcv_saddr and daddr for getsockname() * and getpeername(). */ From bc8f79b522b57ca79a676615003d85b08162ff5a Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Mon, 14 Dec 2015 22:03:39 +0100 Subject: [PATCH 1495/2091] net: add validation for the socket syscall protocol argument MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 79462ad02e861803b3840cc782248c7359451cd9 ] 郭永刚 reported that one could simply crash the kernel as root by using a simple program: int socket_fd; struct sockaddr_in addr; addr.sin_port = 0; addr.sin_addr.s_addr = INADDR_ANY; addr.sin_family = 10; socket_fd = socket(10,3,0x40000000); connect(socket_fd , &addr,16); AF_INET, AF_INET6 sockets actually only support 8-bit protocol identifiers. inet_sock's skc_protocol field thus is sized accordingly, thus larger protocol identifiers simply cut off the higher bits and store a zero in the protocol fields. This could lead to e.g. NULL function pointer because as a result of the cut off inet_num is zero and we call down to inet_autobind, which is NULL for raw sockets. kernel: Call Trace: kernel: [] ? inet_autobind+0x2e/0x70 kernel: [] inet_dgram_connect+0x54/0x80 kernel: [] SYSC_connect+0xd9/0x110 kernel: [] ? ptrace_notify+0x5b/0x80 kernel: [] ? 
syscall_trace_enter_phase2+0x108/0x200 kernel: [] SyS_connect+0xe/0x10 kernel: [] tracesys_phase2+0x84/0x89 I found no particular commit which introduced this problem. CVE: CVE-2015-8543 Cc: Cong Wang Reported-by: 郭永刚 Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/sock.h | 1 + net/ax25/af_ax25.c | 3 +++ net/decnet/af_decnet.c | 3 +++ net/ipv4/af_inet.c | 3 +++ net/ipv6/af_inet6.c | 3 +++ net/irda/af_irda.c | 3 +++ 6 files changed, 16 insertions(+) diff --git a/include/net/sock.h b/include/net/sock.h index ec468b0c4e990..f58132440d629 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -386,6 +386,7 @@ struct sock { sk_no_check_rx : 1, sk_userlocks : 4, sk_protocol : 8, +#define SK_PROTOCOL_MAX U8_MAX sk_type : 16; kmemcheck_bitfield_end(flags); int sk_wmem_queued; diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 330c1f4a5a0b6..a64884bbf0cea 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -806,6 +806,9 @@ static int ax25_create(struct net *net, struct socket *sock, int protocol, struct sock *sk; ax25_cb *ax25; + if (protocol < 0 || protocol > SK_PROTOCOL_MAX) + return -EINVAL; + if (!net_eq(net, &init_net)) return -EAFNOSUPPORT; diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 754484b3cd0e8..2783c538ec193 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -678,6 +678,9 @@ static int dn_create(struct net *net, struct socket *sock, int protocol, { struct sock *sk; + if (protocol < 0 || protocol > SK_PROTOCOL_MAX) + return -EINVAL; + if (!net_eq(net, &init_net)) return -EAFNOSUPPORT; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index a5aa54ea65336..0cc98b135b8fe 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -259,6 +259,9 @@ static int inet_create(struct net *net, struct socket *sock, int protocol, int try_loading_module = 0; int err; + if (protocol < 0 || protocol >= IPPROTO_MAX) + return -EINVAL; + sock->state = SS_UNCONNECTED; /* Look for the requested type/protocol pair. */ diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 2d044d2a2ccff..bad62fa5e70fe 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -109,6 +109,9 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol, int try_loading_module = 0; int err; + if (protocol < 0 || protocol >= IPPROTO_MAX) + return -EINVAL; + /* Look for the requested type/protocol pair. 
*/ lookup_protocol: err = -ESOCKTNOSUPPORT; diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index ee0ea25c8e7aa..9a1edcde4ba54 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -1086,6 +1086,9 @@ static int irda_create(struct net *net, struct socket *sock, int protocol, struct sock *sk; struct irda_sock *self; + if (protocol < 0 || protocol > SK_PROTOCOL_MAX) + return -EINVAL; + if (net != &init_net) return -EAFNOSUPPORT; From 8fa4abd57ad4ef9b2ff9beda2eb29368ae10ae9e Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Fri, 4 Dec 2015 01:45:40 +0300 Subject: [PATCH 1496/2091] sh_eth: fix kernel oops in skb_put() [ Upstream commit 248be83dcb3feb3f6332eb3d010a016402138484 ] In a low memory situation the following kernel oops occurs: Unable to handle kernel NULL pointer dereference at virtual address 00000050 pgd = 8490c000 [00000050] *pgd=4651e831, *pte=00000000, *ppte=00000000 Internal error: Oops: 17 [#1] PREEMPT ARM Modules linked in: CPU: 0 Not tainted (3.4-at16 #9) PC is at skb_put+0x10/0x98 LR is at sh_eth_poll+0x2c8/0xa10 pc : [<8035f780>] lr : [<8028bf50>] psr: 60000113 sp : 84eb1a90 ip : 84eb1ac8 fp : 84eb1ac4 r10: 0000003f r9 : 000005ea r8 : 00000000 r7 : 00000000 r6 : 940453b0 r5 : 00030000 r4 : 9381b180 r3 : 00000000 r2 : 00000000 r1 : 000005ea r0 : 00000000 Flags: nZCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment user Control: 10c53c7d Table: 4248c059 DAC: 00000015 Process klogd (pid: 2046, stack limit = 0x84eb02e8) [...] This is because netdev_alloc_skb() fails and 'mdp->rx_skbuff[entry]' is left NULL but sh_eth_rx() later uses it without checking. Add such check... Reported-by: Yasushi SHOJI Signed-off-by: Sergei Shtylyov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/renesas/sh_eth.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 7fb244f565b28..13463c4acc86f 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -1481,6 +1481,7 @@ static int sh_eth_rx(struct net_device *ndev, u32 intr_status, int *quota) if (mdp->cd->shift_rd0) desc_status >>= 16; + skb = mdp->rx_skbuff[entry]; if (desc_status & (RD_RFS1 | RD_RFS2 | RD_RFS3 | RD_RFS4 | RD_RFS5 | RD_RFS6 | RD_RFS10)) { ndev->stats.rx_errors++; @@ -1496,12 +1497,11 @@ static int sh_eth_rx(struct net_device *ndev, u32 intr_status, int *quota) ndev->stats.rx_missed_errors++; if (desc_status & RD_RFS10) ndev->stats.rx_over_errors++; - } else { + } else if (skb) { if (!mdp->cd->hw_swap) sh_eth_soft_swap( phys_to_virt(ALIGN(rxdesc->addr, 4)), pkt_len + 2); - skb = mdp->rx_skbuff[entry]; mdp->rx_skbuff[entry] = NULL; if (mdp->cd->rpadir) skb_reserve(skb, NET_IP_ALIGN); From 2dea635ac36ee66cae8a9c001694d0b72dd1d939 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 14 Dec 2015 14:08:53 -0800 Subject: [PATCH 1497/2091] net: fix IP early demux races [ Upstream commit 5037e9ef9454917b047f9f3a19b4dd179fbf7cd4 ] David Wilder reported crashes caused by dst reuse. I am seeing a crash on a distro V4.2.3 kernel caused by a double release of a dst_entry. In ipv4_dst_destroy() the call to list_empty() finds a poisoned next pointer, indicating the dst_entry has already been removed from the list and freed. The crash occurs 18 to 24 hours into a run of a network stress exerciser. Thanks to his detailed report and analysis, we were able to understand the core issue. 
IP early demux can associate a dst to skb, after a lookup in TCP/UDP sockets. When socket cache is not properly set, we want to store into sk->sk_dst_cache the dst for future IP early demux lookups, by acquiring a stable refcount on the dst. Problem is this acquisition is simply using an atomic_inc(), which works well, unless the dst was queued for destruction from dst_release() noticing dst refcount went to zero, if DST_NOCACHE was set on dst. We need to make sure current refcount is not zero before incrementing it, or risk double free as David reported. This patch, being a stable candidate, adds two new helpers, and use them only from IP early demux problematic paths. It might be possible to merge in net-next skb_dst_force() and skb_dst_force_safe(), but I prefer having the smallest patch for stable kernels : Maybe some skb_dst_force() callers do not expect skb->dst can suddenly be cleared. Can probably be backported back to linux-3.6 kernels Reported-by: David J. Wilder Tested-by: David J. Wilder Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/dst.h | 33 +++++++++++++++++++++++++++++++++ include/net/sock.h | 2 +- net/ipv4/tcp_ipv4.c | 5 ++--- net/ipv6/tcp_ipv6.c | 3 +-- 4 files changed, 37 insertions(+), 6 deletions(-) diff --git a/include/net/dst.h b/include/net/dst.h index 0fb99a26e9737..182b812d45e1b 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -312,6 +312,39 @@ static inline void skb_dst_force(struct sk_buff *skb) } } +/** + * dst_hold_safe - Take a reference on a dst if possible + * @dst: pointer to dst entry + * + * This helper returns false if it could not safely + * take a reference on a dst. + */ +static inline bool dst_hold_safe(struct dst_entry *dst) +{ + if (dst->flags & DST_NOCACHE) + return atomic_inc_not_zero(&dst->__refcnt); + dst_hold(dst); + return true; +} + +/** + * skb_dst_force_safe - makes sure skb dst is refcounted + * @skb: buffer + * + * If dst is not yet refcounted and not destroyed, grab a ref on it. 
+ */ +static inline void skb_dst_force_safe(struct sk_buff *skb) +{ + if (skb_dst_is_noref(skb)) { + struct dst_entry *dst = skb_dst(skb); + + if (!dst_hold_safe(dst)) + dst = NULL; + + skb->_skb_refdst = (unsigned long)dst; + } +} + /** * __skb_tunnel_rx - prepare skb for rx reinsert diff --git a/include/net/sock.h b/include/net/sock.h index f58132440d629..4c4b21c008280 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -799,7 +799,7 @@ void sk_stream_write_space(struct sock *sk); static inline void __sk_add_backlog(struct sock *sk, struct sk_buff *skb) { /* dont let skb dst not refcounted, we are going to leave rcu lock */ - skb_dst_force(skb); + skb_dst_force_safe(skb); if (!sk->sk_backlog.tail) sk->sk_backlog.head = skb; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 88203e755af87..cd18c3d3251e1 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1509,7 +1509,7 @@ bool tcp_prequeue(struct sock *sk, struct sk_buff *skb) if (likely(sk->sk_rx_dst)) skb_dst_drop(skb); else - skb_dst_force(skb); + skb_dst_force_safe(skb); __skb_queue_tail(&tp->ucopy.prequeue, skb); tp->ucopy.memory += skb->truesize; @@ -1714,8 +1714,7 @@ void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); - if (dst) { - dst_hold(dst); + if (dst && dst_hold_safe(dst)) { sk->sk_rx_dst = dst; inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index cfb27f56c62fd..c1938ad39f8cb 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -93,10 +93,9 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); - if (dst) { + if (dst && dst_hold_safe(dst)) { const struct rt6_info *rt = (const struct rt6_info *)dst; - dst_hold(dst); sk->sk_rx_dst = dst; inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; if (rt->rt6i_node) From f167b6f4244fbc8d05fcc385b1bf8e70729c9e7c Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Mon, 14 Dec 2015 13:48:36 -0800 Subject: [PATCH 1498/2091] pptp: verify sockaddr_len in pptp_bind() and pptp_connect() [ Upstream commit 09ccfd238e5a0e670d8178cf50180ea81ae09ae1 ] Reported-by: Dmitry Vyukov Signed-off-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ppp/pptp.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c index e3bfbd4d01367..0bacabfa486e5 100644 --- a/drivers/net/ppp/pptp.c +++ b/drivers/net/ppp/pptp.c @@ -420,6 +420,9 @@ static int pptp_bind(struct socket *sock, struct sockaddr *uservaddr, struct pptp_opt *opt = &po->proto.pptp; int error = 0; + if (sockaddr_len < sizeof(struct sockaddr_pppox)) + return -EINVAL; + lock_sock(sk); opt->src_addr = sp->sa_addr.pptp; @@ -441,6 +444,9 @@ static int pptp_connect(struct socket *sock, struct sockaddr *uservaddr, struct flowi4 fl4; int error = 0; + if (sockaddr_len < sizeof(struct sockaddr_pppox)) + return -EINVAL; + if (sp->sa_protocol != PX_PROTO_PPTP) return -EINVAL; From bdaa853b10bc9daebf001f365e1da9287c730f6a Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Mon, 16 Nov 2015 15:43:44 -0500 Subject: [PATCH 1499/2091] vlan: Fix untag operations of stacked vlans with REORDER_HEADER off [ Upstream commit a6e18ff111701b4ff6947605bfbe9594ec42a6e8 ] When we have multiple stacked vlan devices all of which have turned off REORDER_HEADER flag, the untag operation does not locate the ethernet addresses correctly for nested vlans. 
The reason is that in the case of the REORDER_HEADER flag being off, the outer vlan headers are put back and the mac_len is adjusted to account for the presence of the header. Then, the subsequent untag operation, for the next level vlan, always uses VLAN_ETH_HLEN to locate the beginning of the ethernet header and that ends up being a multiple of 4 bytes short of the actual beginning of the mac header (the multiple depending on how many vlan encapsulations there are). As a result, if there are multiple levels of vlan devices with REORDER_HEADER being off, the received packets end up being dropped. To solve this, we use skb->mac_len as the offset. The value is always set on the receive path and starts out as ETH_HLEN. The value is also updated when the vlan header manipulations occur so we know it will be correct. Signed-off-by: Vladislav Yasevich Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/skbuff.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 075d2e78c87ee..13ddacb93c972 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4200,7 +4200,8 @@ static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb) return NULL; } - memmove(skb->data - ETH_HLEN, skb->data - VLAN_ETH_HLEN, 2 * ETH_ALEN); + memmove(skb->data - ETH_HLEN, skb->data - skb->mac_len, + 2 * ETH_ALEN); skb->mac_header += VLAN_HLEN; return skb; } From 2af7df8919a74e4567878b1415f4dd5edbb75163 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Mon, 14 Dec 2015 17:44:10 -0500 Subject: [PATCH 1500/2091] skbuff: Fix offset error in skb_reorder_vlan_header [ Upstream commit f654861569872d10dcb79d9d7ca219b316f94ff0 ] skb_reorder_vlan_header is called after the vlan header has been pulled. As a result the offset of the beginning of the mac header has been increased by 4 bytes (VLAN_HLEN). When moving the mac addresses, include this increase in the offset calculation so that the mac addresses are copied correctly. Fixes: a6e18ff1117 (vlan: Fix untag operations of stacked vlans with REORDER_HEADER off) CC: Nicolas Dichtel CC: Patrick McHardy Signed-off-by: Vladislav Yasevich Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/skbuff.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 13ddacb93c972..f3f7d0abcf63f 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4200,7 +4200,7 @@ static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb) return NULL; } - memmove(skb->data - ETH_HLEN, skb->data - skb->mac_len, + memmove(skb->data - ETH_HLEN, skb->data - skb->mac_len - VLAN_HLEN, 2 * ETH_ALEN); skb->mac_header += VLAN_HLEN; return skb; From bd30c2ee7e5dc8258476801a2ec7039a47e2078e Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Wed, 16 Dec 2015 23:39:04 -0800 Subject: [PATCH 1501/2091] net: check both type and protocol for tcp sockets [ Upstream commit ac5cc977991d2dce85fc734a6c71ddb33f6fe3c1 ] Dmitry reported the following out-of-bound access: Call Trace: [] __asan_report_load4_noabort+0x3e/0x40 mm/kasan/report.c:294 [] sock_setsockopt+0x1284/0x13d0 net/core/sock.c:880 [< inline >] SYSC_setsockopt net/socket.c:1746 [] SyS_setsockopt+0x1fe/0x240 net/socket.c:1729 [] entry_SYSCALL_64_fastpath+0x16/0x7a arch/x86/entry/entry_64.S:185 This is because we mistake a raw socket for a tcp socket. We should check both sk->sk_type and sk->sk_protocol to ensure it is a tcp socket. Willem points out that __skb_complete_tx_timestamp() needs the same fix as well.
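For reference, a minimal sketch of the combined check the fix applies at both call sites; the helper name below is hypothetical, since the actual patch open-codes the two tests, as the diff that follows shows:

static inline bool sk_is_tcp_stream(const struct sock *sk)
{
	/* IPPROTO_TCP alone is not enough: a SOCK_RAW socket may also
	 * carry IPPROTO_TCP, so the socket type has to be checked too.
	 */
	return sk->sk_protocol == IPPROTO_TCP && sk->sk_type == SOCK_STREAM;
}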
Reported-by: Dmitry Vyukov Cc: Willem de Bruijn Cc: Eric Dumazet Signed-off-by: Cong Wang Acked-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/skbuff.c | 3 ++- net/core/sock.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index f3f7d0abcf63f..2e5fcda165705 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3661,7 +3661,8 @@ static void __skb_complete_tx_timestamp(struct sk_buff *skb, serr->ee.ee_info = tstype; if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) { serr->ee.ee_data = skb_shinfo(skb)->tskey; - if (sk->sk_protocol == IPPROTO_TCP) + if (sk->sk_protocol == IPPROTO_TCP && + sk->sk_type == SOCK_STREAM) serr->ee.ee_data -= sk->sk_tskey; } diff --git a/net/core/sock.c b/net/core/sock.c index 6c5aab35c42cb..47fc8bb3b9462 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -859,7 +859,8 @@ int sock_setsockopt(struct socket *sock, int level, int optname, if (val & SOF_TIMESTAMPING_OPT_ID && !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) { - if (sk->sk_protocol == IPPROTO_TCP) { + if (sk->sk_protocol == IPPROTO_TCP && + sk->sk_type == SOCK_STREAM) { if (sk->sk_state != TCP_ESTABLISHED) { ret = -EINVAL; break; From 262ee9759e5f7b0db137fd6ee5780f92e119ff6e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 15 Dec 2015 09:43:12 -0800 Subject: [PATCH 1502/2091] net_sched: make qdisc_tree_decrease_qlen() work for non mq [ Upstream commit 225734de70cd0a9e0b978f3583a4a87939271d5e ] Stas Nichiporovich reported a regression in his HFSC qdisc setup on a non multi queue device. It turns out I mistakenly added a TCQ_F_NOPARENT flag on all qdisc allocated in qdisc_create() for non multi queue devices, which was rather buggy. I was clearly mislead by the TCQ_F_ONETXQUEUE that is also set here for no good reason, since it only matters for the root qdisc. Fixes: 4eaf3b84f288 ("net_sched: fix qdisc_tree_decrease_qlen() races") Reported-by: Stas Nichiporovich Tested-by: Stas Nichiporovich Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index d4b6f3682c14c..68c599a5e1d1d 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -950,7 +950,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, } lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock); if (!netif_is_multiqueue(dev)) - sch->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; + sch->flags |= TCQ_F_ONETXQUEUE; } sch->handle = handle; From 1814295365a02ff174948159f1eececa55fe4f97 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 15 Dec 2015 15:39:08 -0500 Subject: [PATCH 1503/2091] bluetooth: Validate socket address length in sco_sock_bind(). [ Upstream commit 5233252fce714053f0151680933571a2da9cbfb4 ] Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/sco.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 4322c833e7489..8611bc7bdd327 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -520,6 +520,9 @@ static int sco_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_le if (!addr || addr->sa_family != AF_BLUETOOTH) return -EINVAL; + if (addr_len < sizeof(struct sockaddr_sco)) + return -EINVAL; + lock_sock(sk); if (sk->sk_state != BT_OPEN) { From 1c72e110be7f6860ccb4e226beb34e2c4537f6bb Mon Sep 17 00:00:00 2001 From: "tadeusz.struk@intel.com" Date: Tue, 15 Dec 2015 10:46:17 -0800 Subject: [PATCH 1504/2091] net: fix uninitialized variable issue [ Upstream commit 130ed5d105dde141e7fe60d5440aa53e0a84f13b ] msg_iocb needs to be initialized on the recv/recvfrom path. Otherwise afalg will wrongly interpret it as an async call. Cc: stable@vger.kernel.org Reported-by: Harald Freudenberger Signed-off-by: Tadeusz Struk Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/socket.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/socket.c b/net/socket.c index 884e329976984..dcbfa868e3984 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1705,6 +1705,7 @@ SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size, msg.msg_name = addr ? (struct sockaddr *)&address : NULL; /* We assume all kernel code knows the size of sockaddr_storage */ msg.msg_namelen = 0; + msg.msg_iocb = NULL; if (sock->file->f_flags & O_NONBLOCK) flags |= MSG_DONTWAIT; err = sock_recvmsg(sock, &msg, iov_iter_count(&msg.msg_iter), flags); From abf79193eb35f1786a3e22fda24cb6cd32b0083b Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Tue, 15 Dec 2015 22:59:12 +0100 Subject: [PATCH 1505/2091] ipv6: automatically enable stable privacy mode if stable_secret set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 9b29c6962b70f232cde4076b1020191e1be0889d ] Bjørn reported that while we switch all interfaces to privacy stable mode when setting the secret, we don't set this mode for new interfaces. This does not make sense, so change this behaviour. Fixes: 622c81d57b392cc ("ipv6: generation of stable privacy addresses for link-local and autoconf") Reported-by: Bjørn Mork Cc: Bjørn Mork Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/addrconf.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 9d067b349c8ce..a2d685030a34f 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -343,6 +343,12 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) setup_timer(&ndev->rs_timer, addrconf_rs_timer, (unsigned long)ndev); memcpy(&ndev->cnf, dev_net(dev)->ipv6.devconf_dflt, sizeof(ndev->cnf)); + + if (ndev->cnf.stable_secret.initialized) + ndev->addr_gen_mode = IN6_ADDR_GEN_MODE_STABLE_PRIVACY; + else + ndev->addr_gen_mode = IN6_ADDR_GEN_MODE_EUI64; + ndev->cnf.mtu6 = dev->mtu; ndev->cnf.sysctl = NULL; ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl); From 78e1970233fd270349834bb8b7ca84c58c7c974c Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 16 Dec 2015 18:13:14 +0800 Subject: [PATCH 1506/2091] rhashtable: Enforce minimum size on initial hash table [ Upstream commit 3a324606bbabfc30084ce9d08169910773ba9a92 ] William Hua wrote: > > I wasn't aware there was an enforced minimum size. 
I simply set the > nelem_hint in the rhastable_params struct to 1, expecting it to grow as > needed. This caused a segfault afterwards when trying to insert an > element. OK we're doing the size computation before we enforce the limit on min_size. ---8<--- We need to do the initial hash table size computation after we have obtained the correct min_size/max_size parameters. Otherwise we may end up with a hash table whose size is outside the allowed envelope. Fixes: a998f712f77e ("rhashtable: Round up/down min/max_size to...") Reported-by: William Hua Signed-off-by: Herbert Xu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- lib/rhashtable.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/rhashtable.c b/lib/rhashtable.c index cf910e48f8f21..4ff003e1d9659 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -730,9 +730,6 @@ int rhashtable_init(struct rhashtable *ht, if (params->nulls_base && params->nulls_base < (1U << RHT_BASE_SHIFT)) return -EINVAL; - if (params->nelem_hint) - size = rounded_hashtable_size(params); - memset(ht, 0, sizeof(*ht)); mutex_init(&ht->mutex); spin_lock_init(&ht->lock); @@ -752,6 +749,9 @@ int rhashtable_init(struct rhashtable *ht, ht->p.min_size = max(ht->p.min_size, HASH_MIN_SIZE); + if (params->nelem_hint) + size = rounded_hashtable_size(&ht->p); + /* The maximum (not average) chain length grows with the * size of the hash table, at a rate of (log N)/(log log N). * The value of 16 is selected so that even if the hash From 2bc1b5b3a634081d49dbb16aab2b55cec1eb55b9 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Tue, 15 Dec 2015 21:01:53 +0100 Subject: [PATCH 1507/2091] fou: clean up socket with kfree_rcu [ Upstream commit 3036facbb7be3a169e35be3b271162b0fa564a2d ] fou->udp_offloads is managed by RCU. As it is actually included inside the fou sockets, we cannot let the memory go out of scope before a grace period. We either can synchronize_rcu or switch over to kfree_rcu to manage the sockets. kfree_rcu seems appropriate as it is used by vxlan and geneve. Fixes: 23461551c00628c ("fou: Support for foo-over-udp RX path") Cc: Tom Herbert Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/fou.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index 34968cd5c1464..4b67937692c99 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -24,6 +24,7 @@ struct fou { u16 type; struct udp_offload udp_offloads; struct list_head list; + struct rcu_head rcu; }; #define FOU_F_REMCSUM_NOPARTIAL BIT(0) @@ -421,7 +422,7 @@ static void fou_release(struct fou *fou) list_del(&fou->list); udp_tunnel_sock_release(sock); - kfree(fou); + kfree_rcu(fou, rcu); } static int fou_encap_init(struct sock *sk, struct fou *fou, struct fou_cfg *cfg) From cc01a0aa28400e2fe92251edf8621e91402d51b4 Mon Sep 17 00:00:00 2001 From: Rainer Weikusat Date: Wed, 16 Dec 2015 20:09:25 +0000 Subject: [PATCH 1508/2091] af_unix: Revert 'lock_interruptible' in stream receive code [ Upstream commit 3822b5c2fc62e3de8a0f33806ff279fb7df92432 ] With b3ca9b02b00704053a38bfe4c31dbbb9c13595d0, the AF_UNIX SOCK_STREAM receive code was changed from using mutex_lock(&u->readlock) to mutex_lock_interruptible(&u->readlock) to prevent signals from being delayed for an indefinite time if a thread sleeping on the mutex happened to be selected for handling the signal. 
But this was never a problem with the stream receive code (as opposed to its datagram counterpart) as that never went to sleep waiting for new messages with the mutex held and thus, wouldn't cause secondary readers to block on the mutex waiting for the sleeping primary reader. As the interruptible locking makes the code more complicated in exchange for no benefit, change it back to using mutex_lock. Signed-off-by: Rainer Weikusat Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/unix/af_unix.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 1975fd8d1c100..a398f624c28dc 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2072,14 +2072,7 @@ static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, memset(&scm, 0, sizeof(scm)); - err = mutex_lock_interruptible(&u->readlock); - if (unlikely(err)) { - /* recvmsg() in non blocking mode is supposed to return -EAGAIN - * sk_rcvtimeo is not honored by mutex_lock_interruptible() - */ - err = noblock ? -EAGAIN : -ERESTARTSYS; - goto out; - } + mutex_lock(&u->readlock); if (flags & MSG_PEEK) skip = sk_peek_offset(sk, flags); @@ -2120,12 +2113,12 @@ static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, timeo = unix_stream_data_wait(sk, timeo, last); - if (signal_pending(current) - || mutex_lock_interruptible(&u->readlock)) { + if (signal_pending(current)) { err = sock_intr_errno(timeo); goto out; } + mutex_lock(&u->readlock); continue; unlock: unix_state_unlock(sk); From 04f694ddd4f44f3174e59777bb94a85263741376 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 16 Dec 2015 13:53:10 -0800 Subject: [PATCH 1509/2091] tcp: restore fastopen with no data in SYN packet [ Upstream commit 07e100f984975cb0417a7d5e626d0409efbad478 ] Yuchung tracked a regression caused by commit 57be5bdad759 ("ip: convert tcp_sendmsg() to iov_iter primitives") for TCP Fast Open. Some Fast Open users do not actually add any data in the SYN packet. Fixes: 57be5bdad759 ("ip: convert tcp_sendmsg() to iov_iter primitives") Reported-by: Yuchung Cheng Signed-off-by: Eric Dumazet Cc: Al Viro Acked-by: Yuchung Cheng Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_output.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 986440b249784..1ea4322c3b0c9 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3143,7 +3143,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) { struct tcp_sock *tp = tcp_sk(sk); struct tcp_fastopen_request *fo = tp->fastopen_req; - int syn_loss = 0, space, err = 0, copied; + int syn_loss = 0, space, err = 0; unsigned long last_syn_loss = 0; struct sk_buff *syn_data; @@ -3181,17 +3181,18 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) goto fallback; syn_data->ip_summed = CHECKSUM_PARTIAL; memcpy(syn_data->cb, syn->cb, sizeof(syn->cb)); - copied = copy_from_iter(skb_put(syn_data, space), space, - &fo->data->msg_iter); - if (unlikely(!copied)) { - kfree_skb(syn_data); - goto fallback; - } - if (copied != space) { - skb_trim(syn_data, copied); - space = copied; + if (space) { + int copied = copy_from_iter(skb_put(syn_data, space), space, + &fo->data->msg_iter); + if (unlikely(!copied)) { + kfree_skb(syn_data); + goto fallback; + } + if (copied != space) { + skb_trim(syn_data, copied); + space = copied; + } } - /* No more data pending in inet_wait_for_connect() */ if (space == fo->size) fo->data = NULL; From ac5966d165a9b4115744269a64ffd3d061b5efe2 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 16 Dec 2015 16:45:54 +0800 Subject: [PATCH 1510/2091] rhashtable: Fix walker list corruption [ Upstream commit c6ff5268293ef98e48a99597e765ffc417e39fa5 ] The commit ba7c95ea3870fe7b847466d39a049ab6f156aa2c ("rhashtable: Fix sleeping inside RCU critical section in walk_stop") introduced a new spinlock for the walker list. However, it did not convert all existing users of the list over to the new spin lock. Some continued to use the old mutex for this purpose. This obviously led to corruption of the list. The fix is to use the spin lock everywhere we touch the list. This also allows us to do rcu_read_lock before we take the lock in rhashtable_walk_start. With the old mutex this would've deadlocked but it's safe with the new spin lock. Fixes: ba7c95ea3870 ("rhashtable: Fix sleeping inside RCU...") Reported-by: Colin Ian King Signed-off-by: Herbert Xu Signed-off-by: David S.
Miller Signed-off-by: Greg Kroah-Hartman --- lib/rhashtable.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 4ff003e1d9659..5b17447efa8bf 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -506,10 +506,11 @@ int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter) if (!iter->walker) return -ENOMEM; - mutex_lock(&ht->mutex); - iter->walker->tbl = rht_dereference(ht->tbl, ht); + spin_lock(&ht->lock); + iter->walker->tbl = + rcu_dereference_protected(ht->tbl, lockdep_is_held(&ht->lock)); list_add(&iter->walker->list, &iter->walker->tbl->walkers); - mutex_unlock(&ht->mutex); + spin_unlock(&ht->lock); return 0; } @@ -523,10 +524,10 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_init); */ void rhashtable_walk_exit(struct rhashtable_iter *iter) { - mutex_lock(&iter->ht->mutex); + spin_lock(&iter->ht->lock); if (iter->walker->tbl) list_del(&iter->walker->list); - mutex_unlock(&iter->ht->mutex); + spin_unlock(&iter->ht->lock); kfree(iter->walker); } EXPORT_SYMBOL_GPL(rhashtable_walk_exit); @@ -550,14 +551,12 @@ int rhashtable_walk_start(struct rhashtable_iter *iter) { struct rhashtable *ht = iter->ht; - mutex_lock(&ht->mutex); + rcu_read_lock(); + spin_lock(&ht->lock); if (iter->walker->tbl) list_del(&iter->walker->list); - - rcu_read_lock(); - - mutex_unlock(&ht->mutex); + spin_unlock(&ht->lock); if (!iter->walker->tbl) { iter->walker->tbl = rht_dereference_rcu(ht->tbl, ht); From 868384ceda39e6e2f1cfd9f46430f4d8340c9e72 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 25 Sep 2015 16:30:08 +0100 Subject: [PATCH 1511/2091] KEYS: Fix race between key destruction and finding a keyring by name commit 94c4554ba07adbdde396748ee7ae01e86cf2d8d7 upstream. There appears to be a race between: (1) key_gc_unused_keys() which frees key->security and then calls keyring_destroy() to unlink the name from the name list (2) find_keyring_by_name() which calls key_permission(), thus accessing key->security, on a key before checking to see whether the key usage is 0 (ie. the key is dead and might be cleaned up). Fix this by calling ->destroy() before cleaning up the core key data - including key->security. Reported-by: Petr Matousek Signed-off-by: David Howells Signed-off-by: Greg Kroah-Hartman --- security/keys/gc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/security/keys/gc.c b/security/keys/gc.c index c7952375ac532..39eac1fd5706c 100644 --- a/security/keys/gc.c +++ b/security/keys/gc.c @@ -134,6 +134,10 @@ static noinline void key_gc_unused_keys(struct list_head *keys) kdebug("- %u", key->serial); key_check(key); + /* Throw away the key data */ + if (key->type->destroy) + key->type->destroy(key); + security_key_free(key); /* deal with the user's key tracking and quota */ @@ -148,10 +152,6 @@ static noinline void key_gc_unused_keys(struct list_head *keys) if (test_bit(KEY_FLAG_INSTANTIATED, &key->flags)) atomic_dec(&key->user->nikeys); - /* now throw away the key memory */ - if (key->type->destroy) - key->type->destroy(key); - key_user_put(key->user); kfree(key->description); From 3366472496482e75a83db93846226b8a360ff911 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 15 Oct 2015 17:21:37 +0100 Subject: [PATCH 1512/2091] KEYS: Fix crash when attempt to garbage collect an uninstantiated keyring commit f05819df10d7b09f6d1eb6f8534a8f68e5a4fe61 upstream. 
The following sequence of commands: i=`keyctl add user a a @s` keyctl request2 keyring foo bar @t keyctl unlink $i @s tries to invoke an upcall to instantiate a keyring if one doesn't already exist by that name within the user's keyring set. However, if the upcall fails, the code sets keyring->type_data.reject_error to -ENOKEY or some other error code. When the key is garbage collected, the key destroy function is called unconditionally and keyring_destroy() uses list_empty() on keyring->type_data.link - which is in a union with reject_error. Subsequently, the kernel tries to unlink the keyring from the keyring names list - which oopses like this: BUG: unable to handle kernel paging request at 00000000ffffff8a IP: [] keyring_destroy+0x3d/0x88 ... Workqueue: events key_garbage_collector ... RIP: 0010:[] keyring_destroy+0x3d/0x88 RSP: 0018:ffff88003e2f3d30 EFLAGS: 00010203 RAX: 00000000ffffff82 RBX: ffff88003bf1a900 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 000000003bfc6901 RDI: ffffffff81a73a40 RBP: ffff88003e2f3d38 R08: 0000000000000152 R09: 0000000000000000 R10: ffff88003e2f3c18 R11: 000000000000865b R12: ffff88003bf1a900 R13: 0000000000000000 R14: ffff88003bf1a908 R15: ffff88003e2f4000 ... CR2: 00000000ffffff8a CR3: 000000003e3ec000 CR4: 00000000000006f0 ... Call Trace: [] key_gc_unused_keys.constprop.1+0x5d/0x10f [] key_garbage_collector+0x1fa/0x351 [] process_one_work+0x28e/0x547 [] worker_thread+0x26e/0x361 [] ? rescuer_thread+0x2a8/0x2a8 [] kthread+0xf3/0xfb [] ? kthread_create_on_node+0x1c2/0x1c2 [] ret_from_fork+0x3f/0x70 [] ? kthread_create_on_node+0x1c2/0x1c2 Note the value in RAX. This is a 32-bit representation of -ENOKEY. The solution is to only call ->destroy() if the key was successfully instantiated. Reported-by: Dmitry Vyukov Signed-off-by: David Howells Tested-by: Dmitry Vyukov Signed-off-by: Greg Kroah-Hartman --- security/keys/gc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/security/keys/gc.c b/security/keys/gc.c index 39eac1fd5706c..addf060399e09 100644 --- a/security/keys/gc.c +++ b/security/keys/gc.c @@ -134,8 +134,10 @@ static noinline void key_gc_unused_keys(struct list_head *keys) kdebug("- %u", key->serial); key_check(key); - /* Throw away the key data */ - if (key->type->destroy) + /* Throw away the key data if the key is instantiated */ + if (test_bit(KEY_FLAG_INSTANTIATED, &key->flags) && + !test_bit(KEY_FLAG_NEGATIVE, &key->flags) && + key->type->destroy) key->type->destroy(key); security_key_free(key); From 98fec5a2034454f004ca6471de4df4ded2c5f79f Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 18 Dec 2015 01:34:26 +0000 Subject: [PATCH 1513/2091] KEYS: Fix race between read and revoke commit b4a1b4f5047e4f54e194681125c74c0aa64d637d upstream. This fixes CVE-2015-7550. There's a race between keyctl_read() and keyctl_revoke(). If the revoke happens between keyctl_read() checking the validity of a key and the key's semaphore being taken, then the key type read method will see a revoked key. This causes a problem for the user-defined key type because it assumes in its read method that there will always be a payload in a non-revoked key and doesn't check for a NULL pointer. Fix this by making keyctl_read() check the validity of a key after taking semaphore instead of before. I think the bug was introduced with the original keyrings code. This was discovered by a multithreaded test program generated by syzkaller (http://github.com/google/syzkaller). 
Here's a cleaned up version: #include #include #include void *thr0(void *arg) { key_serial_t key = (unsigned long)arg; keyctl_revoke(key); return 0; } void *thr1(void *arg) { key_serial_t key = (unsigned long)arg; char buffer[16]; keyctl_read(key, buffer, 16); return 0; } int main() { key_serial_t key = add_key("user", "%", "foo", 3, KEY_SPEC_USER_KEYRING); pthread_t th[5]; pthread_create(&th[0], 0, thr0, (void *)(unsigned long)key); pthread_create(&th[1], 0, thr1, (void *)(unsigned long)key); pthread_create(&th[2], 0, thr0, (void *)(unsigned long)key); pthread_create(&th[3], 0, thr1, (void *)(unsigned long)key); pthread_join(th[0], 0); pthread_join(th[1], 0); pthread_join(th[2], 0); pthread_join(th[3], 0); return 0; } Build as: cc -o keyctl-race keyctl-race.c -lkeyutils -lpthread Run as: while keyctl-race; do :; done as it may need several iterations to crash the kernel. The crash can be summarised as: BUG: unable to handle kernel NULL pointer dereference at 0000000000000010 IP: [] user_read+0x56/0xa3 ... Call Trace: [] keyctl_read_key+0xb6/0xd7 [] SyS_keyctl+0x83/0xe0 [] entry_SYSCALL_64_fastpath+0x12/0x6f Reported-by: Dmitry Vyukov Signed-off-by: David Howells Tested-by: Dmitry Vyukov Signed-off-by: James Morris Signed-off-by: Greg Kroah-Hartman --- security/keys/keyctl.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index 0b9ec78a7a7ad..26f0e0a11ed68 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -757,16 +757,16 @@ long keyctl_read_key(key_serial_t keyid, char __user *buffer, size_t buflen) /* the key is probably readable - now try to read it */ can_read_key: - ret = key_validate(key); - if (ret == 0) { - ret = -EOPNOTSUPP; - if (key->type->read) { - /* read the data with the semaphore held (since we - * might sleep) */ - down_read(&key->sem); + ret = -EOPNOTSUPP; + if (key->type->read) { + /* Read the data with the semaphore held (since we might sleep) + * to protect against the key being updated or revoked. + */ + down_read(&key->sem); + ret = key_validate(key); + if (ret == 0) ret = key->type->read(key, buffer, buflen); - up_read(&key->sem); - } + up_read(&key->sem); } error2: From b71567e6c9a2b15b7be9cb8ca695e9c990c0513b Mon Sep 17 00:00:00 2001 From: Yevgeny Pats Date: Tue, 19 Jan 2016 22:09:04 +0000 Subject: [PATCH 1514/2091] KEYS: Fix keyring ref leak in join_session_keyring() commit 23567fd052a9abb6d67fe8e7a9ccdd9800a540f2 upstream. This fixes CVE-2016-0728. If a thread is asked to join as a session keyring the keyring that's already set as its session, we leak a keyring reference. This can be tested with the following program: #include #include #include #include int main(int argc, const char *argv[]) { int i = 0; key_serial_t serial; serial = keyctl(KEYCTL_JOIN_SESSION_KEYRING, "leaked-keyring"); if (serial < 0) { perror("keyctl"); return -1; } if (keyctl(KEYCTL_SETPERM, serial, KEY_POS_ALL | KEY_USR_ALL) < 0) { perror("keyctl"); return -1; } for (i = 0; i < 100; i++) { serial = keyctl(KEYCTL_JOIN_SESSION_KEYRING, "leaked-keyring"); if (serial < 0) { perror("keyctl"); return -1; } } return 0; } If, after the program has run, there something like the following line in /proc/keys: 3f3d898f I--Q--- 100 perm 3f3f0000 0 0 keyring leaked-keyring: empty with a usage count of 100 * the number of times the program has been run, then the kernel is malfunctioning. If leaked-keyring has zero usages or has been garbage collected, then the problem is fixed. 
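Before the diff below, a simplified sketch of the affected branch in join_session_keyring(); locking, the surrounding lookup code, and the other exit paths are omitted:

	} else if (keyring == new->session_keyring) {
		/* The name lookup took a reference on the keyring; drop it
		 * here, otherwise every such no-op join leaks one reference.
		 */
		key_put(keyring);
		ret = 0;
		goto error2;
	}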
Reported-by: Yevgeny Pats Signed-off-by: David Howells Acked-by: Don Zickus Acked-by: Prarit Bhargava Acked-by: Jarod Wilson Signed-off-by: James Morris Signed-off-by: Greg Kroah-Hartman --- security/keys/process_keys.c | 1 + 1 file changed, 1 insertion(+) diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index bd536cb221e23..db91639c81e3a 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -794,6 +794,7 @@ long join_session_keyring(const char *name) ret = PTR_ERR(keyring); goto error2; } else if (keyring == new->session_keyring) { + key_put(keyring); ret = 0; goto error2; } From 99c5a856dcee7658ec7e250aa477a9afaab8cfc6 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 22 Jan 2016 20:54:23 -0800 Subject: [PATCH 1515/2091] Linux 4.1.16 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index cf35f6bcffd87..7609f1dcdcb94 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 1 -SUBLEVEL = 15 +SUBLEVEL = 16 EXTRAVERSION = NAME = Series 4800 From c24eedeca7b86abc1719f1705de671cee8853b8d Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Mon, 30 Nov 2015 16:31:13 -0800 Subject: [PATCH 1516/2091] x86/mpx: Fix instruction decoder condition commit 8e8efe0379bd93e8219ca0fc6fa80b5dd85b09cb upstream. MPX decodes instructions in order to tell which bounds register was violated. Part of this decoding involves looking at the "REX prefix" which is a special instrucion prefix used to retrofit support for new registers in to old instructions. The X86_REX_*() macros are defined to return actual bit values: #define X86_REX_R(rex) ((rex) & 4) *not* boolean values. However, the MPX code was checking for them like they were booleans. This might have led to us mis-decoding the "REX prefix" and giving false information out to userspace about bounds violations. X86_REX_B() actually is bit 1, so this is really only broken for the X86_REX_X() case. Fix the conditionals up to tolerate the non-boolean values. Fixes: fcc7ffd67991 "x86, mpx: Decode MPX instruction to get bound violation information" Reported-by: Dan Carpenter Signed-off-by: Dave Hansen Cc: x86@kernel.org Cc: Dave Hansen Link: http://lkml.kernel.org/r/20151201003113.D800C1E0@viggo.jf.intel.com Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/mpx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index 4d1c11c07fe1f..f738c61bc891e 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -120,19 +120,19 @@ static int get_reg_offset(struct insn *insn, struct pt_regs *regs, switch (type) { case REG_TYPE_RM: regno = X86_MODRM_RM(insn->modrm.value); - if (X86_REX_B(insn->rex_prefix.value) == 1) + if (X86_REX_B(insn->rex_prefix.value)) regno += 8; break; case REG_TYPE_INDEX: regno = X86_SIB_INDEX(insn->sib.value); - if (X86_REX_X(insn->rex_prefix.value) == 1) + if (X86_REX_X(insn->rex_prefix.value)) regno += 8; break; case REG_TYPE_BASE: regno = X86_SIB_BASE(insn->sib.value); - if (X86_REX_B(insn->rex_prefix.value) == 1) + if (X86_REX_B(insn->rex_prefix.value)) regno += 8; break; From 3ab6a090acfb20dfa49979d21c0a110019ad381f Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Tue, 1 Dec 2015 00:54:36 +0300 Subject: [PATCH 1517/2091] x86/signal: Fix restart_syscall number for x32 tasks commit 22eab1108781eff09961ae7001704f7bd8fb1dce upstream. 
When restarting a syscall with regs->ax == -ERESTART_RESTARTBLOCK, regs->ax is assigned to a restart_syscall number. For x32 tasks, this syscall number must have __X32_SYSCALL_BIT set, otherwise it will be an x86_64 syscall number instead of a valid x32 syscall number. This issue has been there since the introduction of x32. Reported-by: strace/tests/restart_syscall.test Reported-and-tested-by: Elvira Khabirova Signed-off-by: Dmitry V. Levin Cc: Elvira Khabirova Link: http://lkml.kernel.org/r/20151130215436.GA25996@altlinux.org Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/signal.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index e0fd5f47fbb9a..5d2e2e9af1c4c 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -667,12 +667,15 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs) signal_setup_done(failed, ksig, stepping); } -#ifdef CONFIG_X86_32 -#define NR_restart_syscall __NR_restart_syscall -#else /* !CONFIG_X86_32 */ -#define NR_restart_syscall \ - test_thread_flag(TIF_IA32) ? __NR_ia32_restart_syscall : __NR_restart_syscall -#endif /* CONFIG_X86_32 */ +static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs) +{ +#if defined(CONFIG_X86_32) || !defined(CONFIG_X86_64) + return __NR_restart_syscall; +#else /* !CONFIG_X86_32 && CONFIG_X86_64 */ + return test_thread_flag(TIF_IA32) ? __NR_ia32_restart_syscall : + __NR_restart_syscall | (regs->orig_ax & __X32_SYSCALL_BIT); +#endif /* CONFIG_X86_32 || !CONFIG_X86_64 */ +} /* * Note that 'init' is a special process: it doesn't get signals it doesn't @@ -701,7 +704,7 @@ static void do_signal(struct pt_regs *regs) break; case -ERESTART_RESTARTBLOCK: - regs->ax = NR_restart_syscall; + regs->ax = get_nr_restart_syscall(regs); regs->ip -= 2; break; } From 702d87e2ff77d75c8ba8783510b71c68eef644f3 Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Tue, 10 Nov 2015 15:10:33 -0500 Subject: [PATCH 1518/2091] xen/gntdev: Grant maps should not be subject to NUMA balancing commit 9c17d96500f78d7ecdb71ca6942830158bc75a2b upstream. Doing so will cause the grant to be unmapped and then, during fault handling, the fault to be mistakenly treated as NUMA hint fault. In addition, even if those maps could partcipate in NUMA balancing, it wouldn't provide any benefit since we are unable to determine physical page's node (even if/when VNUMA is implemented). Marking grant maps' VMAs as VM_IO will exclude them from being part of NUMA balancing. Signed-off-by: Boris Ostrovsky Signed-off-by: David Vrabel Signed-off-by: Greg Kroah-Hartman --- drivers/xen/gntdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 4bd23bba816fb..ee71baddbb10e 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -804,7 +804,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) vma->vm_ops = &gntdev_vmops; - vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; + vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP | VM_IO; if (use_ptemod) vma->vm_flags |= VM_DONTCOPY; From c12a3752a390994ad5e3b987f123ffe24143ad48 Mon Sep 17 00:00:00 2001 From: "Ouyang Zhaowei (Charles)" Date: Wed, 6 May 2015 09:47:04 +0800 Subject: [PATCH 1519/2091] x86/xen: don't reset vcpu_info on a cancelled suspend commit 6a1f513776b78c994045287073e55bae44ed9f8c upstream. 
On a cancelled suspend the vcpu_info location does not change (it's still in the per-cpu area registered by xen_vcpu_setup()). So do not call xen_hvm_init_shared_info() which would make the kernel think its back in the shared info. With the wrong vcpu_info, events cannot be received and the domain will hang after a cancelled suspend. Signed-off-by: Charles Ouyang Reviewed-by: Boris Ostrovsky Signed-off-by: David Vrabel Signed-off-by: Greg Kroah-Hartman --- arch/x86/xen/suspend.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index 53b4c0811f4f6..6d3415144dab4 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c @@ -32,7 +32,8 @@ static void xen_hvm_post_suspend(int suspend_cancelled) { #ifdef CONFIG_XEN_PVHVM int cpu; - xen_hvm_init_shared_info(); + if (!suspend_cancelled) + xen_hvm_init_shared_info(); xen_callback_vector(); xen_unplug_emulated_devices(); if (xen_feature(XENFEAT_hvm_safe_pvclock)) { From 03e572f3dda7f5790930df631a3f013f4100558b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Mon, 2 Nov 2015 22:20:00 +0100 Subject: [PATCH 1520/2091] KVM: VMX: fix SMEP and SMAP without EPT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 656ec4a4928a3db7d16e5cb9bce351a478cfd3d5 upstream. The comment in code had it mostly right, but we enable paging for emulated real mode regardless of EPT. Without EPT (which implies emulated real mode), secondary VCPUs won't start unless we disable SM[AE]P when the guest doesn't use paging. Signed-off-by: Radim Krčmář Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index a243854c35d55..945f9e13f1aa6 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3652,20 +3652,21 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) if (!is_paging(vcpu)) { hw_cr4 &= ~X86_CR4_PAE; hw_cr4 |= X86_CR4_PSE; - /* - * SMEP/SMAP is disabled if CPU is in non-paging mode - * in hardware. However KVM always uses paging mode to - * emulate guest non-paging mode with TDP. - * To emulate this behavior, SMEP/SMAP needs to be - * manually disabled when guest switches to non-paging - * mode. - */ - hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP); } else if (!(cr4 & X86_CR4_PAE)) { hw_cr4 &= ~X86_CR4_PAE; } } + if (!enable_unrestricted_guest && !is_paging(vcpu)) + /* + * SMEP/SMAP is disabled if CPU is in non-paging mode in + * hardware. However KVM always uses paging mode without + * unrestricted guest. + * To emulate this behavior, SMEP/SMAP needs to be manually + * disabled when guest switches to non-paging mode. + */ + hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP); + vmcs_writel(CR4_READ_SHADOW, cr4); vmcs_writel(GUEST_CR4, hw_cr4); return 0; From 19eaffefc4b03d92e0adfd1870b10b9539916106 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 10 Nov 2015 09:14:39 +0100 Subject: [PATCH 1521/2091] KVM: svm: unconditionally intercept #DB commit cbdb967af3d54993f5814f1cee0ed311a055377d upstream. This is needed to avoid the possibility that the guest triggers an infinite stream of #DB exceptions (CVE-2015-8104). VMX is not affected: because it does not save DR6 in the VMCS, it already intercepts #DB unconditionally. 
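In outline, the change below makes the #DB intercept part of the static VMCB setup rather than something toggled from the guest-debug path; a minimal sketch of the resulting init_vmcb() fragment (simplified, see the diff for the full context):

	set_exception_intercept(svm, UD_VECTOR);
	set_exception_intercept(svm, MC_VECTOR);
	set_exception_intercept(svm, AC_VECTOR);
	set_exception_intercept(svm, DB_VECTOR);	/* now set unconditionally */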
Reported-by: Jan Beulich Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 454ccb082e182..0d039cd268a8a 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1106,6 +1106,7 @@ static void init_vmcb(struct vcpu_svm *svm) set_exception_intercept(svm, UD_VECTOR); set_exception_intercept(svm, MC_VECTOR); set_exception_intercept(svm, AC_VECTOR); + set_exception_intercept(svm, DB_VECTOR); set_intercept(svm, INTERCEPT_INTR); set_intercept(svm, INTERCEPT_NMI); @@ -1638,20 +1639,13 @@ static void svm_set_segment(struct kvm_vcpu *vcpu, mark_dirty(svm->vmcb, VMCB_SEG); } -static void update_db_bp_intercept(struct kvm_vcpu *vcpu) +static void update_bp_intercept(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - clr_exception_intercept(svm, DB_VECTOR); clr_exception_intercept(svm, BP_VECTOR); - if (svm->nmi_singlestep) - set_exception_intercept(svm, DB_VECTOR); - if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) { - if (vcpu->guest_debug & - (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) - set_exception_intercept(svm, DB_VECTOR); if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) set_exception_intercept(svm, BP_VECTOR); } else @@ -1757,7 +1751,6 @@ static int db_interception(struct vcpu_svm *svm) if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) svm->vmcb->save.rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF); - update_db_bp_intercept(&svm->vcpu); } if (svm->vcpu.guest_debug & @@ -3751,7 +3744,6 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu) */ svm->nmi_singlestep = true; svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); - update_db_bp_intercept(vcpu); } static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr) @@ -4367,7 +4359,7 @@ static struct kvm_x86_ops svm_x86_ops = { .vcpu_load = svm_vcpu_load, .vcpu_put = svm_vcpu_put, - .update_db_bp_intercept = update_db_bp_intercept, + .update_db_bp_intercept = update_bp_intercept, .get_msr = svm_get_msr, .set_msr = svm_set_msr, .get_segment_base = svm_get_segment_base, From e052d6eeedf13ad69db1686b0b49fb9192519d9e Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 12 Nov 2015 16:43:02 +1100 Subject: [PATCH 1522/2091] KVM: PPC: Book3S HV: Prohibit setting illegal transaction state in MSR commit c20875a3e638e4a03e099b343ec798edd1af5cc6 upstream. Currently it is possible for userspace (e.g. QEMU) to set a value for the MSR for a guest VCPU which has both of the TS bits set, which is an illegal combination. The result of this is that when we execute a hrfid (hypervisor return from interrupt doubleword) instruction to enter the guest, the CPU will take a TM Bad Thing type of program interrupt (vector 0x700). Now, if PR KVM is configured in the kernel along with HV KVM, we actually handle this without crashing the host or giving hypervisor privilege to the guest; instead what happens is that we deliver a program interrupt to the guest, with SRR0 reflecting the address of the hrfid instruction and SRR1 containing the MSR value at that point. If PR KVM is not configured in the kernel, then we try to run the host's program interrupt handler with the MMU set to the guest context, which almost certainly causes a host crash. This closes the hole by making kvmppc_set_msr_hv() check for the illegal combination and force the TS field to a safe value (00, meaning non-transactional). 
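A minimal sketch of the sanitising check described above; the full hunk in kvmppc_set_msr_hv() follows below:

	/* Both TS bits set is an illegal transactional state; force the
	 * field back to 00 (non-transactional) before storing the MSR.
	 */
	if ((msr & MSR_TS_MASK) == MSR_TS_MASK)
		msr &= ~MSR_TS_MASK;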
Signed-off-by: Paul Mackerras Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kvm/book3s_hv.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index f1e0e5522e3a6..f5b3de7f7fa24 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -210,6 +210,12 @@ static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu) static void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr) { + /* + * Check for illegal transactional state bit combination + * and if we find it, force the TS field to a safe state. + */ + if ((msr & MSR_TS_MASK) == MSR_TS_MASK) + msr &= ~MSR_TS_MASK; vcpu->arch.shregs.msr = msr; kvmppc_end_cede(vcpu); } From 8a3185c54d650a86dafc8d8bcafa124b50944315 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 12 Nov 2015 14:49:17 +0100 Subject: [PATCH 1523/2091] KVM: x86: expose MSR_TSC_AUX to userspace commit 9dbe6cf941a6fe82933aef565e4095fb10f65023 upstream. If we do not do this, it is not properly saved and restored across migration. Windows notices due to its self-protection mechanisms, and is very upset about it (blue screen of death). Cc: Radim Krcmar Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 47a32f743a912..fed4c84eac443 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -940,7 +940,7 @@ static u32 msrs_to_save[] = { MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, #endif MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA, - MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS + MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX, }; static unsigned num_msrs_to_save; @@ -4117,16 +4117,17 @@ static void kvm_init_msr_list(void) /* * Even MSRs that are valid in the host may not be exposed - * to the guests in some cases. We could work around this - * in VMX with the generic MSR save/load machinery, but it - * is not really worthwhile since it will really only - * happen with nested virtualization. + * to the guests in some cases. */ switch (msrs_to_save[i]) { case MSR_IA32_BNDCFGS: if (!kvm_x86_ops->mpx_supported()) continue; break; + case MSR_TSC_AUX: + if (!kvm_x86_ops->rdtscp_supported()) + continue; + break; default: break; } From 17f33d468f4dfbfd5e4c8782f5e26a0863167a66 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 12 Nov 2015 16:42:18 +0100 Subject: [PATCH 1524/2091] KVM: x86: correctly print #AC in traces commit aba2f06c070f604e388cf77b1dcc7f4cf4577eb0 upstream. Poor #AC was so unimportant until a few days ago that we were not even tracing its name correctly. But now it's all over the place. 
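Why one missing entry makes #AC print as a bare number: the tracepoint resolves exception vectors through the kvm_trace_sym_exc list, so any vector absent from it has no symbolic name. A rough user-space model (the EXS() shape here is an approximation for illustration, not the kernel macro):

#include <stdio.h>

#define AC_VECTOR 17                    /* x86 alignment-check exception */
#define EXS(x) { x##_VECTOR, "#" #x }   /* approximate shape of the helper */

static const struct { int vec; const char *name; } sym_exc[] = {
        EXS(AC),   /* the entry this patch adds */
};

static const char *exc_name(int vec)
{
        for (unsigned int i = 0; i < sizeof(sym_exc) / sizeof(sym_exc[0]); i++)
                if (sym_exc[i].vec == vec)
                        return sym_exc[i].name;
        return "(unnamed)";   /* roughly how #AC showed up before */
}

int main(void)
{
        printf("vector 17 -> %s\n", exc_name(17));
        return 0;
}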
Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/trace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 7c7bc8bef21fd..21dda139eb3ad 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -250,7 +250,7 @@ TRACE_EVENT(kvm_inj_virq, #define kvm_trace_sym_exc \ EXS(DE), EXS(DB), EXS(BP), EXS(OF), EXS(BR), EXS(UD), EXS(NM), \ EXS(DF), EXS(TS), EXS(NP), EXS(SS), EXS(GP), EXS(PF), \ - EXS(MF), EXS(MC) + EXS(MF), EXS(AC), EXS(MC) /* * Tracepoint for kvm interrupt injection: From eec2baa864792fc7d6b396b1934c83dad759905d Mon Sep 17 00:00:00 2001 From: Mario Kleiner Date: Fri, 18 Dec 2015 20:24:06 +0100 Subject: [PATCH 1525/2091] x86/reboot/quirks: Add iMac10,1 to pci_reboot_dmi_table[] commit 2f0c0b2d96b1205efb14347009748d786c2d9ba5 upstream. Without the reboot=pci method, the iMac 10,1 simply hangs after printing "Restarting system" at the point when it should reboot. This fixes it. Signed-off-by: Mario Kleiner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Jones Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1450466646-26663-1-git-send-email-mario.kleiner.de@gmail.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/reboot.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 86db4bcd7ce52..0549ae3cb3326 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -182,6 +182,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "iMac9,1"), }, }, + { /* Handle problems with rebooting on the iMac10,1. */ + .callback = set_pci_reboot, + .ident = "Apple iMac10,1", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "iMac10,1"), + }, + }, /* ASRock */ { /* Handle problems with rebooting on ASRock Q1900DC-ITX */ From 4b6c3a55305de01d24d994d16ff7dc3dfaee2715 Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Mon, 4 Jan 2016 10:17:09 -0800 Subject: [PATCH 1526/2091] x86/boot: Double BOOT_HEAP_SIZE to 64KB commit 8c31902cffc4d716450be549c66a67a8a3dd479c upstream. When decompressing kernel image during x86 bootup, malloc memory for ELF program headers may run out of heap space, which leads to system halt. This patch doubles BOOT_HEAP_SIZE to 64KB. Tested with 32-bit kernel which failed to boot without this patch. Signed-off-by: H.J. Lu Acked-by: H. Peter Anvin Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/boot.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h index 4fa687a47a62d..6b8d6e8cd4494 100644 --- a/arch/x86/include/asm/boot.h +++ b/arch/x86/include/asm/boot.h @@ -27,7 +27,7 @@ #define BOOT_HEAP_SIZE 0x400000 #else /* !CONFIG_KERNEL_BZIP2 */ -#define BOOT_HEAP_SIZE 0x8000 +#define BOOT_HEAP_SIZE 0x10000 #endif /* !CONFIG_KERNEL_BZIP2 */ From ae535caf02c7e2e7feec62f4e07ac1f48ad5b336 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 6 Jan 2016 12:21:01 -0800 Subject: [PATCH 1527/2091] x86/mm: Add barriers and document switch_mm()-vs-flush synchronization commit 71b3c126e61177eb693423f2e18a1914205b165e upstream. 
When switch_mm() activates a new PGD, it also sets a bit that tells other CPUs that the PGD is in use so that TLB flush IPIs will be sent. In order for that to work correctly, the bit needs to be visible prior to loading the PGD and therefore starting to fill the local TLB. Document all the barriers that make this work correctly and add a couple that were missing. Signed-off-by: Andy Lutomirski Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: linux-mm@kvack.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/mmu_context.h | 33 +++++++++++++++++++++++++++++- arch/x86/mm/tlb.c | 29 +++++++++++++++++++++++--- 2 files changed, 58 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 80d67dd803511..ca21dfafd424c 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -104,8 +104,34 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, #endif cpumask_set_cpu(cpu, mm_cpumask(next)); - /* Re-load page tables */ + /* + * Re-load page tables. + * + * This logic has an ordering constraint: + * + * CPU 0: Write to a PTE for 'next' + * CPU 0: load bit 1 in mm_cpumask. if nonzero, send IPI. + * CPU 1: set bit 1 in next's mm_cpumask + * CPU 1: load from the PTE that CPU 0 writes (implicit) + * + * We need to prevent an outcome in which CPU 1 observes + * the new PTE value and CPU 0 observes bit 1 clear in + * mm_cpumask. (If that occurs, then the IPI will never + * be sent, and CPU 0's TLB will contain a stale entry.) + * + * The bad outcome can occur if either CPU's load is + * reordered before that CPU's store, so both CPUs much + * execute full barriers to prevent this from happening. + * + * Thus, switch_mm needs a full barrier between the + * store to mm_cpumask and any operation that could load + * from next->pgd. This barrier synchronizes with + * remote TLB flushers. Fortunately, load_cr3 is + * serializing and thus acts as a full barrier. + * + */ load_cr3(next->pgd); + trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); /* Stop flush ipis for the previous mm */ @@ -142,10 +168,15 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, * schedule, protecting us from simultaneous changes. */ cpumask_set_cpu(cpu, mm_cpumask(next)); + /* * We were in lazy tlb mode and leave_mm disabled * tlb flush IPI delivery. We must reload CR3 * to make sure to use no freed page tables. + * + * As above, this is a barrier that forces + * TLB repopulation to be ordered after the + * store to mm_cpumask. */ load_cr3(next->pgd); trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 90b924acd9822..061e0114005e5 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -160,7 +160,10 @@ void flush_tlb_current_task(void) preempt_disable(); count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); + + /* This is an implicit full barrier that synchronizes with switch_mm. 
*/ local_flush_tlb(); + trace_tlb_flush(TLB_LOCAL_SHOOTDOWN, TLB_FLUSH_ALL); if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL); @@ -187,17 +190,29 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, unsigned long base_pages_to_flush = TLB_FLUSH_ALL; preempt_disable(); - if (current->active_mm != mm) + if (current->active_mm != mm) { + /* Synchronize with switch_mm. */ + smp_mb(); + goto out; + } if (!current->mm) { leave_mm(smp_processor_id()); + + /* Synchronize with switch_mm. */ + smp_mb(); + goto out; } if ((end != TLB_FLUSH_ALL) && !(vmflag & VM_HUGETLB)) base_pages_to_flush = (end - start) >> PAGE_SHIFT; + /* + * Both branches below are implicit full barriers (MOV to CR or + * INVLPG) that synchronize with switch_mm. + */ if (base_pages_to_flush > tlb_single_page_flush_ceiling) { base_pages_to_flush = TLB_FLUSH_ALL; count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); @@ -227,10 +242,18 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long start) preempt_disable(); if (current->active_mm == mm) { - if (current->mm) + if (current->mm) { + /* + * Implicit full barrier (INVLPG) that synchronizes + * with switch_mm. + */ __flush_tlb_one(start); - else + } else { leave_mm(smp_processor_id()); + + /* Synchronize with switch_mm. */ + smp_mb(); + } } if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) From c1a631b482a7b3cc5a78daa8f5babf28e67d4dc6 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 12 Jan 2016 12:47:40 -0800 Subject: [PATCH 1528/2091] x86/mm: Improve switch_mm() barrier comments commit 4eaffdd5a5fe6ff9f95e1ab4de1ac904d5e0fa8b upstream. My previous comments were still a bit confusing and there was a typo. Fix it up. Reported-by: Peter Zijlstra Signed-off-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Rik van Riel Cc: Thomas Gleixner Fixes: 71b3c126e611 ("x86/mm: Add barriers and document switch_mm()-vs-flush synchronization") Link: http://lkml.kernel.org/r/0a0b43cdcdd241c5faaaecfbcc91a155ddedc9a1.1452631609.git.luto@kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/mmu_context.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index ca21dfafd424c..73e38f14ddebe 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -120,14 +120,16 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, * be sent, and CPU 0's TLB will contain a stale entry.) * * The bad outcome can occur if either CPU's load is - * reordered before that CPU's store, so both CPUs much + * reordered before that CPU's store, so both CPUs must * execute full barriers to prevent this from happening. * * Thus, switch_mm needs a full barrier between the * store to mm_cpumask and any operation that could load - * from next->pgd. This barrier synchronizes with - * remote TLB flushers. Fortunately, load_cr3 is - * serializing and thus acts as a full barrier. + * from next->pgd. TLB fills are special and can happen + * due to instruction fetches or for no reason at all, + * and neither LOCK nor MFENCE orders them. + * Fortunately, load_cr3() is serializing and gives the + * ordering guarantee we need. 
* */ load_cr3(next->pgd); @@ -174,9 +176,8 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, * tlb flush IPI delivery. We must reload CR3 * to make sure to use no freed page tables. * - * As above, this is a barrier that forces - * TLB repopulation to be ordered after the - * store to mm_cpumask. + * As above, load_cr3() is serializing and orders TLB + * fills with respect to the mm_cpumask write. */ load_cr3(next->pgd); trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); From 8dfca273353b9131dfd82c2720ccd78f89fd44ae Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Sat, 5 Sep 2015 17:44:13 -0500 Subject: [PATCH 1529/2091] ipmi: Start the timer and thread on internal msgs commit 0cfec916e86d881e209de4b4ae9959a6271e6660 upstream. The timer and thread were not being started for internal messages, so in interrupt mode if something hung the timer would never go off and clean things up. Factor out the internal message sending and start the timer for those messages, too. Signed-off-by: Corey Minyard Tested-by: Gouji, Masayuki Signed-off-by: Greg Kroah-Hartman --- drivers/char/ipmi/ipmi_si_intf.c | 73 +++++++++++++++++++------------- 1 file changed, 44 insertions(+), 29 deletions(-) diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index 8a45e92ff60c7..88a7bdd4f28b8 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -404,18 +404,42 @@ static enum si_sm_result start_next_msg(struct smi_info *smi_info) return rv; } -static void start_check_enables(struct smi_info *smi_info) +static void smi_mod_timer(struct smi_info *smi_info, unsigned long new_val) +{ + smi_info->last_timeout_jiffies = jiffies; + mod_timer(&smi_info->si_timer, new_val); + smi_info->timer_running = true; +} + +/* + * Start a new message and (re)start the timer and thread. 
+ */ +static void start_new_msg(struct smi_info *smi_info, unsigned char *msg, + unsigned int size) +{ + smi_mod_timer(smi_info, jiffies + SI_TIMEOUT_JIFFIES); + + if (smi_info->thread) + wake_up_process(smi_info->thread); + + smi_info->handlers->start_transaction(smi_info->si_sm, msg, size); +} + +static void start_check_enables(struct smi_info *smi_info, bool start_timer) { unsigned char msg[2]; msg[0] = (IPMI_NETFN_APP_REQUEST << 2); msg[1] = IPMI_GET_BMC_GLOBAL_ENABLES_CMD; - smi_info->handlers->start_transaction(smi_info->si_sm, msg, 2); + if (start_timer) + start_new_msg(smi_info, msg, 2); + else + smi_info->handlers->start_transaction(smi_info->si_sm, msg, 2); smi_info->si_state = SI_CHECKING_ENABLES; } -static void start_clear_flags(struct smi_info *smi_info) +static void start_clear_flags(struct smi_info *smi_info, bool start_timer) { unsigned char msg[3]; @@ -424,7 +448,10 @@ static void start_clear_flags(struct smi_info *smi_info) msg[1] = IPMI_CLEAR_MSG_FLAGS_CMD; msg[2] = WDT_PRE_TIMEOUT_INT; - smi_info->handlers->start_transaction(smi_info->si_sm, msg, 3); + if (start_timer) + start_new_msg(smi_info, msg, 3); + else + smi_info->handlers->start_transaction(smi_info->si_sm, msg, 3); smi_info->si_state = SI_CLEARING_FLAGS; } @@ -434,10 +461,8 @@ static void start_getting_msg_queue(struct smi_info *smi_info) smi_info->curr_msg->data[1] = IPMI_GET_MSG_CMD; smi_info->curr_msg->data_size = 2; - smi_info->handlers->start_transaction( - smi_info->si_sm, - smi_info->curr_msg->data, - smi_info->curr_msg->data_size); + start_new_msg(smi_info, smi_info->curr_msg->data, + smi_info->curr_msg->data_size); smi_info->si_state = SI_GETTING_MESSAGES; } @@ -447,20 +472,11 @@ static void start_getting_events(struct smi_info *smi_info) smi_info->curr_msg->data[1] = IPMI_READ_EVENT_MSG_BUFFER_CMD; smi_info->curr_msg->data_size = 2; - smi_info->handlers->start_transaction( - smi_info->si_sm, - smi_info->curr_msg->data, - smi_info->curr_msg->data_size); + start_new_msg(smi_info, smi_info->curr_msg->data, + smi_info->curr_msg->data_size); smi_info->si_state = SI_GETTING_EVENTS; } -static void smi_mod_timer(struct smi_info *smi_info, unsigned long new_val) -{ - smi_info->last_timeout_jiffies = jiffies; - mod_timer(&smi_info->si_timer, new_val); - smi_info->timer_running = true; -} - /* * When we have a situtaion where we run out of memory and cannot * allocate messages, we just leave them in the BMC and run the system @@ -470,11 +486,11 @@ static void smi_mod_timer(struct smi_info *smi_info, unsigned long new_val) * Note that we cannot just use disable_irq(), since the interrupt may * be shared. 
*/ -static inline bool disable_si_irq(struct smi_info *smi_info) +static inline bool disable_si_irq(struct smi_info *smi_info, bool start_timer) { if ((smi_info->irq) && (!smi_info->interrupt_disabled)) { smi_info->interrupt_disabled = true; - start_check_enables(smi_info); + start_check_enables(smi_info, start_timer); return true; } return false; @@ -484,7 +500,7 @@ static inline bool enable_si_irq(struct smi_info *smi_info) { if ((smi_info->irq) && (smi_info->interrupt_disabled)) { smi_info->interrupt_disabled = false; - start_check_enables(smi_info); + start_check_enables(smi_info, true); return true; } return false; @@ -502,7 +518,7 @@ static struct ipmi_smi_msg *alloc_msg_handle_irq(struct smi_info *smi_info) msg = ipmi_alloc_smi_msg(); if (!msg) { - if (!disable_si_irq(smi_info)) + if (!disable_si_irq(smi_info, true)) smi_info->si_state = SI_NORMAL; } else if (enable_si_irq(smi_info)) { ipmi_free_smi_msg(msg); @@ -518,7 +534,7 @@ static void handle_flags(struct smi_info *smi_info) /* Watchdog pre-timeout */ smi_inc_stat(smi_info, watchdog_pretimeouts); - start_clear_flags(smi_info); + start_clear_flags(smi_info, true); smi_info->msg_flags &= ~WDT_PRE_TIMEOUT_INT; if (smi_info->intf) ipmi_smi_watchdog_pretimeout(smi_info->intf); @@ -870,8 +886,7 @@ static enum si_sm_result smi_event_handler(struct smi_info *smi_info, msg[0] = (IPMI_NETFN_APP_REQUEST << 2); msg[1] = IPMI_GET_MSG_FLAGS_CMD; - smi_info->handlers->start_transaction( - smi_info->si_sm, msg, 2); + start_new_msg(smi_info, msg, 2); smi_info->si_state = SI_GETTING_FLAGS; goto restart; } @@ -901,7 +916,7 @@ static enum si_sm_result smi_event_handler(struct smi_info *smi_info, * disable and messages disabled. */ if (smi_info->supports_event_msg_buff || smi_info->irq) { - start_check_enables(smi_info); + start_check_enables(smi_info, true); } else { smi_info->curr_msg = alloc_msg_handle_irq(smi_info); if (!smi_info->curr_msg) @@ -3515,7 +3530,7 @@ static int try_smi_init(struct smi_info *new_smi) * Start clearing the flags before we enable interrupts or the * timer to avoid racing with the timer. */ - start_clear_flags(new_smi); + start_clear_flags(new_smi, false); /* * IRQ is defined to be set when non-zero. req_events will @@ -3817,7 +3832,7 @@ static void cleanup_one_si(struct smi_info *to_clean) poll(to_clean); schedule_timeout_uninterruptible(1); } - disable_si_irq(to_clean); + disable_si_irq(to_clean, false); while (to_clean->curr_msg || (to_clean->si_state != SI_NORMAL)) { poll(to_clean); schedule_timeout_uninterruptible(1); From 69bed67bc32fd47774539eaddb9f33420df556fe Mon Sep 17 00:00:00 2001 From: Jan Stancek Date: Tue, 8 Dec 2015 13:57:51 -0500 Subject: [PATCH 1530/2091] ipmi: move timer init to before irq is setup commit 27f972d3e00b50639deb4cc1392afaeb08d3cecc upstream. We encountered a panic on boot in ipmi_si on a dell per320 due to an uninitialized timer as follows. static int smi_start_processing(void *send_info, ipmi_smi_t intf) { /* Try to claim any interrupts. */ if (new_smi->irq_setup) new_smi->irq_setup(new_smi); --> IRQ arrives here and irq handler tries to modify uninitialized timer which triggers BUG_ON(!timer->function) in __mod_timer(). Call Trace: [] start_new_msg+0x47/0x80 [ipmi_si] [] start_check_enables+0x4e/0x60 [ipmi_si] [] smi_event_handler+0x1e8/0x640 [ipmi_si] [] ? 
__rcu_process_callbacks+0x54/0x350 [] si_irq_handler+0x3c/0x60 [ipmi_si] [] handle_IRQ_event+0x60/0x170 [] handle_edge_irq+0xde/0x180 [] handle_irq+0x49/0xa0 [] do_IRQ+0x6c/0xf0 [] ret_from_intr+0x0/0x11 /* Set up the timer that drives the interface. */ setup_timer(&new_smi->si_timer, smi_timeout, (long)new_smi); The following patch fixes the problem. To: Openipmi-developer@lists.sourceforge.net To: Corey Minyard CC: linux-kernel@vger.kernel.org Signed-off-by: Jan Stancek Signed-off-by: Tony Camuso Signed-off-by: Corey Minyard Signed-off-by: Greg Kroah-Hartman --- drivers/char/ipmi/ipmi_si_intf.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index 88a7bdd4f28b8..05222706dc66c 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -1218,14 +1218,14 @@ static int smi_start_processing(void *send_info, new_smi->intf = intf; - /* Try to claim any interrupts. */ - if (new_smi->irq_setup) - new_smi->irq_setup(new_smi); - /* Set up the timer that drives the interface. */ setup_timer(&new_smi->si_timer, smi_timeout, (long)new_smi); smi_mod_timer(new_smi, jiffies + SI_TIMEOUT_JIFFIES); + /* Try to claim any interrupts. */ + if (new_smi->irq_setup) + new_smi->irq_setup(new_smi); + /* * Check if the user forcefully enabled the daemon. */ From e702f5856baa2d68c225662d57c572ea178351b5 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 27 Oct 2015 14:21:51 +0100 Subject: [PATCH 1531/2091] ALSA: hda - Disable 64bit address for Creative HDA controllers commit cadd16ea33a938d49aee99edd4758cc76048b399 upstream. We've had many reports that some Creative sound cards with CA0132 don't work well. Some reported that it starts working after reloading the module, while some reported it starts working when a 32bit kernel is used. All these facts seem implying that the chip fails to communicate when the buffer is located in 64bit address. This patch addresses these issues by just adding AZX_DCAPS_NO_64BIT flag to the corresponding PCI entries. I casually had a chance to test an SB Recon3D board, and indeed this seems helping. Although this hasn't been tested on all Creative devices, it's safer to assume that this restriction applies to the rest of them, too. So the flag is applied to all Creative entries. Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_intel.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 44dfc7b92bc35..eb0e2e81a712a 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -329,6 +329,7 @@ enum { #define AZX_DCAPS_PRESET_CTHDA \ (AZX_DCAPS_NO_MSI | AZX_DCAPS_POSFIX_LPIB |\ + AZX_DCAPS_NO_64BIT |\ AZX_DCAPS_4K_BDLE_BOUNDARY | AZX_DCAPS_SNOOP_OFF) /* @@ -2156,11 +2157,13 @@ static const struct pci_device_id azx_ids[] = { .class = PCI_CLASS_MULTIMEDIA_HD_AUDIO << 8, .class_mask = 0xffffff, .driver_data = AZX_DRIVER_CTX | AZX_DCAPS_CTX_WORKAROUND | + AZX_DCAPS_NO_64BIT | AZX_DCAPS_RIRB_PRE_DELAY | AZX_DCAPS_POSFIX_LPIB }, #else /* this entry seems still valid -- i.e. 
without emu20kx chip */ { PCI_DEVICE(0x1102, 0x0009), .driver_data = AZX_DRIVER_CTX | AZX_DCAPS_CTX_WORKAROUND | + AZX_DCAPS_NO_64BIT | AZX_DCAPS_RIRB_PRE_DELAY | AZX_DCAPS_POSFIX_LPIB }, #endif /* CM8888 */ From 29923678a8ef26de4fb186fd0c3b9c817c4b4738 Mon Sep 17 00:00:00 2001 From: Alexandra Yates Date: Wed, 4 Nov 2015 15:56:09 -0800 Subject: [PATCH 1532/2091] ALSA: hda - Add Intel Lewisburg device IDs Audio commit 5cf92c8b3dc5da59e05dc81bdc069cedf6f38313 upstream. Adding Intel codename Lewisburg platform device IDs for audio. [rearranged the position by tiwai] Signed-off-by: Alexandra Yates Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_intel.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index eb0e2e81a712a..74142f8444efa 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -1977,6 +1977,11 @@ static const struct pci_device_id azx_ids[] = { .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_PCH }, { PCI_DEVICE(0x8086, 0x8d21), .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_PCH }, + /* Lewisburg */ + { PCI_DEVICE(0x8086, 0xa1f0), + .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_PCH }, + { PCI_DEVICE(0x8086, 0xa270), + .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_PCH }, /* Lynx Point-LP */ { PCI_DEVICE(0x8086, 0x9c20), .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_PCH }, From 055fb07950beed4e3ee6e0a1d3b50ed84bb29fa5 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 4 Nov 2015 22:39:16 +0100 Subject: [PATCH 1533/2091] ALSA: hda - Apply pin fixup for HP ProBook 6550b commit c932b98c1e47312822d911c1bb76e81ef50e389c upstream. HP ProBook 6550b needs the same pin fixup applied to other HP B-series laptops with docks for making its headphone and dock headphone jacks working properly. We just need to add the codec SSID to the list. Bugzilla: https://bugzilla.kernel.org/attachment.cgi?id=191971 Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_sigmatel.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index b1bc667839745..38fe75617dc49 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -702,6 +702,7 @@ static bool hp_bnb2011_with_dock(struct hda_codec *codec) static bool hp_blike_system(u32 subsystem_id) { switch (subsystem_id) { + case 0x103c1473: /* HP ProBook 6550b */ case 0x103c1520: case 0x103c1521: case 0x103c1523: From c397bc975aa4b205a99b0c651af931537be871e5 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Sun, 18 Oct 2015 13:46:47 +0900 Subject: [PATCH 1534/2091] ALSA: fireworks/bebob/oxfw/dice: enable to make as built-in commit df4833886f91eea0d20e6e97066adab308625ef8 upstream. When committed to upstream, these four modules had wrong entries for Makefile. This forces them to be loadable modules even if they're set as built-in. This commit fixes this bug. 
Fixes: b5b04336015e('ALSA: fireworks: Add skelton for Fireworks based devices') Fixes: fd6f4b0dc167('ALSA: bebob: Add skelton for BeBoB based devices') Fixes: 1a4e39c2e5ca('ALSA: oxfw: Move to its own directory') Fixes: 14ff6a094815('ALSA: dice: Move file to its own directory') Signed-off-by: Takashi Sakamoto Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/firewire/bebob/Makefile | 2 +- sound/firewire/dice/Makefile | 2 +- sound/firewire/fireworks/Makefile | 2 +- sound/firewire/oxfw/Makefile | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/firewire/bebob/Makefile b/sound/firewire/bebob/Makefile index 6cf470c80d1fd..af7ed66432661 100644 --- a/sound/firewire/bebob/Makefile +++ b/sound/firewire/bebob/Makefile @@ -1,4 +1,4 @@ snd-bebob-objs := bebob_command.o bebob_stream.o bebob_proc.o bebob_midi.o \ bebob_pcm.o bebob_hwdep.o bebob_terratec.o bebob_yamaha.o \ bebob_focusrite.o bebob_maudio.o bebob.o -obj-m += snd-bebob.o +obj-$(CONFIG_SND_BEBOB) += snd-bebob.o diff --git a/sound/firewire/dice/Makefile b/sound/firewire/dice/Makefile index 9ef228ef7baf2..55b4be9b00340 100644 --- a/sound/firewire/dice/Makefile +++ b/sound/firewire/dice/Makefile @@ -1,3 +1,3 @@ snd-dice-objs := dice-transaction.o dice-stream.o dice-proc.o dice-midi.o \ dice-pcm.o dice-hwdep.o dice.o -obj-m += snd-dice.o +obj-$(CONFIG_SND_DICE) += snd-dice.o diff --git a/sound/firewire/fireworks/Makefile b/sound/firewire/fireworks/Makefile index 0c7440826db8e..15ef7f75a8ef1 100644 --- a/sound/firewire/fireworks/Makefile +++ b/sound/firewire/fireworks/Makefile @@ -1,4 +1,4 @@ snd-fireworks-objs := fireworks_transaction.o fireworks_command.o \ fireworks_stream.o fireworks_proc.o fireworks_midi.o \ fireworks_pcm.o fireworks_hwdep.o fireworks.o -obj-m += snd-fireworks.o +obj-$(CONFIG_SND_FIREWORKS) += snd-fireworks.o diff --git a/sound/firewire/oxfw/Makefile b/sound/firewire/oxfw/Makefile index a926850864f67..06ff50f4e6c0b 100644 --- a/sound/firewire/oxfw/Makefile +++ b/sound/firewire/oxfw/Makefile @@ -1,3 +1,3 @@ snd-oxfw-objs := oxfw-command.o oxfw-stream.o oxfw-control.o oxfw-pcm.o \ oxfw-proc.o oxfw-midi.o oxfw-hwdep.o oxfw.o -obj-m += snd-oxfw.o +obj-$(CONFIG_SND_OXFW) += snd-oxfw.o From 5c7e814c1b3a350ebccde2303fbb4be933315d89 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 9 Nov 2015 14:46:35 +0100 Subject: [PATCH 1535/2091] ALSA: hda - Apply HP headphone fixups more generically commit 196543d54574f50e3fd04df4e3048181e006a9da upstream. It turned out that many HP laptops suffer from the same problem as fixed in commit [c932b98c1e47: ALSA: hda - Apply pin fixup for HP ProBook 6550b]. But, it's tiresome to list up all such PCI SSIDs, as there are really lots of HP machines. Instead, we do a bit more clever, try to check the supposedly dock and built-in headphone pins, and apply the fixup when both seem valid. This rule can be applied generically to all models using the same quirk, so we'll fix all in a shot. 
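The "both pins seem valid" test works by decoding each pin's 32-bit default configuration. A stand-alone sketch of that decision (field positions and values follow the HD-audio pin-config layout; the constants here are illustrative, not the driver's own macros):

#include <stdint.h>
#include <stdio.h>

#define DEFCFG_DEVICE(cfg)   (((cfg) >> 20) & 0xf)   /* device type field */
#define DEFCFG_CONNECT(cfg)  (((cfg) >> 30) & 0x3)   /* port connectivity field */

enum { JACK_LINE_OUT = 0x0, JACK_HP_OUT = 0x2, PORT_NONE = 0x1 };

static int is_hp_output(uint32_t pin_cfg)
{
        /* count line-out too, as BIOSes often set it that way */
        return DEFCFG_CONNECT(pin_cfg) != PORT_NONE &&
               (DEFCFG_DEVICE(pin_cfg) == JACK_LINE_OUT ||
                DEFCFG_DEVICE(pin_cfg) == JACK_HP_OUT);
}

int main(void)
{
        uint32_t dock_pin = 0x012b1010;    /* example: jack present, HP Out device */
        uint32_t unused_pin = 0x40000000;  /* example: port marked as not connected */

        printf("dock pin usable:   %d\n", is_hp_output(dock_pin));
        printf("unused pin usable: %d\n", is_hp_output(unused_pin));
        return 0;
}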
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=107491 Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_sigmatel.c | 45 ++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 16 deletions(-) diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index 38fe75617dc49..8e7d4c087a7a8 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -3110,6 +3110,29 @@ static void stac92hd71bxx_fixup_hp_hdx(struct hda_codec *codec, spec->gpio_led = 0x08; } +static bool is_hp_output(struct hda_codec *codec, hda_nid_t pin) +{ + unsigned int pin_cfg = snd_hda_codec_get_pincfg(codec, pin); + + /* count line-out, too, as BIOS sets often so */ + return get_defcfg_connect(pin_cfg) != AC_JACK_PORT_NONE && + (get_defcfg_device(pin_cfg) == AC_JACK_LINE_OUT || + get_defcfg_device(pin_cfg) == AC_JACK_HP_OUT); +} + +static void fixup_hp_headphone(struct hda_codec *codec, hda_nid_t pin) +{ + unsigned int pin_cfg = snd_hda_codec_get_pincfg(codec, pin); + + /* It was changed in the BIOS to just satisfy MS DTM. + * Lets turn it back into slaved HP + */ + pin_cfg = (pin_cfg & (~AC_DEFCFG_DEVICE)) | + (AC_JACK_HP_OUT << AC_DEFCFG_DEVICE_SHIFT); + pin_cfg = (pin_cfg & (~(AC_DEFCFG_DEF_ASSOC | AC_DEFCFG_SEQUENCE))) | + 0x1f; + snd_hda_codec_set_pincfg(codec, pin, pin_cfg); +} static void stac92hd71bxx_fixup_hp(struct hda_codec *codec, const struct hda_fixup *fix, int action) @@ -3119,22 +3142,12 @@ static void stac92hd71bxx_fixup_hp(struct hda_codec *codec, if (action != HDA_FIXUP_ACT_PRE_PROBE) return; - if (hp_blike_system(codec->core.subsystem_id)) { - unsigned int pin_cfg = snd_hda_codec_get_pincfg(codec, 0x0f); - if (get_defcfg_device(pin_cfg) == AC_JACK_LINE_OUT || - get_defcfg_device(pin_cfg) == AC_JACK_SPEAKER || - get_defcfg_device(pin_cfg) == AC_JACK_HP_OUT) { - /* It was changed in the BIOS to just satisfy MS DTM. - * Lets turn it back into slaved HP - */ - pin_cfg = (pin_cfg & (~AC_DEFCFG_DEVICE)) - | (AC_JACK_HP_OUT << - AC_DEFCFG_DEVICE_SHIFT); - pin_cfg = (pin_cfg & (~(AC_DEFCFG_DEF_ASSOC - | AC_DEFCFG_SEQUENCE))) - | 0x1f; - snd_hda_codec_set_pincfg(codec, 0x0f, pin_cfg); - } + /* when both output A and F are assigned, these are supposedly + * dock and built-in headphones; fix both pin configs + */ + if (is_hp_output(codec, 0x0a) && is_hp_output(codec, 0x0f)) { + fixup_hp_headphone(codec, 0x0a); + fixup_hp_headphone(codec, 0x0f); } if (find_mute_led_cfg(codec, 1)) From 842dd2467a244af77c59b6830a0656fe1d82c8cf Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 19 Nov 2015 16:39:50 +0100 Subject: [PATCH 1536/2091] ALSA: hda - Add fixup for Acer Aspire One Cloudbook 14 commit b9c2fa52135d49a931c56ed2bfc17d61f771b412 upstream. For making the speakers on Acer Aspire One Cloudbook 14 to work, we need the as same quirk as for another Chromebook. This patch adds the corresponding fixup entry. 
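For reference, such an entry takes effect through a simple subsystem-ID lookup: the codec's SSID (PCI subsystem vendor and device) selects a fixup ID at probe time. A minimal model of that lookup, with illustrative names rather than the hda driver's internals:

#include <stdint.h>
#include <stdio.h>

struct ssid_quirk {
        uint16_t vendor;    /* PCI subsystem vendor, e.g. 0x1025 (Acer) */
        uint16_t device;    /* PCI subsystem device, e.g. 0x106d */
        const char *name;
        int fixup_id;
};

enum { FIXUP_NONE = 0, FIXUP_CHROME_BOOK = 1 };

static const struct ssid_quirk quirks[] = {
        { 0x1025, 0x106d, "Acer Cloudbook 14", FIXUP_CHROME_BOOK },
};

static int lookup_fixup(uint32_t subsystem_id)
{
        for (unsigned int i = 0; i < sizeof(quirks) / sizeof(quirks[0]); i++)
                if (quirks[i].vendor == (subsystem_id >> 16) &&
                    quirks[i].device == (subsystem_id & 0xffff))
                        return quirks[i].fixup_id;
        return FIXUP_NONE;
}

int main(void)
{
        printf("fixup for SSID 0x1025106d: %d\n", lookup_fixup(0x1025106d));
        return 0;
}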
Reported-by: Patrick Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 57bb5a559f8e5..64d3b5c2308ae 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5107,6 +5107,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x0742, "Acer AO756", ALC271_FIXUP_HP_GATE_MIC_JACK), SND_PCI_QUIRK(0x1025, 0x0775, "Acer Aspire E1-572", ALC271_FIXUP_HP_GATE_MIC_JACK_E1_572), SND_PCI_QUIRK(0x1025, 0x079b, "Acer Aspire V5-573G", ALC282_FIXUP_ASPIRE_V5_PINS), + SND_PCI_QUIRK(0x1025, 0x106d, "Acer Cloudbook 14", ALC283_FIXUP_CHROME_BOOK), SND_PCI_QUIRK(0x1028, 0x0470, "Dell M101z", ALC269_FIXUP_DELL_M101Z), SND_PCI_QUIRK(0x1028, 0x05ca, "Dell Latitude E7240", ALC292_FIXUP_DELL_E7X), SND_PCI_QUIRK(0x1028, 0x05cb, "Dell Latitude E7440", ALC292_FIXUP_DELL_E7X), From 9bc3da2497b3b21e686175ce2a5bbb15182534fd Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 24 Nov 2015 20:02:12 +0100 Subject: [PATCH 1537/2091] ALSA: hda - Fix noise on Gigabyte Z170X mobo commit 0c25ad80408e95e0a4fbaf0056950206e95f726f upstream. Gigabyte Z710X mobo with ALC1150 codec gets significant noises from the analog loopback routes even if their inputs are all muted. Simply kill the aamix for fixing it. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=108301 Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 64d3b5c2308ae..13294a94b2c1e 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -1771,6 +1771,7 @@ enum { ALC882_FIXUP_NO_PRIMARY_HP, ALC887_FIXUP_ASUS_BASS, ALC887_FIXUP_BASS_CHMAP, + ALC882_FIXUP_DISABLE_AAMIX, }; static void alc889_fixup_coef(struct hda_codec *codec, @@ -1932,6 +1933,8 @@ static void alc882_fixup_no_primary_hp(struct hda_codec *codec, static void alc_fixup_bass_chmap(struct hda_codec *codec, const struct hda_fixup *fix, int action); +static void alc_fixup_disable_aamix(struct hda_codec *codec, + const struct hda_fixup *fix, int action); static const struct hda_fixup alc882_fixups[] = { [ALC882_FIXUP_ABIT_AW9D_MAX] = { @@ -2163,6 +2166,10 @@ static const struct hda_fixup alc882_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc_fixup_bass_chmap, }, + [ALC882_FIXUP_DISABLE_AAMIX] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc_fixup_disable_aamix, + }, }; static const struct snd_pci_quirk alc882_fixup_tbl[] = { @@ -2230,6 +2237,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK(0x1462, 0x7350, "MSI-7350", ALC889_FIXUP_CD), SND_PCI_QUIRK_VENDOR(0x1462, "MSI", ALC882_FIXUP_GPIO3), SND_PCI_QUIRK(0x1458, 0xa002, "Gigabyte EP45-DS3/Z87X-UD3H", ALC889_FIXUP_FRONT_HP_NO_PRESENCE), + SND_PCI_QUIRK(0x1458, 0xa182, "Gigabyte Z170X-UD3", ALC882_FIXUP_DISABLE_AAMIX), SND_PCI_QUIRK(0x147b, 0x107a, "Abit AW9D-MAX", ALC882_FIXUP_ABIT_AW9D_MAX), SND_PCI_QUIRK_VENDOR(0x1558, "Clevo laptop", ALC882_FIXUP_EAPD), SND_PCI_QUIRK(0x161f, 0x2054, "Medion laptop", ALC883_FIXUP_EAPD), From af95ff49d83b6ccb628bc5b7b1caa994fd2a27c9 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 4 Dec 2015 16:44:24 +0100 Subject: [PATCH 1538/2091] ALSA: rme96: Fix unexpected volume reset after rate changes commit a74a821624c0c75388a193337babd17a8c02c740 upstream. 
rme96 driver needs to reset DAC depending on the sample rate, and this results in resetting to the max volume suddenly. It's because of the missing call of snd_rme96_apply_dac_volume(). However, calling this function right after the DAC reset still may not work, and we need some delay before this call. Since the DAC reset and the procedure after that are performed in the spinlock, we delay the DAC volume restore at the end after the spinlock. Reported-and-tested-by: Sylvain LABOISNE Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/rme96.c | 41 ++++++++++++++++++++++++++--------------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/sound/pci/rme96.c b/sound/pci/rme96.c index 2306ccf7281e2..77c963ced67a3 100644 --- a/sound/pci/rme96.c +++ b/sound/pci/rme96.c @@ -741,10 +741,11 @@ snd_rme96_playback_setrate(struct rme96 *rme96, { /* change to/from double-speed: reset the DAC (if available) */ snd_rme96_reset_dac(rme96); + return 1; /* need to restore volume */ } else { writel(rme96->wcreg, rme96->iobase + RME96_IO_CONTROL_REGISTER); + return 0; } - return 0; } static int @@ -980,6 +981,7 @@ snd_rme96_playback_hw_params(struct snd_pcm_substream *substream, struct rme96 *rme96 = snd_pcm_substream_chip(substream); struct snd_pcm_runtime *runtime = substream->runtime; int err, rate, dummy; + bool apply_dac_volume = false; runtime->dma_area = (void __force *)(rme96->iobase + RME96_IO_PLAY_BUFFER); @@ -993,24 +995,26 @@ snd_rme96_playback_hw_params(struct snd_pcm_substream *substream, { /* slave clock */ if ((int)params_rate(params) != rate) { - spin_unlock_irq(&rme96->lock); - return -EIO; - } - } else if ((err = snd_rme96_playback_setrate(rme96, params_rate(params))) < 0) { - spin_unlock_irq(&rme96->lock); - return err; - } - if ((err = snd_rme96_playback_setformat(rme96, params_format(params))) < 0) { - spin_unlock_irq(&rme96->lock); - return err; + err = -EIO; + goto error; + } + } else { + err = snd_rme96_playback_setrate(rme96, params_rate(params)); + if (err < 0) + goto error; + apply_dac_volume = err > 0; /* need to restore volume later? */ } + + err = snd_rme96_playback_setformat(rme96, params_format(params)); + if (err < 0) + goto error; snd_rme96_setframelog(rme96, params_channels(params), 1); if (rme96->capture_periodsize != 0) { if (params_period_size(params) << rme96->playback_frlog != rme96->capture_periodsize) { - spin_unlock_irq(&rme96->lock); - return -EBUSY; + err = -EBUSY; + goto error; } } rme96->playback_periodsize = @@ -1021,9 +1025,16 @@ snd_rme96_playback_hw_params(struct snd_pcm_substream *substream, rme96->wcreg &= ~(RME96_WCR_PRO | RME96_WCR_DOLBY | RME96_WCR_EMP); writel(rme96->wcreg |= rme96->wcreg_spdif_stream, rme96->iobase + RME96_IO_CONTROL_REGISTER); } + + err = 0; + error: spin_unlock_irq(&rme96->lock); - - return 0; + if (apply_dac_volume) { + usleep_range(3000, 10000); + snd_rme96_apply_dac_volume(rme96); + } + + return err; } static int From c5e609dc5cab66a6db129d11e5d33be7371624b7 Mon Sep 17 00:00:00 2001 From: David Henningsson Date: Mon, 7 Dec 2015 11:29:31 +0100 Subject: [PATCH 1539/2091] ALSA: hda - Add inverted dmic for Packard Bell DOTS commit 02f6ff90400d055f08b0ba0b5f0707630b6faed7 upstream. On the internal mic of the Packard Bell DOTS, one channel has an inverted signal. Add a quirk to fix this up. 
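A side note on the rme96 change above: the volume restore has to move outside the locked region because usleep_range() may sleep, and sleeping is not allowed while a spinlock is held. The resulting pattern, shown here as a user-space analogue with made-up names: decide what needs doing under the lock, then do the slow work after unlocking.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

static int configure_rate(int rate)
{
        bool apply_volume = false;

        pthread_mutex_lock(&lock);
        if (rate > 96000)             /* stand-in for the double-speed DAC reset */
                apply_volume = true;  /* remember that the volume must be restored */
        pthread_mutex_unlock(&lock);

        if (apply_volume) {
                usleep(5000);         /* let the DAC settle, outside the lock */
                puts("restoring DAC volume");
        }
        return 0;
}

int main(void)
{
        return configure_rate(192000);
}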
BugLink: https://bugs.launchpad.net/bugs/1523232 Signed-off-by: David Henningsson Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 13294a94b2c1e..3fa7f676a1640 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6465,6 +6465,7 @@ static const struct hda_fixup alc662_fixups[] = { static const struct snd_pci_quirk alc662_fixup_tbl[] = { SND_PCI_QUIRK(0x1019, 0x9087, "ECS", ALC662_FIXUP_ASUS_MODE2), SND_PCI_QUIRK(0x1025, 0x022f, "Acer Aspire One", ALC662_FIXUP_INV_DMIC), + SND_PCI_QUIRK(0x1025, 0x0241, "Packard Bell DOTS", ALC662_FIXUP_INV_DMIC), SND_PCI_QUIRK(0x1025, 0x0308, "Acer Aspire 8942G", ALC662_FIXUP_ASPIRE), SND_PCI_QUIRK(0x1025, 0x031c, "Gateway NV79", ALC662_FIXUP_SKU_IGNORE), SND_PCI_QUIRK(0x1025, 0x0349, "eMachines eM250", ALC662_FIXUP_INV_DMIC), From bc4d35b2c8c7c39b6d69cda22eac15b1d850b71a Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 9 Dec 2015 15:17:43 +0100 Subject: [PATCH 1540/2091] ALSA: hda - Fix noise problems on Thinkpad T440s commit 9a811230481243f384b8036c6a558bfdbd961f78 upstream. Lenovo Thinkpad T440s suffers from constant background noises, and it seems to be a generic hardware issue on this model: https://forums.lenovo.com/t5/ThinkPad-T400-T500-and-newer-T/T440s-speaker-noise/td-p/1339883 As the noise comes from the analog loopback path, disabling the path is the easy workaround. Also, the machine gives significant cracking noises at PM suspend. A workaround found by trial-and-error is to disable the shutup callback currently used for ALC269-variant. This patch addresses these noise issues by introducing a new fixup chain. Although the same workaround might be applicable to other Thinkpad models, it's applied only to T440s (17aa:220c) in this patch, so far, just to be safe (you chicken!). As a compromise, a new model option string "tp440" is provided now, though, so that owners of other Thinkpad models can test it more easily. 
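As a usage note on the sentence above (file path and syntax are the usual ALSA conventions, shown only as an illustration, not part of the patch): owners of other Thinkpad models can try the workaround without a rebuild by selecting the new model string when the driver loads, for example in a modprobe configuration file:

options snd-hda-intel model=tp440

With a built-in driver, the equivalent is snd_hda_intel.model=tp440 on the kernel command line.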
Bugzilla: https://bugzilla.opensuse.org/show_bug.cgi?id=958504 Reported-and-tested-by: Tim Hardeck Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 3fa7f676a1640..b15f6ff608963 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4208,6 +4208,18 @@ static void alc_fixup_tpt440_dock(struct hda_codec *codec, } } +/* additional fixup for Thinkpad T440s noise problem */ +static void alc_fixup_tpt440(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + struct alc_spec *spec = codec->spec; + + if (action == HDA_FIXUP_ACT_PRE_PROBE) { + spec->shutup = alc_no_shutup; /* reduce click noise */ + spec->gen.mixer_nid = 0; /* reduce background noise */ + } +} + static void alc_shutup_dell_xps13(struct hda_codec *codec) { struct alc_spec *spec = codec->spec; @@ -4533,6 +4545,7 @@ enum { ALC255_FIXUP_HEADSET_MODE_NO_HP_MIC, ALC293_FIXUP_DELL1_MIC_NO_PRESENCE, ALC292_FIXUP_TPT440_DOCK, + ALC292_FIXUP_TPT440, ALC283_FIXUP_BXBT2807_MIC, ALC255_FIXUP_DELL_WMI_MIC_MUTE_LED, ALC282_FIXUP_ASPIRE_V5_PINS, @@ -5001,6 +5014,12 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC269_FIXUP_LIMIT_INT_MIC_BOOST }, + [ALC292_FIXUP_TPT440] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc_fixup_tpt440, + .chained = true, + .chain_id = ALC292_FIXUP_TPT440_DOCK, + }, [ALC283_FIXUP_BXBT2807_MIC] = { .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { @@ -5236,7 +5255,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x21fb, "Thinkpad T430s", ALC269_FIXUP_LENOVO_DOCK), SND_PCI_QUIRK(0x17aa, 0x2203, "Thinkpad X230 Tablet", ALC269_FIXUP_LENOVO_DOCK), SND_PCI_QUIRK(0x17aa, 0x2208, "Thinkpad T431s", ALC269_FIXUP_LENOVO_DOCK), - SND_PCI_QUIRK(0x17aa, 0x220c, "Thinkpad T440s", ALC292_FIXUP_TPT440_DOCK), + SND_PCI_QUIRK(0x17aa, 0x220c, "Thinkpad T440s", ALC292_FIXUP_TPT440), SND_PCI_QUIRK(0x17aa, 0x220e, "Thinkpad T440p", ALC292_FIXUP_TPT440_DOCK), SND_PCI_QUIRK(0x17aa, 0x2210, "Thinkpad T540p", ALC292_FIXUP_TPT440_DOCK), SND_PCI_QUIRK(0x17aa, 0x2212, "Thinkpad T440", ALC292_FIXUP_TPT440_DOCK), @@ -5331,6 +5350,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = { {.id = ALC283_FIXUP_CHROME_BOOK, .name = "alc283-dac-wcaps"}, {.id = ALC283_FIXUP_SENSE_COMBO_JACK, .name = "alc283-sense-combo"}, {.id = ALC292_FIXUP_TPT440_DOCK, .name = "tpt440-dock"}, + {.id = ALC292_FIXUP_TPT440, .name = "tpt440"}, {} }; From dff55bf1b247c612600ba0855f792bf8c3833952 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 10 Dec 2015 12:20:20 +0100 Subject: [PATCH 1541/2091] ALSA: hda - Add a fixup for Thinkpad X1 Carbon 2nd commit b6903c0ed9f0bcbbe88f67f7ed43d1721cbc6235 upstream. Apply the same fixup for Thinkpad with dock to Thinkpad X1 Carbon 2nd, too. This reduces the annoying loud cracking noise problem, as well as the support of missing docking port. 
Bugzilla: https://bugzilla.suse.com/show_bug.cgi?id=958439 Reported-and-tested-by: Benjamin Poirier Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index b15f6ff608963..bf9d3a86b5de8 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5261,6 +5261,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x2212, "Thinkpad T440", ALC292_FIXUP_TPT440_DOCK), SND_PCI_QUIRK(0x17aa, 0x2214, "Thinkpad X240", ALC292_FIXUP_TPT440_DOCK), SND_PCI_QUIRK(0x17aa, 0x2215, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), + SND_PCI_QUIRK(0x17aa, 0x2218, "Thinkpad X1 Carbon 2nd", ALC292_FIXUP_TPT440_DOCK), SND_PCI_QUIRK(0x17aa, 0x2223, "ThinkPad T550", ALC292_FIXUP_TPT440_DOCK), SND_PCI_QUIRK(0x17aa, 0x2226, "ThinkPad X250", ALC292_FIXUP_TPT440_DOCK), SND_PCI_QUIRK(0x17aa, 0x3977, "IdeaPad S210", ALC283_FIXUP_INT_MIC), From be7c9844eea1a34939f808af288b2e59c1510fff Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 10 Dec 2015 23:30:43 +0100 Subject: [PATCH 1542/2091] ALSA: hda - Apply click noise workaround for Thinkpads generically commit 157f0b7f6c0cc0bc88647390006e959e267a0143 upstream. It seems that a workaround for Thinkpad T440s crackling noise can be applied generically to all Thinkpad models: namely, disabling the default alc269 shutup callback. This patch moves it to the existing alc_fixup_tpt440_dock() while also replacing the rest code with another existing alc_fixup_disable_aamix(). It resulted in a good code reduction. Bugzilla: https://bugzilla.suse.com/show_bug.cgi?id=958439 Reported-and-tested-by: Benjamin Poirier Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index bf9d3a86b5de8..b89e9e4110d6b 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4202,24 +4202,13 @@ static void alc_fixup_tpt440_dock(struct hda_codec *codec, struct alc_spec *spec = codec->spec; if (action == HDA_FIXUP_ACT_PRE_PROBE) { + spec->shutup = alc_no_shutup; /* reduce click noise */ spec->parse_flags = HDA_PINCFG_NO_HP_FIXUP; codec->power_save_node = 0; /* avoid click noises */ snd_hda_apply_pincfgs(codec, pincfgs); } } -/* additional fixup for Thinkpad T440s noise problem */ -static void alc_fixup_tpt440(struct hda_codec *codec, - const struct hda_fixup *fix, int action) -{ - struct alc_spec *spec = codec->spec; - - if (action == HDA_FIXUP_ACT_PRE_PROBE) { - spec->shutup = alc_no_shutup; /* reduce click noise */ - spec->gen.mixer_nid = 0; /* reduce background noise */ - } -} - static void alc_shutup_dell_xps13(struct hda_codec *codec) { struct alc_spec *spec = codec->spec; @@ -5016,7 +5005,7 @@ static const struct hda_fixup alc269_fixups[] = { }, [ALC292_FIXUP_TPT440] = { .type = HDA_FIXUP_FUNC, - .v.func = alc_fixup_tpt440, + .v.func = alc_fixup_disable_aamix, .chained = true, .chain_id = ALC292_FIXUP_TPT440_DOCK, }, From 228a0e09d53a3c9048050f976e90c96ea28a666e Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 15 Dec 2015 14:59:58 +0100 Subject: [PATCH 1543/2091] ALSA: hda - Set codec to D3 at reboot/shutdown on Thinkpads commit 70a0976b0c0d90f4246d7e63359d796ec82b87d6 upstream. 
Lenovo Thinkpads with Realtek codecs may still have some loud crackling noises at reboot/shutdown even though a few previous fixes have been applied. It's because the previous fix (disabling the default shutup callback) takes effect only at transition of the codec power state. Meanwhile, at reboot or shutdown, we don't take down the codec power as default, thus it triggers the same problem unless the codec is powered down casually by runtime PM. This patch tries to address the issue. It gives two things: - implement the separate reboot_notify hook to struct alc_spec, and call it optionally if defined. - turn off the codec to D3 for Thinkpad models via this new callback Bugzilla: https://bugzilla.suse.com/show_bug.cgi?id=958439 Reported-and-tested-by: Benjamin Poirier Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index b89e9e4110d6b..0269648043e91 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -111,6 +111,7 @@ struct alc_spec { void (*power_hook)(struct hda_codec *codec); #endif void (*shutup)(struct hda_codec *codec); + void (*reboot_notify)(struct hda_codec *codec); int init_amp; int codec_variant; /* flag for other variants */ @@ -773,6 +774,25 @@ static inline void alc_shutup(struct hda_codec *codec) snd_hda_shutup_pins(codec); } +static void alc_reboot_notify(struct hda_codec *codec) +{ + struct alc_spec *spec = codec->spec; + + if (spec && spec->reboot_notify) + spec->reboot_notify(codec); + else + alc_shutup(codec); +} + +/* power down codec to D3 at reboot/shutdown; set as reboot_notify ops */ +static void alc_d3_at_reboot(struct hda_codec *codec) +{ + snd_hda_codec_set_power_to_all(codec, codec->core.afg, AC_PWRST_D3); + snd_hda_codec_write(codec, codec->core.afg, 0, + AC_VERB_SET_POWER_STATE, AC_PWRST_D3); + msleep(10); +} + #define alc_free snd_hda_gen_free #ifdef CONFIG_PM @@ -818,7 +838,7 @@ static const struct hda_codec_ops alc_patch_ops = { .suspend = alc_suspend, .check_power_status = snd_hda_gen_check_power_status, #endif - .reboot_notify = alc_shutup, + .reboot_notify = alc_reboot_notify, }; @@ -4203,6 +4223,7 @@ static void alc_fixup_tpt440_dock(struct hda_codec *codec, if (action == HDA_FIXUP_ACT_PRE_PROBE) { spec->shutup = alc_no_shutup; /* reduce click noise */ + spec->reboot_notify = alc_d3_at_reboot; /* reduce noise */ spec->parse_flags = HDA_PINCFG_NO_HP_FIXUP; codec->power_save_node = 0; /* avoid click noises */ snd_hda_apply_pincfgs(codec, pincfgs); From 271126c7cbb6c0dabb5eb9dd25e224b53a48f715 Mon Sep 17 00:00:00 2001 From: Anssi Hannula Date: Sun, 13 Dec 2015 20:49:58 +0200 Subject: [PATCH 1544/2091] ALSA: usb-audio: Add a more accurate volume quirk for AudioQuest DragonFly commit 42e3121d90f42e57f6dbd6083dff2f57b3ec7daa upstream. AudioQuest DragonFly DAC reports a volume control range of 0..50 (0x0000..0x0032) which in USB Audio means a range of 0 .. 0.2dB, which is obviously incorrect and would cause software using the dB information in e.g. volume sliders to have a massive volume difference in 100..102% range. Commit 2d1cb7f658fb ("ALSA: usb-audio: add dB range mapping for some devices") added a dB range mapping for it with range 0..50 dB. However, the actual volume mapping seems to be neither linear volume nor linear dB scale, but instead quite close to the cubic mapping e.g. alsamixer uses, with a range of approx. 
-53...0 dB. Replace the previous quirk with a custom dB mapping based on some basic output measurements, using a 10-item range TLV (which will still fit in alsa-lib MAX_TLV_RANGE_SIZE). Tested on AudioQuest DragonFly HW v1.2. The quirk is only applied if the range is 0..50, so if this gets fixed/changed in later HW revisions it will no longer be applied. v2: incorporated Takashi Iwai's suggestion for the quirk application method Signed-off-by: Anssi Hannula Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/mixer.c | 2 ++ sound/usb/mixer_maps.c | 12 ------------ sound/usb/mixer_quirks.c | 37 +++++++++++++++++++++++++++++++++++++ sound/usb/mixer_quirks.h | 4 ++++ 4 files changed, 43 insertions(+), 12 deletions(-) diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index cd8ed2e393a2d..f9a9752d4dbc8 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -1336,6 +1336,8 @@ static void build_feature_ctl(struct mixer_build *state, void *raw_desc, } } + snd_usb_mixer_fu_apply_quirk(state->mixer, cval, unitid, kctl); + range = (cval->max - cval->min) / cval->res; /* * Are there devices with volume range more than 255? I use a bit more diff --git a/sound/usb/mixer_maps.c b/sound/usb/mixer_maps.c index 6a803eff87f71..ddca6547399b0 100644 --- a/sound/usb/mixer_maps.c +++ b/sound/usb/mixer_maps.c @@ -348,13 +348,6 @@ static struct usbmix_name_map bose_companion5_map[] = { { 0 } /* terminator */ }; -/* Dragonfly DAC 1.2, the dB conversion factor is 1 instead of 256 */ -static struct usbmix_dB_map dragonfly_1_2_dB = {0, 5000}; -static struct usbmix_name_map dragonfly_1_2_map[] = { - { 7, NULL, .dB = &dragonfly_1_2_dB }, - { 0 } /* terminator */ -}; - /* * Control map entries */ @@ -470,11 +463,6 @@ static struct usbmix_ctl_map usbmix_ctl_maps[] = { .id = USB_ID(0x05a7, 0x1020), .map = bose_companion5_map, }, - { - /* Dragonfly DAC 1.2 */ - .id = USB_ID(0x21b4, 0x0081), - .map = dragonfly_1_2_map, - }, { 0 } /* terminator */ }; diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c index 337c317ead6fb..48a7450faddd1 100644 --- a/sound/usb/mixer_quirks.c +++ b/sound/usb/mixer_quirks.c @@ -37,6 +37,7 @@ #include #include #include +#include #include "usbaudio.h" #include "mixer.h" @@ -1843,3 +1844,39 @@ void snd_usb_mixer_rc_memory_change(struct usb_mixer_interface *mixer, } } +static void snd_dragonfly_quirk_db_scale(struct usb_mixer_interface *mixer, + struct snd_kcontrol *kctl) +{ + /* Approximation using 10 ranges based on output measurement on hw v1.2. + * This seems close to the cubic mapping e.g. alsamixer uses. 
*/ + static const DECLARE_TLV_DB_RANGE(scale, + 0, 1, TLV_DB_MINMAX_ITEM(-5300, -4970), + 2, 5, TLV_DB_MINMAX_ITEM(-4710, -4160), + 6, 7, TLV_DB_MINMAX_ITEM(-3884, -3710), + 8, 14, TLV_DB_MINMAX_ITEM(-3443, -2560), + 15, 16, TLV_DB_MINMAX_ITEM(-2475, -2324), + 17, 19, TLV_DB_MINMAX_ITEM(-2228, -2031), + 20, 26, TLV_DB_MINMAX_ITEM(-1910, -1393), + 27, 31, TLV_DB_MINMAX_ITEM(-1322, -1032), + 32, 40, TLV_DB_MINMAX_ITEM(-968, -490), + 41, 50, TLV_DB_MINMAX_ITEM(-441, 0), + ); + + usb_audio_info(mixer->chip, "applying DragonFly dB scale quirk\n"); + kctl->tlv.p = scale; + kctl->vd[0].access |= SNDRV_CTL_ELEM_ACCESS_TLV_READ; + kctl->vd[0].access &= ~SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK; +} + +void snd_usb_mixer_fu_apply_quirk(struct usb_mixer_interface *mixer, + struct usb_mixer_elem_info *cval, int unitid, + struct snd_kcontrol *kctl) +{ + switch (mixer->chip->usb_id) { + case USB_ID(0x21b4, 0x0081): /* AudioQuest DragonFly */ + if (unitid == 7 && cval->min == 0 && cval->max == 50) + snd_dragonfly_quirk_db_scale(mixer, kctl); + break; + } +} + diff --git a/sound/usb/mixer_quirks.h b/sound/usb/mixer_quirks.h index bdbfab093816a..177c329cd4ddb 100644 --- a/sound/usb/mixer_quirks.h +++ b/sound/usb/mixer_quirks.h @@ -9,5 +9,9 @@ void snd_emuusb_set_samplerate(struct snd_usb_audio *chip, void snd_usb_mixer_rc_memory_change(struct usb_mixer_interface *mixer, int unitid); +void snd_usb_mixer_fu_apply_quirk(struct usb_mixer_interface *mixer, + struct usb_mixer_elem_info *cval, int unitid, + struct snd_kcontrol *kctl); + #endif /* SND_USB_MIXER_QUIRKS_H */ From 73a5fdacba1f8cf702e93e66ea9753fa85afc2fc Mon Sep 17 00:00:00 2001 From: Anssi Hannula Date: Sun, 13 Dec 2015 20:49:59 +0200 Subject: [PATCH 1545/2091] ALSA: usb-audio: Add sample rate inquiry quirk for AudioQuest DragonFly commit 12a6116e66695a728bcb9616416c508ce9c051a1 upstream. Avoid getting sample rate on AudioQuest DragonFly as it is unsupported and causes noisy "cannot get freq at ep 0x1" messages when playback starts. Signed-off-by: Anssi Hannula Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index eef9b8e4b9498..c73c37930ccdb 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1122,6 +1122,7 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip) case USB_ID(0x045E, 0x0779): /* MS Lifecam HD-3000 */ case USB_ID(0x04D8, 0xFEEA): /* Benchmark DAC1 Pre */ case USB_ID(0x074D, 0x3553): /* Outlaw RR2150 (Micronas UAC3553B) */ + case USB_ID(0x21B4, 0x0081): /* AudioQuest DragonFly */ return true; } return false; From b40f630cfe4d099deca538523542783b79471fc8 Mon Sep 17 00:00:00 2001 From: Xiong Zhang Date: Fri, 18 Dec 2015 13:29:18 +0800 Subject: [PATCH 1546/2091] ALSA: hda - Set SKL+ hda controller power at freeze() and thaw() commit 3e6db33aaf1d42a30339f831ec4850570d6cc7a3 upstream. It takes three minutes to enter into hibernation on some OEM SKL machines and we see many codec spurious response after thaw() opertion. This is because HDA is still in D0 state after freeze() call and pci_pm_freeze/pci_pm_freeze_noirq() don't set D3 hot in pci_bus driver. It seems bios still access HDA when system enter into freeze state, HDA will receive codec response interrupt immediately after thaw() call. Because of this unexpected interrupt, HDA enter into a abnormal state and slow down the system enter into hibernation. 
In this patch, we put HDA into D3 hot state in azx_freeze_noirq() and put HDA into D0 state in azx_thaw_noirq(). V2: Only apply this fix to SKL+ Fix compile error when CONFIG_PM_SLEEP isn't defined [Yet another fix for CONFIG_PM_SLEEP ifdef and the additional comment by tiwai] Signed-off-by: Xiong Zhang Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_intel.c | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 74142f8444efa..38d8ba90c5256 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -840,6 +840,36 @@ static int azx_resume(struct device *dev) } #endif /* CONFIG_PM_SLEEP || SUPPORT_VGA_SWITCHEROO */ +#ifdef CONFIG_PM_SLEEP +/* put codec down to D3 at hibernation for Intel SKL+; + * otherwise BIOS may still access the codec and screw up the driver + */ +#define IS_SKL(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0xa170) +#define IS_SKL_LP(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x9d70) +#define IS_BXT(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x5a98) +#define IS_SKL_PLUS(pci) (IS_SKL(pci) || IS_SKL_LP(pci) || IS_BXT(pci)) + +static int azx_freeze_noirq(struct device *dev) +{ + struct pci_dev *pci = to_pci_dev(dev); + + if (IS_SKL_PLUS(pci)) + pci_set_power_state(pci, PCI_D3hot); + + return 0; +} + +static int azx_thaw_noirq(struct device *dev) +{ + struct pci_dev *pci = to_pci_dev(dev); + + if (IS_SKL_PLUS(pci)) + pci_set_power_state(pci, PCI_D0); + + return 0; +} +#endif /* CONFIG_PM_SLEEP */ + #ifdef CONFIG_PM static int azx_runtime_suspend(struct device *dev) { @@ -940,6 +970,10 @@ static int azx_runtime_idle(struct device *dev) static const struct dev_pm_ops azx_pm = { SET_SYSTEM_SLEEP_PM_OPS(azx_suspend, azx_resume) +#ifdef CONFIG_PM_SLEEP + .freeze_noirq = azx_freeze_noirq, + .thaw_noirq = azx_thaw_noirq, +#endif SET_RUNTIME_PM_OPS(azx_runtime_suspend, azx_runtime_resume, azx_runtime_idle) }; From 37191bd8b4dd9fa695806687235206ddc30d4761 Mon Sep 17 00:00:00 2001 From: Mario Kleiner Date: Tue, 22 Dec 2015 00:45:43 +0100 Subject: [PATCH 1547/2091] ALSA: hda/realtek - Fix silent headphone output on MacPro 4,1 (v2) commit 9f660a1c43890c2cdd1f423fd73654e7ca08fe56 upstream. Without this patch, internal speaker and line-out work, but front headphone output jack stays silent on the Mac Pro 4,1. This code path also gets executed on the MacPro 5,1 due to identical codec SSID, but i don't know if it has any positive or adverse effects there or not. (v2) Implement feedback from Takashi Iwai: Reuse alc889_fixup_mbp_vref and just add a new nid 0x19 for the MacPro 4,1. 
Signed-off-by: Mario Kleiner Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 0269648043e91..cd7fafb862749 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -1787,6 +1787,7 @@ enum { ALC889_FIXUP_MBA11_VREF, ALC889_FIXUP_MBA21_VREF, ALC889_FIXUP_MP11_VREF, + ALC889_FIXUP_MP41_VREF, ALC882_FIXUP_INV_DMIC, ALC882_FIXUP_NO_PRIMARY_HP, ALC887_FIXUP_ASUS_BASS, @@ -1875,7 +1876,7 @@ static void alc889_fixup_mbp_vref(struct hda_codec *codec, const struct hda_fixup *fix, int action) { struct alc_spec *spec = codec->spec; - static hda_nid_t nids[2] = { 0x14, 0x15 }; + static hda_nid_t nids[3] = { 0x14, 0x15, 0x19 }; int i; if (action != HDA_FIXUP_ACT_INIT) @@ -2165,6 +2166,12 @@ static const struct hda_fixup alc882_fixups[] = { .chained = true, .chain_id = ALC885_FIXUP_MACPRO_GPIO, }, + [ALC889_FIXUP_MP41_VREF] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc889_fixup_mbp_vref, + .chained = true, + .chain_id = ALC885_FIXUP_MACPRO_GPIO, + }, [ALC882_FIXUP_INV_DMIC] = { .type = HDA_FIXUP_FUNC, .v.func = alc_fixup_inv_dmic, @@ -2247,7 +2254,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK(0x106b, 0x3f00, "Macbook 5,1", ALC889_FIXUP_IMAC91_VREF), SND_PCI_QUIRK(0x106b, 0x4000, "MacbookPro 5,1", ALC889_FIXUP_IMAC91_VREF), SND_PCI_QUIRK(0x106b, 0x4100, "Macmini 3,1", ALC889_FIXUP_IMAC91_VREF), - SND_PCI_QUIRK(0x106b, 0x4200, "Mac Pro 5,1", ALC885_FIXUP_MACPRO_GPIO), + SND_PCI_QUIRK(0x106b, 0x4200, "Mac Pro 4,1/5,1", ALC889_FIXUP_MP41_VREF), SND_PCI_QUIRK(0x106b, 0x4300, "iMac 9,1", ALC889_FIXUP_IMAC91_VREF), SND_PCI_QUIRK(0x106b, 0x4600, "MacbookPro 5,2", ALC889_FIXUP_IMAC91_VREF), SND_PCI_QUIRK(0x106b, 0x4900, "iMac 9,1 Aluminum", ALC889_FIXUP_IMAC91_VREF), From aa13585b6672c9d6dc9e0f01de03ef3f8c5622b6 Mon Sep 17 00:00:00 2001 From: Jurgen Kramer Date: Mon, 11 Jan 2016 08:16:58 +0100 Subject: [PATCH 1548/2091] ALSA: usb: Add native DSD support for Oppo HA-1 commit a4eae3a506ea4a7d4474cd74e20b423fa8053d91 upstream. This patch adds native DSD support for the Oppo HA-1. It uses a XMOS chipset but they use their own vendor ID. Signed-off-by: Jurgen Kramer Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index c73c37930ccdb..fb9a8a5787a66 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1266,6 +1266,7 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip, case USB_ID(0x20b1, 0x3008): /* iFi Audio micro/nano iDSD */ case USB_ID(0x20b1, 0x2008): /* Matrix Audio X-Sabre */ case USB_ID(0x20b1, 0x300a): /* Matrix Audio Mini-i Pro */ + case USB_ID(0x22d8, 0x0416): /* OPPO HA-1*/ if (fp->altsetting == 2) return SNDRV_PCM_FMTBIT_DSD_U32_BE; break; From 9a6003a362acb814fea7422209be344b822b047a Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Jan 2016 12:38:02 +0100 Subject: [PATCH 1549/2091] ALSA: seq: Fix missing NULL check at remove_events ioctl commit 030e2c78d3a91dd0d27fef37e91950dde333eba1 upstream. snd_seq_ioctl_remove_events() calls snd_seq_fifo_clear() unconditionally even if there is no FIFO assigned, and this leads to an Oops due to NULL dereference. The fix is just to add a proper NULL check. 
Reported-by: Dmitry Vyukov Tested-by: Dmitry Vyukov Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/seq/seq_clientmgr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c index edbdab85fc02f..bd4741442909a 100644 --- a/sound/core/seq/seq_clientmgr.c +++ b/sound/core/seq/seq_clientmgr.c @@ -1962,7 +1962,7 @@ static int snd_seq_ioctl_remove_events(struct snd_seq_client *client, * No restrictions so for a user client we can clear * the whole fifo */ - if (client->type == USER_CLIENT) + if (client->type == USER_CLIENT && client->data.user.fifo) snd_seq_fifo_clear(client->data.user.fifo); } From 49c9eb3db86407868a664ade6da041fabeb457f8 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Jan 2016 15:36:27 +0100 Subject: [PATCH 1550/2091] ALSA: seq: Fix race at timer setup and close commit 3567eb6af614dac436c4b16a8d426f9faed639b3 upstream. ALSA sequencer code has an open race between the timer setup ioctl and the close of the client. This was triggered by syzkaller fuzzer, and a use-after-free was caught there as a result. This patch papers over it by adding a proper queue->timer_mutex lock around the timer-related calls in the relevant code path. Reported-by: Dmitry Vyukov Tested-by: Dmitry Vyukov Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/seq/seq_queue.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/core/seq/seq_queue.c b/sound/core/seq/seq_queue.c index a0cda38205b97..77ec214203558 100644 --- a/sound/core/seq/seq_queue.c +++ b/sound/core/seq/seq_queue.c @@ -142,8 +142,10 @@ static struct snd_seq_queue *queue_new(int owner, int locked) static void queue_delete(struct snd_seq_queue *q) { /* stop and release the timer */ + mutex_lock(&q->timer_mutex); snd_seq_timer_stop(q->timer); snd_seq_timer_close(q); + mutex_unlock(&q->timer_mutex); /* wait until access free */ snd_use_lock_sync(&q->use_lock); /* release resources... */ From 6aee1f8440bfd61d8939989197576e1577a01eb9 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Jan 2016 21:06:39 +0100 Subject: [PATCH 1551/2091] ALSA: hda - Fix white noise on Dell Latitude E5550 commit 98070576c4f77509459c83cd2358617ef0769a38 upstream. Dell Latitude E5550 (1028:062c) has a white noise problem like other Latitude E models, and it gets fixed by the very same quirk as well. 
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=110591 Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index cd7fafb862749..a6da2ed4785bf 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5161,6 +5161,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x05f6, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0615, "Dell Vostro 5470", ALC290_FIXUP_SUBWOOFER_HSJACK), SND_PCI_QUIRK(0x1028, 0x0616, "Dell Vostro 5470", ALC290_FIXUP_SUBWOOFER_HSJACK), + SND_PCI_QUIRK(0x1028, 0x062c, "Dell Latitude E5550", ALC292_FIXUP_DELL_E7X), SND_PCI_QUIRK(0x1028, 0x062e, "Dell Latitude E7450", ALC292_FIXUP_DELL_E7X), SND_PCI_QUIRK(0x1028, 0x0638, "Dell Inspiron 5439", ALC290_FIXUP_MONO_SPEAKERS_HSJACK), SND_PCI_QUIRK(0x1028, 0x064a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), From 27b496cc9e8c4cea094806ae1f54059696afabae Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 13 Jan 2016 07:20:13 +0100 Subject: [PATCH 1552/2091] ALSA: usb-audio: Fix mixer ctl regression of Native Instrument devices commit c4a359a0049f2e17b012b31e801e96566f6391e5 upstream. The commit [da6d276957ea: ALSA: usb-audio: Add resume support for Native Instruments controls] brought a regression where the Native Instrument audio devices don't get the correct value at update due to the missing shift at writing. This patch addresses it. Fixes: da6d276957ea ('ALSA: usb-audio: Add resume support for Native Instruments controls') Reported-and-tested-by: Owen Williams Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/mixer_quirks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c index 48a7450faddd1..db9547d04f385 100644 --- a/sound/usb/mixer_quirks.c +++ b/sound/usb/mixer_quirks.c @@ -803,7 +803,7 @@ static int snd_nativeinstruments_control_put(struct snd_kcontrol *kcontrol, return 0; kcontrol->private_value &= ~(0xff << 24); - kcontrol->private_value |= newval; + kcontrol->private_value |= (unsigned int)newval << 24; err = snd_ni_update_cur_val(list); return err < 0 ? err : 1; } From 466c99bd815a1ae189d883b509b067c9a74a30f9 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 14 Jan 2016 16:30:58 +0100 Subject: [PATCH 1553/2091] ALSA: timer: Harden slave timer list handling commit b5a663aa426f4884c71cd8580adae73f33570f0d upstream. A slave timer instance might be still accessible in a racy way while operating the master instance as it lacks of locking. Since the master operation is mostly protected with timer->lock, we should cope with it while changing the slave instance, too. Also, some linked lists (active_list and ack_list) of slave instances aren't unlinked immediately at stopping or closing, and this may lead to unexpected accesses. This patch tries to address these issues. It adds spin lock of timer->lock (either from master or slave, which is equivalent) in a few places. For avoiding a deadlock, we ensure that the global slave_active_lock is always locked at first before each timer lock. Also, ack and active_list of slave instances are properly unlinked at snd_timer_stop() and snd_timer_close(). Last but not least, remove the superfluous call of _snd_timer_stop() at removing slave links. This is a noop, and calling it may confuse readers wrt locking. 
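The locking rule the patch relies on is worth spelling out: the global slave_active_lock is always taken before the per-timer lock, and the slave's ack_list/active_list are unlinked while both are held. A simplified sketch of that nesting is below; the function name is made up, the field names follow the diff, and slave_active_lock is file-local to sound/core/timer.c, so this only illustrates the ordering rather than standing alone.

#include <sound/timer.h>

static void detach_slave_example(struct snd_timer *timer,
				 struct snd_timer_instance *slave)
{
	/* lock order: global slave_active_lock first, then timer->lock;
	 * taking them the other way round anywhere would risk deadlock */
	spin_lock_irq(&slave_active_lock);
	spin_lock(&timer->lock);

	list_del_init(&slave->ack_list);	/* unlink immediately, under both locks */
	list_del_init(&slave->active_list);

	spin_unlock(&timer->lock);		/* release in reverse order */
	spin_unlock_irq(&slave_active_lock);
}
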
Further cleanup will follow in a later patch. Actually we've got reports of use-after-free by syzkaller fuzzer, and this hopefully fixes these issues. Reported-by: Dmitry Vyukov Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/timer.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/sound/core/timer.c b/sound/core/timer.c index a9a1a047c521f..149d4f24b9156 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -215,11 +215,13 @@ static void snd_timer_check_master(struct snd_timer_instance *master) slave->slave_id == master->slave_id) { list_move_tail(&slave->open_list, &master->slave_list_head); spin_lock_irq(&slave_active_lock); + spin_lock(&master->timer->lock); slave->master = master; slave->timer = master->timer; if (slave->flags & SNDRV_TIMER_IFLG_RUNNING) list_add_tail(&slave->active_list, &master->slave_active_head); + spin_unlock(&master->timer->lock); spin_unlock_irq(&slave_active_lock); } } @@ -346,15 +348,18 @@ int snd_timer_close(struct snd_timer_instance *timeri) timer->hw.close) timer->hw.close(timer); /* remove slave links */ + spin_lock_irq(&slave_active_lock); + spin_lock(&timer->lock); list_for_each_entry_safe(slave, tmp, &timeri->slave_list_head, open_list) { - spin_lock_irq(&slave_active_lock); - _snd_timer_stop(slave, 1, SNDRV_TIMER_EVENT_RESOLUTION); list_move_tail(&slave->open_list, &snd_timer_slave_list); slave->master = NULL; slave->timer = NULL; - spin_unlock_irq(&slave_active_lock); + list_del_init(&slave->ack_list); + list_del_init(&slave->active_list); } + spin_unlock(&timer->lock); + spin_unlock_irq(&slave_active_lock); mutex_unlock(®ister_mutex); } out: @@ -441,9 +446,12 @@ static int snd_timer_start_slave(struct snd_timer_instance *timeri) spin_lock_irqsave(&slave_active_lock, flags); timeri->flags |= SNDRV_TIMER_IFLG_RUNNING; - if (timeri->master) + if (timeri->master && timeri->timer) { + spin_lock(&timeri->timer->lock); list_add_tail(&timeri->active_list, &timeri->master->slave_active_head); + spin_unlock(&timeri->timer->lock); + } spin_unlock_irqrestore(&slave_active_lock, flags); return 1; /* delayed start */ } @@ -489,6 +497,8 @@ static int _snd_timer_stop(struct snd_timer_instance * timeri, if (!keep_flag) { spin_lock_irqsave(&slave_active_lock, flags); timeri->flags &= ~SNDRV_TIMER_IFLG_RUNNING; + list_del_init(&timeri->ack_list); + list_del_init(&timeri->active_list); spin_unlock_irqrestore(&slave_active_lock, flags); } goto __end; From 042b42c579b94d2e5b53fe208d16dc8fb5dc0b10 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Wed, 13 Jan 2016 11:51:38 +0800 Subject: [PATCH 1554/2091] ALSA: hda - fix the headset mic detection problem for a Dell laptop commit 0a1f90a982e85f4921bed606a6b41a24f4de2ae1 upstream. The machine uses codec alc255, and the pin configuration value for pin 0x14 on this machine is 0x90171130 which is not in the pin quirk table yet. 
BugLink: https://bugs.launchpad.net/bugs/1533461 Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index a6da2ed4785bf..4f1ce50b22f23 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5495,6 +5495,10 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x21, 0x02211040}), SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, ALC255_STANDARD_PINS, + {0x12, 0x90a60170}, + {0x14, 0x90171130}, + {0x21, 0x02211040}), + SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, {0x12, 0x90a60170}, {0x14, 0x90170140}, {0x17, 0x40000000}, From ac905ca58370789645e813d8abfa5871c93e9e36 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 13 Jan 2016 17:48:01 +0100 Subject: [PATCH 1555/2091] ALSA: timer: Fix race among timer ioctls commit af368027a49a751d6ff4ee9e3f9961f35bb4fede upstream. ALSA timer ioctls have an open race and this may lead to a use-after-free of timer instance object. A simplistic fix is to make each ioctl exclusive. We have already tread_sem for controlling the tread, and extend this as a global mutex to be applied to each ioctl. The downside is, of course, the worse concurrency. But these ioctls aren't to be parallel accessible, in anyway, so it should be fine to serialize there. Reported-by: Dmitry Vyukov Tested-by: Dmitry Vyukov Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/timer.c | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/sound/core/timer.c b/sound/core/timer.c index 149d4f24b9156..f5ec1ba4c4bb5 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -73,7 +73,7 @@ struct snd_timer_user { struct timespec tstamp; /* trigger tstamp */ wait_queue_head_t qchange_sleep; struct fasync_struct *fasync; - struct mutex tread_sem; + struct mutex ioctl_lock; }; /* list of timers */ @@ -1263,7 +1263,7 @@ static int snd_timer_user_open(struct inode *inode, struct file *file) return -ENOMEM; spin_lock_init(&tu->qlock); init_waitqueue_head(&tu->qchange_sleep); - mutex_init(&tu->tread_sem); + mutex_init(&tu->ioctl_lock); tu->ticks = 1; tu->queue_size = 128; tu->queue = kmalloc(tu->queue_size * sizeof(struct snd_timer_read), @@ -1283,8 +1283,10 @@ static int snd_timer_user_release(struct inode *inode, struct file *file) if (file->private_data) { tu = file->private_data; file->private_data = NULL; + mutex_lock(&tu->ioctl_lock); if (tu->timeri) snd_timer_close(tu->timeri); + mutex_unlock(&tu->ioctl_lock); kfree(tu->queue); kfree(tu->tqueue); kfree(tu); @@ -1522,7 +1524,6 @@ static int snd_timer_user_tselect(struct file *file, int err = 0; tu = file->private_data; - mutex_lock(&tu->tread_sem); if (tu->timeri) { snd_timer_close(tu->timeri); tu->timeri = NULL; @@ -1566,7 +1567,6 @@ static int snd_timer_user_tselect(struct file *file, } __err: - mutex_unlock(&tu->tread_sem); return err; } @@ -1779,7 +1779,7 @@ enum { SNDRV_TIMER_IOCTL_PAUSE_OLD = _IO('T', 0x23), }; -static long snd_timer_user_ioctl(struct file *file, unsigned int cmd, +static long __snd_timer_user_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct snd_timer_user *tu; @@ -1796,17 +1796,11 @@ static long snd_timer_user_ioctl(struct file *file, unsigned int cmd, { int xarg; - mutex_lock(&tu->tread_sem); - if (tu->timeri) { /* too late */ - 
mutex_unlock(&tu->tread_sem); + if (tu->timeri) /* too late */ return -EBUSY; - } - if (get_user(xarg, p)) { - mutex_unlock(&tu->tread_sem); + if (get_user(xarg, p)) return -EFAULT; - } tu->tread = xarg ? 1 : 0; - mutex_unlock(&tu->tread_sem); return 0; } case SNDRV_TIMER_IOCTL_GINFO: @@ -1839,6 +1833,18 @@ static long snd_timer_user_ioctl(struct file *file, unsigned int cmd, return -ENOTTY; } +static long snd_timer_user_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct snd_timer_user *tu = file->private_data; + long ret; + + mutex_lock(&tu->ioctl_lock); + ret = __snd_timer_user_ioctl(file, cmd, arg); + mutex_unlock(&tu->ioctl_lock); + return ret; +} + static int snd_timer_user_fasync(int fd, struct file * file, int on) { struct snd_timer_user *tu; From dc5697eb3297920e20b53fdf4c40891e1ed0eafd Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 13 Jan 2016 21:35:06 +0100 Subject: [PATCH 1556/2091] ALSA: timer: Fix double unlink of active_list commit ee8413b01045c74340aa13ad5bdf905de32be736 upstream. ALSA timer instance object has a couple of linked lists and they are unlinked unconditionally at snd_timer_stop(). Meanwhile snd_timer_interrupt() unlinks it, but it calls list_del() which leaves the element list itself unchanged. This ends up with unlinking twice, and it was caught by syzkaller fuzzer. The fix is to use list_del_init() variant properly there, too. Reported-by: Dmitry Vyukov Tested-by: Dmitry Vyukov Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/timer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/core/timer.c b/sound/core/timer.c index f5ec1ba4c4bb5..3d2ba526093a6 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -704,7 +704,7 @@ void snd_timer_interrupt(struct snd_timer * timer, unsigned long ticks_left) } else { ti->flags &= ~SNDRV_TIMER_IFLG_RUNNING; if (--timer->running) - list_del(&ti->active_list); + list_del_init(&ti->active_list); } if ((timer->hw.flags & SNDRV_TIMER_HW_TASKLET) || (ti->flags & SNDRV_TIMER_IFLG_FAST)) From d1a55757fefff39a7b71305a78c2722d3b9b6529 Mon Sep 17 00:00:00 2001 From: Nicolas Boichat Date: Mon, 18 Jan 2016 21:35:01 +0800 Subject: [PATCH 1557/2091] ALSA: seq: Fix snd_seq_call_port_info_ioctl in compat mode commit 9586495dc3011a80602329094e746dbce16cb1f1 upstream. This reverts one hunk of commit ef44a1ec6eee ("ALSA: sound/core: use memdup_user()"), which replaced a number of kmalloc followed by memcpy with memdup calls. In this case, we are copying from a struct snd_seq_port_info32 to a struct snd_seq_port_info, but the latter is 4 bytes longer than the 32-bit version, so we need to separate kmalloc and copy calls. 
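The replacement pattern, which the next patch applies to snd_pcm_hw_params as well, is: allocate the full native-sized structure, copy_from_user() only the 32-bit-sized prefix, then set the remaining native-only fields by hand. A sketch with made-up foo/foo32 structures (not the real snd_seq_port_info layout):

#include <linux/err.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/uaccess.h>

struct foo32 { u32 flags; u32 time_queue; };		/* what 32-bit userspace passes */
struct foo   { u32 flags; u32 time_queue; void *kernel; };	/* native struct is bigger */

static struct foo *copy_foo_from_compat(struct foo32 __user *data32)
{
	struct foo *data = kmalloc(sizeof(*data), GFP_KERNEL);	/* full native size */

	if (!data)
		return ERR_PTR(-ENOMEM);
	/* copy only the part that exists in the compat layout... */
	if (copy_from_user(data, data32, sizeof(*data32))) {
		kfree(data);
		return ERR_PTR(-EFAULT);
	}
	/* ...and initialize the native-only tail explicitly, so nothing
	 * downstream reads uninitialized memory or runs past the short
	 * allocation that memdup_user(data32, sizeof(*data32)) produced */
	data->kernel = NULL;
	return data;
}
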
Fixes: ef44a1ec6eee ('ALSA: sound/core: use memdup_user()') Signed-off-by: Nicolas Boichat Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/seq/seq_compat.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/sound/core/seq/seq_compat.c b/sound/core/seq/seq_compat.c index 81f7c109dc46e..65175902a68a8 100644 --- a/sound/core/seq/seq_compat.c +++ b/sound/core/seq/seq_compat.c @@ -49,11 +49,12 @@ static int snd_seq_call_port_info_ioctl(struct snd_seq_client *client, unsigned struct snd_seq_port_info *data; mm_segment_t fs; - data = memdup_user(data32, sizeof(*data32)); - if (IS_ERR(data)) - return PTR_ERR(data); + data = kmalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; - if (get_user(data->flags, &data32->flags) || + if (copy_from_user(data, data32, sizeof(*data32)) || + get_user(data->flags, &data32->flags) || get_user(data->time_queue, &data32->time_queue)) goto error; data->kernel = NULL; From 281bedb8728f02a9b8bd5f5e342883e6d255abdc Mon Sep 17 00:00:00 2001 From: Nicolas Boichat Date: Mon, 18 Jan 2016 21:35:00 +0800 Subject: [PATCH 1558/2091] ALSA: pcm: Fix snd_pcm_hw_params struct copy in compat mode commit 43c54b8c7cfe22f868a751ba8a59abf1724160b1 upstream. This reverts one hunk of commit ef44a1ec6eee ("ALSA: sound/core: use memdup_user()"), which replaced a number of kmalloc followed by memcpy with memdup calls. In this case, we are copying from a struct snd_pcm_hw_params32 to a struct snd_pcm_hw_params, but the latter is 4 bytes longer than the 32-bit version, so we need to separate kmalloc and copy calls. This actually leads to an out-of-bounds memory access later on in sound/soc/soc-pcm.c:soc_pcm_hw_params() (detected using KASan). Fixes: ef44a1ec6eee ('ALSA: sound/core: use memdup_user()') Signed-off-by: Nicolas Boichat Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/pcm_compat.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/sound/core/pcm_compat.c b/sound/core/pcm_compat.c index b48b434444ed0..9630e9f72b7ba 100644 --- a/sound/core/pcm_compat.c +++ b/sound/core/pcm_compat.c @@ -255,10 +255,15 @@ static int snd_pcm_ioctl_hw_params_compat(struct snd_pcm_substream *substream, if (! (runtime = substream->runtime)) return -ENOTTY; - /* only fifo_size is different, so just copy all */ - data = memdup_user(data32, sizeof(*data32)); - if (IS_ERR(data)) - return PTR_ERR(data); + data = kmalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + /* only fifo_size (RO from userspace) is different, so just copy all */ + if (copy_from_user(data, data32, sizeof(*data32))) { + err = -EFAULT; + goto error; + } if (refine) err = snd_pcm_hw_refine(substream, data); From 965b1203f399676ac4989a0876336e212a71085b Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 18 Jan 2016 13:52:47 +0100 Subject: [PATCH 1559/2091] ALSA: hrtimer: Fix stall by hrtimer_cancel() commit 2ba1fe7a06d3624f9a7586d672b55f08f7c670f3 upstream. hrtimer_cancel() waits for the completion from the callback, thus it must not be called inside the callback itself. This was already a problem in the past with ALSA hrtimer driver, and the early commit [fcfdebe70759: ALSA: hrtimer - Fix lock-up] tried to address it. However, the previous fix is still insufficient: it may still cause a lockup when the ALSA timer instance reprograms itself in its callback. Then it invokes the start function even in snd_timer_interrupt() that is called in hrtimer callback itself, results in a CPU stall. 
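The stall is structural: hrtimer_cancel() blocks until a running callback has finished, so if it is reached from the callback itself it ends up waiting for itself. The non-blocking variant used by the fix below, hrtimer_try_to_cancel(), simply reports whether the cancel succeeded. A sketch with a made-up container struct:

#include <linux/atomic.h>
#include <linux/hrtimer.h>

struct my_timer {
	struct hrtimer hrt;
	atomic_t running;
};

static int my_stop(struct my_timer *stime)
{
	atomic_set(&stime->running, 0);

	/* hrtimer_cancel(&stime->hrt) would wait for the callback to finish,
	 * which is a self-deadlock (CPU stall) when we are called from the
	 * callback path. hrtimer_try_to_cancel() never blocks; it returns
	 *    1  timer was pending and has been cancelled
	 *    0  timer was not active
	 *   -1  callback is running right now, so just let it finish
	 */
	hrtimer_try_to_cancel(&stime->hrt);
	return 0;
}
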
This is no hypothetical problem but actually triggered by syzkaller fuzzer. This patch tries to fix the issue again. Now we call hrtimer_try_to_cancel() at both start and stop functions so that it won't fall into a deadlock, yet giving some chance to cancel the queue if the functions have been called outside the callback. The proper hrtimer_cancel() is called in anyway at closing, so this should be enough. Reported-and-tested-by: Dmitry Vyukov Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/hrtimer.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/core/hrtimer.c b/sound/core/hrtimer.c index 886be7da989d1..38514ed6e55c2 100644 --- a/sound/core/hrtimer.c +++ b/sound/core/hrtimer.c @@ -90,7 +90,7 @@ static int snd_hrtimer_start(struct snd_timer *t) struct snd_hrtimer *stime = t->private_data; atomic_set(&stime->running, 0); - hrtimer_cancel(&stime->hrt); + hrtimer_try_to_cancel(&stime->hrt); hrtimer_start(&stime->hrt, ns_to_ktime(t->sticks * resolution), HRTIMER_MODE_REL); atomic_set(&stime->running, 1); @@ -101,6 +101,7 @@ static int snd_hrtimer_stop(struct snd_timer *t) { struct snd_hrtimer *stime = t->private_data; atomic_set(&stime->running, 0); + hrtimer_try_to_cancel(&stime->hrt); return 0; } From efcf073d7fa1aa1fb2a8d3f3db373e1a5ecbc60a Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 18 Jan 2016 14:12:40 +0100 Subject: [PATCH 1560/2091] ALSA: control: Avoid kernel warnings from tlv ioctl with numid 0 commit c0bcdbdff3ff73a54161fca3cb8b6cdbd0bb8762 upstream. When a TLV ioctl with numid zero is handled, the driver may spew a kernel warning with a stack trace at each call. The check was intended obviously only for a kernel driver, but not for a user interaction. Let's fix it. This was spotted by syzkaller fuzzer. Reported-by: Dmitry Vyukov Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/control.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/core/control.c b/sound/core/control.c index 196a6fe100ca8..a85d45595d02a 100644 --- a/sound/core/control.c +++ b/sound/core/control.c @@ -1405,6 +1405,8 @@ static int snd_ctl_tlv_ioctl(struct snd_ctl_file *file, return -EFAULT; if (tlv.length < sizeof(unsigned int) * 2) return -EINVAL; + if (!tlv.numid) + return -EINVAL; down_read(&card->controls_rwsem); kctl = snd_ctl_find_numid(card, tlv.numid); if (kctl == NULL) { From a63fabd4ac6c29f01fa630e43c1597e8e6680a97 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 18 Jan 2016 09:17:30 +0100 Subject: [PATCH 1561/2091] ALSA: hda - Fix bass pin fixup for ASUS N550JX commit db8948e653e12b218058bb6696f4a33fa7845f64 upstream. ASUS N550JX (PCI SSID 1043:13df) requires the same fixup for a bass speaker output pin as other N550 models. 
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=110001 Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 4f1ce50b22f23..8189f02f84462 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6526,6 +6526,7 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x069f, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x1632, "HP RP5800", ALC662_FIXUP_HP_RP5800), SND_PCI_QUIRK(0x1043, 0x11cd, "Asus N550", ALC662_FIXUP_BASS_1A), + SND_PCI_QUIRK(0x1043, 0x13df, "Asus N550JX", ALC662_FIXUP_BASS_1A), SND_PCI_QUIRK(0x1043, 0x1477, "ASUS N56VZ", ALC662_FIXUP_BASS_MODE4_CHMAP), SND_PCI_QUIRK(0x1043, 0x15a7, "ASUS UX51VZH", ALC662_FIXUP_BASS_16), SND_PCI_QUIRK(0x1043, 0x1b73, "ASUS N55SF", ALC662_FIXUP_BASS_16), From 2b332719b8f033d77070a4b58e41105b658dc2eb Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 20 Jan 2016 17:19:02 +0100 Subject: [PATCH 1562/2091] ALSA: hda - Flush the pending probe work at remove commit 991f86d7ae4e1f8c15806e62f97af519e3cdd860 upstream. As HD-audio driver does deferred probe internally via workqueue, the driver might go into the mixed state doing both probe and remove when the module gets unloaded during the probe work. This eventually triggers an Oops, unsurprisingly. For avoiding this race, we just need to flush the pending probe work explicitly before actually starting the resource release. Bugzilla: https://bugzilla.opensuse.org/show_bug.cgi?id=960710 Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_intel.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 38d8ba90c5256..09920ba55ba19 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -1972,9 +1972,17 @@ static int azx_probe_continue(struct azx *chip) static void azx_remove(struct pci_dev *pci) { struct snd_card *card = pci_get_drvdata(pci); + struct azx *chip; + struct hda_intel *hda; + + if (card) { + /* flush the pending probing work */ + chip = card->private_data; + hda = container_of(chip, struct hda_intel, chip); + flush_work(&hda->probe_work); - if (card) snd_card_free(card); + } } static void azx_shutdown(struct pci_dev *pci) From b309c8bf5a4a524b30f9e4013828eed9dbea1e5c Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 21 Jan 2016 17:19:31 +0100 Subject: [PATCH 1563/2091] ALSA: timer: Handle disconnection more safely commit 230323dac060123c340cf75997971145a42661ee upstream. Currently ALSA timer device doesn't take the disconnection into account very well; it merely unlinks the timer device at disconnection callback but does nothing else. Because of this, when an application accessing the timer device is disconnected, it may release the resource before actually closed. In most cases, it results in a warning message indicating a leftover timer instance like: ALSA: timer xxxx is busy? But basically this is an open race. This patch tries to address it. The strategy is like other ALSA devices: namely, - Manage card's refcount at each open/close - Wake up the pending tasks at disconnection - Check the shutdown flag appropriately at each possible call Note that this patch has one ugly hack to handle the wakeup of pending tasks. It'd be cleaner to introduce a new disconnect op to snd_timer_instance ops. 
But since it would lead to internal ABI breakage and it eventually increase my own work when backporting to stable kernels, I took a different path to implement locally in timer.c. A cleanup patch will follow at next for 4.5 kernel. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=109431 Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/timer.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/sound/core/timer.c b/sound/core/timer.c index 3d2ba526093a6..a419878901c46 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -65,6 +65,7 @@ struct snd_timer_user { int qtail; int qused; int queue_size; + bool disconnected; struct snd_timer_read *queue; struct snd_timer_tread *tqueue; spinlock_t qlock; @@ -290,6 +291,9 @@ int snd_timer_open(struct snd_timer_instance **ti, mutex_unlock(®ister_mutex); return -ENOMEM; } + /* take a card refcount for safe disconnection */ + if (timer->card) + get_device(&timer->card->card_dev); timeri->slave_class = tid->dev_sclass; timeri->slave_id = slave_id; if (list_empty(&timer->open_list_head) && timer->hw.open) @@ -360,6 +364,9 @@ int snd_timer_close(struct snd_timer_instance *timeri) } spin_unlock(&timer->lock); spin_unlock_irq(&slave_active_lock); + /* release a card refcount for safe disconnection */ + if (timer->card) + put_device(&timer->card->card_dev); mutex_unlock(®ister_mutex); } out: @@ -475,6 +482,8 @@ int snd_timer_start(struct snd_timer_instance *timeri, unsigned int ticks) timer = timeri->timer; if (timer == NULL) return -EINVAL; + if (timer->card && timer->card->shutdown) + return -ENODEV; spin_lock_irqsave(&timer->lock, flags); timeri->ticks = timeri->cticks = ticks; timeri->pticks = 0; @@ -509,6 +518,10 @@ static int _snd_timer_stop(struct snd_timer_instance * timeri, spin_lock_irqsave(&timer->lock, flags); list_del_init(&timeri->ack_list); list_del_init(&timeri->active_list); + if (timer->card && timer->card->shutdown) { + spin_unlock_irqrestore(&timer->lock, flags); + return 0; + } if ((timeri->flags & SNDRV_TIMER_IFLG_RUNNING) && !(--timer->running)) { timer->hw.stop(timer); @@ -571,6 +584,8 @@ int snd_timer_continue(struct snd_timer_instance *timeri) timer = timeri->timer; if (! 
timer) return -EINVAL; + if (timer->card && timer->card->shutdown) + return -ENODEV; spin_lock_irqsave(&timer->lock, flags); if (!timeri->cticks) timeri->cticks = 1; @@ -634,6 +649,9 @@ static void snd_timer_tasklet(unsigned long arg) unsigned long resolution, ticks; unsigned long flags; + if (timer->card && timer->card->shutdown) + return; + spin_lock_irqsave(&timer->lock, flags); /* now process all callbacks */ while (!list_empty(&timer->sack_list_head)) { @@ -674,6 +692,9 @@ void snd_timer_interrupt(struct snd_timer * timer, unsigned long ticks_left) if (timer == NULL) return; + if (timer->card && timer->card->shutdown) + return; + spin_lock_irqsave(&timer->lock, flags); /* remember the current resolution */ @@ -884,11 +905,28 @@ static int snd_timer_dev_register(struct snd_device *dev) return 0; } +/* just for reference in snd_timer_dev_disconnect() below */ +static void snd_timer_user_ccallback(struct snd_timer_instance *timeri, + int event, struct timespec *tstamp, + unsigned long resolution); + static int snd_timer_dev_disconnect(struct snd_device *device) { struct snd_timer *timer = device->device_data; + struct snd_timer_instance *ti; + mutex_lock(®ister_mutex); list_del_init(&timer->device_list); + /* wake up pending sleepers */ + list_for_each_entry(ti, &timer->open_list_head, open_list) { + /* FIXME: better to have a ti.disconnect() op */ + if (ti->ccallback == snd_timer_user_ccallback) { + struct snd_timer_user *tu = ti->callback_data; + + tu->disconnected = true; + wake_up(&tu->qchange_sleep); + } + } mutex_unlock(®ister_mutex); return 0; } @@ -899,6 +937,8 @@ void snd_timer_notify(struct snd_timer *timer, int event, struct timespec *tstam unsigned long resolution = 0; struct snd_timer_instance *ti, *ts; + if (timer->card && timer->card->shutdown) + return; if (! (timer->hw.flags & SNDRV_TIMER_HW_SLAVE)) return; if (snd_BUG_ON(event < SNDRV_TIMER_EVENT_MSTART || @@ -1057,6 +1097,8 @@ static void snd_timer_proc_read(struct snd_info_entry *entry, mutex_lock(®ister_mutex); list_for_each_entry(timer, &snd_timer_list, device_list) { + if (timer->card && timer->card->shutdown) + continue; switch (timer->tmr_class) { case SNDRV_TIMER_CLASS_GLOBAL: snd_iprintf(buffer, "G%i: ", timer->tmr_device); @@ -1882,6 +1924,10 @@ static ssize_t snd_timer_user_read(struct file *file, char __user *buffer, remove_wait_queue(&tu->qchange_sleep, &wait); + if (tu->disconnected) { + err = -ENODEV; + break; + } if (signal_pending(current)) { err = -ERESTARTSYS; break; @@ -1931,6 +1977,8 @@ static unsigned int snd_timer_user_poll(struct file *file, poll_table * wait) mask = 0; if (tu->qused) mask |= POLLIN | POLLRDNORM; + if (tu->disconnected) + mask |= POLLERR; return mask; } From 1c98861e646744157dc97cf24d2d584774521452 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 5 Nov 2015 23:53:03 +0000 Subject: [PATCH 1564/2091] ASoC: rsnd: fixup SCU_SYS_INT_EN1 address commit 021c5d9469960b8c68aa1d1825f7bfd8d61e157d upstream. cfcefe0126 ("ASoC: rsnd: add recovery support for under/over flow error on SRC") added SCU_SYS_INT_EN1 address, but it should be 0x1d4, not 0x1c4. This patch fixup it. 
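The corrected value also follows directly from the surrounding register layout, where the SCU_SYS_* registers sit at consecutive 4-byte offsets (see the table in the diff below):

/* offsets as listed in rsnd_gen2_probe(); illustrative defines only */
#define SCU_SYS_STATUS0_OFFSET	0x1c8
#define SCU_SYS_INT_EN0_OFFSET	0x1cc
#define SCU_SYS_STATUS1_OFFSET	0x1d0
#define SCU_SYS_INT_EN1_OFFSET	0x1d4	/* STATUS1 + 4; 0x1c4 would land before STATUS0 */
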
Fixes: cfcefe0126 ("ASoC: rsnd: add recovery support for under/over flow error on SRC") Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/sh/rcar/gen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/sh/rcar/gen.c b/sound/soc/sh/rcar/gen.c index 8c7dc51b1c4fd..f7a0cb786d5c3 100644 --- a/sound/soc/sh/rcar/gen.c +++ b/sound/soc/sh/rcar/gen.c @@ -214,7 +214,7 @@ static int rsnd_gen2_probe(struct platform_device *pdev, RSND_GEN_S_REG(SCU_SYS_STATUS0, 0x1c8), RSND_GEN_S_REG(SCU_SYS_INT_EN0, 0x1cc), RSND_GEN_S_REG(SCU_SYS_STATUS1, 0x1d0), - RSND_GEN_S_REG(SCU_SYS_INT_EN1, 0x1c4), + RSND_GEN_S_REG(SCU_SYS_INT_EN1, 0x1d4), RSND_GEN_M_REG(SRC_SWRSR, 0x200, 0x40), RSND_GEN_M_REG(SRC_SRCIR, 0x204, 0x40), RSND_GEN_M_REG(SRC_ADINR, 0x214, 0x40), From 64fa05313b0aa4802e0d5be0331eb6cd68baf7a0 Mon Sep 17 00:00:00 2001 From: Sachin Pandhare Date: Tue, 10 Nov 2015 23:38:02 +0530 Subject: [PATCH 1565/2091] ASoC: wm8962: correct addresses for HPF_C_0/1 commit e9f96bc53c1b959859599cb30ce6fd4fbb4448c2 upstream. From datasheet: R17408 (4400h) HPF_C_1 R17409 (4401h) HPF_C_0 17048 -> 17408 (0x4400) 17049 -> 17409 (0x4401) Signed-off-by: Sachin Pandhare Acked-by: Charles Keepax Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/wm8962.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/wm8962.c b/sound/soc/codecs/wm8962.c index 118b0034ba235..154c1a24a303f 100644 --- a/sound/soc/codecs/wm8962.c +++ b/sound/soc/codecs/wm8962.c @@ -365,8 +365,8 @@ static struct reg_default wm8962_reg[] = { { 16924, 0x0059 }, /* R16924 - HDBASS_PG_1 */ { 16925, 0x999A }, /* R16925 - HDBASS_PG_0 */ - { 17048, 0x0083 }, /* R17408 - HPF_C_1 */ - { 17049, 0x98AD }, /* R17409 - HPF_C_0 */ + { 17408, 0x0083 }, /* R17408 - HPF_C_1 */ + { 17409, 0x98AD }, /* R17409 - HPF_C_0 */ { 17920, 0x007F }, /* R17920 - ADCL_RETUNE_C1_1 */ { 17921, 0xFFFF }, /* R17921 - ADCL_RETUNE_C1_0 */ From 5a1109e05e0907b324db88b8e376fb5174facd2c Mon Sep 17 00:00:00 2001 From: John Keeping Date: Wed, 9 Dec 2015 11:38:13 +0000 Subject: [PATCH 1566/2091] ASoC: es8328: Fix deemphasis values commit 84ebac4d04d25ac5c1b1dc3ae621fd465eb38f4e upstream. This is using completely the wrong mask and value when updating the register. Since the correct values are already defined in the header, switch to using a table with explicit constants rather than shifting the array index. Signed-off-by: John Keeping Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/es8328.c | 25 +++++++++++++++++-------- sound/soc/codecs/es8328.h | 1 + 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/sound/soc/codecs/es8328.c b/sound/soc/codecs/es8328.c index c5f35a07e8e48..3ad7f5be1cfa9 100644 --- a/sound/soc/codecs/es8328.c +++ b/sound/soc/codecs/es8328.c @@ -85,7 +85,15 @@ static const DECLARE_TLV_DB_SCALE(pga_tlv, 0, 300, 0); static const DECLARE_TLV_DB_SCALE(bypass_tlv, -1500, 300, 0); static const DECLARE_TLV_DB_SCALE(mic_tlv, 0, 300, 0); -static const int deemph_settings[] = { 0, 32000, 44100, 48000 }; +static const struct { + int rate; + unsigned int val; +} deemph_settings[] = { + { 0, ES8328_DACCONTROL6_DEEMPH_OFF }, + { 32000, ES8328_DACCONTROL6_DEEMPH_32k }, + { 44100, ES8328_DACCONTROL6_DEEMPH_44_1k }, + { 48000, ES8328_DACCONTROL6_DEEMPH_48k }, +}; static int es8328_set_deemph(struct snd_soc_codec *codec) { @@ -97,21 +105,22 @@ static int es8328_set_deemph(struct snd_soc_codec *codec) * rate. 
*/ if (es8328->deemph) { - best = 1; - for (i = 2; i < ARRAY_SIZE(deemph_settings); i++) { - if (abs(deemph_settings[i] - es8328->playback_fs) < - abs(deemph_settings[best] - es8328->playback_fs)) + best = 0; + for (i = 1; i < ARRAY_SIZE(deemph_settings); i++) { + if (abs(deemph_settings[i].rate - es8328->playback_fs) < + abs(deemph_settings[best].rate - es8328->playback_fs)) best = i; } - val = best << 1; + val = deemph_settings[best].val; } else { - val = 0; + val = ES8328_DACCONTROL6_DEEMPH_OFF; } dev_dbg(codec->dev, "Set deemphasis %d\n", val); - return snd_soc_update_bits(codec, ES8328_DACCONTROL6, 0x6, val); + return snd_soc_update_bits(codec, ES8328_DACCONTROL6, + ES8328_DACCONTROL6_DEEMPH_MASK, val); } static int es8328_get_deemph(struct snd_kcontrol *kcontrol, diff --git a/sound/soc/codecs/es8328.h b/sound/soc/codecs/es8328.h index cb36afe10c0ec..156c748c89c7e 100644 --- a/sound/soc/codecs/es8328.h +++ b/sound/soc/codecs/es8328.h @@ -153,6 +153,7 @@ int es8328_probe(struct device *dev, struct regmap *regmap); #define ES8328_DACCONTROL6_CLICKFREE (1 << 3) #define ES8328_DACCONTROL6_DAC_INVR (1 << 4) #define ES8328_DACCONTROL6_DAC_INVL (1 << 5) +#define ES8328_DACCONTROL6_DEEMPH_MASK (3 << 6) #define ES8328_DACCONTROL6_DEEMPH_OFF (0 << 6) #define ES8328_DACCONTROL6_DEEMPH_32k (1 << 6) #define ES8328_DACCONTROL6_DEEMPH_44_1k (2 << 6) From 1fce19176555fdad16a9933965f36e514514d092 Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Fri, 11 Dec 2015 11:27:08 +0000 Subject: [PATCH 1567/2091] ASoC: wm8974: set cache type for regmap commit 1ea5998afe903384ddc16391d4c023cd4c867bea upstream. Attempting to use this codec driver triggers a BUG() in regcache_sync() since no cache type is set. The register map of this device is fairly small and has few holes so a flat cache is suitable. Signed-off-by: Mans Rullgard Acked-by: Charles Keepax Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/wm8974.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/wm8974.c b/sound/soc/codecs/wm8974.c index ff0e4646b934c..88317c1b7f96c 100644 --- a/sound/soc/codecs/wm8974.c +++ b/sound/soc/codecs/wm8974.c @@ -575,6 +575,7 @@ static const struct regmap_config wm8974_regmap = { .max_register = WM8974_MONOMIX, .reg_defaults = wm8974_reg_defaults, .num_reg_defaults = ARRAY_SIZE(wm8974_reg_defaults), + .cache_type = REGCACHE_FLAT, }; static int wm8974_probe(struct snd_soc_codec *codec) From 6ac84206a212c8d86277680ef6e9066020fc54f7 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Fri, 11 Dec 2015 13:06:24 +0200 Subject: [PATCH 1568/2091] ASoC: davinci-mcasp: Fix XDATA check in mcasp_start_tx commit e2a0c9fa80227be5ee017b5476638829dd41cb39 upstream. The condition for checking for XDAT being cleared was not correct. 
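The polarity of the bit test was inverted in a subtle way: the loop is supposed to keep polling while the XRDATA (XDATA) bit is still set, but !(stat & ~XRDATA) tests whether every bit other than XRDATA is clear and never looks at XRDATA at all. A small illustration; the bit position is made up and not the real McASP register layout.

#include <linux/bitops.h>
#include <linux/types.h>

#define XRDATA	BIT(5)	/* illustrative position only */

static bool keep_waiting_old(u32 stat)
{
	/* true exactly when all bits other than XRDATA are clear; XRDATA
	 * itself never influences the result, so the wait can end while
	 * XDATA is still set, or continue after it has already cleared */
	return !(stat & ~XRDATA);
}

static bool keep_waiting_new(u32 stat)
{
	/* the intended condition: XDATA has not been cleared yet */
	return stat & XRDATA;
}

For stat == XRDATA the old test keeps waiting only because the other bits happen to be zero; for stat == (XRDATA | any other bit) it stops even though XDATA is still set, which is the early exit the fix removes.
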
Fixes: 36bcecd0a73eb ("ASoC: davinci-mcasp: Correct TX start sequence") Signed-off-by: Peter Ujfalusi Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/davinci/davinci-mcasp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/davinci/davinci-mcasp.c b/sound/soc/davinci/davinci-mcasp.c index 23c91fa65ab8f..76dd8c6aa4f0e 100644 --- a/sound/soc/davinci/davinci-mcasp.c +++ b/sound/soc/davinci/davinci-mcasp.c @@ -221,8 +221,8 @@ static void mcasp_start_tx(struct davinci_mcasp *mcasp) /* wait for XDATA to be cleared */ cnt = 0; - while (!(mcasp_get_reg(mcasp, DAVINCI_MCASP_TXSTAT_REG) & - ~XRDATA) && (cnt < 100000)) + while ((mcasp_get_reg(mcasp, DAVINCI_MCASP_TXSTAT_REG) & XRDATA) && + (cnt < 100000)) cnt++; /* Release TX state machine */ From 20091f9139bc0a17544a45b51fd3a7780552682f Mon Sep 17 00:00:00 2001 From: Nikesh Oswal Date: Wed, 23 Dec 2015 14:18:05 +0000 Subject: [PATCH 1569/2091] ASoC: arizona: Fix bclk for sample rates that are multiple of 4kHz commit e73694d871867cae8471d2350ce89acb38bc2b63 upstream. For a sample rate of 12kHz the bclk was taken from the 44.1kHz table as we test for a multiple of 8kHz. This patch fixes this issue by testing for multiples of 4kHz instead. Signed-off-by: Nikesh Oswal Signed-off-by: Charles Keepax Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/arizona.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/arizona.c b/sound/soc/codecs/arizona.c index ee91edcf3cb0d..13191891fc4cc 100644 --- a/sound/soc/codecs/arizona.c +++ b/sound/soc/codecs/arizona.c @@ -1354,7 +1354,7 @@ static int arizona_hw_params(struct snd_pcm_substream *substream, bool reconfig; unsigned int aif_tx_state, aif_rx_state; - if (params_rate(params) % 8000) + if (params_rate(params) % 4000) rates = &arizona_44k1_bclk_rates[0]; else rates = &arizona_48k_bclk_rates[0]; From 1f6a2cc39913518756d8b4dad4a10493d208f4c2 Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Thu, 7 Jan 2016 21:48:14 +0530 Subject: [PATCH 1570/2091] ASoC: compress: Fix compress device direction check commit a1068045883ed4a18363a4ebad0c3d55e473b716 upstream. The detection of direction for compress was only taking into account codec capabilities and not CPU ones. 
Fix this by checking the CPU side capabilities as well Tested-by: Ashish Panwar Signed-off-by: Vinod Koul Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/soc-compress.c | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/sound/soc/soc-compress.c b/sound/soc/soc-compress.c index 025c38fbe3c03..1874cf0e6caba 100644 --- a/sound/soc/soc-compress.c +++ b/sound/soc/soc-compress.c @@ -623,6 +623,7 @@ int soc_new_compress(struct snd_soc_pcm_runtime *rtd, int num) struct snd_pcm *be_pcm; char new_name[64]; int ret = 0, direction = 0; + int playback = 0, capture = 0; if (rtd->num_codecs > 1) { dev_err(rtd->card->dev, "Multicodec not supported for compressed stream\n"); @@ -634,11 +635,27 @@ int soc_new_compress(struct snd_soc_pcm_runtime *rtd, int num) rtd->dai_link->stream_name, codec_dai->name, num); if (codec_dai->driver->playback.channels_min) + playback = 1; + if (codec_dai->driver->capture.channels_min) + capture = 1; + + capture = capture && cpu_dai->driver->capture.channels_min; + playback = playback && cpu_dai->driver->playback.channels_min; + + /* + * Compress devices are unidirectional so only one of the directions + * should be set, check for that (xor) + */ + if (playback + capture != 1) { + dev_err(rtd->card->dev, "Invalid direction for compress P %d, C %d\n", + playback, capture); + return -EINVAL; + } + + if(playback) direction = SND_COMPRESS_PLAYBACK; - else if (codec_dai->driver->capture.channels_min) - direction = SND_COMPRESS_CAPTURE; else - return -EINVAL; + direction = SND_COMPRESS_CAPTURE; compr = kzalloc(sizeof(*compr), GFP_KERNEL); if (compr == NULL) { From abcbfda367d4eb2b17dc9c5b036c99431ed3ea3c Mon Sep 17 00:00:00 2001 From: Chunfeng Yun Date: Fri, 4 Dec 2015 15:53:43 +0200 Subject: [PATCH 1571/2091] usb: xhci: fix config fail of FS hub behind a HS hub with MTT commit 096b110a3dd3c868e4610937c80d2e3f3357c1a9 upstream. if a full speed hub connects to a high speed hub which supports MTT, the MTT field of its slot context will be set to 1 when xHCI driver setups an xHCI virtual device in xhci_setup_addressable_virt_dev(); once usb core fetch its hub descriptor, and need to update the xHC's internal data structures for the device, the HUB field of its slot context will be set to 1 too, meanwhile MTT is also set before, this will cause configure endpoint command fail, so in the case, we should clear MTT to 0 for full speed hub according to section 6.2.2 Signed-off-by: Chunfeng Yun Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 1e6d7579709e7..471f73e151ae5 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -4794,8 +4794,16 @@ int xhci_update_hub_device(struct usb_hcd *hcd, struct usb_device *hdev, ctrl_ctx->add_flags |= cpu_to_le32(SLOT_FLAG); slot_ctx = xhci_get_slot_ctx(xhci, config_cmd->in_ctx); slot_ctx->dev_info |= cpu_to_le32(DEV_HUB); + /* + * refer to section 6.2.2: MTT should be 0 for full speed hub, + * but it may be already set to 1 when setup an xHCI virtual + * device, so clear it anyway. 
+ */ if (tt->multi) slot_ctx->dev_info |= cpu_to_le32(DEV_MTT); + else if (hdev->speed == USB_SPEED_FULL) + slot_ctx->dev_info &= cpu_to_le32(~DEV_MTT); + if (xhci->hci_version > 0x95) { xhci_dbg(xhci, "xHCI version %x needs hub " "TT think time and number of ports\n", From 5241134a79fa2fc2d45b66b2651c8467818482db Mon Sep 17 00:00:00 2001 From: Antti Palosaari Date: Mon, 26 Oct 2015 18:58:14 -0200 Subject: [PATCH 1572/2091] airspy: increase USB control message buffer size commit aa0850e1d56623845b46350ffd971afa9241886d upstream. Driver requested device firmware version string during probe using only 24 byte long buffer. That buffer is too small for newer firmware versions, which causes device firmware hang - device stops responding to any commands after that. Increase buffer size to 128 which should be enough for any current and future version strings. Link: https://github.com/airspy/host/issues/27 Reported-by: Benjamin Vernoux Signed-off-by: Antti Palosaari Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/airspy/airspy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/usb/airspy/airspy.c b/drivers/media/usb/airspy/airspy.c index 4069234abed54..a50750ce511dd 100644 --- a/drivers/media/usb/airspy/airspy.c +++ b/drivers/media/usb/airspy/airspy.c @@ -132,7 +132,7 @@ struct airspy { int urbs_submitted; /* USB control message buffer */ - #define BUF_SIZE 24 + #define BUF_SIZE 128 u8 buf[BUF_SIZE]; /* Current configuration */ From a7e83b16c8d83a75c58989e845c664ecaa6e0aa6 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Wed, 16 Dec 2015 13:32:38 -0500 Subject: [PATCH 1573/2091] USB: fix invalid memory access in hub_activate() commit e50293ef9775c5f1cf3fcc093037dd6a8c5684ea upstream. Commit 8520f38099cc ("USB: change hub initialization sleeps to delayed_work") changed the hub_activate() routine to make part of it run in a workqueue. However, the commit failed to take a reference to the usb_hub structure or to lock the hub interface while doing so. As a result, if a hub is plugged in and quickly unplugged before the work routine can run, the routine will try to access memory that has been deallocated. Or, if the hub is unplugged while the routine is running, the memory may be deallocated while it is in active use. This patch fixes the problem by taking a reference to the usb_hub at the start of hub_activate() and releasing it at the end (when the work is finished), and by locking the hub interface while the work routine is running. It also adds a check at the start of the routine to see if the hub has already been disconnected, in which nothing should be done. Signed-off-by: Alan Stern Reported-by: Alexandru Cornea Tested-by: Alexandru Cornea Fixes: 8520f38099cc ("USB: change hub initialization sleeps to delayed_work") CC: Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index d68c4a4db6827..ee11b301f3dad 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -1034,10 +1034,20 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) unsigned delay; /* Continue a partial initialization */ - if (type == HUB_INIT2) - goto init2; - if (type == HUB_INIT3) + if (type == HUB_INIT2 || type == HUB_INIT3) { + device_lock(hub->intfdev); + + /* Was the hub disconnected while we were waiting? 
*/ + if (hub->disconnected) { + device_unlock(hub->intfdev); + kref_put(&hub->kref, hub_release); + return; + } + if (type == HUB_INIT2) + goto init2; goto init3; + } + kref_get(&hub->kref); /* The superspeed hub except for root hub has to use Hub Depth * value as an offset into the route string to locate the bits @@ -1235,6 +1245,7 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) queue_delayed_work(system_power_efficient_wq, &hub->init_work, msecs_to_jiffies(delay)); + device_unlock(hub->intfdev); return; /* Continues at init3: below */ } else { msleep(delay); @@ -1256,6 +1267,11 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) /* Allow autosuspend if it was suppressed */ if (type <= HUB_INIT3) usb_autopm_put_interface_async(to_usb_interface(hub->intfdev)); + + if (type == HUB_INIT2 || type == HUB_INIT3) + device_unlock(hub->intfdev); + + kref_put(&hub->kref, hub_release); } /* Implement the continuations for the delays above */ From 887c3cc2557b3548eabcce9e03dce470303f29e9 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 16 Dec 2015 14:06:37 +0300 Subject: [PATCH 1574/2091] USB: ipaq.c: fix a timeout loop commit abdc9a3b4bac97add99e1d77dc6d28623afe682b upstream. The code expects the loop to end with "retries" set to zero but, because it is a post-op, it will end set to -1. I have fixed this by moving the decrement inside the loop. Fixes: 014aa2a3c32e ('USB: ipaq: minor ipaq_open() cleanup.') Signed-off-by: Dan Carpenter Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/ipaq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/serial/ipaq.c b/drivers/usb/serial/ipaq.c index f51a5d52c0eda..ec1b8f2c11837 100644 --- a/drivers/usb/serial/ipaq.c +++ b/drivers/usb/serial/ipaq.c @@ -531,7 +531,8 @@ static int ipaq_open(struct tty_struct *tty, * through. Since this has a reasonably high failure rate, we retry * several times. */ - while (retries--) { + while (retries) { + retries--; result = usb_control_msg(serial->dev, usb_sndctrlpipe(serial->dev, 0), 0x22, 0x21, 0x1, 0, NULL, 0, 100); From 3f5e25aa380d0ed5a38a824049ed520dabb149a9 Mon Sep 17 00:00:00 2001 From: Oliver Freyermuth Date: Mon, 28 Dec 2015 18:37:38 +0100 Subject: [PATCH 1575/2091] USB: cp210x: add ID for ELV Marble Sound Board 1 commit f7d7f59ab124748156ea551edf789994f05da342 upstream. Add the USB device ID for ELV Marble Sound Board 1. 
Signed-off-by: Oliver Freyermuth Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/cp210x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index 7d4f51a32e66f..59b2126b21a39 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -160,6 +160,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x17F4, 0xAAAA) }, /* Wavesense Jazz blood glucose meter */ { USB_DEVICE(0x1843, 0x0200) }, /* Vaisala USB Instrument Cable */ { USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */ + { USB_DEVICE(0x18EF, 0xE025) }, /* ELV Marble Sound Board 1 */ { USB_DEVICE(0x1ADB, 0x0001) }, /* Schweitzer Engineering C662 Cable */ { USB_DEVICE(0x1B1C, 0x1C00) }, /* Corsair USB Dongle */ { USB_DEVICE(0x1BA4, 0x0002) }, /* Silicon Labs 358x factory default */ From 3fabd53542c866e28e13a2cff43904d7a66308e0 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 3 Dec 2015 15:03:34 +0100 Subject: [PATCH 1576/2091] xhci: refuse loading if nousb is used commit 1eaf35e4dd592c59041bc1ed3248c46326da1f5f upstream. The module should fail to load. Signed-off-by: Oliver Neukum Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 471f73e151ae5..f6bb118e4501f 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -5054,6 +5054,10 @@ static int __init xhci_hcd_init(void) BUILD_BUG_ON(sizeof(struct xhci_intr_reg) != 8*32/8); /* xhci_run_regs has eight fields and embeds 128 xhci_intr_regs */ BUILD_BUG_ON(sizeof(struct xhci_run_regs) != (8+8*128)*32/8); + + if (usb_disabled()) + return -ENODEV; + return 0; } From 57d0c018b46ef5293da715af12c8adc32aa99847 Mon Sep 17 00:00:00 2001 From: Vijay Pandurangan Date: Fri, 18 Dec 2015 14:34:59 -0500 Subject: [PATCH 1577/2091] =?UTF-8?q?veth:=20don=E2=80=99t=20modify=20ip?= =?UTF-8?q?=5Fsummed;=20doing=20so=20treats=20packets=20with=20bad=20check?= =?UTF-8?q?sums=20as=20good.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit ce8c839b74e3017996fad4e1b7ba2e2625ede82f ] Packets that arrive from real hardware devices have ip_summed == CHECKSUM_UNNECESSARY if the hardware verified the checksums, or CHECKSUM_NONE if the packet is bad or it was unable to verify it. The current version of veth will replace CHECKSUM_NONE with CHECKSUM_UNNECESSARY, which causes corrupt packets routed from hardware to a veth device to be delivered to the application. This caused applications at Twitter to receive corrupt data when network hardware was corrupting packets. We believe this was added as an optimization to skip computing and verifying checksums for communication between containers. However, locally generated packets have ip_summed == CHECKSUM_PARTIAL, so the code as written does nothing for them. As far as we can tell, after removing this code, these packets are transmitted from one stack to another unmodified (tcpdump shows invalid checksums on both sides, as expected), and they are delivered correctly to applications. We didn’t test every possible network configuration, but we tried a few common ones such as bridging containers, using NAT between the host and a container, and routing from hardware devices to containers. We have effectively deployed this in production at Twitter (by disabling RX checksum offloading on veth devices). 
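For reference, the ip_summed states mentioned above have quite different meanings, which is why rewriting CHECKSUM_NONE as CHECKSUM_UNNECESSARY lets corrupt frames through. A sketch of what the removed hunk effectively did (simplified; the NETIF_F_RXCSUM feature check is omitted):

#include <linux/skbuff.h>

/* CHECKSUM_PARTIAL:     locally generated, checksum still to be filled in
 * CHECKSUM_UNNECESSARY: hardware already verified the checksum
 * CHECKSUM_NONE:        nobody verified it; the packet may be corrupt
 */
static void bad_promote(struct sk_buff *skb)	/* illustrative only */
{
	if (skb->ip_summed == CHECKSUM_NONE)
		/* marks an unverified (possibly corrupt) frame as verified,
		 * so the stack skips checksum validation and delivers it */
		skb->ip_summed = CHECKSUM_UNNECESSARY;
}
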
This code dates back to the first version of the driver, commit ("[NET]: Virtual ethernet device driver"), so I suspect this bug occurred mostly because the driver API has evolved significantly since then. Commit <0b7967503dc97864f283a> ("net/veth: Fix packet checksumming") (in December 2010) fixed this for packets that get created locally and sent to hardware devices, by not changing CHECKSUM_PARTIAL. However, the same issue still occurs for packets coming in from hardware devices. Co-authored-by: Evan Jones Signed-off-by: Evan Jones Cc: Nicolas Dichtel Cc: Phil Sutter Cc: Toshiaki Makita Cc: netdev@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Vijay Pandurangan Acked-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/veth.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/net/veth.c b/drivers/net/veth.c index c8186ffda1a31..2e61a799f32a9 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -117,12 +117,6 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev) kfree_skb(skb); goto drop; } - /* don't change ip_summed == CHECKSUM_PARTIAL, as that - * will cause bad checksum on forwarded packets - */ - if (skb->ip_summed == CHECKSUM_NONE && - rcv->features & NETIF_F_RXCSUM) - skb->ip_summed = CHECKSUM_UNNECESSARY; if (likely(dev_forward_skb(rcv, skb) == NET_RX_SUCCESS)) { struct pcpu_vstats *stats = this_cpu_ptr(dev->vstats); From 8a39e24931d119c0418721923281d2b496275ad3 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Mon, 21 Dec 2015 12:54:45 +0300 Subject: [PATCH 1578/2091] ipv6/addrlabel: fix ip6addrlbl_get() [ Upstream commit e459dfeeb64008b2d23bdf600f03b3605dbb8152 ] ip6addrlbl_get() has never worked. If ip6addrlbl_hold() succeeded, ip6addrlbl_get() will exit with '-ESRCH'. If ip6addrlbl_hold() failed, ip6addrlbl_get() will use about to be free ip6addrlbl_entry pointer. Fix this by inverting ip6addrlbl_hold() check. Fixes: 2a8cc6c89039 ("[IPV6] ADDRCONF: Support RFC3484 configurable address selection policy table.") Signed-off-by: Andrey Ryabinin Reviewed-by: Cong Wang Acked-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/addrlabel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index 882124ebb438b..a8f6986dcbe5e 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -552,7 +552,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh) rcu_read_lock(); p = __ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index); - if (p && ip6addrlbl_hold(p)) + if (p && !ip6addrlbl_hold(p)) p = NULL; lseq = ip6addrlbl_table.seq; rcu_read_unlock(); From 6866b52c56db189f8471a13f951221c45c0089c7 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Mon, 21 Dec 2015 10:55:45 -0800 Subject: [PATCH 1579/2091] addrconf: always initialize sysctl table data [ Upstream commit 5449a5ca9bc27dd51a462de7ca0b1cd861cd2bd0 ] When sysctl performs restrict writes, it allows to write from a middle position of a sysctl file, which requires us to initialize the table data before calling proc_dostring() for the write case. Fixes: 3d1bec99320d ("ipv6: introduce secret_stable to ipv6_devconf") Reported-by: Sasha Levin Acked-by: Hannes Frederic Sowa Tested-by: Sasha Levin Signed-off-by: Cong Wang Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/addrconf.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index a2d685030a34f..f4795b0d6e6ef 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5267,13 +5267,10 @@ static int addrconf_sysctl_stable_secret(struct ctl_table *ctl, int write, goto out; } - if (!write) { - err = snprintf(str, sizeof(str), "%pI6", - &secret->secret); - if (err >= sizeof(str)) { - err = -EIO; - goto out; - } + err = snprintf(str, sizeof(str), "%pI6", &secret->secret); + if (err >= sizeof(str)) { + err = -EIO; + goto out; } err = proc_dostring(&lctl, write, buffer, lenp, ppos); From 45d52b0b1f9f5c8717e9212ee6e4964dc8ed095c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Wed, 23 Dec 2015 13:42:43 +0100 Subject: [PATCH 1580/2091] net: cdc_ncm: avoid changing RX/TX buffers on MTU changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 1dfddff5fcd869fcab0c52fafae099dfa435a935 ] NCM buffer sizes are negotiated with the device independently of the network device MTU. The RX buffers are allocated by the usbnet framework based on the rx_urb_size value set by cdc_ncm. A single RX buffer can hold a number of MTU sized packets. The default usbnet change_mtu ndo only modifies rx_urb_size if it is equal to hard_mtu. And the cdc_ncm driver will set rx_urb_size and hard_mtu independently of each other, based on dwNtbInMaxSize and dwNtbOutMaxSize respectively. It was therefore assumed that usbnet_change_mtu() would never touch rx_urb_size. This failed to consider the case where dwNtbInMaxSize and dwNtbOutMaxSize happens to be equal. Fix by implementing an NCM specific change_mtu ndo, modifying the netdev MTU without touching the buffer size settings. Signed-off-by: Bjørn Mork Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/cdc_mbim.c | 2 +- drivers/net/usb/cdc_ncm.c | 31 +++++++++++++++++++++++++++++++ include/linux/usb/cdc_ncm.h | 1 + 3 files changed, 33 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c index e4b7a47a825c7..5efaa9ab5af59 100644 --- a/drivers/net/usb/cdc_mbim.c +++ b/drivers/net/usb/cdc_mbim.c @@ -100,7 +100,7 @@ static const struct net_device_ops cdc_mbim_netdev_ops = { .ndo_stop = usbnet_stop, .ndo_start_xmit = usbnet_start_xmit, .ndo_tx_timeout = usbnet_tx_timeout, - .ndo_change_mtu = usbnet_change_mtu, + .ndo_change_mtu = cdc_ncm_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, .ndo_vlan_rx_add_vid = cdc_mbim_rx_add_vid, diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 8067b8fbb0eea..0b481c30979b6 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include @@ -687,6 +688,33 @@ static void cdc_ncm_free(struct cdc_ncm_ctx *ctx) kfree(ctx); } +/* we need to override the usbnet change_mtu ndo for two reasons: + * - respect the negotiated maximum datagram size + * - avoid unwanted changes to rx and tx buffers + */ +int cdc_ncm_change_mtu(struct net_device *net, int new_mtu) +{ + struct usbnet *dev = netdev_priv(net); + struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; + int maxmtu = ctx->max_datagram_size - cdc_ncm_eth_hlen(dev); + + if (new_mtu <= 0 || new_mtu > maxmtu) + return -EINVAL; + net->mtu = new_mtu; + return 0; +} +EXPORT_SYMBOL_GPL(cdc_ncm_change_mtu); + +static const struct net_device_ops cdc_ncm_netdev_ops = { + .ndo_open = usbnet_open, + .ndo_stop = usbnet_stop, + .ndo_start_xmit = usbnet_start_xmit, + .ndo_tx_timeout = usbnet_tx_timeout, + .ndo_change_mtu = cdc_ncm_change_mtu, + .ndo_set_mac_address = eth_mac_addr, + .ndo_validate_addr = eth_validate_addr, +}; + int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_altsetting) { const struct usb_cdc_union_desc *union_desc = NULL; @@ -861,6 +889,9 @@ int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_ /* add our sysfs attrs */ dev->net->sysfs_groups[0] = &cdc_ncm_sysfs_attr_group; + /* must handle MTU changes */ + dev->net->netdev_ops = &cdc_ncm_netdev_ops; + return 0; error2: diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h index 7c9b484735c53..e7827ae2462c5 100644 --- a/include/linux/usb/cdc_ncm.h +++ b/include/linux/usb/cdc_ncm.h @@ -133,6 +133,7 @@ struct cdc_ncm_ctx { }; u8 cdc_ncm_select_altsetting(struct usb_interface *intf); +int cdc_ncm_change_mtu(struct net_device *net, int new_mtu); int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_altsetting); void cdc_ncm_unbind(struct usbnet *dev, struct usb_interface *intf); struct sk_buff *cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign); From c000d76ec0d8bbadb7972b374bcf7ffe34deb592 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 29 Dec 2015 17:49:25 +0800 Subject: [PATCH 1581/2091] sctp: sctp should release assoc when sctp_make_abort_user return NULL in sctp_close [ Upstream commit 068d8bd338e855286aea54e70d1c101569284b21 ] In sctp_close, sctp_make_abort_user may return NULL because of memory allocation failure. If this happens, it will bypass any state change and never free the assoc. 
The assoc has no chance to be freed and it will be kept in memory with the state it had even after the socket is closed by sctp_close(). So if sctp_make_abort_user fails to allocate memory, we should abort the asoc via sctp_primitive_ABORT as well. Just like the annotation in sctp_sf_cookie_wait_prm_abort and sctp_sf_do_9_1_prm_abort said, "Even if we can't send the ABORT due to low memory delete the TCB. This is a departure from our typical NOMEM handling". But then the chunk is NULL (low memory) and the SCTP_CMD_REPLY cmd would dereference the chunk pointer, and system crash. So we should add SCTP_CMD_REPLY cmd only when the chunk is not NULL, just like other places where it adds SCTP_CMD_REPLY cmd. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/sm_statefuns.c | 6 ++++-- net/sctp/socket.c | 3 +-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 3ee27b7704ffb..e6bb98e583fb9 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -4829,7 +4829,8 @@ sctp_disposition_t sctp_sf_do_9_1_prm_abort( retval = SCTP_DISPOSITION_CONSUME; - sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); + if (abort) + sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); /* Even if we can't send the ABORT due to low memory delete the * TCB. This is a departure from our typical NOMEM handling. @@ -4966,7 +4967,8 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_abort( SCTP_TO(SCTP_EVENT_TIMEOUT_T1_INIT)); retval = SCTP_DISPOSITION_CONSUME; - sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); + if (abort) + sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_CLOSED)); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index a63c2c87a0c6c..76e6ec62cf926 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1513,8 +1513,7 @@ static void sctp_close(struct sock *sk, long timeout) struct sctp_chunk *chunk; chunk = sctp_make_abort_user(asoc, NULL, 0); - if (chunk) - sctp_primitive_ABORT(net, asoc, chunk); + sctp_primitive_ABORT(net, asoc, chunk); } else sctp_primitive_SHUTDOWN(net, asoc, NULL); } From f45f0213b831305572436cdd2f7f0709fcbe2887 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 31 Dec 2015 14:26:33 +0100 Subject: [PATCH 1582/2091] connector: bump skb->users before callback invocation [ Upstream commit 55285bf09427c5abf43ee1d54e892f352092b1f1 ] Dmitry reports memleak with syskaller program. Problem is that connector bumps skb usecount but might not invoke callback. So move skb_get to where we invoke the callback. Reported-by: Dmitry Vyukov Signed-off-by: Florian Westphal Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/connector/connector.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c index 30f522848c735..c19e7fc717c30 100644 --- a/drivers/connector/connector.c +++ b/drivers/connector/connector.c @@ -178,26 +178,21 @@ static int cn_call_callback(struct sk_buff *skb) * * It checks skb, netlink header and msg sizes, and calls callback helper. 
*/ -static void cn_rx_skb(struct sk_buff *__skb) +static void cn_rx_skb(struct sk_buff *skb) { struct nlmsghdr *nlh; - struct sk_buff *skb; int len, err; - skb = skb_get(__skb); - if (skb->len >= NLMSG_HDRLEN) { nlh = nlmsg_hdr(skb); len = nlmsg_len(nlh); if (len < (int)sizeof(struct cn_msg) || skb->len < nlh->nlmsg_len || - len > CONNECTOR_MAX_MSG_SIZE) { - kfree_skb(skb); + len > CONNECTOR_MAX_MSG_SIZE) return; - } - err = cn_call_callback(skb); + err = cn_call_callback(skb_get(skb)); if (err < 0) kfree_skb(skb); } From dc6b0ec667f67d4768e72c1b7f1bbc14ea52379c Mon Sep 17 00:00:00 2001 From: willy tarreau Date: Sun, 10 Jan 2016 07:54:56 +0100 Subject: [PATCH 1583/2091] unix: properly account for FDs passed over unix sockets [ Upstream commit 712f4aad406bb1ed67f3f98d04c044191f0ff593 ] It is possible for a process to allocate and accumulate far more FDs than the process' limit by sending them over a unix socket then closing them to keep the process' fd count low. This change addresses this problem by keeping track of the number of FDs in flight per user and preventing non-privileged processes from having more FDs in flight than their configured FD limit. Reported-by: socketpair@gmail.com Reported-by: Tetsuo Handa Mitigates: CVE-2013-4312 (Linux 2.0+) Suggested-by: Linus Torvalds Acked-by: Hannes Frederic Sowa Signed-off-by: Willy Tarreau Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/sched.h | 1 + net/unix/af_unix.c | 24 ++++++++++++++++++++---- net/unix/garbage.c | 13 ++++++++----- 3 files changed, 29 insertions(+), 9 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 61f4f2d5c882b..9128b4e9f5418 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -802,6 +802,7 @@ struct user_struct { unsigned long mq_bytes; /* How many bytes can be allocated to mqueue? */ #endif unsigned long locked_shm; /* How many pages of mlocked shm ? */ + unsigned long unix_inflight; /* How many files in flight in unix sockets */ #ifdef CONFIG_KEYS struct key *uid_keyring; /* UID specific keyring */ diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index a398f624c28dc..cb3a01a9ed384 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1481,6 +1481,21 @@ static void unix_destruct_scm(struct sk_buff *skb) sock_wfree(skb); } +/* + * The "user->unix_inflight" variable is protected by the garbage + * collection lock, and we just read it locklessly here. If you go + * over the limit, there might be a tiny race in actually noticing + * it across threads. Tough. 
+ */ +static inline bool too_many_unix_fds(struct task_struct *p) +{ + struct user_struct *user = current_user(); + + if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE))) + return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN); + return false; +} + #define MAX_RECURSION_LEVEL 4 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) @@ -1489,6 +1504,9 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) unsigned char max_level = 0; int unix_sock_count = 0; + if (too_many_unix_fds(current)) + return -ETOOMANYREFS; + for (i = scm->fp->count - 1; i >= 0; i--) { struct sock *sk = unix_get_socket(scm->fp->fp[i]); @@ -1510,10 +1528,8 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) if (!UNIXCB(skb).fp) return -ENOMEM; - if (unix_sock_count) { - for (i = scm->fp->count - 1; i >= 0; i--) - unix_inflight(scm->fp->fp[i]); - } + for (i = scm->fp->count - 1; i >= 0; i--) + unix_inflight(scm->fp->fp[i]); return max_level; } diff --git a/net/unix/garbage.c b/net/unix/garbage.c index a73a226f2d33f..8fcdc2283af50 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -120,11 +120,11 @@ void unix_inflight(struct file *fp) { struct sock *s = unix_get_socket(fp); + spin_lock(&unix_gc_lock); + if (s) { struct unix_sock *u = unix_sk(s); - spin_lock(&unix_gc_lock); - if (atomic_long_inc_return(&u->inflight) == 1) { BUG_ON(!list_empty(&u->link)); list_add_tail(&u->link, &gc_inflight_list); @@ -132,25 +132,28 @@ void unix_inflight(struct file *fp) BUG_ON(list_empty(&u->link)); } unix_tot_inflight++; - spin_unlock(&unix_gc_lock); } + fp->f_cred->user->unix_inflight++; + spin_unlock(&unix_gc_lock); } void unix_notinflight(struct file *fp) { struct sock *s = unix_get_socket(fp); + spin_lock(&unix_gc_lock); + if (s) { struct unix_sock *u = unix_sk(s); - spin_lock(&unix_gc_lock); BUG_ON(list_empty(&u->link)); if (atomic_long_dec_and_test(&u->inflight)) list_del_init(&u->link); unix_tot_inflight--; - spin_unlock(&unix_gc_lock); } + fp->f_cred->user->unix_inflight--; + spin_unlock(&unix_gc_lock); } static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *), From a6b1d24893805bb4f2e7c7bc6f86a98ec37065ee Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Tue, 5 Jan 2016 10:46:00 +0100 Subject: [PATCH 1584/2091] bridge: Only call /sbin/bridge-stp for the initial network namespace [ Upstream commit ff62198553e43cdffa9d539f6165d3e83f8a42bc ] [I stole this patch from Eric Biederman. He wrote:] > There is no defined mechanism to pass network namespace information > into /sbin/bridge-stp therefore don't even try to invoke it except > for bridge devices in the initial network namespace. > > It is possible for unprivileged users to cause /sbin/bridge-stp to be > invoked for any network device name which if /sbin/bridge-stp does not > guard against unreasonable arguments or being invoked twice on the > same network device could cause problems. [Hannes: changed patch using netns_eq] Cc: Eric W. Biederman Signed-off-by: Eric W. Biederman Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_stp_if.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index 7832d07f48f66..ce658abdc2c8d 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -128,7 +128,10 @@ static void br_stp_start(struct net_bridge *br) char *argv[] = { BR_STP_PROG, br->dev->name, "start", NULL }; char *envp[] = { NULL }; - r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC); + if (net_eq(dev_net(br->dev), &init_net)) + r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC); + else + r = -ENOENT; spin_lock_bh(&br->lock); From 5596242a6263ece70ee14f3b6861f02b8dc82d11 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Tue, 5 Jan 2016 16:23:07 +0100 Subject: [PATCH 1585/2091] net: filter: make JITs zero A for SKF_AD_ALU_XOR_X [ Upstream commit 55795ef5469290f89f04e12e662ded604909e462 ] The SKF_AD_ALU_XOR_X ancillary is not like the other ancillary data instructions since it XORs A with X while all the others replace A with some loaded value. All the BPF JITs fail to clear A if this is used as the first instruction in a filter. This was found using american fuzzy lop. Add a helper to determine if A needs to be cleared given the first instruction in a filter, and use this in the JITs. Except for ARM, the rest have only been compile-tested. Fixes: 3480593131e0 ("net: filter: get rid of BPF_S_* enum") Signed-off-by: Rabin Vincent Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- arch/arm/net/bpf_jit_32.c | 16 +--------------- arch/mips/net/bpf_jit.c | 16 +--------------- arch/powerpc/net/bpf_jit_comp.c | 13 ++----------- arch/sparc/net/bpf_jit_comp.c | 17 ++--------------- include/linux/filter.h | 19 +++++++++++++++++++ 5 files changed, 25 insertions(+), 56 deletions(-) diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index e0e23582c8b4e..5fe949b084acf 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -162,19 +162,6 @@ static inline int mem_words_used(struct jit_ctx *ctx) return fls(ctx->seen & SEEN_MEM); } -static inline bool is_load_to_a(u16 inst) -{ - switch (inst) { - case BPF_LD | BPF_W | BPF_LEN: - case BPF_LD | BPF_W | BPF_ABS: - case BPF_LD | BPF_H | BPF_ABS: - case BPF_LD | BPF_B | BPF_ABS: - return true; - default: - return false; - } -} - static void jit_fill_hole(void *area, unsigned int size) { u32 *ptr; @@ -186,7 +173,6 @@ static void jit_fill_hole(void *area, unsigned int size) static void build_prologue(struct jit_ctx *ctx) { u16 reg_set = saved_regs(ctx); - u16 first_inst = ctx->skf->insns[0].code; u16 off; #ifdef CONFIG_FRAME_POINTER @@ -216,7 +202,7 @@ static void build_prologue(struct jit_ctx *ctx) emit(ARM_MOV_I(r_X, 0), ctx); /* do not leak kernel data to userspace */ - if ((first_inst != (BPF_RET | BPF_K)) && !(is_load_to_a(first_inst))) + if (bpf_needs_clear_a(&ctx->skf->insns[0])) emit(ARM_MOV_I(r_A, 0), ctx); /* stack space for the BPF_MEM words */ diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c index e23fdf2a9c80d..d6d27d51d1310 100644 --- a/arch/mips/net/bpf_jit.c +++ b/arch/mips/net/bpf_jit.c @@ -556,19 +556,6 @@ static inline u16 align_sp(unsigned int num) return num; } -static bool is_load_to_a(u16 inst) -{ - switch (inst) { - case BPF_LD | BPF_W | BPF_LEN: - case BPF_LD | BPF_W | BPF_ABS: - case BPF_LD | BPF_H | BPF_ABS: - case BPF_LD | BPF_B | BPF_ABS: - return true; - default: - return false; - } 
-} - static void save_bpf_jit_regs(struct jit_ctx *ctx, unsigned offset) { int i = 0, real_off = 0; @@ -686,7 +673,6 @@ static unsigned int get_stack_depth(struct jit_ctx *ctx) static void build_prologue(struct jit_ctx *ctx) { - u16 first_inst = ctx->skf->insns[0].code; int sp_off; /* Calculate the total offset for the stack pointer */ @@ -700,7 +686,7 @@ static void build_prologue(struct jit_ctx *ctx) emit_jit_reg_move(r_X, r_zero, ctx); /* Do not leak kernel data to userspace */ - if ((first_inst != (BPF_RET | BPF_K)) && !(is_load_to_a(first_inst))) + if (bpf_needs_clear_a(&ctx->skf->insns[0])) emit_jit_reg_move(r_A, r_zero, ctx); } diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 17cea18a09d32..264c473c1b3c0 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -78,18 +78,9 @@ static void bpf_jit_build_prologue(struct bpf_prog *fp, u32 *image, PPC_LI(r_X, 0); } - switch (filter[0].code) { - case BPF_RET | BPF_K: - case BPF_LD | BPF_W | BPF_LEN: - case BPF_LD | BPF_W | BPF_ABS: - case BPF_LD | BPF_H | BPF_ABS: - case BPF_LD | BPF_B | BPF_ABS: - /* first instruction sets A register (or is RET 'constant') */ - break; - default: - /* make sure we dont leak kernel information to user */ + /* make sure we dont leak kernel information to user */ + if (bpf_needs_clear_a(&filter[0])) PPC_LI(r_A, 0); - } } static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c index 7931eeeb649af..8109e92cd619e 100644 --- a/arch/sparc/net/bpf_jit_comp.c +++ b/arch/sparc/net/bpf_jit_comp.c @@ -420,22 +420,9 @@ void bpf_jit_compile(struct bpf_prog *fp) } emit_reg_move(O7, r_saved_O7); - switch (filter[0].code) { - case BPF_RET | BPF_K: - case BPF_LD | BPF_W | BPF_LEN: - case BPF_LD | BPF_W | BPF_ABS: - case BPF_LD | BPF_H | BPF_ABS: - case BPF_LD | BPF_B | BPF_ABS: - /* The first instruction sets the A register (or is - * a "RET 'constant'") - */ - break; - default: - /* Make sure we dont leak kernel information to the - * user. - */ + /* Make sure we dont leak kernel information to the user. */ + if (bpf_needs_clear_a(&filter[0])) emit_clear(r_A); /* A = 0 */ - } for (i = 0; i < flen; i++) { unsigned int K = filter[i].k; diff --git a/include/linux/filter.h b/include/linux/filter.h index fa11b3a367be5..1ce6e1049a3bb 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -428,6 +428,25 @@ static inline void bpf_jit_free(struct bpf_prog *fp) #define BPF_ANC BIT(15) +static inline bool bpf_needs_clear_a(const struct sock_filter *first) +{ + switch (first->code) { + case BPF_RET | BPF_K: + case BPF_LD | BPF_W | BPF_LEN: + return false; + + case BPF_LD | BPF_W | BPF_ABS: + case BPF_LD | BPF_H | BPF_ABS: + case BPF_LD | BPF_B | BPF_ABS: + if (first->k == SKF_AD_OFF + SKF_AD_ALU_XOR_X) + return true; + return false; + + default: + return true; + } +} + static inline u16 bpf_anc_helper(const struct sock_filter *ftest) { BUG_ON(ftest->code & BPF_ANC); From 02a1fef61d06bb0417b60f1233320e141266860c Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 5 Jan 2016 09:11:36 -0800 Subject: [PATCH 1586/2091] net: sched: fix missing free per cpu on qstats [ Upstream commit 73c20a8b7245273125cfe92c4b46e6fdb568a801 ] When a qdisc is using per cpu stats (currently just the ingress qdisc) only the bstats are being freed. This also free's the qstats. 
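The fix below simply pairs the missing free_percpu() with the existing one; as a general pattern, every alloc_percpu() needs a matching free_percpu() on teardown or the second allocation leaks exactly as described above. A hedged sketch using a hypothetical structure, not the Qdisc code:

#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/types.h>

struct stats_sketch {
	u64 __percpu *bstats;
	u64 __percpu *qstats;
};

static void stats_sketch_free(struct stats_sketch *s)
{
	free_percpu(s->bstats);
	free_percpu(s->qstats);		/* the counterpart that is easy to forget */
	kfree(s);
}

static struct stats_sketch *stats_sketch_alloc(void)
{
	struct stats_sketch *s = kzalloc(sizeof(*s), GFP_KERNEL);

	if (!s)
		return NULL;

	s->bstats = alloc_percpu(u64);
	s->qstats = alloc_percpu(u64);
	if (!s->bstats || !s->qstats) {
		stats_sketch_free(s);	/* free_percpu() and kfree() accept NULL */
		return NULL;
	}
	return s;
}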
Fixes: b0ab6f92752b9f9d8 ("net: sched: enable per cpu qstats") Signed-off-by: John Fastabend Acked-by: Eric Dumazet Acked-by: Daniel Borkmann Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_generic.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index b453270be3fd4..3c6f6b774ba6c 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -666,8 +666,10 @@ static void qdisc_rcu_free(struct rcu_head *head) { struct Qdisc *qdisc = container_of(head, struct Qdisc, rcu_head); - if (qdisc_is_percpu_stats(qdisc)) + if (qdisc_is_percpu_stats(qdisc)) { free_percpu(qdisc->cpu_bstats); + free_percpu(qdisc->cpu_qstats); + } kfree((char *) qdisc - qdisc->padded); } From 8f5cd6eea8115c6a4153268508369b81f84e9dce Mon Sep 17 00:00:00 2001 From: Francesco Ruggeri Date: Wed, 6 Jan 2016 00:18:48 -0800 Subject: [PATCH 1587/2091] net: possible use after free in dst_release [ Upstream commit 07a5d38453599052aff0877b16bb9c1585f08609 ] dst_release should not access dst->flags after decrementing __refcnt to 0. The dst_entry may be in dst_busy_list and dst_gc_task may dst_destroy it before dst_release gets a chance to access dst->flags. Fixes: d69bbf88c8d0 ("net: fix a race in dst_release()") Fixes: 27b75c95f10d ("net: avoid RCU for NOCACHE dst") Signed-off-by: Francesco Ruggeri Acked-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/dst.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/dst.c b/net/core/dst.c index f8db4032d45a4..540066cb33efc 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -282,10 +282,11 @@ void dst_release(struct dst_entry *dst) { if (dst) { int newrefcnt; + unsigned short nocache = dst->flags & DST_NOCACHE; newrefcnt = atomic_dec_return(&dst->__refcnt); WARN_ON(newrefcnt < 0); - if (!newrefcnt && unlikely(dst->flags & DST_NOCACHE)) + if (!newrefcnt && unlikely(nocache)) call_rcu(&dst->rcu_head, dst_destroy_rcu); } } From a33704eb60850ac837ef83ecc380f5f6b2aa2906 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 7 Jan 2016 11:26:53 +0100 Subject: [PATCH 1588/2091] vxlan: fix test which detect duplicate vxlan iface [ Upstream commit 07b9b37c227cb8d88d478b4a9c5634fee514ede1 ] When a vxlan interface is created, the driver checks that there is not another vxlan interface with the same properties. To do this, it checks the existing vxlan udp socket. Since commit 1c51a9159dde, the creation of the vxlan socket is done only when the interface is set up, thus it breaks that test. Example: $ ip l a vxlan10 type vxlan id 10 group 239.0.0.10 dev eth0 dstport 0 $ ip l a vxlan11 type vxlan id 10 group 239.0.0.10 dev eth0 dstport 0 $ ip -br l | grep vxlan vxlan10 DOWN f2:55:1c:6a:fb:00 vxlan11 DOWN 7a:cb:b9:38:59:0d Instead of checking sockets, let's loop over the vxlan iface list. Fixes: 1c51a9159dde ("vxlan: fix race caused by dropping rtnl_unlock") Reported-by: Thomas Faivre Signed-off-by: Nicolas Dichtel Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/vxlan.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 0085b8df83e20..940f78e419932 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2581,7 +2581,7 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { struct vxlan_net *vn = net_generic(src_net, vxlan_net_id); - struct vxlan_dev *vxlan = netdev_priv(dev); + struct vxlan_dev *vxlan = netdev_priv(dev), *tmp; struct vxlan_rdst *dst = &vxlan->default_dst; __u32 vni; int err; @@ -2714,9 +2714,13 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev, if (data[IFLA_VXLAN_REMCSUM_NOPARTIAL]) vxlan->flags |= VXLAN_F_REMCSUM_NOPARTIAL; - if (vxlan_find_vni(src_net, vni, use_ipv6 ? AF_INET6 : AF_INET, - vxlan->dst_port, vxlan->flags)) { - pr_info("duplicate VNI %u\n", vni); + list_for_each_entry(tmp, &vn->vxlan_list, next) { + if (tmp->default_dst.remote_vni == vni && + (tmp->default_dst.remote_ip.sa.sa_family == AF_INET6 || + tmp->saddr.sa.sa_family == AF_INET6) == use_ipv6 && + tmp->dst_port == vxlan->dst_port && + (tmp->flags & VXLAN_F_RCV_FLAGS) == + (vxlan->flags & VXLAN_F_RCV_FLAGS)) return -EEXIST; } From bcea43fb3164cdcdc42c710ca0cb90aa9c6a5530 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Thu, 7 Jan 2016 14:52:43 -0500 Subject: [PATCH 1589/2091] net: sctp: prevent writes to cookie_hmac_alg from accessing invalid memory [ Upstream commit 320f1a4a175e7cd5d3f006f92b4d4d3e2cbb7bb5 ] proc_dostring() needs an initialized destination string, while the one provided in proc_sctp_do_hmac_alg() contains stack garbage. Thus, writing to cookie_hmac_alg would strlen() that garbage and end up accessing invalid memory. Fixes: 3c68198e7 ("sctp: Make hmac algorithm selection for cookie generation dynamic") Signed-off-by: Sasha Levin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/sysctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index 26d50c565f542..3e0fc51272259 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -320,7 +320,7 @@ static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, int write, struct ctl_table tbl; bool changed = false; char *none = "none"; - char tmp[8]; + char tmp[8] = {0}; int ret; memset(&tbl, 0, sizeof(struct ctl_table)); From add92082e2d14367b27b0e18b0deeaedd7c1f938 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 8 Jan 2016 09:35:51 -0800 Subject: [PATCH 1590/2091] ipv6: tcp: add rcu locking in tcp_v6_send_synack() [ Upstream commit 3e4006f0b86a5ae5eb0e8215f9a9e1db24506977 ] When first SYNACK is sent, we already hold rcu_read_lock(), but this is not true if a SYNACK is retransmitted, as a timer (soft) interrupt does not hold rcu_read_lock() Fixes: 45f6fad84cc30 ("ipv6: add complete rcu protection around np->opt") Reported-by: Dave Jones Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/tcp_ipv6.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index c1938ad39f8cb..c1147acbc8c49 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -465,8 +465,10 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst, fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts)); skb_set_queue_mapping(skb, queue_mapping); + rcu_read_lock(); err = ip6_xmit(sk, skb, fl6, rcu_dereference(np->opt), np->tclass); + rcu_read_unlock(); err = net_xmit_eval(err); } From e7bbeacbafc18a7781036c6111ffd9fe9eaa6ec0 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Mon, 11 Jan 2016 13:42:43 -0500 Subject: [PATCH 1591/2091] tcp_yeah: don't set ssthresh below 2 [ Upstream commit 83d15e70c4d8909d722c0d64747d8fb42e38a48f ] For tcp_yeah, use an ssthresh floor of 2, the same floor used by Reno and CUBIC, per RFC 5681 (equation 4). tcp_yeah_ssthresh() was sometimes returning a 0 or negative ssthresh value if the intended reduction is as big or bigger than the current cwnd. Congestion control modules should never return a zero or negative ssthresh. A zero ssthresh generally results in a zero cwnd, causing the connection to stall. A negative ssthresh value will be interpreted as a u32 and will set a target cwnd for PRR near 4 billion. Oleksandr Natalenko reported that a system using tcp_yeah with ECN could see a warning about a prior_cwnd of 0 in tcp_cwnd_reduction(). Testing verified that this was due to tcp_yeah_ssthresh() misbehaving in this way. Reported-by: Oleksandr Natalenko Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_yeah.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c index 17d35662930d0..3e6a472e6b883 100644 --- a/net/ipv4/tcp_yeah.c +++ b/net/ipv4/tcp_yeah.c @@ -219,7 +219,7 @@ static u32 tcp_yeah_ssthresh(struct sock *sk) yeah->fast_count = 0; yeah->reno_count = max(yeah->reno_count>>1, 2U); - return tp->snd_cwnd - reduction; + return max_t(int, tp->snd_cwnd - reduction, 2); } static struct tcp_congestion_ops tcp_yeah __read_mostly = { From 4bb526bce19d6e4c8cc862ce304b8c072ef43366 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Kube=C4=8Dek?= Date: Mon, 11 Jan 2016 07:50:30 +0100 Subject: [PATCH 1592/2091] udp: disallow UFO for sockets with SO_NO_CHECK option [ Upstream commit 40ba330227ad00b8c0cdf2f425736ff9549cc423 ] Commit acf8dd0a9d0b ("udp: only allow UFO for packets from SOCK_DGRAM sockets") disallows UFO for packets sent from raw sockets. We need to do the same also for SOCK_DGRAM sockets with SO_NO_CHECK options, even if for a bit different reason: while such socket would override the CHECKSUM_PARTIAL set by ip_ufo_append_data(), gso_size is still set and bad offloading flags warning is triggered in __skb_gso_segment(). In the IPv6 case, SO_NO_CHECK option is ignored but we need to disallow UFO for packets sent by sockets with UDP_NO_CHECK6_TX option. Signed-off-by: Michal Kubecek Tested-by: Shannon Nelson Acked-by: Hannes Frederic Sowa Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_output.c | 2 +- net/ipv6/ip6_output.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index c65b93a7b7113..fe16f418e775e 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -893,7 +893,7 @@ static int __ip_append_data(struct sock *sk, if (((length > mtu) || (skb && skb_is_gso(skb))) && (sk->sk_protocol == IPPROTO_UDP) && (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len && - (sk->sk_type == SOCK_DGRAM)) { + (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx) { err = ip_ufo_append_data(sk, queue, getfrag, from, length, hh_len, fragheaderlen, transhdrlen, maxfraglen, flags); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index bc09cb97b8401..f50228b0abe58 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1329,7 +1329,7 @@ static int __ip6_append_data(struct sock *sk, (skb && skb_is_gso(skb))) && (sk->sk_protocol == IPPROTO_UDP) && (rt->dst.dev->features & NETIF_F_UFO) && - (sk->sk_type == SOCK_DGRAM)) { + (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk)) { err = ip6_ufo_append_data(sk, queue, getfrag, from, length, hh_len, fragheaderlen, transhdrlen, mtu, flags, rt); From abefd1b4087b9b5e83e7b4e7689f8b8e3cb2899c Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Fri, 8 Jan 2016 15:21:46 +0300 Subject: [PATCH 1593/2091] net: preserve IP control block during GSO segmentation [ Upstream commit 9207f9d45b0ad071baa128e846d7e7ed85016df3 ] Skb_gso_segment() uses skb control block during segmentation. This patch adds 32-bytes room for previous control block which will be copied into all resulting segments. This patch fixes kernel crash during fragmenting forwarded packets. Fragmentation requires valid IP CB in skb for clearing ip options. Also patch removes custom save/restore in ovs code, now it's redundant. Signed-off-by: Konstantin Khlebnikov Link: http://lkml.kernel.org/r/CALYGNiP-0MZ-FExV2HutTvE9U-QQtkKSoE--KN=JQE5STYsjAA@mail.gmail.com Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/skbuff.h | 3 ++- net/core/dev.c | 5 +++++ net/ipv4/ip_output.c | 1 + net/openvswitch/datapath.c | 5 +---- net/xfrm/xfrm_output.c | 2 ++ 5 files changed, 11 insertions(+), 5 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 4307e20a4a4af..1f17abe237251 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3320,7 +3320,8 @@ struct skb_gso_cb { int encap_level; __u16 csum_start; }; -#define SKB_GSO_CB(skb) ((struct skb_gso_cb *)(skb)->cb) +#define SKB_SGO_CB_OFFSET 32 +#define SKB_GSO_CB(skb) ((struct skb_gso_cb *)((skb)->cb + SKB_SGO_CB_OFFSET)) static inline int skb_tnl_header_len(const struct sk_buff *inner_skb) { diff --git a/net/core/dev.c b/net/core/dev.c index a42b232805a5c..185a3398c651d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2479,6 +2479,8 @@ static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path) * * It may return NULL if the skb requires no segmentation. This is * only possible when GSO is used for verifying header integrity. + * + * Segmentation preserves SKB_SGO_CB_OFFSET bytes of previous skb cb. 
*/ struct sk_buff *__skb_gso_segment(struct sk_buff *skb, netdev_features_t features, bool tx_path) @@ -2493,6 +2495,9 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb, return ERR_PTR(err); } + BUILD_BUG_ON(SKB_SGO_CB_OFFSET + + sizeof(*SKB_GSO_CB(skb)) > sizeof(skb->cb)); + SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb); SKB_GSO_CB(skb)->encap_level = 0; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index fe16f418e775e..51573f8a39bca 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -235,6 +235,7 @@ static int ip_finish_output_gso(struct sock *sk, struct sk_buff *skb) * from host network stack. */ features = netif_skb_features(skb); + BUILD_BUG_ON(sizeof(*IPCB(skb)) > SKB_SGO_CB_OFFSET); segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); if (IS_ERR_OR_NULL(segs)) { kfree_skb(skb); diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 27e14962b5046..b3fe02a2339e4 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -337,12 +337,10 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb, unsigned short gso_type = skb_shinfo(skb)->gso_type; struct sw_flow_key later_key; struct sk_buff *segs, *nskb; - struct ovs_skb_cb ovs_cb; int err; - ovs_cb = *OVS_CB(skb); + BUILD_BUG_ON(sizeof(*OVS_CB(skb)) > SKB_SGO_CB_OFFSET); segs = __skb_gso_segment(skb, NETIF_F_SG, false); - *OVS_CB(skb) = ovs_cb; if (IS_ERR(segs)) return PTR_ERR(segs); if (segs == NULL) @@ -360,7 +358,6 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb, /* Queue all of the segments. */ skb = segs; do { - *OVS_CB(skb) = ovs_cb; if (gso_type & SKB_GSO_UDP && skb != segs) key = &later_key; diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index fbcedbe331903..5097dce5b9166 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -153,6 +153,8 @@ static int xfrm_output_gso(struct sock *sk, struct sk_buff *skb) { struct sk_buff *segs; + BUILD_BUG_ON(sizeof(*IPCB(skb)) > SKB_SGO_CB_OFFSET); + BUILD_BUG_ON(sizeof(*IP6CB(skb)) > SKB_SGO_CB_OFFSET); segs = skb_gso_segment(skb, 0); kfree_skb(skb); if (IS_ERR(segs)) From 7d9fb947be67ec2e0811a2ab6dd0582ad1bae4db Mon Sep 17 00:00:00 2001 From: Karl Heiss Date: Mon, 11 Jan 2016 08:28:43 -0500 Subject: [PATCH 1594/2091] bonding: Prevent IPv6 link local address on enslaved devices [ Upstream commit 03d84a5f83a67e692af00a3d3901e7820e3e84d5 ] Commit 1f718f0f4f97 ("bonding: populate neighbour's private on enslave") undoes the fix provided by commit c2edacf80e15 ("bonding / ipv6: no addrconf for slaves separately from master") by effectively setting the slave flag after the slave has been opened. If the slave comes up quickly enough, it will go through the IPv6 addrconf before the slave flag has been set and will get a link local IPv6 address. In order to ensure that addrconf knows to ignore the slave devices on state change, set IFF_SLAVE before dev_open() during bonding enslavement. Fixes: 1f718f0f4f97 ("bonding: populate neighbour's private on enslave") Signed-off-by: Karl Heiss Signed-off-by: Jay Vosburgh Reviewed-by: Jarod Wilson Signed-off-by: Andy Gospodarek Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/bonding/bond_main.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 16d87bf8ac3cd..72ba774df7a77 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1194,7 +1194,6 @@ static int bond_master_upper_dev_link(struct net_device *bond_dev, err = netdev_master_upper_dev_link_private(slave_dev, bond_dev, slave); if (err) return err; - slave_dev->flags |= IFF_SLAVE; rtmsg_ifinfo(RTM_NEWLINK, slave_dev, IFF_SLAVE, GFP_KERNEL); return 0; } @@ -1452,6 +1451,9 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) } } + /* set slave flag before open to prevent IPv6 addrconf */ + slave_dev->flags |= IFF_SLAVE; + /* open the slave since the application closed it */ res = dev_open(slave_dev); if (res) { @@ -1712,6 +1714,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) dev_close(slave_dev); err_restore_mac: + slave_dev->flags &= ~IFF_SLAVE; if (!bond->params.fail_over_mac || BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) { /* XXX TODO - fom follow mode needs to change master's From 2a1e5e4ab662a159e7e5dfd229597c4752d29b3c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Jan 2016 08:58:00 -0800 Subject: [PATCH 1595/2091] phonet: properly unshare skbs in phonet_rcv() [ Upstream commit 7aaed57c5c2890634cfadf725173c7c68ea4cb4f ] Ivaylo Dimitrov reported a regression caused by commit 7866a621043f ("dev: add per net_device packet type chains"). skb->dev becomes NULL and we crash in __netif_receive_skb_core(). Before above commit, different kind of bugs or corruptions could happen without major crash. But the root cause is that phonet_rcv() can queue skb without checking if skb is shared or not. Many thanks to Ivaylo Dimitrov for his help, diagnosis and tests. Reported-by: Ivaylo Dimitrov Tested-by: Ivaylo Dimitrov Signed-off-by: Eric Dumazet Cc: Remi Denis-Courmont Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/phonet/af_phonet.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index 32ab87d348286..11d0b29ce4b87 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -377,6 +377,10 @@ static int phonet_rcv(struct sk_buff *skb, struct net_device *dev, struct sockaddr_pn sa; u16 len; + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) + return NET_RX_DROP; + /* check we have at least a full Phonet header */ if (!pskb_pull(skb, sizeof(struct phonethdr))) goto out; From dc1cfcc266978d0213d7d4f9f813bafd4e9f709a Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Tue, 12 Jan 2016 20:17:08 +0100 Subject: [PATCH 1596/2091] net: bpf: reject invalid shifts [ Upstream commit 229394e8e62a4191d592842cf67e80c62a492937 ] On ARM64, a BUG() is triggered in the eBPF JIT if a filter with a constant shift that can't be encoded in the immediate field of the UBFM/SBFM instructions is passed to the JIT. Since these shifts amounts, which are negative or >= regsize, are invalid, reject them in the eBPF verifier and the classic BPF filter checker, for all architectures. Signed-off-by: Rabin Vincent Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/verifier.c | 10 ++++++++++ net/core/filter.c | 5 +++++ 2 files changed, 15 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 47dcd3aa6e236..141d562064a72 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1019,6 +1019,16 @@ static int check_alu_op(struct reg_state *regs, struct bpf_insn *insn) return -EINVAL; } + if ((opcode == BPF_LSH || opcode == BPF_RSH || + opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) { + int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32; + + if (insn->imm < 0 || insn->imm >= size) { + verbose("invalid shift %d\n", insn->imm); + return -EINVAL; + } + } + /* pattern match 'bpf_add Rx, imm' instruction */ if (opcode == BPF_ADD && BPF_CLASS(insn->code) == BPF_ALU64 && regs[insn->dst_reg].type == FRAME_PTR && diff --git a/net/core/filter.c b/net/core/filter.c index 0fa2613b5e350..238bb3f9c51dd 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -775,6 +775,11 @@ int bpf_check_classic(const struct sock_filter *filter, unsigned int flen) if (ftest->k == 0) return -EINVAL; break; + case BPF_ALU | BPF_LSH | BPF_K: + case BPF_ALU | BPF_RSH | BPF_K: + if (ftest->k >= 32) + return -EINVAL; + break; case BPF_LD | BPF_MEM: case BPF_LDX | BPF_MEM: case BPF_ST: From 2980502b9f6d3e15a4dd6695d4cfff8ec3213209 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 15 Jan 2016 04:56:56 -0800 Subject: [PATCH 1597/2091] ipv6: update skb->csum when CE mark is propagated [ Upstream commit 34ae6a1aa0540f0f781dd265366036355fdc8930 ] When a tunnel decapsulates the outer header, it has to comply with RFC 6080 and eventually propagate CE mark into inner header. It turns out IP6_ECN_set_ce() does not correctly update skb->csum for CHECKSUM_COMPLETE packets, triggering infamous "hw csum failure" messages and stack traces. Signed-off-by: Eric Dumazet Acked-by: Herbert Xu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/inet_ecn.h | 19 ++++++++++++++++--- net/ipv6/xfrm6_mode_tunnel.c | 2 +- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h index 84b20835b736c..0dc0a51da38fa 100644 --- a/include/net/inet_ecn.h +++ b/include/net/inet_ecn.h @@ -111,11 +111,24 @@ static inline void ipv4_copy_dscp(unsigned int dscp, struct iphdr *inner) struct ipv6hdr; -static inline int IP6_ECN_set_ce(struct ipv6hdr *iph) +/* Note: + * IP_ECN_set_ce() has to tweak IPV4 checksum when setting CE, + * meaning both changes have no effect on skb->csum if/when CHECKSUM_COMPLETE + * In IPv6 case, no checksum compensates the change in IPv6 header, + * so we have to update skb->csum. 
+ */ +static inline int IP6_ECN_set_ce(struct sk_buff *skb, struct ipv6hdr *iph) { + __be32 from, to; + if (INET_ECN_is_not_ect(ipv6_get_dsfield(iph))) return 0; - *(__be32*)iph |= htonl(INET_ECN_CE << 20); + + from = *(__be32 *)iph; + to = from | htonl(INET_ECN_CE << 20); + *(__be32 *)iph = to; + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->csum = csum_add(csum_sub(skb->csum, from), to); return 1; } @@ -142,7 +155,7 @@ static inline int INET_ECN_set_ce(struct sk_buff *skb) case cpu_to_be16(ETH_P_IPV6): if (skb_network_header(skb) + sizeof(struct ipv6hdr) <= skb_tail_pointer(skb)) - return IP6_ECN_set_ce(ipv6_hdr(skb)); + return IP6_ECN_set_ce(skb, ipv6_hdr(skb)); break; } diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index 901ef6f8addc0..5266ad2d64196 100644 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ b/net/ipv6/xfrm6_mode_tunnel.c @@ -24,7 +24,7 @@ static inline void ipip6_ecn_decapsulate(struct sk_buff *skb) struct ipv6hdr *inner_iph = ipipv6_hdr(skb); if (INET_ECN_is_ce(ipv6_get_dsfield(outer_iph))) - IP6_ECN_set_ce(inner_iph); + IP6_ECN_set_ce(skb, inner_iph); } /* Add encapsulation header. From 332fb8799ed1c701339e9b4dad3efbed245b4cf8 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Fri, 15 Jan 2016 19:03:54 +0100 Subject: [PATCH 1598/2091] bridge: fix lockdep addr_list_lock false positive splat [ Upstream commit c6894dec8ea9ae05747124dce98b3b5c2e69b168 ] After promisc mode management was introduced a bridge device could do dev_set_promiscuity from its ndo_change_rx_flags() callback which in turn can be called after the bridge's addr_list_lock has been taken (e.g. by dev_uc_add). This causes a false positive lockdep splat because the port interfaces' addr_list_lock is taken when br_manage_promisc() runs after the bridge's addr list lock was already taken. To remove the false positive introduce a custom bridge addr_list_lock class and set it on bridge init. 
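The shape of that change, as a hedged sketch with illustrative names rather than the bridge code itself: declare one static struct lock_class_key for the upper device and attach it to addr_list_lock at init time, so lockdep no longer files the nested acquisition under the default per-device class:

#include <linux/lockdep.h>
#include <linux/netdevice.h>

/* lockdep requires class keys to live in static storage */
static struct lock_class_key sketch_addr_list_lock_key;

static void sketch_set_lockdep_class(struct net_device *dev)
{
	lockdep_set_class(&dev->addr_list_lock, &sketch_addr_list_lock_key);
}

With the upper device's addr_list_lock in its own class, taking a port's addr_list_lock while holding the upper device's is no longer reported as recursive locking.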
A simple way to reproduce this is with the following: $ brctl addbr br0 $ ip l add l br0 br0.100 type vlan id 100 $ ip l set br0 up $ ip l set br0.100 up $ echo 1 > /sys/class/net/br0/bridge/vlan_filtering $ brctl addif br0 eth0 Splat: [ 43.684325] ============================================= [ 43.684485] [ INFO: possible recursive locking detected ] [ 43.684636] 4.4.0-rc8+ #54 Not tainted [ 43.684755] --------------------------------------------- [ 43.684906] brctl/1187 is trying to acquire lock: [ 43.685047] (_xmit_ETHER){+.....}, at: [] dev_set_rx_mode+0x1e/0x40 [ 43.685460] but task is already holding lock: [ 43.685618] (_xmit_ETHER){+.....}, at: [] dev_uc_add+0x27/0x80 [ 43.686015] other info that might help us debug this: [ 43.686316] Possible unsafe locking scenario: [ 43.686743] CPU0 [ 43.686967] ---- [ 43.687197] lock(_xmit_ETHER); [ 43.687544] lock(_xmit_ETHER); [ 43.687886] *** DEADLOCK *** [ 43.688438] May be due to missing lock nesting notation [ 43.688882] 2 locks held by brctl/1187: [ 43.689134] #0: (rtnl_mutex){+.+.+.}, at: [] rtnl_lock+0x17/0x20 [ 43.689852] #1: (_xmit_ETHER){+.....}, at: [] dev_uc_add+0x27/0x80 [ 43.690575] stack backtrace: [ 43.690970] CPU: 0 PID: 1187 Comm: brctl Not tainted 4.4.0-rc8+ #54 [ 43.691270] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.8.1-20150318_183358- 04/01/2014 [ 43.691770] ffffffff826a25c0 ffff8800369fb8e0 ffffffff81360ceb ffffffff826a25c0 [ 43.692425] ffff8800369fb9b8 ffffffff810d0466 ffff8800369fb968 ffffffff81537139 [ 43.693071] ffff88003a08c880 0000000000000000 00000000ffffffff 0000000002080020 [ 43.693709] Call Trace: [ 43.693931] [] dump_stack+0x4b/0x70 [ 43.694199] [] __lock_acquire+0x1e46/0x1e90 [ 43.694483] [] ? netlink_broadcast_filtered+0x139/0x3e0 [ 43.694789] [] ? nlmsg_notify+0x5a/0xc0 [ 43.695064] [] lock_acquire+0xe5/0x1f0 [ 43.695340] [] ? dev_set_rx_mode+0x1e/0x40 [ 43.695623] [] _raw_spin_lock_bh+0x45/0x80 [ 43.695901] [] ? dev_set_rx_mode+0x1e/0x40 [ 43.696180] [] dev_set_rx_mode+0x1e/0x40 [ 43.696460] [] dev_set_promiscuity+0x3c/0x50 [ 43.696750] [] br_port_set_promisc+0x25/0x50 [bridge] [ 43.697052] [] br_manage_promisc+0x8a/0xe0 [bridge] [ 43.697348] [] br_dev_change_rx_flags+0x1e/0x20 [bridge] [ 43.697655] [] __dev_set_promiscuity+0x132/0x1f0 [ 43.697943] [] __dev_set_rx_mode+0x82/0x90 [ 43.698223] [] dev_uc_add+0x5e/0x80 [ 43.698498] [] vlan_device_event+0x542/0x650 [8021q] [ 43.698798] [] notifier_call_chain+0x5d/0x80 [ 43.699083] [] raw_notifier_call_chain+0x16/0x20 [ 43.699374] [] call_netdevice_notifiers_info+0x6e/0x80 [ 43.699678] [] call_netdevice_notifiers+0x16/0x20 [ 43.699973] [] br_add_if+0x47e/0x4c0 [bridge] [ 43.700259] [] add_del_if+0x6e/0x80 [bridge] [ 43.700548] [] br_dev_ioctl+0xaf/0xc0 [bridge] [ 43.700836] [] dev_ifsioc+0x30c/0x3c0 [ 43.701106] [] dev_ioctl+0xf9/0x6f0 [ 43.701379] [] ? mntput_no_expire+0x5/0x450 [ 43.701665] [] ? mntput_no_expire+0xae/0x450 [ 43.701947] [] sock_do_ioctl+0x42/0x50 [ 43.702219] [] sock_ioctl+0x1e5/0x290 [ 43.702500] [] do_vfs_ioctl+0x2cb/0x5c0 [ 43.702771] [] SyS_ioctl+0x79/0x90 [ 43.703033] [] entry_SYSCALL_64_fastpath+0x16/0x7a CC: Vlad Yasevich CC: Stephen Hemminger CC: Bridge list CC: Andy Gospodarek CC: Roopa Prabhu Fixes: 2796d0c648c9 ("bridge: Automatically manage port promiscuous mode.") Reported-by: Andy Gospodarek Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_device.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 4ff77a16956c2..3d6c8e2223916 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -28,6 +28,8 @@ const struct nf_br_ops __rcu *nf_br_ops __read_mostly; EXPORT_SYMBOL_GPL(nf_br_ops); +static struct lock_class_key bridge_netdev_addr_lock_key; + /* net device transmit always called with BH disabled */ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) { @@ -87,6 +89,11 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } +static void br_set_lockdep_class(struct net_device *dev) +{ + lockdep_set_class(&dev->addr_list_lock, &bridge_netdev_addr_lock_key); +} + static int br_dev_init(struct net_device *dev) { struct net_bridge *br = netdev_priv(dev); @@ -99,6 +106,7 @@ static int br_dev_init(struct net_device *dev) err = br_vlan_init(br); if (err) free_percpu(br->stats); + br_set_lockdep_class(dev); return err; } From 479b539a310152b10c1747ac8c5354ed7bc25181 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 19 Sep 2015 09:08:34 -0700 Subject: [PATCH 1599/2091] tcp/dccp: fix timewait races in timer handling [ Upstream commit ed2e923945892a8372ab70d2f61d364b0b6d9054 ] When creating a timewait socket, we need to arm the timer before allowing other cpus to find it. The signal allowing cpus to find the socket is setting tw_refcnt to non zero value. As we set tw_refcnt in __inet_twsk_hashdance(), we therefore need to call inet_twsk_schedule() first. This also means we need to remove tw_refcnt changes from inet_twsk_schedule() and let the caller handle it. Note that because we use mod_timer_pinned(), we have the guarantee the timer wont expire before we set tw_refcnt as we run in BH context. To make things more readable I introduced inet_twsk_reschedule() helper. When rearming the timer, we can use mod_timer_pending() to make sure we do not rearm a canceled timer. Note: This bug can possibly trigger if packets of a flow can hit multiple cpus. This does not normally happen, unless flow steering is broken somehow. This explains this bug was spotted ~5 months after its introduction. A similar fix is needed for SYN_RECV sockets in reqsk_queue_hash_req(), but will be provided in a separate patch for proper tracking. Fixes: 789f558cfb36 ("tcp/dccp: get rid of central timewait timer") Signed-off-by: Eric Dumazet Reported-by: Ying Cai Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- include/net/inet_timewait_sock.h | 14 +++++++++++++- net/dccp/minisocks.c | 4 ++-- net/ipv4/inet_timewait_sock.c | 16 ++++++++++------ net/ipv4/tcp_minisocks.c | 13 ++++++------- 4 files changed, 31 insertions(+), 16 deletions(-) diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 360c4802288db..95a5a77f5c142 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -112,7 +112,19 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, struct inet_hashinfo *hashinfo); -void inet_twsk_schedule(struct inet_timewait_sock *tw, const int timeo); +void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, + bool rearm); + +static void inline inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo) +{ + __inet_twsk_schedule(tw, timeo, false); +} + +static void inline inet_twsk_reschedule(struct inet_timewait_sock *tw, int timeo) +{ + __inet_twsk_schedule(tw, timeo, true); +} + void inet_twsk_deschedule(struct inet_timewait_sock *tw); void inet_twsk_purge(struct inet_hashinfo *hashinfo, diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 30addee2dd037..838f524cf11a1 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -48,8 +48,6 @@ void dccp_time_wait(struct sock *sk, int state, int timeo) tw->tw_ipv6only = sk->sk_ipv6only; } #endif - /* Linkage updates. */ - __inet_twsk_hashdance(tw, sk, &dccp_hashinfo); /* Get the TIME_WAIT timeout firing. */ if (timeo < rto) @@ -60,6 +58,8 @@ void dccp_time_wait(struct sock *sk, int state, int timeo) timeo = DCCP_TIMEWAIT_LEN; inet_twsk_schedule(tw, timeo); + /* Linkage updates. */ + __inet_twsk_hashdance(tw, sk, &dccp_hashinfo); inet_twsk_put(tw); } else { /* Sorry, if we're out of memory, just CLOSE this diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 00ec8d5d7e7ee..bb96c1c4edd68 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -153,13 +153,15 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, /* * Step 2: Hash TW into tcp ehash chain. * Notes : - * - tw_refcnt is set to 3 because : + * - tw_refcnt is set to 4 because : * - We have one reference from bhash chain. * - We have one reference from ehash chain. + * - We have one reference from timer. + * - One reference for ourself (our caller will release it). * We can use atomic_set() because prior spin_lock()/spin_unlock() * committed into memory all tw fields. 
*/ - atomic_set(&tw->tw_refcnt, 1 + 1 + 1); + atomic_set(&tw->tw_refcnt, 4); inet_twsk_add_node_rcu(tw, &ehead->chain); /* Step 3: Remove SK from hash chain */ @@ -243,7 +245,7 @@ void inet_twsk_deschedule(struct inet_timewait_sock *tw) } EXPORT_SYMBOL(inet_twsk_deschedule); -void inet_twsk_schedule(struct inet_timewait_sock *tw, const int timeo) +void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, bool rearm) { /* timeout := RTO * 3.5 * @@ -271,12 +273,14 @@ void inet_twsk_schedule(struct inet_timewait_sock *tw, const int timeo) */ tw->tw_kill = timeo <= 4*HZ; - if (!mod_timer_pinned(&tw->tw_timer, jiffies + timeo)) { - atomic_inc(&tw->tw_refcnt); + if (!rearm) { + BUG_ON(mod_timer_pinned(&tw->tw_timer, jiffies + timeo)); atomic_inc(&tw->tw_dr->tw_count); + } else { + mod_timer_pending(&tw->tw_timer, jiffies + timeo); } } -EXPORT_SYMBOL_GPL(inet_twsk_schedule); +EXPORT_SYMBOL_GPL(__inet_twsk_schedule); void inet_twsk_purge(struct inet_hashinfo *hashinfo, struct inet_timewait_death_row *twdr, int family) diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 17e7339ee5cad..fec2907b85e81 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -163,9 +163,9 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, if (tcp_death_row.sysctl_tw_recycle && tcptw->tw_ts_recent_stamp && tcp_tw_remember_stamp(tw)) - inet_twsk_schedule(tw, tw->tw_timeout); + inet_twsk_reschedule(tw, tw->tw_timeout); else - inet_twsk_schedule(tw, TCP_TIMEWAIT_LEN); + inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN); return TCP_TW_ACK; } @@ -203,7 +203,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, return TCP_TW_SUCCESS; } } - inet_twsk_schedule(tw, TCP_TIMEWAIT_LEN); + inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN); if (tmp_opt.saw_tstamp) { tcptw->tw_ts_recent = tmp_opt.rcv_tsval; @@ -253,7 +253,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, * Do not reschedule in the last case. */ if (paws_reject || th->ack) - inet_twsk_schedule(tw, TCP_TIMEWAIT_LEN); + inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN); return tcp_timewait_check_oow_rate_limit( tw, skb, LINUX_MIB_TCPACKSKIPPEDTIMEWAIT); @@ -324,9 +324,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) } while (0); #endif - /* Linkage updates. */ - __inet_twsk_hashdance(tw, sk, &tcp_hashinfo); - /* Get the TIME_WAIT timeout firing. */ if (timeo < rto) timeo = rto; @@ -340,6 +337,8 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) } inet_twsk_schedule(tw, timeo); + /* Linkage updates. */ + __inet_twsk_hashdance(tw, sk, &tcp_hashinfo); inet_twsk_put(tw); } else { /* Sorry, if we're out of memory, just CLOSE this From 9ba3d77689a5d07aa5020dbf38d10ce8f641aa16 Mon Sep 17 00:00:00 2001 From: Raanan Avargil Date: Thu, 1 Oct 2015 04:48:53 -0700 Subject: [PATCH 1600/2091] tcp/dccp: fix old style declarations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 8695a144da9e500a5a60fa34c06694346ec1048f ] I’m using the compilation flag -Werror=old-style-declaration, which requires that the “inline” word would come at the beginning of the code line. $ make drivers/net/ethernet/intel/e1000e/e1000e.ko ... 
include/net/inet_timewait_sock.h:116:1: error: ‘inline’ is not at beginning of declaration [-Werror=old-style-declaration] static void inline inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo) include/net/inet_timewait_sock.h:121:1: error: ‘inline’ is not at beginning of declaration [-Werror=old-style-declaration] static void inline inet_twsk_reschedule(struct inet_timewait_sock *tw, int timeo) Fixes: ed2e92394589 ("tcp/dccp: fix timewait races in timer handling") Signed-off-by: Raanan Avargil Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/inet_timewait_sock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 95a5a77f5c142..7682cb2ae2371 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -115,12 +115,12 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, bool rearm); -static void inline inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo) +static inline void inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo) { __inet_twsk_schedule(tw, timeo, false); } -static void inline inet_twsk_reschedule(struct inet_timewait_sock *tw, int timeo) +static inline void inet_twsk_reschedule(struct inet_timewait_sock *tw, int timeo) { __inet_twsk_schedule(tw, timeo, true); } From 5984398539a2c47834caf1b00dc9f58b7bb2e67a Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sun, 1 Nov 2015 16:21:24 +0000 Subject: [PATCH 1601/2091] isdn_ppp: Add checks for allocation failure in isdn_ppp_open() [ Upstream commit 0baa57d8dc32db78369d8b5176ef56c5e2e18ab3 ] Compile-tested only. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/isdn/i4l/isdn_ppp.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/isdn/i4l/isdn_ppp.c b/drivers/isdn/i4l/isdn_ppp.c index c4198fa490bfa..86f9abebcb72a 100644 --- a/drivers/isdn/i4l/isdn_ppp.c +++ b/drivers/isdn/i4l/isdn_ppp.c @@ -301,6 +301,8 @@ isdn_ppp_open(int min, struct file *file) is->compflags = 0; is->reset = isdn_ppp_ccp_reset_alloc(is); + if (!is->reset) + return -ENOMEM; is->lp = NULL; is->mp_seqno = 0; /* MP sequence number */ @@ -320,6 +322,10 @@ isdn_ppp_open(int min, struct file *file) * VJ header compression init */ is->slcomp = slhc_init(16, 16); /* not necessary for 2. link in bundle */ + if (!is->slcomp) { + isdn_ppp_ccp_reset_free(is); + return -ENOMEM; + } #endif #ifdef CONFIG_IPPP_FILTER is->pass_filter = NULL; From a50a93cc99286dc444c7e5ccc7dfb9d58c2d346d Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sun, 1 Nov 2015 16:22:53 +0000 Subject: [PATCH 1602/2091] ppp, slip: Validate VJ compression slot parameters completely MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 4ab42d78e37a294ac7bc56901d563c642e03c4ae ] Currently slhc_init() treats out-of-range values of rslots and tslots as equivalent to 0, except that if tslots is too large it will dereference a null pointer (CVE-2015-7799). Add a range-check at the top of the function and make it return an ERR_PTR() on error instead of NULL. Change the callers accordingly. Compile-tested only. Reported-by: 郭永刚 References: http://article.gmane.org/gmane.comp.security.oss.general/17908 Signed-off-by: Ben Hutchings Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/isdn/i4l/isdn_ppp.c | 10 ++++------ drivers/net/ppp/ppp_generic.c | 6 ++---- drivers/net/slip/slhc.c | 12 ++++++++---- drivers/net/slip/slip.c | 2 +- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/isdn/i4l/isdn_ppp.c b/drivers/isdn/i4l/isdn_ppp.c index 86f9abebcb72a..9c1e8adaf4fc8 100644 --- a/drivers/isdn/i4l/isdn_ppp.c +++ b/drivers/isdn/i4l/isdn_ppp.c @@ -322,9 +322,9 @@ isdn_ppp_open(int min, struct file *file) * VJ header compression init */ is->slcomp = slhc_init(16, 16); /* not necessary for 2. link in bundle */ - if (!is->slcomp) { + if (IS_ERR(is->slcomp)) { isdn_ppp_ccp_reset_free(is); - return -ENOMEM; + return PTR_ERR(is->slcomp); } #endif #ifdef CONFIG_IPPP_FILTER @@ -573,10 +573,8 @@ isdn_ppp_ioctl(int min, struct file *file, unsigned int cmd, unsigned long arg) is->maxcid = val; #ifdef CONFIG_ISDN_PPP_VJ sltmp = slhc_init(16, val); - if (!sltmp) { - printk(KERN_ERR "ippp, can't realloc slhc struct\n"); - return -ENOMEM; - } + if (IS_ERR(sltmp)) + return PTR_ERR(sltmp); if (is->slcomp) slhc_free(is->slcomp); is->slcomp = sltmp; diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 9d15566521a71..cfe49a07c7c12 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -715,10 +715,8 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg) val &= 0xffff; } vj = slhc_init(val2+1, val+1); - if (!vj) { - netdev_err(ppp->dev, - "PPP: no memory (VJ compressor)\n"); - err = -ENOMEM; + if (IS_ERR(vj)) { + err = PTR_ERR(vj); break; } ppp_lock(ppp); diff --git a/drivers/net/slip/slhc.c b/drivers/net/slip/slhc.c index 079f7adfcde5e..27ed25252aac5 100644 --- a/drivers/net/slip/slhc.c +++ b/drivers/net/slip/slhc.c @@ -84,8 +84,9 @@ static long decode(unsigned char **cpp); static unsigned char * put16(unsigned char *cp, unsigned short x); static unsigned short pull16(unsigned char **cpp); -/* Initialize compression data structure +/* Allocate compression data structure * slots must be in range 0 to 255 (zero meaning no compression) + * Returns pointer to structure or ERR_PTR() on error. */ struct slcompress * slhc_init(int rslots, int tslots) @@ -94,11 +95,14 @@ slhc_init(int rslots, int tslots) register struct cstate *ts; struct slcompress *comp; + if (rslots < 0 || rslots > 255 || tslots < 0 || tslots > 255) + return ERR_PTR(-EINVAL); + comp = kzalloc(sizeof(struct slcompress), GFP_KERNEL); if (! comp) goto out_fail; - if ( rslots > 0 && rslots < 256 ) { + if (rslots > 0) { size_t rsize = rslots * sizeof(struct cstate); comp->rstate = kzalloc(rsize, GFP_KERNEL); if (! comp->rstate) @@ -106,7 +110,7 @@ slhc_init(int rslots, int tslots) comp->rslot_limit = rslots - 1; } - if ( tslots > 0 && tslots < 256 ) { + if (tslots > 0) { size_t tsize = tslots * sizeof(struct cstate); comp->tstate = kzalloc(tsize, GFP_KERNEL); if (! 
comp->tstate) @@ -141,7 +145,7 @@ slhc_init(int rslots, int tslots) out_free: kfree(comp); out_fail: - return NULL; + return ERR_PTR(-ENOMEM); } diff --git a/drivers/net/slip/slip.c b/drivers/net/slip/slip.c index 05387b1e2e95e..a17d86a577347 100644 --- a/drivers/net/slip/slip.c +++ b/drivers/net/slip/slip.c @@ -164,7 +164,7 @@ static int sl_alloc_bufs(struct slip *sl, int mtu) if (cbuff == NULL) goto err_exit; slcomp = slhc_init(16, 16); - if (slcomp == NULL) + if (IS_ERR(slcomp)) goto err_exit; #endif spin_lock_bh(&sl->lock); From 016cb1d02db9840a62c0e2a465ffd6617eacffc7 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Thu, 14 Jan 2016 15:28:19 +0100 Subject: [PATCH 1603/2091] batman-adv: Avoid recursive call_rcu for batadv_bla_claim [ Upstream commit 63b399272294e7a939cde41792dca38c549f0484 ] The batadv_claim_free_ref function uses call_rcu to delay the free of the batadv_bla_claim object until no (already started) rcu_read_lock is enabled anymore. This makes sure that no context is still trying to access the object which should be removed. But batadv_bla_claim also contains a reference to backbone_gw which must be removed. The reference drop of backbone_gw was done in the call_rcu function batadv_claim_free_rcu but should actually be done in the batadv_claim_release function to avoid nested call_rcus. This is important because rcu_barrier (e.g. batadv_softif_free or batadv_exit) will not detect the inner call_rcu as relevant for its execution. Otherwise this barrier will most likely be inserted in the queue before the callback of the first call_rcu was executed. The caller of rcu_barrier will therefore continue to run before the inner call_rcu callback finished. Fixes: 23721387c409 ("batman-adv: add basic bridge loop avoidance code") Signed-off-by: Sven Eckelmann Acked-by: Simon Wunderlich Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/bridge_loop_avoidance.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index ac4b96eccadeb..bd3357e69c5cb 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -112,21 +112,17 @@ batadv_backbone_gw_free_ref(struct batadv_bla_backbone_gw *backbone_gw) } /* finally deinitialize the claim */ -static void batadv_claim_free_rcu(struct rcu_head *rcu) +static void batadv_claim_release(struct batadv_bla_claim *claim) { - struct batadv_bla_claim *claim; - - claim = container_of(rcu, struct batadv_bla_claim, rcu); - batadv_backbone_gw_free_ref(claim->backbone_gw); - kfree(claim); + kfree_rcu(claim, rcu); } /* free a claim, call claim_free_rcu if its the last reference */ static void batadv_claim_free_ref(struct batadv_bla_claim *claim) { if (atomic_dec_and_test(&claim->refcount)) - call_rcu(&claim->rcu, batadv_claim_free_rcu); + batadv_claim_release(claim); } /** From 34c5bf7c7bf4e285274e28a36b08cbf3da5bd3e3 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Tue, 5 Jan 2016 12:06:19 +0100 Subject: [PATCH 1604/2091] batman-adv: Avoid recursive call_rcu for batadv_nc_node [ Upstream commit 44e8e7e91d6c7c7ab19688750f7257292640d1a0 ] The batadv_nc_node_free_ref function uses call_rcu to delay the free of the batadv_nc_node object until no (already started) rcu_read_lock is enabled anymore. This makes sure that no context is still trying to access the object which should be removed. 
But batadv_nc_node also contains a reference to orig_node which must be removed. The reference drop of orig_node was done in the call_rcu function batadv_nc_node_free_rcu but should actually be done in the batadv_nc_node_release function to avoid nested call_rcus. This is important because rcu_barrier (e.g. batadv_softif_free or batadv_exit) will not detect the inner call_rcu as relevant for its execution. Otherwise this barrier will most likely be inserted in the queue before the callback of the first call_rcu was executed. The caller of rcu_barrier will therefore continue to run before the inner call_rcu callback finished. Fixes: d56b1705e28c ("batman-adv: network coding - detect coding nodes and remove these after timeout") Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/network-coding.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index a449195c5b2b3..2fbd3a6bde9ae 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -175,28 +175,25 @@ void batadv_nc_init_orig(struct batadv_orig_node *orig_node) } /** - * batadv_nc_node_free_rcu - rcu callback to free an nc node and remove - * its refcount on the orig_node - * @rcu: rcu pointer of the nc node + * batadv_nc_node_release - release nc_node from lists and queue for free after + * rcu grace period + * @nc_node: the nc node to free */ -static void batadv_nc_node_free_rcu(struct rcu_head *rcu) +static void batadv_nc_node_release(struct batadv_nc_node *nc_node) { - struct batadv_nc_node *nc_node; - - nc_node = container_of(rcu, struct batadv_nc_node, rcu); batadv_orig_node_free_ref(nc_node->orig_node); - kfree(nc_node); + kfree_rcu(nc_node, rcu); } /** - * batadv_nc_node_free_ref - decrements the nc node refcounter and possibly - * frees it + * batadv_nc_node_free_ref - decrement the nc node refcounter and possibly + * release it * @nc_node: the nc node to free */ static void batadv_nc_node_free_ref(struct batadv_nc_node *nc_node) { if (atomic_dec_and_test(&nc_node->refcount)) - call_rcu(&nc_node->rcu, batadv_nc_node_free_rcu); + batadv_nc_node_release(nc_node); } /** From 9d188c6b672c7d205f43da74b534d2114371edc5 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Tue, 5 Jan 2016 12:06:21 +0100 Subject: [PATCH 1605/2091] batman-adv: Drop immediate batadv_orig_ifinfo free function [ Upstream commit deed96605f5695cb945e0b3d79429581857a2b9d ] It is not allowed to free the memory of an object which is part of a list which is protected by rcu-read-side-critical sections without making sure that no other context is accessing the object anymore. This usually happens by removing the references to this object and then waiting until the rcu grace period is over and no one (allowedly) accesses it anymore. But the _now functions ignore this completely. They free the object directly even when a different context still tries to access it. This has to be avoided and thus these functions must be removed and all functions have to use batadv_orig_ifinfo_free_ref. 
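[Editorial sketch, not part of any patch in this series: the names struct foo/bar, bar_put(), foo_put() and foo_put_old() are invented stand-ins. It shows the shape all of these batman-adv conversions take — the release helper drops any nested reference synchronously and defers only the final free with kfree_rcu(), so no call_rcu() is ever queued from inside another RCU callback.]

```c
#include <linux/atomic.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

/* Hypothetical objects: foo holds a counted reference to bar. */
struct bar { atomic_t refcount; struct rcu_head rcu; };
struct foo { struct bar *inner; atomic_t refcount; struct rcu_head rcu; };

static void bar_free_rcu(struct rcu_head *rcu)
{
	kfree(container_of(rcu, struct bar, rcu));
}

static void bar_put(struct bar *bar)
{
	if (atomic_dec_and_test(&bar->refcount))
		call_rcu(&bar->rcu, bar_free_rcu);
}

/* Removed pattern (shown for contrast): the nested reference is dropped
 * from inside an RCU callback, so bar_put() may queue a *second*
 * call_rcu() that an rcu_barrier() issued by the outer caller will not
 * wait for. */
static void foo_free_rcu(struct rcu_head *rcu)
{
	struct foo *foo = container_of(rcu, struct foo, rcu);

	bar_put(foo->inner);
	kfree(foo);
}

static void foo_put_old(struct foo *foo)
{
	if (atomic_dec_and_test(&foo->refcount))
		call_rcu(&foo->rcu, foo_free_rcu);
}

/* Converted pattern: drop the nested reference right away and defer only
 * the final kfree via kfree_rcu(), which needs the name of the embedded
 * struct rcu_head member. */
static void foo_release(struct foo *foo)
{
	bar_put(foo->inner);
	kfree_rcu(foo, rcu);
}

static void foo_put(struct foo *foo)
{
	if (atomic_dec_and_test(&foo->refcount))
		foo_release(foo);
}
```

[Because kfree_rcu() only schedules the trailing kfree(), each converted structure keeps its struct rcu_head member even though the explicit callback functions go away.]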
Fixes: 7351a4822d42 ("batman-adv: split out router from orig_node") Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/originator.c | 59 +++++++++++++++++++------------------ 1 file changed, 31 insertions(+), 28 deletions(-) diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index dfae97408628d..8327f468972a2 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -516,76 +516,79 @@ static void batadv_orig_ifinfo_free_rcu(struct rcu_head *rcu) } /** - * batadv_orig_ifinfo_free_ref - decrement the refcounter and possibly free - * the orig_ifinfo (without rcu callback) + * batadv_orig_ifinfo_free_ref - decrement the refcounter and possibly release + * the orig_ifinfo * @orig_ifinfo: the orig_ifinfo object to release */ -static void -batadv_orig_ifinfo_free_ref_now(struct batadv_orig_ifinfo *orig_ifinfo) +void batadv_orig_ifinfo_free_ref(struct batadv_orig_ifinfo *orig_ifinfo) { if (atomic_dec_and_test(&orig_ifinfo->refcount)) - batadv_orig_ifinfo_free_rcu(&orig_ifinfo->rcu); + call_rcu(&orig_ifinfo->rcu, batadv_orig_ifinfo_free_rcu); } /** - * batadv_orig_ifinfo_free_ref - decrement the refcounter and possibly free - * the orig_ifinfo - * @orig_ifinfo: the orig_ifinfo object to release + * batadv_orig_node_free_rcu - free the orig_node + * @rcu: rcu pointer of the orig_node */ -void batadv_orig_ifinfo_free_ref(struct batadv_orig_ifinfo *orig_ifinfo) +static void batadv_orig_node_free_rcu(struct rcu_head *rcu) { - if (atomic_dec_and_test(&orig_ifinfo->refcount)) - call_rcu(&orig_ifinfo->rcu, batadv_orig_ifinfo_free_rcu); + struct batadv_orig_node *orig_node; + + orig_node = container_of(rcu, struct batadv_orig_node, rcu); + + batadv_mcast_purge_orig(orig_node); + + batadv_frag_purge_orig(orig_node, NULL); + + if (orig_node->bat_priv->bat_algo_ops->bat_orig_free) + orig_node->bat_priv->bat_algo_ops->bat_orig_free(orig_node); + + kfree(orig_node->tt_buff); + kfree(orig_node); } -static void batadv_orig_node_free_rcu(struct rcu_head *rcu) +/** + * batadv_orig_node_release - release orig_node from lists and queue for + * free after rcu grace period + * @orig_node: the orig node to free + */ +static void batadv_orig_node_release(struct batadv_orig_node *orig_node) { struct hlist_node *node_tmp; struct batadv_neigh_node *neigh_node; - struct batadv_orig_node *orig_node; struct batadv_orig_ifinfo *orig_ifinfo; - orig_node = container_of(rcu, struct batadv_orig_node, rcu); - spin_lock_bh(&orig_node->neigh_list_lock); /* for all neighbors towards this originator ... 
*/ hlist_for_each_entry_safe(neigh_node, node_tmp, &orig_node->neigh_list, list) { hlist_del_rcu(&neigh_node->list); - batadv_neigh_node_free_ref_now(neigh_node); + batadv_neigh_node_free_ref(neigh_node); } hlist_for_each_entry_safe(orig_ifinfo, node_tmp, &orig_node->ifinfo_list, list) { hlist_del_rcu(&orig_ifinfo->list); - batadv_orig_ifinfo_free_ref_now(orig_ifinfo); + batadv_orig_ifinfo_free_ref(orig_ifinfo); } spin_unlock_bh(&orig_node->neigh_list_lock); - batadv_mcast_purge_orig(orig_node); - /* Free nc_nodes */ batadv_nc_purge_orig(orig_node->bat_priv, orig_node, NULL); - batadv_frag_purge_orig(orig_node, NULL); - - if (orig_node->bat_priv->bat_algo_ops->bat_orig_free) - orig_node->bat_priv->bat_algo_ops->bat_orig_free(orig_node); - - kfree(orig_node->tt_buff); - kfree(orig_node); + call_rcu(&orig_node->rcu, batadv_orig_node_free_rcu); } /** * batadv_orig_node_free_ref - decrement the orig node refcounter and possibly - * schedule an rcu callback for freeing it + * release it * @orig_node: the orig node to free */ void batadv_orig_node_free_ref(struct batadv_orig_node *orig_node) { if (atomic_dec_and_test(&orig_node->refcount)) - call_rcu(&orig_node->rcu, batadv_orig_node_free_rcu); + batadv_orig_node_release(orig_node); } /** From 620493a90c78ce1bd150d48ca6f624ad964e0c8d Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Tue, 5 Jan 2016 12:06:22 +0100 Subject: [PATCH 1606/2091] batman-adv: Drop immediate batadv_neigh_node free function [ Upstream commit 2baa753c276f27f8e844637561ad597867aa6fb6 ] It is not allowed to free the memory of an object which is part of a list which is protected by rcu-read-side-critical sections without making sure that no other context is accessing the object anymore. This usually happens by removing the references to this object and then waiting until the rcu grace period is over and no one (allowedly) accesses it anymore. But the _now functions ignore this completely. They free the object directly even when a different context still tries to access it. This has to be avoided and thus these functions must be removed and all functions have to use batadv_neigh_node_free_ref. 
Fixes: 89652331c00f ("batman-adv: split tq information in neigh_node struct") Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/originator.c | 33 ++++++++++----------------------- 1 file changed, 10 insertions(+), 23 deletions(-) diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 8327f468972a2..c816c668a9465 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -209,21 +209,9 @@ static void batadv_neigh_node_free_rcu(struct rcu_head *rcu) kfree(neigh_node); } -/** - * batadv_neigh_node_free_ref_now - decrement the neighbors refcounter - * and possibly free it (without rcu callback) - * @neigh_node: neigh neighbor to free - */ -static void -batadv_neigh_node_free_ref_now(struct batadv_neigh_node *neigh_node) -{ - if (atomic_dec_and_test(&neigh_node->refcount)) - batadv_neigh_node_free_rcu(&neigh_node->rcu); -} - /** * batadv_neigh_node_free_ref - decrement the neighbors refcounter - * and possibly free it + * and possibly release it * @neigh_node: neigh neighbor to free */ void batadv_neigh_node_free_ref(struct batadv_neigh_node *neigh_node) @@ -495,24 +483,23 @@ batadv_neigh_node_get(const struct batadv_orig_node *orig_node, } /** - * batadv_orig_ifinfo_free_rcu - free the orig_ifinfo object - * @rcu: rcu pointer of the orig_ifinfo object + * batadv_orig_ifinfo_release - release orig_ifinfo from lists and queue for + * free after rcu grace period + * @orig_ifinfo: the orig_ifinfo object to release */ -static void batadv_orig_ifinfo_free_rcu(struct rcu_head *rcu) +static void batadv_orig_ifinfo_release(struct batadv_orig_ifinfo *orig_ifinfo) { - struct batadv_orig_ifinfo *orig_ifinfo; struct batadv_neigh_node *router; - orig_ifinfo = container_of(rcu, struct batadv_orig_ifinfo, rcu); - if (orig_ifinfo->if_outgoing != BATADV_IF_DEFAULT) - batadv_hardif_free_ref_now(orig_ifinfo->if_outgoing); + batadv_hardif_free_ref(orig_ifinfo->if_outgoing); /* this is the last reference to this object */ router = rcu_dereference_protected(orig_ifinfo->router, true); if (router) - batadv_neigh_node_free_ref_now(router); - kfree(orig_ifinfo); + batadv_neigh_node_free_ref(router); + + kfree_rcu(orig_ifinfo, rcu); } /** @@ -523,7 +510,7 @@ static void batadv_orig_ifinfo_free_rcu(struct rcu_head *rcu) void batadv_orig_ifinfo_free_ref(struct batadv_orig_ifinfo *orig_ifinfo) { if (atomic_dec_and_test(&orig_ifinfo->refcount)) - call_rcu(&orig_ifinfo->rcu, batadv_orig_ifinfo_free_rcu); + batadv_orig_ifinfo_release(orig_ifinfo); } /** From 924224c6e44a393a2a41385d699230384ea23df5 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Tue, 5 Jan 2016 12:06:24 +0100 Subject: [PATCH 1607/2091] batman-adv: Drop immediate neigh_ifinfo free function [ Upstream commit ae3e1e36e3cb6c686a7a2725af20ca86aa46d62a ] It is not allowed to free the memory of an object which is part of a list which is protected by rcu-read-side-critical sections without making sure that no other context is accessing the object anymore. This usually happens by removing the references to this object and then waiting until the rcu grace period is over and no one (allowedly) accesses it anymore. But the _now functions ignore this completely. They free the object directly even when a different context still tries to access it. This has to be avoided and thus these functions must be removed and all functions have to use batadv_neigh_ifinfo_free_ref. 
Fixes: 89652331c00f ("batman-adv: split tq information in neigh_node struct") Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/originator.c | 34 ++++++++++------------------------ 1 file changed, 10 insertions(+), 24 deletions(-) diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index c816c668a9465..23db96f0ee2ac 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -150,42 +150,28 @@ int batadv_originator_init(struct batadv_priv *bat_priv) } /** - * batadv_neigh_ifinfo_free_rcu - free the neigh_ifinfo object - * @rcu: rcu pointer of the neigh_ifinfo object - */ -static void batadv_neigh_ifinfo_free_rcu(struct rcu_head *rcu) -{ - struct batadv_neigh_ifinfo *neigh_ifinfo; - - neigh_ifinfo = container_of(rcu, struct batadv_neigh_ifinfo, rcu); - - if (neigh_ifinfo->if_outgoing != BATADV_IF_DEFAULT) - batadv_hardif_free_ref_now(neigh_ifinfo->if_outgoing); - - kfree(neigh_ifinfo); -} - -/** - * batadv_neigh_ifinfo_free_now - decrement the refcounter and possibly free - * the neigh_ifinfo (without rcu callback) + * batadv_neigh_ifinfo_release - release neigh_ifinfo from lists and queue for + * free after rcu grace period * @neigh_ifinfo: the neigh_ifinfo object to release */ static void -batadv_neigh_ifinfo_free_ref_now(struct batadv_neigh_ifinfo *neigh_ifinfo) +batadv_neigh_ifinfo_release(struct batadv_neigh_ifinfo *neigh_ifinfo) { - if (atomic_dec_and_test(&neigh_ifinfo->refcount)) - batadv_neigh_ifinfo_free_rcu(&neigh_ifinfo->rcu); + if (neigh_ifinfo->if_outgoing != BATADV_IF_DEFAULT) + batadv_hardif_free_ref(neigh_ifinfo->if_outgoing); + + kfree_rcu(neigh_ifinfo, rcu); } /** - * batadv_neigh_ifinfo_free_ref - decrement the refcounter and possibly free + * batadv_neigh_ifinfo_free_ref - decrement the refcounter and possibly release * the neigh_ifinfo * @neigh_ifinfo: the neigh_ifinfo object to release */ void batadv_neigh_ifinfo_free_ref(struct batadv_neigh_ifinfo *neigh_ifinfo) { if (atomic_dec_and_test(&neigh_ifinfo->refcount)) - call_rcu(&neigh_ifinfo->rcu, batadv_neigh_ifinfo_free_rcu); + batadv_neigh_ifinfo_release(neigh_ifinfo); } /** @@ -202,7 +188,7 @@ static void batadv_neigh_node_free_rcu(struct rcu_head *rcu) hlist_for_each_entry_safe(neigh_ifinfo, node_tmp, &neigh_node->ifinfo_list, list) { - batadv_neigh_ifinfo_free_ref_now(neigh_ifinfo); + batadv_neigh_ifinfo_free_ref(neigh_ifinfo); } batadv_hardif_free_ref_now(neigh_node->if_incoming); From ae3eb44e0e8b4e0fe4ab51e0a91d76517abc1e30 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Tue, 5 Jan 2016 12:06:25 +0100 Subject: [PATCH 1608/2091] batman-adv: Drop immediate batadv_hard_iface free function [ Upstream commit b4d922cfc9c08318eeb77d53b7633740e6b0efb0 ] It is not allowed to free the memory of an object which is part of a list which is protected by rcu-read-side-critical sections without making sure that no other context is accessing the object anymore. This usually happens by removing the references to this object and then waiting until the rcu grace period is over and no one (allowedly) accesses it anymore. But the _now functions ignore this completely. They free the object directly even when a different context still tries to access it. This has to be avoided and thus these functions must be removed and all functions have to use batadv_hardif_free_ref. 
Fixes: 89652331c00f ("batman-adv: split tq information in neigh_node struct") Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/hard-interface.h | 12 ------------ net/batman-adv/originator.c | 16 +++++++--------- 2 files changed, 7 insertions(+), 21 deletions(-) diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h index 1918cd50b62ed..b6bff9c1877ac 100644 --- a/net/batman-adv/hard-interface.h +++ b/net/batman-adv/hard-interface.h @@ -64,18 +64,6 @@ batadv_hardif_free_ref(struct batadv_hard_iface *hard_iface) call_rcu(&hard_iface->rcu, batadv_hardif_free_rcu); } -/** - * batadv_hardif_free_ref_now - decrement the hard interface refcounter and - * possibly free it (without rcu callback) - * @hard_iface: the hard interface to free - */ -static inline void -batadv_hardif_free_ref_now(struct batadv_hard_iface *hard_iface) -{ - if (atomic_dec_and_test(&hard_iface->refcount)) - batadv_hardif_free_rcu(&hard_iface->rcu); -} - static inline struct batadv_hard_iface * batadv_primary_if_get_selected(struct batadv_priv *bat_priv) { diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 23db96f0ee2ac..36dfa3bac2b70 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -175,24 +175,22 @@ void batadv_neigh_ifinfo_free_ref(struct batadv_neigh_ifinfo *neigh_ifinfo) } /** - * batadv_neigh_node_free_rcu - free the neigh_node - * @rcu: rcu pointer of the neigh_node + * batadv_neigh_node_release - release neigh_node from lists and queue for + * free after rcu grace period + * @neigh_node: neigh neighbor to free */ -static void batadv_neigh_node_free_rcu(struct rcu_head *rcu) +static void batadv_neigh_node_release(struct batadv_neigh_node *neigh_node) { struct hlist_node *node_tmp; - struct batadv_neigh_node *neigh_node; struct batadv_neigh_ifinfo *neigh_ifinfo; - neigh_node = container_of(rcu, struct batadv_neigh_node, rcu); - hlist_for_each_entry_safe(neigh_ifinfo, node_tmp, &neigh_node->ifinfo_list, list) { batadv_neigh_ifinfo_free_ref(neigh_ifinfo); } - batadv_hardif_free_ref_now(neigh_node->if_incoming); + batadv_hardif_free_ref(neigh_node->if_incoming); - kfree(neigh_node); + kfree_rcu(neigh_node, rcu); } /** @@ -203,7 +201,7 @@ static void batadv_neigh_node_free_rcu(struct rcu_head *rcu) void batadv_neigh_node_free_ref(struct batadv_neigh_node *neigh_node) { if (atomic_dec_and_test(&neigh_node->refcount)) - call_rcu(&neigh_node->rcu, batadv_neigh_node_free_rcu); + batadv_neigh_node_release(neigh_node); } /** From 95785b105fa2f784ad7b6be0238338b648ed892a Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Tue, 5 Jan 2016 12:06:20 +0100 Subject: [PATCH 1609/2091] batman-adv: Drop immediate orig_node free function [ Upstream commit 42eff6a617e23b691f8e4467f4687ed7245a92db ] It is not allowed to free the memory of an object which is part of a list which is protected by rcu-read-side-critical sections without making sure that no other context is accessing the object anymore. This usually happens by removing the references to this object and then waiting until the rcu grace period is over and no one (allowedly) accesses it anymore. But the _now functions ignore this completely. They free the object directly even when a different context still tries to access it. This has to be avoided and thus these functions must be removed and all functions have to use batadv_orig_node_free_ref. 
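[Editorial sketch, not part of any patch in this series: example_teardown(), obj_put() and obj_cache_destroy() are invented names. It illustrates the teardown-ordering consequence the commit messages describe — rcu_barrier() only waits for callbacks already queued when it is called, so a callback queued from inside another callback can still run afterwards.]

```c
#include <linux/rcupdate.h>

struct obj;			/* hypothetical object type */
void obj_put(struct obj *obj);	/* queues obj_free_rcu() on the last put */
void obj_cache_destroy(void);	/* frees the backing storage */

void example_teardown(struct obj *obj)
{
	obj_put(obj);	/* last reference: queues obj_free_rcu() via call_rcu() */

	/*
	 * rcu_barrier() waits for obj_free_rcu(), but not for any
	 * call_rcu() that obj_free_rcu() itself queues while running.
	 * Such a nested callback can still be pending here and run after
	 * obj_cache_destroy() -- exactly the ordering bug that dropping
	 * nested references synchronously (and using kfree_rcu() for the
	 * final free) avoids.
	 */
	rcu_barrier();

	obj_cache_destroy();
}
```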
Fixes: 72822225bd41 ("batman-adv: Fix rcu_barrier() miss due to double call_rcu() in TT code") Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/originator.c | 11 ----------- net/batman-adv/originator.h | 1 - net/batman-adv/translation-table.c | 28 +++++++++++++--------------- 3 files changed, 13 insertions(+), 27 deletions(-) diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 36dfa3bac2b70..77ea1d4de2ba8 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -562,17 +562,6 @@ void batadv_orig_node_free_ref(struct batadv_orig_node *orig_node) batadv_orig_node_release(orig_node); } -/** - * batadv_orig_node_free_ref_now - decrement the orig node refcounter and - * possibly free it (without rcu callback) - * @orig_node: the orig node to free - */ -void batadv_orig_node_free_ref_now(struct batadv_orig_node *orig_node) -{ - if (atomic_dec_and_test(&orig_node->refcount)) - batadv_orig_node_free_rcu(&orig_node->rcu); -} - void batadv_originator_free(struct batadv_priv *bat_priv) { struct batadv_hashtable *hash = bat_priv->orig_hash; diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h index aa4a436962956..28b751ad549cc 100644 --- a/net/batman-adv/originator.h +++ b/net/batman-adv/originator.h @@ -25,7 +25,6 @@ int batadv_originator_init(struct batadv_priv *bat_priv); void batadv_originator_free(struct batadv_priv *bat_priv); void batadv_purge_orig_ref(struct batadv_priv *bat_priv); void batadv_orig_node_free_ref(struct batadv_orig_node *orig_node); -void batadv_orig_node_free_ref_now(struct batadv_orig_node *orig_node); struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv, const uint8_t *addr); struct batadv_neigh_node * diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 4f2a9d2c56dbb..ddd62c9af5b4c 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -219,20 +219,6 @@ int batadv_tt_global_hash_count(struct batadv_priv *bat_priv, return count; } -static void batadv_tt_orig_list_entry_free_rcu(struct rcu_head *rcu) -{ - struct batadv_tt_orig_list_entry *orig_entry; - - orig_entry = container_of(rcu, struct batadv_tt_orig_list_entry, rcu); - - /* We are in an rcu callback here, therefore we cannot use - * batadv_orig_node_free_ref() and its call_rcu(): - * An rcu_barrier() wouldn't wait for that to finish - */ - batadv_orig_node_free_ref_now(orig_entry->orig_node); - kfree(orig_entry); -} - /** * batadv_tt_local_size_mod - change the size by v of the local table identified * by vid @@ -328,13 +314,25 @@ static void batadv_tt_global_size_dec(struct batadv_orig_node *orig_node, batadv_tt_global_size_mod(orig_node, vid, -1); } +/** + * batadv_tt_orig_list_entry_release - release tt orig entry from lists and + * queue for free after rcu grace period + * @orig_entry: tt orig entry to be free'd + */ +static void +batadv_tt_orig_list_entry_release(struct batadv_tt_orig_list_entry *orig_entry) +{ + batadv_orig_node_free_ref(orig_entry->orig_node); + kfree_rcu(orig_entry, rcu); +} + static void batadv_tt_orig_list_entry_free_ref(struct batadv_tt_orig_list_entry *orig_entry) { if (!atomic_dec_and_test(&orig_entry->refcount)) return; - call_rcu(&orig_entry->rcu, batadv_tt_orig_list_entry_free_rcu); + batadv_tt_orig_list_entry_release(orig_entry); } /** From 490c963c1eb9a7f5f74446f4b9738c9fb1b8e325 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: 
Mon, 18 Jan 2016 17:30:22 +0200 Subject: [PATCH 1610/2091] team: Replace rcu_read_lock with a mutex in team_vlan_rx_kill_vid [ Upstream commit 60a6531bfe49555581ccd65f66a350cc5693fcde ] We can't be within an RCU read-side critical section when deleting VLANs, as underlying drivers might sleep during the hardware operation. Therefore, replace the RCU critical section with a mutex. This is consistent with team_vlan_rx_add_vid. Fixes: 3d249d4ca7d0 ("net: introduce ethernet teaming device") Acked-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/team/team.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 6928448f6b7f1..2b45d0168c3c1 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1845,10 +1845,10 @@ static int team_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid) struct team *team = netdev_priv(dev); struct team_port *port; - rcu_read_lock(); - list_for_each_entry_rcu(port, &team->port_list, list) + mutex_lock(&team->lock); + list_for_each_entry(port, &team->port_list, list) vlan_vid_del(port->dev, proto, vid); - rcu_read_unlock(); + mutex_unlock(&team->lock); return 0; } From 534e9016cd88ccd577b226b7172e5cd079f5fb02 Mon Sep 17 00:00:00 2001 From: Karl Heiss Date: Thu, 24 Sep 2015 12:15:07 -0400 Subject: [PATCH 1611/2091] sctp: Prevent soft lockup when sctp_accept() is called during a timeout event [ Upstream commit 635682a14427d241bab7bbdeebb48a7d7b91638e ] A case can occur when sctp_accept() is called by the user during a heartbeat timeout event after the 4-way handshake. Since sctp_assoc_migrate() changes both assoc->base.sk and assoc->ep, the bh_sock_lock in sctp_generate_heartbeat_event() will be taken with the listening socket but released with the new association socket. The result is a deadlock on any future attempts to take the listening socket lock. Note that this race can occur with other SCTP timeouts that take the bh_lock_sock() in the event sctp_accept() is called. BUG: soft lockup - CPU#9 stuck for 67s! [swapper:0] ... RIP: 0010:[] [] _spin_lock+0x1e/0x30 RSP: 0018:ffff880028323b20 EFLAGS: 00000206 RAX: 0000000000000002 RBX: ffff880028323b20 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffff880028323be0 RDI: ffff8804632c4b48 RBP: ffffffff8100bb93 R08: 0000000000000000 R09: 0000000000000000 R10: ffff880610662280 R11: 0000000000000100 R12: ffff880028323aa0 R13: ffff8804383c3880 R14: ffff880028323a90 R15: ffffffff81534225 FS: 0000000000000000(0000) GS:ffff880028320000(0000) knlGS:0000000000000000 CS: 0010 DS: 0018 ES: 0018 CR0: 000000008005003b CR2: 00000000006df528 CR3: 0000000001a85000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process swapper (pid: 0, threadinfo ffff880616b70000, task ffff880616b6cab0) Stack: ffff880028323c40 ffffffffa01c2582 ffff880614cfb020 0000000000000000 0100000000000000 00000014383a6c44 ffff8804383c3880 ffff880614e93c00 ffff880614e93c00 0000000000000000 ffff8804632c4b00 ffff8804383c38b8 Call Trace: [] ? sctp_rcv+0x492/0xa10 [sctp] [] ? nf_iterate+0x69/0xb0 [] ? ip_local_deliver_finish+0x0/0x2d0 [] ? nf_hook_slow+0x76/0x120 [] ? ip_local_deliver_finish+0x0/0x2d0 [] ? ip_local_deliver_finish+0xdd/0x2d0 [] ? ip_local_deliver+0x98/0xa0 [] ? ip_rcv_finish+0x12d/0x440 [] ? ip_rcv+0x275/0x350 [] ? __netif_receive_skb+0x4ab/0x750 ... 
With lockdep debugging: ===================================== [ BUG: bad unlock balance detected! ] ------------------------------------- CslRx/12087 is trying to release lock (slock-AF_INET) at: [] sctp_generate_timeout_event+0x40/0xe0 [sctp] but there are no more locks to release! other info that might help us debug this: 2 locks held by CslRx/12087: #0: (&asoc->timers[i]){+.-...}, at: [] run_timer_softirq+0x16f/0x3e0 #1: (slock-AF_INET){+.-...}, at: [] sctp_generate_timeout_event+0x23/0xe0 [sctp] Ensure the socket taken is also the same one that is released by saving a copy of the socket before entering the timeout event critical section. Signed-off-by: Karl Heiss Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/sm_sideeffect.c | 42 ++++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index fef2acdf4a2e6..ecae5561b9127 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -244,12 +244,13 @@ void sctp_generate_t3_rtx_event(unsigned long peer) int error; struct sctp_transport *transport = (struct sctp_transport *) peer; struct sctp_association *asoc = transport->asoc; - struct net *net = sock_net(asoc->base.sk); + struct sock *sk = asoc->base.sk; + struct net *net = sock_net(sk); /* Check whether a task is in the sock. */ - bh_lock_sock(asoc->base.sk); - if (sock_owned_by_user(asoc->base.sk)) { + bh_lock_sock(sk); + if (sock_owned_by_user(sk)) { pr_debug("%s: sock is busy\n", __func__); /* Try again later. */ @@ -272,10 +273,10 @@ void sctp_generate_t3_rtx_event(unsigned long peer) transport, GFP_ATOMIC); if (error) - asoc->base.sk->sk_err = -error; + sk->sk_err = -error; out_unlock: - bh_unlock_sock(asoc->base.sk); + bh_unlock_sock(sk); sctp_transport_put(transport); } @@ -285,11 +286,12 @@ void sctp_generate_t3_rtx_event(unsigned long peer) static void sctp_generate_timeout_event(struct sctp_association *asoc, sctp_event_timeout_t timeout_type) { - struct net *net = sock_net(asoc->base.sk); + struct sock *sk = asoc->base.sk; + struct net *net = sock_net(sk); int error = 0; - bh_lock_sock(asoc->base.sk); - if (sock_owned_by_user(asoc->base.sk)) { + bh_lock_sock(sk); + if (sock_owned_by_user(sk)) { pr_debug("%s: sock is busy: timer %d\n", __func__, timeout_type); @@ -312,10 +314,10 @@ static void sctp_generate_timeout_event(struct sctp_association *asoc, (void *)timeout_type, GFP_ATOMIC); if (error) - asoc->base.sk->sk_err = -error; + sk->sk_err = -error; out_unlock: - bh_unlock_sock(asoc->base.sk); + bh_unlock_sock(sk); sctp_association_put(asoc); } @@ -365,10 +367,11 @@ void sctp_generate_heartbeat_event(unsigned long data) int error = 0; struct sctp_transport *transport = (struct sctp_transport *) data; struct sctp_association *asoc = transport->asoc; - struct net *net = sock_net(asoc->base.sk); + struct sock *sk = asoc->base.sk; + struct net *net = sock_net(sk); - bh_lock_sock(asoc->base.sk); - if (sock_owned_by_user(asoc->base.sk)) { + bh_lock_sock(sk); + if (sock_owned_by_user(sk)) { pr_debug("%s: sock is busy\n", __func__); /* Try again later. 
*/ @@ -389,10 +392,10 @@ void sctp_generate_heartbeat_event(unsigned long data) transport, GFP_ATOMIC); if (error) - asoc->base.sk->sk_err = -error; + sk->sk_err = -error; out_unlock: - bh_unlock_sock(asoc->base.sk); + bh_unlock_sock(sk); sctp_transport_put(transport); } @@ -403,10 +406,11 @@ void sctp_generate_proto_unreach_event(unsigned long data) { struct sctp_transport *transport = (struct sctp_transport *) data; struct sctp_association *asoc = transport->asoc; - struct net *net = sock_net(asoc->base.sk); + struct sock *sk = asoc->base.sk; + struct net *net = sock_net(sk); - bh_lock_sock(asoc->base.sk); - if (sock_owned_by_user(asoc->base.sk)) { + bh_lock_sock(sk); + if (sock_owned_by_user(sk)) { pr_debug("%s: sock is busy\n", __func__); /* Try again later. */ @@ -427,7 +431,7 @@ void sctp_generate_proto_unreach_event(unsigned long data) asoc->state, asoc->ep, asoc, transport, GFP_ATOMIC); out_unlock: - bh_unlock_sock(asoc->base.sk); + bh_unlock_sock(sk); sctp_association_put(asoc); } From 21edf40b5ccbdf48720758fe525f3cbc19c8a202 Mon Sep 17 00:00:00 2001 From: Wei Liu Date: Thu, 10 Sep 2015 11:18:57 +0100 Subject: [PATCH 1612/2091] xen-netback: respect user provided max_queues [ Upstream commit 4c82ac3c37363e8c4ded6a5fe1ec5fa756b34df3 ] Originally that parameter was always reset to num_online_cpus during module initialisation, which renders it useless. The fix is to only set max_queues to num_online_cpus when user has not provided a value. Reported-by: Johnny Strom Signed-off-by: Wei Liu Reviewed-by: David Vrabel Acked-by: Ian Campbell Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/xen-netback/netback.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 0866c5dfdf87b..5e5b6184e7205 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -2007,8 +2007,11 @@ static int __init netback_init(void) if (!xen_domain()) return -ENODEV; - /* Allow as many queues as there are CPUs, by default */ - xenvif_max_queues = num_online_cpus(); + /* Allow as many queues as there are CPUs if user has not + * specified a value. + */ + if (xenvif_max_queues == 0) + xenvif_max_queues = num_online_cpus(); if (fatal_skb_slots < XEN_NETBK_LEGACY_SLOTS_MAX) { pr_info("fatal_skb_slots too small (%d), bump it to XEN_NETBK_LEGACY_SLOTS_MAX (%d)\n", From a1edfa789d1a7f111dffb142bacbc6f2aa9d2e29 Mon Sep 17 00:00:00 2001 From: Wei Liu Date: Thu, 10 Sep 2015 11:18:58 +0100 Subject: [PATCH 1613/2091] xen-netfront: respect user provided max_queues [ Upstream commit 32a844056fd43dda647e1c3c6b9983bdfa04d17d ] Originally that parameter was always reset to num_online_cpus during module initialisation, which renders it useless. The fix is to only set max_queues to num_online_cpus when user has not provided a value. Signed-off-by: Wei Liu Cc: David Vrabel Reviewed-by: David Vrabel Tested-by: David Vrabel Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/xen-netfront.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 52f081f4dfd58..2a01b685b3a06 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -2140,8 +2140,11 @@ static int __init netif_init(void) pr_info("Initialising Xen virtual ethernet driver\n"); - /* Allow as many queues as there are CPUs, by default */ - xennet_max_queues = num_online_cpus(); + /* Allow as many queues as there are CPUs if user has not + * specified a value. + */ + if (xennet_max_queues == 0) + xennet_max_queues = num_online_cpus(); return xenbus_register_frontend(&netfront_driver); } From 139bd872dc6868296a992a2b8142b4daba964d3f Mon Sep 17 00:00:00 2001 From: Joe Jin Date: Mon, 19 Oct 2015 13:37:17 +0800 Subject: [PATCH 1614/2091] xen-netfront: update num_queues to real created [ Upstream commit ca88ea1247dfee094e2467a3578eaec9bdf0833a ] Sometimes xennet_create_queues() may failed to created all requested queues, we need to update num_queues to real created to avoid NULL pointer dereference. Signed-off-by: Joe Jin Cc: Boris Ostrovsky Cc: Konrad Rzeszutek Wilk Cc: Wei Liu Cc: Ian Campbell Cc: David S. Miller Reviewed-by: Boris Ostrovsky Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/xen-netfront.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 2a01b685b3a06..fd51626e859e4 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -1710,19 +1710,19 @@ static void xennet_destroy_queues(struct netfront_info *info) } static int xennet_create_queues(struct netfront_info *info, - unsigned int num_queues) + unsigned int *num_queues) { unsigned int i; int ret; - info->queues = kcalloc(num_queues, sizeof(struct netfront_queue), + info->queues = kcalloc(*num_queues, sizeof(struct netfront_queue), GFP_KERNEL); if (!info->queues) return -ENOMEM; rtnl_lock(); - for (i = 0; i < num_queues; i++) { + for (i = 0; i < *num_queues; i++) { struct netfront_queue *queue = &info->queues[i]; queue->id = i; @@ -1732,7 +1732,7 @@ static int xennet_create_queues(struct netfront_info *info, if (ret < 0) { dev_warn(&info->netdev->dev, "only created %d queues\n", i); - num_queues = i; + *num_queues = i; break; } @@ -1742,11 +1742,11 @@ static int xennet_create_queues(struct netfront_info *info, napi_enable(&queue->napi); } - netif_set_real_num_tx_queues(info->netdev, num_queues); + netif_set_real_num_tx_queues(info->netdev, *num_queues); rtnl_unlock(); - if (num_queues == 0) { + if (*num_queues == 0) { dev_err(&info->netdev->dev, "no queues\n"); return -EINVAL; } @@ -1792,7 +1792,7 @@ static int talk_to_netback(struct xenbus_device *dev, if (info->queues) xennet_destroy_queues(info); - err = xennet_create_queues(info, num_queues); + err = xennet_create_queues(info, &num_queues); if (err < 0) goto destroy_ring; From eeca98948d8c4922e6deb16bfc9ee0bd9902dbb0 Mon Sep 17 00:00:00 2001 From: Dan Streetman Date: Thu, 29 Oct 2015 09:51:16 -0400 Subject: [PATCH 1615/2091] xfrm: dst_entries_init() per-net dst_ops [ Upstream commit a8a572a6b5f2a79280d6e302cb3c1cb1fbaeb3e8 ] Remove the dst_entries_init/destroy calls for xfrm4 and xfrm6 dst_ops templates; their dst_entries counters will never be used. 
Move the xfrm dst_ops initialization from the common xfrm/xfrm_policy.c to xfrm4/xfrm4_policy.c and xfrm6/xfrm6_policy.c, and call dst_entries_init and dst_entries_destroy for each net namespace. The ipv4 and ipv6 xfrms each create dst_ops template, and perform dst_entries_init on the templates. The template values are copied to each net namespace's xfrm.xfrm*_dst_ops. The problem there is the dst_ops pcpuc_entries field is a percpu counter and cannot be used correctly by simply copying it to another object. The result of this is a very subtle bug; changes to the dst entries counter from one net namespace may sometimes get applied to a different net namespace dst entries counter. This is because of how the percpu counter works; it has a main count field as well as a pointer to the percpu variables. Each net namespace maintains its own main count variable, but all point to one set of percpu variables. When any net namespace happens to change one of the percpu variables to outside its small batch range, its count is moved to the net namespace's main count variable. So with multiple net namespaces operating concurrently, the dst_ops entries counter can stray from the actual value that it should be; if counts are consistently moved from one net namespace to another (which my testing showed is likely), then one net namespace winds up with a negative dst_ops count while another winds up with a continually increasing count, eventually reaching its gc_thresh limit, which causes all new traffic on the net namespace to fail with -ENOBUFS. Signed-off-by: Dan Streetman Signed-off-by: Dan Streetman Signed-off-by: Steffen Klassert Signed-off-by: Greg Kroah-Hartman --- net/ipv4/xfrm4_policy.c | 46 ++++++++++++++++++++++++++++------- net/ipv6/xfrm6_policy.c | 53 +++++++++++++++++++++++++++++------------ net/xfrm/xfrm_policy.c | 38 ----------------------------- 3 files changed, 75 insertions(+), 62 deletions(-) diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index bff69746e05f0..78526087126de 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -230,7 +230,7 @@ static void xfrm4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, xfrm_dst_ifdown(dst, dev); } -static struct dst_ops xfrm4_dst_ops = { +static struct dst_ops xfrm4_dst_ops_template = { .family = AF_INET, .gc = xfrm4_garbage_collect, .update_pmtu = xfrm4_update_pmtu, @@ -244,7 +244,7 @@ static struct dst_ops xfrm4_dst_ops = { static struct xfrm_policy_afinfo xfrm4_policy_afinfo = { .family = AF_INET, - .dst_ops = &xfrm4_dst_ops, + .dst_ops = &xfrm4_dst_ops_template, .dst_lookup = xfrm4_dst_lookup, .get_saddr = xfrm4_get_saddr, .decode_session = _decode_session4, @@ -266,7 +266,7 @@ static struct ctl_table xfrm4_policy_table[] = { { } }; -static int __net_init xfrm4_net_init(struct net *net) +static int __net_init xfrm4_net_sysctl_init(struct net *net) { struct ctl_table *table; struct ctl_table_header *hdr; @@ -294,7 +294,7 @@ static int __net_init xfrm4_net_init(struct net *net) return -ENOMEM; } -static void __net_exit xfrm4_net_exit(struct net *net) +static void __net_exit xfrm4_net_sysctl_exit(struct net *net) { struct ctl_table *table; @@ -306,12 +306,44 @@ static void __net_exit xfrm4_net_exit(struct net *net) if (!net_eq(net, &init_net)) kfree(table); } +#else /* CONFIG_SYSCTL */ +static int inline xfrm4_net_sysctl_init(struct net *net) +{ + return 0; +} + +static void inline xfrm4_net_sysctl_exit(struct net *net) +{ +} +#endif + +static int __net_init xfrm4_net_init(struct net *net) +{ + int ret; + + 
memcpy(&net->xfrm.xfrm4_dst_ops, &xfrm4_dst_ops_template, + sizeof(xfrm4_dst_ops_template)); + ret = dst_entries_init(&net->xfrm.xfrm4_dst_ops); + if (ret) + return ret; + + ret = xfrm4_net_sysctl_init(net); + if (ret) + dst_entries_destroy(&net->xfrm.xfrm4_dst_ops); + + return ret; +} + +static void __net_exit xfrm4_net_exit(struct net *net) +{ + xfrm4_net_sysctl_exit(net); + dst_entries_destroy(&net->xfrm.xfrm4_dst_ops); +} static struct pernet_operations __net_initdata xfrm4_net_ops = { .init = xfrm4_net_init, .exit = xfrm4_net_exit, }; -#endif static void __init xfrm4_policy_init(void) { @@ -320,13 +352,9 @@ static void __init xfrm4_policy_init(void) void __init xfrm4_init(void) { - dst_entries_init(&xfrm4_dst_ops); - xfrm4_state_init(); xfrm4_policy_init(); xfrm4_protocol_init(); -#ifdef CONFIG_SYSCTL register_pernet_subsys(&xfrm4_net_ops); -#endif } diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index f337a908a76a1..4fb94f6ee15b4 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -289,7 +289,7 @@ static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, xfrm_dst_ifdown(dst, dev); } -static struct dst_ops xfrm6_dst_ops = { +static struct dst_ops xfrm6_dst_ops_template = { .family = AF_INET6, .gc = xfrm6_garbage_collect, .update_pmtu = xfrm6_update_pmtu, @@ -303,7 +303,7 @@ static struct dst_ops xfrm6_dst_ops = { static struct xfrm_policy_afinfo xfrm6_policy_afinfo = { .family = AF_INET6, - .dst_ops = &xfrm6_dst_ops, + .dst_ops = &xfrm6_dst_ops_template, .dst_lookup = xfrm6_dst_lookup, .get_saddr = xfrm6_get_saddr, .decode_session = _decode_session6, @@ -336,7 +336,7 @@ static struct ctl_table xfrm6_policy_table[] = { { } }; -static int __net_init xfrm6_net_init(struct net *net) +static int __net_init xfrm6_net_sysctl_init(struct net *net) { struct ctl_table *table; struct ctl_table_header *hdr; @@ -364,7 +364,7 @@ static int __net_init xfrm6_net_init(struct net *net) return -ENOMEM; } -static void __net_exit xfrm6_net_exit(struct net *net) +static void __net_exit xfrm6_net_sysctl_exit(struct net *net) { struct ctl_table *table; @@ -376,24 +376,52 @@ static void __net_exit xfrm6_net_exit(struct net *net) if (!net_eq(net, &init_net)) kfree(table); } +#else /* CONFIG_SYSCTL */ +static int inline xfrm6_net_sysctl_init(struct net *net) +{ + return 0; +} + +static void inline xfrm6_net_sysctl_exit(struct net *net) +{ +} +#endif + +static int __net_init xfrm6_net_init(struct net *net) +{ + int ret; + + memcpy(&net->xfrm.xfrm6_dst_ops, &xfrm6_dst_ops_template, + sizeof(xfrm6_dst_ops_template)); + ret = dst_entries_init(&net->xfrm.xfrm6_dst_ops); + if (ret) + return ret; + + ret = xfrm6_net_sysctl_init(net); + if (ret) + dst_entries_destroy(&net->xfrm.xfrm6_dst_ops); + + return ret; +} + +static void __net_exit xfrm6_net_exit(struct net *net) +{ + xfrm6_net_sysctl_exit(net); + dst_entries_destroy(&net->xfrm.xfrm6_dst_ops); +} static struct pernet_operations xfrm6_net_ops = { .init = xfrm6_net_init, .exit = xfrm6_net_exit, }; -#endif int __init xfrm6_init(void) { int ret; - dst_entries_init(&xfrm6_dst_ops); - ret = xfrm6_policy_init(); - if (ret) { - dst_entries_destroy(&xfrm6_dst_ops); + if (ret) goto out; - } ret = xfrm6_state_init(); if (ret) goto out_policy; @@ -402,9 +430,7 @@ int __init xfrm6_init(void) if (ret) goto out_state; -#ifdef CONFIG_SYSCTL register_pernet_subsys(&xfrm6_net_ops); -#endif out: return ret; out_state: @@ -416,11 +442,8 @@ int __init xfrm6_init(void) void xfrm6_fini(void) { -#ifdef CONFIG_SYSCTL 
unregister_pernet_subsys(&xfrm6_net_ops); -#endif xfrm6_protocol_fini(); xfrm6_policy_fini(); xfrm6_state_fini(); - dst_entries_destroy(&xfrm6_dst_ops); } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 638af0655aaf8..4cd2076ff84b4 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2806,7 +2806,6 @@ static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst, int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) { - struct net *net; int err = 0; if (unlikely(afinfo == NULL)) return -EINVAL; @@ -2837,26 +2836,6 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) } spin_unlock(&xfrm_policy_afinfo_lock); - rtnl_lock(); - for_each_net(net) { - struct dst_ops *xfrm_dst_ops; - - switch (afinfo->family) { - case AF_INET: - xfrm_dst_ops = &net->xfrm.xfrm4_dst_ops; - break; -#if IS_ENABLED(CONFIG_IPV6) - case AF_INET6: - xfrm_dst_ops = &net->xfrm.xfrm6_dst_ops; - break; -#endif - default: - BUG(); - } - *xfrm_dst_ops = *afinfo->dst_ops; - } - rtnl_unlock(); - return err; } EXPORT_SYMBOL(xfrm_policy_register_afinfo); @@ -2892,22 +2871,6 @@ int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo) } EXPORT_SYMBOL(xfrm_policy_unregister_afinfo); -static void __net_init xfrm_dst_ops_init(struct net *net) -{ - struct xfrm_policy_afinfo *afinfo; - - rcu_read_lock(); - afinfo = rcu_dereference(xfrm_policy_afinfo[AF_INET]); - if (afinfo) - net->xfrm.xfrm4_dst_ops = *afinfo->dst_ops; -#if IS_ENABLED(CONFIG_IPV6) - afinfo = rcu_dereference(xfrm_policy_afinfo[AF_INET6]); - if (afinfo) - net->xfrm.xfrm6_dst_ops = *afinfo->dst_ops; -#endif - rcu_read_unlock(); -} - static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); @@ -3056,7 +3019,6 @@ static int __net_init xfrm_net_init(struct net *net) rv = xfrm_policy_init(net); if (rv < 0) goto out_policy; - xfrm_dst_ops_init(net); rv = xfrm_sysctl_init(net); if (rv < 0) goto out_sysctl; From 567a215dd1586dae787f21b8f3e484018763a710 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Thu, 19 Nov 2015 15:44:44 +1100 Subject: [PATCH 1616/2091] powerpc/tm: Block signal return setting invalid MSR state commit d2b9d2a5ad5ef04ff978c9923d19730cb05efd55 upstream. Currently we allow both the MSR T and S bits to be set by userspace on a signal return. Unfortunately this is a reserved configuration and will cause a TM Bad Thing exception if attempted (via rfid). This patch checks for this case in both the 32 and 64 bit signals code. If both T and S are set, we mark the context as invalid. Found using a syscall fuzzer. Fixes: 2b0a576d15e0 ("powerpc: Add new transactional memory state to the signal context") Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/reg.h | 1 + arch/powerpc/kernel/signal_32.c | 14 +++++++++----- arch/powerpc/kernel/signal_64.c | 4 ++++ 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index af56b5c6c81ab..f4f99f01b7468 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -108,6 +108,7 @@ #define MSR_TS_T __MASK(MSR_TS_T_LG) /* Transaction Transactional */ #define MSR_TS_MASK (MSR_TS_T | MSR_TS_S) /* Transaction State bits */ #define MSR_TM_ACTIVE(x) (((x) & MSR_TS_MASK) != 0) /* Transaction active? 
*/ +#define MSR_TM_RESV(x) (((x) & MSR_TS_MASK) == MSR_TS_MASK) /* Reserved */ #define MSR_TM_TRANSACTIONAL(x) (((x) & MSR_TS_MASK) == MSR_TS_T) #define MSR_TM_SUSPENDED(x) (((x) & MSR_TS_MASK) == MSR_TS_S) diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index da50e0c9c57e6..7356c33dc897f 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -875,6 +875,15 @@ static long restore_tm_user_regs(struct pt_regs *regs, return 1; #endif /* CONFIG_SPE */ + /* Get the top half of the MSR from the user context */ + if (__get_user(msr_hi, &tm_sr->mc_gregs[PT_MSR])) + return 1; + msr_hi <<= 32; + /* If TM bits are set to the reserved value, it's an invalid context */ + if (MSR_TM_RESV(msr_hi)) + return 1; + /* Pull in the MSR TM bits from the user context */ + regs->msr = (regs->msr & ~MSR_TS_MASK) | (msr_hi & MSR_TS_MASK); /* Now, recheckpoint. This loads up all of the checkpointed (older) * registers, including FP and V[S]Rs. After recheckpointing, the * transactional versions should be loaded. @@ -884,11 +893,6 @@ static long restore_tm_user_regs(struct pt_regs *regs, current->thread.tm_texasr |= TEXASR_FS; /* This loads the checkpointed FP/VEC state, if used */ tm_recheckpoint(¤t->thread, msr); - /* Get the top half of the MSR */ - if (__get_user(msr_hi, &tm_sr->mc_gregs[PT_MSR])) - return 1; - /* Pull in MSR TM from user context */ - regs->msr = (regs->msr & ~MSR_TS_MASK) | ((msr_hi<<32) & MSR_TS_MASK); /* This loads the speculative FP/VEC state, if used */ if (msr & MSR_FP) { diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index c7c24d2e2bdbc..164fd64748436 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -427,6 +427,10 @@ static long restore_tm_sigcontexts(struct pt_regs *regs, /* get MSR separately, transfer the LE bit if doing signal return */ err |= __get_user(msr, &sc->gp_regs[PT_MSR]); + /* Don't allow reserved mode. */ + if (MSR_TM_RESV(msr)) + return -EINVAL; + /* pull in MSR TM from user context */ regs->msr = (regs->msr & ~MSR_TS_MASK) | (msr & MSR_TS_MASK); From a54d3a4234121d8a9749331f7b10e6ff02f886ba Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Thu, 19 Nov 2015 15:44:45 +1100 Subject: [PATCH 1617/2091] powerpc/tm: Check for already reclaimed tasks commit 7f821fc9c77a9b01fe7b1d6e72717b33d8d64142 upstream. Currently we can hit a scenario where we'll tm_reclaim() twice. This results in a TM bad thing exception because the second reclaim occurs when not in suspend mode. The scenario in which this can happen is the following. We attempt to deliver a signal to userspace. To do this we need obtain the stack pointer to write the signal context. To get this stack pointer we must tm_reclaim() in case we need to use the checkpointed stack pointer (see get_tm_stackpointer()). Normally we'd then return directly to userspace to deliver the signal without going through __switch_to(). Unfortunatley, if at this point we get an error (such as a bad userspace stack pointer), we need to exit the process. The exit will result in a __switch_to(). __switch_to() will attempt to save the process state which results in another tm_reclaim(). This tm_reclaim() now causes a TM Bad Thing exception as this state has already been saved and the processor is no longer in TM suspend mode. Whee! This patch checks the state of the MSR to ensure we are TM suspended before we attempt the tm_reclaim(). If we've already saved the state away, we should no longer be in TM suspend mode. 
This has the additional advantage of checking for a potential TM Bad Thing exception. Found using syscall fuzzer. Fixes: fb09692e71f1 ("powerpc: Add reclaim and recheckpoint functions for context switching transactional memory processes") Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/process.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 0596373cd1c31..c8c8275765e75 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -551,6 +551,24 @@ static void tm_reclaim_thread(struct thread_struct *thr, msr_diff &= MSR_FP | MSR_VEC | MSR_VSX | MSR_FE0 | MSR_FE1; } + /* + * Use the current MSR TM suspended bit to track if we have + * checkpointed state outstanding. + * On signal delivery, we'd normally reclaim the checkpointed + * state to obtain stack pointer (see:get_tm_stackpointer()). + * This will then directly return to userspace without going + * through __switch_to(). However, if the stack frame is bad, + * we need to exit this thread which calls __switch_to() which + * will again attempt to reclaim the already saved tm state. + * Hence we need to check that we've not already reclaimed + * this state. + * We do this using the current MSR, rather tracking it in + * some specific thread_struct bit, as it has the additional + * benifit of checking for a potential TM bad thing exception. + */ + if (!MSR_TM_SUSPENDED(mfmsr())) + return; + tm_reclaim(thr, thr->regs->msr, cause); /* Having done the reclaim, we now have the checkpointed From 1e14dd5a386443f9dd4237f7da4df3669d903f0d Mon Sep 17 00:00:00 2001 From: Stewart Smith Date: Fri, 11 Dec 2015 12:08:23 +1100 Subject: [PATCH 1618/2091] powerpc/powernv: pr_warn_once on unsupported OPAL_MSG type commit 98da62b716a3b24ab8e77453c9a8a954124c18cd upstream. When running on newer OPAL firmware that supports sending extra OPAL_MSG types, we would print a warning on *every* message received. This could be a problem for kernels that don't support OPAL_MSG_OCC on machines that are running real close to thermal limits and the OCC is throttling the chip. For a kernel that is paying attention to the message queue, we could get these notifications quite often. Conceivably, future message types could also come fairly often, and printing that we didn't understand them 10,000 times provides no further information than printing them once. Signed-off-by: Stewart Smith Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/powernv/opal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 2241565b0739f..b831a2ee32e9a 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -358,7 +358,7 @@ static void opal_handle_message(void) /* Sanity check */ if (type >= OPAL_MSG_TYPE_MAX) { - pr_warning("%s: Unknown message type: %u\n", __func__, type); + pr_warn_once("%s: Unknown message type: %u\n", __func__, type); return; } opal_message_do_notify(type, (void *)&msg); From af69fe1f70afcd8cf5aa71320e012ca2abedbabe Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Mon, 2 Nov 2015 09:30:31 +0800 Subject: [PATCH 1619/2091] powerpc: Make value-returning atomics fully ordered commit 49e9cf3f0c04bf76ffa59242254110309554861d upstream. 
According to memory-barriers.txt: > Any atomic operation that modifies some state in memory and returns > information about the state (old or new) implies an SMP-conditional > general memory barrier (smp_mb()) on each side of the actual > operation ... Which mean these operations should be fully ordered. However on PPC, PPC_ATOMIC_ENTRY_BARRIER is the barrier before the actual operation, which is currently "lwsync" if SMP=y. The leading "lwsync" can not guarantee fully ordered atomics, according to Paul Mckenney: https://lkml.org/lkml/2015/10/14/970 To fix this, we define PPC_ATOMIC_ENTRY_BARRIER as "sync" to guarantee the fully-ordered semantics. This also makes futex atomics fully ordered, which can avoid possible memory ordering problems if userspace code relies on futex system call for fully ordered semantics. Fixes: b97021f85517 ("powerpc: Fix atomic_xxx_return barrier semantics") Signed-off-by: Boqun Feng Reviewed-by: Paul E. McKenney Acked-by: Peter Zijlstra (Intel) Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/synch.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/synch.h b/arch/powerpc/include/asm/synch.h index e682a7143edb7..c50868681f9ea 100644 --- a/arch/powerpc/include/asm/synch.h +++ b/arch/powerpc/include/asm/synch.h @@ -44,7 +44,7 @@ static inline void isync(void) MAKE_LWSYNC_SECTION_ENTRY(97, __lwsync_fixup); #define PPC_ACQUIRE_BARRIER "\n" stringify_in_c(__PPC_ACQUIRE_BARRIER) #define PPC_RELEASE_BARRIER stringify_in_c(LWSYNC) "\n" -#define PPC_ATOMIC_ENTRY_BARRIER "\n" stringify_in_c(LWSYNC) "\n" +#define PPC_ATOMIC_ENTRY_BARRIER "\n" stringify_in_c(sync) "\n" #define PPC_ATOMIC_EXIT_BARRIER "\n" stringify_in_c(sync) "\n" #else #define PPC_ACQUIRE_BARRIER From 4126ac7cdcefc74d2e14f975f43615f09d263c2c Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Mon, 2 Nov 2015 09:30:32 +0800 Subject: [PATCH 1620/2091] powerpc: Make {cmp}xchg* and their atomic_ versions fully ordered commit 81d7a3294de7e9828310bbf986a67246b13fa01e upstream. According to memory-barriers.txt, xchg*, cmpxchg* and their atomic_ versions all need to be fully ordered, however they are now just RELEASE+ACQUIRE, which are not fully ordered. So also replace PPC_RELEASE_BARRIER and PPC_ACQUIRE_BARRIER with PPC_ATOMIC_ENTRY_BARRIER and PPC_ATOMIC_EXIT_BARRIER in __{cmp,}xchg_{u32,u64} respectively to guarantee fully ordered semantics of atomic{,64}_{cmp,}xchg() and {cmp,}xchg(), as a complement of commit b97021f85517 ("powerpc: Fix atomic_xxx_return barrier semantics") This patch depends on patch "powerpc: Make value-returning atomics fully ordered" for PPC_ATOMIC_ENTRY_BARRIER definition. Signed-off-by: Boqun Feng Reviewed-by: Paul E. McKenney Acked-by: Peter Zijlstra (Intel) Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/cmpxchg.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h index d463c68fe7f05..99897f6645c12 100644 --- a/arch/powerpc/include/asm/cmpxchg.h +++ b/arch/powerpc/include/asm/cmpxchg.h @@ -18,12 +18,12 @@ __xchg_u32(volatile void *p, unsigned long val) unsigned long prev; __asm__ __volatile__( - PPC_RELEASE_BARRIER + PPC_ATOMIC_ENTRY_BARRIER "1: lwarx %0,0,%2 \n" PPC405_ERR77(0,%2) " stwcx. 
%3,0,%2 \n\ bne- 1b" - PPC_ACQUIRE_BARRIER + PPC_ATOMIC_EXIT_BARRIER : "=&r" (prev), "+m" (*(volatile unsigned int *)p) : "r" (p), "r" (val) : "cc", "memory"); @@ -61,12 +61,12 @@ __xchg_u64(volatile void *p, unsigned long val) unsigned long prev; __asm__ __volatile__( - PPC_RELEASE_BARRIER + PPC_ATOMIC_ENTRY_BARRIER "1: ldarx %0,0,%2 \n" PPC405_ERR77(0,%2) " stdcx. %3,0,%2 \n\ bne- 1b" - PPC_ACQUIRE_BARRIER + PPC_ATOMIC_EXIT_BARRIER : "=&r" (prev), "+m" (*(volatile unsigned long *)p) : "r" (p), "r" (val) : "cc", "memory"); @@ -152,14 +152,14 @@ __cmpxchg_u32(volatile unsigned int *p, unsigned long old, unsigned long new) unsigned int prev; __asm__ __volatile__ ( - PPC_RELEASE_BARRIER + PPC_ATOMIC_ENTRY_BARRIER "1: lwarx %0,0,%2 # __cmpxchg_u32\n\ cmpw 0,%0,%3\n\ bne- 2f\n" PPC405_ERR77(0,%2) " stwcx. %4,0,%2\n\ bne- 1b" - PPC_ACQUIRE_BARRIER + PPC_ATOMIC_EXIT_BARRIER "\n\ 2:" : "=&r" (prev), "+m" (*p) @@ -198,13 +198,13 @@ __cmpxchg_u64(volatile unsigned long *p, unsigned long old, unsigned long new) unsigned long prev; __asm__ __volatile__ ( - PPC_RELEASE_BARRIER + PPC_ATOMIC_ENTRY_BARRIER "1: ldarx %0,0,%2 # __cmpxchg_u64\n\ cmpd 0,%0,%3\n\ bne- 2f\n\ stdcx. %4,0,%2\n\ bne- 1b" - PPC_ACQUIRE_BARRIER + PPC_ATOMIC_EXIT_BARRIER "\n\ 2:" : "=&r" (prev), "+m" (*p) From 1e2c53f19cefa0689798e4a37ecb73f2c8a7d7c7 Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Tue, 12 Jan 2016 23:14:22 +1100 Subject: [PATCH 1621/2091] scripts/recordmcount.pl: support data in text section on powerpc commit 2e50c4bef77511b42cc226865d6bc568fa7f8769 upstream. If a text section starts out with a data blob before the first function start label, disassembly parsing doing in recordmcount.pl gets confused on powerpc, leading to creation of corrupted module objects. This was not a problem so far since the compiler would never create such text sections. However, this has changed with a recent change in GCC 6 to support distances of > 2GB between a function and its assoicated TOC in the ELFv2 ABI, exposing this problem. There is already code in recordmcount.pl to handle such data blobs on the sparc64 platform. This patch uses the same method to handle those on powerpc as well. Acked-by: Steven Rostedt Signed-off-by: Ulrich Weigand Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- scripts/recordmcount.pl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl index 826470d7f0007..96e2486a6fc47 100755 --- a/scripts/recordmcount.pl +++ b/scripts/recordmcount.pl @@ -263,7 +263,8 @@ sub check_objcopy } elsif ($arch eq "powerpc") { $local_regex = "^[0-9a-fA-F]+\\s+t\\s+(\\.?\\S+)"; - $function_regex = "^([0-9a-fA-F]+)\\s+<(\\.?.*?)>:"; + # See comment in the sparc64 section for why we use '\w'. + $function_regex = "^([0-9a-fA-F]+)\\s+<(\\.?\\w*?)>:"; $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\s\\.?_mcount\$"; if ($bits == 64) { From a33b8ff3d6cb996ae964dc04a99c536f31fc463f Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Tue, 12 Jan 2016 23:14:23 +1100 Subject: [PATCH 1622/2091] powerpc/module: Handle R_PPC64_ENTRY relocations commit a61674bdfc7c2bf909c4010699607b62b69b7bec upstream. GCC 6 will include changes to generated code with -mcmodel=large, which is used to build kernel modules on powerpc64le. 
This was necessary because the large model is supposed to allow arbitrary sizes and locations of the code and data sections, but the ELFv2 global entry point prolog still made the unconditional assumption that the TOC associated with any particular function can be found within 2 GB of the function entry point: func: addis r2,r12,(.TOC.-func)@ha addi r2,r2,(.TOC.-func)@l .localentry func, .-func To remove this assumption, GCC will now generate instead this global entry point prolog sequence when using -mcmodel=large: .quad .TOC.-func func: .reloc ., R_PPC64_ENTRY ld r2, -8(r12) add r2, r2, r12 .localentry func, .-func The new .reloc triggers an optimization in the linker that will replace this new prolog with the original code (see above) if the linker determines that the distance between .TOC. and func is in range after all. Since this new relocation is now present in module object files, the kernel module loader is required to handle them too. This patch adds support for the new relocation and implements the same optimization done by the GNU linker. Signed-off-by: Ulrich Weigand Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/uapi/asm/elf.h | 2 ++ arch/powerpc/kernel/module_64.c | 27 +++++++++++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/arch/powerpc/include/uapi/asm/elf.h b/arch/powerpc/include/uapi/asm/elf.h index 59dad113897b0..c2d21d11c2d2c 100644 --- a/arch/powerpc/include/uapi/asm/elf.h +++ b/arch/powerpc/include/uapi/asm/elf.h @@ -295,6 +295,8 @@ do { \ #define R_PPC64_TLSLD 108 #define R_PPC64_TOCSAVE 109 +#define R_PPC64_ENTRY 118 + #define R_PPC64_REL16 249 #define R_PPC64_REL16_LO 250 #define R_PPC64_REL16_HI 251 diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 68384514506b7..59663af9315fc 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -635,6 +635,33 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, */ break; + case R_PPC64_ENTRY: + /* + * Optimize ELFv2 large code model entry point if + * the TOC is within 2GB range of current location. + */ + value = my_r2(sechdrs, me) - (unsigned long)location; + if (value + 0x80008000 > 0xffffffff) + break; + /* + * Check for the large code model prolog sequence: + * ld r2, ...(r12) + * add r2, r2, r12 + */ + if ((((uint32_t *)location)[0] & ~0xfffc) + != 0xe84c0000) + break; + if (((uint32_t *)location)[1] != 0x7c426214) + break; + /* + * If found, replace it with: + * addis r2, r12, (.TOC.-func)@ha + * addi r2, r12, (.TOC.-func)@l + */ + ((uint32_t *)location)[0] = 0x3c4c0000 + PPC_HA(value); + ((uint32_t *)location)[1] = 0x38420000 + PPC_LO(value); + break; + case R_PPC64_REL16_HA: /* Subtract location pointer */ value -= (unsigned long)location; From 8831ded35abdb2ff1eaaa10c3e756a8d45a9d171 Mon Sep 17 00:00:00 2001 From: Li Bin Date: Fri, 30 Oct 2015 16:31:04 +0800 Subject: [PATCH 1623/2091] recordmcount: arm64: Replace the ignored mcount call into nop commit 2ee8a74f2a5da913637f75a19a0da0e7a08c0f86 upstream. By now, the recordmcount only records the function that in following sections: .text/.ref.text/.sched.text/.spinlock.text/.irqentry.text/ .kprobes.text/.text.unlikely For the function that not in these sections, the call mcount will be in place and not be replaced when kernel boot up. And it will bring performance overhead, such as do_mem_abort (in .exception.text section). This patch make the call mcount to nop for this case in recordmcount. 
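The nop bytes this patch installs ({0x1f, 0x20, 0x03, 0xd5}) are the little-endian encoding of the A64 nop instruction 0xd503201f, and 0x94000000 is an unrelocated bl with a zero immediate. A minimal user-space sketch of the same check-and-patch step, assuming a little-endian host and an object image already read into memory (the function name and buffer handling are illustrative, not part of the patch):

    #include <stdint.h>
    #include <string.h>

    #define AARCH64_BL_ZERO  0x94000000u  /* "bl <_mcount>" before relocation */
    #define AARCH64_NOP      0xd503201fu  /* A64 nop encoding */

    /* Patch one mcount call site in a little-endian object image. */
    static int nop_mcount_site(unsigned char *image, size_t offset)
    {
            uint32_t insn;

            memcpy(&insn, image + offset, sizeof(insn));
            if (insn != AARCH64_BL_ZERO)
                    return -1;                  /* not the expected call */

            insn = AARCH64_NOP;
            memcpy(image + offset, &insn, sizeof(insn));
            return 0;
    }

The real recordmcount writes the replacement bytes back through ulseek()/uwrite() on its file mapping, as the hunk below shows.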
Link: http://lkml.kernel.org/r/1446019445-14421-1-git-send-email-huawei.libin@huawei.com Link: http://lkml.kernel.org/r/1446193864-24593-4-git-send-email-huawei.libin@huawei.com Cc: Cc: Cc: Acked-by: Will Deacon Signed-off-by: Li Bin Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- scripts/recordmcount.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c index 3d1984e59a301..e00bcd1293367 100644 --- a/scripts/recordmcount.c +++ b/scripts/recordmcount.c @@ -42,6 +42,7 @@ #ifndef EM_AARCH64 #define EM_AARCH64 183 +#define R_AARCH64_NONE 0 #define R_AARCH64_ABS64 257 #endif @@ -160,6 +161,22 @@ static int make_nop_x86(void *map, size_t const offset) return 0; } +static unsigned char ideal_nop4_arm64[4] = {0x1f, 0x20, 0x03, 0xd5}; +static int make_nop_arm64(void *map, size_t const offset) +{ + uint32_t *ptr; + + ptr = map + offset; + /* bl <_mcount> is 0x94000000 before relocation */ + if (*ptr != 0x94000000) + return -1; + + /* Convert to nop */ + ulseek(fd_map, offset, SEEK_SET); + uwrite(fd_map, ideal_nop, 4); + return 0; +} + /* * Get the whole file as a programming convenience in order to avoid * malloc+lseek+read+free of many pieces. If successful, then mmap @@ -353,7 +370,12 @@ do_file(char const *const fname) altmcount = "__gnu_mcount_nc"; break; case EM_AARCH64: - reltype = R_AARCH64_ABS64; gpfx = '_'; break; + reltype = R_AARCH64_ABS64; + make_nop = make_nop_arm64; + rel_type_nop = R_AARCH64_NONE; + ideal_nop = ideal_nop4_arm64; + gpfx = '_'; + break; case EM_IA_64: reltype = R_IA64_IMM64; gpfx = '_'; break; case EM_METAG: reltype = R_METAG_ADDR32; altmcount = "_mcount_wrapper"; From 40c5dde6eb7e1d23ab8f4152d28bd71b5e30f8f6 Mon Sep 17 00:00:00 2001 From: Zi Shen Lim Date: Tue, 3 Nov 2015 22:56:44 -0800 Subject: [PATCH 1624/2091] arm64: bpf: fix div-by-zero case commit 251599e1d6906621f49218d7b474ddd159e58f3b upstream. In the case of division by zero in a BPF program: A = A / X; (X == 0) the expected behavior is to terminate with return value 0. This is confirmed by the test case introduced in commit 86bf1721b226 ("test_bpf: add tests checking that JIT/interpreter sets A and X to 0."). 
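At the C level, the behaviour the new instruction sequence encodes for BPF_ALU64 | BPF_DIV | BPF_X can be modelled as follows; this sketches the semantics only, not the JIT itself, and bpf_div_x/terminated are illustrative names:

    #include <stdint.h>

    /* Divide dst by src; a zero divisor ends the program with R0 = 0. */
    static uint64_t bpf_div_x(uint64_t dst, uint64_t src, int *terminated)
    {
            if (src == 0) {
                    *terminated = 1;  /* JIT: MOVZ r0, #0 then branch to epilogue */
                    return 0;
            }
            return dst / src;         /* JIT: A64_UDIV(is64, dst, dst, src) */
    }

The A64_CBNZ on src is what skips over the early-exit path when the divisor is non-zero.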
Reported-by: Yang Shi Tested-by: Yang Shi CC: Xi Wang CC: Alexei Starovoitov CC: linux-arm-kernel@lists.infradead.org CC: linux-kernel@vger.kernel.org Fixes: e54bcde3d69d ("arm64: eBPF JIT compiler") Signed-off-by: Zi Shen Lim Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/net/bpf_jit.h | 3 ++- arch/arm64/net/bpf_jit_comp.c | 37 +++++++++++++++++++++++------------ 2 files changed, 27 insertions(+), 13 deletions(-) diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h index 98a26ce82d266..aee5637ea436f 100644 --- a/arch/arm64/net/bpf_jit.h +++ b/arch/arm64/net/bpf_jit.h @@ -1,7 +1,7 @@ /* * BPF JIT compiler for ARM64 * - * Copyright (C) 2014 Zi Shen Lim + * Copyright (C) 2014-2015 Zi Shen Lim * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -35,6 +35,7 @@ aarch64_insn_gen_comp_branch_imm(0, offset, Rt, A64_VARIANT(sf), \ AARCH64_INSN_BRANCH_COMP_##type) #define A64_CBZ(sf, Rt, imm19) A64_COMP_BRANCH(sf, Rt, (imm19) << 2, ZERO) +#define A64_CBNZ(sf, Rt, imm19) A64_COMP_BRANCH(sf, Rt, (imm19) << 2, NONZERO) /* Conditional branch (immediate) */ #define A64_COND_BRANCH(cond, offset) \ diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index c047598b09e05..9ae6f2373a6da 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -1,7 +1,7 @@ /* * BPF JIT compiler for ARM64 * - * Copyright (C) 2014 Zi Shen Lim + * Copyright (C) 2014-2015 Zi Shen Lim * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -225,6 +225,17 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) u8 jmp_cond; s32 jmp_offset; +#define check_imm(bits, imm) do { \ + if ((((imm) > 0) && ((imm) >> (bits))) || \ + (((imm) < 0) && (~(imm) >> (bits)))) { \ + pr_info("[%2d] imm=%d(0x%x) out of range\n", \ + i, imm, imm); \ + return -EINVAL; \ + } \ +} while (0) +#define check_imm19(imm) check_imm(19, imm) +#define check_imm26(imm) check_imm(26, imm) + switch (code) { /* dst = src */ case BPF_ALU | BPF_MOV | BPF_X: @@ -258,8 +269,21 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) break; case BPF_ALU | BPF_DIV | BPF_X: case BPF_ALU64 | BPF_DIV | BPF_X: + { + const u8 r0 = bpf2a64[BPF_REG_0]; + + /* if (src == 0) return 0 */ + jmp_offset = 3; /* skip ahead to else path */ + check_imm19(jmp_offset); + emit(A64_CBNZ(is64, src, jmp_offset), ctx); + emit(A64_MOVZ(1, r0, 0, 0), ctx); + jmp_offset = epilogue_offset(ctx); + check_imm26(jmp_offset); + emit(A64_B(jmp_offset), ctx); + /* else */ emit(A64_UDIV(is64, dst, dst, src), ctx); break; + } case BPF_ALU | BPF_MOD | BPF_X: case BPF_ALU64 | BPF_MOD | BPF_X: ctx->tmp_used = 1; @@ -393,17 +417,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) emit(A64_ASR(is64, dst, dst, imm), ctx); break; -#define check_imm(bits, imm) do { \ - if ((((imm) > 0) && ((imm) >> (bits))) || \ - (((imm) < 0) && (~(imm) >> (bits)))) { \ - pr_info("[%2d] imm=%d(0x%x) out of range\n", \ - i, imm, imm); \ - return -EINVAL; \ - } \ -} while (0) -#define check_imm19(imm) check_imm(19, imm) -#define check_imm26(imm) check_imm(26, imm) - /* JUMP off */ case BPF_JMP | BPF_JA: jmp_offset = bpf2a64_offset(i + off, i, ctx); From f22c64cd0745e9b6bc1c68d82affe9dd2151625a Mon Sep 17 00:00:00 2001 From: Zi Shen Lim Date: Wed, 4 Nov 2015 20:43:59 -0800 Subject: [PATCH 1625/2091] arm64: 
bpf: fix mod-by-zero case commit 14e589ff4aa3f28a5424e92b6495ecb8950080f7 upstream. Turns out in the case of modulo by zero in a BPF program: A = A % X; (X == 0) the expected behavior is to terminate with return value 0. The bug in JIT is exposed by a new test case [1]. [1] https://lkml.org/lkml/2015/11/4/499 Signed-off-by: Zi Shen Lim Reported-by: Yang Shi Reported-by: Xi Wang CC: Alexei Starovoitov Fixes: e54bcde3d69d ("arm64: eBPF JIT compiler") Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/net/bpf_jit_comp.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 9ae6f2373a6da..6217f80702d2a 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -269,6 +269,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) break; case BPF_ALU | BPF_DIV | BPF_X: case BPF_ALU64 | BPF_DIV | BPF_X: + case BPF_ALU | BPF_MOD | BPF_X: + case BPF_ALU64 | BPF_MOD | BPF_X: { const u8 r0 = bpf2a64[BPF_REG_0]; @@ -281,16 +283,19 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) check_imm26(jmp_offset); emit(A64_B(jmp_offset), ctx); /* else */ - emit(A64_UDIV(is64, dst, dst, src), ctx); + switch (BPF_OP(code)) { + case BPF_DIV: + emit(A64_UDIV(is64, dst, dst, src), ctx); + break; + case BPF_MOD: + ctx->tmp_used = 1; + emit(A64_UDIV(is64, tmp, dst, src), ctx); + emit(A64_MUL(is64, tmp, tmp, src), ctx); + emit(A64_SUB(is64, dst, dst, tmp), ctx); + break; + } break; } - case BPF_ALU | BPF_MOD | BPF_X: - case BPF_ALU64 | BPF_MOD | BPF_X: - ctx->tmp_used = 1; - emit(A64_UDIV(is64, tmp, dst, src), ctx); - emit(A64_MUL(is64, tmp, tmp, src), ctx); - emit(A64_SUB(is64, dst, dst, tmp), ctx); - break; case BPF_ALU | BPF_LSH | BPF_X: case BPF_ALU64 | BPF_LSH | BPF_X: emit(A64_LSLV(is64, dst, dst, src), ctx); From 75f1fde24b56a5838c22f6eea2a16593a1b80fb4 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Tue, 17 Nov 2015 11:50:51 +0000 Subject: [PATCH 1626/2091] arm64: kernel: pause/unpause function graph tracer in cpu_suspend() commit de818bd4522c40ea02a81b387d2fa86f989c9623 upstream. The function graph tracer adds instrumentation that is required to trace both entry and exit of a function. In particular the function graph tracer updates the "return address" of a function in order to insert a trace callback on function exit. Kernel power management functions like cpu_suspend() are called upon power down entry with functions called "finishers" that are in turn called to trigger the power down sequence but they may not return to the kernel through the normal return path. When the core resumes from low-power it returns to the cpu_suspend() function through the cpu_resume path, which leaves the trace stack frame set-up by the function tracer in an incosistent state upon return to the kernel when tracing is enabled. This patch fixes the issue by pausing/resuming the function graph tracer on the thread executing cpu_suspend() (ie the function call that subsequently triggers the "suspend finishers"), so that the function graph tracer state is kept consistent across functions that enter power down states and never return by effectively disabling graph tracer while they are executing. 
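The shape of the fix is simply to bracket the non-returning region with the ftrace pause/unpause helpers on the current task. A condensed sketch, in which the call to fn() stands in for the full __cpu_suspend entry and the suspend finisher; only pause_graph_tracing()/unpause_graph_tracing() are taken from the patch:

    #include <linux/ftrace.h>

    static int cpu_suspend_sketch(unsigned long arg, int (*fn)(unsigned long))
    {
            int ret;

            pause_graph_tracing();    /* fn() may never return via the normal path */
            ret = fn(arg);            /* stand-in for the real suspend machinery */
            unpause_graph_tracing();  /* reached again via the cpu_resume path */

            return ret;
    }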
Fixes: 819e50e25d0c ("arm64: Add ftrace support") Signed-off-by: Lorenzo Pieralisi Reported-by: Catalin Marinas Reported-by: AKASHI Takahiro Suggested-by: Steven Rostedt Acked-by: Steven Rostedt Cc: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/suspend.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index 53f1f8dccf6c0..357418137db7c 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -1,3 +1,4 @@ +#include #include #include #include @@ -70,6 +71,13 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) */ local_dbg_save(flags); + /* + * Function graph tracer state gets incosistent when the kernel + * calls functions that never return (aka suspend finishers) hence + * disable graph tracing during their execution. + */ + pause_graph_tracing(); + /* * mm context saved on the stack, it will be restored when * the cpu comes out of reset through the identity mapped @@ -111,6 +119,8 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) hw_breakpoint_restore(NULL); } + unpause_graph_tracing(); + /* * Restore pstate flags. OS lock and mdscr have been already * restored, so from this point onwards, debugging is fully From 959cad3a68a7b47370f092fd76be3ea411127515 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 10 Nov 2015 15:11:20 +0100 Subject: [PATCH 1627/2091] ARM/arm64: KVM: test properly for a PTE's uncachedness commit e6fab54423450d699a09ec2b899473a541f61971 upstream. The open coded tests for checking whether a PTE maps a page as uncached use a flawed '(pte_val(xxx) & CONST) != CONST' pattern, which is not guaranteed to work since the type of a mapping is not a set of mutually exclusive bits For HYP mappings, the type is an index into the MAIR table (i.e, the index itself does not contain any information whatsoever about the type of the mapping), and for stage-2 mappings it is a bit field where normal memory and device types are defined as follows: #define MT_S2_NORMAL 0xf #define MT_S2_DEVICE_nGnRE 0x1 I.e., masking *and* comparing with the latter matches on the former, and we have been getting lucky merely because the S2 device mappings also have the PTE_UXN bit set, or we would misidentify memory mappings as device mappings. Since the unmap_range() code path (which contains one instance of the flawed test) is used both for HYP mappings and stage-2 mappings, and considering the difference between the two, it is non-trivial to fix this by rewriting the tests in place, as it would involve passing down the type of mapping through all the functions. However, since HYP mappings and stage-2 mappings both deal with host physical addresses, we can simply check whether the mapping is backed by memory that is managed by the host kernel, and only perform the D-cache maintenance if this is the case. 
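To see the problem with the old mask-and-compare test, it is enough to plug in the two stage-2 type values quoted above: the device type (0x1) is a subset of the bits of the normal type (0xf), so the comparison also matches normal memory. A tiny stand-alone illustration (the two constants are copied from the commit message; everything else is illustrative):

    #include <stdio.h>

    #define MT_S2_NORMAL        0xf
    #define MT_S2_DEVICE_nGnRE  0x1

    int main(void)
    {
            unsigned long type = MT_S2_NORMAL;   /* a normal-memory mapping */

            /* The old '(x & DEV) == DEV' pattern matches normal memory too. */
            if ((type & MT_S2_DEVICE_nGnRE) == MT_S2_DEVICE_nGnRE)
                    printf("misidentified as a device mapping\n");

            return 0;
    }

Hence the switch to a property of the backing page frame (pfn_valid()) rather than of the PTE attributes.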
Signed-off-by: Ard Biesheuvel Tested-by: Pavel Fedin Reviewed-by: Christoffer Dall Signed-off-by: Christoffer Dall Signed-off-by: Greg Kroah-Hartman --- arch/arm/kvm/mmu.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 191dcfab9f609..49ef4451e893b 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -98,6 +98,11 @@ static void kvm_flush_dcache_pud(pud_t pud) __kvm_flush_dcache_pud(pud); } +static bool kvm_is_device_pfn(unsigned long pfn) +{ + return !pfn_valid(pfn); +} + /** * stage2_dissolve_pmd() - clear and flush huge PMD entry * @kvm: pointer to kvm structure. @@ -213,7 +218,7 @@ static void unmap_ptes(struct kvm *kvm, pmd_t *pmd, kvm_tlb_flush_vmid_ipa(kvm, addr); /* No need to invalidate the cache for device mappings */ - if ((pte_val(old_pte) & PAGE_S2_DEVICE) != PAGE_S2_DEVICE) + if (!kvm_is_device_pfn(__phys_to_pfn(addr))) kvm_flush_dcache_pte(old_pte); put_page(virt_to_page(pte)); @@ -305,8 +310,7 @@ static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd, pte = pte_offset_kernel(pmd, addr); do { - if (!pte_none(*pte) && - (pte_val(*pte) & PAGE_S2_DEVICE) != PAGE_S2_DEVICE) + if (!pte_none(*pte) && !kvm_is_device_pfn(__phys_to_pfn(addr))) kvm_flush_dcache_pte(*pte); } while (pte++, addr += PAGE_SIZE, addr != end); } @@ -1037,11 +1041,6 @@ static bool kvm_is_write_fault(struct kvm_vcpu *vcpu) return kvm_vcpu_dabt_iswrite(vcpu); } -static bool kvm_is_device_pfn(unsigned long pfn) -{ - return !pfn_valid(pfn); -} - /** * stage2_wp_ptes - write protect PMD range * @pmd: pointer to pmd entry From f94cf332a8061d7cec4940d29f58b6f5a1e3f765 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 16 Nov 2015 10:28:17 +0000 Subject: [PATCH 1628/2091] arm64: KVM: Fix AArch32 to AArch64 register mapping commit c0f0963464c24e034b858441205455bf2a5d93ad upstream. When running a 32bit guest under a 64bit hypervisor, the ARMv8 architecture defines a mapping of the 32bit registers in the 64bit space. This includes banked registers that are being demultiplexed over the 64bit ones. On exceptions caused by an operation involving a 32bit register, the HW exposes the register number in the ESR_EL2 register. It was so far understood that SW had to distinguish between AArch32 and AArch64 accesses (based on the current AArch32 mode and register number). It turns out that I misinterpreted the ARM ARM, and the clue is in D1.20.1: "For some exceptions, the exception syndrome given in the ESR_ELx identifies one or more register numbers from the issued instruction that generated the exception. Where the exception is taken from an Exception level using AArch32 these register numbers give the AArch64 view of the register." Which means that the HW is already giving us the translated version, and that we shouldn't try to interpret it at all (for example, doing an MMIO operation from the IRQ mode using the LR register leads to very unexpected behaviours). The fix is thus not to perform a call to vcpu_reg32() at all from vcpu_reg(), and use whatever register number is supplied directly. The only case we need to find out about the mapping is when we actively generate a register access, which only occurs when injecting a fault in a guest. 
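The resulting rule: a register number decoded from ESR_EL2 already indexes the AArch64 view and must be used as-is, while a register chosen by software (such as the banked LR written when injecting a fault into a 32-bit guest) still needs the explicit AArch32 mapping. A schematic of the two paths, where only vcpu_reg() and vcpu_reg32() come from the patch and the wrapper names are illustrative:

    #include <linux/kvm_host.h>
    #include <asm/kvm_emulate.h>

    /* Register number taken from ESR_EL2: already the AArch64 view. */
    static unsigned long *reg_from_syndrome(struct kvm_vcpu *vcpu, u8 esr_rt)
    {
            return vcpu_reg(vcpu, esr_rt);      /* no 32-bit remapping */
    }

    /* Register picked by software when injecting a fault into a 32-bit guest. */
    static unsigned long *reg_for_injection(struct kvm_vcpu *vcpu, u8 reg32)
    {
            return vcpu_reg32(vcpu, reg32);     /* explicit AArch32 mapping */
    }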
Reviewed-by: Robin Murphy Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/kvm_emulate.h | 8 +++++--- arch/arm64/kvm/inject_fault.c | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 17e92f05b1fe5..3ca894ecf699b 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -99,11 +99,13 @@ static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu) *vcpu_cpsr(vcpu) |= COMPAT_PSR_T_BIT; } +/* + * vcpu_reg should always be passed a register number coming from a + * read of ESR_EL2. Otherwise, it may give the wrong result on AArch32 + * with banked registers. + */ static inline unsigned long *vcpu_reg(const struct kvm_vcpu *vcpu, u8 reg_num) { - if (vcpu_mode_is_32bit(vcpu)) - return vcpu_reg32(vcpu, reg_num); - return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.regs[reg_num]; } diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index 85c57158dcd96..648112e90ed54 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -48,7 +48,7 @@ static void prepare_fault32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset) /* Note: These now point to the banked copies */ *vcpu_spsr(vcpu) = new_spsr_value; - *vcpu_reg(vcpu, 14) = *vcpu_pc(vcpu) + return_offset; + *vcpu_reg32(vcpu, 14) = *vcpu_pc(vcpu) + return_offset; /* Branch to exception vector */ if (sctlr & (1 << 13)) From 3f0b20e1a2d8e9eadbe8ca81dfc7e3d324b813da Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 20 Nov 2015 12:12:21 +0100 Subject: [PATCH 1629/2091] arm64: fix building without CONFIG_UID16 commit fbc416ff86183e2203cdf975e2881d7c164b0271 upstream. As reported by Michal Simek, building an ARM64 kernel with CONFIG_UID16 disabled currently fails because the system call table still needs to reference the individual function entry points that are provided by kernel/sys_ni.c in this case, and the declarations are hidden inside of #ifdef CONFIG_UID16: arch/arm64/include/asm/unistd32.h:57:8: error: 'sys_lchown16' undeclared here (not in a function) __SYSCALL(__NR_lchown, sys_lchown16) I believe this problem only exists on ARM64, because older architectures tend to not need declarations when their system call table is built in assembly code, while newer architectures tend to not need UID16 support. ARM64 only uses these system calls for compatibility with 32-bit ARM binaries. This changes the CONFIG_UID16 check into CONFIG_HAVE_UID16, which is set unconditionally on ARM64 with CONFIG_COMPAT, so we see the declarations whenever we need them, but otherwise the behavior is unchanged. 
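The build error quoted above comes from the compat syscall table naming a symbol whose prototype was hidden: __SYSCALL() needs the declaration in scope even when the implementation ends up being the generic not-implemented stub. In outline (the table line is the one from the error message, the prototype and guard are those in the hunk below):

    /* arch/arm64/include/asm/unistd32.h references the symbol unconditionally */
    __SYSCALL(__NR_lchown, sys_lchown16)

    /* include/linux/syscalls.h must therefore declare it whenever compat
     * support can need it, hence the CONFIG_HAVE_UID16 guard:
     */
    #ifdef CONFIG_HAVE_UID16              /* was: #ifdef CONFIG_UID16 */
    asmlinkage long sys_lchown16(const char __user *filename,
                                 old_uid_t user, old_gid_t group);
    #endif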
Fixes: af1839eb4bd4 ("Kconfig: clean up the long arch list for the UID16 config option") Signed-off-by: Arnd Bergmann Acked-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- include/linux/syscalls.h | 2 +- include/linux/types.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 76d1e38aabe1d..0c53fd51bf9b7 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -524,7 +524,7 @@ asmlinkage long sys_chown(const char __user *filename, asmlinkage long sys_lchown(const char __user *filename, uid_t user, gid_t group); asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group); -#ifdef CONFIG_UID16 +#ifdef CONFIG_HAVE_UID16 asmlinkage long sys_chown16(const char __user *filename, old_uid_t user, old_gid_t group); asmlinkage long sys_lchown16(const char __user *filename, diff --git a/include/linux/types.h b/include/linux/types.h index 8715287c3b1f6..69c44d981da30 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -35,7 +35,7 @@ typedef __kernel_gid16_t gid16_t; typedef unsigned long uintptr_t; -#ifdef CONFIG_UID16 +#ifdef CONFIG_HAVE_UID16 /* This is defined by include/asm-{arch}/posix_types.h */ typedef __kernel_old_uid_t old_uid_t; typedef __kernel_old_gid_t old_gid_t; From 426bfb6a778411df0245373aa0979ce11d8fff85 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 3 Dec 2015 09:25:22 +0100 Subject: [PATCH 1630/2091] ARM/arm64: KVM: correct PTE uncachedness check commit 0de58f852875a0f0dcfb120bb8433e4e73c7803b upstream. Commit e6fab5442345 ("ARM/arm64: KVM: test properly for a PTE's uncachedness") modified the logic to test whether a HYP or stage-2 mapping needs flushing, from [incorrectly] interpreting the page table attributes to [incorrectly] checking whether the PFN that backs the mapping is covered by host system RAM. The PFN number is part of the output of the translation, not the input, so we have to use pte_pfn() on the contents of the PTE, not __phys_to_pfn() on the HYP virtual address or stage-2 intermediate physical address. Fixes: e6fab5442345 ("ARM/arm64: KVM: test properly for a PTE's uncachedness") Tested-by: Pavel Fedin Signed-off-by: Ard Biesheuvel Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- arch/arm/kvm/mmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 49ef4451e893b..da09ddcfcc00e 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -218,7 +218,7 @@ static void unmap_ptes(struct kvm *kvm, pmd_t *pmd, kvm_tlb_flush_vmid_ipa(kvm, addr); /* No need to invalidate the cache for device mappings */ - if (!kvm_is_device_pfn(__phys_to_pfn(addr))) + if (!kvm_is_device_pfn(pte_pfn(old_pte))) kvm_flush_dcache_pte(old_pte); put_page(virt_to_page(pte)); @@ -310,7 +310,7 @@ static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd, pte = pte_offset_kernel(pmd, addr); do { - if (!pte_none(*pte) && !kvm_is_device_pfn(__phys_to_pfn(addr))) + if (!pte_none(*pte) && !kvm_is_device_pfn(pte_pfn(*pte))) kvm_flush_dcache_pte(*pte); } while (pte++, addr += PAGE_SIZE, addr != end); } From 3e804c36399e7b39b39eff847a44f3933f48ac52 Mon Sep 17 00:00:00 2001 From: John Blackwood Date: Mon, 7 Dec 2015 11:50:34 +0000 Subject: [PATCH 1631/2091] arm64: Clear out any singlestep state on a ptrace detach operation commit 5db4fd8c52810bd9740c1240ebf89223b171aa70 upstream. 
Make sure to clear out any ptrace singlestep state when a ptrace(2) PTRACE_DETACH call is made on arm64 systems. Otherwise, the previously ptraced task will die off with a SIGTRAP signal if the debugger just previously singlestepped the ptraced task. Signed-off-by: John Blackwood [will: added comment to justify why this is in the arch code] Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/ptrace.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index d882b833dbdb5..608ac6aa497b6 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -58,6 +58,12 @@ */ void ptrace_disable(struct task_struct *child) { + /* + * This would be better off in core code, but PTRACE_DETACH has + * grown its fair share of arch-specific worts and changing it + * is likely to cause regressions on obscure architectures. + */ + user_disable_single_step(child); } #ifdef CONFIG_HAVE_HW_BREAKPOINT From f21731a54a666aec2242849207f6369dc976079c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 10 Dec 2015 16:05:36 +0000 Subject: [PATCH 1632/2091] arm64: mm: ensure that the zero page is visible to the page table walker commit 32d6397805d00573ce1fa55f408ce2bca15b0ad3 upstream. In paging_init, we allocate the zero page, memset it to zero and then point TTBR0 to it in order to avoid speculative fetches through the identity mapping. In order to guarantee that the freshly zeroed page is indeed visible to the page table walker, we need to execute a dsb instruction prior to writing the TTBR. Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/mm/mmu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 5b8b664422d36..cb34eb8bbb9db 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -450,6 +450,9 @@ void __init paging_init(void) empty_zero_page = virt_to_page(zero_page); + /* Ensure the zero page is visible to the page table walker */ + dsb(ishst); + /* * TTBR0 is only used for the identity mapping at this stage. Make it * point to zero page to avoid speculatively fetching new entries. From e479822e1e3a518250cb7c9be51ed8f857dfab86 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 15 Oct 2015 09:28:06 +0100 Subject: [PATCH 1633/2091] iommu/vt-d: Fix ATSR handling for Root-Complex integrated endpoints MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d14053b3c714178525f22660e6aaf41263d00056 upstream. The VT-d specification says that "Software must enable ATS on endpoint devices behind a Root Port only if the Root Port is reported as supporting ATS transactions." We walk up the tree to find a Root Port, but for integrated devices we don't find one — we get to the host bridge. In that case we *should* allow ATS. Currently we don't, which means that we are incorrectly failing to use ATS for the integrated graphics. Fix that. We should never break out of this loop "naturally" with bus==NULL, since we'll always find bridge==NULL in that case (and now return 1). So remove the check for (!bridge) after the loop, since it can never happen. If it did, it would be worthy of a BUG_ON(!bridge). But since it'll oops anyway in that case, that'll do just as well. 
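The reworked walk makes a three-way decision at each hop towards the root. In outline (atsr_walk_outline is a hypothetical wrapper; a return of -1 stands for "root port found, now look it up in the ATSR list", which the real code does under RCU):

    #include <linux/pci.h>

    static int atsr_walk_outline(struct pci_dev *dev)
    {
            struct pci_bus *bus;
            struct pci_dev *bridge;

            for (bus = dev->bus; bus; bus = bus->parent) {
                    bridge = bus->self;
                    if (!bridge)
                            return 1;   /* host bridge: integrated endpoint, allow ATS */
                    if (!pci_is_pcie(bridge) ||
                        pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
                            return 0;   /* behind conventional PCI: no ATS */
                    if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
                            return -1;  /* consult the ATSR for this root port */
            }
            return 0;   /* not reached: every walk ends at the host bridge */
    }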
Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/intel-iommu.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 8b0178db6a045..b85a8614c1280 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -3928,14 +3928,17 @@ int dmar_find_matched_atsr_unit(struct pci_dev *dev) dev = pci_physfn(dev); for (bus = dev->bus; bus; bus = bus->parent) { bridge = bus->self; - if (!bridge || !pci_is_pcie(bridge) || + /* If it's an integrated device, allow ATS */ + if (!bridge) + return 1; + /* Connected via non-PCIe: no ATS */ + if (!pci_is_pcie(bridge) || pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE) return 0; + /* If we found the root port, look it up in the ATSR */ if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT) break; } - if (!bridge) - return 0; rcu_read_lock(); list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) { From c777e9ab2a44c802ca1453b820f9e3ff5dbe11a4 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 30 Nov 2015 14:47:46 -0500 Subject: [PATCH 1634/2091] parisc iommu: fix panic due to trying to allocate too large region commit e46e31a3696ae2d66f32c207df3969613726e636 upstream. When using the Promise TX2+ SATA controller on PA-RISC, the system often crashes with kernel panic, for example just writing data with the dd utility will make it crash. Kernel panic - not syncing: drivers/parisc/sba_iommu.c: I/O MMU @ 000000000000a000 is out of mapping resources CPU: 0 PID: 18442 Comm: mkspadfs Not tainted 4.4.0-rc2 #2 Backtrace: [<000000004021497c>] show_stack+0x14/0x20 [<0000000040410bf0>] dump_stack+0x88/0x100 [<000000004023978c>] panic+0x124/0x360 [<0000000040452c18>] sba_alloc_range+0x698/0x6a0 [<0000000040453150>] sba_map_sg+0x260/0x5b8 [<000000000c18dbb4>] ata_qc_issue+0x264/0x4a8 [libata] [<000000000c19535c>] ata_scsi_translate+0xe4/0x220 [libata] [<000000000c19a93c>] ata_scsi_queuecmd+0xbc/0x320 [libata] [<0000000040499bbc>] scsi_dispatch_cmd+0xfc/0x130 [<000000004049da34>] scsi_request_fn+0x6e4/0x970 [<00000000403e95a8>] __blk_run_queue+0x40/0x60 [<00000000403e9d8c>] blk_run_queue+0x3c/0x68 [<000000004049a534>] scsi_run_queue+0x2a4/0x360 [<000000004049be68>] scsi_end_request+0x1a8/0x238 [<000000004049de84>] scsi_io_completion+0xfc/0x688 [<0000000040493c74>] scsi_finish_command+0x17c/0x1d0 The cause of the crash is not exhaustion of the IOMMU space, there is plenty of free pages. The function sba_alloc_range is called with size 0x11000, thus the pages_needed variable is 0x11. The function sba_search_bitmap is called with bits_wanted 0x11 and boundary size is 0x10 (because dma_get_seg_boundary(dev) returns 0xffff). The function sba_search_bitmap attempts to allocate 17 pages that must not cross 16-page boundary - it can't satisfy this requirement (iommu_is_span_boundary always returns true) and fails even if there are many free entries in the IOMMU space. How did it happen that we try to allocate 17 pages that don't cross 16-page boundary? The cause is in the function iommu_coalesce_chunks. This function tries to coalesce adjacent entries in the scatterlist. 
The function does several checks if it may coalesce one entry with the next, one of those checks is this: if (startsg->length + dma_len > max_seg_size) break; When it finishes coalescing adjacent entries, it allocates the mapping: sg_dma_len(contig_sg) = dma_len; dma_len = ALIGN(dma_len + dma_offset, IOVP_SIZE); sg_dma_address(contig_sg) = PIDE_FLAG | (iommu_alloc_range(ioc, dev, dma_len) << IOVP_SHIFT) | dma_offset; It is possible that (startsg->length + dma_len > max_seg_size) is false (we are just near the 0x10000 max_seg_size boundary), so the funcion decides to coalesce this entry with the next entry. When the coalescing succeeds, the function performs dma_len = ALIGN(dma_len + dma_offset, IOVP_SIZE); And now, because of non-zero dma_offset, dma_len is greater than 0x10000. iommu_alloc_range (a pointer to sba_alloc_range) is called and it attempts to allocate 17 pages for a device that must not cross 16-page boundary. To fix the bug, we must make sure that dma_len after addition of dma_offset and alignment doesn't cross the segment boundary. I.e. change if (startsg->length + dma_len > max_seg_size) break; to if (ALIGN(dma_len + dma_offset + startsg->length, IOVP_SIZE) > max_seg_size) break; This patch makes this change (it precalculates max_seg_boundary at the beginning of the function iommu_coalesce_chunks). I also added a check that the mapping length doesn't exceed dma_get_seg_boundary(dev) (it is not needed for Promise TX2+ SATA, but it may be needed for other devices that have dma_get_seg_boundary lower than dma_get_max_seg_size). Signed-off-by: Mikulas Patocka Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- drivers/parisc/iommu-helpers.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/parisc/iommu-helpers.h b/drivers/parisc/iommu-helpers.h index 761e77bfce5d2..e56f1569f6c39 100644 --- a/drivers/parisc/iommu-helpers.h +++ b/drivers/parisc/iommu-helpers.h @@ -104,7 +104,11 @@ iommu_coalesce_chunks(struct ioc *ioc, struct device *dev, struct scatterlist *contig_sg; /* contig chunk head */ unsigned long dma_offset, dma_len; /* start/len of DMA stream */ unsigned int n_mappings = 0; - unsigned int max_seg_size = dma_get_max_seg_size(dev); + unsigned int max_seg_size = min(dma_get_max_seg_size(dev), + (unsigned)DMA_CHUNK_SIZE); + unsigned int max_seg_boundary = dma_get_seg_boundary(dev) + 1; + if (max_seg_boundary) /* check if the addition above didn't overflow */ + max_seg_size = min(max_seg_size, max_seg_boundary); while (nents > 0) { @@ -138,14 +142,11 @@ iommu_coalesce_chunks(struct ioc *ioc, struct device *dev, /* ** First make sure current dma stream won't - ** exceed DMA_CHUNK_SIZE if we coalesce the + ** exceed max_seg_size if we coalesce the ** next entry. */ - if(unlikely(ALIGN(dma_len + dma_offset + startsg->length, - IOVP_SIZE) > DMA_CHUNK_SIZE)) - break; - - if (startsg->length + dma_len > max_seg_size) + if (unlikely(ALIGN(dma_len + dma_offset + startsg->length, IOVP_SIZE) > + max_seg_size)) break; /* From b50a2b556d1e487c2430e13042a98325adca0be5 Mon Sep 17 00:00:00 2001 From: Richard Purdie Date: Fri, 18 Sep 2015 16:31:33 -0700 Subject: [PATCH 1635/2091] HID: core: Avoid uninitialized buffer access commit 79b568b9d0c7c5d81932f4486d50b38efdd6da6d upstream. hid_connect adds various strings to the buffer but they're all conditional. You can find circumstances where nothing would be written to it but the kernel will still print the supposedly empty buffer with printk. 
This leads to corruption on the console/in the logs. Ensure buf is initialized to an empty string. Signed-off-by: Richard Purdie [dvhart: Initialize string to "" rather than assign buf[0] = NULL;] Cc: Jiri Kosina Cc: linux-input@vger.kernel.org Signed-off-by: Darren Hart Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 722a925795a28..9ce9dfeb12583 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1589,7 +1589,7 @@ int hid_connect(struct hid_device *hdev, unsigned int connect_mask) "Multi-Axis Controller" }; const char *type, *bus; - char buf[64]; + char buf[64] = ""; unsigned int i; int len; int ret; From 3f0cf7dcf712ef29ca47cd2f0b20292aab6e1d59 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 1 Dec 2015 19:52:12 +0000 Subject: [PATCH 1636/2091] staging: lustre: echo_copy.._lsm() dereferences userland pointers directly commit 9225c0b7b976dd9ceac2b80727a60d8fcb906a62 upstream. missing get_user() Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- .../lustre/lustre/obdecho/echo_client.c | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/staging/lustre/lustre/obdecho/echo_client.c b/drivers/staging/lustre/lustre/obdecho/echo_client.c index d542e06d6cd38..10e520d6bb753 100644 --- a/drivers/staging/lustre/lustre/obdecho/echo_client.c +++ b/drivers/staging/lustre/lustre/obdecho/echo_client.c @@ -1268,6 +1268,7 @@ static int echo_copyout_lsm(struct lov_stripe_md *lsm, void *_ulsm, int ulsm_nob) { struct lov_stripe_md *ulsm = _ulsm; + struct lov_oinfo **p; int nob, i; nob = offsetof(struct lov_stripe_md, lsm_oinfo[lsm->lsm_stripe_count]); @@ -1277,9 +1278,10 @@ echo_copyout_lsm(struct lov_stripe_md *lsm, void *_ulsm, int ulsm_nob) if (copy_to_user(ulsm, lsm, sizeof(*ulsm))) return -EFAULT; - for (i = 0; i < lsm->lsm_stripe_count; i++) { - if (copy_to_user(ulsm->lsm_oinfo[i], lsm->lsm_oinfo[i], - sizeof(lsm->lsm_oinfo[0]))) + for (i = 0, p = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++, p++) { + struct lov_oinfo __user *up; + if (get_user(up, ulsm->lsm_oinfo + i) || + copy_to_user(up, *p, sizeof(struct lov_oinfo))) return -EFAULT; } return 0; @@ -1287,9 +1289,10 @@ echo_copyout_lsm(struct lov_stripe_md *lsm, void *_ulsm, int ulsm_nob) static int echo_copyin_lsm(struct echo_device *ed, struct lov_stripe_md *lsm, - void *ulsm, int ulsm_nob) + struct lov_stripe_md __user *ulsm, int ulsm_nob) { struct echo_client_obd *ec = ed->ed_ec; + struct lov_oinfo **p; int i; if (ulsm_nob < sizeof(*lsm)) @@ -1305,11 +1308,10 @@ echo_copyin_lsm(struct echo_device *ed, struct lov_stripe_md *lsm, return -EINVAL; - for (i = 0; i < lsm->lsm_stripe_count; i++) { - if (copy_from_user(lsm->lsm_oinfo[i], - ((struct lov_stripe_md *)ulsm)-> \ - lsm_oinfo[i], - sizeof(lsm->lsm_oinfo[0]))) + for (i = 0, p = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++, p++) { + struct lov_oinfo __user *up; + if (get_user(up, ulsm->lsm_oinfo + i) || + copy_from_user(*p, up, sizeof(struct lov_oinfo))) return -EFAULT; } return 0; From b824d64b153a9683aed6730e9f093a7102c36799 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Salva=20Peir=C3=B3?= Date: Wed, 7 Oct 2015 07:09:26 -0300 Subject: [PATCH 1637/2091] media/vivid-osd: fix info leak in ioctl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit eda98796aff0d9bf41094b06811f5def3b4c333c upstream. 
The vivid_fb_ioctl() code fails to initialize the 16 _reserved bytes of struct fb_vblank after the ->hcount member. Add an explicit memset(0) before filling the structure to avoid the info leak. Signed-off-by: Salva Peiró Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/vivid/vivid-osd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/media/platform/vivid/vivid-osd.c b/drivers/media/platform/vivid/vivid-osd.c index 084d346fb4c4f..e15eef6a94e55 100644 --- a/drivers/media/platform/vivid/vivid-osd.c +++ b/drivers/media/platform/vivid/vivid-osd.c @@ -85,6 +85,7 @@ static int vivid_fb_ioctl(struct fb_info *info, unsigned cmd, unsigned long arg) case FBIOGET_VBLANK: { struct fb_vblank vblank; + memset(&vblank, 0, sizeof(vblank)); vblank.flags = FB_VBLANK_HAVE_COUNT | FB_VBLANK_HAVE_VCOUNT | FB_VBLANK_HAVE_VSYNC; vblank.count = 0; From 8885e7f3d76a9d42eb4fd97a3cfc5a8ac7f6f2ff Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 30 Nov 2015 10:15:42 -0700 Subject: [PATCH 1638/2091] direct-io: Fix negative return from dio read beyond eof commit 74cedf9b6c603f2278a05bc91b140b32b434d0b5 upstream. Assume a filesystem with 4KB blocks. When a file has size 1000 bytes and we issue direct IO read at offset 1024, blockdev_direct_IO() reads the tail of the last block and the logic for handling short DIO reads in dio_complete() results in a return value -24 (1000 - 1024) which obviously confuses userspace. Fix the problem by bailing out early once we sample i_size and can reliably check that direct IO read starts beyond i_size. Reported-by: Avi Kivity Fixes: 9fe55eea7e4b444bafc42fa0000cc2d1d2847275 CC: Steven Whitehouse Signed-off-by: Jan Kara Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- fs/direct-io.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/direct-io.c b/fs/direct-io.c index 745d2342651a0..6bc4bacd89843 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -1159,6 +1159,15 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, } } + /* Once we sampled i_size check for reads beyond EOF */ + dio->i_size = i_size_read(inode); + if (iov_iter_rw(iter) == READ && offset >= dio->i_size) { + if (dio->flags & DIO_LOCKING) + mutex_unlock(&inode->i_mutex); + kmem_cache_free(dio_cache, dio); + goto out; + } + /* * For file extending writes updating i_size before data writeouts * complete can expose uninitialized blocks in dumb filesystems. @@ -1212,7 +1221,6 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, sdio.next_block_for_io = -1; dio->iocb = iocb; - dio->i_size = i_size_read(inode); spin_lock_init(&dio->bio_lock); dio->refcount = 1; From 57b7d61c2d89e3e0665b9cdcc8fb73b08d10f0c2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 8 Dec 2015 12:22:47 -0500 Subject: [PATCH 1639/2091] fix the regression from "direct-io: Fix negative return from dio read beyond eof" commit 2d4594acbf6d8f75a27f3578476b6a27d8b13ebb upstream. Sure, it's better to bail out of past-the-eof read and return 0 than return a bogus negative value on such. Only we'd better make sure we are bailing out with 0 and not -ENOMEM... 
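Seen from user space, the combined effect of the two direct-io changes above is plain short-read-at-EOF semantics: an O_DIRECT pread() starting at or beyond i_size returns 0, not a negative value. A small test sketch along the lines of the original report (the file name and sizes are illustrative; assume "testfile" is about 1000 bytes long on a 4KB-block filesystem):

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <unistd.h>

    int main(void)
    {
            void *buf;
            ssize_t n;
            int fd = open("testfile", O_RDONLY | O_DIRECT);

            if (fd < 0 || posix_memalign(&buf, 4096, 4096))
                    return 1;

            n = pread(fd, buf, 4096, 4096);     /* offset is beyond i_size */
            printf("pread returned %zd\n", n);  /* expect 0 (EOF) */

            free(buf);
            close(fd);
            return 0;
    }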
Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/direct-io.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/direct-io.c b/fs/direct-io.c index 6bc4bacd89843..d83a021a659fd 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -1165,6 +1165,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, if (dio->flags & DIO_LOCKING) mutex_unlock(&inode->i_mutex); kmem_cache_free(dio_cache, dio); + retval = 0; goto out; } From 705164dbe08ff960bce54eb2edad20f8def2b247 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sat, 28 Nov 2015 08:52:04 -0800 Subject: [PATCH 1640/2091] mn10300: Select CONFIG_HAVE_UID16 to fix build failure commit c86576ea114a9a881cf7328dc7181052070ca311 upstream. mn10300 builds fail with fs/stat.c: In function 'cp_old_stat': fs/stat.c:163:2: error: 'old_uid_t' undeclared ipc/util.c: In function 'ipc64_perm_to_ipc_perm': ipc/util.c:540:2: error: 'old_uid_t' undeclared Select CONFIG_HAVE_UID16 and remove local definition of CONFIG_UID16 to fix the problem. Fixes: fbc416ff8618 ("arm64: fix building without CONFIG_UID16") Cc: Arnd Bergmann Acked-by: Arnd Bergmann Acked-by: Acked-by: David Howells Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- arch/mn10300/Kconfig | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/mn10300/Kconfig b/arch/mn10300/Kconfig index 4434b54e1d87c..78ae5552fdb89 100644 --- a/arch/mn10300/Kconfig +++ b/arch/mn10300/Kconfig @@ -1,6 +1,7 @@ config MN10300 def_bool y select HAVE_OPROFILE + select HAVE_UID16 select GENERIC_IRQ_SHOW select ARCH_WANT_IPC_PARSE_VERSION select HAVE_ARCH_TRACEHOOK @@ -37,9 +38,6 @@ config HIGHMEM config NUMA def_bool n -config UID16 - def_bool y - config RWSEM_GENERIC_SPINLOCK def_bool y From 1f6d936d33f4b5875ca3aed0e8084e0fb56d4e0d Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Wed, 18 Nov 2015 10:48:55 -0800 Subject: [PATCH 1641/2091] arm64: restore bogomips information in /proc/cpuinfo commit 92e788b749862ebe9920360513a718e5dd4da7a9 upstream. As previously reported, some userspace applications depend on bogomips showed by /proc/cpuinfo. Although there is much less legacy impact on aarch64 than arm, it does break libvirt. This patch reverts commit 326b16db9f69 ("arm64: delay: don't bother reporting bogomips in /proc/cpuinfo"), but with some tweak due to context change and without the pr_info(). Fixes: 326b16db9f69 ("arm64: delay: don't bother reporting bogomips in /proc/cpuinfo") Signed-off-by: Yang Shi Acked-by: Will Deacon Cc: # 3.12+ Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/setup.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 74753132c3ac8..bbdb53b87e136 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -523,6 +523,10 @@ static int c_show(struct seq_file *m, void *v) seq_printf(m, "processor\t: %d\n", i); #endif + seq_printf(m, "BogoMIPS\t: %lu.%02lu\n", + loops_per_jiffy / (500000UL/HZ), + loops_per_jiffy / (5000UL/HZ) % 100); + /* * Dump out the common processor features in a single line. * Userspace should read the hwcaps with getauxval(AT_HWCAP) From d17367a77457f556e7614f2e80db5d5a902e1268 Mon Sep 17 00:00:00 2001 From: libin Date: Tue, 3 Nov 2015 08:58:47 +0800 Subject: [PATCH 1642/2091] recordmcount: Fix endianness handling bug for nop_mcount commit c84da8b9ad3761eef43811181c7e896e9834b26b upstream. 
In nop_mcount, shdr->sh_offset and welp->r_offset should handle endianness properly, otherwise it will trigger Segmentation fault if the recordmcount main and file.o have different endianness. Link: http://lkml.kernel.org/r/563806C7.7070606@huawei.com Signed-off-by: Li Bin Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- scripts/recordmcount.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/recordmcount.h b/scripts/recordmcount.h index 49b582a225b0b..b9897e2be404d 100644 --- a/scripts/recordmcount.h +++ b/scripts/recordmcount.h @@ -377,7 +377,7 @@ static void nop_mcount(Elf_Shdr const *const relhdr, if (mcountsym == Elf_r_sym(relp) && !is_fake_mcount(relp)) { if (make_nop) - ret = make_nop((void *)ehdr, shdr->sh_offset + relp->r_offset); + ret = make_nop((void *)ehdr, _w(shdr->sh_offset) + _w(relp->r_offset)); if (warn_on_notrace_sect && !once) { printf("Section %s has mcount callers being ignored\n", txtname); From 2d5f6b0413359df065fd02d695c08bbc7d998bbd Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 31 Jan 2016 11:23:48 -0800 Subject: [PATCH 1643/2091] Linux 4.1.17 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 7609f1dcdcb94..d398dd440bc90 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 1 -SUBLEVEL = 16 +SUBLEVEL = 17 EXTRAVERSION = NAME = Series 4800 From c000201d0b514b2324bfdaccb8bf8f63eafec6e8 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 10 Nov 2015 17:08:41 +0100 Subject: [PATCH 1644/2091] ovl: allow zero size xattr [ Upstream commit 97daf8b97ad6f913a34c82515be64dc9ac08d63e ] When ovl_copy_xattr() encountered a zero size xattr no more xattrs were copied and the function returned success. This is clearly not the desired behavior. Signed-off-by: Miklos Szeredi Cc: Signed-off-by: Sasha Levin --- fs/overlayfs/copy_up.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 871fcb67be974..394e87f8340f7 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -54,7 +54,7 @@ int ovl_copy_xattr(struct dentry *old, struct dentry *new) for (name = buf; name < (buf + list_size); name += strlen(name) + 1) { size = vfs_getxattr(old, name, value, XATTR_SIZE_MAX); - if (size <= 0) { + if (size < 0) { error = size; goto out_free_value; } From 3c0a63d74b4a12a0a98cd85b195031d8d4b109a6 Mon Sep 17 00:00:00 2001 From: Vito Caputo Date: Sat, 24 Oct 2015 07:19:46 -0500 Subject: [PATCH 1645/2091] ovl: use a minimal buffer in ovl_copy_xattr [ Upstream commit e4ad29fa0d224d05e08b2858e65f112fd8edd4fe ] Rather than always allocating the high-order XATTR_SIZE_MAX buffer which is costly and prone to failure, only allocate what is needed and realloc if necessary. 
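The copy-up loop in the diff below follows the usual two-pass getxattr idiom: probe with the current buffer, and only when the value does not fit query the required size and grow. A condensed sketch of that pattern, using the same names as the diff, with the error handling reduced to comments:

	size = vfs_getxattr(old, name, value, value_size);
	if (size == -ERANGE)
		size = vfs_getxattr(old, name, NULL, 0);	/* ask for the value size */

	if (size > value_size) {
		void *new = krealloc(value, size, GFP_KERNEL);	/* grow, keep old contents */
		if (!new)
			break;		/* the diff records -ENOMEM here and bails out */
		value = new;
		value_size = size;
		goto retry;		/* re-read with the enlarged buffer */
	}

This replaces the previous unconditional high-order XATTR_SIZE_MAX allocation done for every copy-up.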
Fixes https://github.com/coreos/bugs/issues/489 Signed-off-by: Miklos Szeredi Cc: Signed-off-by: Sasha Levin --- fs/overlayfs/copy_up.c | 39 +++++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 394e87f8340f7..758012bfd5f03 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -22,9 +22,9 @@ int ovl_copy_xattr(struct dentry *old, struct dentry *new) { - ssize_t list_size, size; - char *buf, *name, *value; - int error; + ssize_t list_size, size, value_size = 0; + char *buf, *name, *value = NULL; + int uninitialized_var(error); if (!old->d_inode->i_op->getxattr || !new->d_inode->i_op->getxattr) @@ -41,29 +41,40 @@ int ovl_copy_xattr(struct dentry *old, struct dentry *new) if (!buf) return -ENOMEM; - error = -ENOMEM; - value = kmalloc(XATTR_SIZE_MAX, GFP_KERNEL); - if (!value) - goto out; - list_size = vfs_listxattr(old, buf, list_size); if (list_size <= 0) { error = list_size; - goto out_free_value; + goto out; } for (name = buf; name < (buf + list_size); name += strlen(name) + 1) { - size = vfs_getxattr(old, name, value, XATTR_SIZE_MAX); +retry: + size = vfs_getxattr(old, name, value, value_size); + if (size == -ERANGE) + size = vfs_getxattr(old, name, NULL, 0); + if (size < 0) { error = size; - goto out_free_value; + break; + } + + if (size > value_size) { + void *new; + + new = krealloc(value, size, GFP_KERNEL); + if (!new) { + error = -ENOMEM; + break; + } + value = new; + value_size = size; + goto retry; } + error = vfs_setxattr(new, name, value, size, 0); if (error) - goto out_free_value; + break; } - -out_free_value: kfree(value); out: kfree(buf); From b377c2be9c4cd3a8c422627d9964f257e6d0ed53 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Mon, 1 Feb 2016 11:27:06 -0500 Subject: [PATCH 1646/2091] [media] vb2: fix a regression in poll() behavior for output,streams [ Upstream commit 4623e5967448444a4ea1e77beb58898c4af48693 ] In the 3.17 kernel the poll() behavior changed for output streams: as long as not all buffers were queued up poll() would return that userspace can write. This is fine for the write() call, but when using stream I/O this changed the behavior since the expectation was that it would wait for buffers to become available for dequeuing. This patch only enables the check whether you can queue buffers for file I/O only, and skips it for stream I/O. Signed-off-by: Hans Verkuil Acked-by: Laurent Pinchart Cc: # for v3.17 and up Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/v4l2-core/videobuf2-core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/media/v4l2-core/videobuf2-core.c b/drivers/media/v4l2-core/videobuf2-core.c index cf9d644a8aff4..472eaad6fb78d 100644 --- a/drivers/media/v4l2-core/videobuf2-core.c +++ b/drivers/media/v4l2-core/videobuf2-core.c @@ -2662,10 +2662,10 @@ unsigned int vb2_poll(struct vb2_queue *q, struct file *file, poll_table *wait) return res | POLLERR; /* - * For output streams you can write as long as there are fewer buffers - * queued than there are buffers available. + * For output streams you can call write() as long as there are fewer + * buffers queued than there are buffers available. 
*/ - if (V4L2_TYPE_IS_OUTPUT(q->type) && q->queued_count < q->num_buffers) + if (V4L2_TYPE_IS_OUTPUT(q->type) && q->fileio && q->queued_count < q->num_buffers) return res | POLLOUT | POLLWRNORM; if (list_empty(&q->done_list)) From bf114c8b221d078ba4a5530732ea17b32367f216 Mon Sep 17 00:00:00 2001 From: Antonio Ospite Date: Fri, 2 Oct 2015 17:33:13 -0300 Subject: [PATCH 1647/2091] [media] gspca: ov534/topro: prevent a division by 0 [ Upstream commit dcc7fdbec53a960588f2c40232db2c6466c09917 ] v4l2-compliance sends a zeroed struct v4l2_streamparm in v4l2-test-formats.cpp::testParmType(), and this results in a division by 0 in some gspca subdrivers: divide error: 0000 [#1] SMP Modules linked in: gspca_ov534 gspca_main ... CPU: 0 PID: 17201 Comm: v4l2-compliance Not tainted 4.3.0-rc2-ao2 #1 Hardware name: System manufacturer System Product Name/M2N-E SLI, BIOS ASUS M2N-E SLI ACPI BIOS Revision 1301 09/16/2010 task: ffff8800818306c0 ti: ffff880095c4c000 task.ti: ffff880095c4c000 RIP: 0010:[] [] sd_set_streamparm+0x12/0x60 [gspca_ov534] RSP: 0018:ffff880095c4fce8 EFLAGS: 00010296 RAX: 0000000000000000 RBX: ffff8800c9522000 RCX: ffffffffa077a140 RDX: 0000000000000000 RSI: ffff880095e0c100 RDI: ffff8800c9522000 RBP: ffff880095e0c100 R08: ffffffffa077a100 R09: 00000000000000cc R10: ffff880067ec7740 R11: 0000000000000016 R12: ffffffffa07bb400 R13: 0000000000000000 R14: ffff880081b6a800 R15: 0000000000000000 FS: 00007fda0de78740(0000) GS:ffff88012fc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000014630f8 CR3: 00000000cf349000 CR4: 00000000000006f0 Stack: ffffffffa07a6431 ffff8800c9522000 ffffffffa077656e 00000000c0cc5616 ffff8800c9522000 ffffffffa07a5e20 ffff880095e0c100 0000000000000000 ffff880067ec7740 ffffffffa077a140 ffff880067ec7740 0000000000000016 Call Trace: [] ? v4l_s_parm+0x21/0x50 [videodev] [] ? vidioc_s_parm+0x4e/0x60 [gspca_main] [] ? __video_do_ioctl+0x280/0x2f0 [videodev] [] ? video_ioctl2+0x20/0x20 [videodev] [] ? video_usercopy+0x319/0x4e0 [videodev] [] ? page_add_new_anon_rmap+0x71/0xa0 [] ? mem_cgroup_commit_charge+0x52/0x90 [] ? handle_mm_fault+0xc18/0x1680 [] ? v4l2_ioctl+0xac/0xd0 [videodev] [] ? do_vfs_ioctl+0x28f/0x480 [] ? SyS_ioctl+0x74/0x80 [] ? entry_SYSCALL_64_fastpath+0x16/0x75 Code: c7 93 d9 79 a0 5b 5d e9 f1 f3 9a e0 0f 1f 00 66 2e 0f 1f 84 00 00 00 00 00 66 66 66 66 90 53 31 d2 48 89 fb 48 83 ec 08 8b 46 10 76 0c 80 bf ac 0c 00 00 00 88 87 4e 0e 00 00 74 09 80 bf 4f RIP [] sd_set_streamparm+0x12/0x60 [gspca_ov534] RSP ---[ end trace 279710c2c6c72080 ]--- Following what the doc says about a zeroed timeperframe (see http://www.linuxtv.org/downloads/v4l-dvb-apis/vidioc-g-parm.html): ... To reset manually applications can just set this field to zero. fix the issue by resetting the frame rate to a default value in case of an unusable timeperframe. The fix is done in the subdrivers instead of gspca.c because only the subdrivers have notion of a default frame rate to reset the camera to. 
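Both affected subdrivers end up with the same guard; stripped of naming differences, the ov534 hunk below reduces to:

	if (tpf->numerator == 0 || tpf->denominator == 0)
		sd->frame_rate = 30;	/* reset to the driver's default rate */
	else
		sd->frame_rate = tpf->denominator / tpf->numerator;

Falling back to a known default on a zeroed timeperframe is what the quoted G/S_PARM documentation allows, and it keeps the division from ever seeing a zero divisor.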
Signed-off-by: Antonio Ospite Cc: stable@vger.kernel.org Reviewed-by: Hans de Goede Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/usb/gspca/ov534.c | 9 +++++++-- drivers/media/usb/gspca/topro.c | 6 +++++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/media/usb/gspca/ov534.c b/drivers/media/usb/gspca/ov534.c index 146071b8e1161..bfff1d1c70ab0 100644 --- a/drivers/media/usb/gspca/ov534.c +++ b/drivers/media/usb/gspca/ov534.c @@ -1491,8 +1491,13 @@ static void sd_set_streamparm(struct gspca_dev *gspca_dev, struct v4l2_fract *tpf = &cp->timeperframe; struct sd *sd = (struct sd *) gspca_dev; - /* Set requested framerate */ - sd->frame_rate = tpf->denominator / tpf->numerator; + if (tpf->numerator == 0 || tpf->denominator == 0) + /* Set default framerate */ + sd->frame_rate = 30; + else + /* Set requested framerate */ + sd->frame_rate = tpf->denominator / tpf->numerator; + if (gspca_dev->streaming) set_frame_rate(gspca_dev); diff --git a/drivers/media/usb/gspca/topro.c b/drivers/media/usb/gspca/topro.c index c70ff406b07ac..c028a5c2438ed 100644 --- a/drivers/media/usb/gspca/topro.c +++ b/drivers/media/usb/gspca/topro.c @@ -4802,7 +4802,11 @@ static void sd_set_streamparm(struct gspca_dev *gspca_dev, struct v4l2_fract *tpf = &cp->timeperframe; int fr, i; - sd->framerate = tpf->denominator / tpf->numerator; + if (tpf->numerator == 0 || tpf->denominator == 0) + sd->framerate = 30; + else + sd->framerate = tpf->denominator / tpf->numerator; + if (gspca_dev->streaming) setframerate(gspca_dev, v4l2_ctrl_g_ctrl(gspca_dev->exposure)); From 6649b1104932f3213f7e5aca7aefbdff7fbfae6b Mon Sep 17 00:00:00 2001 From: Malcolm Priestley Date: Mon, 31 Aug 2015 06:13:45 -0300 Subject: [PATCH 1648/2091] [media] media: dvb-core: Don't force CAN_INVERSION_AUTO in oneshot mode [ Upstream commit c9d57de6103e343f2d4e04ea8d9e417e10a24da7 ] When in FE_TUNE_MODE_ONESHOT the frontend must report the actual capabilities so user can take appropriate action. With frontends that can't do auto inversion this is done by dvb-core automatically so CAN_INVERSION_AUTO is valid. However, when in FE_TUNE_MODE_ONESHOT this is not true. So only set FE_CAN_INVERSION_AUTO in modes other than FE_TUNE_MODE_ONESHOT Signed-off-by: Malcolm Priestley Cc: Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/dvb-core/dvb_frontend.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/media/dvb-core/dvb_frontend.c b/drivers/media/dvb-core/dvb_frontend.c index 882ca417f328a..3ab874703d119 100644 --- a/drivers/media/dvb-core/dvb_frontend.c +++ b/drivers/media/dvb-core/dvb_frontend.c @@ -2333,9 +2333,9 @@ static int dvb_frontend_ioctl_legacy(struct file *file, dev_dbg(fe->dvb->device, "%s: current delivery system on cache: %d, V3 type: %d\n", __func__, c->delivery_system, fe->ops.info.type); - /* Force the CAN_INVERSION_AUTO bit on. If the frontend doesn't - * do it, it is done for it. 
*/ - info->caps |= FE_CAN_INVERSION_AUTO; + /* Set CAN_INVERSION_AUTO bit on in other than oneshot mode */ + if (!(fepriv->tune_mode_flags & FE_TUNE_MODE_ONESHOT)) + info->caps |= FE_CAN_INVERSION_AUTO; err = 0; break; } From f0193949e113c8bf87ed8d6143bcf5823dd78db0 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 16 Nov 2015 17:25:16 -0500 Subject: [PATCH 1649/2091] tools lib traceevent: Fix output of %llu for 64 bit values read on 32 bit machines [ Upstream commit 32abc2ede536aae52978d6c0a8944eb1df14f460 ] When a long value is read on 32 bit machines for 64 bit output, the parsing needs to change "%lu" into "%llu", as the value is read natively. Unfortunately, if "%llu" is already there, the code will add another "l" to it and fail to parse it properly. Signed-off-by: Steven Rostedt Acked-by: Namhyung Kim Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20151116172516.4b79b109@gandalf.local.home Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/lib/traceevent/event-parse.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index ed5461f065bd8..f64a2d54d4674 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -4841,13 +4841,12 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event sizeof(long) != 8) { char *p; - ls = 2; /* make %l into %ll */ - p = strchr(format, 'l'); - if (p) + if (ls == 1 && (p = strchr(format, 'l'))) memmove(p+1, p, strlen(p)+1); else if (strcmp(format, "%p") == 0) strcpy(format, "0x%llx"); + ls = 2; } switch (ls) { case -2: From f04d333fc68c2dc2441e2ef2ca12e2fc4793a587 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 24 Nov 2015 14:32:44 -0500 Subject: [PATCH 1650/2091] drm/radeon: call hpd_irq_event on resume [ Upstream commit dbb17a21c131eca94eb31136eee9a7fe5aff00d9 ] Need to call this on resume if displays changes during suspend in order to properly be notified of changes. Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Sasha Levin --- drivers/gpu/drm/radeon/radeon_device.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 604c44d88e7a4..ccab94ed9d94e 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1734,6 +1734,7 @@ int radeon_resume_kms(struct drm_device *dev, bool resume, bool fbcon) } drm_kms_helper_poll_enable(dev); + drm_helper_hpd_irq_event(dev); /* set the power state here in case we are a PX system or headless */ if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) From 549913712496e0534b1ee23dcaa24721e0edd6f4 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Fri, 4 Dec 2015 14:29:02 +0100 Subject: [PATCH 1651/2091] ARM: mvebu: remove duplicated regulator definition in Armada 388 GP [ Upstream commit 079ae0c121fd23287f4ad2be9e9f8a13f63cae73 ] The Armada 388 GP Device Tree file describes two times a regulator named 'reg_usb2_1_vbus', with the exact same description. This has been wrong since Armada 388 GP support was introduced. 
Fixes: 928413bd859c0 ("ARM: mvebu: Add Armada 388 General Purpose Development Board support") Signed-off-by: Thomas Petazzoni Cc: # v4.0+ Signed-off-by: Gregory CLEMENT Signed-off-by: Sasha Levin --- arch/arm/boot/dts/armada-388-gp.dts | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/arch/arm/boot/dts/armada-388-gp.dts b/arch/arm/boot/dts/armada-388-gp.dts index 78514ab0b47ac..757ac079e7f23 100644 --- a/arch/arm/boot/dts/armada-388-gp.dts +++ b/arch/arm/boot/dts/armada-388-gp.dts @@ -288,16 +288,6 @@ gpio = <&expander0 4 GPIO_ACTIVE_HIGH>; }; - reg_usb2_1_vbus: v5-vbus1 { - compatible = "regulator-fixed"; - regulator-name = "v5.0-vbus1"; - regulator-min-microvolt = <5000000>; - regulator-max-microvolt = <5000000>; - enable-active-high; - regulator-always-on; - gpio = <&expander0 4 GPIO_ACTIVE_HIGH>; - }; - reg_sata0: pwr-sata0 { compatible = "regulator-fixed"; regulator-name = "pwr_en_sata0"; From 51975a203210d3a94a7711651b1045ed0e3514ed Mon Sep 17 00:00:00 2001 From: David Gibson Date: Mon, 30 Nov 2015 12:30:30 +1100 Subject: [PATCH 1652/2091] time: Avoid signed overflow in timekeeping_get_ns() [ Upstream commit 35a4933a895927990772ae96fdcfd2f806929ee2 ] 1e75fa8 "time: Condense timekeeper.xtime into xtime_sec" replaced a call to clocksource_cyc2ns() from timekeeping_get_ns() with an open-coded version of the same logic to avoid keeping a semi-redundant struct timespec in struct timekeeper. However, the commit also introduced a subtle semantic change - where clocksource_cyc2ns() uses purely unsigned math, the new version introduces a signed temporary, meaning that if (delta * tk->mult) has a 63-bit overflow the following shift will still give a negative result. The choice of 'maxsec' in __clocksource_updatefreq_scale() means this will generally happen if there's a ~10 minute pause in examining the clocksource. This can be triggered on a powerpc KVM guest by stopping it from qemu for a bit over 10 minutes. After resuming time has jumped backwards several minutes causing numerous problems (jiffies does not advance, msleep()s can be extended by minutes..). It doesn't happen on x86 KVM guests, because the guest TSC is effectively frozen while the guest is stopped, which is not the case for the powerpc timebase. Obviously an unsigned (64 bit) overflow will only take twice as long as a signed, 63-bit overflow. I don't know the time code well enough to know if that will still cause incorrect calculations, or if a 64-bit overflow is avoided elsewhere. Still, an incorrect forwards clock adjustment will cause less trouble than time going backwards. So, this patch removes the potential for intermediate signed overflow. 
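The failure mode is easiest to see with concrete numbers. The values below are invented purely to demonstrate the sign flip and are not a real clocksource's mult/shift:

	u64 delta = 1ULL << 54;		/* cycles accumulated across a long pause */
	u32 mult  = 1U << 9;		/* made-up multiplier */
	u32 shift = 8;

	s64 nsec = delta * mult;	/* product is 2^63: negative once stored as s64 */
	nsec >>= shift;			/* arithmetic shift keeps the sign, time jumps backwards */

	u64 fixed = (delta * mult) >> shift;	/* unsigned throughout; the real code also
						 * adds tkr->xtime_nsec before shifting */

The unsigned expression used by the patch only wraps after a full 64-bit overflow, which as the commit message notes takes twice as long to reach.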
Cc: stable@vger.kernel.org (3.7+) Suggested-by: Laurent Vivier Tested-by: Laurent Vivier Signed-off-by: David Gibson Signed-off-by: John Stultz Signed-off-by: Sasha Levin --- kernel/time/timekeeping.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 414d9df947242..65dbf8aee751d 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -316,8 +316,7 @@ static inline s64 timekeeping_get_ns(struct tk_read_base *tkr) delta = timekeeping_get_delta(tkr); - nsec = delta * tkr->mult + tkr->xtime_nsec; - nsec >>= tkr->shift; + nsec = (delta * tkr->mult + tkr->xtime_nsec) >> tkr->shift; /* If arch requires, add in get_arch_timeoffset() */ return nsec + arch_gettimeoffset(); From a94ff8213e3bd0918424f3208450955638f98197 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Fri, 20 Nov 2015 09:11:45 +0100 Subject: [PATCH 1653/2091] KVM: PPC: Fix emulation of H_SET_DABR/X on POWER8 [ Upstream commit 760a7364f27d974d100118d88190e574626e18a6 ] In the old DABR register, the BT (Breakpoint Translation) bit is bit number 61. In the new DAWRX register, the WT (Watchpoint Translation) bit is bit number 59. So to move the DABR-BT bit into the position of the DAWRX-WT bit, it has to be shifted by two, not only by one. This fixes hardware watchpoints in gdb of older guests that only use the H_SET_DABR/X interface instead of the new H_SET_MODE interface. Cc: stable@vger.kernel.org # v3.14+ Signed-off-by: Thomas Huth Reviewed-by: Laurent Vivier Reviewed-by: David Gibson Signed-off-by: Paul Mackerras Signed-off-by: Sasha Levin --- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index ffd98b2bfa166..f8338e6d3dd78 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -2047,7 +2047,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) /* Emulate H_SET_DABR/X on P8 for the sake of compat mode guests */ 2: rlwimi r5, r4, 5, DAWRX_DR | DAWRX_DW - rlwimi r5, r4, 1, DAWRX_WT + rlwimi r5, r4, 2, DAWRX_WT clrrdi r4, r4, 3 std r4, VCPU_DAWR(r3) std r5, VCPU_DAWRX(r3) From ba122961c9ef892d5b94d68fbb91d59e3d6ec38a Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 9 Dec 2015 16:11:59 +0100 Subject: [PATCH 1654/2091] ovl: root: copy attr [ Upstream commit ed06e069775ad9236087594a1c1667367e983fb5 ] We copy i_uid and i_gid of underlying inode into overlayfs inode. Except for the root inode. Fix this omission. 
Signed-off-by: Miklos Szeredi Cc: Signed-off-by: Sasha Levin --- fs/overlayfs/super.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index d74af7f78fec3..c4b595e2fc209 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -986,6 +986,9 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) root_dentry->d_fsdata = oe; + ovl_copyattr(ovl_dentry_real(root_dentry)->d_inode, + root_dentry->d_inode); + sb->s_magic = OVERLAYFS_SUPER_MAGIC; sb->s_op = &ovl_super_operations; sb->s_root = root_dentry; From 138792935ec5f83d0e18b18e692be7341fa42a4a Mon Sep 17 00:00:00 2001 From: Dmitry Tunin Date: Sat, 5 Dec 2015 14:09:36 +0300 Subject: [PATCH 1655/2091] Bluetooth: Add support of Toshiba Broadcom based devices [ Upstream commit 1623d0bf847d3b38d8cf24367b3689ba0e3fe2aa ] BugLink: https://bugs.launchpad.net/bugs/1522949 T: Bus=03 Lev=02 Prnt=02 Port=05 Cnt=02 Dev#= 4 Spd=12 MxCh= 0 D: Ver= 2.00 Cls=ff(vend.) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=0930 ProdID=0225 Rev=01.12 S: Manufacturer=Broadcom Corp S: Product=BCM43142A0 S: SerialNumber=4CBB58034671 C: #Ifs= 4 Cfg#= 1 Atr=e0 MxPwr=0mA I: If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=01 Prot=01 Driver=(none) I: If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=01 Prot=01 Driver=(none) I: If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) I: If#= 3 Alt= 0 #EPs= 0 Cls=fe(app. ) Sub=01 Prot=01 Driver=(none) Signed-off-by: Dmitry Tunin Signed-off-by: Marcel Holtmann Cc: stable@vger.kernel.org Signed-off-by: Sasha Levin --- drivers/bluetooth/btusb.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 7bf87d9bfd7d5..fdba79c3877cd 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -144,6 +144,10 @@ static const struct usb_device_id btusb_table[] = { { USB_VENDOR_AND_INTERFACE_INFO(0x13d3, 0xff, 0x01, 0x01), .driver_info = BTUSB_BCM_PATCHRAM }, + /* Toshiba Corp - Broadcom based */ + { USB_VENDOR_AND_INTERFACE_INFO(0x0930, 0xff, 0x01, 0x01), + .driver_info = BTUSB_BCM_PATCHRAM }, + /* Intel Bluetooth USB Bootloader (RAM module) */ { USB_DEVICE(0x8087, 0x0a5a), .driver_info = BTUSB_INTEL_BOOT | BTUSB_BROKEN_ISOC }, From 01e6b1f4cc792b0c932b3b37f273012af0ae50a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 3 Dec 2015 23:14:09 +0200 Subject: [PATCH 1656/2091] drm: Don't overwrite UNVERFIED mode status to OK MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit be8719a610003297c28b140f1ebd4445aef1d613 ] The way the mode probing works is this: 1. All modes currently on the mode list are marked as UNVERIFIED 2. New modes are on the probed_modes list (they start with status OK) 3. Modes are moved from the probed_modes list to the actual mode list. If a mode already on the mode list is deemed to match one of the probed modes, the duplicate is dropped and the mode status updated to OK. After this the probed_modes list will be empty. 4. All modes on the mode list are verified to not violate any constraints. Any that do are marked as such. 5. Any mode left with a non-OK status is pruned from the list, with an appropriate debug message. What all this means is that any mode on the original list that didn't have a duplicate on the probed_modes list, should be left with status UNVERFIED (or previously could have been left with some other status, but never OK). 
I broke that in commit 05acaec334fc ("drm: Reorganize probed mode validation") by always assigning something to the mode->status during the validation step. So any mode from the old list that still passed the validation would be left on the list with status OK in the end. Fix this by not doing the basic mode validation unless the mode already has status OK (meaning it came from the probed_modes list, or at least a duplicate of it was on that list). This way we will correctly prune away any mode from the old mode list that didn't appear on the probed_modes list. Cc: stable@vger.kernel.org Cc: Adam Jackson Fixes: 05acaec334fc ("drm: Reorganize probed mode validation") Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1449177255-9515-2-git-send-email-ville.syrjala@linux.intel.com Testcase: igt/kms_force_connector_basic/prune-stale-modes Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=93332 [danvet: Also applying to drm-misc to avoid too much conflict hell - there's a big pile of patches from Ville on top of this one.] Signed-off-by: Daniel Vetter Signed-off-by: Sasha Levin --- drivers/gpu/drm/drm_probe_helper.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_probe_helper.c b/drivers/gpu/drm/drm_probe_helper.c index 63503879a676c..0d75e75b1da34 100644 --- a/drivers/gpu/drm/drm_probe_helper.c +++ b/drivers/gpu/drm/drm_probe_helper.c @@ -195,7 +195,8 @@ static int drm_helper_probe_single_connector_modes_merge_bits(struct drm_connect mode_flags |= DRM_MODE_FLAG_3D_MASK; list_for_each_entry(mode, &connector->modes, head) { - mode->status = drm_mode_validate_basic(mode); + if (mode->status == MODE_OK) + mode->status = drm_mode_validate_basic(mode); if (mode->status == MODE_OK) mode->status = drm_mode_validate_size(mode, maxX, maxY); From 76e8046ae4f3c7abd9e4f8ab69000609e554eb77 Mon Sep 17 00:00:00 2001 From: Peter Wu Date: Mon, 7 Dec 2015 01:07:31 +0100 Subject: [PATCH 1657/2091] rtlwifi: fix memory leak for USB device [ Upstream commit 17bc55864f81dd730d05f09b1641312a7990d636 ] Free skb for received frames with a wrong checksum. This can happen pretty rapidly, exhausting all memory. This fixes a memleak (detected with kmemleak). Originally found while using monitor mode, but it also appears during managed mode (once the link is up). Cc: stable@vger.kernel.org Signed-off-by: Peter Wu ACKed-by: Larry Finger Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/rtlwifi/usb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/rtlwifi/usb.c b/drivers/net/wireless/rtlwifi/usb.c index 2721cf89fb160..aac1ed3f7bb41 100644 --- a/drivers/net/wireless/rtlwifi/usb.c +++ b/drivers/net/wireless/rtlwifi/usb.c @@ -531,6 +531,8 @@ static void _rtl_usb_rx_process_noagg(struct ieee80211_hw *hw, ieee80211_rx(hw, skb); else dev_kfree_skb_any(skb); + } else { + dev_kfree_skb_any(skb); } } From 3d51d36e7148182e76fc36d66c494d192c410fdd Mon Sep 17 00:00:00 2001 From: Uri Mashiach Date: Thu, 10 Dec 2015 15:12:56 +0200 Subject: [PATCH 1658/2091] wlcore/wl12xx: spi: fix oops on firmware load [ Upstream commit 9b2761cb72dc41e1948c8a5512b4efd384eda130 ] The maximum chunks used by the function is (SPI_AGGR_BUFFER_SIZE / WSPI_MAX_CHUNK_SIZE + 1). The original commands array had space for (SPI_AGGR_BUFFER_SIZE / WSPI_MAX_CHUNK_SIZE) commands. When the last chunk is used (len > 4 * WSPI_MAX_CHUNK_SIZE), the last command is stored outside the bounds of the commands array. 
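The overflow is plain integer arithmetic; a sketch of the worst case, assuming only what is stated above (SPI_AGGR_BUFFER_SIZE is not an exact multiple of WSPI_MAX_CHUNK_SIZE):

	/* a write of len bytes is cut into full-size chunks plus an optional
	 * partial tail, and each chunk needs its own command entry */
	num_chunks = len / WSPI_MAX_CHUNK_SIZE;
	if (len % WSPI_MAX_CHUNK_SIZE)
		num_chunks++;

	/* worst case, len == SPI_AGGR_BUFFER_SIZE:
	 *	num_chunks == SPI_AGGR_BUFFER_SIZE / WSPI_MAX_CHUNK_SIZE + 1
	 * so a commands[SPI_AGGR_BUFFER_SIZE / WSPI_MAX_CHUNK_SIZE] array is
	 * one element short, and the last command lands past its end. */

That out-of-bounds store on the stack is what leads to the oops below.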
Oops 5 (page fault) is generated during current wl1271 firmware load attempt: root@debian-armhf:~# ifconfig wlan0 up [ 294.312399] Unable to handle kernel paging request at virtual address 00203fc4 [ 294.320173] pgd = de528000 [ 294.323028] [00203fc4] *pgd=00000000 [ 294.326916] Internal error: Oops: 5 [#1] SMP ARM [ 294.331789] Modules linked in: bnep rfcomm bluetooth ipv6 arc4 wl12xx wlcore mac80211 musb_dsps cfg80211 musb_hdrc usbcore usb_common wlcore_spi omap_rng rng_core musb_am335x omap_wdt cpufreq_dt thermal_sys hwmon [ 294.351838] CPU: 0 PID: 1827 Comm: ifconfig Not tainted 4.2.0-00002-g3e9ad27-dirty #78 [ 294.360154] Hardware name: Generic AM33XX (Flattened Device Tree) [ 294.366557] task: dc9d6d40 ti: de550000 task.ti: de550000 [ 294.372236] PC is at __spi_validate+0xa8/0x2ac [ 294.376902] LR is at __spi_sync+0x78/0x210 [ 294.381200] pc : [] lr : [] psr: 60000013 [ 294.381200] sp : de551998 ip : de5519d8 fp : 00200000 [ 294.393242] r10: de551c8c r9 : de5519d8 r8 : de3a9000 [ 294.398730] r7 : de3a9258 r6 : de3a9400 r5 : de551a48 r4 : 00203fbc [ 294.405577] r3 : 00000000 r2 : 00000000 r1 : 00000000 r0 : de3a9000 [ 294.412420] Flags: nZCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment user [ 294.419918] Control: 10c5387d Table: 9e528019 DAC: 00000015 [ 294.425954] Process ifconfig (pid: 1827, stack limit = 0xde550218) [ 294.432437] Stack: (0xde551998 to 0xde552000) ... [ 294.883613] [] (__spi_validate) from [] (__spi_sync+0x78/0x210) [ 294.891670] [] (__spi_sync) from [] (wl12xx_spi_raw_write+0xfc/0x148 [wlcore_spi]) [ 294.901661] [] (wl12xx_spi_raw_write [wlcore_spi]) from [] (wlcore_boot_upload_firmware+0x1ec/0x458 [wlcore]) [ 294.914038] [] (wlcore_boot_upload_firmware [wlcore]) from [] (wl12xx_boot+0xc10/0xfac [wl12xx]) [ 294.925161] [] (wl12xx_boot [wl12xx]) from [] (wl1271_op_add_interface+0x5b0/0x910 [wlcore]) [ 294.936364] [] (wl1271_op_add_interface [wlcore]) from [] (ieee80211_do_open+0x44c/0xf7c [mac80211]) [ 294.947963] [] (ieee80211_do_open [mac80211]) from [] (__dev_open+0xa8/0x110) [ 294.957307] [] (__dev_open) from [] (__dev_change_flags+0x88/0x148) [ 294.965713] [] (__dev_change_flags) from [] (dev_change_flags+0x18/0x48) [ 294.974576] [] (dev_change_flags) from [] (devinet_ioctl+0x6b4/0x7d0) [ 294.983191] [] (devinet_ioctl) from [] (sock_ioctl+0x1e4/0x2bc) [ 294.991244] [] (sock_ioctl) from [] (do_vfs_ioctl+0x420/0x6b0) [ 294.999208] [] (do_vfs_ioctl) from [] (SyS_ioctl+0x6c/0x7c) [ 295.006880] [] (SyS_ioctl) from [] (ret_fast_syscall+0x0/0x54) [ 295.014835] Code: e1550004 e2444034 0a00007d e5953018 (e5942008) [ 295.021544] ---[ end trace 66ed188198f4e24e ]--- Signed-off-by: Uri Mashiach Acked-by: Igor Grinberg Cc: stable@vger.kernel.org Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/ti/wlcore/spi.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ti/wlcore/spi.c b/drivers/net/wireless/ti/wlcore/spi.c index f1ac2839d97cb..720e4e4b5a3cd 100644 --- a/drivers/net/wireless/ti/wlcore/spi.c +++ b/drivers/net/wireless/ti/wlcore/spi.c @@ -73,7 +73,10 @@ */ #define SPI_AGGR_BUFFER_SIZE (4 * PAGE_SIZE) -#define WSPI_MAX_NUM_OF_CHUNKS (SPI_AGGR_BUFFER_SIZE / WSPI_MAX_CHUNK_SIZE) +/* Maximum number of SPI write chunks */ +#define WSPI_MAX_NUM_OF_CHUNKS \ + ((SPI_AGGR_BUFFER_SIZE / WSPI_MAX_CHUNK_SIZE) + 1) + struct wl12xx_spi_glue { struct device *dev; @@ -268,9 +271,10 @@ static int __must_check wl12xx_spi_raw_write(struct device *child, int addr, void *buf, size_t len, bool fixed) { struct 
wl12xx_spi_glue *glue = dev_get_drvdata(child->parent); - struct spi_transfer t[2 * (WSPI_MAX_NUM_OF_CHUNKS + 1)]; + /* SPI write buffers - 2 for each chunk */ + struct spi_transfer t[2 * WSPI_MAX_NUM_OF_CHUNKS]; struct spi_message m; - u32 commands[WSPI_MAX_NUM_OF_CHUNKS]; + u32 commands[WSPI_MAX_NUM_OF_CHUNKS]; /* 1 command per chunk */ u32 *cmd; u32 chunk_len; int i; From c8e1bebf86dff9277e21e9a2769724ca05f40c02 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 11 Dec 2015 16:30:49 +0100 Subject: [PATCH 1659/2091] ovl: setattr: check permissions before copy-up [ Upstream commit cf9a6784f7c1b5ee2b9159a1246e327c331c5697 ] Without this copy-up of a file can be forced, even without actually being allowed to do anything on the file. [Arnd Bergmann] include for PAGE_CACHE_SIZE (used by MAX_LFS_FILESIZE definition). Signed-off-by: Miklos Szeredi Cc: Signed-off-by: Sasha Levin --- fs/overlayfs/inode.c | 13 +++++++++++++ fs/overlayfs/super.c | 2 ++ 2 files changed, 15 insertions(+) diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index ba0db26389465..a1b069e5e363e 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -45,6 +45,19 @@ int ovl_setattr(struct dentry *dentry, struct iattr *attr) int err; struct dentry *upperdentry; + /* + * Check for permissions before trying to copy-up. This is redundant + * since it will be rechecked later by ->setattr() on upper dentry. But + * without this, copy-up can be triggered by just about anybody. + * + * We don't initialize inode->size, which just means that + * inode_newsize_ok() will always check against MAX_LFS_FILESIZE and not + * check for a swapfile (which this won't be anyway). + */ + err = inode_change_ok(dentry->d_inode, attr); + if (err) + return err; + err = ovl_want_write(dentry); if (err) goto out; diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index c4b595e2fc209..bd6d5c1e667d2 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -9,6 +9,7 @@ #include #include +#include #include #include #include @@ -847,6 +848,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) } sb->s_stack_depth = 0; + sb->s_maxbytes = MAX_LFS_FILESIZE; if (ufs->config.upperdir) { if (!ufs->config.workdir) { pr_err("overlayfs: missing 'workdir'\n"); From 1cbc3b7717e8a0c13ea5009b4157dfdf8f90b0b3 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Mon, 16 Nov 2015 18:44:11 +0300 Subject: [PATCH 1660/2091] ovl: check dentry positiveness in ovl_cleanup_whiteouts() [ Upstream commit 84889d49335627bc770b32787c1ef9ebad1da232 ] This patch fixes kernel crash at removing directory which contains whiteouts from lower layers. Cache of directory content passed as "list" contains entries from all layers, including whiteouts from lower layers. So, lookup in upper dir (moved into work at this stage) will return negative entry. Plus this cache is filled long before and we can race with external removal. 
Example: mkdir -p lower0/dir lower1/dir upper work overlay touch lower0/dir/a lower0/dir/b mknod lower1/dir/a c 0 0 mount -t overlay none overlay -o lowerdir=lower1:lower0,upperdir=upper,workdir=work rm -fr overlay/dir Signed-off-by: Konstantin Khlebnikov Signed-off-by: Miklos Szeredi Cc: # 3.18+ Signed-off-by: Sasha Levin --- fs/overlayfs/readdir.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index 70e9af5516004..adcb1398c4812 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -571,7 +571,8 @@ void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list) (int) PTR_ERR(dentry)); continue; } - ovl_cleanup(upper->d_inode, dentry); + if (dentry->d_inode) + ovl_cleanup(upper->d_inode, dentry); dput(dentry); } mutex_unlock(&upper->d_inode->i_mutex); From eddf977af760a73a9072299e5a41bdb3de4c3930 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 1 Dec 2015 15:52:36 +0100 Subject: [PATCH 1661/2091] EDAC, mc_sysfs: Fix freeing bus' name [ Upstream commit 12e26969b32c79018165d52caff3762135614aa1 ] I get the splat below when modprobing/rmmoding EDAC drivers. It happens because bus->name is invalid after bus_unregister() has run. The Code: section below corresponds to: .loc 1 1108 0 movq 672(%rbx), %rax # mci_1(D)->bus, mci_1(D)->bus .loc 1 1109 0 popq %rbx # .loc 1 1108 0 movq (%rax), %rdi # _7->name, jmp kfree # and %rax has some funky stuff 2030203020312030 which looks a lot like something walked over it. Fix that by saving the name ptr before doing stuff to string it points to. general protection fault: 0000 [#1] SMP Modules linked in: ... CPU: 4 PID: 10318 Comm: modprobe Tainted: G I EN 3.12.51-11-default+ #48 Hardware name: HP ProLiant DL380 G7, BIOS P67 05/05/2011 task: ffff880311320280 ti: ffff88030da3e000 task.ti: ffff88030da3e000 RIP: 0010:[] [] edac_unregister_sysfs+0x22/0x30 [edac_core] RSP: 0018:ffff88030da3fe28 EFLAGS: 00010292 RAX: 2030203020312030 RBX: ffff880311b4e000 RCX: 000000000000095c RDX: 0000000000000001 RSI: ffff880327bb9600 RDI: 0000000000000286 RBP: ffff880311b4e750 R08: 0000000000000000 R09: ffffffff81296110 R10: 0000000000000400 R11: 0000000000000000 R12: ffff88030ba1ac68 R13: 0000000000000001 R14: 00000000011b02f0 R15: 0000000000000000 FS: 00007fc9bf8f5700(0000) GS:ffff8801a7c40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 0000000000403c90 CR3: 000000019ebdf000 CR4: 00000000000007e0 Stack: Call Trace: i7core_unregister_mci.isra.9 i7core_remove pci_device_remove __device_release_driver driver_detach bus_remove_driver pci_unregister_driver i7core_exit SyS_delete_module system_call_fastpath 0x7fc9bf426536 Code: 2e 0f 1f 84 00 00 00 00 00 66 66 66 66 90 53 48 89 fb e8 52 2a 1f e1 48 8b bb a0 02 00 00 e8 46 59 1f e1 48 8b 83 a0 02 00 00 5b <48> 8b 38 e9 26 9a fe e0 66 0f 1f 44 00 00 66 66 66 66 90 48 8b RIP [] edac_unregister_sysfs+0x22/0x30 [edac_core] RSP Signed-off-by: Borislav Petkov Cc: Mauro Carvalho Chehab Cc: # v3.6.. 
Fixes: 7a623c039075 ("edac: rewrite the sysfs code to use struct device") Signed-off-by: Sasha Levin --- drivers/edac/edac_mc_sysfs.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c index 112d63ad11547..67dc90365389a 100644 --- a/drivers/edac/edac_mc_sysfs.c +++ b/drivers/edac/edac_mc_sysfs.c @@ -977,21 +977,26 @@ static int edac_create_debug_nodes(struct mem_ctl_info *mci) int edac_create_sysfs_mci_device(struct mem_ctl_info *mci, const struct attribute_group **groups) { + char *name; int i, err; /* * The memory controller needs its own bus, in order to avoid * namespace conflicts at /sys/bus/edac. */ - mci->bus->name = kasprintf(GFP_KERNEL, "mc%d", mci->mc_idx); - if (!mci->bus->name) + name = kasprintf(GFP_KERNEL, "mc%d", mci->mc_idx); + if (!name) return -ENOMEM; + mci->bus->name = name; + edac_dbg(0, "creating bus %s\n", mci->bus->name); err = bus_register(mci->bus); - if (err < 0) - goto fail_free_name; + if (err < 0) { + kfree(name); + return err; + } /* get the /sys/devices/system/edac subsys reference */ mci->dev.type = &mci_attr_type; @@ -1060,8 +1065,8 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci, device_unregister(&mci->dev); fail_unregister_bus: bus_unregister(mci->bus); -fail_free_name: - kfree(mci->bus->name); + kfree(name); + return err; } @@ -1092,10 +1097,12 @@ void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci) void edac_unregister_sysfs(struct mem_ctl_info *mci) { + const char *name = mci->bus->name; + edac_dbg(1, "Unregistering device %s\n", dev_name(&mci->dev)); device_unregister(&mci->dev); bus_unregister(mci->bus); - kfree(mci->bus->name); + kfree(name); } static void mc_attr_release(struct device *dev) From 94fde0abd09b14f0e50efe87edb36ec0d1e88510 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 27 Nov 2015 10:38:38 +0100 Subject: [PATCH 1662/2091] EDAC: Robustify workqueues destruction [ Upstream commit fcd5c4dd8201595d4c598c9cca5e54760277d687 ] EDAC workqueue destruction is really fragile. We cancel delayed work but if it is still running and requeues itself, we still go ahead and destroy the workqueue and the queued work explodes when workqueue core attempts to run it. Make the destruction more robust by switching op_state to offline so that requeuing stops. Cancel any pending work *synchronously* too. EDAC i7core: Driver loaded. general protection fault: 0000 [#1] SMP CPU 12 Modules linked in: Supported: Yes Pid: 0, comm: kworker/0:1 Tainted: G IE 3.0.101-0-default #1 HP ProLiant DL380 G7 RIP: 0010:[] [] __queue_work+0x17/0x3f0 < ... regs ...> Process kworker/0:1 (pid: 0, threadinfo ffff88019def6000, task ffff88019def4600) Stack: ... Call Trace: call_timer_fn run_timer_softirq __do_softirq call_softirq do_softirq irq_exit smp_apic_timer_interrupt apic_timer_interrupt intel_idle cpuidle_idle_call cpu_idle Code: ... 
RIP __queue_work RSP <...> Signed-off-by: Borislav Petkov Cc: Signed-off-by: Sasha Levin --- drivers/edac/edac_device.c | 11 ++++------- drivers/edac/edac_mc.c | 14 +++----------- drivers/edac/edac_pci.c | 9 ++++----- 3 files changed, 11 insertions(+), 23 deletions(-) diff --git a/drivers/edac/edac_device.c b/drivers/edac/edac_device.c index 592af5f0cf391..53587377e6726 100644 --- a/drivers/edac/edac_device.c +++ b/drivers/edac/edac_device.c @@ -435,16 +435,13 @@ void edac_device_workq_setup(struct edac_device_ctl_info *edac_dev, */ void edac_device_workq_teardown(struct edac_device_ctl_info *edac_dev) { - int status; - if (!edac_dev->edac_check) return; - status = cancel_delayed_work(&edac_dev->work); - if (status == 0) { - /* workq instance might be running, wait for it */ - flush_workqueue(edac_workqueue); - } + edac_dev->op_state = OP_OFFLINE; + + cancel_delayed_work_sync(&edac_dev->work); + flush_workqueue(edac_workqueue); } /* diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index af3be1914dbb8..63ceb2d985654 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -581,18 +581,10 @@ static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec, */ static void edac_mc_workq_teardown(struct mem_ctl_info *mci) { - int status; - - if (mci->op_state != OP_RUNNING_POLL) - return; - - status = cancel_delayed_work(&mci->work); - if (status == 0) { - edac_dbg(0, "not canceled, flush the queue\n"); + mci->op_state = OP_OFFLINE; - /* workq instance might be running, wait for it */ - flush_workqueue(edac_workqueue); - } + cancel_delayed_work_sync(&mci->work); + flush_workqueue(edac_workqueue); } /* diff --git a/drivers/edac/edac_pci.c b/drivers/edac/edac_pci.c index 2cf44b4db80c8..b4b38603b804e 100644 --- a/drivers/edac/edac_pci.c +++ b/drivers/edac/edac_pci.c @@ -274,13 +274,12 @@ static void edac_pci_workq_setup(struct edac_pci_ctl_info *pci, */ static void edac_pci_workq_teardown(struct edac_pci_ctl_info *pci) { - int status; - edac_dbg(0, "\n"); - status = cancel_delayed_work(&pci->work); - if (status == 0) - flush_workqueue(edac_workqueue); + pci->op_state = OP_OFFLINE; + + cancel_delayed_work_sync(&pci->work); + flush_workqueue(edac_workqueue); } /* From bf0a6a9a3d53615aee7a3e04c12f88fdd3fc1d4c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 15 Dec 2015 16:08:12 +0000 Subject: [PATCH 1663/2091] iommu/io-pgtable-arm: Ensure we free the final level on teardown [ Upstream commit 12c2ab09571e8aae3a87da2a4a452632a5fac1e5 ] When tearing down page tables, we return early for the final level since we know that we won't have any table pointers to follow. Unfortunately, this also means that we forget to free the final level, so we end up leaking memory. Fix the issue by always freeing the current level, but just don't bother to iterate over the ptes if we're at the final level. 
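The fix keeps the walk-and-free structure and turns the leaf-only case into an empty walk instead of an early return, so the level itself still reaches the freeing code after the loop. In outline:

	start = ptep;

	/* Only leaf entries at the last level: nothing to descend into,
	 * but do not return early or the table leaks */
	if (lvl == ARM_LPAE_MAX_LEVELS - 1)
		end = ptep;			/* empty range, loop body skipped */
	else
		end = (void *)ptep + table_size;

	while (ptep != end) {
		/* recurse into child tables, as before */
	}

	/* the current level is then freed unconditionally (not shown in the hunk) */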
Cc: Reported-by: Zhang Bo Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- drivers/iommu/io-pgtable-arm.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index e29d5d7fe2206..937832cfa48ee 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -341,17 +341,18 @@ static void __arm_lpae_free_pgtable(struct arm_lpae_io_pgtable *data, int lvl, arm_lpae_iopte *start, *end; unsigned long table_size; - /* Only leaf entries at the last level */ - if (lvl == ARM_LPAE_MAX_LEVELS - 1) - return; - if (lvl == ARM_LPAE_START_LVL(data)) table_size = data->pgd_size; else table_size = 1UL << data->pg_shift; start = ptep; - end = (void *)ptep + table_size; + + /* Only leaf entries at the last level */ + if (lvl == ARM_LPAE_MAX_LEVELS - 1) + end = ptep; + else + end = (void *)ptep + table_size; while (ptep != end) { arm_lpae_iopte pte = *ptep++; From 9e612a000cf889cfa624d984ccef770fdce43cf6 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Thu, 17 Dec 2015 18:03:35 +0200 Subject: [PATCH 1664/2091] dm thin: fix race condition when destroying thin pool workqueue [ Upstream commit 18d03e8c25f173f4107a40d0b8c24defb6ed69f3 ] When a thin pool is being destroyed delayed work items are cancelled using cancel_delayed_work(), which doesn't guarantee that on return the delayed item isn't running. This can cause the work item to requeue itself on an already destroyed workqueue. Fix this by using cancel_delayed_work_sync() which guarantees that on return the work item is not running anymore. Fixes: 905e51b39a555 ("dm thin: commit outstanding data every second") Fixes: 85ad643b7e7e5 ("dm thin: add timeout to stop out-of-data-space mode holding IO forever") Signed-off-by: Nikolay Borisov Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org Signed-off-by: Sasha Levin --- drivers/md/dm-thin.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 7073b22d4cb42..cb58bb3187824 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -3210,8 +3210,8 @@ static void pool_postsuspend(struct dm_target *ti) struct pool_c *pt = ti->private; struct pool *pool = pt->pool; - cancel_delayed_work(&pool->waker); - cancel_delayed_work(&pool->no_space_timeout); + cancel_delayed_work_sync(&pool->waker); + cancel_delayed_work_sync(&pool->no_space_timeout); flush_workqueue(pool->wq); (void) commit(pool); } From 51697d5f76e114f27ec08958ea1970fc55db0438 Mon Sep 17 00:00:00 2001 From: Slava Grigorev Date: Thu, 17 Dec 2015 11:09:58 -0500 Subject: [PATCH 1665/2091] drm/radeon: Fix "slow" audio over DP on DCE8+ [ Upstream commit ac4a9350abddc51ccb897abf0d9f3fd592b97e0b ] DP audio is derived from the dfs clock. 
Signed-off-by: Slava Grigorev Reviewed-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Sasha Levin --- drivers/gpu/drm/radeon/dce6_afmt.c | 16 ++++++++++++++++ drivers/gpu/drm/radeon/radeon.h | 1 + drivers/gpu/drm/radeon/radeon_atombios.c | 7 +++++++ drivers/gpu/drm/radeon/sid.h | 5 +++++ 4 files changed, 29 insertions(+) diff --git a/drivers/gpu/drm/radeon/dce6_afmt.c b/drivers/gpu/drm/radeon/dce6_afmt.c index 44480c1b9738c..883dce66e2dcf 100644 --- a/drivers/gpu/drm/radeon/dce6_afmt.c +++ b/drivers/gpu/drm/radeon/dce6_afmt.c @@ -282,6 +282,22 @@ void dce6_dp_audio_set_dto(struct radeon_device *rdev, * is the numerator, DCCG_AUDIO_DTOx_MODULE is the denominator */ if (ASIC_IS_DCE8(rdev)) { + unsigned int div = (RREG32(DENTIST_DISPCLK_CNTL) & + DENTIST_DPREFCLK_WDIVIDER_MASK) >> + DENTIST_DPREFCLK_WDIVIDER_SHIFT; + + if (div < 128 && div >= 96) + div -= 64; + else if (div >= 64) + div = div / 2 - 16; + else if (div >= 8) + div /= 4; + else + div = 0; + + if (div) + clock = rdev->clock.gpupll_outputfreq * 10 / div; + WREG32(DCE8_DCCG_AUDIO_DTO1_PHASE, 24000); WREG32(DCE8_DCCG_AUDIO_DTO1_MODULE, clock); } else { diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 91c3f60f8bac4..95770d02a6797 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -268,6 +268,7 @@ struct radeon_clock { uint32_t current_dispclk; uint32_t dp_extclk; uint32_t max_pixel_clock; + uint32_t gpupll_outputfreq; }; /* diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c index 8f285244c839a..13e5513b73af5 100644 --- a/drivers/gpu/drm/radeon/radeon_atombios.c +++ b/drivers/gpu/drm/radeon/radeon_atombios.c @@ -1263,6 +1263,13 @@ bool radeon_atom_get_clock_info(struct drm_device *dev) rdev->mode_info.firmware_flags = le16_to_cpu(firmware_info->info.usFirmwareCapability.susAccess); + if (ASIC_IS_DCE8(rdev)) { + rdev->clock.gpupll_outputfreq = + le32_to_cpu(firmware_info->info_22.ulGPUPLL_OutputFreq); + if (rdev->clock.gpupll_outputfreq == 0) + rdev->clock.gpupll_outputfreq = 360000; /* 3.6 GHz */ + } + return true; } diff --git a/drivers/gpu/drm/radeon/sid.h b/drivers/gpu/drm/radeon/sid.h index 3afac30139838..c126f6bfbed15 100644 --- a/drivers/gpu/drm/radeon/sid.h +++ b/drivers/gpu/drm/radeon/sid.h @@ -915,6 +915,11 @@ #define DCCG_AUDIO_DTO1_PHASE 0x05c0 #define DCCG_AUDIO_DTO1_MODULE 0x05c4 +#define DENTIST_DISPCLK_CNTL 0x0490 +# define DENTIST_DPREFCLK_WDIVIDER(x) (((x) & 0x7f) << 24) +# define DENTIST_DPREFCLK_WDIVIDER_MASK (0x7f << 24) +# define DENTIST_DPREFCLK_WDIVIDER_SHIFT 24 + #define AFMT_AUDIO_SRC_CONTROL 0x713c #define AFMT_AUDIO_SRC_SELECT(x) (((x) & 7) << 0) /* AFMT_AUDIO_SRC_SELECT From e70aade221a271f91e2d71901b2d602df2faee15 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 19 Dec 2015 20:07:38 +0000 Subject: [PATCH 1666/2091] futex: Drop refcount if requeue_pi() acquired the rtmutex [ Upstream commit fb75a4282d0d9a3c7c44d940582c2d226cf3acfb ] If the proxy lock in the requeue loop acquires the rtmutex for a waiter then it acquired also refcount on the pi_state related to the futex, but the waiter side does not drop the reference count. Add the missing free_pi_state() call. 
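The reference pairing is easier to follow with the reasoning written next to the code; the hunk below, annotated:

	if (q.pi_state && (q.pi_state->owner != current)) {
		spin_lock(q.lock_ptr);
		ret = fixup_pi_state_owner(uaddr2, &q, current);
		/*
		 * requeue_pi() took a reference on the pi_state when it
		 * acquired the rtmutex on this waiter's behalf; this is the
		 * only remaining place that can drop it.
		 */
		free_pi_state(q.pi_state);
		spin_unlock(q.lock_ptr);
	}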
Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Darren Hart Cc: Davidlohr Bueso Cc: Bhuvanesh_Surachari@mentor.com Cc: Andy Lowe Link: http://lkml.kernel.org/r/20151219200607.178132067@linutronix.de Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Signed-off-by: Sasha Levin --- kernel/futex.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/futex.c b/kernel/futex.c index 2579e407ff67d..f3043db6d36f1 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -2632,6 +2632,11 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, if (q.pi_state && (q.pi_state->owner != current)) { spin_lock(q.lock_ptr); ret = fixup_pi_state_owner(uaddr2, &q, current); + /* + * Drop the reference to the pi state which + * the requeue_pi() code acquired for us. + */ + free_pi_state(q.pi_state); spin_unlock(q.lock_ptr); } } else { From 1d4c425164b922424b3f86f93f3f0b7f85293fa7 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 20 Aug 2015 11:47:13 +0100 Subject: [PATCH 1667/2091] arm64: mdscr_el1: avoid exposing DCC to userspace [ Upstream commit d8d23fa0f27f3b2942a7bbc7378c7735324ed519 ] We don't want to expose the DCC to userspace, particularly as there is a kernel console driver for it. This patch resets mdscr_el1 to disable userspace access to the DCC registers on the cold boot path. Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- arch/arm64/mm/proc.S | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index cdd754e19b9b2..d2c2e3b6c0f93 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -202,7 +202,8 @@ ENTRY(__cpu_setup) mov x0, #3 << 20 msr cpacr_el1, x0 // Enable FP/ASIMD - msr mdscr_el1, xzr // Reset mdscr_el1 + mov x0, #1 << 12 // Reset mdscr_el1 and disable + msr mdscr_el1, x0 // access to the DCC from EL0 /* * Memory region attributes for LPAE: * From 5de8e1eed3221cb8737c2617c4567b70833795e6 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Fri, 18 Dec 2015 10:35:54 +0000 Subject: [PATCH 1668/2091] arm64: kernel: enforce pmuserenr_el0 initialization and restore [ Upstream commit d2d39a3b91628ef5abdf58e83905b173e63d5ecf ] commit 60792ad349f3c6dc5735aafefe5dc9121c79e320 upstream. The pmuserenr_el0 register value is architecturally UNKNOWN on reset. Current kernel code resets that register value iff the core pmu device is correctly probed in the kernel. On platforms with missing DT pmu nodes (or disabled perf events in the kernel), the pmu is not probed, therefore the pmuserenr_el0 register is not reset in the kernel, which means that its value retains the reset value that is architecturally UNKNOWN (system may run with eg pmuserenr_el0 == 0x1, which means that PMU counters access is available at EL0, which must be disallowed). This patch adds code that resets pmuserenr_el0 on cold boot and restores it on core resume from shutdown, so that the pmuserenr_el0 setup is always enforced in the kernel. Cc: Mark Rutland Signed-off-by: Lorenzo Pieralisi Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- arch/arm64/kernel/perf_event.c | 3 --- arch/arm64/mm/proc.S | 2 ++ 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 7778453762d8c..b67b01cb51096 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -1242,9 +1242,6 @@ static void armv8pmu_reset(void *info) /* Initialize & Reset PMNC: C and P bits. 
*/ armv8pmu_pmcr_write(ARMV8_PMCR_P | ARMV8_PMCR_C); - - /* Disable access from userspace. */ - asm volatile("msr pmuserenr_el0, %0" :: "r" (0)); } static int armv8_pmuv3_map_event(struct perf_event *event) diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index d2c2e3b6c0f93..55b3f1400b3e7 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -165,6 +165,7 @@ ENTRY(cpu_do_resume) */ ubfx x11, x11, #1, #1 msr oslar_el1, x11 + msr pmuserenr_el0, xzr // Disable PMU access from EL0 mov x0, x12 dsb nsh // Make sure local tlb invalidation completed isb @@ -204,6 +205,7 @@ ENTRY(__cpu_setup) msr cpacr_el1, x0 // Enable FP/ASIMD mov x0, #1 << 12 // Reset mdscr_el1 and disable msr mdscr_el1, x0 // access to the DCC from EL0 + msr pmuserenr_el0, xzr // Disable PMU access from EL0 /* * Memory region attributes for LPAE: * From dc25a0a0d2bc9296d0d66cbe7a17f698122d3c41 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Thu, 17 Dec 2015 08:47:02 -0700 Subject: [PATCH 1669/2091] coresight: checking for NULL string in coresight_name_match() [ Upstream commit fadf3a44e974b030e7145218ad1ab25e3ef91738 ] Connection child names associated to ports can sometimes be NULL, which is the case when booting a system on QEMU or when the Coresight power domain isn't switched on. This patch is adding a check to make sure a NULL string isn't fed to strcmp(), something that avoid crashing the system. Cc: # v3.18+ Reported-by: Tyler Baker Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/hwtracing/coresight/coresight.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c index 894531d315b83..046144fc5aff1 100644 --- a/drivers/hwtracing/coresight/coresight.c +++ b/drivers/hwtracing/coresight/coresight.c @@ -543,7 +543,7 @@ static int coresight_name_match(struct device *dev, void *data) to_match = data; i_csdev = to_coresight_device(dev); - if (!strcmp(to_match, dev_name(&i_csdev->dev))) + if (to_match && !strcmp(to_match, dev_name(&i_csdev->dev))) return 1; return 0; From 47062c9c6cf99ef7fa09a2458a26bb95edb02759 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Mon, 23 Nov 2015 17:39:11 -0500 Subject: [PATCH 1670/2091] drm/radeon: Fix off-by-one errors in radeon_vm_bo_set_addr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 42ef344c0994cc453477afdc7a8eadc578ed0257 ] eoffset is sometimes treated as the last address inside the address range, and sometimes as the first address outside the range. This was resulting in errors when a test filled up the entire address space. Make it consistent to always be the last address within the range. Also fixed related errors when checking the VA limit and in radeon_vm_fence_pts. 
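With eoffset defined as the last address inside the range, the bound checks change shape as well; the core of the change, condensed from the hunks below with the error unwinding trimmed:

	/* make sure object fit at this offset */
	eoffset = soffset + size - 1;		/* inclusive last byte, not one past the end */
	if (soffset >= eoffset)
		return -EINVAL;

	last_pfn = eoffset / RADEON_GPU_PAGE_SIZE;
	if (last_pfn >= rdev->vm_manager.max_pfn)	/* >= now that last_pfn is inclusive */
		return -EINVAL;

The same convention is what turns the fence loop's upper bound into end = (end - 1) >> radeon_vm_block_size.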
Signed-off-by: Felix.Kuehling Reviewed-by: Christian König Cc: stable@vger.kernel.org Signed-off-by: Sasha Levin --- drivers/gpu/drm/radeon/radeon_vm.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index 9c3377ca17b75..8ec4e4591756e 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -456,15 +456,15 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev, if (soffset) { /* make sure object fit at this offset */ - eoffset = soffset + size; + eoffset = soffset + size - 1; if (soffset >= eoffset) { r = -EINVAL; goto error_unreserve; } last_pfn = eoffset / RADEON_GPU_PAGE_SIZE; - if (last_pfn > rdev->vm_manager.max_pfn) { - dev_err(rdev->dev, "va above limit (0x%08X > 0x%08X)\n", + if (last_pfn >= rdev->vm_manager.max_pfn) { + dev_err(rdev->dev, "va above limit (0x%08X >= 0x%08X)\n", last_pfn, rdev->vm_manager.max_pfn); r = -EINVAL; goto error_unreserve; @@ -479,7 +479,7 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev, eoffset /= RADEON_GPU_PAGE_SIZE; if (soffset || eoffset) { struct interval_tree_node *it; - it = interval_tree_iter_first(&vm->va, soffset, eoffset - 1); + it = interval_tree_iter_first(&vm->va, soffset, eoffset); if (it && it != &bo_va->it) { struct radeon_bo_va *tmp; tmp = container_of(it, struct radeon_bo_va, it); @@ -522,7 +522,7 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev, if (soffset || eoffset) { bo_va->it.start = soffset; - bo_va->it.last = eoffset - 1; + bo_va->it.last = eoffset; interval_tree_insert(&bo_va->it, &vm->va); } @@ -891,7 +891,7 @@ static void radeon_vm_fence_pts(struct radeon_vm *vm, unsigned i; start >>= radeon_vm_block_size; - end >>= radeon_vm_block_size; + end = (end - 1) >> radeon_vm_block_size; for (i = start; i <= end; ++i) radeon_bo_fence(vm->page_tables[i].bo, fence, true); From 86062e985300bc13fc5d29c862e34047bcb65c7e Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 17 Dec 2015 12:52:17 -0500 Subject: [PATCH 1671/2091] drm/radeon: clean up fujitsu quirks [ Upstream commit 0eb1c3d4084eeb6fb3a703f88d6ce1521f8fcdd1 ] Combine the two quirks. 
bug: https://bugzilla.kernel.org/show_bug.cgi?id=109481 Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Sasha Levin --- drivers/gpu/drm/radeon/radeon_atombios.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c index 13e5513b73af5..08fc1b5effa8a 100644 --- a/drivers/gpu/drm/radeon/radeon_atombios.c +++ b/drivers/gpu/drm/radeon/radeon_atombios.c @@ -437,7 +437,9 @@ static bool radeon_atom_apply_quirks(struct drm_device *dev, } /* Fujitsu D3003-S2 board lists DVI-I as DVI-D and VGA */ - if (((dev->pdev->device == 0x9802) || (dev->pdev->device == 0x9806)) && + if (((dev->pdev->device == 0x9802) || + (dev->pdev->device == 0x9805) || + (dev->pdev->device == 0x9806)) && (dev->pdev->subsystem_vendor == 0x1734) && (dev->pdev->subsystem_device == 0x11bd)) { if (*connector_type == DRM_MODE_CONNECTOR_VGA) { @@ -448,14 +450,6 @@ static bool radeon_atom_apply_quirks(struct drm_device *dev, } } - /* Fujitsu D3003-S2 board lists DVI-I as DVI-I and VGA */ - if ((dev->pdev->device == 0x9805) && - (dev->pdev->subsystem_vendor == 0x1734) && - (dev->pdev->subsystem_device == 0x11bd)) { - if (*connector_type == DRM_MODE_CONNECTOR_VGA) - return false; - } - return true; } From e6cbcb008a279d7794fba01799ecfb6be391484f Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 26 Nov 2015 14:00:47 +0200 Subject: [PATCH 1672/2091] mmc: sdio: Fix invalid vdd in voltage switch power cycle [ Upstream commit d9bfbb95ed598a09cf336adb0f190ee0ff802f0d ] The 'ocr' parameter passed to mmc_set_signal_voltage() defines the power-on voltage used when power cycling after a failure to set the voltage. However, in the case of mmc_sdio_init_card(), the value passed has the R4_18V_PRESENT flag set which is not valid for power-on and results in an invalid vdd. Fix by passing the card's ocr value which does not have the flag. Signed-off-by: Adrian Hunter Cc: stable@vger.kernel.org # v3.13+ Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin --- drivers/mmc/core/sdio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c index 5bc6c7dbbd608..3bd6a93602b68 100644 --- a/drivers/mmc/core/sdio.c +++ b/drivers/mmc/core/sdio.c @@ -661,7 +661,7 @@ static int mmc_sdio_init_card(struct mmc_host *host, u32 ocr, */ if (!powered_resume && (rocr & ocr & R4_18V_PRESENT)) { err = mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_180, - ocr); + ocr_card); if (err == -EAGAIN) { sdio_reset(host); mmc_go_idle(host); From b0aed82cdc3204ef04bfb05cab8a7892ed8bd9db Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 26 Nov 2015 14:00:48 +0200 Subject: [PATCH 1673/2091] mmc: sdhci: Fix DMA descriptor with zero data length [ Upstream commit 347ea32dc118326c4f2636928239a29d192cc9b8 ] SDHCI has built-in DMA called ADMA2. ADMA2 uses a descriptor table to define DMA scatter-gather. Each descriptor can specify a data length up to 65536 bytes, however the length field is only 16 bits, so zero means 65536. Consequently, putting zero when the size is zero must not be allowed. This patch fixes one case where zero data length could be set inadvertently. The problem happens because unaligned data gets split and the code did not consider that the remaining aligned portion might be zero length. That case really only happens for SDIO because SD and eMMC cards transfer blocks that are invariably sector-aligned.
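Editorial aside (not part of the commit message, which continues below): a small standalone C sketch of why a 16-bit ADMA2-style length field makes a zero-length descriptor dangerous. The hardware decodes a length field of 0 as 65536, so an "empty" chunk written as 0 silently becomes a 64 KiB transfer; the only safe handling is to skip writing a descriptor for it, which is what the patch does. The decode helper below is an assumption used purely for illustration.

#include <stdint.h>
#include <stdio.h>

/* Hardware view of a 16-bit length field where 0 means 65536 bytes. */
static uint32_t hw_decoded_len(uint16_t field)
{
        return field ? field : 65536u;
}

int main(void)
{
        /* The 0-byte entry models the chunk produced by the buggy split. */
        uint32_t chunk_len[] = { 4u, 0u, 65536u };

        for (unsigned int i = 0; i < 3; i++) {
                uint16_t field = (uint16_t)chunk_len[i];
                printf("chunk of %5u bytes -> field 0x%04x -> hw transfers %u bytes\n",
                       chunk_len[i], field, hw_decoded_len(field));
        }
        return 0;
}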
For SDIO, access to function registers is done by data transfer (CMD53) when the register is bigger than 1 byte. Generally registers are 4 bytes but 2-byte registers are possible. So DMA of 4 bytes or less can happen. When 32-bit DMA is used, the data alignment must be 4, so 4-byte transfers won't cause a problem, but a 2-byte transfer could. However, with the introduction of 64-bit DMA, the data alignment for 64-bit DMA was made 8 bytes, so all 4-byte transfers not on 8-byte boundaries get "split" into a 4-byte chunk and a 0-byte chunk, thereby hitting the bug. In fact, a closer look at the SDHCI specs indicates that only the descriptor table requires 8-byte alignment for 64-bit DMA. That will be dealt with in a separate patch, but the potential for a 2-byte access remains, so this fix is needed anyway. Signed-off-by: Adrian Hunter Cc: stable@vger.kernel.org # v3.19+ Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin --- drivers/mmc/host/sdhci.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index cbaf3df3ebd98..6acef8164c8fd 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -555,9 +555,12 @@ static int sdhci_adma_table_pre(struct sdhci_host *host, BUG_ON(len > 65536); - /* tran, valid */ - sdhci_adma_write_desc(host, desc, addr, len, ADMA2_TRAN_VALID); - desc += host->desc_sz; + if (len) { + /* tran, valid */ + sdhci_adma_write_desc(host, desc, addr, len, + ADMA2_TRAN_VALID); + desc += host->desc_sz; + } /* * If this triggers then we have a calculation bug From a2da01ce927dd98cb8f1a8f2d2343d2f5827b988 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 26 Nov 2015 14:00:50 +0200 Subject: [PATCH 1674/2091] mmc: sdhci: Fix sdhci_runtime_pm_bus_on/off() [ Upstream commit 5c671c410c8704800f4f1673b6f572137e7e6ddd ] sdhci has a legacy facility to prevent runtime suspend if the bus power is on. This is needed in cases where the power to the card is dependent on the bus power. It is controlled by a pair of functions: sdhci_runtime_pm_bus_on() and sdhci_runtime_pm_bus_off(). These functions use a boolean variable 'bus_on' to ensure changes are always paired. There is an additional check for 'runtime_suspended' which is the problem. In fact, its use is ill-conceived as the only requirement for the logic is that 'on' and 'off' are paired, which is actually broken by the check, for example if the bus power is turned on during runtime resume. So remove the check.
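Editorial aside (illustration only, not the sdhci code): the invariant the driver needs is simply that every bus-power "on" is paired with exactly one "off", and a single boolean guard already provides that; the extra runtime_suspended test broke the pairing. A standalone C sketch of the pattern, with the runtime-PM usage count replaced by a plain counter and made-up names:

#include <stdbool.h>
#include <stdio.h>

struct host {
        bool bus_on;
        int  pm_refcount;       /* stand-in for the runtime-PM usage count */
};

static void bus_on(struct host *h)
{
        if (h->bus_on)          /* already on: keep the calls paired */
                return;
        h->bus_on = true;
        h->pm_refcount++;       /* models pm_runtime_get_noresume()  */
}

static void bus_off(struct host *h)
{
        if (!h->bus_on)
                return;
        h->bus_on = false;
        h->pm_refcount--;       /* models pm_runtime_put_noidle()    */
}

int main(void)
{
        struct host h = { .bus_on = false, .pm_refcount = 0 };

        bus_on(&h);             /* e.g. bus power raised during runtime resume */
        bus_on(&h);             /* redundant call changes nothing              */
        bus_off(&h);
        printf("bus_on=%d refcount=%d\n", h.bus_on, h.pm_refcount);
        return 0;
}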
Signed-off-by: Adrian Hunter Cc: stable@vger.kernel.org # v3.11+ Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin --- drivers/mmc/host/sdhci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 6acef8164c8fd..f47c4a8370bea 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -2793,7 +2793,7 @@ static int sdhci_runtime_pm_put(struct sdhci_host *host) static void sdhci_runtime_pm_bus_on(struct sdhci_host *host) { - if (host->runtime_suspended || host->bus_on) + if (host->bus_on) return; host->bus_on = true; pm_runtime_get_noresume(host->mmc->parent); @@ -2801,7 +2801,7 @@ static void sdhci_runtime_pm_bus_on(struct sdhci_host *host) static void sdhci_runtime_pm_bus_off(struct sdhci_host *host) { - if (host->runtime_suspended || !host->bus_on) + if (!host->bus_on) return; host->bus_on = false; pm_runtime_put_noidle(host->mmc->parent); From 4476dc677064d27c1fb6ff28a47eeac384e8ba64 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Fri, 11 Dec 2015 15:54:16 +0100 Subject: [PATCH 1675/2091] udf: limit the maximum number of indirect extents in a row [ Upstream commit b0918d9f476a8434b055e362b83fa4fd1d462c3f ] udf_next_aext() just follows extent pointers while extents are marked as indirect. This can loop forever for a corrupted filesystem. Limit the number of indirect extents we are willing to follow in a row. [JK: Updated changelog, limit, style] Signed-off-by: Vegard Nossum Cc: stable@vger.kernel.org Cc: Jan Kara Cc: Quentin Casasnovas Cc: Andrew Morton Signed-off-by: Jan Kara Signed-off-by: Sasha Levin --- fs/udf/inode.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 6afac3d561ac8..78a40ef0c4636 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -2052,14 +2052,29 @@ void udf_write_aext(struct inode *inode, struct extent_position *epos, epos->offset += adsize; } +/* + * Only 1 indirect extent in a row really makes sense but allow upto 16 in case + * someone does some weird stuff. + */ +#define UDF_MAX_INDIR_EXTS 16 + int8_t udf_next_aext(struct inode *inode, struct extent_position *epos, struct kernel_lb_addr *eloc, uint32_t *elen, int inc) { int8_t etype; + unsigned int indirections = 0; while ((etype = udf_current_aext(inode, epos, eloc, elen, inc)) == (EXT_NEXT_EXTENT_ALLOCDECS >> 30)) { int block; + + if (++indirections > UDF_MAX_INDIR_EXTS) { + udf_err(inode->i_sb, + "too many indirect extents in inode %lu\n", + inode->i_ino); + return -1; + } + epos->block = *eloc; epos->offset = sizeof(struct allocExtDesc); brelse(epos->bh); From 33c7c20d62a7916522628fbb45bb65cbc0b9f899 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Tue, 22 Dec 2015 02:27:35 -0200 Subject: [PATCH 1676/2091] [media] rc: sunxi-cir: Initialize the spinlock properly [ Upstream commit 768acf46e1320d6c41ed1b7c4952bab41c1cde79 ] The driver allocates the spinlock but fails to initialize it correctly. The kernel reports a BUG indicating bad spinlock magic when spinlock debugging is enabled. Call spin_lock_init() on it to initialize it correctly.
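Editorial aside (a userspace analogy, not the kernel API used by the driver): a lock embedded in dynamically allocated memory is just uninitialized bytes until an explicit init call, which is exactly what the missing spin_lock_init() provides in the patch below. The same rule holds for POSIX spinlocks, as this runnable sketch shows; the ir_dev structure and field names are made up.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct ir_dev {
        pthread_spinlock_t lock;   /* embedded lock, garbage until initialized */
        int fifo_size;
};

int main(void)
{
        struct ir_dev *ir = malloc(sizeof(*ir));

        if (!ir)
                return 1;

        /* Without this init call, taking the lock below is undefined
         * behaviour, just as a dynamically allocated kernel spinlock
         * needs spin_lock_init() (static ones use DEFINE_SPINLOCK()). */
        pthread_spin_init(&ir->lock, PTHREAD_PROCESS_PRIVATE);

        pthread_spin_lock(&ir->lock);
        ir->fifo_size = 64;
        pthread_spin_unlock(&ir->lock);

        printf("fifo_size=%d\n", ir->fifo_size);
        pthread_spin_destroy(&ir->lock);
        free(ir);
        return 0;
}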
Fixes: b4e3e59fb59c ("[media] rc: add sunxi-ir driver") Signed-off-by: Chen-Yu Tsai Acked-by: Hans de Goede Cc: stable@vger.kernel.org Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/rc/sunxi-cir.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/media/rc/sunxi-cir.c b/drivers/media/rc/sunxi-cir.c index 7830aef3db459..40f77685cc4a2 100644 --- a/drivers/media/rc/sunxi-cir.c +++ b/drivers/media/rc/sunxi-cir.c @@ -153,6 +153,8 @@ static int sunxi_ir_probe(struct platform_device *pdev) if (!ir) return -ENOMEM; + spin_lock_init(&ir->ir_lock); + if (of_device_is_compatible(dn, "allwinner,sun5i-a13-ir")) ir->fifo_size = 64; else From 40b7566cb9872529f0b95bd913e16022e1107731 Mon Sep 17 00:00:00 2001 From: Andrew Elble Date: Wed, 2 Dec 2015 09:20:57 -0500 Subject: [PATCH 1677/2091] nfs: Fix race in __update_open_stateid() [ Upstream commit 361cad3c89070aeb37560860ea8bfc092d545adc ] We've seen this in a packet capture - I've intermixed what I think was going on. The fix here is to grab the so_lock sooner. 1964379 -> #1 open (for write) reply seqid=1 1964393 -> #2 open (for read) reply seqid=2 __nfs4_close(), state->n_wronly-- nfs4_state_set_mode_locked(), changes state->state = [R] state->flags is [RW] state->state is [R], state->n_wronly == 0, state->n_rdonly == 1 1964398 -> #3 open (for write) call -> because close is already running 1964399 -> downgrade (to read) call seqid=2 (close of #1) 1964402 -> #3 open (for write) reply seqid=3 __update_open_stateid() nfs_set_open_stateid_locked(), changes state->flags state->flags is [RW] state->state is [R], state->n_wronly == 0, state->n_rdonly == 1 new sequence number is exposed now via nfs4_stateid_copy() next step would be update_open_stateflags(), pending so_lock 1964403 -> downgrade reply seqid=2, fails with OLD_STATEID (close of #1) nfs4_close_prepare() gets so_lock and recalcs flags -> send close 1964405 -> downgrade (to read) call seqid=3 (close of #1 retry) __update_open_stateid() gets so_lock * update_open_stateflags() updates state->n_wronly. 
nfs4_state_set_mode_locked() updates state->state state->flags is [RW] state->state is [RW], state->n_wronly == 1, state->n_rdonly == 1 * should have suppressed the preceding nfs4_close_prepare() from sending open_downgrade 1964406 -> write call 1964408 -> downgrade (to read) reply seqid=4 (close of #1 retry) nfs_clear_open_stateid_locked() state->flags is [R] state->state is [RW], state->n_wronly == 1, state->n_rdonly == 1 1964409 -> write reply (fails, openmode) Signed-off-by: Andrew Elble Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust Signed-off-by: Sasha Levin --- fs/nfs/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 8f393fcc313b1..1310a710192bf 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1284,6 +1284,7 @@ static void __update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_s * Protect the call to nfs4_state_set_mode_locked and * serialise the stateid update */ + spin_lock(&state->owner->so_lock); write_seqlock(&state->seqlock); if (deleg_stateid != NULL) { nfs4_stateid_copy(&state->stateid, deleg_stateid); @@ -1292,7 +1293,6 @@ static void __update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_s if (open_stateid != NULL) nfs_set_open_stateid_locked(state, open_stateid, fmode); write_sequnlock(&state->seqlock); - spin_lock(&state->owner->so_lock); update_open_stateflags(state, fmode); spin_unlock(&state->owner->so_lock); } From 247d403072cc86564439a919a4f9c48f46d138cd Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Fri, 30 Oct 2015 16:31:47 +0800 Subject: [PATCH 1678/2091] Thermal: initialize thermal zone device correctly [ Upstream commit bb431ba26c5cd0a17c941ca6c3a195a3a6d5d461 ] After a thermal zone device is registered, as we have not read any temperature before, tz->temperature should not be 0 (which actually means 0C), and the thermal trend is not available. In this case, we need special handling for the first thermal_zone_device_update(). Both the thermal core framework and the step_wise governor are enhanced to handle this. Since the step_wise governor is the only one that uses trends, it's the only thermal governor that needs to be updated. CC: #3.18+ Tested-by: Manuel Krause Tested-by: szegad Tested-by: prash Tested-by: amish Tested-by: Matthias Reviewed-by: Javi Merino Signed-off-by: Zhang Rui Signed-off-by: Chen Yu Signed-off-by: Sasha Levin --- drivers/thermal/step_wise.c | 17 +++++++++++++++-- drivers/thermal/thermal_core.c | 19 +++++++++++++++++-- drivers/thermal/thermal_core.h | 1 + include/linux/thermal.h | 3 +++ 4 files changed, 36 insertions(+), 4 deletions(-) diff --git a/drivers/thermal/step_wise.c b/drivers/thermal/step_wise.c index 5a0f12d08e8b8..ec4ea5940bf76 100644 --- a/drivers/thermal/step_wise.c +++ b/drivers/thermal/step_wise.c @@ -63,6 +63,19 @@ static unsigned long get_target_state(struct thermal_instance *instance, next_target = instance->target; dev_dbg(&cdev->device, "cur_state=%ld\n", cur_state); + if (!instance->initialized) { + if (throttle) { + next_target = (cur_state + 1) >= instance->upper ? + instance->upper : + ((cur_state + 1) < instance->lower ?
+ instance->lower : (cur_state + 1)); + } else { + next_target = THERMAL_NO_TARGET; + } + + return next_target; + } + switch (trend) { case THERMAL_TREND_RAISING: if (throttle) { @@ -149,7 +162,7 @@ static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip) dev_dbg(&instance->cdev->device, "old_target=%d, target=%d\n", old_target, (int)instance->target); - if (old_target == instance->target) + if (instance->initialized && old_target == instance->target) continue; /* Activate a passive thermal instance */ @@ -161,7 +174,7 @@ static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip) instance->target == THERMAL_NO_TARGET) update_passive_instance(tz, trip_type, -1); - + instance->initialized = true; instance->cdev->updated = false; /* cdev needs update */ } diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 4108db7e10c10..80e812378c0c1 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -471,8 +471,22 @@ static void update_temperature(struct thermal_zone_device *tz) mutex_unlock(&tz->lock); trace_thermal_temperature(tz); - dev_dbg(&tz->device, "last_temperature=%d, current_temperature=%d\n", - tz->last_temperature, tz->temperature); + if (tz->last_temperature == THERMAL_TEMP_INVALID) + dev_dbg(&tz->device, "last_temperature N/A, current_temperature=%d\n", + tz->temperature); + else + dev_dbg(&tz->device, "last_temperature=%d, current_temperature=%d\n", + tz->last_temperature, tz->temperature); +} + +static void thermal_zone_device_reset(struct thermal_zone_device *tz) +{ + struct thermal_instance *pos; + + tz->temperature = THERMAL_TEMP_INVALID; + tz->passive = 0; + list_for_each_entry(pos, &tz->thermal_instances, tz_node) + pos->initialized = false; } void thermal_zone_device_update(struct thermal_zone_device *tz) @@ -1576,6 +1590,7 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type, if (!tz->ops->get_temp) thermal_zone_device_set_polling(tz, 0); + thermal_zone_device_reset(tz); thermal_zone_device_update(tz); return tz; diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h index 8e391812e5037..dce86ee8e9d71 100644 --- a/drivers/thermal/thermal_core.h +++ b/drivers/thermal/thermal_core.h @@ -41,6 +41,7 @@ struct thermal_instance { struct thermal_zone_device *tz; struct thermal_cooling_device *cdev; int trip; + bool initialized; unsigned long upper; /* Highest cooling state for this trip point */ unsigned long lower; /* Lowest cooling state for this trip point */ unsigned long target; /* expected cooling state */ diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 5eac316490eab..fb96b15693d44 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -40,6 +40,9 @@ /* No upper/lower limit requirement */ #define THERMAL_NO_LIMIT ((u32)~0) +/* use value, which < 0K, to indicate an invalid/uninitialized temperature */ +#define THERMAL_TEMP_INVALID -274000 + /* Unit conversion macros */ #define KELVIN_TO_CELSIUS(t) (long)(((long)t-2732 >= 0) ? \ ((long)t-2732+5)/10 : ((long)t-2732-5)/10) From ab4827691501adf64fc580f7a8cadca0332b84b7 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Fri, 30 Oct 2015 16:31:58 +0800 Subject: [PATCH 1679/2091] Thermal: handle thermal zone device properly during system sleep [ Upstream commit ff140fea847e1c2002a220571ab106c2456ed252 ] Current thermal code does not handle system sleep well because 1. the cooling device cooling state may be changed during suspend 2. 
the previous temperature reading becomes invalid after resume because it was read before system sleep 3. updating thermal zone device during suspending/resuming is wrong because some devices may have already been suspended or may have not been resumed. Thus, the proper way to do this is to cancel all thermal zone device update requirements during suspend/resume, and after all the devices have been resumed, reset and update every registered thermal zone device. This also fixes a regression introduced by: Commit 19593a1fb1f6 ("ACPI / fan: convert to platform driver") Because, with the above commit applied, all the fan devices are attached to the acpi_general_pm_domain, and they are turned on by the pm_domain automatically after resume, without the awareness of thermal core. CC: #3.18+ Reference: https://bugzilla.kernel.org/show_bug.cgi?id=78201 Reference: https://bugzilla.kernel.org/show_bug.cgi?id=91411 Tested-by: Manuel Krause Tested-by: szegad Tested-by: prash Tested-by: amish Tested-by: Matthias Reviewed-by: Javi Merino Signed-off-by: Zhang Rui Signed-off-by: Chen Yu Signed-off-by: Sasha Levin --- drivers/thermal/thermal_core.c | 42 ++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 80e812378c0c1..d2d089173165b 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -37,6 +37,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include @@ -59,6 +60,8 @@ static LIST_HEAD(thermal_governor_list); static DEFINE_MUTEX(thermal_list_lock); static DEFINE_MUTEX(thermal_governor_lock); +static atomic_t in_suspend; + static struct thermal_governor *def_governor; static struct thermal_governor *__find_governor(const char *name) @@ -493,6 +496,9 @@ void thermal_zone_device_update(struct thermal_zone_device *tz) { int count; + if (atomic_read(&in_suspend)) + return; + if (!tz->ops->get_temp) return; @@ -1825,6 +1831,36 @@ static void thermal_unregister_governors(void) thermal_gov_user_space_unregister(); } +static int thermal_pm_notify(struct notifier_block *nb, + unsigned long mode, void *_unused) +{ + struct thermal_zone_device *tz; + + switch (mode) { + case PM_HIBERNATION_PREPARE: + case PM_RESTORE_PREPARE: + case PM_SUSPEND_PREPARE: + atomic_set(&in_suspend, 1); + break; + case PM_POST_HIBERNATION: + case PM_POST_RESTORE: + case PM_POST_SUSPEND: + atomic_set(&in_suspend, 0); + list_for_each_entry(tz, &thermal_tz_list, node) { + thermal_zone_device_reset(tz); + thermal_zone_device_update(tz); + } + break; + default: + break; + } + return 0; +} + +static struct notifier_block thermal_pm_nb = { + .notifier_call = thermal_pm_notify, +}; + static int __init thermal_init(void) { int result; @@ -1845,6 +1881,11 @@ static int __init thermal_init(void) if (result) goto exit_netlink; + result = register_pm_notifier(&thermal_pm_nb); + if (result) + pr_warn("Thermal: Can not register suspend notifier, return %d\n", + result); + return 0; exit_netlink: @@ -1864,6 +1905,7 @@ static int __init thermal_init(void) static void __exit thermal_exit(void) { + unregister_pm_notifier(&thermal_pm_nb); of_thermal_destroy_zones(); genetlink_exit(); class_unregister(&thermal_class); From bdfc34b6cffea584f49ed9356924674e3e40b7e6 Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Fri, 30 Oct 2015 16:32:10 +0800 Subject: [PATCH 1680/2091] Thermal: do thermal zone update after a cooling device registered [ Upstream commit 4511f7166a2deb5f7a578cf87fd2fe1ae83527e3 ] When a new cooling device is
registered, we need to update the thermal zone to set the new registered cooling device to a proper state. This fixes a problem that the system is cool, while the fan devices are left running on full speed after boot, if fan device is registered after thermal zone device. Here is the history of why current patch looks like this: https://patchwork.kernel.org/patch/7273041/ CC: #3.18+ Reference:https://bugzilla.kernel.org/show_bug.cgi?id=92431 Tested-by: Manuel Krause Tested-by: szegad Tested-by: prash Tested-by: amish Reviewed-by: Javi Merino Signed-off-by: Zhang Rui Signed-off-by: Chen Yu Signed-off-by: Sasha Levin --- drivers/thermal/thermal_core.c | 14 +++++++++++++- include/linux/thermal.h | 2 ++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index d2d089173165b..a3282bfb343dc 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -1036,6 +1036,7 @@ int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz, if (!result) { list_add_tail(&dev->tz_node, &tz->thermal_instances); list_add_tail(&dev->cdev_node, &cdev->thermal_instances); + atomic_set(&tz->need_update, 1); } mutex_unlock(&cdev->lock); mutex_unlock(&tz->lock); @@ -1142,6 +1143,7 @@ __thermal_cooling_device_register(struct device_node *np, const struct thermal_cooling_device_ops *ops) { struct thermal_cooling_device *cdev; + struct thermal_zone_device *pos = NULL; int result; if (type && strlen(type) >= THERMAL_NAME_LENGTH) @@ -1186,6 +1188,12 @@ __thermal_cooling_device_register(struct device_node *np, /* Update binding information for 'this' new cdev */ bind_cdev(cdev); + mutex_lock(&thermal_list_lock); + list_for_each_entry(pos, &thermal_tz_list, node) + if (atomic_cmpxchg(&pos->need_update, 1, 0)) + thermal_zone_device_update(pos); + mutex_unlock(&thermal_list_lock); + return cdev; } @@ -1516,6 +1524,8 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type, tz->trips = trips; tz->passive_delay = passive_delay; tz->polling_delay = polling_delay; + /* A new thermal zone needs to be updated anyway. */ + atomic_set(&tz->need_update, 1); dev_set_name(&tz->device, "thermal_zone%d", tz->id); result = device_register(&tz->device); @@ -1597,7 +1607,9 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type, thermal_zone_device_set_polling(tz, 0); thermal_zone_device_reset(tz); - thermal_zone_device_update(tz); + /* Update the new thermal zone and mark it as already updated. */ + if (atomic_cmpxchg(&tz->need_update, 1, 0)) + thermal_zone_device_update(tz); return tz; diff --git a/include/linux/thermal.h b/include/linux/thermal.h index fb96b15693d44..2e7d0f7a0ecca 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -162,6 +162,7 @@ struct thermal_attr { * @forced_passive: If > 0, temperature at which to switch on all ACPI * processor cooling devices. Currently only used by the * step-wise governor. + * @need_update: if equals 1, thermal_zone_device_update needs to be invoked. 
* @ops: operations this &thermal_zone_device supports * @tzp: thermal zone parameters * @governor: pointer to the governor for this thermal zone @@ -188,6 +189,7 @@ struct thermal_zone_device { int emul_temperature; int passive; unsigned int forced_passive; + atomic_t need_update; struct thermal_zone_device_ops *ops; const struct thermal_zone_params *tzp; struct thermal_governor *governor; From 35de29368026457b742115a340cead9bfea9b0bd Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Tue, 22 Dec 2015 22:19:58 +0100 Subject: [PATCH 1681/2091] posix-clock: Fix return code on the poll method's error path [ Upstream commit 1b9f23727abb92c5e58f139e7d180befcaa06fe0 ] The posix_clock_poll function is supposed to return a bit mask of POLLxxx values. However, in case the hardware has disappeared (due to hot plugging for example) this code returns -ENODEV in a futile attempt to throw an error at the file descriptor level. The kernel's file_operations interface does not accept such error codes from the poll method. Instead, this function ought to return POLLERR. The value -ENODEV does, in fact, contain the POLLERR bit (and almost all the other POLLxxx bits as well), but only by chance. This patch fixes the code to return a proper bit mask. Credit goes to Markus Elfring for pointing out the suspicious signed/unsigned mismatch. Reported-by: Markus Elfring Signed-off-by: Richard Cochran Cc: John Stultz Cc: Julia Lawall Link: http://lkml.kernel.org/r/1450819198-17420-1-git-send-email-richardcochran@gmail.com Cc: stable@vger.kernel.org Signed-off-by: Thomas Gleixner Signed-off-by: Sasha Levin --- kernel/time/posix-clock.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c index ce033c7aa2e8f..9cff0ab82b635 100644 --- a/kernel/time/posix-clock.c +++ b/kernel/time/posix-clock.c @@ -69,10 +69,10 @@ static ssize_t posix_clock_read(struct file *fp, char __user *buf, static unsigned int posix_clock_poll(struct file *fp, poll_table *wait) { struct posix_clock *clk = get_posix_clock(fp); - int result = 0; + unsigned int result = 0; if (!clk) - return -ENODEV; + return POLLERR; if (clk->ops.poll) result = clk->ops.poll(clk, fp, wait); From 07493d2c6685b2e0a243f5dfda8cabbf581994d8 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Mon, 14 Dec 2015 16:34:34 -0600 Subject: [PATCH 1682/2091] rtlwifi: rtl8188ee: Fix module parameter initialization [ Upstream commit 06f34572c6110e2e2d5e653a957f1d74db9e3f2b ] In this driver, the parameters disable_watchdog and sw_crypto are never copied into the locations used in the main code. While modifying the parameter handling, the copying of parameter msi_support is moved to be with the rest.
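Editorial aside (illustration only, not the rtlwifi code): the class of bug fixed in this and the following rtlwifi patches is a configuration value that is declared and documented but never copied into the state the running code actually reads. A standalone C sketch of the intended pattern, with made-up structure and field names:

#include <stdbool.h>
#include <stdio.h>

/* Stand-ins for module parameters, filled in at load time. */
struct mod_params {
        bool msi_support;
        bool sw_crypto;
        bool disable_watchdog;
};

/* Runtime state that the driver's code paths actually consult. */
struct priv_state {
        bool msi_support;
        bool sw_crypto;
        bool disable_watchdog;
};

static void init_sw_vars(struct priv_state *priv, const struct mod_params *p)
{
        /* Every parameter must be propagated here; one that is never
         * copied silently keeps its zero-initialized default.        */
        priv->msi_support      = p->msi_support;
        priv->sw_crypto        = p->sw_crypto;
        priv->disable_watchdog = p->disable_watchdog;
}

int main(void)
{
        struct mod_params params = { true, true, false };
        struct priv_state priv = { false, false, false };

        init_sw_vars(&priv, &params);
        printf("msi=%d sw_crypto=%d disable_watchdog=%d\n",
               priv.msi_support, priv.sw_crypto, priv.disable_watchdog);
        return 0;
}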
Signed-off-by: Larry Finger Cc: Stable Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/rtlwifi/rtl8188ee/sw.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/rtlwifi/rtl8188ee/sw.c b/drivers/net/wireless/rtlwifi/rtl8188ee/sw.c index 11344121c55e0..47e32cb0ec1a4 100644 --- a/drivers/net/wireless/rtlwifi/rtl8188ee/sw.c +++ b/drivers/net/wireless/rtlwifi/rtl8188ee/sw.c @@ -88,8 +88,6 @@ int rtl88e_init_sw_vars(struct ieee80211_hw *hw) u8 tid; rtl8188ee_bt_reg_init(hw); - rtlpci->msi_support = rtlpriv->cfg->mod_params->msi_support; - rtlpriv->dm.dm_initialgain_enable = 1; rtlpriv->dm.dm_flag = 0; rtlpriv->dm.disable_framebursting = 0; @@ -138,6 +136,11 @@ int rtl88e_init_sw_vars(struct ieee80211_hw *hw) rtlpriv->psc.inactiveps = rtlpriv->cfg->mod_params->inactiveps; rtlpriv->psc.swctrl_lps = rtlpriv->cfg->mod_params->swctrl_lps; rtlpriv->psc.fwctrl_lps = rtlpriv->cfg->mod_params->fwctrl_lps; + rtlpci->msi_support = rtlpriv->cfg->mod_params->msi_support; + rtlpriv->cfg->mod_params->sw_crypto = + rtlpriv->cfg->mod_params->sw_crypto; + rtlpriv->cfg->mod_params->disable_watchdog = + rtlpriv->cfg->mod_params->disable_watchdog; if (rtlpriv->cfg->mod_params->disable_watchdog) pr_info("watchdog disabled\n"); if (!rtlpriv->psc.inactiveps) From 1a37aece44090c945a9b764e413b901863d32e86 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Mon, 14 Dec 2015 16:34:35 -0600 Subject: [PATCH 1683/2091] rtlwifi: rtl8192de: Fix incorrect module parameter descriptions [ Upstream commit d4d60b4caaa5926e1b243070770968f05656107a ] Two of the module parameters are listed with incorrect default values. Signed-off-by: Larry Finger Cc: Stable Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/rtlwifi/rtl8192de/sw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/rtlwifi/rtl8192de/sw.c b/drivers/net/wireless/rtlwifi/rtl8192de/sw.c index b19d0398215fd..c6e09a19de1ac 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192de/sw.c +++ b/drivers/net/wireless/rtlwifi/rtl8192de/sw.c @@ -376,8 +376,8 @@ module_param_named(swlps, rtl92de_mod_params.swctrl_lps, bool, 0444); module_param_named(fwlps, rtl92de_mod_params.fwctrl_lps, bool, 0444); MODULE_PARM_DESC(swenc, "Set to 1 for software crypto (default 0)\n"); MODULE_PARM_DESC(ips, "Set to 0 to not use link power save (default 1)\n"); -MODULE_PARM_DESC(swlps, "Set to 1 to use SW control power save (default 0)\n"); -MODULE_PARM_DESC(fwlps, "Set to 1 to use FW control power save (default 1)\n"); +MODULE_PARM_DESC(swlps, "Set to 1 to use SW control power save (default 1)\n"); +MODULE_PARM_DESC(fwlps, "Set to 1 to use FW control power save (default 0)\n"); MODULE_PARM_DESC(debug, "Set debug level (0-5) (default 0)"); static SIMPLE_DEV_PM_OPS(rtlwifi_pm_ops, rtl_pci_suspend, rtl_pci_resume); From 214e5cc67f9f5636b3272410f9ad0c151dafa0ac Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Mon, 14 Dec 2015 16:34:36 -0600 Subject: [PATCH 1684/2091] rtlwifi: rtl8192se: Fix module parameter initialization [ Upstream commit 7503efbd82c15c4070adffff1344e5169d3634b4 ] Two of the module parameter descriptions show incorrect default values. In addition the value for software encryption is not transferred to the locations used by the driver. 
Signed-off-by: Larry Finger Cc: Stable Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/rtlwifi/rtl8192se/sw.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/rtlwifi/rtl8192se/sw.c b/drivers/net/wireless/rtlwifi/rtl8192se/sw.c index e1fd27c888bfc..31baca41ac2f4 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192se/sw.c +++ b/drivers/net/wireless/rtlwifi/rtl8192se/sw.c @@ -187,6 +187,8 @@ static int rtl92s_init_sw_vars(struct ieee80211_hw *hw) rtlpriv->psc.inactiveps = rtlpriv->cfg->mod_params->inactiveps; rtlpriv->psc.swctrl_lps = rtlpriv->cfg->mod_params->swctrl_lps; rtlpriv->psc.fwctrl_lps = rtlpriv->cfg->mod_params->fwctrl_lps; + rtlpriv->cfg->mod_params->sw_crypto = + rtlpriv->cfg->mod_params->sw_crypto; if (!rtlpriv->psc.inactiveps) pr_info("Power Save off (module option)\n"); if (!rtlpriv->psc.fwctrl_lps) @@ -425,8 +427,8 @@ module_param_named(swlps, rtl92se_mod_params.swctrl_lps, bool, 0444); module_param_named(fwlps, rtl92se_mod_params.fwctrl_lps, bool, 0444); MODULE_PARM_DESC(swenc, "Set to 1 for software crypto (default 0)\n"); MODULE_PARM_DESC(ips, "Set to 0 to not use link power save (default 1)\n"); -MODULE_PARM_DESC(swlps, "Set to 1 to use SW control power save (default 0)\n"); -MODULE_PARM_DESC(fwlps, "Set to 1 to use FW control power save (default 1)\n"); +MODULE_PARM_DESC(swlps, "Set to 1 to use SW control power save (default 1)\n"); +MODULE_PARM_DESC(fwlps, "Set to 1 to use FW control power save (default 0)\n"); MODULE_PARM_DESC(debug, "Set debug level (0-5) (default 0)"); static SIMPLE_DEV_PM_OPS(rtlwifi_pm_ops, rtl_pci_suspend, rtl_pci_resume); From 0375ae541a81271fba5adf2b05adff1945cf3251 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Mon, 14 Dec 2015 16:34:37 -0600 Subject: [PATCH 1685/2091] rtlwifi: rtl8192ce: Fix handling of module parameters [ Upstream commit b24f19f16b9e43f54218c07609b783ea8625406a ] The module parameter for software encryption was never transferred to the location used by the driver. Signed-off-by: Larry Finger Cc: Stable Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/rtlwifi/rtl8192ce/sw.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/rtlwifi/rtl8192ce/sw.c b/drivers/net/wireless/rtlwifi/rtl8192ce/sw.c index de6cb6c3a48cc..4780bdc63b2bf 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192ce/sw.c +++ b/drivers/net/wireless/rtlwifi/rtl8192ce/sw.c @@ -139,6 +139,8 @@ int rtl92c_init_sw_vars(struct ieee80211_hw *hw) rtlpriv->psc.inactiveps = rtlpriv->cfg->mod_params->inactiveps; rtlpriv->psc.swctrl_lps = rtlpriv->cfg->mod_params->swctrl_lps; rtlpriv->psc.fwctrl_lps = rtlpriv->cfg->mod_params->fwctrl_lps; + rtlpriv->cfg->mod_params->sw_crypto = + rtlpriv->cfg->mod_params->sw_crypto; if (!rtlpriv->psc.inactiveps) pr_info("rtl8192ce: Power Save off (module option)\n"); if (!rtlpriv->psc.fwctrl_lps) From de944618d562173faaf27a853acb37ee85825682 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Mon, 14 Dec 2015 16:34:38 -0600 Subject: [PATCH 1686/2091] rtlwifi: rtl8192cu: Add missing parameter setup [ Upstream commit b68d0ae7e58624c33f2eddab471fee55db27dbf9 ] This driver fails to copy the module parameter for software encryption to the locations used by the main code. 
Signed-off-by: Larry Finger Cc: Stable Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/rtlwifi/rtl8192cu/sw.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/rtlwifi/rtl8192cu/sw.c b/drivers/net/wireless/rtlwifi/rtl8192cu/sw.c index fd4a5353d2169..7c6f7f0d18c60 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192cu/sw.c +++ b/drivers/net/wireless/rtlwifi/rtl8192cu/sw.c @@ -65,6 +65,8 @@ static int rtl92cu_init_sw_vars(struct ieee80211_hw *hw) rtlpriv->dm.disable_framebursting = false; rtlpriv->dm.thermalvalue = 0; rtlpriv->dbg.global_debuglevel = rtlpriv->cfg->mod_params->debug; + rtlpriv->cfg->mod_params->sw_crypto = + rtlpriv->cfg->mod_params->sw_crypto; /* for firmware buf */ rtlpriv->rtlhal.pfirmware = vzalloc(0x4000); From 226864aff728a7253570e8dd3073ee6dc588d1de Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 5 Jul 2015 11:12:07 -0400 Subject: [PATCH 1687/2091] NFS: Remove the "NFS_CAP_CHANGE_ATTR" capability [ Upstream commit cd812599796f500b042f5464b6665755eca21137 ] Setting the change attribute has been mandatory for all NFS versions, since commit 3a1556e8662c ("NFSv2/v3: Simulate the change attribute"). We should therefore not have anything be conditional on it being set/unset. Signed-off-by: Trond Myklebust Signed-off-by: Sasha Levin --- fs/nfs/client.c | 2 +- fs/nfs/inode.c | 4 ++-- fs/nfs/nfs4proc.c | 3 --- include/linux/nfs_fs_sb.h | 2 +- 4 files changed, 4 insertions(+), 7 deletions(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 892aefff36300..fdd234206dff9 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -775,7 +775,7 @@ static int nfs_init_server(struct nfs_server *server, server->options = data->options; server->caps |= NFS_CAP_HARDLINKS|NFS_CAP_SYMLINKS|NFS_CAP_FILEID| NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER|NFS_CAP_OWNER_GROUP| - NFS_CAP_ATIME|NFS_CAP_CTIME|NFS_CAP_MTIME|NFS_CAP_CHANGE_ATTR; + NFS_CAP_ATIME|NFS_CAP_CTIME|NFS_CAP_MTIME; if (data->rsize) server->rsize = nfs_block_size(data->rsize, NULL); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 7f22b6c6fb502..7fdb9a25f80c0 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -442,7 +442,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR); if (fattr->valid & NFS_ATTR_FATTR_CHANGE) inode->i_version = fattr->change_attr; - else if (nfs_server_capable(inode, NFS_CAP_CHANGE_ATTR)) + else nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR); if (fattr->valid & NFS_ATTR_FATTR_SIZE) inode->i_size = nfs_size_to_loff_t(fattr->size); @@ -1688,7 +1688,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) nfs_force_lookup_revalidate(inode); inode->i_version = fattr->change_attr; } - } else if (server->caps & NFS_CAP_CHANGE_ATTR) + } else nfsi->cache_validity |= save_cache_validity; if (fattr->valid & NFS_ATTR_FATTR_MTIME) { diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 1310a710192bf..2c4f41c343668 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -8512,7 +8512,6 @@ static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = { .minor_version = 0, .init_caps = NFS_CAP_READDIRPLUS | NFS_CAP_ATOMIC_OPEN - | NFS_CAP_CHANGE_ATTR | NFS_CAP_POSIX_LOCK, .init_client = nfs40_init_client, .shutdown_client = nfs40_shutdown_client, @@ -8538,7 +8537,6 @@ static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = { .minor_version = 1, .init_caps = NFS_CAP_READDIRPLUS | NFS_CAP_ATOMIC_OPEN - | NFS_CAP_CHANGE_ATTR | 
NFS_CAP_POSIX_LOCK | NFS_CAP_STATEID_NFSV41 | NFS_CAP_ATOMIC_OPEN_V1, @@ -8561,7 +8559,6 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = { .minor_version = 2, .init_caps = NFS_CAP_READDIRPLUS | NFS_CAP_ATOMIC_OPEN - | NFS_CAP_CHANGE_ATTR | NFS_CAP_POSIX_LOCK | NFS_CAP_STATEID_NFSV41 | NFS_CAP_ATOMIC_OPEN_V1 diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 5e1273d4de140..eda4a72a9b25d 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -220,7 +220,7 @@ struct nfs_server { #define NFS_CAP_SYMLINKS (1U << 2) #define NFS_CAP_ACLS (1U << 3) #define NFS_CAP_ATOMIC_OPEN (1U << 4) -#define NFS_CAP_CHANGE_ATTR (1U << 5) +/* #define NFS_CAP_CHANGE_ATTR (1U << 5) */ #define NFS_CAP_FILEID (1U << 6) #define NFS_CAP_MODE (1U << 7) #define NFS_CAP_NLINK (1U << 8) From 8572f2fa313c47750609178b20caf0598fd1042a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 29 Dec 2015 18:55:19 -0500 Subject: [PATCH 1688/2091] NFS: Fix attribute cache revalidation [ Upstream commit ade14a7df796d4e86bd9d181193c883a57b13db0 ] If a NFSv4 client uses the cache_consistency_bitmask in order to request only information about the change attribute, timestamps and size, then it has not revalidated all attributes, and hence the attribute timeout timestamp should not be updated. Reported-by: Donald Buczek Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust Signed-off-by: Sasha Levin --- fs/nfs/inode.c | 54 ++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 39 insertions(+), 15 deletions(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 7fdb9a25f80c0..723b8922d76b9 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1627,6 +1627,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) unsigned long invalid = 0; unsigned long now = jiffies; unsigned long save_cache_validity; + bool cache_revalidated = true; dfprintk(VFS, "NFS: %s(%s/%lu fh_crc=0x%08x ct=%d info=0x%x)\n", __func__, inode->i_sb->s_id, inode->i_ino, @@ -1688,22 +1689,28 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) nfs_force_lookup_revalidate(inode); inode->i_version = fattr->change_attr; } - } else + } else { nfsi->cache_validity |= save_cache_validity; + cache_revalidated = false; + } if (fattr->valid & NFS_ATTR_FATTR_MTIME) { memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); - } else if (server->caps & NFS_CAP_MTIME) + } else if (server->caps & NFS_CAP_MTIME) { nfsi->cache_validity |= save_cache_validity & (NFS_INO_INVALID_ATTR | NFS_INO_REVAL_FORCED); + cache_revalidated = false; + } if (fattr->valid & NFS_ATTR_FATTR_CTIME) { memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); - } else if (server->caps & NFS_CAP_CTIME) + } else if (server->caps & NFS_CAP_CTIME) { nfsi->cache_validity |= save_cache_validity & (NFS_INO_INVALID_ATTR | NFS_INO_REVAL_FORCED); + cache_revalidated = false; + } /* Check if our cached file size is stale */ if (fattr->valid & NFS_ATTR_FATTR_SIZE) { @@ -1723,19 +1730,23 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) (long long)cur_isize, (long long)new_isize); } - } else + } else { nfsi->cache_validity |= save_cache_validity & (NFS_INO_INVALID_ATTR | NFS_INO_REVAL_PAGECACHE | NFS_INO_REVAL_FORCED); + cache_revalidated = false; + } if (fattr->valid & NFS_ATTR_FATTR_ATIME) memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime)); - else if (server->caps & NFS_CAP_ATIME) + else if (server->caps & NFS_CAP_ATIME) { nfsi->cache_validity 
|= save_cache_validity & (NFS_INO_INVALID_ATIME | NFS_INO_REVAL_FORCED); + cache_revalidated = false; + } if (fattr->valid & NFS_ATTR_FATTR_MODE) { if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)) { @@ -1744,36 +1755,42 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) inode->i_mode = newmode; invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; } - } else if (server->caps & NFS_CAP_MODE) + } else if (server->caps & NFS_CAP_MODE) { nfsi->cache_validity |= save_cache_validity & (NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL | NFS_INO_REVAL_FORCED); + cache_revalidated = false; + } if (fattr->valid & NFS_ATTR_FATTR_OWNER) { if (!uid_eq(inode->i_uid, fattr->uid)) { invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; inode->i_uid = fattr->uid; } - } else if (server->caps & NFS_CAP_OWNER) + } else if (server->caps & NFS_CAP_OWNER) { nfsi->cache_validity |= save_cache_validity & (NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL | NFS_INO_REVAL_FORCED); + cache_revalidated = false; + } if (fattr->valid & NFS_ATTR_FATTR_GROUP) { if (!gid_eq(inode->i_gid, fattr->gid)) { invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; inode->i_gid = fattr->gid; } - } else if (server->caps & NFS_CAP_OWNER_GROUP) + } else if (server->caps & NFS_CAP_OWNER_GROUP) { nfsi->cache_validity |= save_cache_validity & (NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL | NFS_INO_REVAL_FORCED); + cache_revalidated = false; + } if (fattr->valid & NFS_ATTR_FATTR_NLINK) { if (inode->i_nlink != fattr->nlink) { @@ -1782,19 +1799,22 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) invalid |= NFS_INO_INVALID_DATA; set_nlink(inode, fattr->nlink); } - } else if (server->caps & NFS_CAP_NLINK) + } else if (server->caps & NFS_CAP_NLINK) { nfsi->cache_validity |= save_cache_validity & (NFS_INO_INVALID_ATTR | NFS_INO_REVAL_FORCED); + cache_revalidated = false; + } if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) { /* * report the blocks in 512byte units */ inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used); - } - if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED) + } else if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED) inode->i_blocks = fattr->du.nfs2.blocks; + else + cache_revalidated = false; /* Update attrtimeo value if we're out of the unstable period */ if (invalid & NFS_INO_INVALID_ATTR) { @@ -1804,9 +1824,13 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) /* Set barrier to be more recent than all outstanding updates */ nfsi->attr_gencount = nfs_inc_attr_generation_counter(); } else { - if (!time_in_range_open(now, nfsi->attrtimeo_timestamp, nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) { - if ((nfsi->attrtimeo <<= 1) > NFS_MAXATTRTIMEO(inode)) - nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode); + if (cache_revalidated) { + if (!time_in_range_open(now, nfsi->attrtimeo_timestamp, + nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) { + nfsi->attrtimeo <<= 1; + if (nfsi->attrtimeo > NFS_MAXATTRTIMEO(inode)) + nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode); + } nfsi->attrtimeo_timestamp = now; } /* Set the barrier to be more recent than this fattr */ @@ -1815,7 +1839,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) } /* Don't declare attrcache up to date if there were no attrs! 
*/ - if (fattr->valid != 0) + if (cache_revalidated) invalid &= ~NFS_INO_INVALID_ATTR; /* Don't invalidate the data if we were to blame */ From e96bbdf82d1943c73af99d1a4e2ea9d140144983 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Mon, 21 Dec 2015 17:05:08 -0600 Subject: [PATCH 1689/2091] rtlwifi: rtl_pci: Fix kernel panic [ Upstream commit f99551a2d39dc26ea03dc6761be11ac913eb2d57 ] In commit 38506ecefab9 (rtlwifi: rtl_pci: Start modification for new drivers), a bug was introduced that causes a NULL pointer dereference. As this bug only affects the infrequently used RTL8192EE and only under low-memory conditions, it has taken a long time for the bug to show up. The bug was reported on the linux-wireless mailing list and also at https://bugs.launchpad.net/ubuntu/+source/ubuntu-release-upgrader/ as bug #1527603 (kernel crashes due to rtl8192ee driver on ubuntu 15.10). Fixes: 38506ecefab9 ("rtlwifi: rtl_pci: Start modification for new drivers") Signed-off-by: Larry Finger Cc: Stable Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/rtlwifi/pci.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/rtlwifi/pci.c b/drivers/net/wireless/rtlwifi/pci.c index f46c9d7f65281..7f471bff435c0 100644 --- a/drivers/net/wireless/rtlwifi/pci.c +++ b/drivers/net/wireless/rtlwifi/pci.c @@ -801,7 +801,9 @@ static void _rtl_pci_rx_interrupt(struct ieee80211_hw *hw) hw_queue); if (rx_remained_cnt == 0) return; - + buffer_desc = &rtlpci->rx_ring[rxring_idx].buffer_desc[ + rtlpci->rx_ring[rxring_idx].idx]; + pdesc = (struct rtl_rx_desc *)skb->data; } else { /* rx descriptor */ pdesc = &rtlpci->rx_ring[rxring_idx].desc[ rtlpci->rx_ring[rxring_idx].idx]; @@ -824,13 +826,6 @@ static void _rtl_pci_rx_interrupt(struct ieee80211_hw *hw) new_skb = dev_alloc_skb(rtlpci->rxbuffersize); if (unlikely(!new_skb)) goto no_new; - if (rtlpriv->use_new_trx_flow) { - buffer_desc = - &rtlpci->rx_ring[rxring_idx].buffer_desc - [rtlpci->rx_ring[rxring_idx].idx]; - /*means rx wifi info*/ - pdesc = (struct rtl_rx_desc *)skb->data; - } memset(&rx_status , 0 , sizeof(rx_status)); rtlpriv->cfg->ops->query_rx_desc(hw, &stats, &rx_status, (u8 *)pdesc, skb); From c505638900d191ae10d1f8afdc8d09ca8c2de0d4 Mon Sep 17 00:00:00 2001 From: Zheng Liu Date: Sun, 29 Nov 2015 17:17:05 -0800 Subject: [PATCH 1690/2091] bcache: fix a livelock when we cause a huge number of cache misses [ Upstream commit 2ef9ccbfcb90cf84bdba320a571b18b05c41101b ] Subject : [PATCH v2] bcache: fix a livelock in btree lock Date : Wed, 25 Feb 2015 20:32:09 +0800 (02/25/2015 04:32:09 AM) This commit tries to fix a livelock in bcache. This livelock might happen when we cause a huge number of cache misses simultaneously. When we get a cache miss, bcache will execute the following path. ->cached_dev_make_request() ->cached_dev_read() ->cached_lookup() ->bch->btree_map_keys() ->btree_root() <------------------------ ->bch_btree_map_keys_recurse() | ->cache_lookup_fn() | ->cached_dev_cache_miss() | ->bch_btree_insert_check_key() -| [If btree->seq is not equal to seq + 1, we should return EINTR and traverse btree again.] In the bch_btree_insert_check_key() function we first need to check the upgrade flag (op->lock == -1), and when this flag is true we need to release the read btree->lock and try to take the write btree->lock. During taking and releasing this write lock, btree->seq will be monotonically increased in order to prevent other threads from modifying it during a cache miss (see btree.h:74).
But if there are some cache misses caused by some requests, we could meet a livelock because btree->seq is always changed by others. Thus no one can make progress. This commit will try to take the write btree->lock if it encounters a race when we traverse the btree. Although it sacrifices some scalability, we can ensure that only one thread can modify the btree. Signed-off-by: Zheng Liu Tested-by: Joshua Schmid Tested-by: Eric Wheeler Cc: Joshua Schmid Cc: Zhu Yanhai Cc: Kent Overstreet Cc: stable@vger.kernel.org Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/md/bcache/btree.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 00cde40db5726..9aaa8f86b92a3 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -2162,8 +2162,10 @@ int bch_btree_insert_check_key(struct btree *b, struct btree_op *op, rw_lock(true, b, b->level); if (b->key.ptr[0] != btree_ptr || - b->seq != seq + 1) + b->seq != seq + 1) { + op->lock = b->level; goto out; + } } SET_KEY_PTRS(check_key, 1); From 08656518aa2812e8650b8036201bd9e5463f22a8 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 29 Nov 2015 17:18:33 -0800 Subject: [PATCH 1691/2091] bcache: Add a cond_resched() call to gc [ Upstream commit c5f1e5adf956e3ba82d204c7c141a75da9fa449a ] Signed-off-by: Takashi Iwai Tested-by: Eric Wheeler Cc: Kent Overstreet Cc: stable@vger.kernel.org Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/md/bcache/btree.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 9aaa8f86b92a3..43829d9493f70 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -1741,6 +1741,7 @@ static void bch_btree_gc(struct cache_set *c) do { ret = btree_root(gc_root, c, &op, &writes, &stats); closure_sync(&writes); + cond_resched(); if (ret && ret != -EAGAIN) pr_warn("gc failed!"); From 14dca8988999b6ccf4126cf6a3455e5822777a8f Mon Sep 17 00:00:00 2001 From: Zheng Liu Date: Sun, 29 Nov 2015 17:19:32 -0800 Subject: [PATCH 1692/2091] bcache: clear BCACHE_DEV_UNLINK_DONE flag when attaching a backing device [ Upstream commit fecaee6f20ee122ad75402c53d8278f9bb142ddc ] This bug can be reproduced by the following script: #!/bin/bash bcache_sysfs="/sys/fs/bcache" function clear_cache() { if [ ! -e $bcache_sysfs ]; then echo "no bcache sysfs" exit fi cset_uuid=$(ls -l $bcache_sysfs|head -n 2|tail -n 1|awk '{print $9}') sudo sh -c "echo $cset_uuid > /sys/block/sdb/sdb1/bcache/detach" sleep 5 sudo sh -c "echo $cset_uuid > /sys/block/sdb/sdb1/bcache/attach" } for ((i=0;i<10;i++)); do clear_cache done The warning messages look like below: [ 275.948611] ------------[ cut here ]------------ [ 275.963840] WARNING: at fs/sysfs/dir.c:512 sysfs_add_one+0xb8/0xd0() (Tainted: P W --------------- ) [ 275.979253] Hardware name: Tecal RH2285 [ 275.994106] sysfs: cannot create duplicate filename '/devices/pci0000:00/0000:00:09.0/0000:08:00.0/host4/target4:2:1/4:2:1:0/block/sdb/sdb1/bcache/cache' [ 276.024105] Modules linked in: bcache tcp_diag inet_diag ipmi_devintf ipmi_si ipmi_msghandler bonding 8021q garp stp llc ipv6 ext3 jbd loop sg iomemory_vsl(P) bnx2 microcode serio_raw i2c_i801 i2c_core iTCO_wdt iTCO_vendor_support i7core_edac edac_core shpchp ext4 jbd2 mbcache megaraid_sas pata_acpi ata_generic ata_piix dm_mod [last unloaded: scsi_wait_scan] [ 276.072643] Pid: 2765, comm: sh Tainted: P W --------------- 2.6.32 #1 [ 276.089315] Call Trace: [ 276.105801] [] ?
warn_slowpath_common+0x87/0xc0 [ 276.122650] [] ? warn_slowpath_fmt+0x46/0x50 [ 276.139361] [] ? sysfs_add_one+0xb8/0xd0 [ 276.156012] [] ? sysfs_do_create_link+0x12b/0x170 [ 276.172682] [] ? sysfs_create_link+0x13/0x20 [ 276.189282] [] ? bcache_device_link+0xc1/0x110 [bcache] [ 276.205993] [] ? bch_cached_dev_attach+0x478/0x4f0 [bcache] [ 276.222794] [] ? bch_cached_dev_store+0x627/0x780 [bcache] [ 276.239680] [] ? alloc_pages_current+0xaa/0x110 [ 276.256594] [] ? sysfs_write_file+0xe5/0x170 [ 276.273364] [] ? vfs_write+0xb8/0x1a0 [ 276.290133] [] ? sys_write+0x51/0x90 [ 276.306368] [] ? system_call_fastpath+0x16/0x1b [ 276.322301] ---[ end trace 9f5d4fcdd0c3edfb ]--- [ 276.338241] ------------[ cut here ]------------ [ 276.354109] WARNING: at /home/wenqing.lz/bcache/bcache/super.c:720 bcache_device_link+0xdf/0x110 [bcache]() (Tainted: P W --------------- ) [ 276.386017] Hardware name: Tecal RH2285 [ 276.401430] Couldn't create device <-> cache set symlinks [ 276.401759] Modules linked in: bcache tcp_diag inet_diag ipmi_devintf ipmi_si ipmi_msghandler bonding 8021q garp stp llc ipv6 ext3 jbd loop sg iomemory_vsl(P) bnx2 microcode serio_raw i2c_i801 i2c_core iTCO_wdt iTCO_vendor_support i7core_edac edac_core shpchp ext4 jbd2 mbcache megaraid_sas pata_acpi ata_generic ata_piix dm_mod [last unloaded: scsi_wait_scan] [ 276.465477] Pid: 2765, comm: sh Tainted: P W --------------- 2.6.32 #1 [ 276.482169] Call Trace: [ 276.498610] [] ? warn_slowpath_common+0x87/0xc0 [ 276.515405] [] ? warn_slowpath_fmt+0x46/0x50 [ 276.532059] [] ? bcache_device_link+0xdf/0x110 [bcache] [ 276.548808] [] ? bch_cached_dev_attach+0x478/0x4f0 [bcache] [ 276.565569] [] ? bch_cached_dev_store+0x627/0x780 [bcache] [ 276.582418] [] ? alloc_pages_current+0xaa/0x110 [ 276.599341] [] ? sysfs_write_file+0xe5/0x170 [ 276.616142] [] ? vfs_write+0xb8/0x1a0 [ 276.632607] [] ? sys_write+0x51/0x90 [ 276.648671] [] ? system_call_fastpath+0x16/0x1b [ 276.664756] ---[ end trace 9f5d4fcdd0c3edfc ]--- We forgot to clear the BCACHE_DEV_UNLINK_DONE flag in the bcache_device_attach() function when we attach a backing device for the first time. After detaching this backing device, this flag will be true and sysfs_remove_link() isn't called in bcache_device_unlink(). Then when we attach this backing device again, sysfs_create_link() will return an EEXIST error in bcache_device_link(). So the fix is trivial: we clear this flag in bcache_device_link().
Signed-off-by: Zheng Liu Tested-by: Joshua Schmid Tested-by: Eric Wheeler Cc: Kent Overstreet Cc: stable@vger.kernel.org Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/md/bcache/super.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 4dd2bb7167f05..f624ae8be4665 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -708,6 +708,8 @@ static void bcache_device_link(struct bcache_device *d, struct cache_set *c, WARN(sysfs_create_link(&d->kobj, &c->kobj, "cache") || sysfs_create_link(&c->kobj, &d->kobj, d->name), "Couldn't create device <-> cache set symlinks"); + + clear_bit(BCACHE_DEV_UNLINK_DONE, &d->flags); } static void bcache_device_detach(struct bcache_device *d) From 888841b4a1fc60d064919dde3da80102f8dbe53c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 29 Nov 2015 17:20:59 -0800 Subject: [PATCH 1693/2091] bcache: fix a leak in bch_cached_dev_run() [ Upstream commit 4d4d8573a8451acc9f01cbea24b7e55f04a252fe ] Signed-off-by: Al Viro Tested-by: Joshua Schmid Tested-by: Eric Wheeler Cc: Kent Overstreet Cc: stable@vger.kernel.org Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/md/bcache/super.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index f624ae8be4665..9d7b6ee454af4 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -880,8 +880,11 @@ void bch_cached_dev_run(struct cached_dev *dc) buf[SB_LABEL_SIZE] = '\0'; env[2] = kasprintf(GFP_KERNEL, "CACHED_LABEL=%s", buf); - if (atomic_xchg(&dc->running, 1)) + if (atomic_xchg(&dc->running, 1)) { + kfree(env[1]); + kfree(env[2]); return; + } if (!d->c && BDEV_STATE(&dc->sb) != BDEV_STATE_NONE) { From 322d0128f1e4af6b5b3532e0848fae633fd5a992 Mon Sep 17 00:00:00 2001 From: Zheng Liu Date: Sun, 29 Nov 2015 17:21:57 -0800 Subject: [PATCH 1694/2091] bcache: unregister reboot notifier if bcache fails to unregister device [ Upstream commit 2ecf0cdb2b437402110ab57546e02abfa68a716b ] In bcache_init() function it forgot to unregister reboot notifier if bcache fails to unregister a block device. This commit fixes this. Signed-off-by: Zheng Liu Tested-by: Joshua Schmid Tested-by: Eric Wheeler Cc: Kent Overstreet Cc: stable@vger.kernel.org Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/md/bcache/super.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 9d7b6ee454af4..53f15126182a6 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -2105,8 +2105,10 @@ static int __init bcache_init(void) closure_debug_init(); bcache_major = register_blkdev(0, "bcache"); - if (bcache_major < 0) + if (bcache_major < 0) { + unregister_reboot_notifier(&reboot); return bcache_major; + } if (!(bcache_wq = create_workqueue("bcache")) || !(bcache_kobj = kobject_create_and_add("bcache", fs_kobj)) || From d5402134c27d66e83295ae2961772b89bc6a50bc Mon Sep 17 00:00:00 2001 From: Gabriel de Perthuis Date: Sun, 29 Nov 2015 18:40:23 -0800 Subject: [PATCH 1695/2091] bcache: allows use of register in udev to avoid "device_busy" error. [ Upstream commit d7076f21629f8f329bca4a44dc408d94670f49e2 ] Allows to use register, not register_quiet in udev to avoid "device_busy" error. The initial patch proposed at https://lkml.org/lkml/2013/8/26/549 by Gabriel de Perthuis does not unlock the mutex and hangs the kernel. 
See http://thread.gmane.org/gmane.linux.kernel.bcache.devel/2594 for the discussion. Cc: Denis Bychkov Cc: Kent Overstreet Cc: Eric Wheeler Cc: Gabriel de Perthuis Cc: stable@vger.kernel.org Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/md/bcache/super.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 53f15126182a6..42522c8f13c6f 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1972,6 +1972,8 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, else err = "device busy"; mutex_unlock(&bch_register_lock); + if (attr == &ksysfs_register_quiet) + goto out; } goto err; } @@ -2010,8 +2012,7 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, err_close: blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); err: - if (attr != &ksysfs_register_quiet) - pr_info("error opening %s: %s", path, err); + pr_info("error opening %s: %s", path, err); ret = -EINVAL; goto out; } From ad148665b58dcd0a4336188ce4a2194d7f353c5f Mon Sep 17 00:00:00 2001 From: Stefan Bader Date: Sun, 29 Nov 2015 18:44:49 -0800 Subject: [PATCH 1696/2091] bcache: prevent crash on changing writeback_running [ Upstream commit 8d16ce540c94c9d366eb36fc91b7154d92d6397b ] Added a safeguard in the shutdown case. At least while not being attached it is also possible to trigger a kernel bug by writing into writeback_running. This change adds the same check before trying to wake up the thread for that case. Signed-off-by: Stefan Bader Cc: Kent Overstreet Cc: stable@vger.kernel.org Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/md/bcache/writeback.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h index 0a9dab187b79c..073a042aed243 100644 --- a/drivers/md/bcache/writeback.h +++ b/drivers/md/bcache/writeback.h @@ -63,7 +63,8 @@ static inline bool should_writeback(struct cached_dev *dc, struct bio *bio, static inline void bch_writeback_queue(struct cached_dev *dc) { - wake_up_process(dc->writeback_thread); + if (!IS_ERR_OR_NULL(dc->writeback_thread)) + wake_up_process(dc->writeback_thread); } static inline void bch_writeback_add(struct cached_dev *dc) From ebd138117e5be089d0e10fea7cb308e8345ed643 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 29 Nov 2015 18:47:01 -0800 Subject: [PATCH 1697/2091] bcache: Change refill_dirty() to always scan entire disk if necessary [ Upstream commit 627ccd20b4ad3ba836472468208e2ac4dfadbf03 ] Previously, it would only scan the entire disk if it was starting from the very start of the disk - i.e. if the previous scan got to the end. This was broken by refill_full_stripes(), which updates last_scanned so that refill_dirty was never triggering the searched_from_start path. But if we change refill_dirty() to always scan the entire disk if necessary, regardless of what last_scanned was, the code gets cleaner and we fix that bug too. 
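A simplified, runnable model of the wrap-around scan described above, with plain integers standing in for bkeys and a key "budget" standing in for the keybuf filling up (all names here are illustrative):

#include <stdbool.h>
#include <stdio.h>

#define DISK_END 100	/* stands in for KEY(inode, MAX_KEY_OFFSET, 0) */

static int last_scanned;	/* stands in for buf->last_scanned */

/* stands in for bch_refill_keybuf(): advance until the target key or
 * until the key buffer (here: a simple budget) is full */
static void refill(int until, int *budget)
{
	while (last_scanned < until && *budget > 0) {
		last_scanned++;
		(*budget)--;
	}
}

/* returns true only if the whole disk was covered this round */
static bool refill_dirty(int budget)
{
	int start_pos = last_scanned;

	refill(DISK_END, &budget);
	if (last_scanned < DISK_END)
		return false;		/* buffer filled before reaching the end */

	/* wrap around, but only rescan up to where this round started */
	last_scanned = 0;
	refill(start_pos, &budget);
	return last_scanned >= start_pos;
}

int main(void)
{
	last_scanned = 60;	/* e.g. left mid-disk by refill_full_stripes() */
	printf("whole disk scanned: %s\n", refill_dirty(80) ? "yes" : "no");
	printf("whole disk scanned: %s\n", refill_dirty(200) ? "yes" : "no");
	return 0;
}

With the wrap in place, a round that starts mid-disk can still report the disk as fully scanned once it has covered the tail and then the head up to its starting point, regardless of where last_scanned was left.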
Signed-off-by: Kent Overstreet Cc: stable@vger.kernel.org Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/md/bcache/writeback.c | 37 ++++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index f1986bcd1bf05..540256a0df4fd 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -323,6 +323,10 @@ void bcache_dev_sectors_dirty_add(struct cache_set *c, unsigned inode, static bool dirty_pred(struct keybuf *buf, struct bkey *k) { + struct cached_dev *dc = container_of(buf, struct cached_dev, writeback_keys); + + BUG_ON(KEY_INODE(k) != dc->disk.id); + return KEY_DIRTY(k); } @@ -372,11 +376,24 @@ static void refill_full_stripes(struct cached_dev *dc) } } +/* + * Returns true if we scanned the entire disk + */ static bool refill_dirty(struct cached_dev *dc) { struct keybuf *buf = &dc->writeback_keys; + struct bkey start = KEY(dc->disk.id, 0, 0); struct bkey end = KEY(dc->disk.id, MAX_KEY_OFFSET, 0); - bool searched_from_start = false; + struct bkey start_pos; + + /* + * make sure keybuf pos is inside the range for this disk - at bringup + * we might not be attached yet so this disk's inode nr isn't + * initialized then + */ + if (bkey_cmp(&buf->last_scanned, &start) < 0 || + bkey_cmp(&buf->last_scanned, &end) > 0) + buf->last_scanned = start; if (dc->partial_stripes_expensive) { refill_full_stripes(dc); @@ -384,14 +401,20 @@ static bool refill_dirty(struct cached_dev *dc) return false; } - if (bkey_cmp(&buf->last_scanned, &end) >= 0) { - buf->last_scanned = KEY(dc->disk.id, 0, 0); - searched_from_start = true; - } - + start_pos = buf->last_scanned; bch_refill_keybuf(dc->disk.c, buf, &end, dirty_pred); - return bkey_cmp(&buf->last_scanned, &end) >= 0 && searched_from_start; + if (bkey_cmp(&buf->last_scanned, &end) < 0) + return false; + + /* + * If we get to the end start scanning again from the beginning, and + * only scan up to where we initially started scanning from: + */ + buf->last_scanned = start; + bch_refill_keybuf(dc->disk.c, buf, &start_pos, dirty_pred); + + return bkey_cmp(&buf->last_scanned, &start_pos) >= 0; } static int bch_writeback_thread(void *arg) From 34ed7459a09569f4b2dd8a549f420fac8c3619e0 Mon Sep 17 00:00:00 2001 From: Uri Mashiach Date: Thu, 24 Dec 2015 16:05:00 +0200 Subject: [PATCH 1698/2091] wlcore/wl12xx: spi: fix NULL pointer dereference (Oops) [ Upstream commit e47301b06d5a65678690f04c2248fd181db1e59a ] Fix the below Oops when trying to modprobe wlcore_spi. The oops occurs because the wl1271_power_{off,on}() function doesn't check the power() function pointer. 
[ 23.401447] Unable to handle kernel NULL pointer dereference at virtual address 00000000 [ 23.409954] pgd = c0004000 [ 23.412922] [00000000] *pgd=00000000 [ 23.416693] Internal error: Oops: 80000007 [#1] SMP ARM [ 23.422168] Modules linked in: wl12xx wlcore mac80211 cfg80211 musb_dsps musb_hdrc usbcore usb_common snd_soc_simple_card evdev joydev omap_rng wlcore_spi snd_soc_tlv320aic23_i2c rng_core snd_soc_tlv320aic23 c_can_platform c_can can_dev snd_soc_davinci_mcasp snd_soc_edma snd_soc_omap omap_wdt musb_am335x cpufreq_dt thermal_sys hwmon [ 23.453253] CPU: 0 PID: 36 Comm: kworker/0:2 Not tainted 4.2.0-00002-g951efee-dirty #233 [ 23.461720] Hardware name: Generic AM33XX (Flattened Device Tree) [ 23.468123] Workqueue: events request_firmware_work_func [ 23.473690] task: de32efc0 ti: de4ee000 task.ti: de4ee000 [ 23.479341] PC is at 0x0 [ 23.482112] LR is at wl12xx_set_power_on+0x28/0x124 [wlcore] [ 23.488074] pc : [<00000000>] lr : [] psr: 60000013 [ 23.488074] sp : de4efe50 ip : 00000002 fp : 00000000 [ 23.500162] r10: de7cdd00 r9 : dc848800 r8 : bf27af00 [ 23.505663] r7 : bf27a1a8 r6 : dcbd8a80 r5 : dce0e2e0 r4 : dce0d2e0 [ 23.512536] r3 : 00000000 r2 : 00000000 r1 : 00000001 r0 : dc848810 [ 23.519412] Flags: nZCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment kernel [ 23.527109] Control: 10c5387d Table: 9cb78019 DAC: 00000015 [ 23.533160] Process kworker/0:2 (pid: 36, stack limit = 0xde4ee218) [ 23.539760] Stack: (0xde4efe50 to 0xde4f0000) [...] [ 23.665030] [] (wl12xx_set_power_on [wlcore]) from [] (wlcore_nvs_cb+0x118/0xa4c [wlcore]) [ 23.675604] [] (wlcore_nvs_cb [wlcore]) from [] (request_firmware_work_func+0x30/0x58) [ 23.685784] [] (request_firmware_work_func) from [] (process_one_work+0x1b4/0x4b4) [ 23.695591] [] (process_one_work) from [] (worker_thread+0x3c/0x4a4) [ 23.704124] [] (worker_thread) from [] (kthread+0xd4/0xf0) [ 23.711747] [] (kthread) from [] (ret_from_fork+0x14/0x3c) [ 23.719357] Code: bad PC value [ 23.722760] ---[ end trace 981be8510db9b3a9 ]--- Prevent the oops by validating the power() pointer value before calling the function.
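The guard itself is the whole fix. A minimal self-contained sketch of the pattern (example_if_ops and example_power_on() are illustrative, not the wlcore types) is shown below; the diff that follows applies the same check to wl1271_power_on() and wl1271_power_off():

#include <stdbool.h>
#include <stddef.h>

struct example_if_ops {
	int (*power)(void *dev, bool enable);	/* a bus driver may leave this NULL */
};

static int example_power_on(const struct example_if_ops *ops, void *dev)
{
	int ret = 0;

	if (ops->power)			/* don't jump through a NULL pointer */
		ret = ops->power(dev, true);
	return ret;
}

int main(void)
{
	struct example_if_ops spi_like_ops = { .power = NULL };

	return example_power_on(&spi_like_ops, NULL);	/* returns 0, no crash */
}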
Signed-off-by: Uri Mashiach Cc: stable@vger.kernel.org Acked-by: Igor Grinberg Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/ti/wlcore/io.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ti/wlcore/io.h b/drivers/net/wireless/ti/wlcore/io.h index 0305729d09868..10cf3747694d5 100644 --- a/drivers/net/wireless/ti/wlcore/io.h +++ b/drivers/net/wireless/ti/wlcore/io.h @@ -207,19 +207,23 @@ static inline int __must_check wlcore_write_reg(struct wl1271 *wl, int reg, static inline void wl1271_power_off(struct wl1271 *wl) { - int ret; + int ret = 0; if (!test_bit(WL1271_FLAG_GPIO_POWER, &wl->flags)) return; - ret = wl->if_ops->power(wl->dev, false); + if (wl->if_ops->power) + ret = wl->if_ops->power(wl->dev, false); if (!ret) clear_bit(WL1271_FLAG_GPIO_POWER, &wl->flags); } static inline int wl1271_power_on(struct wl1271 *wl) { - int ret = wl->if_ops->power(wl->dev, true); + int ret = 0; + + if (wl->if_ops->power) + ret = wl->if_ops->power(wl->dev, true); if (ret == 0) set_bit(WL1271_FLAG_GPIO_POWER, &wl->flags); From eaf7c91b334172cc4ec2871940baee3cc0ebeede Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Francillon?= Date: Sat, 2 Jan 2016 20:39:54 -0800 Subject: [PATCH 1699/2091] Input: i8042 - add Fujitsu Lifebook U745 to the nomux list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit dd0d0d4de582a6a61c032332c91f4f4cb2bab569 ] Without i8042.nomux=1 the Elantech touch pad is not working at all on a Fujitsu Lifebook U745. This patch does not seem necessary for all U745 (maybe because of different BIOS versions?). However, it was verified that the patch does not break those (see opensuse bug 883192: https://bugzilla.opensuse.org/show_bug.cgi?id=883192). Signed-off-by: Aurélien Francillon Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/serio/i8042-x86ia64io.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h index c11556563ef06..68f5f4a0f1e72 100644 --- a/drivers/input/serio/i8042-x86ia64io.h +++ b/drivers/input/serio/i8042-x86ia64io.h @@ -257,6 +257,13 @@ static const struct dmi_system_id __initconst i8042_dmi_nomux_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "LifeBook S6230"), }, }, + { + /* Fujitsu Lifebook U745 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"), + DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK U745"), + }, + }, { /* Fujitsu T70H */ .matches = { From d6341753c418d3699948290d8c0b9d9dc78bd209 Mon Sep 17 00:00:00 2001 From: Andrew Gabbasov Date: Thu, 24 Dec 2015 10:25:32 -0600 Subject: [PATCH 1700/2091] udf: Prevent buffer overrun with multi-byte characters [ Upstream commit ad402b265ecf6fa22d04043b41444cdfcdf4f52d ] udf_CS0toUTF8 function stops the conversion when the output buffer length reaches UDF_NAME_LEN-2, which is correct maximum name length, but, when checking, it leaves the space for a single byte only, while multi-bytes output characters can take more space, causing buffer overflow. Similar error exists in udf_CS0toNLS function, that restricts the output length to UDF_NAME_LEN, while actual maximum allowed length is UDF_NAME_LEN-2. In these cases the output can override not only the current buffer length field, causing corruption of the name buffer itself, but also following allocation structures, causing kernel crash. 
Adjust the output length checks in both functions to prevent buffer overruns in case of multi-bytes UTF8 or NLS characters. CC: stable@vger.kernel.org Signed-off-by: Andrew Gabbasov Signed-off-by: Jan Kara Signed-off-by: Sasha Levin --- fs/udf/unicode.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c index b84fee372734b..7e2e866ace518 100644 --- a/fs/udf/unicode.c +++ b/fs/udf/unicode.c @@ -133,11 +133,15 @@ int udf_CS0toUTF8(struct ustr *utf_o, const struct ustr *ocu_i) if (c < 0x80U) utf_o->u_name[utf_o->u_len++] = (uint8_t)c; else if (c < 0x800U) { + if (utf_o->u_len > (UDF_NAME_LEN - 4)) + break; utf_o->u_name[utf_o->u_len++] = (uint8_t)(0xc0 | (c >> 6)); utf_o->u_name[utf_o->u_len++] = (uint8_t)(0x80 | (c & 0x3f)); } else { + if (utf_o->u_len > (UDF_NAME_LEN - 5)) + break; utf_o->u_name[utf_o->u_len++] = (uint8_t)(0xe0 | (c >> 12)); utf_o->u_name[utf_o->u_len++] = @@ -282,7 +286,7 @@ static int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o, c = (c << 8) | ocu[i++]; len = nls->uni2char(c, &utf_o->u_name[utf_o->u_len], - UDF_NAME_LEN - utf_o->u_len); + UDF_NAME_LEN - 2 - utf_o->u_len); /* Valid character? */ if (len >= 0) utf_o->u_len += len; From 516a102b0add0610805d0062670b18b1b44ccdda Mon Sep 17 00:00:00 2001 From: Andrew Gabbasov Date: Thu, 24 Dec 2015 10:25:33 -0600 Subject: [PATCH 1701/2091] udf: Check output buffer length when converting name to CS0 [ Upstream commit bb00c898ad1ce40c4bb422a8207ae562e9aea7ae ] If a name contains at least some characters with Unicode values exceeding single byte, the CS0 output should have 2 bytes per character. And if other input characters have single byte Unicode values, then the single input byte is converted to 2 output bytes, and the length of output becomes larger than the length of input. And if the input name is long enough, the output length may exceed the allocated buffer length. All this means that conversion from UTF8 or NLS to CS0 requires checking of output length in order to stop when it exceeds the given output buffer size. [JK: Make code return -ENAMETOOLONG instead of silently truncating the name] CC: stable@vger.kernel.org Signed-off-by: Andrew Gabbasov Signed-off-by: Jan Kara Signed-off-by: Sasha Levin --- fs/udf/unicode.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c index 7e2e866ace518..2eafe2c4d2397 100644 --- a/fs/udf/unicode.c +++ b/fs/udf/unicode.c @@ -182,17 +182,22 @@ int udf_CS0toUTF8(struct ustr *utf_o, const struct ustr *ocu_i) static int udf_UTF8toCS0(dstring *ocu, struct ustr *utf, int length) { unsigned c, i, max_val, utf_char; - int utf_cnt, u_len; + int utf_cnt, u_len, u_ch; memset(ocu, 0, sizeof(dstring) * length); ocu[0] = 8; max_val = 0xffU; + u_ch = 1; try_again: u_len = 0U; utf_char = 0U; utf_cnt = 0U; for (i = 0U; i < utf->u_len; i++) { + /* Name didn't fit? 
*/ + if (u_len + 1 + u_ch >= length) + return 0; + c = (uint8_t)utf->u_name[i]; /* Complete a multi-byte UTF-8 character */ @@ -234,6 +239,7 @@ static int udf_UTF8toCS0(dstring *ocu, struct ustr *utf, int length) if (max_val == 0xffU) { max_val = 0xffffU; ocu[0] = (uint8_t)0x10U; + u_ch = 2; goto try_again; } goto error_out; @@ -304,15 +310,19 @@ static int udf_NLStoCS0(struct nls_table *nls, dstring *ocu, struct ustr *uni, int len; unsigned i, max_val; uint16_t uni_char; - int u_len; + int u_len, u_ch; memset(ocu, 0, sizeof(dstring) * length); ocu[0] = 8; max_val = 0xffU; + u_ch = 1; try_again: u_len = 0U; for (i = 0U; i < uni->u_len; i++) { + /* Name didn't fit? */ + if (u_len + 1 + u_ch >= length) + return 0; len = nls->char2uni(&uni->u_name[i], uni->u_len - i, &uni_char); if (!len) continue; @@ -325,6 +335,7 @@ static int udf_NLStoCS0(struct nls_table *nls, dstring *ocu, struct ustr *uni, if (uni_char > max_val) { max_val = 0xffffU; ocu[0] = (uint8_t)0x10U; + u_ch = 2; goto try_again; } From e76bda944f6f601dd7bf2a26b34803a30d4503e3 Mon Sep 17 00:00:00 2001 From: Mykola Lysenko Date: Fri, 18 Dec 2015 17:14:42 -0500 Subject: [PATCH 1702/2091] drm/dp/mst: process broadcast messages correctly [ Upstream commit bd9343208704fcc70a5b919f228a7d26ae472727 ] In case broadcast message received in UP request, RAD cannot be used to identify message originator. Message should be parsed, originator should be found by GUID from parsed message. Also reply with broadcast in case broadcast message received (for now it is always broadcast) Acked-by: Dave Airlie Signed-off-by: Mykola Lysenko Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Sasha Levin --- drivers/gpu/drm/drm_dp_mst_topology.c | 95 +++++++++++++++++++++++---- 1 file changed, 84 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index 109e776345d39..87c4eeb376249 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -1195,6 +1195,50 @@ static struct drm_dp_mst_branch *drm_dp_get_mst_branch_device(struct drm_dp_mst_ return mstb; } +static struct drm_dp_mst_branch *get_mst_branch_device_by_guid_helper( + struct drm_dp_mst_branch *mstb, + uint8_t *guid) +{ + struct drm_dp_mst_branch *found_mstb; + struct drm_dp_mst_port *port; + + list_for_each_entry(port, &mstb->ports, next) { + if (!port->mstb) + continue; + + if (port->guid_valid && memcmp(port->guid, guid, 16) == 0) + return port->mstb; + + found_mstb = get_mst_branch_device_by_guid_helper(port->mstb, guid); + + if (found_mstb) + return found_mstb; + } + + return NULL; +} + +static struct drm_dp_mst_branch *drm_dp_get_mst_branch_device_by_guid( + struct drm_dp_mst_topology_mgr *mgr, + uint8_t *guid) +{ + struct drm_dp_mst_branch *mstb; + + /* find the port by iterating down */ + mutex_lock(&mgr->lock); + + if (mgr->guid_valid && memcmp(mgr->guid, guid, 16) == 0) + mstb = mgr->mst_primary; + else + mstb = get_mst_branch_device_by_guid_helper(mgr->mst_primary, guid); + + if (mstb) + kref_get(&mstb->kref); + + mutex_unlock(&mgr->lock); + return mstb; +} + static void drm_dp_check_and_send_link_address(struct drm_dp_mst_topology_mgr *mgr, struct drm_dp_mst_branch *mstb) { @@ -1306,6 +1350,7 @@ static int set_hdr_from_dst_qlock(struct drm_dp_sideband_msg_hdr *hdr, struct drm_dp_sideband_msg_tx *txmsg) { struct drm_dp_mst_branch *mstb = txmsg->dst; + u8 req_type; /* both msg slots are full */ if (txmsg->seqno == -1) { @@ -1322,7 +1367,13 @@ static int 
set_hdr_from_dst_qlock(struct drm_dp_sideband_msg_hdr *hdr, txmsg->seqno = 1; mstb->tx_slots[txmsg->seqno] = txmsg; } - hdr->broadcast = 0; + + req_type = txmsg->msg[0] & 0x7f; + if (req_type == DP_CONNECTION_STATUS_NOTIFY || + req_type == DP_RESOURCE_STATUS_NOTIFY) + hdr->broadcast = 1; + else + hdr->broadcast = 0; hdr->path_msg = txmsg->path_msg; hdr->lct = mstb->lct; hdr->lcr = mstb->lct - 1; @@ -2129,28 +2180,50 @@ static int drm_dp_mst_handle_up_req(struct drm_dp_mst_topology_mgr *mgr) if (mgr->up_req_recv.have_eomt) { struct drm_dp_sideband_msg_req_body msg; - struct drm_dp_mst_branch *mstb; + struct drm_dp_mst_branch *mstb = NULL; bool seqno; - mstb = drm_dp_get_mst_branch_device(mgr, - mgr->up_req_recv.initial_hdr.lct, - mgr->up_req_recv.initial_hdr.rad); - if (!mstb) { - DRM_DEBUG_KMS("Got MST reply from unknown device %d\n", mgr->up_req_recv.initial_hdr.lct); - memset(&mgr->up_req_recv, 0, sizeof(struct drm_dp_sideband_msg_rx)); - return 0; + + if (!mgr->up_req_recv.initial_hdr.broadcast) { + mstb = drm_dp_get_mst_branch_device(mgr, + mgr->up_req_recv.initial_hdr.lct, + mgr->up_req_recv.initial_hdr.rad); + if (!mstb) { + DRM_DEBUG_KMS("Got MST reply from unknown device %d\n", mgr->up_req_recv.initial_hdr.lct); + memset(&mgr->up_req_recv, 0, sizeof(struct drm_dp_sideband_msg_rx)); + return 0; + } } seqno = mgr->up_req_recv.initial_hdr.seqno; drm_dp_sideband_parse_req(&mgr->up_req_recv, &msg); if (msg.req_type == DP_CONNECTION_STATUS_NOTIFY) { - drm_dp_send_up_ack_reply(mgr, mstb, msg.req_type, seqno, false); + drm_dp_send_up_ack_reply(mgr, mgr->mst_primary, msg.req_type, seqno, false); + + if (!mstb) + mstb = drm_dp_get_mst_branch_device_by_guid(mgr, msg.u.conn_stat.guid); + + if (!mstb) { + DRM_DEBUG_KMS("Got MST reply from unknown device %d\n", mgr->up_req_recv.initial_hdr.lct); + memset(&mgr->up_req_recv, 0, sizeof(struct drm_dp_sideband_msg_rx)); + return 0; + } + drm_dp_update_port(mstb, &msg.u.conn_stat); DRM_DEBUG_KMS("Got CSN: pn: %d ldps:%d ddps: %d mcs: %d ip: %d pdt: %d\n", msg.u.conn_stat.port_number, msg.u.conn_stat.legacy_device_plug_status, msg.u.conn_stat.displayport_device_plug_status, msg.u.conn_stat.message_capability_status, msg.u.conn_stat.input_port, msg.u.conn_stat.peer_device_type); (*mgr->cbs->hotplug)(mgr); } else if (msg.req_type == DP_RESOURCE_STATUS_NOTIFY) { - drm_dp_send_up_ack_reply(mgr, mstb, msg.req_type, seqno, false); + drm_dp_send_up_ack_reply(mgr, mgr->mst_primary, msg.req_type, seqno, false); + if (!mstb) + mstb = drm_dp_get_mst_branch_device_by_guid(mgr, msg.u.resource_stat.guid); + + if (!mstb) { + DRM_DEBUG_KMS("Got MST reply from unknown device %d\n", mgr->up_req_recv.initial_hdr.lct); + memset(&mgr->up_req_recv, 0, sizeof(struct drm_dp_sideband_msg_rx)); + return 0; + } + DRM_DEBUG_KMS("Got RSN: pn: %d avail_pbn %d\n", msg.u.resource_stat.port_number, msg.u.resource_stat.available_pbn); } From 1d1e2c4f98c1673ae2c99ab6f133d2a1d3fa0b9e Mon Sep 17 00:00:00 2001 From: Mykola Lysenko Date: Fri, 18 Dec 2015 17:14:43 -0500 Subject: [PATCH 1703/2091] drm/dp/mst: always send reply for UP request [ Upstream commit 1f16ee7fa13649f4e55aa48ad31c3eb0722a62d3 ] We should always send reply for UP request in order to make downstream device clean-up resources appropriately. Issue was that reply for UP request was sent only once. 
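A small userspace model of why only the first reply went out, assuming the behaviour described above (all names are illustrative): the old flow parks replies behind a flag that is never cleared, while the new flow transmits each reply immediately and frees it.

#include <stdbool.h>
#include <stdio.h>

static bool tx_up_in_progress;
static int replies_sent;

static void send_up_reply_old(void)
{
	if (!tx_up_in_progress) {
		replies_sent++;			/* transmit the queued reply */
		tx_up_in_progress = true;	/* ...but nothing ever clears this */
	}
}

static void send_up_reply_new(void)
{
	replies_sent++;				/* transmit right away, then free */
}

int main(void)
{
	replies_sent = 0;
	for (int i = 0; i < 3; i++)
		send_up_reply_old();
	printf("old flow: %d of 3 UP replies sent\n", replies_sent);

	replies_sent = 0;
	for (int i = 0; i < 3; i++)
		send_up_reply_new();
	printf("new flow: %d of 3 UP replies sent\n", replies_sent);
	return 0;
}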
Acked-by: Dave Airlie Signed-off-by: Mykola Lysenko Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Sasha Levin --- drivers/gpu/drm/drm_dp_mst_topology.c | 30 ++++++++++----------------- include/drm/drm_dp_mst_helper.h | 2 -- 2 files changed, 11 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index 87c4eeb376249..c5110579a5443 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -1475,26 +1475,18 @@ static void process_single_down_tx_qlock(struct drm_dp_mst_topology_mgr *mgr) } /* called holding qlock */ -static void process_single_up_tx_qlock(struct drm_dp_mst_topology_mgr *mgr) +static void process_single_up_tx_qlock(struct drm_dp_mst_topology_mgr *mgr, + struct drm_dp_sideband_msg_tx *txmsg) { - struct drm_dp_sideband_msg_tx *txmsg; int ret; /* construct a chunk from the first msg in the tx_msg queue */ - if (list_empty(&mgr->tx_msg_upq)) { - mgr->tx_up_in_progress = false; - return; - } - - txmsg = list_first_entry(&mgr->tx_msg_upq, struct drm_dp_sideband_msg_tx, next); ret = process_single_tx_qlock(mgr, txmsg, true); - if (ret == 1) { - /* up txmsgs aren't put in slots - so free after we send it */ - list_del(&txmsg->next); - kfree(txmsg); - } else if (ret) + + if (ret != 1) DRM_DEBUG_KMS("failed to send msg in q %d\n", ret); - mgr->tx_up_in_progress = true; + + txmsg->dst->tx_slots[txmsg->seqno] = NULL; } static void drm_dp_queue_down_tx(struct drm_dp_mst_topology_mgr *mgr, @@ -1879,11 +1871,12 @@ static int drm_dp_send_up_ack_reply(struct drm_dp_mst_topology_mgr *mgr, drm_dp_encode_up_ack_reply(txmsg, req_type); mutex_lock(&mgr->qlock); - list_add_tail(&txmsg->next, &mgr->tx_msg_upq); - if (!mgr->tx_up_in_progress) { - process_single_up_tx_qlock(mgr); - } + + process_single_up_tx_qlock(mgr, txmsg); + mutex_unlock(&mgr->qlock); + + kfree(txmsg); return 0; } @@ -2774,7 +2767,6 @@ int drm_dp_mst_topology_mgr_init(struct drm_dp_mst_topology_mgr *mgr, mutex_init(&mgr->qlock); mutex_init(&mgr->payload_lock); mutex_init(&mgr->destroy_connector_lock); - INIT_LIST_HEAD(&mgr->tx_msg_upq); INIT_LIST_HEAD(&mgr->tx_msg_downq); INIT_LIST_HEAD(&mgr->destroy_connector_list); INIT_WORK(&mgr->work, drm_dp_mst_link_probe_work); diff --git a/include/drm/drm_dp_mst_helper.h b/include/drm/drm_dp_mst_helper.h index a89f505c856bc..c7f01d1aa5622 100644 --- a/include/drm/drm_dp_mst_helper.h +++ b/include/drm/drm_dp_mst_helper.h @@ -449,9 +449,7 @@ struct drm_dp_mst_topology_mgr { the mstb tx_slots and txmsg->state once they are queued */ struct mutex qlock; struct list_head tx_msg_downq; - struct list_head tx_msg_upq; bool tx_down_in_progress; - bool tx_up_in_progress; /* payload info + lock for it */ struct mutex payload_lock; From 8605e8573eae0457d6d3a1826ba3253e56c3d644 Mon Sep 17 00:00:00 2001 From: Mykola Lysenko Date: Fri, 25 Dec 2015 16:14:47 +0800 Subject: [PATCH 1704/2091] drm/dp/mst: fix in MSTB RAD initialization [ Upstream commit 75af4c8c4c0f60d7ad135419805798f144e9baf9 ] This fix is needed to support more then two branch displays, so RAD address consist at least of 2 elements Acked-by: Dave Airlie Signed-off-by: Mykola Lysenko Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Sasha Levin --- drivers/gpu/drm/drm_dp_mst_topology.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index c5110579a5443..c7c1e1fbd33d1 100644 --- 
a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -968,17 +968,17 @@ static struct drm_dp_mst_port *drm_dp_get_port(struct drm_dp_mst_branch *mstb, u static u8 drm_dp_calculate_rad(struct drm_dp_mst_port *port, u8 *rad) { - int lct = port->parent->lct; + int parent_lct = port->parent->lct; int shift = 4; - int idx = lct / 2; - if (lct > 1) { - memcpy(rad, port->parent->rad, idx); - shift = (lct % 2) ? 4 : 0; + int idx = (parent_lct - 1) / 2; + if (parent_lct > 1) { + memcpy(rad, port->parent->rad, idx + 1); + shift = (parent_lct % 2) ? 4 : 0; } else rad[0] = 0; rad[idx] |= port->port_num << shift; - return lct + 1; + return parent_lct + 1; } /* From 67d0df37bf4235bc3aa1866571e74437cba33199 Mon Sep 17 00:00:00 2001 From: Mykola Lysenko Date: Fri, 25 Dec 2015 16:14:48 +0800 Subject: [PATCH 1705/2091] drm/dp/mst: fix in RAD element access [ Upstream commit 7a11a334aa6af4c65c6a0d81b60c97fc18673532 ] This is needed to receive correct port number from RAD, so MSTB could be found Acked-by: Dave Airlie Signed-off-by: Mykola Lysenko Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Sasha Levin --- drivers/gpu/drm/drm_dp_mst_topology.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index c7c1e1fbd33d1..2d3bad43ad907 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -1034,7 +1034,7 @@ static void build_mst_prop_path(struct drm_dp_mst_port *port, snprintf(proppath, proppath_size, "mst:%d", mstb->mgr->conn_base_id); for (i = 0; i < (mstb->lct - 1); i++) { int shift = (i % 2) ? 0 : 4; - int port_num = mstb->rad[i / 2] >> shift; + int port_num = (mstb->rad[i / 2] >> shift) & 0xf; snprintf(temp, sizeof(temp), "-%d", port_num); strlcat(proppath, temp, proppath_size); } @@ -1175,7 +1175,7 @@ static struct drm_dp_mst_branch *drm_dp_get_mst_branch_device(struct drm_dp_mst_ for (i = 0; i < lct - 1; i++) { int shift = (i % 2) ? 0 : 4; - int port_num = rad[i / 2] >> shift; + int port_num = (rad[i / 2] >> shift) & 0xf; list_for_each_entry(port, &mstb->ports, next) { if (port->port_num == port_num) { From 24ab3b8e796248bfbdb8a80469b9811b5df39caa Mon Sep 17 00:00:00 2001 From: Christoph Biedl Date: Wed, 23 Dec 2015 16:51:57 +0100 Subject: [PATCH 1706/2091] PCI: Fix minimum allocation address overwrite [ Upstream commit 3460baa620685c20f5ee19afb6d99d26150c382c ] Commit 36e097a8a297 ("PCI: Split out bridge window override of minimum allocation address") claimed to do no functional changes but unfortunately did: The "min" variable is altered. At least the AVM A1 PCMCIA adapter was no longer detected, breaking ISDN operation. Use a local copy of "min" to restore the previous behaviour. 
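The bug class is easy to reproduce outside the PCI core. In the self-contained sketch below the resource windows are reduced to plain integers (try_windows() and its arguments are illustrative, not the pci_bus_alloc_from_region() signature): mutating "min" while handling one window leaks the override into every later window, while a per-iteration copy keeps the caller's minimum intact.

#include <stdio.h>

static void try_windows(const long override[], int nwin, long min, int fixed)
{
	for (int i = 0; i < nwin; i++) {
		long min_used = min;

		if (override[i]) {	/* this window overrides "min" */
			if (fixed)
				min_used = override[i];
			else
				min = min_used = override[i];	/* leaks into later windows */
		}
		printf("%s: window %d allocated with min 0x%lx\n",
		       fixed ? "fixed" : "buggy", i, (unsigned long)min_used);
	}
}

int main(void)
{
	/* window 0 overrides the minimum, window 1 does not */
	const long override[] = { 0xa0000, 0 };

	try_windows(override, 2, 0x1000, 0);	/* window 1 wrongly uses 0xa0000 */
	try_windows(override, 2, 0x1000, 1);	/* window 1 keeps 0x1000 */
	return 0;
}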
[bhelgaas: avoid gcc "?:" extension for portability and readability] Fixes: 36e097a8a297 ("PCI: Split out bridge window override of minimum allocation address") Signed-off-by: Christoph Biedl Signed-off-by: Bjorn Helgaas CC: stable@vger.kernel.org # v3.14+ Signed-off-by: Sasha Levin --- drivers/pci/bus.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index d3346d23963b8..89b3befc71553 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -140,6 +140,8 @@ static int pci_bus_alloc_from_region(struct pci_bus *bus, struct resource *res, type_mask |= IORESOURCE_TYPE_BITS; pci_bus_for_each_resource(bus, r, i) { + resource_size_t min_used = min; + if (!r) continue; @@ -163,12 +165,12 @@ static int pci_bus_alloc_from_region(struct pci_bus *bus, struct resource *res, * overrides "min". */ if (avail.start) - min = avail.start; + min_used = avail.start; max = avail.end; /* Ok, try it out.. */ - ret = allocate_resource(r, res, size, min, max, + ret = allocate_resource(r, res, size, min_used, max, align, alignf, alignf_data); if (ret == 0) return 0; From 16870da043de536d1e4b866b594c60f0475f6739 Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Thu, 10 Dec 2015 21:18:20 +0200 Subject: [PATCH 1707/2091] PCI: host: Mark PCIe/PCI (MSI) IRQ cascade handlers as IRQF_NO_THREAD MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 8ff0ef996ca00028519c70e8d51d32bd37eb51dc ] On -RT, or if the kernel is booting with the "threadirqs" command-line parameter, PCIe/PCI (MSI) IRQ cascade handlers (like dra7xx_pcie_msi_irq_handler()) will be force-threaded and, as a result, will generate warnings like this: WARNING: CPU: 1 PID: 82 at kernel/irq/handle.c:150 handle_irq_event_percpu+0x14c/0x174() irq 460 handler irq_default_primary_handler+0x0/0x14 enabled interrupts Backtrace: (warn_slowpath_common) from (warn_slowpath_fmt+0x38/0x40) (warn_slowpath_fmt) from (handle_irq_event_percpu+0x14c/0x174) (handle_irq_event_percpu) from (handle_irq_event+0x84/0xb8) (handle_irq_event) from (handle_simple_irq+0x90/0x118) (handle_simple_irq) from (generic_handle_irq+0x30/0x44) (generic_handle_irq) from (dra7xx_pcie_msi_irq_handler+0x7c/0x8c) (dra7xx_pcie_msi_irq_handler) from (irq_forced_thread_fn+0x28/0x5c) (irq_forced_thread_fn) from (irq_thread+0x128/0x204) This happens because all of them invoke generic_handle_irq() from the requested handler. generic_handle_irq() grabs raw_locks and thus needs to run in raw-IRQ context. This issue was originally reproduced on a TI dra7-evm, but, as was identified during discussion [1], other hosts can also suffer from it. Fix them all at once by marking PCIe/PCI (MSI) IRQ cascade handlers IRQF_NO_THREAD explicitly.
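The shape of the change is the same in every affected driver. The sketch below uses placeholder names (example_msi_cascade_handler(), example_find_pending_msi(), struct example_pcie) rather than any real driver's symbols; the per-driver diffs that follow apply exactly this flag change.

#include <linux/interrupt.h>
#include <linux/irq.h>

static irqreturn_t example_msi_cascade_handler(int irq, void *arg)
{
	struct example_pcie *pcie = arg;
	unsigned int virq = example_find_pending_msi(pcie);	/* placeholder */

	generic_handle_irq(virq);	/* grabs raw locks, needs hard-IRQ context */
	return IRQ_HANDLED;
}

/* in probe(): never let forced threading move the cascade handler */
ret = devm_request_irq(dev, pp->msi_irq, example_msi_cascade_handler,
		       IRQF_SHARED | IRQF_NO_THREAD, "example-pcie-msi", pcie);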
[1] http://lkml.kernel.org/r/1448027966-21610-1-git-send-email-grygorii.strashko@ti.com [bhelgaas: add stable tag, fix typos] Signed-off-by: Grygorii Strashko Signed-off-by: Bjorn Helgaas Acked-by: Lucas Stach (for imx6) CC: stable@vger.kernel.org CC: Kishon Vijay Abraham I CC: Jingoo Han CC: Kukjin Kim CC: Krzysztof Kozlowski CC: Richard Zhu CC: Thierry Reding CC: Stephen Warren CC: Alexandre Courbot CC: Simon Horman CC: Pratyush Anand CC: Michal Simek CC: "Sören Brinkmann" CC: Sebastian Andrzej Siewior Signed-off-by: Sasha Levin --- drivers/pci/host/pci-dra7xx.c | 3 ++- drivers/pci/host/pci-exynos.c | 3 ++- drivers/pci/host/pci-imx6.c | 3 ++- drivers/pci/host/pci-tegra.c | 2 +- drivers/pci/host/pcie-rcar.c | 6 ++++-- drivers/pci/host/pcie-spear13xx.c | 3 ++- drivers/pci/host/pcie-xilinx.c | 3 ++- 7 files changed, 15 insertions(+), 8 deletions(-) diff --git a/drivers/pci/host/pci-dra7xx.c b/drivers/pci/host/pci-dra7xx.c index 2d57e19a2cd43..b5ae685aec610 100644 --- a/drivers/pci/host/pci-dra7xx.c +++ b/drivers/pci/host/pci-dra7xx.c @@ -289,7 +289,8 @@ static int __init dra7xx_add_pcie_port(struct dra7xx_pcie *dra7xx, } ret = devm_request_irq(&pdev->dev, pp->irq, - dra7xx_pcie_msi_irq_handler, IRQF_SHARED, + dra7xx_pcie_msi_irq_handler, + IRQF_SHARED | IRQF_NO_THREAD, "dra7-pcie-msi", pp); if (ret) { dev_err(&pdev->dev, "failed to request irq\n"); diff --git a/drivers/pci/host/pci-exynos.c b/drivers/pci/host/pci-exynos.c index c139237e0e523..5b2b83cb67ad8 100644 --- a/drivers/pci/host/pci-exynos.c +++ b/drivers/pci/host/pci-exynos.c @@ -527,7 +527,8 @@ static int __init exynos_add_pcie_port(struct pcie_port *pp, ret = devm_request_irq(&pdev->dev, pp->msi_irq, exynos_pcie_msi_irq_handler, - IRQF_SHARED, "exynos-pcie", pp); + IRQF_SHARED | IRQF_NO_THREAD, + "exynos-pcie", pp); if (ret) { dev_err(&pdev->dev, "failed to request msi irq\n"); return ret; diff --git a/drivers/pci/host/pci-imx6.c b/drivers/pci/host/pci-imx6.c index fdb95367721e9..ebcb0ac8512b5 100644 --- a/drivers/pci/host/pci-imx6.c +++ b/drivers/pci/host/pci-imx6.c @@ -534,7 +534,8 @@ static int __init imx6_add_pcie_port(struct pcie_port *pp, ret = devm_request_irq(&pdev->dev, pp->msi_irq, imx6_pcie_msi_handler, - IRQF_SHARED, "mx6-pcie-msi", pp); + IRQF_SHARED | IRQF_NO_THREAD, + "mx6-pcie-msi", pp); if (ret) { dev_err(&pdev->dev, "failed to request MSI irq\n"); return -ENODEV; diff --git a/drivers/pci/host/pci-tegra.c b/drivers/pci/host/pci-tegra.c index 00e92720d7f79..d9789d6ba47d5 100644 --- a/drivers/pci/host/pci-tegra.c +++ b/drivers/pci/host/pci-tegra.c @@ -1304,7 +1304,7 @@ static int tegra_pcie_enable_msi(struct tegra_pcie *pcie) msi->irq = err; - err = request_irq(msi->irq, tegra_pcie_msi_irq, 0, + err = request_irq(msi->irq, tegra_pcie_msi_irq, IRQF_NO_THREAD, tegra_msi_irq_chip.name, pcie); if (err < 0) { dev_err(&pdev->dev, "failed to request IRQ: %d\n", err); diff --git a/drivers/pci/host/pcie-rcar.c b/drivers/pci/host/pcie-rcar.c index c086210f2ffd1..56ce5640d91ae 100644 --- a/drivers/pci/host/pcie-rcar.c +++ b/drivers/pci/host/pcie-rcar.c @@ -695,14 +695,16 @@ static int rcar_pcie_enable_msi(struct rcar_pcie *pcie) /* Two irqs are for MSI, but they are also used for non-MSI irqs */ err = devm_request_irq(&pdev->dev, msi->irq1, rcar_pcie_msi_irq, - IRQF_SHARED, rcar_msi_irq_chip.name, pcie); + IRQF_SHARED | IRQF_NO_THREAD, + rcar_msi_irq_chip.name, pcie); if (err < 0) { dev_err(&pdev->dev, "failed to request IRQ: %d\n", err); goto err; } err = devm_request_irq(&pdev->dev, msi->irq2, rcar_pcie_msi_irq, - 
IRQF_SHARED, rcar_msi_irq_chip.name, pcie); + IRQF_SHARED | IRQF_NO_THREAD, + rcar_msi_irq_chip.name, pcie); if (err < 0) { dev_err(&pdev->dev, "failed to request IRQ: %d\n", err); goto err; diff --git a/drivers/pci/host/pcie-spear13xx.c b/drivers/pci/host/pcie-spear13xx.c index 020d788907191..4ea793eaa2bd5 100644 --- a/drivers/pci/host/pcie-spear13xx.c +++ b/drivers/pci/host/pcie-spear13xx.c @@ -281,7 +281,8 @@ static int spear13xx_add_pcie_port(struct pcie_port *pp, return -ENODEV; } ret = devm_request_irq(dev, pp->irq, spear13xx_pcie_irq_handler, - IRQF_SHARED, "spear1340-pcie", pp); + IRQF_SHARED | IRQF_NO_THREAD, + "spear1340-pcie", pp); if (ret) { dev_err(dev, "failed to request irq %d\n", pp->irq); return ret; diff --git a/drivers/pci/host/pcie-xilinx.c b/drivers/pci/host/pcie-xilinx.c index f1a06a091ccb5..577fe5b2f6175 100644 --- a/drivers/pci/host/pcie-xilinx.c +++ b/drivers/pci/host/pcie-xilinx.c @@ -776,7 +776,8 @@ static int xilinx_pcie_parse_dt(struct xilinx_pcie_port *port) port->irq = irq_of_parse_and_map(node, 0); err = devm_request_irq(dev, port->irq, xilinx_pcie_intr_handler, - IRQF_SHARED, "xilinx-pcie", port); + IRQF_SHARED | IRQF_NO_THREAD, + "xilinx-pcie", port); if (err) { dev_err(dev, "unable to request irq %d\n", port->irq); return err; From 1ecdadef5db6404bb57c9f369c0f026e1e9ac638 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Mon, 30 Nov 2015 17:27:06 +0100 Subject: [PATCH 1708/2091] btrfs: handle invalid num_stripes in sys_array [ Upstream commit f5cdedd73fa71b74dcc42f2a11a5735d89ce7c4f ] We can handle the special case of num_stripes == 0 directly inside btrfs_read_sys_array. The BUG_ON in btrfs_chunk_item_size is there to catch other unhandled cases where we fail to validate external data. A crafted or corrupted image crashes at mount time: BTRFS: device fsid 9006933e-2a9a-44f0-917f-514252aeec2c devid 1 transid 7 /dev/loop0 BTRFS info (device loop0): disk space caching is enabled BUG: failure at fs/btrfs/ctree.h:337/btrfs_chunk_item_size()! Kernel panic - not syncing: BUG! 
CPU: 0 PID: 313 Comm: mount Not tainted 4.2.5-00657-ge047887-dirty #25 Stack: 637af890 60062489 602aeb2e 604192ba 60387961 00000011 637af8a0 6038a835 637af9c0 6038776b 634ef32b 00000000 Call Trace: [<6001c86d>] show_stack+0xfe/0x15b [<6038a835>] dump_stack+0x2a/0x2c [<6038776b>] panic+0x13e/0x2b3 [<6020f099>] btrfs_read_sys_array+0x25d/0x2ff [<601cfbbe>] open_ctree+0x192d/0x27af [<6019c2c1>] btrfs_mount+0x8f5/0xb9a [<600bc9a7>] mount_fs+0x11/0xf3 [<600d5167>] vfs_kern_mount+0x75/0x11a [<6019bcb0>] btrfs_mount+0x2e4/0xb9a [<600bc9a7>] mount_fs+0x11/0xf3 [<600d5167>] vfs_kern_mount+0x75/0x11a [<600d710b>] do_mount+0xa35/0xbc9 [<600d7557>] SyS_mount+0x95/0xc8 [<6001e884>] handle_syscall+0x6b/0x8e Reported-by: Jiri Slaby Reported-by: Vegard Nossum CC: stable@vger.kernel.org # 3.19+ Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/volumes.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 174f5e1e00abf..5113b7257b453 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -6322,6 +6322,14 @@ int btrfs_read_sys_array(struct btrfs_root *root) goto out_short_read; num_stripes = btrfs_chunk_num_stripes(sb, chunk); + if (!num_stripes) { + printk(KERN_ERR + "BTRFS: invalid number of stripes %u in sys_array at offset %u\n", + num_stripes, cur_offset); + ret = -EIO; + break; + } + len = btrfs_chunk_item_size(num_stripes); if (cur_offset + len > array_size) goto out_short_read; From 01999150f50e945ee85914c37a1337784c1993c8 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Mon, 1 Feb 2016 15:02:44 -0500 Subject: [PATCH 1709/2091] iwlwifi: update and fix 7265 series PCI IDs [ Upstream commit 006bda75d81fd27a583a3b310e9444fea2aa6ef2 ] Update and fix some 7265 PCI IDs entries. CC: [3.13+] Signed-off-by: Oren Givon Signed-off-by: Emmanuel Grumbach Signed-off-by: Sasha Levin --- drivers/net/wireless/iwlwifi/pcie/drv.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/pcie/drv.c b/drivers/net/wireless/iwlwifi/pcie/drv.c index 88bf80a942b4f..9faf69875faba 100644 --- a/drivers/net/wireless/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/iwlwifi/pcie/drv.c @@ -382,6 +382,7 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x095B, 0x5310, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095B, 0x5302, iwl7265_n_cfg)}, {IWL_PCI_DEVICE(0x095B, 0x5210, iwl7265_2ac_cfg)}, + {IWL_PCI_DEVICE(0x095A, 0x5C10, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095A, 0x5012, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095A, 0x5412, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095A, 0x5410, iwl7265_2ac_cfg)}, @@ -399,10 +400,10 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x095A, 0x900A, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095A, 0x9110, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095A, 0x9112, iwl7265_2ac_cfg)}, - {IWL_PCI_DEVICE(0x095A, 0x9210, iwl7265_2ac_cfg)}, + {IWL_PCI_DEVICE(0x095B, 0x9210, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095B, 0x9200, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095A, 0x9510, iwl7265_2ac_cfg)}, - {IWL_PCI_DEVICE(0x095A, 0x9310, iwl7265_2ac_cfg)}, + {IWL_PCI_DEVICE(0x095B, 0x9310, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095A, 0x9410, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095A, 0x5020, iwl7265_2n_cfg)}, {IWL_PCI_DEVICE(0x095A, 0x502A, iwl7265_2n_cfg)}, From d9d42b679e3653759b107ff99daf8da9c21aaec7 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Tue, 5 Jan 2016 15:25:43 +0200 Subject: [PATCH 1710/2091] iwlwifi: pcie: properly configure the debug buffer size for 8000 [ 
Upstream commit 62d7476d958ce06d7a10b02bdb30006870286fe2 ] 8000 device family has a new debug engine that needs to be configured differently than 7000's. The debug engine's DMA works in chunks of memory and the size of the buffer really means the start of the last chunk. Since one chunk is 256-byte long, we should configure the device to write to buffer_size - 256. This fixes a situation were the device would write to memory it is not allowed to access. CC: [4.1+] Signed-off-by: Emmanuel Grumbach Signed-off-by: Sasha Levin --- .../net/wireless/intel/iwlwifi/pcie/trans.c | 2717 +++++++++++++++++ drivers/net/wireless/iwlwifi/pcie/trans.c | 15 +- 2 files changed, 2729 insertions(+), 3 deletions(-) create mode 100644 drivers/net/wireless/intel/iwlwifi/pcie/trans.c diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c new file mode 100644 index 0000000000000..d60a467a983c6 --- /dev/null +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -0,0 +1,2717 @@ +/****************************************************************************** + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2007 - 2015 Intel Corporation. All rights reserved. + * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH + * Copyright(c) 2016 Intel Deutschland GmbH + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110, + * USA + * + * The full GNU General Public License is included in this distribution + * in the file called COPYING. + * + * Contact Information: + * Intel Linux Wireless + * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + * + * BSD LICENSE + * + * Copyright(c) 2005 - 2015 Intel Corporation. All rights reserved. + * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH + * Copyright(c) 2016 Intel Deutschland GmbH + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *****************************************************************************/ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "iwl-drv.h" +#include "iwl-trans.h" +#include "iwl-csr.h" +#include "iwl-prph.h" +#include "iwl-scd.h" +#include "iwl-agn-hw.h" +#include "iwl-fw-error-dump.h" +#include "internal.h" +#include "iwl-fh.h" + +/* extended range in FW SRAM */ +#define IWL_FW_MEM_EXTENDED_START 0x40000 +#define IWL_FW_MEM_EXTENDED_END 0x57FFF + +static void iwl_pcie_free_fw_monitor(struct iwl_trans *trans) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + + if (!trans_pcie->fw_mon_page) + return; + + dma_unmap_page(trans->dev, trans_pcie->fw_mon_phys, + trans_pcie->fw_mon_size, DMA_FROM_DEVICE); + __free_pages(trans_pcie->fw_mon_page, + get_order(trans_pcie->fw_mon_size)); + trans_pcie->fw_mon_page = NULL; + trans_pcie->fw_mon_phys = 0; + trans_pcie->fw_mon_size = 0; +} + +static void iwl_pcie_alloc_fw_monitor(struct iwl_trans *trans, u8 max_power) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + struct page *page = NULL; + dma_addr_t phys; + u32 size = 0; + u8 power; + + if (!max_power) { + /* default max_power is maximum */ + max_power = 26; + } else { + max_power += 11; + } + + if (WARN(max_power > 26, + "External buffer size for monitor is too big %d, check the FW TLV\n", + max_power)) + return; + + if (trans_pcie->fw_mon_page) { + dma_sync_single_for_device(trans->dev, trans_pcie->fw_mon_phys, + trans_pcie->fw_mon_size, + DMA_FROM_DEVICE); + return; + } + + phys = 0; + for (power = max_power; power >= 11; power--) { + int order; + + size = BIT(power); + order = get_order(size); + page = alloc_pages(__GFP_COMP | __GFP_NOWARN | __GFP_ZERO, + order); + if (!page) + continue; + + phys = dma_map_page(trans->dev, page, 0, PAGE_SIZE << order, + DMA_FROM_DEVICE); + if (dma_mapping_error(trans->dev, phys)) { + __free_pages(page, order); + page = NULL; + continue; + } + IWL_INFO(trans, + "Allocated 0x%08x bytes (order %d) for firmware monitor.\n", + size, order); + break; + } + + if (WARN_ON_ONCE(!page)) + return; + + if (power != max_power) + IWL_ERR(trans, + "Sorry - debug buffer is only %luK while you requested %luK\n", + (unsigned long)BIT(power - 10), + (unsigned long)BIT(max_power - 10)); + + trans_pcie->fw_mon_page = page; + trans_pcie->fw_mon_phys = phys; + trans_pcie->fw_mon_size = size; +} + +static u32 iwl_trans_pcie_read_shr(struct iwl_trans *trans, u32 reg) +{ + iwl_write32(trans, HEEP_CTRL_WRD_PCIEX_CTRL_REG, + ((reg & 0x0000ffff) | (2 << 28))); + return iwl_read32(trans, HEEP_CTRL_WRD_PCIEX_DATA_REG); +} + +static void iwl_trans_pcie_write_shr(struct iwl_trans *trans, u32 reg, u32 val) +{ 
+ iwl_write32(trans, HEEP_CTRL_WRD_PCIEX_DATA_REG, val); + iwl_write32(trans, HEEP_CTRL_WRD_PCIEX_CTRL_REG, + ((reg & 0x0000ffff) | (3 << 28))); +} + +static void iwl_pcie_set_pwr(struct iwl_trans *trans, bool vaux) +{ + if (trans->cfg->apmg_not_supported) + return; + + if (vaux && pci_pme_capable(to_pci_dev(trans->dev), PCI_D3cold)) + iwl_set_bits_mask_prph(trans, APMG_PS_CTRL_REG, + APMG_PS_CTRL_VAL_PWR_SRC_VAUX, + ~APMG_PS_CTRL_MSK_PWR_SRC); + else + iwl_set_bits_mask_prph(trans, APMG_PS_CTRL_REG, + APMG_PS_CTRL_VAL_PWR_SRC_VMAIN, + ~APMG_PS_CTRL_MSK_PWR_SRC); +} + +/* PCI registers */ +#define PCI_CFG_RETRY_TIMEOUT 0x041 + +static void iwl_pcie_apm_config(struct iwl_trans *trans) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + u16 lctl; + u16 cap; + + /* + * HW bug W/A for instability in PCIe bus L0S->L1 transition. + * Check if BIOS (or OS) enabled L1-ASPM on this device. + * If so (likely), disable L0S, so device moves directly L0->L1; + * costs negligible amount of power savings. + * If not (unlikely), enable L0S, so there is at least some + * power savings, even without L1. + */ + pcie_capability_read_word(trans_pcie->pci_dev, PCI_EXP_LNKCTL, &lctl); + if (lctl & PCI_EXP_LNKCTL_ASPM_L1) + iwl_set_bit(trans, CSR_GIO_REG, CSR_GIO_REG_VAL_L0S_ENABLED); + else + iwl_clear_bit(trans, CSR_GIO_REG, CSR_GIO_REG_VAL_L0S_ENABLED); + trans->pm_support = !(lctl & PCI_EXP_LNKCTL_ASPM_L0S); + + pcie_capability_read_word(trans_pcie->pci_dev, PCI_EXP_DEVCTL2, &cap); + trans->ltr_enabled = cap & PCI_EXP_DEVCTL2_LTR_EN; + dev_info(trans->dev, "L1 %sabled - LTR %sabled\n", + (lctl & PCI_EXP_LNKCTL_ASPM_L1) ? "En" : "Dis", + trans->ltr_enabled ? "En" : "Dis"); +} + +/* + * Start up NIC's basic functionality after it has been reset + * (e.g. after platform boot, or shutdown via iwl_pcie_apm_stop()) + * NOTE: This does not load uCode nor start the embedded processor + */ +static int iwl_pcie_apm_init(struct iwl_trans *trans) +{ + int ret = 0; + IWL_DEBUG_INFO(trans, "Init card's basic functions\n"); + + /* + * Use "set_bit" below rather than "write", to preserve any hardware + * bits already set by default after reset. + */ + + /* Disable L0S exit timer (platform NMI Work/Around) */ + if (trans->cfg->device_family != IWL_DEVICE_FAMILY_8000) + iwl_set_bit(trans, CSR_GIO_CHICKEN_BITS, + CSR_GIO_CHICKEN_BITS_REG_BIT_DIS_L0S_EXIT_TIMER); + + /* + * Disable L0s without affecting L1; + * don't wait for ICH L0s (ICH bug W/A) + */ + iwl_set_bit(trans, CSR_GIO_CHICKEN_BITS, + CSR_GIO_CHICKEN_BITS_REG_BIT_L1A_NO_L0S_RX); + + /* Set FH wait threshold to maximum (HW error during stress W/A) */ + iwl_set_bit(trans, CSR_DBG_HPET_MEM_REG, CSR_DBG_HPET_MEM_REG_VAL); + + /* + * Enable HAP INTA (interrupt from management bus) to + * wake device's PCI Express link L1a -> L0s + */ + iwl_set_bit(trans, CSR_HW_IF_CONFIG_REG, + CSR_HW_IF_CONFIG_REG_BIT_HAP_WAKE_L1A); + + iwl_pcie_apm_config(trans); + + /* Configure analog phase-lock-loop before activating to D0A */ + if (trans->cfg->base_params->pll_cfg_val) + iwl_set_bit(trans, CSR_ANA_PLL_CFG, + trans->cfg->base_params->pll_cfg_val); + + /* + * Set "initialization complete" bit to move adapter from + * D0U* --> D0A* (powered-up active) state. + */ + iwl_set_bit(trans, CSR_GP_CNTRL, CSR_GP_CNTRL_REG_FLAG_INIT_DONE); + + /* + * Wait for clock stabilization; once stabilized, access to + * device-internal resources is supported, e.g. iwl_write_prph() + * and accesses to uCode SRAM. 
+ */ + ret = iwl_poll_bit(trans, CSR_GP_CNTRL, + CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY, + CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY, 25000); + if (ret < 0) { + IWL_DEBUG_INFO(trans, "Failed to init the card\n"); + goto out; + } + + if (trans->cfg->host_interrupt_operation_mode) { + /* + * This is a bit of an abuse - This is needed for 7260 / 3160 + * only check host_interrupt_operation_mode even if this is + * not related to host_interrupt_operation_mode. + * + * Enable the oscillator to count wake up time for L1 exit. This + * consumes slightly more power (100uA) - but allows to be sure + * that we wake up from L1 on time. + * + * This looks weird: read twice the same register, discard the + * value, set a bit, and yet again, read that same register + * just to discard the value. But that's the way the hardware + * seems to like it. + */ + iwl_read_prph(trans, OSC_CLK); + iwl_read_prph(trans, OSC_CLK); + iwl_set_bits_prph(trans, OSC_CLK, OSC_CLK_FORCE_CONTROL); + iwl_read_prph(trans, OSC_CLK); + iwl_read_prph(trans, OSC_CLK); + } + + /* + * Enable DMA clock and wait for it to stabilize. + * + * Write to "CLK_EN_REG"; "1" bits enable clocks, while "0" + * bits do not disable clocks. This preserves any hardware + * bits already set by default in "CLK_CTRL_REG" after reset. + */ + if (!trans->cfg->apmg_not_supported) { + iwl_write_prph(trans, APMG_CLK_EN_REG, + APMG_CLK_VAL_DMA_CLK_RQT); + udelay(20); + + /* Disable L1-Active */ + iwl_set_bits_prph(trans, APMG_PCIDEV_STT_REG, + APMG_PCIDEV_STT_VAL_L1_ACT_DIS); + + /* Clear the interrupt in APMG if the NIC is in RFKILL */ + iwl_write_prph(trans, APMG_RTC_INT_STT_REG, + APMG_RTC_INT_STT_RFKILL); + } + + set_bit(STATUS_DEVICE_ENABLED, &trans->status); + +out: + return ret; +} + +/* + * Enable LP XTAL to avoid HW bug where device may consume much power if + * FW is not loaded after device reset. LP XTAL is disabled by default + * after device HW reset. Do it only if XTAL is fed by internal source. + * Configure device's "persistence" mode to avoid resetting XTAL again when + * SHRD_HW_RST occurs in S3. + */ +static void iwl_pcie_apm_lp_xtal_enable(struct iwl_trans *trans) +{ + int ret; + u32 apmg_gp1_reg; + u32 apmg_xtal_cfg_reg; + u32 dl_cfg_reg; + + /* Force XTAL ON */ + __iwl_trans_pcie_set_bit(trans, CSR_GP_CNTRL, + CSR_GP_CNTRL_REG_FLAG_XTAL_ON); + + /* Reset entire device - do controller reset (results in SHRD_HW_RST) */ + iwl_set_bit(trans, CSR_RESET, CSR_RESET_REG_FLAG_SW_RESET); + + udelay(10); + + /* + * Set "initialization complete" bit to move adapter from + * D0U* --> D0A* (powered-up active) state. + */ + iwl_set_bit(trans, CSR_GP_CNTRL, CSR_GP_CNTRL_REG_FLAG_INIT_DONE); + + /* + * Wait for clock stabilization; once stabilized, access to + * device-internal resources is possible. + */ + ret = iwl_poll_bit(trans, CSR_GP_CNTRL, + CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY, + CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY, + 25000); + if (WARN_ON(ret < 0)) { + IWL_ERR(trans, "Access time out - failed to enable LP XTAL\n"); + /* Release XTAL ON request */ + __iwl_trans_pcie_clear_bit(trans, CSR_GP_CNTRL, + CSR_GP_CNTRL_REG_FLAG_XTAL_ON); + return; + } + + /* + * Clear "disable persistence" to avoid LP XTAL resetting when + * SHRD_HW_RST is applied in S3. + */ + iwl_clear_bits_prph(trans, APMG_PCIDEV_STT_REG, + APMG_PCIDEV_STT_VAL_PERSIST_DIS); + + /* + * Force APMG XTAL to be active to prevent its disabling by HW + * caused by APMG idle state. 
+ */ + apmg_xtal_cfg_reg = iwl_trans_pcie_read_shr(trans, + SHR_APMG_XTAL_CFG_REG); + iwl_trans_pcie_write_shr(trans, SHR_APMG_XTAL_CFG_REG, + apmg_xtal_cfg_reg | + SHR_APMG_XTAL_CFG_XTAL_ON_REQ); + + /* + * Reset entire device again - do controller reset (results in + * SHRD_HW_RST). Turn MAC off before proceeding. + */ + iwl_set_bit(trans, CSR_RESET, CSR_RESET_REG_FLAG_SW_RESET); + + udelay(10); + + /* Enable LP XTAL by indirect access through CSR */ + apmg_gp1_reg = iwl_trans_pcie_read_shr(trans, SHR_APMG_GP1_REG); + iwl_trans_pcie_write_shr(trans, SHR_APMG_GP1_REG, apmg_gp1_reg | + SHR_APMG_GP1_WF_XTAL_LP_EN | + SHR_APMG_GP1_CHICKEN_BIT_SELECT); + + /* Clear delay line clock power up */ + dl_cfg_reg = iwl_trans_pcie_read_shr(trans, SHR_APMG_DL_CFG_REG); + iwl_trans_pcie_write_shr(trans, SHR_APMG_DL_CFG_REG, dl_cfg_reg & + ~SHR_APMG_DL_CFG_DL_CLOCK_POWER_UP); + + /* + * Enable persistence mode to avoid LP XTAL resetting when + * SHRD_HW_RST is applied in S3. + */ + iwl_set_bit(trans, CSR_HW_IF_CONFIG_REG, + CSR_HW_IF_CONFIG_REG_PERSIST_MODE); + + /* + * Clear "initialization complete" bit to move adapter from + * D0A* (powered-up Active) --> D0U* (Uninitialized) state. + */ + iwl_clear_bit(trans, CSR_GP_CNTRL, + CSR_GP_CNTRL_REG_FLAG_INIT_DONE); + + /* Activates XTAL resources monitor */ + __iwl_trans_pcie_set_bit(trans, CSR_MONITOR_CFG_REG, + CSR_MONITOR_XTAL_RESOURCES); + + /* Release XTAL ON request */ + __iwl_trans_pcie_clear_bit(trans, CSR_GP_CNTRL, + CSR_GP_CNTRL_REG_FLAG_XTAL_ON); + udelay(10); + + /* Release APMG XTAL */ + iwl_trans_pcie_write_shr(trans, SHR_APMG_XTAL_CFG_REG, + apmg_xtal_cfg_reg & + ~SHR_APMG_XTAL_CFG_XTAL_ON_REQ); +} + +static int iwl_pcie_apm_stop_master(struct iwl_trans *trans) +{ + int ret = 0; + + /* stop device's busmaster DMA activity */ + iwl_set_bit(trans, CSR_RESET, CSR_RESET_REG_FLAG_STOP_MASTER); + + ret = iwl_poll_bit(trans, CSR_RESET, + CSR_RESET_REG_FLAG_MASTER_DISABLED, + CSR_RESET_REG_FLAG_MASTER_DISABLED, 100); + if (ret < 0) + IWL_WARN(trans, "Master Disable Timed Out, 100 usec\n"); + + IWL_DEBUG_INFO(trans, "stop master\n"); + + return ret; +} + +static void iwl_pcie_apm_stop(struct iwl_trans *trans, bool op_mode_leave) +{ + IWL_DEBUG_INFO(trans, "Stop card, put in low power state\n"); + + if (op_mode_leave) { + if (!test_bit(STATUS_DEVICE_ENABLED, &trans->status)) + iwl_pcie_apm_init(trans); + + /* inform ME that we are leaving */ + if (trans->cfg->device_family == IWL_DEVICE_FAMILY_7000) + iwl_set_bits_prph(trans, APMG_PCIDEV_STT_REG, + APMG_PCIDEV_STT_VAL_WAKE_ME); + else if (trans->cfg->device_family == IWL_DEVICE_FAMILY_8000) { + iwl_set_bit(trans, CSR_DBG_LINK_PWR_MGMT_REG, + CSR_RESET_LINK_PWR_MGMT_DISABLED); + iwl_set_bit(trans, CSR_HW_IF_CONFIG_REG, + CSR_HW_IF_CONFIG_REG_PREPARE | + CSR_HW_IF_CONFIG_REG_ENABLE_PME); + mdelay(1); + iwl_clear_bit(trans, CSR_DBG_LINK_PWR_MGMT_REG, + CSR_RESET_LINK_PWR_MGMT_DISABLED); + } + mdelay(5); + } + + clear_bit(STATUS_DEVICE_ENABLED, &trans->status); + + /* Stop device's DMA activity */ + iwl_pcie_apm_stop_master(trans); + + if (trans->cfg->lp_xtal_workaround) { + iwl_pcie_apm_lp_xtal_enable(trans); + return; + } + + /* Reset the entire device */ + iwl_set_bit(trans, CSR_RESET, CSR_RESET_REG_FLAG_SW_RESET); + + udelay(10); + + /* + * Clear "initialization complete" bit to move adapter from + * D0A* (powered-up Active) --> D0U* (Uninitialized) state. 
+ */ + iwl_clear_bit(trans, CSR_GP_CNTRL, + CSR_GP_CNTRL_REG_FLAG_INIT_DONE); +} + +static int iwl_pcie_nic_init(struct iwl_trans *trans) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + + /* nic_init */ + spin_lock(&trans_pcie->irq_lock); + iwl_pcie_apm_init(trans); + + spin_unlock(&trans_pcie->irq_lock); + + iwl_pcie_set_pwr(trans, false); + + iwl_op_mode_nic_config(trans->op_mode); + + /* Allocate the RX queue, or reset if it is already allocated */ + iwl_pcie_rx_init(trans); + + /* Allocate or reset and init all Tx and Command queues */ + if (iwl_pcie_tx_init(trans)) + return -ENOMEM; + + if (trans->cfg->base_params->shadow_reg_enable) { + /* enable shadow regs in HW */ + iwl_set_bit(trans, CSR_MAC_SHADOW_REG_CTRL, 0x800FFFFF); + IWL_DEBUG_INFO(trans, "Enabling shadow registers in device\n"); + } + + return 0; +} + +#define HW_READY_TIMEOUT (50) + +/* Note: returns poll_bit return value, which is >= 0 if success */ +static int iwl_pcie_set_hw_ready(struct iwl_trans *trans) +{ + int ret; + + iwl_set_bit(trans, CSR_HW_IF_CONFIG_REG, + CSR_HW_IF_CONFIG_REG_BIT_NIC_READY); + + /* See if we got it */ + ret = iwl_poll_bit(trans, CSR_HW_IF_CONFIG_REG, + CSR_HW_IF_CONFIG_REG_BIT_NIC_READY, + CSR_HW_IF_CONFIG_REG_BIT_NIC_READY, + HW_READY_TIMEOUT); + + if (ret >= 0) + iwl_set_bit(trans, CSR_MBOX_SET_REG, CSR_MBOX_SET_REG_OS_ALIVE); + + IWL_DEBUG_INFO(trans, "hardware%s ready\n", ret < 0 ? " not" : ""); + return ret; +} + +/* Note: returns standard 0/-ERROR code */ +static int iwl_pcie_prepare_card_hw(struct iwl_trans *trans) +{ + int ret; + int t = 0; + int iter; + + IWL_DEBUG_INFO(trans, "iwl_trans_prepare_card_hw enter\n"); + + ret = iwl_pcie_set_hw_ready(trans); + /* If the card is ready, exit 0 */ + if (ret >= 0) + return 0; + + iwl_set_bit(trans, CSR_DBG_LINK_PWR_MGMT_REG, + CSR_RESET_LINK_PWR_MGMT_DISABLED); + msleep(1); + + for (iter = 0; iter < 10; iter++) { + /* If HW is not ready, prepare the conditions to check again */ + iwl_set_bit(trans, CSR_HW_IF_CONFIG_REG, + CSR_HW_IF_CONFIG_REG_PREPARE); + + do { + ret = iwl_pcie_set_hw_ready(trans); + if (ret >= 0) + return 0; + + usleep_range(200, 1000); + t += 200; + } while (t < 150000); + msleep(25); + } + + IWL_ERR(trans, "Couldn't prepare the card\n"); + + return ret; +} + +/* + * ucode + */ +static int iwl_pcie_load_firmware_chunk(struct iwl_trans *trans, u32 dst_addr, + dma_addr_t phy_addr, u32 byte_cnt) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + int ret; + + trans_pcie->ucode_write_complete = false; + + iwl_write_direct32(trans, + FH_TCSR_CHNL_TX_CONFIG_REG(FH_SRVC_CHNL), + FH_TCSR_TX_CONFIG_REG_VAL_DMA_CHNL_PAUSE); + + iwl_write_direct32(trans, + FH_SRVC_CHNL_SRAM_ADDR_REG(FH_SRVC_CHNL), + dst_addr); + + iwl_write_direct32(trans, + FH_TFDIB_CTRL0_REG(FH_SRVC_CHNL), + phy_addr & FH_MEM_TFDIB_DRAM_ADDR_LSB_MSK); + + iwl_write_direct32(trans, + FH_TFDIB_CTRL1_REG(FH_SRVC_CHNL), + (iwl_get_dma_hi_addr(phy_addr) + << FH_MEM_TFDIB_REG1_ADDR_BITSHIFT) | byte_cnt); + + iwl_write_direct32(trans, + FH_TCSR_CHNL_TX_BUF_STS_REG(FH_SRVC_CHNL), + 1 << FH_TCSR_CHNL_TX_BUF_STS_REG_POS_TB_NUM | + 1 << FH_TCSR_CHNL_TX_BUF_STS_REG_POS_TB_IDX | + FH_TCSR_CHNL_TX_BUF_STS_REG_VAL_TFDB_VALID); + + iwl_write_direct32(trans, + FH_TCSR_CHNL_TX_CONFIG_REG(FH_SRVC_CHNL), + FH_TCSR_TX_CONFIG_REG_VAL_DMA_CHNL_ENABLE | + FH_TCSR_TX_CONFIG_REG_VAL_DMA_CREDIT_DISABLE | + FH_TCSR_TX_CONFIG_REG_VAL_CIRQ_HOST_ENDTFD); + + ret = wait_event_timeout(trans_pcie->ucode_write_waitq, + 
trans_pcie->ucode_write_complete, 5 * HZ); + if (!ret) { + IWL_ERR(trans, "Failed to load firmware chunk!\n"); + return -ETIMEDOUT; + } + + return 0; +} + +static int iwl_pcie_load_section(struct iwl_trans *trans, u8 section_num, + const struct fw_desc *section) +{ + u8 *v_addr; + dma_addr_t p_addr; + u32 offset, chunk_sz = min_t(u32, FH_MEM_TB_MAX_LENGTH, section->len); + int ret = 0; + + IWL_DEBUG_FW(trans, "[%d] uCode section being loaded...\n", + section_num); + + v_addr = dma_alloc_coherent(trans->dev, chunk_sz, &p_addr, + GFP_KERNEL | __GFP_NOWARN); + if (!v_addr) { + IWL_DEBUG_INFO(trans, "Falling back to small chunks of DMA\n"); + chunk_sz = PAGE_SIZE; + v_addr = dma_alloc_coherent(trans->dev, chunk_sz, + &p_addr, GFP_KERNEL); + if (!v_addr) + return -ENOMEM; + } + + for (offset = 0; offset < section->len; offset += chunk_sz) { + u32 copy_size, dst_addr; + bool extended_addr = false; + + copy_size = min_t(u32, chunk_sz, section->len - offset); + dst_addr = section->offset + offset; + + if (dst_addr >= IWL_FW_MEM_EXTENDED_START && + dst_addr <= IWL_FW_MEM_EXTENDED_END) + extended_addr = true; + + if (extended_addr) + iwl_set_bits_prph(trans, LMPM_CHICK, + LMPM_CHICK_EXTENDED_ADDR_SPACE); + + memcpy(v_addr, (u8 *)section->data + offset, copy_size); + ret = iwl_pcie_load_firmware_chunk(trans, dst_addr, p_addr, + copy_size); + + if (extended_addr) + iwl_clear_bits_prph(trans, LMPM_CHICK, + LMPM_CHICK_EXTENDED_ADDR_SPACE); + + if (ret) { + IWL_ERR(trans, + "Could not load the [%d] uCode section\n", + section_num); + break; + } + } + + dma_free_coherent(trans->dev, chunk_sz, v_addr, p_addr); + return ret; +} + +/* + * Driver Takes the ownership on secure machine before FW load + * and prevent race with the BT load. + * W/A for ROM bug. (should be remove in the next Si step) + */ +static int iwl_pcie_rsa_race_bug_wa(struct iwl_trans *trans) +{ + u32 val, loop = 1000; + + /* + * Check the RSA semaphore is accessible. + * If the HW isn't locked and the rsa semaphore isn't accessible, + * we are in trouble. + */ + val = iwl_read_prph(trans, PREG_AUX_BUS_WPROT_0); + if (val & (BIT(1) | BIT(17))) { + IWL_INFO(trans, + "can't access the RSA semaphore it is write protected\n"); + return 0; + } + + /* take ownership on the AUX IF */ + iwl_write_prph(trans, WFPM_CTRL_REG, WFPM_AUX_CTL_AUX_IF_MAC_OWNER_MSK); + iwl_write_prph(trans, AUX_MISC_MASTER1_EN, AUX_MISC_MASTER1_EN_SBE_MSK); + + do { + iwl_write_prph(trans, AUX_MISC_MASTER1_SMPHR_STATUS, 0x1); + val = iwl_read_prph(trans, AUX_MISC_MASTER1_SMPHR_STATUS); + if (val == 0x1) { + iwl_write_prph(trans, RSA_ENABLE, 0); + return 0; + } + + udelay(10); + loop--; + } while (loop > 0); + + IWL_ERR(trans, "Failed to take ownership on secure machine\n"); + return -EIO; +} + +static int iwl_pcie_load_cpu_sections_8000(struct iwl_trans *trans, + const struct fw_img *image, + int cpu, + int *first_ucode_section) +{ + int shift_param; + int i, ret = 0, sec_num = 0x1; + u32 val, last_read_idx = 0; + + if (cpu == 1) { + shift_param = 0; + *first_ucode_section = 0; + } else { + shift_param = 16; + (*first_ucode_section)++; + } + + for (i = *first_ucode_section; i < IWL_UCODE_SECTION_MAX; i++) { + last_read_idx = i; + + /* + * CPU1_CPU2_SEPARATOR_SECTION delimiter - separate between + * CPU1 to CPU2. + * PAGING_SEPARATOR_SECTION delimiter - separate between + * CPU2 non paged to CPU2 paging sec. 
+ */ + if (!image->sec[i].data || + image->sec[i].offset == CPU1_CPU2_SEPARATOR_SECTION || + image->sec[i].offset == PAGING_SEPARATOR_SECTION) { + IWL_DEBUG_FW(trans, + "Break since Data not valid or Empty section, sec = %d\n", + i); + break; + } + + ret = iwl_pcie_load_section(trans, i, &image->sec[i]); + if (ret) + return ret; + + /* Notify the ucode of the loaded section number and status */ + val = iwl_read_direct32(trans, FH_UCODE_LOAD_STATUS); + val = val | (sec_num << shift_param); + iwl_write_direct32(trans, FH_UCODE_LOAD_STATUS, val); + sec_num = (sec_num << 1) | 0x1; + } + + *first_ucode_section = last_read_idx; + + if (cpu == 1) + iwl_write_direct32(trans, FH_UCODE_LOAD_STATUS, 0xFFFF); + else + iwl_write_direct32(trans, FH_UCODE_LOAD_STATUS, 0xFFFFFFFF); + + return 0; +} + +static int iwl_pcie_load_cpu_sections(struct iwl_trans *trans, + const struct fw_img *image, + int cpu, + int *first_ucode_section) +{ + int shift_param; + int i, ret = 0; + u32 last_read_idx = 0; + + if (cpu == 1) { + shift_param = 0; + *first_ucode_section = 0; + } else { + shift_param = 16; + (*first_ucode_section)++; + } + + for (i = *first_ucode_section; i < IWL_UCODE_SECTION_MAX; i++) { + last_read_idx = i; + + /* + * CPU1_CPU2_SEPARATOR_SECTION delimiter - separate between + * CPU1 to CPU2. + * PAGING_SEPARATOR_SECTION delimiter - separate between + * CPU2 non paged to CPU2 paging sec. + */ + if (!image->sec[i].data || + image->sec[i].offset == CPU1_CPU2_SEPARATOR_SECTION || + image->sec[i].offset == PAGING_SEPARATOR_SECTION) { + IWL_DEBUG_FW(trans, + "Break since Data not valid or Empty section, sec = %d\n", + i); + break; + } + + ret = iwl_pcie_load_section(trans, i, &image->sec[i]); + if (ret) + return ret; + } + + if (trans->cfg->device_family == IWL_DEVICE_FAMILY_8000) + iwl_set_bits_prph(trans, + CSR_UCODE_LOAD_STATUS_ADDR, + (LMPM_CPU_UCODE_LOADING_COMPLETED | + LMPM_CPU_HDRS_LOADING_COMPLETED | + LMPM_CPU_UCODE_LOADING_STARTED) << + shift_param); + + *first_ucode_section = last_read_idx; + + return 0; +} + +static void iwl_pcie_apply_destination(struct iwl_trans *trans) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + const struct iwl_fw_dbg_dest_tlv *dest = trans->dbg_dest_tlv; + int i; + + if (dest->version) + IWL_ERR(trans, + "DBG DEST version is %d - expect issues\n", + dest->version); + + IWL_INFO(trans, "Applying debug destination %s\n", + get_fw_dbg_mode_string(dest->monitor_mode)); + + if (dest->monitor_mode == EXTERNAL_MODE) + iwl_pcie_alloc_fw_monitor(trans, dest->size_power); + else + IWL_WARN(trans, "PCI should have external buffer debug\n"); + + for (i = 0; i < trans->dbg_dest_reg_num; i++) { + u32 addr = le32_to_cpu(dest->reg_ops[i].addr); + u32 val = le32_to_cpu(dest->reg_ops[i].val); + + switch (dest->reg_ops[i].op) { + case CSR_ASSIGN: + iwl_write32(trans, addr, val); + break; + case CSR_SETBIT: + iwl_set_bit(trans, addr, BIT(val)); + break; + case CSR_CLEARBIT: + iwl_clear_bit(trans, addr, BIT(val)); + break; + case PRPH_ASSIGN: + iwl_write_prph(trans, addr, val); + break; + case PRPH_SETBIT: + iwl_set_bits_prph(trans, addr, BIT(val)); + break; + case PRPH_CLEARBIT: + iwl_clear_bits_prph(trans, addr, BIT(val)); + break; + case PRPH_BLOCKBIT: + if (iwl_read_prph(trans, addr) & BIT(val)) { + IWL_ERR(trans, + "BIT(%u) in address 0x%x is 1, stopping FW configuration\n", + val, addr); + goto monitor; + } + break; + default: + IWL_ERR(trans, "FW debug - unknown OP %d\n", + dest->reg_ops[i].op); + break; + } + } + +monitor: + if (dest->monitor_mode == 
EXTERNAL_MODE && trans_pcie->fw_mon_size) { + iwl_write_prph(trans, le32_to_cpu(dest->base_reg), + trans_pcie->fw_mon_phys >> dest->base_shift); + if (trans->cfg->device_family == IWL_DEVICE_FAMILY_8000) + iwl_write_prph(trans, le32_to_cpu(dest->end_reg), + (trans_pcie->fw_mon_phys + + trans_pcie->fw_mon_size - 256) >> + dest->end_shift); + else + iwl_write_prph(trans, le32_to_cpu(dest->end_reg), + (trans_pcie->fw_mon_phys + + trans_pcie->fw_mon_size) >> + dest->end_shift); + } +} + +static int iwl_pcie_load_given_ucode(struct iwl_trans *trans, + const struct fw_img *image) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + int ret = 0; + int first_ucode_section; + + IWL_DEBUG_FW(trans, "working with %s CPU\n", + image->is_dual_cpus ? "Dual" : "Single"); + + /* load to FW the binary non secured sections of CPU1 */ + ret = iwl_pcie_load_cpu_sections(trans, image, 1, &first_ucode_section); + if (ret) + return ret; + + if (image->is_dual_cpus) { + /* set CPU2 header address */ + iwl_write_prph(trans, + LMPM_SECURE_UCODE_LOAD_CPU2_HDR_ADDR, + LMPM_SECURE_CPU2_HDR_MEM_SPACE); + + /* load to FW the binary sections of CPU2 */ + ret = iwl_pcie_load_cpu_sections(trans, image, 2, + &first_ucode_section); + if (ret) + return ret; + } + + /* supported for 7000 only for the moment */ + if (iwlwifi_mod_params.fw_monitor && + trans->cfg->device_family == IWL_DEVICE_FAMILY_7000) { + iwl_pcie_alloc_fw_monitor(trans, 0); + + if (trans_pcie->fw_mon_size) { + iwl_write_prph(trans, MON_BUFF_BASE_ADDR, + trans_pcie->fw_mon_phys >> 4); + iwl_write_prph(trans, MON_BUFF_END_ADDR, + (trans_pcie->fw_mon_phys + + trans_pcie->fw_mon_size) >> 4); + } + } else if (trans->dbg_dest_tlv) { + iwl_pcie_apply_destination(trans); + } + + /* release CPU reset */ + iwl_write32(trans, CSR_RESET, 0); + + return 0; +} + +static int iwl_pcie_load_given_ucode_8000(struct iwl_trans *trans, + const struct fw_img *image) +{ + int ret = 0; + int first_ucode_section; + + IWL_DEBUG_FW(trans, "working with %s CPU\n", + image->is_dual_cpus ? 
"Dual" : "Single"); + + if (trans->dbg_dest_tlv) + iwl_pcie_apply_destination(trans); + + /* TODO: remove in the next Si step */ + ret = iwl_pcie_rsa_race_bug_wa(trans); + if (ret) + return ret; + + /* configure the ucode to be ready to get the secured image */ + /* release CPU reset */ + iwl_write_prph(trans, RELEASE_CPU_RESET, RELEASE_CPU_RESET_BIT); + + /* load to FW the binary Secured sections of CPU1 */ + ret = iwl_pcie_load_cpu_sections_8000(trans, image, 1, + &first_ucode_section); + if (ret) + return ret; + + /* load to FW the binary sections of CPU2 */ + return iwl_pcie_load_cpu_sections_8000(trans, image, 2, + &first_ucode_section); +} + +static int iwl_trans_pcie_start_fw(struct iwl_trans *trans, + const struct fw_img *fw, bool run_in_rfkill) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + bool hw_rfkill; + int ret; + + mutex_lock(&trans_pcie->mutex); + + /* Someone called stop_device, don't try to start_fw */ + if (trans_pcie->is_down) { + IWL_WARN(trans, + "Can't start_fw since the HW hasn't been started\n"); + ret = EIO; + goto out; + } + + /* This may fail if AMT took ownership of the device */ + if (iwl_pcie_prepare_card_hw(trans)) { + IWL_WARN(trans, "Exit HW not ready\n"); + ret = -EIO; + goto out; + } + + iwl_enable_rfkill_int(trans); + + /* If platform's RF_KILL switch is NOT set to KILL */ + hw_rfkill = iwl_is_rfkill_set(trans); + if (hw_rfkill) + set_bit(STATUS_RFKILL, &trans->status); + else + clear_bit(STATUS_RFKILL, &trans->status); + iwl_trans_pcie_rf_kill(trans, hw_rfkill); + if (hw_rfkill && !run_in_rfkill) { + ret = -ERFKILL; + goto out; + } + + iwl_write32(trans, CSR_INT, 0xFFFFFFFF); + + ret = iwl_pcie_nic_init(trans); + if (ret) { + IWL_ERR(trans, "Unable to init nic\n"); + goto out; + } + + /* make sure rfkill handshake bits are cleared */ + iwl_write32(trans, CSR_UCODE_DRV_GP1_CLR, CSR_UCODE_SW_BIT_RFKILL); + iwl_write32(trans, CSR_UCODE_DRV_GP1_CLR, + CSR_UCODE_DRV_GP1_BIT_CMD_BLOCKED); + + /* clear (again), then enable host interrupts */ + iwl_write32(trans, CSR_INT, 0xFFFFFFFF); + iwl_enable_interrupts(trans); + + /* really make sure rfkill handshake bits are cleared */ + iwl_write32(trans, CSR_UCODE_DRV_GP1_CLR, CSR_UCODE_SW_BIT_RFKILL); + iwl_write32(trans, CSR_UCODE_DRV_GP1_CLR, CSR_UCODE_SW_BIT_RFKILL); + + /* Load the given image to the HW */ + if (trans->cfg->device_family == IWL_DEVICE_FAMILY_8000) + ret = iwl_pcie_load_given_ucode_8000(trans, fw); + else + ret = iwl_pcie_load_given_ucode(trans, fw); + +out: + mutex_unlock(&trans_pcie->mutex); + return ret; +} + +static void iwl_trans_pcie_fw_alive(struct iwl_trans *trans, u32 scd_addr) +{ + iwl_pcie_reset_ict(trans); + iwl_pcie_tx_start(trans, scd_addr); +} + +static void _iwl_trans_pcie_stop_device(struct iwl_trans *trans, bool low_power) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + bool hw_rfkill, was_hw_rfkill; + + lockdep_assert_held(&trans_pcie->mutex); + + if (trans_pcie->is_down) + return; + + trans_pcie->is_down = true; + + was_hw_rfkill = iwl_is_rfkill_set(trans); + + /* tell the device to stop sending interrupts */ + spin_lock(&trans_pcie->irq_lock); + iwl_disable_interrupts(trans); + spin_unlock(&trans_pcie->irq_lock); + + /* device going down, Stop using ICT table */ + iwl_pcie_disable_ict(trans); + + /* + * If a HW restart happens during firmware loading, + * then the firmware loading might call this function + * and later it might be called again due to the + * restart. 
So don't process again if the device is + already dead. + */ + if (test_and_clear_bit(STATUS_DEVICE_ENABLED, &trans->status)) { + IWL_DEBUG_INFO(trans, "DEVICE_ENABLED bit was set and is now cleared\n"); + iwl_pcie_tx_stop(trans); + iwl_pcie_rx_stop(trans); + + /* Power-down device's busmaster DMA clocks */ + if (!trans->cfg->apmg_not_supported) { + iwl_write_prph(trans, APMG_CLK_DIS_REG, + APMG_CLK_VAL_DMA_CLK_RQT); + udelay(5); + } + } + + /* Make sure (redundant) we've released our request to stay awake */ + iwl_clear_bit(trans, CSR_GP_CNTRL, + CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ); + + /* Stop the device, and put it in low power state */ + iwl_pcie_apm_stop(trans, false); + + /* stop and reset the on-board processor */ + iwl_write32(trans, CSR_RESET, CSR_RESET_REG_FLAG_SW_RESET); + udelay(20); + + /* + * Upon stop, the APM issues an interrupt if HW RF kill is set. + * This is a bug in certain versions of the hardware. + * Certain devices also keep sending HW RF kill interrupt all + * the time, unless the interrupt is ACKed even if the interrupt + * should be masked. Re-ACK all the interrupts here. + */ + spin_lock(&trans_pcie->irq_lock); + iwl_disable_interrupts(trans); + spin_unlock(&trans_pcie->irq_lock); + + + /* clear all status bits */ + clear_bit(STATUS_SYNC_HCMD_ACTIVE, &trans->status); + clear_bit(STATUS_INT_ENABLED, &trans->status); + clear_bit(STATUS_TPOWER_PMI, &trans->status); + clear_bit(STATUS_RFKILL, &trans->status); + + /* + * Even if we stop the HW, we still want the RF kill + * interrupt + */ + iwl_enable_rfkill_int(trans); + + /* + * Check again since the RF kill state may have changed while + * all the interrupts were disabled, in this case we couldn't + * receive the RF kill interrupt and update the state in the + * op_mode. + * Don't call the op_mode if the rfkill state hasn't changed. + * This allows the op_mode to call stop_device from the rfkill + * notification without endless recursion. Under very rare + * circumstances, we might have a small recursion if the rfkill + * state changed exactly now while we were called from stop_device. + * This is very unlikely but can happen and is supported. 
+ */ + hw_rfkill = iwl_is_rfkill_set(trans); + if (hw_rfkill) + set_bit(STATUS_RFKILL, &trans->status); + else + clear_bit(STATUS_RFKILL, &trans->status); + if (hw_rfkill != was_hw_rfkill) + iwl_trans_pcie_rf_kill(trans, hw_rfkill); + + /* re-take ownership to prevent other users from stealing the device */ + iwl_pcie_prepare_card_hw(trans); +} + +static void iwl_trans_pcie_stop_device(struct iwl_trans *trans, bool low_power) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + + mutex_lock(&trans_pcie->mutex); + _iwl_trans_pcie_stop_device(trans, low_power); + mutex_unlock(&trans_pcie->mutex); +} + +void iwl_trans_pcie_rf_kill(struct iwl_trans *trans, bool state) +{ + struct iwl_trans_pcie __maybe_unused *trans_pcie = + IWL_TRANS_GET_PCIE_TRANS(trans); + + lockdep_assert_held(&trans_pcie->mutex); + + if (iwl_op_mode_hw_rf_kill(trans->op_mode, state)) + _iwl_trans_pcie_stop_device(trans, true); +} + +static void iwl_trans_pcie_d3_suspend(struct iwl_trans *trans, bool test) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + + if (trans->system_pm_mode == IWL_PLAT_PM_MODE_D0I3) { + /* Enable persistence mode to avoid reset */ + iwl_set_bit(trans, CSR_HW_IF_CONFIG_REG, + CSR_HW_IF_CONFIG_REG_PERSIST_MODE); + } + + iwl_disable_interrupts(trans); + + /* + * in testing mode, the host stays awake and the + * hardware won't be reset (not even partially) + */ + if (test) + return; + + iwl_pcie_disable_ict(trans); + + synchronize_irq(trans_pcie->pci_dev->irq); + + iwl_clear_bit(trans, CSR_GP_CNTRL, + CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ); + iwl_clear_bit(trans, CSR_GP_CNTRL, + CSR_GP_CNTRL_REG_FLAG_INIT_DONE); + + if (trans->system_pm_mode == IWL_PLAT_PM_MODE_D3) { + /* + * reset TX queues -- some of their registers reset during S3 + * so if we don't reset everything here the D3 image would try + * to execute some invalid memory upon resume + */ + iwl_trans_pcie_tx_reset(trans); + } + + iwl_pcie_set_pwr(trans, true); +} + +static int iwl_trans_pcie_d3_resume(struct iwl_trans *trans, + enum iwl_d3_status *status, + bool test) +{ + u32 val; + int ret; + + if (test) { + iwl_enable_interrupts(trans); + *status = IWL_D3_STATUS_ALIVE; + return 0; + } + + /* + * Also enables interrupts - none will happen as the device doesn't + * know we're waking it up, only when the opmode actually tells it + * after this call. 
+ */ + iwl_pcie_reset_ict(trans); + + iwl_set_bit(trans, CSR_GP_CNTRL, CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ); + iwl_set_bit(trans, CSR_GP_CNTRL, CSR_GP_CNTRL_REG_FLAG_INIT_DONE); + + if (trans->cfg->device_family == IWL_DEVICE_FAMILY_8000) + udelay(2); + + ret = iwl_poll_bit(trans, CSR_GP_CNTRL, + CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY, + CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY, + 25000); + if (ret < 0) { + IWL_ERR(trans, "Failed to resume the device (mac ready)\n"); + return ret; + } + + iwl_pcie_set_pwr(trans, false); + + if (trans->system_pm_mode == IWL_PLAT_PM_MODE_D0I3) { + iwl_clear_bit(trans, CSR_GP_CNTRL, + CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ); + } else { + iwl_trans_pcie_tx_reset(trans); + + ret = iwl_pcie_rx_init(trans); + if (ret) { + IWL_ERR(trans, + "Failed to resume the device (RX reset)\n"); + return ret; + } + } + + val = iwl_read32(trans, CSR_RESET); + if (val & CSR_RESET_REG_FLAG_NEVO_RESET) + *status = IWL_D3_STATUS_RESET; + else + *status = IWL_D3_STATUS_ALIVE; + + return 0; +} + +static int _iwl_trans_pcie_start_hw(struct iwl_trans *trans, bool low_power) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + bool hw_rfkill; + int err; + + lockdep_assert_held(&trans_pcie->mutex); + + err = iwl_pcie_prepare_card_hw(trans); + if (err) { + IWL_ERR(trans, "Error while preparing HW: %d\n", err); + return err; + } + + /* Reset the entire device */ + iwl_write32(trans, CSR_RESET, CSR_RESET_REG_FLAG_SW_RESET); + + usleep_range(10, 15); + + iwl_pcie_apm_init(trans); + + /* From now on, the op_mode will be kept updated about RF kill state */ + iwl_enable_rfkill_int(trans); + + /* Set is_down to false here so that...*/ + trans_pcie->is_down = false; + + hw_rfkill = iwl_is_rfkill_set(trans); + if (hw_rfkill) + set_bit(STATUS_RFKILL, &trans->status); + else + clear_bit(STATUS_RFKILL, &trans->status); + /* ... 
rfkill can call stop_device and set it false if needed */ + iwl_trans_pcie_rf_kill(trans, hw_rfkill); + + return 0; +} + +static int iwl_trans_pcie_start_hw(struct iwl_trans *trans, bool low_power) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + int ret; + + mutex_lock(&trans_pcie->mutex); + ret = _iwl_trans_pcie_start_hw(trans, low_power); + mutex_unlock(&trans_pcie->mutex); + + return ret; +} + +static void iwl_trans_pcie_op_mode_leave(struct iwl_trans *trans) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + + mutex_lock(&trans_pcie->mutex); + + /* disable interrupts - don't enable HW RF kill interrupt */ + spin_lock(&trans_pcie->irq_lock); + iwl_disable_interrupts(trans); + spin_unlock(&trans_pcie->irq_lock); + + iwl_pcie_apm_stop(trans, true); + + spin_lock(&trans_pcie->irq_lock); + iwl_disable_interrupts(trans); + spin_unlock(&trans_pcie->irq_lock); + + iwl_pcie_disable_ict(trans); + + mutex_unlock(&trans_pcie->mutex); + + synchronize_irq(trans_pcie->pci_dev->irq); +} + +static void iwl_trans_pcie_write8(struct iwl_trans *trans, u32 ofs, u8 val) +{ + writeb(val, IWL_TRANS_GET_PCIE_TRANS(trans)->hw_base + ofs); +} + +static void iwl_trans_pcie_write32(struct iwl_trans *trans, u32 ofs, u32 val) +{ + writel(val, IWL_TRANS_GET_PCIE_TRANS(trans)->hw_base + ofs); +} + +static u32 iwl_trans_pcie_read32(struct iwl_trans *trans, u32 ofs) +{ + return readl(IWL_TRANS_GET_PCIE_TRANS(trans)->hw_base + ofs); +} + +static u32 iwl_trans_pcie_read_prph(struct iwl_trans *trans, u32 reg) +{ + iwl_trans_pcie_write32(trans, HBUS_TARG_PRPH_RADDR, + ((reg & 0x000FFFFF) | (3 << 24))); + return iwl_trans_pcie_read32(trans, HBUS_TARG_PRPH_RDAT); +} + +static void iwl_trans_pcie_write_prph(struct iwl_trans *trans, u32 addr, + u32 val) +{ + iwl_trans_pcie_write32(trans, HBUS_TARG_PRPH_WADDR, + ((addr & 0x000FFFFF) | (3 << 24))); + iwl_trans_pcie_write32(trans, HBUS_TARG_PRPH_WDAT, val); +} + +static int iwl_pcie_dummy_napi_poll(struct napi_struct *napi, int budget) +{ + WARN_ON(1); + return 0; +} + +static void iwl_trans_pcie_configure(struct iwl_trans *trans, + const struct iwl_trans_config *trans_cfg) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + + trans_pcie->cmd_queue = trans_cfg->cmd_queue; + trans_pcie->cmd_fifo = trans_cfg->cmd_fifo; + trans_pcie->cmd_q_wdg_timeout = trans_cfg->cmd_q_wdg_timeout; + if (WARN_ON(trans_cfg->n_no_reclaim_cmds > MAX_NO_RECLAIM_CMDS)) + trans_pcie->n_no_reclaim_cmds = 0; + else + trans_pcie->n_no_reclaim_cmds = trans_cfg->n_no_reclaim_cmds; + if (trans_pcie->n_no_reclaim_cmds) + memcpy(trans_pcie->no_reclaim_cmds, trans_cfg->no_reclaim_cmds, + trans_pcie->n_no_reclaim_cmds * sizeof(u8)); + + trans_pcie->rx_buf_size = trans_cfg->rx_buf_size; + trans_pcie->rx_page_order = + iwl_trans_get_rb_size_order(trans_pcie->rx_buf_size); + + trans_pcie->wide_cmd_header = trans_cfg->wide_cmd_header; + trans_pcie->bc_table_dword = trans_cfg->bc_table_dword; + trans_pcie->scd_set_active = trans_cfg->scd_set_active; + trans_pcie->sw_csum_tx = trans_cfg->sw_csum_tx; + + trans->command_groups = trans_cfg->command_groups; + trans->command_groups_size = trans_cfg->command_groups_size; + + /* init ref_count to 1 (should be cleared when ucode is loaded) */ + trans_pcie->ref_count = 1; + + /* Initialize NAPI here - it should be before registering to mac80211 + * in the opmode but after the HW struct is allocated. 
+ * As this function may be called again in some corner cases don't + * do anything if NAPI was already initialized. + */ + if (!trans_pcie->napi.poll) { + init_dummy_netdev(&trans_pcie->napi_dev); + netif_napi_add(&trans_pcie->napi_dev, &trans_pcie->napi, + iwl_pcie_dummy_napi_poll, 64); + } +} + +void iwl_trans_pcie_free(struct iwl_trans *trans) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + int i; + + synchronize_irq(trans_pcie->pci_dev->irq); + + iwl_pcie_tx_free(trans); + iwl_pcie_rx_free(trans); + + free_irq(trans_pcie->pci_dev->irq, trans); + iwl_pcie_free_ict(trans); + + pci_disable_msi(trans_pcie->pci_dev); + iounmap(trans_pcie->hw_base); + pci_release_regions(trans_pcie->pci_dev); + pci_disable_device(trans_pcie->pci_dev); + + if (trans_pcie->napi.poll) + netif_napi_del(&trans_pcie->napi); + + iwl_pcie_free_fw_monitor(trans); + + for_each_possible_cpu(i) { + struct iwl_tso_hdr_page *p = + per_cpu_ptr(trans_pcie->tso_hdr_page, i); + + if (p->page) + __free_page(p->page); + } + + free_percpu(trans_pcie->tso_hdr_page); + iwl_trans_free(trans); +} + +static void iwl_trans_pcie_set_pmi(struct iwl_trans *trans, bool state) +{ + if (state) + set_bit(STATUS_TPOWER_PMI, &trans->status); + else + clear_bit(STATUS_TPOWER_PMI, &trans->status); +} + +static bool iwl_trans_pcie_grab_nic_access(struct iwl_trans *trans, + unsigned long *flags) +{ + int ret; + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + + spin_lock_irqsave(&trans_pcie->reg_lock, *flags); + + if (trans_pcie->cmd_hold_nic_awake) + goto out; + + /* this bit wakes up the NIC */ + __iwl_trans_pcie_set_bit(trans, CSR_GP_CNTRL, + CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ); + if (trans->cfg->device_family == IWL_DEVICE_FAMILY_8000) + udelay(2); + + /* + * These bits say the device is running, and should keep running for + * at least a short while (at least as long as MAC_ACCESS_REQ stays 1), + * but they do not indicate that embedded SRAM is restored yet; + * 3945 and 4965 have volatile SRAM, and must save/restore contents + * to/from host DRAM when sleeping/waking for power-saving. + * Each direction takes approximately 1/4 millisecond; with this + * overhead, it's a good idea to grab and hold MAC_ACCESS_REQUEST if a + * series of register accesses are expected (e.g. reading Event Log), + * to keep device from sleeping. + * + * CSR_UCODE_DRV_GP1 register bit MAC_SLEEP == 0 indicates that + * SRAM is okay/restored. We don't check that here because this call + * is just for hardware register access; but GP1 MAC_SLEEP check is a + * good idea before accessing 3945/4965 SRAM (e.g. reading Event Log). + * + * 5000 series and later (including 1000 series) have non-volatile SRAM, + * and do not save/restore SRAM when power cycling. + */ + ret = iwl_poll_bit(trans, CSR_GP_CNTRL, + CSR_GP_CNTRL_REG_VAL_MAC_ACCESS_EN, + (CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY | + CSR_GP_CNTRL_REG_FLAG_GOING_TO_SLEEP), 15000); + if (unlikely(ret < 0)) { + iwl_write32(trans, CSR_RESET, CSR_RESET_REG_FLAG_FORCE_NMI); + WARN_ONCE(1, + "Timeout waiting for hardware access (CSR_GP_CNTRL 0x%08x)\n", + iwl_read32(trans, CSR_GP_CNTRL)); + spin_unlock_irqrestore(&trans_pcie->reg_lock, *flags); + return false; + } + +out: + /* + * Fool sparse by faking we release the lock - sparse will + * track nic_access anyway. 
+ */ + __release(&trans_pcie->reg_lock); + return true; +} + +static void iwl_trans_pcie_release_nic_access(struct iwl_trans *trans, + unsigned long *flags) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + + lockdep_assert_held(&trans_pcie->reg_lock); + + /* + * Fool sparse by faking we acquiring the lock - sparse will + * track nic_access anyway. + */ + __acquire(&trans_pcie->reg_lock); + + if (trans_pcie->cmd_hold_nic_awake) + goto out; + + __iwl_trans_pcie_clear_bit(trans, CSR_GP_CNTRL, + CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ); + /* + * Above we read the CSR_GP_CNTRL register, which will flush + * any previous writes, but we need the write that clears the + * MAC_ACCESS_REQ bit to be performed before any other writes + * scheduled on different CPUs (after we drop reg_lock). + */ + mmiowb(); +out: + spin_unlock_irqrestore(&trans_pcie->reg_lock, *flags); +} + +static int iwl_trans_pcie_read_mem(struct iwl_trans *trans, u32 addr, + void *buf, int dwords) +{ + unsigned long flags; + int offs, ret = 0; + u32 *vals = buf; + + if (iwl_trans_grab_nic_access(trans, &flags)) { + iwl_write32(trans, HBUS_TARG_MEM_RADDR, addr); + for (offs = 0; offs < dwords; offs++) + vals[offs] = iwl_read32(trans, HBUS_TARG_MEM_RDAT); + iwl_trans_release_nic_access(trans, &flags); + } else { + ret = -EBUSY; + } + return ret; +} + +static int iwl_trans_pcie_write_mem(struct iwl_trans *trans, u32 addr, + const void *buf, int dwords) +{ + unsigned long flags; + int offs, ret = 0; + const u32 *vals = buf; + + if (iwl_trans_grab_nic_access(trans, &flags)) { + iwl_write32(trans, HBUS_TARG_MEM_WADDR, addr); + for (offs = 0; offs < dwords; offs++) + iwl_write32(trans, HBUS_TARG_MEM_WDAT, + vals ? vals[offs] : 0); + iwl_trans_release_nic_access(trans, &flags); + } else { + ret = -EBUSY; + } + return ret; +} + +static void iwl_trans_pcie_freeze_txq_timer(struct iwl_trans *trans, + unsigned long txqs, + bool freeze) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + int queue; + + for_each_set_bit(queue, &txqs, BITS_PER_LONG) { + struct iwl_txq *txq = &trans_pcie->txq[queue]; + unsigned long now; + + spin_lock_bh(&txq->lock); + + now = jiffies; + + if (txq->frozen == freeze) + goto next_queue; + + IWL_DEBUG_TX_QUEUES(trans, "%s TXQ %d\n", + freeze ? "Freezing" : "Waking", queue); + + txq->frozen = freeze; + + if (txq->q.read_ptr == txq->q.write_ptr) + goto next_queue; + + if (freeze) { + if (unlikely(time_after(now, + txq->stuck_timer.expires))) { + /* + * The timer should have fired, maybe it is + * spinning right now on the lock. 
+ */ + goto next_queue; + } + /* remember how long until the timer fires */ + txq->frozen_expiry_remainder = + txq->stuck_timer.expires - now; + del_timer(&txq->stuck_timer); + goto next_queue; + } + + /* + * Wake a non-empty queue -> arm timer with the + * remainder before it froze + */ + mod_timer(&txq->stuck_timer, + now + txq->frozen_expiry_remainder); + +next_queue: + spin_unlock_bh(&txq->lock); + } +} + +static void iwl_trans_pcie_block_txq_ptrs(struct iwl_trans *trans, bool block) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + int i; + + for (i = 0; i < trans->cfg->base_params->num_of_queues; i++) { + struct iwl_txq *txq = &trans_pcie->txq[i]; + + if (i == trans_pcie->cmd_queue) + continue; + + spin_lock_bh(&txq->lock); + + if (!block && !(WARN_ON_ONCE(!txq->block))) { + txq->block--; + if (!txq->block) { + iwl_write32(trans, HBUS_TARG_WRPTR, + txq->q.write_ptr | (i << 8)); + } + } else if (block) { + txq->block++; + } + + spin_unlock_bh(&txq->lock); + } +} + +#define IWL_FLUSH_WAIT_MS 2000 + +static int iwl_trans_pcie_wait_txq_empty(struct iwl_trans *trans, u32 txq_bm) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + struct iwl_txq *txq; + struct iwl_queue *q; + int cnt; + unsigned long now = jiffies; + u32 scd_sram_addr; + u8 buf[16]; + int ret = 0; + + /* waiting for all the tx frames complete might take a while */ + for (cnt = 0; cnt < trans->cfg->base_params->num_of_queues; cnt++) { + u8 wr_ptr; + + if (cnt == trans_pcie->cmd_queue) + continue; + if (!test_bit(cnt, trans_pcie->queue_used)) + continue; + if (!(BIT(cnt) & txq_bm)) + continue; + + IWL_DEBUG_TX_QUEUES(trans, "Emptying queue %d...\n", cnt); + txq = &trans_pcie->txq[cnt]; + q = &txq->q; + wr_ptr = ACCESS_ONCE(q->write_ptr); + + while (q->read_ptr != ACCESS_ONCE(q->write_ptr) && + !time_after(jiffies, + now + msecs_to_jiffies(IWL_FLUSH_WAIT_MS))) { + u8 write_ptr = ACCESS_ONCE(q->write_ptr); + + if (WARN_ONCE(wr_ptr != write_ptr, + "WR pointer moved while flushing %d -> %d\n", + wr_ptr, write_ptr)) + return -ETIMEDOUT; + msleep(1); + } + + if (q->read_ptr != q->write_ptr) { + IWL_ERR(trans, + "fail to flush all tx fifo queues Q %d\n", cnt); + ret = -ETIMEDOUT; + break; + } + IWL_DEBUG_TX_QUEUES(trans, "Queue %d is now empty.\n", cnt); + } + + if (!ret) + return 0; + + IWL_ERR(trans, "Current SW read_ptr %d write_ptr %d\n", + txq->q.read_ptr, txq->q.write_ptr); + + scd_sram_addr = trans_pcie->scd_base_addr + + SCD_TX_STTS_QUEUE_OFFSET(txq->q.id); + iwl_trans_read_mem_bytes(trans, scd_sram_addr, buf, sizeof(buf)); + + iwl_print_hex_error(trans, buf, sizeof(buf)); + + for (cnt = 0; cnt < FH_TCSR_CHNL_NUM; cnt++) + IWL_ERR(trans, "FH TRBs(%d) = 0x%08x\n", cnt, + iwl_read_direct32(trans, FH_TX_TRB_REG(cnt))); + + for (cnt = 0; cnt < trans->cfg->base_params->num_of_queues; cnt++) { + u32 status = iwl_read_prph(trans, SCD_QUEUE_STATUS_BITS(cnt)); + u8 fifo = (status >> SCD_QUEUE_STTS_REG_POS_TXF) & 0x7; + bool active = !!(status & BIT(SCD_QUEUE_STTS_REG_POS_ACTIVE)); + u32 tbl_dw = + iwl_trans_read_mem32(trans, trans_pcie->scd_base_addr + + SCD_TRANS_TBL_OFFSET_QUEUE(cnt)); + + if (cnt & 0x1) + tbl_dw = (tbl_dw & 0xFFFF0000) >> 16; + else + tbl_dw = tbl_dw & 0x0000FFFF; + + IWL_ERR(trans, + "Q %d is %sactive and mapped to fifo %d ra_tid 0x%04x [%d,%d]\n", + cnt, active ? 
"" : "in", fifo, tbl_dw, + iwl_read_prph(trans, SCD_QUEUE_RDPTR(cnt)) & + (TFD_QUEUE_SIZE_MAX - 1), + iwl_read_prph(trans, SCD_QUEUE_WRPTR(cnt))); + } + + return ret; +} + +static void iwl_trans_pcie_set_bits_mask(struct iwl_trans *trans, u32 reg, + u32 mask, u32 value) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + unsigned long flags; + + spin_lock_irqsave(&trans_pcie->reg_lock, flags); + __iwl_trans_pcie_set_bits_mask(trans, reg, mask, value); + spin_unlock_irqrestore(&trans_pcie->reg_lock, flags); +} + +void iwl_trans_pcie_ref(struct iwl_trans *trans) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + unsigned long flags; + + if (iwlwifi_mod_params.d0i3_disable) + return; + + spin_lock_irqsave(&trans_pcie->ref_lock, flags); + IWL_DEBUG_RPM(trans, "ref_counter: %d\n", trans_pcie->ref_count); + trans_pcie->ref_count++; + spin_unlock_irqrestore(&trans_pcie->ref_lock, flags); +} + +void iwl_trans_pcie_unref(struct iwl_trans *trans) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + unsigned long flags; + + if (iwlwifi_mod_params.d0i3_disable) + return; + + spin_lock_irqsave(&trans_pcie->ref_lock, flags); + IWL_DEBUG_RPM(trans, "ref_counter: %d\n", trans_pcie->ref_count); + if (WARN_ON_ONCE(trans_pcie->ref_count == 0)) { + spin_unlock_irqrestore(&trans_pcie->ref_lock, flags); + return; + } + trans_pcie->ref_count--; + spin_unlock_irqrestore(&trans_pcie->ref_lock, flags); +} + +static const char *get_csr_string(int cmd) +{ +#define IWL_CMD(x) case x: return #x + switch (cmd) { + IWL_CMD(CSR_HW_IF_CONFIG_REG); + IWL_CMD(CSR_INT_COALESCING); + IWL_CMD(CSR_INT); + IWL_CMD(CSR_INT_MASK); + IWL_CMD(CSR_FH_INT_STATUS); + IWL_CMD(CSR_GPIO_IN); + IWL_CMD(CSR_RESET); + IWL_CMD(CSR_GP_CNTRL); + IWL_CMD(CSR_HW_REV); + IWL_CMD(CSR_EEPROM_REG); + IWL_CMD(CSR_EEPROM_GP); + IWL_CMD(CSR_OTP_GP_REG); + IWL_CMD(CSR_GIO_REG); + IWL_CMD(CSR_GP_UCODE_REG); + IWL_CMD(CSR_GP_DRIVER_REG); + IWL_CMD(CSR_UCODE_DRV_GP1); + IWL_CMD(CSR_UCODE_DRV_GP2); + IWL_CMD(CSR_LED_REG); + IWL_CMD(CSR_DRAM_INT_TBL_REG); + IWL_CMD(CSR_GIO_CHICKEN_BITS); + IWL_CMD(CSR_ANA_PLL_CFG); + IWL_CMD(CSR_HW_REV_WA_REG); + IWL_CMD(CSR_MONITOR_STATUS_REG); + IWL_CMD(CSR_DBG_HPET_MEM_REG); + default: + return "UNKNOWN"; + } +#undef IWL_CMD +} + +void iwl_pcie_dump_csr(struct iwl_trans *trans) +{ + int i; + static const u32 csr_tbl[] = { + CSR_HW_IF_CONFIG_REG, + CSR_INT_COALESCING, + CSR_INT, + CSR_INT_MASK, + CSR_FH_INT_STATUS, + CSR_GPIO_IN, + CSR_RESET, + CSR_GP_CNTRL, + CSR_HW_REV, + CSR_EEPROM_REG, + CSR_EEPROM_GP, + CSR_OTP_GP_REG, + CSR_GIO_REG, + CSR_GP_UCODE_REG, + CSR_GP_DRIVER_REG, + CSR_UCODE_DRV_GP1, + CSR_UCODE_DRV_GP2, + CSR_LED_REG, + CSR_DRAM_INT_TBL_REG, + CSR_GIO_CHICKEN_BITS, + CSR_ANA_PLL_CFG, + CSR_MONITOR_STATUS_REG, + CSR_HW_REV_WA_REG, + CSR_DBG_HPET_MEM_REG + }; + IWL_ERR(trans, "CSR values:\n"); + IWL_ERR(trans, "(2nd byte of CSR_INT_COALESCING is " + "CSR_INT_PERIODIC_REG)\n"); + for (i = 0; i < ARRAY_SIZE(csr_tbl); i++) { + IWL_ERR(trans, " %25s: 0X%08x\n", + get_csr_string(csr_tbl[i]), + iwl_read32(trans, csr_tbl[i])); + } +} + +#ifdef CONFIG_IWLWIFI_DEBUGFS +/* create and remove of files */ +#define DEBUGFS_ADD_FILE(name, parent, mode) do { \ + if (!debugfs_create_file(#name, mode, parent, trans, \ + &iwl_dbgfs_##name##_ops)) \ + goto err; \ +} while (0) + +/* file operation */ +#define DEBUGFS_READ_FILE_OPS(name) \ +static const struct file_operations iwl_dbgfs_##name##_ops = { \ + .read = iwl_dbgfs_##name##_read, \ + .open = 
simple_open, \ + .llseek = generic_file_llseek, \ +}; + +#define DEBUGFS_WRITE_FILE_OPS(name) \ +static const struct file_operations iwl_dbgfs_##name##_ops = { \ + .write = iwl_dbgfs_##name##_write, \ + .open = simple_open, \ + .llseek = generic_file_llseek, \ +}; + +#define DEBUGFS_READ_WRITE_FILE_OPS(name) \ +static const struct file_operations iwl_dbgfs_##name##_ops = { \ + .write = iwl_dbgfs_##name##_write, \ + .read = iwl_dbgfs_##name##_read, \ + .open = simple_open, \ + .llseek = generic_file_llseek, \ +}; + +static ssize_t iwl_dbgfs_tx_queue_read(struct file *file, + char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct iwl_trans *trans = file->private_data; + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + struct iwl_txq *txq; + struct iwl_queue *q; + char *buf; + int pos = 0; + int cnt; + int ret; + size_t bufsz; + + bufsz = sizeof(char) * 75 * trans->cfg->base_params->num_of_queues; + + if (!trans_pcie->txq) + return -EAGAIN; + + buf = kzalloc(bufsz, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + for (cnt = 0; cnt < trans->cfg->base_params->num_of_queues; cnt++) { + txq = &trans_pcie->txq[cnt]; + q = &txq->q; + pos += scnprintf(buf + pos, bufsz - pos, + "hwq %.2d: read=%u write=%u use=%d stop=%d need_update=%d frozen=%d%s\n", + cnt, q->read_ptr, q->write_ptr, + !!test_bit(cnt, trans_pcie->queue_used), + !!test_bit(cnt, trans_pcie->queue_stopped), + txq->need_update, txq->frozen, + (cnt == trans_pcie->cmd_queue ? " HCMD" : "")); + } + ret = simple_read_from_buffer(user_buf, count, ppos, buf, pos); + kfree(buf); + return ret; +} + +static ssize_t iwl_dbgfs_rx_queue_read(struct file *file, + char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct iwl_trans *trans = file->private_data; + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + struct iwl_rxq *rxq = &trans_pcie->rxq; + char buf[256]; + int pos = 0; + const size_t bufsz = sizeof(buf); + + pos += scnprintf(buf + pos, bufsz - pos, "read: %u\n", + rxq->read); + pos += scnprintf(buf + pos, bufsz - pos, "write: %u\n", + rxq->write); + pos += scnprintf(buf + pos, bufsz - pos, "write_actual: %u\n", + rxq->write_actual); + pos += scnprintf(buf + pos, bufsz - pos, "need_update: %d\n", + rxq->need_update); + pos += scnprintf(buf + pos, bufsz - pos, "free_count: %u\n", + rxq->free_count); + if (rxq->rb_stts) { + pos += scnprintf(buf + pos, bufsz - pos, "closed_rb_num: %u\n", + le16_to_cpu(rxq->rb_stts->closed_rb_num) & 0x0FFF); + } else { + pos += scnprintf(buf + pos, bufsz - pos, + "closed_rb_num: Not Allocated\n"); + } + return simple_read_from_buffer(user_buf, count, ppos, buf, pos); +} + +static ssize_t iwl_dbgfs_interrupt_read(struct file *file, + char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct iwl_trans *trans = file->private_data; + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + struct isr_statistics *isr_stats = &trans_pcie->isr_stats; + + int pos = 0; + char *buf; + int bufsz = 24 * 64; /* 24 items * 64 char per item */ + ssize_t ret; + + buf = kzalloc(bufsz, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + pos += scnprintf(buf + pos, bufsz - pos, + "Interrupt Statistics Report:\n"); + + pos += scnprintf(buf + pos, bufsz - pos, "HW Error:\t\t\t %u\n", + isr_stats->hw); + pos += scnprintf(buf + pos, bufsz - pos, "SW Error:\t\t\t %u\n", + isr_stats->sw); + if (isr_stats->sw || isr_stats->hw) { + pos += scnprintf(buf + pos, bufsz - pos, + "\tLast Restarting Code: 0x%X\n", + isr_stats->err_code); + } +#ifdef CONFIG_IWLWIFI_DEBUG + 
pos += scnprintf(buf + pos, bufsz - pos, "Frame transmitted:\t\t %u\n", + isr_stats->sch); + pos += scnprintf(buf + pos, bufsz - pos, "Alive interrupt:\t\t %u\n", + isr_stats->alive); +#endif + pos += scnprintf(buf + pos, bufsz - pos, + "HW RF KILL switch toggled:\t %u\n", isr_stats->rfkill); + + pos += scnprintf(buf + pos, bufsz - pos, "CT KILL:\t\t\t %u\n", + isr_stats->ctkill); + + pos += scnprintf(buf + pos, bufsz - pos, "Wakeup Interrupt:\t\t %u\n", + isr_stats->wakeup); + + pos += scnprintf(buf + pos, bufsz - pos, + "Rx command responses:\t\t %u\n", isr_stats->rx); + + pos += scnprintf(buf + pos, bufsz - pos, "Tx/FH interrupt:\t\t %u\n", + isr_stats->tx); + + pos += scnprintf(buf + pos, bufsz - pos, "Unexpected INTA:\t\t %u\n", + isr_stats->unhandled); + + ret = simple_read_from_buffer(user_buf, count, ppos, buf, pos); + kfree(buf); + return ret; +} + +static ssize_t iwl_dbgfs_interrupt_write(struct file *file, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct iwl_trans *trans = file->private_data; + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + struct isr_statistics *isr_stats = &trans_pcie->isr_stats; + + char buf[8]; + int buf_size; + u32 reset_flag; + + memset(buf, 0, sizeof(buf)); + buf_size = min(count, sizeof(buf) - 1); + if (copy_from_user(buf, user_buf, buf_size)) + return -EFAULT; + if (sscanf(buf, "%x", &reset_flag) != 1) + return -EFAULT; + if (reset_flag == 0) + memset(isr_stats, 0, sizeof(*isr_stats)); + + return count; +} + +static ssize_t iwl_dbgfs_csr_write(struct file *file, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct iwl_trans *trans = file->private_data; + char buf[8]; + int buf_size; + int csr; + + memset(buf, 0, sizeof(buf)); + buf_size = min(count, sizeof(buf) - 1); + if (copy_from_user(buf, user_buf, buf_size)) + return -EFAULT; + if (sscanf(buf, "%d", &csr) != 1) + return -EFAULT; + + iwl_pcie_dump_csr(trans); + + return count; +} + +static ssize_t iwl_dbgfs_fh_reg_read(struct file *file, + char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct iwl_trans *trans = file->private_data; + char *buf = NULL; + ssize_t ret; + + ret = iwl_dump_fh(trans, &buf); + if (ret < 0) + return ret; + if (!buf) + return -EINVAL; + ret = simple_read_from_buffer(user_buf, count, ppos, buf, ret); + kfree(buf); + return ret; +} + +DEBUGFS_READ_WRITE_FILE_OPS(interrupt); +DEBUGFS_READ_FILE_OPS(fh_reg); +DEBUGFS_READ_FILE_OPS(rx_queue); +DEBUGFS_READ_FILE_OPS(tx_queue); +DEBUGFS_WRITE_FILE_OPS(csr); + +/* Create the debugfs files and directories */ +int iwl_trans_pcie_dbgfs_register(struct iwl_trans *trans) +{ + struct dentry *dir = trans->dbgfs_dir; + + DEBUGFS_ADD_FILE(rx_queue, dir, S_IRUSR); + DEBUGFS_ADD_FILE(tx_queue, dir, S_IRUSR); + DEBUGFS_ADD_FILE(interrupt, dir, S_IWUSR | S_IRUSR); + DEBUGFS_ADD_FILE(csr, dir, S_IWUSR); + DEBUGFS_ADD_FILE(fh_reg, dir, S_IRUSR); + return 0; + +err: + IWL_ERR(trans, "failed to create the trans debugfs entry\n"); + return -ENOMEM; +} +#endif /*CONFIG_IWLWIFI_DEBUGFS */ + +static u32 iwl_trans_pcie_get_cmdlen(struct iwl_tfd *tfd) +{ + u32 cmdlen = 0; + int i; + + for (i = 0; i < IWL_NUM_OF_TBS; i++) + cmdlen += iwl_pcie_tfd_tb_get_len(tfd, i); + + return cmdlen; +} + +static u32 iwl_trans_pcie_dump_rbs(struct iwl_trans *trans, + struct iwl_fw_error_dump_data **data, + int allocated_rb_nums) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + int max_len = PAGE_SIZE << trans_pcie->rx_page_order; + struct iwl_rxq *rxq = 
&trans_pcie->rxq; + u32 i, r, j, rb_len = 0; + + spin_lock(&rxq->lock); + + r = le16_to_cpu(ACCESS_ONCE(rxq->rb_stts->closed_rb_num)) & 0x0FFF; + + for (i = rxq->read, j = 0; + i != r && j < allocated_rb_nums; + i = (i + 1) & RX_QUEUE_MASK, j++) { + struct iwl_rx_mem_buffer *rxb = rxq->queue[i]; + struct iwl_fw_error_dump_rb *rb; + + dma_unmap_page(trans->dev, rxb->page_dma, max_len, + DMA_FROM_DEVICE); + + rb_len += sizeof(**data) + sizeof(*rb) + max_len; + + (*data)->type = cpu_to_le32(IWL_FW_ERROR_DUMP_RB); + (*data)->len = cpu_to_le32(sizeof(*rb) + max_len); + rb = (void *)(*data)->data; + rb->index = cpu_to_le32(i); + memcpy(rb->data, page_address(rxb->page), max_len); + /* remap the page for the free benefit */ + rxb->page_dma = dma_map_page(trans->dev, rxb->page, 0, + max_len, + DMA_FROM_DEVICE); + + *data = iwl_fw_error_next_data(*data); + } + + spin_unlock(&rxq->lock); + + return rb_len; +} +#define IWL_CSR_TO_DUMP (0x250) + +static u32 iwl_trans_pcie_dump_csr(struct iwl_trans *trans, + struct iwl_fw_error_dump_data **data) +{ + u32 csr_len = sizeof(**data) + IWL_CSR_TO_DUMP; + __le32 *val; + int i; + + (*data)->type = cpu_to_le32(IWL_FW_ERROR_DUMP_CSR); + (*data)->len = cpu_to_le32(IWL_CSR_TO_DUMP); + val = (void *)(*data)->data; + + for (i = 0; i < IWL_CSR_TO_DUMP; i += 4) + *val++ = cpu_to_le32(iwl_trans_pcie_read32(trans, i)); + + *data = iwl_fw_error_next_data(*data); + + return csr_len; +} + +static u32 iwl_trans_pcie_fh_regs_dump(struct iwl_trans *trans, + struct iwl_fw_error_dump_data **data) +{ + u32 fh_regs_len = FH_MEM_UPPER_BOUND - FH_MEM_LOWER_BOUND; + unsigned long flags; + __le32 *val; + int i; + + if (!iwl_trans_grab_nic_access(trans, &flags)) + return 0; + + (*data)->type = cpu_to_le32(IWL_FW_ERROR_DUMP_FH_REGS); + (*data)->len = cpu_to_le32(fh_regs_len); + val = (void *)(*data)->data; + + for (i = FH_MEM_LOWER_BOUND; i < FH_MEM_UPPER_BOUND; i += sizeof(u32)) + *val++ = cpu_to_le32(iwl_trans_pcie_read32(trans, i)); + + iwl_trans_release_nic_access(trans, &flags); + + *data = iwl_fw_error_next_data(*data); + + return sizeof(**data) + fh_regs_len; +} + +static u32 +iwl_trans_pci_dump_marbh_monitor(struct iwl_trans *trans, + struct iwl_fw_error_dump_fw_mon *fw_mon_data, + u32 monitor_len) +{ + u32 buf_size_in_dwords = (monitor_len >> 2); + u32 *buffer = (u32 *)fw_mon_data->data; + unsigned long flags; + u32 i; + + if (!iwl_trans_grab_nic_access(trans, &flags)) + return 0; + + iwl_write_prph_no_grab(trans, MON_DMARB_RD_CTL_ADDR, 0x1); + for (i = 0; i < buf_size_in_dwords; i++) + buffer[i] = iwl_read_prph_no_grab(trans, + MON_DMARB_RD_DATA_ADDR); + iwl_write_prph_no_grab(trans, MON_DMARB_RD_CTL_ADDR, 0x0); + + iwl_trans_release_nic_access(trans, &flags); + + return monitor_len; +} + +static u32 +iwl_trans_pcie_dump_monitor(struct iwl_trans *trans, + struct iwl_fw_error_dump_data **data, + u32 monitor_len) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + u32 len = 0; + + if ((trans_pcie->fw_mon_page && + trans->cfg->device_family == IWL_DEVICE_FAMILY_7000) || + trans->dbg_dest_tlv) { + struct iwl_fw_error_dump_fw_mon *fw_mon_data; + u32 base, write_ptr, wrap_cnt; + + /* If there was a dest TLV - use the values from there */ + if (trans->dbg_dest_tlv) { + write_ptr = + le32_to_cpu(trans->dbg_dest_tlv->write_ptr_reg); + wrap_cnt = le32_to_cpu(trans->dbg_dest_tlv->wrap_count); + base = le32_to_cpu(trans->dbg_dest_tlv->base_reg); + } else { + base = MON_BUFF_BASE_ADDR; + write_ptr = MON_BUFF_WRPTR; + wrap_cnt = MON_BUFF_CYCLE_CNT; + } + + 
(*data)->type = cpu_to_le32(IWL_FW_ERROR_DUMP_FW_MONITOR); + fw_mon_data = (void *)(*data)->data; + fw_mon_data->fw_mon_wr_ptr = + cpu_to_le32(iwl_read_prph(trans, write_ptr)); + fw_mon_data->fw_mon_cycle_cnt = + cpu_to_le32(iwl_read_prph(trans, wrap_cnt)); + fw_mon_data->fw_mon_base_ptr = + cpu_to_le32(iwl_read_prph(trans, base)); + + len += sizeof(**data) + sizeof(*fw_mon_data); + if (trans_pcie->fw_mon_page) { + /* + * The firmware is now asserted, it won't write anything + * to the buffer. CPU can take ownership to fetch the + * data. The buffer will be handed back to the device + * before the firmware will be restarted. + */ + dma_sync_single_for_cpu(trans->dev, + trans_pcie->fw_mon_phys, + trans_pcie->fw_mon_size, + DMA_FROM_DEVICE); + memcpy(fw_mon_data->data, + page_address(trans_pcie->fw_mon_page), + trans_pcie->fw_mon_size); + + monitor_len = trans_pcie->fw_mon_size; + } else if (trans->dbg_dest_tlv->monitor_mode == SMEM_MODE) { + /* + * Update pointers to reflect actual values after + * shifting + */ + base = iwl_read_prph(trans, base) << + trans->dbg_dest_tlv->base_shift; + iwl_trans_read_mem(trans, base, fw_mon_data->data, + monitor_len / sizeof(u32)); + } else if (trans->dbg_dest_tlv->monitor_mode == MARBH_MODE) { + monitor_len = + iwl_trans_pci_dump_marbh_monitor(trans, + fw_mon_data, + monitor_len); + } else { + /* Didn't match anything - output no monitor data */ + monitor_len = 0; + } + + len += monitor_len; + (*data)->len = cpu_to_le32(monitor_len + sizeof(*fw_mon_data)); + } + + return len; +} + +static struct iwl_trans_dump_data +*iwl_trans_pcie_dump_data(struct iwl_trans *trans, + const struct iwl_fw_dbg_trigger_tlv *trigger) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + struct iwl_fw_error_dump_data *data; + struct iwl_txq *cmdq = &trans_pcie->txq[trans_pcie->cmd_queue]; + struct iwl_fw_error_dump_txcmd *txcmd; + struct iwl_trans_dump_data *dump_data; + u32 len, num_rbs; + u32 monitor_len; + int i, ptr; + bool dump_rbs = test_bit(STATUS_FW_ERROR, &trans->status); + + /* transport dump header */ + len = sizeof(*dump_data); + + /* host commands */ + len += sizeof(*data) + + cmdq->q.n_window * (sizeof(*txcmd) + TFD_MAX_PAYLOAD_SIZE); + + /* FW monitor */ + if (trans_pcie->fw_mon_page) { + len += sizeof(*data) + sizeof(struct iwl_fw_error_dump_fw_mon) + + trans_pcie->fw_mon_size; + monitor_len = trans_pcie->fw_mon_size; + } else if (trans->dbg_dest_tlv) { + u32 base, end; + + base = le32_to_cpu(trans->dbg_dest_tlv->base_reg); + end = le32_to_cpu(trans->dbg_dest_tlv->end_reg); + + base = iwl_read_prph(trans, base) << + trans->dbg_dest_tlv->base_shift; + end = iwl_read_prph(trans, end) << + trans->dbg_dest_tlv->end_shift; + + /* Make "end" point to the actual end */ + if (trans->cfg->device_family == IWL_DEVICE_FAMILY_8000 || + trans->dbg_dest_tlv->monitor_mode == MARBH_MODE) + end += (1 << trans->dbg_dest_tlv->end_shift); + monitor_len = end - base; + len += sizeof(*data) + sizeof(struct iwl_fw_error_dump_fw_mon) + + monitor_len; + } else { + monitor_len = 0; + } + + if (trigger && (trigger->mode & IWL_FW_DBG_TRIGGER_MONITOR_ONLY)) { + dump_data = vzalloc(len); + if (!dump_data) + return NULL; + + data = (void *)dump_data->data; + len = iwl_trans_pcie_dump_monitor(trans, &data, monitor_len); + dump_data->len = len; + + return dump_data; + } + + /* CSR registers */ + len += sizeof(*data) + IWL_CSR_TO_DUMP; + + /* FH registers */ + len += sizeof(*data) + (FH_MEM_UPPER_BOUND - FH_MEM_LOWER_BOUND); + + if (dump_rbs) { + /* RBs */ + num_rbs = 
le16_to_cpu(ACCESS_ONCE( + trans_pcie->rxq.rb_stts->closed_rb_num)) + & 0x0FFF; + num_rbs = (num_rbs - trans_pcie->rxq.read) & RX_QUEUE_MASK; + len += num_rbs * (sizeof(*data) + + sizeof(struct iwl_fw_error_dump_rb) + + (PAGE_SIZE << trans_pcie->rx_page_order)); + } + + dump_data = vzalloc(len); + if (!dump_data) + return NULL; + + len = 0; + data = (void *)dump_data->data; + data->type = cpu_to_le32(IWL_FW_ERROR_DUMP_TXCMD); + txcmd = (void *)data->data; + spin_lock_bh(&cmdq->lock); + ptr = cmdq->q.write_ptr; + for (i = 0; i < cmdq->q.n_window; i++) { + u8 idx = get_cmd_index(&cmdq->q, ptr); + u32 caplen, cmdlen; + + cmdlen = iwl_trans_pcie_get_cmdlen(&cmdq->tfds[ptr]); + caplen = min_t(u32, TFD_MAX_PAYLOAD_SIZE, cmdlen); + + if (cmdlen) { + len += sizeof(*txcmd) + caplen; + txcmd->cmdlen = cpu_to_le32(cmdlen); + txcmd->caplen = cpu_to_le32(caplen); + memcpy(txcmd->data, cmdq->entries[idx].cmd, caplen); + txcmd = (void *)((u8 *)txcmd->data + caplen); + } + + ptr = iwl_queue_dec_wrap(ptr); + } + spin_unlock_bh(&cmdq->lock); + + data->len = cpu_to_le32(len); + len += sizeof(*data); + data = iwl_fw_error_next_data(data); + + len += iwl_trans_pcie_dump_csr(trans, &data); + len += iwl_trans_pcie_fh_regs_dump(trans, &data); + if (dump_rbs) + len += iwl_trans_pcie_dump_rbs(trans, &data, num_rbs); + + len += iwl_trans_pcie_dump_monitor(trans, &data, monitor_len); + + dump_data->len = len; + + return dump_data; +} + +static const struct iwl_trans_ops trans_ops_pcie = { + .start_hw = iwl_trans_pcie_start_hw, + .op_mode_leave = iwl_trans_pcie_op_mode_leave, + .fw_alive = iwl_trans_pcie_fw_alive, + .start_fw = iwl_trans_pcie_start_fw, + .stop_device = iwl_trans_pcie_stop_device, + + .d3_suspend = iwl_trans_pcie_d3_suspend, + .d3_resume = iwl_trans_pcie_d3_resume, + + .send_cmd = iwl_trans_pcie_send_hcmd, + + .tx = iwl_trans_pcie_tx, + .reclaim = iwl_trans_pcie_reclaim, + + .txq_disable = iwl_trans_pcie_txq_disable, + .txq_enable = iwl_trans_pcie_txq_enable, + + .wait_tx_queue_empty = iwl_trans_pcie_wait_txq_empty, + .freeze_txq_timer = iwl_trans_pcie_freeze_txq_timer, + .block_txq_ptrs = iwl_trans_pcie_block_txq_ptrs, + + .write8 = iwl_trans_pcie_write8, + .write32 = iwl_trans_pcie_write32, + .read32 = iwl_trans_pcie_read32, + .read_prph = iwl_trans_pcie_read_prph, + .write_prph = iwl_trans_pcie_write_prph, + .read_mem = iwl_trans_pcie_read_mem, + .write_mem = iwl_trans_pcie_write_mem, + .configure = iwl_trans_pcie_configure, + .set_pmi = iwl_trans_pcie_set_pmi, + .grab_nic_access = iwl_trans_pcie_grab_nic_access, + .release_nic_access = iwl_trans_pcie_release_nic_access, + .set_bits_mask = iwl_trans_pcie_set_bits_mask, + + .ref = iwl_trans_pcie_ref, + .unref = iwl_trans_pcie_unref, + + .dump_data = iwl_trans_pcie_dump_data, +}; + +struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, + const struct pci_device_id *ent, + const struct iwl_cfg *cfg) +{ + struct iwl_trans_pcie *trans_pcie; + struct iwl_trans *trans; + u16 pci_cmd; + int ret; + + trans = iwl_trans_alloc(sizeof(struct iwl_trans_pcie), + &pdev->dev, cfg, &trans_ops_pcie, 0); + if (!trans) + return ERR_PTR(-ENOMEM); + + trans->max_skb_frags = IWL_PCIE_MAX_FRAGS; + + trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + + trans_pcie->trans = trans; + spin_lock_init(&trans_pcie->irq_lock); + spin_lock_init(&trans_pcie->reg_lock); + spin_lock_init(&trans_pcie->ref_lock); + mutex_init(&trans_pcie->mutex); + init_waitqueue_head(&trans_pcie->ucode_write_waitq); + trans_pcie->tso_hdr_page = alloc_percpu(struct iwl_tso_hdr_page); + if 
(!trans_pcie->tso_hdr_page) { + ret = -ENOMEM; + goto out_no_pci; + } + + ret = pci_enable_device(pdev); + if (ret) + goto out_no_pci; + + if (!cfg->base_params->pcie_l1_allowed) { + /* + * W/A - seems to solve weird behavior. We need to remove this + * if we don't want to stay in L1 all the time. This wastes a + * lot of power. + */ + pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S | + PCIE_LINK_STATE_L1 | + PCIE_LINK_STATE_CLKPM); + } + + pci_set_master(pdev); + + ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(36)); + if (!ret) + ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(36)); + if (ret) { + ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + if (!ret) + ret = pci_set_consistent_dma_mask(pdev, + DMA_BIT_MASK(32)); + /* both attempts failed: */ + if (ret) { + dev_err(&pdev->dev, "No suitable DMA available\n"); + goto out_pci_disable_device; + } + } + + ret = pci_request_regions(pdev, DRV_NAME); + if (ret) { + dev_err(&pdev->dev, "pci_request_regions failed\n"); + goto out_pci_disable_device; + } + + trans_pcie->hw_base = pci_ioremap_bar(pdev, 0); + if (!trans_pcie->hw_base) { + dev_err(&pdev->dev, "pci_ioremap_bar failed\n"); + ret = -ENODEV; + goto out_pci_release_regions; + } + + /* We disable the RETRY_TIMEOUT register (0x41) to keep + * PCI Tx retries from interfering with C3 CPU state */ + pci_write_config_byte(pdev, PCI_CFG_RETRY_TIMEOUT, 0x00); + + trans->dev = &pdev->dev; + trans_pcie->pci_dev = pdev; + iwl_disable_interrupts(trans); + + ret = pci_enable_msi(pdev); + if (ret) { + dev_err(&pdev->dev, "pci_enable_msi failed(0X%x)\n", ret); + /* enable rfkill interrupt: hw bug w/a */ + pci_read_config_word(pdev, PCI_COMMAND, &pci_cmd); + if (pci_cmd & PCI_COMMAND_INTX_DISABLE) { + pci_cmd &= ~PCI_COMMAND_INTX_DISABLE; + pci_write_config_word(pdev, PCI_COMMAND, pci_cmd); + } + } + + trans->hw_rev = iwl_read32(trans, CSR_HW_REV); + /* + * In the 8000 HW family the format of the 4 bytes of CSR_HW_REV have + * changed, and now the revision step also includes bit 0-1 (no more + * "dash" value). To keep hw_rev backwards compatible - we'll store it + * in the old format. + */ + if (trans->cfg->device_family == IWL_DEVICE_FAMILY_8000) { + unsigned long flags; + + trans->hw_rev = (trans->hw_rev & 0xfff0) | + (CSR_HW_REV_STEP(trans->hw_rev << 2) << 2); + + ret = iwl_pcie_prepare_card_hw(trans); + if (ret) { + IWL_WARN(trans, "Exit HW not ready\n"); + goto out_pci_disable_msi; + } + + /* + * in-order to recognize C step driver should read chip version + * id located at the AUX bus MISC address space. 
+ */ + iwl_set_bit(trans, CSR_GP_CNTRL, + CSR_GP_CNTRL_REG_FLAG_INIT_DONE); + udelay(2); + + ret = iwl_poll_bit(trans, CSR_GP_CNTRL, + CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY, + CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY, + 25000); + if (ret < 0) { + IWL_DEBUG_INFO(trans, "Failed to wake up the nic\n"); + goto out_pci_disable_msi; + } + + if (iwl_trans_grab_nic_access(trans, &flags)) { + u32 hw_step; + + hw_step = iwl_read_prph_no_grab(trans, WFPM_CTRL_REG); + hw_step |= ENABLE_WFPM; + iwl_write_prph_no_grab(trans, WFPM_CTRL_REG, hw_step); + hw_step = iwl_read_prph_no_grab(trans, AUX_MISC_REG); + hw_step = (hw_step >> HW_STEP_LOCATION_BITS) & 0xF; + if (hw_step == 0x3) + trans->hw_rev = (trans->hw_rev & 0xFFFFFFF3) | + (SILICON_C_STEP << 2); + iwl_trans_release_nic_access(trans, &flags); + } + } + + trans->hw_id = (pdev->device << 16) + pdev->subsystem_device; + snprintf(trans->hw_id_str, sizeof(trans->hw_id_str), + "PCI ID: 0x%04X:0x%04X", pdev->device, pdev->subsystem_device); + + /* Initialize the wait queue for commands */ + init_waitqueue_head(&trans_pcie->wait_command_queue); + + ret = iwl_pcie_alloc_ict(trans); + if (ret) + goto out_pci_disable_msi; + + ret = request_threaded_irq(pdev->irq, iwl_pcie_isr, + iwl_pcie_irq_handler, + IRQF_SHARED, DRV_NAME, trans); + if (ret) { + IWL_ERR(trans, "Error allocating IRQ %d\n", pdev->irq); + goto out_free_ict; + } + + trans_pcie->inta_mask = CSR_INI_SET_MASK; + + return trans; + +out_free_ict: + iwl_pcie_free_ict(trans); +out_pci_disable_msi: + pci_disable_msi(pdev); +out_pci_release_regions: + pci_release_regions(pdev); +out_pci_disable_device: + pci_disable_device(pdev); +out_no_pci: + free_percpu(trans_pcie->tso_hdr_page); + iwl_trans_free(trans); + return ERR_PTR(ret); +} diff --git a/drivers/net/wireless/iwlwifi/pcie/trans.c b/drivers/net/wireless/iwlwifi/pcie/trans.c index 1de80a8e357a0..840c47d8e2ce4 100644 --- a/drivers/net/wireless/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/iwlwifi/pcie/trans.c @@ -7,6 +7,7 @@ * * Copyright(c) 2007 - 2015 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH + * Copyright(c) 2016 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -33,6 +34,7 @@ * * Copyright(c) 2005 - 2015 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH + * Copyright(c) 2016 Intel Deutschland GmbH * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -881,9 +883,16 @@ static void iwl_pcie_apply_destination(struct iwl_trans *trans) if (dest->monitor_mode == EXTERNAL_MODE && trans_pcie->fw_mon_size) { iwl_write_prph(trans, le32_to_cpu(dest->base_reg), trans_pcie->fw_mon_phys >> dest->base_shift); - iwl_write_prph(trans, le32_to_cpu(dest->end_reg), - (trans_pcie->fw_mon_phys + - trans_pcie->fw_mon_size) >> dest->end_shift); + if (trans->cfg->device_family == IWL_DEVICE_FAMILY_8000) + iwl_write_prph(trans, le32_to_cpu(dest->end_reg), + (trans_pcie->fw_mon_phys + + trans_pcie->fw_mon_size - 256) >> + dest->end_shift); + else + iwl_write_prph(trans, le32_to_cpu(dest->end_reg), + (trans_pcie->fw_mon_phys + + trans_pcie->fw_mon_size) >> + dest->end_shift); } } From 2e21928b854d0201e5af8413daaa8dfe1b32690d Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 7 Jan 2016 16:38:10 -0500 Subject: [PATCH 1711/2091] locks: fix unlock when fcntl_setlk races with a close [ Upstream commit 7f3697e24dc3820b10f445a4a7d914fc356012d1 ] Dmitry reported that he was able to reproduce the WARN_ON_ONCE that fires in locks_free_lock_context when the flc_posix list isn't empty. The problem turns out to be that we're basically rebuilding the file_lock from scratch in fcntl_setlk when we discover that the setlk has raced with a close. If the l_whence field is SEEK_CUR or SEEK_END, then we may end up with fl_start and fl_end values that differ from when the lock was initially set, if the file position or length of the file has changed in the interim. Fix this by just reusing the same lock request structure, and simply override fl_type value with F_UNLCK as appropriate. That ensures that we really are unlocking the lock that was initially set. While we're there, make sure that we do pop a WARN_ON_ONCE if the removal ever fails. Also return -EBADF in this event, since that's what we would have returned if the close had happened earlier. Cc: Alexander Viro Cc: Fixes: c293621bbf67 (stale POSIX lock handling) Reported-by: Dmitry Vyukov Signed-off-by: Jeff Layton Acked-by: "J. Bruce Fields" Signed-off-by: Sasha Levin --- fs/locks.c | 51 ++++++++++++++++++++++++++++++--------------------- 1 file changed, 30 insertions(+), 21 deletions(-) diff --git a/fs/locks.c b/fs/locks.c index d3d558ba4da79..8501eecb2af0c 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -2154,7 +2154,6 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd, goto out; } -again: error = flock_to_posix_lock(filp, file_lock, &flock); if (error) goto out; @@ -2196,19 +2195,22 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd, * Attempt to detect a close/fcntl race and recover by * releasing the lock that was just acquired. */ - /* - * we need that spin_lock here - it prevents reordering between - * update of i_flctx->flc_posix and check for it done in close(). - * rcu_read_lock() wouldn't do. - */ - spin_lock(¤t->files->file_lock); - f = fcheck(fd); - spin_unlock(¤t->files->file_lock); - if (!error && f != filp && flock.l_type != F_UNLCK) { - flock.l_type = F_UNLCK; - goto again; + if (!error && file_lock->fl_type != F_UNLCK) { + /* + * We need that spin_lock here - it prevents reordering between + * update of i_flctx->flc_posix and check for it done in + * close(). rcu_read_lock() wouldn't do. 
+ */ + spin_lock(¤t->files->file_lock); + f = fcheck(fd); + spin_unlock(¤t->files->file_lock); + if (f != filp) { + file_lock->fl_type = F_UNLCK; + error = do_lock_file_wait(filp, cmd, file_lock); + WARN_ON_ONCE(error); + error = -EBADF; + } } - out: locks_free_lock(file_lock); return error; @@ -2294,7 +2296,6 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd, goto out; } -again: error = flock64_to_posix_lock(filp, file_lock, &flock); if (error) goto out; @@ -2336,14 +2337,22 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd, * Attempt to detect a close/fcntl race and recover by * releasing the lock that was just acquired. */ - spin_lock(¤t->files->file_lock); - f = fcheck(fd); - spin_unlock(¤t->files->file_lock); - if (!error && f != filp && flock.l_type != F_UNLCK) { - flock.l_type = F_UNLCK; - goto again; + if (!error && file_lock->fl_type != F_UNLCK) { + /* + * We need that spin_lock here - it prevents reordering between + * update of i_flctx->flc_posix and check for it done in + * close(). rcu_read_lock() wouldn't do. + */ + spin_lock(¤t->files->file_lock); + f = fcheck(fd); + spin_unlock(¤t->files->file_lock); + if (f != filp) { + file_lock->fl_type = F_UNLCK; + error = do_lock_file_wait(filp, cmd, file_lock); + WARN_ON_ONCE(error); + error = -EBADF; + } } - out: locks_free_lock(file_lock); return error; From 9d8246cd71ec50f8aa57c1ea74d8aff314908043 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 7 Jan 2016 16:34:05 -0600 Subject: [PATCH 1712/2091] scsi: add Synology to 1024 sector blacklist [ Upstream commit 9055082fb100cc66e20c048251d05159f5f2cfba ] Another iscsi target that cannot handle large IOs, but does not tell us a limit. The Synology iSCSI targets report: Block limits VPD page (SBC): Write same no zero (WSNZ): 0 Maximum compare and write length: 0 blocks Optimal transfer length granularity: 0 blocks Maximum transfer length: 0 blocks Optimal transfer length: 0 blocks Maximum prefetch length: 0 blocks Maximum unmap LBA count: 0 Maximum unmap block descriptor count: 0 Optimal unmap granularity: 0 Unmap granularity alignment valid: 0 Unmap granularity alignment: 0 Maximum write same length: 0x0 blocks and the size of the command it can handle seems to depend on how much memory it can allocate at the time. This results in IO errors when handling large IOs. This patch just has us use the old 1024 default sectors for this target by adding it to the scsi blacklist. We do not have good contacs with this vendors, so I have not been able to try and fix on their side. I have posted this a long while back, but it was not merged. This version just fixes it up for merge/patch failures in the original version. Reported-by: Ancoron Luciferis Reported-by: Michael Meyers Signed-off-by: Mike Christie Signed-off-by: Martin K. 
Petersen Cc: # 4.1+ Signed-off-by: Sasha Levin --- drivers/scsi/scsi_devinfo.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c index 9f77d23239a26..64ed88a67e6e9 100644 --- a/drivers/scsi/scsi_devinfo.c +++ b/drivers/scsi/scsi_devinfo.c @@ -227,6 +227,7 @@ static struct { {"Promise", "VTrak E610f", NULL, BLIST_SPARSELUN | BLIST_NO_RSOC}, {"Promise", "", NULL, BLIST_SPARSELUN}, {"QNAP", "iSCSI Storage", NULL, BLIST_MAX_1024}, + {"SYNOLOGY", "iSCSI Storage", NULL, BLIST_MAX_1024}, {"QUANTUM", "XP34301", "1071", BLIST_NOTQ}, {"REGAL", "CDC-4X", NULL, BLIST_MAX5LUN | BLIST_SINGLELUN}, {"SanDisk", "ImageMate CF-SD1", NULL, BLIST_FORCELUN}, From 27f8627c3a656409f8594e3d976d03ab445d7fcc Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 8 Jan 2016 19:07:55 -0500 Subject: [PATCH 1713/2091] dm snapshot: fix hung bios when copy error occurs [ Upstream commit 385277bfb57faac44e92497104ba542cdd82d5fe ] When there is an error copying a chunk dm-snapshot can incorrectly hold associated bios indefinitely, resulting in hung IO. The function copy_callback sets pe->error if there was error copying the chunk, and then calls complete_exception. complete_exception calls pending_complete on error, otherwise it calls commit_exception with commit_callback (and commit_callback calls complete_exception). The persistent exception store (dm-snap-persistent.c) assumes that calls to prepare_exception and commit_exception are paired. persistent_prepare_exception increases ps->pending_count and persistent_commit_exception decreases it. If there is a copy error, persistent_prepare_exception is called but persistent_commit_exception is not. This results in the variable ps->pending_count never returning to zero and that causes some pending exceptions (and their associated bios) to be held forever. Fix this by unconditionally calling commit_exception regardless of whether the copy was successful. A new "valid" parameter is added to commit_exception -- when the copy fails this parameter is set to zero so that the chunk that failed to copy (and all following chunks) is not recorded in the snapshot store. Also, remove commit_callback now that it is merely a wrapper around pending_complete. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org Signed-off-by: Sasha Levin --- drivers/md/dm-exception-store.h | 2 +- drivers/md/dm-snap-persistent.c | 5 ++++- drivers/md/dm-snap-transient.c | 4 ++-- drivers/md/dm-snap.c | 20 +++++--------------- 4 files changed, 12 insertions(+), 19 deletions(-) diff --git a/drivers/md/dm-exception-store.h b/drivers/md/dm-exception-store.h index 0b2536247cf55..84e27708ad973 100644 --- a/drivers/md/dm-exception-store.h +++ b/drivers/md/dm-exception-store.h @@ -70,7 +70,7 @@ struct dm_exception_store_type { * Update the metadata with this exception. 
*/ void (*commit_exception) (struct dm_exception_store *store, - struct dm_exception *e, + struct dm_exception *e, int valid, void (*callback) (void *, int success), void *callback_context); diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c index 808b8419bc48d..9feb894e5565a 100644 --- a/drivers/md/dm-snap-persistent.c +++ b/drivers/md/dm-snap-persistent.c @@ -694,7 +694,7 @@ static int persistent_prepare_exception(struct dm_exception_store *store, } static void persistent_commit_exception(struct dm_exception_store *store, - struct dm_exception *e, + struct dm_exception *e, int valid, void (*callback) (void *, int success), void *callback_context) { @@ -703,6 +703,9 @@ static void persistent_commit_exception(struct dm_exception_store *store, struct core_exception ce; struct commit_callback *cb; + if (!valid) + ps->valid = 0; + ce.old_chunk = e->old_chunk; ce.new_chunk = e->new_chunk; write_exception(ps, ps->current_committed++, &ce); diff --git a/drivers/md/dm-snap-transient.c b/drivers/md/dm-snap-transient.c index 1ce9a2586e413..31439d53cf7e8 100644 --- a/drivers/md/dm-snap-transient.c +++ b/drivers/md/dm-snap-transient.c @@ -52,12 +52,12 @@ static int transient_prepare_exception(struct dm_exception_store *store, } static void transient_commit_exception(struct dm_exception_store *store, - struct dm_exception *e, + struct dm_exception *e, int valid, void (*callback) (void *, int success), void *callback_context) { /* Just succeed */ - callback(callback_context, 1); + callback(callback_context, valid); } static void transient_usage(struct dm_exception_store *store, diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index f83a0f3fc3656..11ec9d2a27df2 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -1428,8 +1428,9 @@ static void __invalidate_snapshot(struct dm_snapshot *s, int err) dm_table_event(s->ti->table); } -static void pending_complete(struct dm_snap_pending_exception *pe, int success) +static void pending_complete(void *context, int success) { + struct dm_snap_pending_exception *pe = context; struct dm_exception *e; struct dm_snapshot *s = pe->snap; struct bio *origin_bios = NULL; @@ -1500,24 +1501,13 @@ static void pending_complete(struct dm_snap_pending_exception *pe, int success) free_pending_exception(pe); } -static void commit_callback(void *context, int success) -{ - struct dm_snap_pending_exception *pe = context; - - pending_complete(pe, success); -} - static void complete_exception(struct dm_snap_pending_exception *pe) { struct dm_snapshot *s = pe->snap; - if (unlikely(pe->copy_error)) - pending_complete(pe, 0); - - else - /* Update the metadata if we are persistent */ - s->store->type->commit_exception(s->store, &pe->e, - commit_callback, pe); + /* Update the metadata if we are persistent */ + s->store->type->commit_exception(s->store, &pe->e, !pe->copy_error, + pending_complete, pe); } /* From 4e623240d3054ca29fbb294d15a24b2ed971a413 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Wed, 16 Dec 2015 21:59:56 +0100 Subject: [PATCH 1714/2091] uml: fix hostfs mknod() [ Upstream commit 9f2dfda2f2f1c6181c3732c16b85c59ab2d195e0 ] An inverted return value check in hostfs_mknod() caused the function to return success after handling it as an error (and cleaning up). 
It resulted in the following segfault when trying to bind() a named unix socket: Pid: 198, comm: a.out Not tainted 4.4.0-rc4 RIP: 0033:[<0000000061077df6>] RSP: 00000000daae5d60 EFLAGS: 00010202 RAX: 0000000000000000 RBX: 000000006092a460 RCX: 00000000dfc54208 RDX: 0000000061073ef1 RSI: 0000000000000070 RDI: 00000000e027d600 RBP: 00000000daae5de0 R08: 00000000da980ac0 R09: 0000000000000000 R10: 0000000000000003 R11: 00007fb1ae08f72a R12: 0000000000000000 R13: 000000006092a460 R14: 00000000daaa97c0 R15: 00000000daaa9a88 Kernel panic - not syncing: Kernel mode fault at addr 0x40, ip 0x61077df6 CPU: 0 PID: 198 Comm: a.out Not tainted 4.4.0-rc4 #1 Stack: e027d620 dfc54208 0000006f da981398 61bee000 0000c1ed daae5de0 0000006e e027d620 dfcd4208 00000005 6092a460 Call Trace: [<60dedc67>] SyS_bind+0xf7/0x110 [<600587be>] handle_syscall+0x7e/0x80 [<60066ad7>] userspace+0x3e7/0x4e0 [<6006321f>] ? save_registers+0x1f/0x40 [<6006c88e>] ? arch_prctl+0x1be/0x1f0 [<60054985>] fork_handler+0x85/0x90 Let's also get rid of the "cosmic ray protection" while we're at it. Fixes: e9193059b1b3 "hostfs: fix races in dentry_name() and inode_name()" Signed-off-by: Vegard Nossum Cc: Jeff Dike Cc: Al Viro Cc: stable@vger.kernel.org Signed-off-by: Richard Weinberger Signed-off-by: Sasha Levin --- fs/hostfs/hostfs_kern.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index de2d6245e9fad..f895a85d93048 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -730,15 +730,13 @@ static int hostfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, init_special_inode(inode, mode, dev); err = do_mknod(name, mode, MAJOR(dev), MINOR(dev)); - if (!err) + if (err) goto out_free; err = read_name(inode, name); __putname(name); if (err) goto out_put; - if (err) - goto out_put; d_instantiate(dentry, inode); return 0; From 498b346868790663ef443951bb06a30a57b64b32 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Fri, 18 Dec 2015 21:28:53 +0100 Subject: [PATCH 1715/2091] uml: flush stdout before forking [ Upstream commit 0754fb298f2f2719f0393491d010d46cfb25d043 ] I was seeing some really weird behaviour where piping UML's output somewhere would cause output to get duplicated: $ ./vmlinux | head -n 40 Checking that ptrace can change system call numbers...Core dump limits : soft - 0 hard - NONE OK Checking syscall emulation patch for ptrace...Core dump limits : soft - 0 hard - NONE OK Checking advanced syscall emulation patch for ptrace...Core dump limits : soft - 0 hard - NONE OK Core dump limits : soft - 0 hard - NONE This is because these tests do a fork() which duplicates the non-empty stdout buffer, then glibc flushes the duplicated buffer as each child exits. A simple workaround is to flush before forking. 
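The effect is easy to reproduce in plain userspace. A minimal sketch, assuming glibc-style buffered stdio; this is an illustration only, not part of the patch:

	#include <stdio.h>
	#include <stdlib.h>
	#include <unistd.h>
	#include <sys/wait.h>

	int main(void)
	{
		/* With stdout piped it is fully buffered, so this text sits in
		 * the stdio buffer instead of being written out immediately. */
		printf("Checking that ptrace works...");

		fflush(stdout);	/* the workaround: drain the buffer before fork() */

		pid_t pid = fork();
		if (pid == 0)
			exit(0);	/* the child's exit() flushes its inherited copy */
		waitpid(pid, NULL, 0);

		printf(" OK\n");
		return 0;
	}

If the fflush() call is removed and the program is run as "./a.out | cat", the "Checking..." text appears twice, once from each process's copy of the buffer; with the flush in place it appears exactly once.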
Cc: stable@vger.kernel.org Signed-off-by: Vegard Nossum Signed-off-by: Richard Weinberger Signed-off-by: Sasha Levin --- arch/um/os-Linux/start_up.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c index 47f1ff056a54f..22a358ef1b0cd 100644 --- a/arch/um/os-Linux/start_up.c +++ b/arch/um/os-Linux/start_up.c @@ -94,6 +94,8 @@ static int start_ptraced_child(void) { int pid, n, status; + fflush(stdout); + pid = fork(); if (pid == 0) ptrace_child(); From ccafc9df447a3de7b5b88d246404d97f70f80d6f Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Fri, 8 Jan 2016 08:56:51 +1000 Subject: [PATCH 1716/2091] drm/nouveau/kms: take mode_config mutex in connector hotplug path [ Upstream commit 0a882cadbc63fd2da3994af7115b4ada2fcbd638 ] fdo#93634 Signed-off-by: Ben Skeggs Cc: stable@vger.kernel.org Signed-off-by: Sasha Levin --- drivers/gpu/drm/nouveau/nouveau_connector.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c index 3162040bc3148..05490ef5a2aa4 100644 --- a/drivers/gpu/drm/nouveau/nouveau_connector.c +++ b/drivers/gpu/drm/nouveau/nouveau_connector.c @@ -969,10 +969,13 @@ nouveau_connector_hotplug(struct nvif_notify *notify) NV_DEBUG(drm, "%splugged %s\n", plugged ? "" : "un", name); + mutex_lock(&drm->dev->mode_config.mutex); if (plugged) drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON); else drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); + mutex_unlock(&drm->dev->mode_config.mutex); + drm_helper_hpd_irq_event(connector->dev); } From 2826e0944f3c7a673da95fd226c8a752254c3e6a Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 1 Jan 2016 13:39:22 +0100 Subject: [PATCH 1717/2091] s390: fix normalization bug in exception table sorting [ Upstream commit bcb7825a77f41c7dd91da6f7ac10b928156a322e ] The normalization pass in the sorting routine of the relative exception table serves two purposes: - it ensures that the address fields of the exception table entries are fully ordered, so that no ambiguities arise between entries with identical instruction offsets (i.e., when two instructions that are exactly 8 bytes apart each have an exception table entry associated with them) - it ensures that the offsets of both the instruction and the fixup fields of each entry are relative to their final location after sorting. Commit eb608fb366de ("s390/exceptions: switch to relative exception table entries") ported the relative exception table format from x86, but modified the sorting routine to only normalize the instruction offset field and not the fixup offset field. The result is that the fixup offset of each entry will be relative to the original location of the entry before sorting, likely leading to crashes when those entries are dereferenced. 
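For reference, a simplified, architecture-neutral sketch of the normalize/sort/denormalize pattern that the fix restores. The field layout mirrors the s390 entries (two 4-byte relative offsets per 8-byte entry), and qsort() stands in for the kernel's sort() purely for illustration:

	#include <stdlib.h>

	struct rel_extable_entry {
		int insn;	/* offset of the faulting insn, relative to &entry->insn  */
		int fixup;	/* offset of the fixup code, relative to &entry->fixup    */
	};

	static int cmp_by_insn(const void *a, const void *b)
	{
		const struct rel_extable_entry *x = a, *y = b;

		return x->insn - y->insn;
	}

	static void sort_rel_extable(struct rel_extable_entry *start,
				     struct rel_extable_entry *finish)
	{
		struct rel_extable_entry *p;
		int i;

		/* Normalize both offsets to be relative to the start of the
		 * table, so they stay valid while entries change position. */
		for (p = start, i = 0; p < finish; p++, i += 8) {
			p->insn  += i;		/* &p->insn  lies i bytes into the table */
			p->fixup += i + 4;	/* &p->fixup lies i + 4 bytes in         */
		}

		qsort(start, finish - start, sizeof(*start), cmp_by_insn);

		/* Denormalize: make both offsets relative to their new slots. */
		for (p = start, i = 0; p < finish; p++, i += 8) {
			p->insn  -= i;
			p->fixup -= i + 4;
		}
	}

Dropping the two fixup adjustments reproduces the bug described above: after sorting, each fixup offset is still relative to the entry's pre-sort slot.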
Fixes: eb608fb366de ("s390/exceptions: switch to relative exception table entries") Signed-off-by: Ard Biesheuvel Cc: Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky Signed-off-by: Sasha Levin --- arch/s390/mm/extable.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/s390/mm/extable.c b/arch/s390/mm/extable.c index 4d1ee88864e8a..18c8b819b0aa9 100644 --- a/arch/s390/mm/extable.c +++ b/arch/s390/mm/extable.c @@ -52,12 +52,16 @@ void sort_extable(struct exception_table_entry *start, int i; /* Normalize entries to being relative to the start of the section */ - for (p = start, i = 0; p < finish; p++, i += 8) + for (p = start, i = 0; p < finish; p++, i += 8) { p->insn += i; + p->fixup += i + 4; + } sort(start, finish - start, sizeof(*start), cmp_ex, NULL); /* Denormalize all entries */ - for (p = start, i = 0; p < finish; p++, i += 8) + for (p = start, i = 0; p < finish; p++, i += 8) { p->insn -= i; + p->fixup -= i + 4; + } } #ifdef CONFIG_MODULES From 848235ac48d900d7c5e9c6cfb190d290cdb87a62 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 12 Jan 2016 07:03:44 +1100 Subject: [PATCH 1718/2091] xfs: inode recovery readahead can race with inode buffer creation [ Upstream commit b79f4a1c68bb99152d0785ee4ea3ab4396cdacc6 ] When we do inode readahead in log recovery, we do can do the readahead before we've replayed the icreate transaction that stamps the buffer with inode cores. The inode readahead verifier catches this and marks the buffer as !done to indicate that it doesn't yet contain valid inodes. In adding buffer error notification (i.e. setting b_error = -EIO at the same time as as we clear the done flag) to such a readahead verifier failure, we can then get subsequent inode recovery failing with this error: XFS (dm-0): metadata I/O error: block 0xa00060 ("xlog_recover_do..(read#2)") error 5 numblks 32 This occurs when readahead completion races with icreate item replay such as: inode readahead find buffer lock buffer submit RA io .... icreate recovery xfs_trans_get_buffer find buffer lock buffer ..... fails verifier clear XBF_DONE set bp->b_error = -EIO release and unlock buffer icreate initialises buffer marks buffer as done adds buffer to delayed write queue releases buffer At this point, we have an initialised inode buffer that is up to date but has an -EIO state registered against it. When we finally get to recovering an inode in that buffer: inode item recovery xfs_trans_read_buffer find buffer lock buffer sees XBF_DONE is set, returns buffer sees bp->b_error is set fail log recovery! Essentially, we need xfs_trans_get_buf_map() to clear the error status of the buffer when doing a lookup. This function returns uninitialised buffers, so the buffer returned can not be in an error state and none of the code that uses this function expects b_error to be set on return. Indeed, there is an ASSERT(!bp->b_error); in the transaction case in xfs_trans_get_buf_map() that would have caught this if log recovery used transactions.... This patch firstly changes the inode readahead failure to set -EIO on the buffer, and secondly changes xfs_buf_get_map() to never return a buffer with an error state set so this first change doesn't cause unexpected log recovery failures. 
cc: # 3.12 - current Signed-off-by: Dave Chinner Reviewed-by: Brian Foster Signed-off-by: Dave Chinner Signed-off-by: Sasha Levin --- fs/xfs/libxfs/xfs_inode_buf.c | 12 +++++++----- fs/xfs/xfs_buf.c | 7 +++++++ 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index 002b6b3a19885..8c68d3502f04c 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -63,11 +63,12 @@ xfs_inobp_check( * has not had the inode cores stamped into it. Hence for readahead, the buffer * may be potentially invalid. * - * If the readahead buffer is invalid, we don't want to mark it with an error, - * but we do want to clear the DONE status of the buffer so that a followup read - * will re-read it from disk. This will ensure that we don't get an unnecessary - * warnings during log recovery and we don't get unnecssary panics on debug - * kernels. + * If the readahead buffer is invalid, we need to mark it with an error and + * clear the DONE status of the buffer so that a followup read will re-read it + * from disk. We don't report the error otherwise to avoid warnings during log + * recovery and we don't get unnecssary panics on debug kernels. We use EIO here + * because all we want to do is say readahead failed; there is no-one to report + * the error to, so this will distinguish it from a non-ra verifier failure. */ static void xfs_inode_buf_verify( @@ -95,6 +96,7 @@ xfs_inode_buf_verify( XFS_RANDOM_ITOBP_INOTOBP))) { if (readahead) { bp->b_flags &= ~XBF_DONE; + xfs_buf_ioerror(bp, -EIO); return; } diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 1790b00bea7a7..82938ac05ba1d 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -605,6 +605,13 @@ xfs_buf_get_map( } } + /* + * Clear b_error if this is a lookup from a caller that doesn't expect + * valid data to be found in the buffer. + */ + if (!(flags & XBF_READ)) + xfs_buf_ioerror(bp, 0); + XFS_STATS_INC(xb_get); trace_xfs_buf_get(bp, flags, _RET_IP_); return bp; From e52b0623ee970dfa58d4af089329e193e59be918 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 12 Jan 2016 07:04:01 +1100 Subject: [PATCH 1719/2091] xfs: handle dquot buffer readahead in log recovery correctly [ Upstream commit 7d6a13f023567d573ac362502bb702eda716e654 ] When we do dquot readahead in log recovery, we do not use a verifier as the underlying buffer may not have dquots in it. e.g. the allocation operation hasn't yet been replayed. Hence we do not want to fail recovery because we detect an operation to be replayed has not been run yet. This problem was addressed for inodes in commit d891400 ("xfs: inode buffers may not be valid during recovery readahead") but the problem was not recognised to exist for dquots and their buffers as the dquot readahead did not have a verifier. The result of not using a verifier is that when the buffer is then next read to replay a dquot modification, the dquot buffer verifier will only be attached to the buffer if *readahead is not complete*. Hence we can read the buffer, replay the dquot changes and then add it to the delwri submission list without it having a verifier attached to it. This then generates warnings in xfs_buf_ioapply(), which catches and warns about this case. Fix this and make it handle the same readahead verifier error cases as for inode buffers by adding a new readahead verifier that has a write operation as well as a read operation that marks the buffer as not done if any corruption is detected. 
Also make sure we don't run readahead if the dquot buffer has been marked as cancelled by recovery. This will result in readahead either succeeding and the buffer having a valid write verifier, or readahead failing and the buffer state requiring the subsequent read to resubmit the IO with the new verifier. In either case, this will result in the buffer always ending up with a valid write verifier on it. Note: we also need to fix the inode buffer readahead error handling to mark the buffer with EIO. Brian noticed the code I copied from there wrong during review, so fix it at the same time. Add comments linking the two functions that handle readahead verifier errors together so we don't forget this behavioural link in future. cc: # 3.12 - current Signed-off-by: Dave Chinner Reviewed-by: Brian Foster Signed-off-by: Dave Chinner Signed-off-by: Sasha Levin --- fs/xfs/libxfs/xfs_dquot_buf.c | 35 ++++++++++++++++++++++++++++------ fs/xfs/libxfs/xfs_inode_buf.c | 2 ++ fs/xfs/libxfs/xfs_quota_defs.h | 2 +- fs/xfs/libxfs/xfs_shared.h | 1 + fs/xfs/xfs_log_recover.c | 9 +++++++-- 5 files changed, 40 insertions(+), 9 deletions(-) diff --git a/fs/xfs/libxfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c index 6fbf2d853a54e..48aff071591d2 100644 --- a/fs/xfs/libxfs/xfs_dquot_buf.c +++ b/fs/xfs/libxfs/xfs_dquot_buf.c @@ -54,7 +54,7 @@ xfs_dqcheck( xfs_dqid_t id, uint type, /* used only when IO_dorepair is true */ uint flags, - char *str) + const char *str) { xfs_dqblk_t *d = (xfs_dqblk_t *)ddq; int errs = 0; @@ -207,7 +207,8 @@ xfs_dquot_buf_verify_crc( STATIC bool xfs_dquot_buf_verify( struct xfs_mount *mp, - struct xfs_buf *bp) + struct xfs_buf *bp, + int warn) { struct xfs_dqblk *d = (struct xfs_dqblk *)bp->b_addr; xfs_dqid_t id = 0; @@ -240,8 +241,7 @@ xfs_dquot_buf_verify( if (i == 0) id = be32_to_cpu(ddq->d_id); - error = xfs_dqcheck(mp, ddq, id + i, 0, XFS_QMOPT_DOWARN, - "xfs_dquot_buf_verify"); + error = xfs_dqcheck(mp, ddq, id + i, 0, warn, __func__); if (error) return false; } @@ -256,13 +256,32 @@ xfs_dquot_buf_read_verify( if (!xfs_dquot_buf_verify_crc(mp, bp)) xfs_buf_ioerror(bp, -EFSBADCRC); - else if (!xfs_dquot_buf_verify(mp, bp)) + else if (!xfs_dquot_buf_verify(mp, bp, XFS_QMOPT_DOWARN)) xfs_buf_ioerror(bp, -EFSCORRUPTED); if (bp->b_error) xfs_verifier_error(bp); } +/* + * readahead errors are silent and simply leave the buffer as !done so a real + * read will then be run with the xfs_dquot_buf_ops verifier. See + * xfs_inode_buf_verify() for why we use EIO and ~XBF_DONE here rather than + * reporting the failure. + */ +static void +xfs_dquot_buf_readahead_verify( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + + if (!xfs_dquot_buf_verify_crc(mp, bp) || + !xfs_dquot_buf_verify(mp, bp, 0)) { + xfs_buf_ioerror(bp, -EIO); + bp->b_flags &= ~XBF_DONE; + } +} + /* * we don't calculate the CRC here as that is done when the dquot is flushed to * the buffer after the update is done. 
This ensures that the dquot in the @@ -274,7 +293,7 @@ xfs_dquot_buf_write_verify( { struct xfs_mount *mp = bp->b_target->bt_mount; - if (!xfs_dquot_buf_verify(mp, bp)) { + if (!xfs_dquot_buf_verify(mp, bp, XFS_QMOPT_DOWARN)) { xfs_buf_ioerror(bp, -EFSCORRUPTED); xfs_verifier_error(bp); return; @@ -286,3 +305,7 @@ const struct xfs_buf_ops xfs_dquot_buf_ops = { .verify_write = xfs_dquot_buf_write_verify, }; +const struct xfs_buf_ops xfs_dquot_buf_ra_ops = { + .verify_read = xfs_dquot_buf_readahead_verify, + .verify_write = xfs_dquot_buf_write_verify, +}; diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index 8c68d3502f04c..7da6d0b2c2edb 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -69,6 +69,8 @@ xfs_inobp_check( * recovery and we don't get unnecssary panics on debug kernels. We use EIO here * because all we want to do is say readahead failed; there is no-one to report * the error to, so this will distinguish it from a non-ra verifier failure. + * Changes to this readahead error behavour also need to be reflected in + * xfs_dquot_buf_readahead_verify(). */ static void xfs_inode_buf_verify( diff --git a/fs/xfs/libxfs/xfs_quota_defs.h b/fs/xfs/libxfs/xfs_quota_defs.h index 1b0a08379759d..f51078f1e92ad 100644 --- a/fs/xfs/libxfs/xfs_quota_defs.h +++ b/fs/xfs/libxfs/xfs_quota_defs.h @@ -153,7 +153,7 @@ typedef __uint16_t xfs_qwarncnt_t; #define XFS_QMOPT_RESBLK_MASK (XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_RES_RTBLKS) extern int xfs_dqcheck(struct xfs_mount *mp, xfs_disk_dquot_t *ddq, - xfs_dqid_t id, uint type, uint flags, char *str); + xfs_dqid_t id, uint type, uint flags, const char *str); extern int xfs_calc_dquots_per_chunk(unsigned int nbblks); #endif /* __XFS_QUOTA_H__ */ diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h index 8dda4b321343b..a3472a38efd20 100644 --- a/fs/xfs/libxfs/xfs_shared.h +++ b/fs/xfs/libxfs/xfs_shared.h @@ -49,6 +49,7 @@ extern const struct xfs_buf_ops xfs_inobt_buf_ops; extern const struct xfs_buf_ops xfs_inode_buf_ops; extern const struct xfs_buf_ops xfs_inode_buf_ra_ops; extern const struct xfs_buf_ops xfs_dquot_buf_ops; +extern const struct xfs_buf_ops xfs_dquot_buf_ra_ops; extern const struct xfs_buf_ops xfs_sb_buf_ops; extern const struct xfs_buf_ops xfs_sb_quiet_buf_ops; extern const struct xfs_buf_ops xfs_symlink_buf_ops; diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index a5d03396dda01..1114afdd5a6bb 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -3154,6 +3154,7 @@ xlog_recover_dquot_ra_pass2( struct xfs_disk_dquot *recddq; struct xfs_dq_logformat *dq_f; uint type; + int len; if (mp->m_qflags == 0) @@ -3174,8 +3175,12 @@ xlog_recover_dquot_ra_pass2( ASSERT(dq_f); ASSERT(dq_f->qlf_len == 1); - xfs_buf_readahead(mp->m_ddev_targp, dq_f->qlf_blkno, - XFS_FSB_TO_BB(mp, dq_f->qlf_len), NULL); + len = XFS_FSB_TO_BB(mp, dq_f->qlf_len); + if (xlog_peek_buffer_cancelled(log, dq_f->qlf_blkno, len, 0)) + return; + + xfs_buf_readahead(mp->m_ddev_targp, dq_f->qlf_blkno, len, + &xfs_dquot_buf_ra_ops); } STATIC void From 27853751e20c1d4c3d060210d38cc0b63ac0c991 Mon Sep 17 00:00:00 2001 From: Roman Volkov Date: Fri, 1 Jan 2016 16:24:41 +0300 Subject: [PATCH 1720/2091] clocksource/drivers/vt8500: Increase the minimum delta [ Upstream commit f9eccf24615672896dc13251410c3f2f33a14f95 ] The vt8500 clocksource driver declares itself as capable to handle the minimum delay of 4 cycles by passing the value into clockevents_config_and_register(). 
The vt8500_timer_set_next_event() requires the passed cycles value to be at least 16. The impact is that userspace hangs in nanosleep() calls with small delay intervals. This problem is reproducible in Linux 4.2 starting from: c6eb3f70d448 ('hrtimer: Get rid of hrtimer softirq') From Russell King, more detailed explanation: "It's a speciality of the StrongARM/PXA hardware. It takes a certain number of OSCR cycles for the value written to hit the compare registers. So, if a very small delta is written (eg, the compare register is written with a value of OSCR + 1), the OSCR will have incremented past this value before it hits the underlying hardware. The result is, that you end up waiting a very long time for the OSCR to wrap before the event fires. So, we introduce a check in set_next_event() to detect this and return -ETIME if the calculated delta is too small, which causes the generic clockevents code to retry after adding the min_delta specified in clockevents_config_and_register() to the current time value. min_delta must be sufficient that we don't re-trip the -ETIME check - if we do, we will return -ETIME, forward the next event time, try to set it, return -ETIME again, and basically lock the system up. So, min_delta must be larger than the check inside set_next_event(). A factor of two was chosen to ensure that this situation would never occur. The PXA code worked on PXA systems for years, and I'd suggest no one changes this mechanism without access to a wide range of PXA systems, otherwise they're risking breakage." Cc: stable@vger.kernel.org Cc: Russell King Acked-by: Alexey Charkov Signed-off-by: Roman Volkov Signed-off-by: Daniel Lezcano Signed-off-by: Sasha Levin --- drivers/clocksource/vt8500_timer.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/clocksource/vt8500_timer.c b/drivers/clocksource/vt8500_timer.c index 1098ed3b9b89f..dc45ddb36117d 100644 --- a/drivers/clocksource/vt8500_timer.c +++ b/drivers/clocksource/vt8500_timer.c @@ -50,6 +50,8 @@ #define msecs_to_loops(t) (loops_per_jiffy / 1000 * HZ * t) +#define MIN_OSCR_DELTA 16 + static void __iomem *regbase; static cycle_t vt8500_timer_read(struct clocksource *cs) @@ -80,7 +82,7 @@ static int vt8500_timer_set_next_event(unsigned long cycles, cpu_relax(); writel((unsigned long)alarm, regbase + TIMER_MATCH_VAL); - if ((signed)(alarm - clocksource.read(&clocksource)) <= 16) + if ((signed)(alarm - clocksource.read(&clocksource)) <= MIN_OSCR_DELTA) return -ETIME; writel(1, regbase + TIMER_IER_VAL); @@ -160,7 +162,7 @@ static void __init vt8500_timer_init(struct device_node *np) pr_err("%s: setup_irq failed for %s\n", __func__, clockevent.name); clockevents_config_and_register(&clockevent, VT8500_TIMER_HZ, - 4, 0xf0000000); + MIN_OSCR_DELTA * 2, 0xf0000000); } CLOCKSOURCE_OF_DECLARE(vt8500, "via,vt8500-timer", vt8500_timer_init); From 1fc9a4dd398062e9fa4937f446bedd3305312d1d Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Mon, 11 Jan 2016 17:35:38 -0800 Subject: [PATCH 1721/2091] Input: elantech - mark protocols v2 and v3 as semi-mt [ Upstream commit 6544a1df11c48c8413071aac3316792e4678fbfb ] When using a protocol v2 or v3 hardware, elantech uses the function elantech_report_semi_mt_data() to report data. This devices are rather creepy because if num_finger is 3, (x2,y2) is (0,0). Yes, only one valid touch is reported. Anyway, userspace (libinput) is now confused by these (0,0) touches, and detect them as palm, and rejects them. 
Commit 3c0213d17a09 ("Input: elantech - fix semi-mt protocol for v3 HW") was sufficient enough for xf86-input-synaptics and libinput before it has palm rejection. Now we need to actually tell libinput that this device is a semi-mt one and it should not rely on the actual values of the 2 touches. Cc: stable@vger.kernel.org Signed-off-by: Benjamin Tissoires Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/mouse/elantech.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c index ce3d40004458c..0f5b400706d77 100644 --- a/drivers/input/mouse/elantech.c +++ b/drivers/input/mouse/elantech.c @@ -1214,7 +1214,7 @@ static int elantech_set_input_params(struct psmouse *psmouse) input_set_abs_params(dev, ABS_TOOL_WIDTH, ETP_WMIN_V2, ETP_WMAX_V2, 0, 0); } - input_mt_init_slots(dev, 2, 0); + input_mt_init_slots(dev, 2, INPUT_MT_SEMI_MT); input_set_abs_params(dev, ABS_MT_POSITION_X, x_min, x_max, 0, 0); input_set_abs_params(dev, ABS_MT_POSITION_Y, y_min, y_max, 0, 0); break; From 7daaedba8fa87ade25763e15e5218c66558634c3 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Thu, 20 Aug 2015 00:49:48 +0300 Subject: [PATCH 1722/2091] virtio_ballon: change stub of release_pages_by_pfn [ Upstream commit b4d34037329f46ed818d3b0a6e1e23b9c8721f79 ] and rename it to release_pages_balloon. The function originally takes arrays of pfns and now it takes pointer to struct virtio_ballon. This change is necessary to conditionally call adjust_managed_page_count in the next patch. Signed-off-by: Denis V. Lunev CC: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Signed-off-by: Sasha Levin --- drivers/virtio/virtio_balloon.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 82e80e034f250..8543c9a97307a 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -166,13 +166,13 @@ static void fill_balloon(struct virtio_balloon *vb, size_t num) mutex_unlock(&vb->balloon_lock); } -static void release_pages_by_pfn(const u32 pfns[], unsigned int num) +static void release_pages_balloon(struct virtio_balloon *vb) { unsigned int i; /* Find pfns pointing at start of each page, get pages and free them. */ - for (i = 0; i < num; i += VIRTIO_BALLOON_PAGES_PER_PAGE) { - struct page *page = balloon_pfn_to_page(pfns[i]); + for (i = 0; i < vb->num_pfns; i += VIRTIO_BALLOON_PAGES_PER_PAGE) { + struct page *page = balloon_pfn_to_page(vb->pfns[i]); adjust_managed_page_count(page, 1); put_page(page); /* balloon reference */ } @@ -206,7 +206,7 @@ static unsigned leak_balloon(struct virtio_balloon *vb, size_t num) if (vb->num_pfns != 0) tell_host(vb, vb->deflate_vq); mutex_unlock(&vb->balloon_lock); - release_pages_by_pfn(vb->pfns, vb->num_pfns); + release_pages_balloon(vb); return num_freed_pages; } From 3bbe9868ba5654df63f9e10766620f813e92fdf8 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Mon, 28 Dec 2015 08:35:12 +0900 Subject: [PATCH 1723/2091] virtio_balloon: fix race by fill and leak [ Upstream commit f68b992bbb474641881932c61c92dcfa6f5b3689 ] During my compaction-related stuff, I encountered a bug with ballooning. With repeated inflating and deflating cycle, guest memory( ie, cat /proc/meminfo | grep MemTotal) is decreased and couldn't be recovered. 
The reason is balloon_lock doesn't cover release_pages_balloon so struct virtio_balloon fields could be overwritten by race of fill_balloon(e,g, vb->*pfns could be critical). This patch fixes it in my test. Cc: Signed-off-by: Minchan Kim Signed-off-by: Michael S. Tsirkin Signed-off-by: Sasha Levin --- drivers/virtio/virtio_balloon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 8543c9a97307a..89bac470f04e0 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -205,8 +205,8 @@ static unsigned leak_balloon(struct virtio_balloon *vb, size_t num) */ if (vb->num_pfns != 0) tell_host(vb, vb->deflate_vq); - mutex_unlock(&vb->balloon_lock); release_pages_balloon(vb); + mutex_unlock(&vb->balloon_lock); return num_freed_pages; } From c1a7154c7e5d81bba4b027d42ec1f16113992415 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Mon, 28 Dec 2015 08:35:13 +0900 Subject: [PATCH 1724/2091] virtio_balloon: fix race between migration and ballooning [ Upstream commit 21ea9fb69e7c4b1b1559c3e410943d3ff248ffcb ] In balloon_page_dequeue, pages_lock should cover the loop (ie, list_for_each_entry_safe). Otherwise, the cursor page could be isolated by compaction and then list_del by isolation could poison the page->lru.{prev,next} so the loop finally could access wrong address like this. This patch fixes the bug. general protection fault: 0000 [#1] SMP Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 2 PID: 82 Comm: vballoon Not tainted 4.4.0-rc5-mm1-access_bit+ #1906 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 task: ffff8800a7ff0000 ti: ffff8800a7fec000 task.ti: ffff8800a7fec000 RIP: 0010:[] [] balloon_page_dequeue+0x54/0x130 RSP: 0018:ffff8800a7fefdc0 EFLAGS: 00010246 RAX: ffff88013fff9a70 RBX: ffffea000056fe00 RCX: 0000000000002b7d RDX: ffff88013fff9a70 RSI: ffffea000056fe00 RDI: ffff88013fff9a68 RBP: ffff8800a7fefde8 R08: ffffea000056fda0 R09: 0000000000000000 R10: ffff8800a7fefd90 R11: 0000000000000001 R12: dead0000000000e0 R13: ffffea000056fe20 R14: ffff880138809070 R15: ffff880138809060 FS: 0000000000000000(0000) GS:ffff88013fc40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 00007f229c10e000 CR3: 00000000b8b53000 CR4: 00000000000006a0 Stack: 0000000000000100 ffff880138809088 ffff880138809000 ffff880138809060 0000000000000046 ffff8800a7fefe28 ffffffff812c86d3 ffff880138809020 ffff880138809000 fffffffffff91900 0000000000000100 ffff880138809060 Call Trace: [] leak_balloon+0x93/0x1a0 [] balloon+0x217/0x2a0 [] ? __schedule+0x31e/0x8b0 [] ? abort_exclusive_wait+0xb0/0xb0 [] ? update_balloon_stats+0xf0/0xf0 [] kthread+0xc9/0xe0 [] ? kthread_park+0x60/0x60 [] ret_from_fork+0x3f/0x70 [] ? kthread_park+0x60/0x60 Code: 8d 60 e0 0f 84 af 00 00 00 48 8b 43 20 a8 01 75 3b 48 89 d8 f0 0f ba 28 00 72 10 48 8b 03 f6 c4 08 75 2f 48 89 df e8 8c 83 f9 ff <49> 8b 44 24 20 4d 8d 6c 24 20 48 83 e8 20 4d 39 f5 74 7a 4c 89 RIP [] balloon_page_dequeue+0x54/0x130 RSP ---[ end trace 43cf28060d708d5f ]--- Kernel panic - not syncing: Fatal exception Dumping ftrace buffer: (ftrace buffer empty) Kernel Offset: disabled Cc: Signed-off-by: Minchan Kim Signed-off-by: Michael S. 
Tsirkin Acked-by: Rafael Aquini Signed-off-by: Sasha Levin --- mm/balloon_compaction.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/balloon_compaction.c b/mm/balloon_compaction.c index fcad8322ef367..b640609bcd177 100644 --- a/mm/balloon_compaction.c +++ b/mm/balloon_compaction.c @@ -61,6 +61,7 @@ struct page *balloon_page_dequeue(struct balloon_dev_info *b_dev_info) bool dequeued_page; dequeued_page = false; + spin_lock_irqsave(&b_dev_info->pages_lock, flags); list_for_each_entry_safe(page, tmp, &b_dev_info->pages, lru) { /* * Block others from accessing the 'page' while we get around @@ -75,15 +76,14 @@ struct page *balloon_page_dequeue(struct balloon_dev_info *b_dev_info) continue; } #endif - spin_lock_irqsave(&b_dev_info->pages_lock, flags); balloon_page_delete(page); __count_vm_event(BALLOON_DEFLATE); - spin_unlock_irqrestore(&b_dev_info->pages_lock, flags); unlock_page(page); dequeued_page = true; break; } } + spin_unlock_irqrestore(&b_dev_info->pages_lock, flags); if (!dequeued_page) { /* From 35a317f44dbf18460282a9c4d0d74480f5c3f7b2 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 10 Jan 2016 09:30:42 +0100 Subject: [PATCH 1725/2091] parisc: Fix __ARCH_SI_PREAMBLE_SIZE [ Upstream commit e60fc5aa608eb38b47ba4ee058f306f739eb70a0 ] On a 64bit kernel build the compiler aligns the _sifields union in the struct siginfo_t on a 64bit address. The __ARCH_SI_PREAMBLE_SIZE define compensates for this alignment and thus fixes the wait testcase of the strace package. The symptoms of a wrong __ARCH_SI_PREAMBLE_SIZE value is that _sigchld.si_stime variable is missed to be copied and thus after a copy_siginfo() will have uninitialized values. Signed-off-by: Helge Deller Cc: stable@vger.kernel.org Signed-off-by: Sasha Levin --- arch/parisc/include/uapi/asm/siginfo.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/parisc/include/uapi/asm/siginfo.h b/arch/parisc/include/uapi/asm/siginfo.h index d7034728f3778..1c75565d984b4 100644 --- a/arch/parisc/include/uapi/asm/siginfo.h +++ b/arch/parisc/include/uapi/asm/siginfo.h @@ -1,6 +1,10 @@ #ifndef _PARISC_SIGINFO_H #define _PARISC_SIGINFO_H +#if defined(__LP64__) +#define __ARCH_SI_PREAMBLE_SIZE (4 * sizeof(int)) +#endif + #include #undef NSIGTRAP From 12060a1c7cd09460128f8580c1862de807294c3a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 27 Nov 2015 13:28:55 +0000 Subject: [PATCH 1726/2091] drm/i915: Restore inhibiting the load of the default context [ Upstream commit 06ef83a705a98da63797a5a570220b6ca36febd4 ] Following a GPU reset, we may leave the context in a poorly defined state, and reloading from that context will leave the GPU flummoxed. For secondary contexts, this will lead to that context being banned - but currently it is also causing the default context to become banned, leading to turmoil in the shared state. This is a regression from commit 6702cf16e0ba8b0129f5aa1b6609d4e9c70bc13b [v4.1] Author: Ben Widawsky Date: Mon Mar 16 16:00:58 2015 +0000 drm/i915: Initialize all contexts which quietly introduced the removal of the MI_RESTORE_INHIBIT on the default context. v2: Mark the global default context as uninitialized on GPU reset so that the context-local workarounds are reloaded upon re-enabling. 
Signed-off-by: Chris Wilson Cc: Michel Thierry Cc: Mika Kuoppala Cc: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1448630935-27377-1-git-send-email-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala Cc: stable@vger.kernel.org [danvet: This seems to fix a gpu hand on after the first resume, resulting in any future suspend operation failing with -EIO because the gpu seems to be in a funky state. Somehow this patch fixes that.] Signed-off-by: Daniel Vetter (cherry picked from commit 42f1cae8c079bcceb3cff079fddc3ff8852c788f) Signed-off-by: Jani Nikula Signed-off-by: Sasha Levin --- drivers/gpu/drm/i915/i915_gem_context.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index f3e84c44d0091..4decf518d1060 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -317,6 +317,10 @@ void i915_gem_context_reset(struct drm_device *dev) i915_gem_context_unreference(lctx); ring->last_context = NULL; } + + /* Force the GPU state to be reinitialised on enabling */ + if (ring->default_context) + ring->default_context->legacy_hw_ctx.initialized = false; } } @@ -704,7 +708,7 @@ static int do_switch(struct intel_engine_cs *ring, goto unpin_out; } - if (!to->legacy_hw_ctx.initialized) { + if (!to->legacy_hw_ctx.initialized || i915_gem_context_is_default(to)) { hw_flags |= MI_RESTORE_INHIBIT; /* NB: If we inhibit the restore, the context is not allowed to * die because future work may end up depending on valid address From 064ee90886630403af052302f922a07499595515 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Wed, 13 Jan 2016 18:28:17 +0100 Subject: [PATCH 1727/2091] KVM: PPC: Fix ONE_REG AltiVec support [ Upstream commit b4d7f161feb3015d6306e1d35b565c888ff70c9d ] The get and set operations got exchanged by mistake when moving the code from book3s.c to powerpc.c. 
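The ONE_REG convention is that the get path copies vcpu state out to userspace through get_reg_val(), while the set path copies the user-supplied value in through set_reg_val(). Abbreviated to the VRSAVE case, the corrected shape as restored by the hunks below looks like this (excerpt, not a standalone example):

	/* kvm_vcpu_ioctl_get_one_reg(): report vcpu state to userspace */
	case KVM_REG_PPC_VRSAVE:
		val = get_reg_val(reg->id, vcpu->arch.vrsave);
		break;

	/* kvm_vcpu_ioctl_set_one_reg(): update vcpu state from userspace */
	case KVM_REG_PPC_VRSAVE:
		if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
			r = -ENXIO;
			break;
		}
		vcpu->arch.vrsave = set_reg_val(reg->id, val);
		break;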
Fixes: 3840edc8033ad5b86deee309c1c321ca54257452 Cc: stable@vger.kernel.org # 3.18+ Signed-off-by: Greg Kurz Signed-off-by: Paul Mackerras Signed-off-by: Sasha Levin --- arch/powerpc/kvm/powerpc.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index ac3ddf115f3d8..c8fe9ab107922 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -915,21 +915,17 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) r = -ENXIO; break; } - vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0] = val.vval; + val.vval = vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0]; break; case KVM_REG_PPC_VSCR: if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { r = -ENXIO; break; } - vcpu->arch.vr.vscr.u[3] = set_reg_val(reg->id, val); + val = get_reg_val(reg->id, vcpu->arch.vr.vscr.u[3]); break; case KVM_REG_PPC_VRSAVE: - if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { - r = -ENXIO; - break; - } - vcpu->arch.vrsave = set_reg_val(reg->id, val); + val = get_reg_val(reg->id, vcpu->arch.vrsave); break; #endif /* CONFIG_ALTIVEC */ default: @@ -970,17 +966,21 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) r = -ENXIO; break; } - val.vval = vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0]; + vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0] = val.vval; break; case KVM_REG_PPC_VSCR: if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { r = -ENXIO; break; } - val = get_reg_val(reg->id, vcpu->arch.vr.vscr.u[3]); + vcpu->arch.vr.vscr.u[3] = set_reg_val(reg->id, val); break; case KVM_REG_PPC_VRSAVE: - val = get_reg_val(reg->id, vcpu->arch.vrsave); + if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { + r = -ENXIO; + break; + } + vcpu->arch.vrsave = set_reg_val(reg->id, val); break; #endif /* CONFIG_ALTIVEC */ default: From 13aedd784b84cb7d8a3bb835941d80e99f5c796e Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Mon, 11 Jan 2016 13:04:28 +0000 Subject: [PATCH 1728/2091] dmaengine: dw: fix cyclic transfer setup [ Upstream commit df3bb8a0e619d501cd13334c3e0586edcdcbc716 ] Commit 61e183f83069 ("dmaengine/dw_dmac: Reconfigure interrupt and chan_cfg register on resume") moved some channel initialisation to a new function which must be called before starting a transfer. This updates dw_dma_cyclic_start() to use dwc_dostart() like the other modes, thus ensuring dwc_initialize() gets called and removing some code duplication. 
Fixes: 61e183f83069 ("dmaengine/dw_dmac: Reconfigure interrupt and chan_cfg register on resume") Signed-off-by: Mans Rullgard Reviewed-by: Viresh Kumar Signed-off-by: Vinod Koul Cc: Signed-off-by: Sasha Levin --- drivers/dma/dw/core.c | 23 +---------------------- 1 file changed, 1 insertion(+), 22 deletions(-) diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c index 9e504d3b0d4fc..115e5cff089ac 100644 --- a/drivers/dma/dw/core.c +++ b/drivers/dma/dw/core.c @@ -1245,7 +1245,6 @@ static void dwc_free_chan_resources(struct dma_chan *chan) int dw_dma_cyclic_start(struct dma_chan *chan) { struct dw_dma_chan *dwc = to_dw_dma_chan(chan); - struct dw_dma *dw = to_dw_dma(dwc->chan.device); unsigned long flags; if (!test_bit(DW_DMA_IS_CYCLIC, &dwc->flags)) { @@ -1254,27 +1253,7 @@ int dw_dma_cyclic_start(struct dma_chan *chan) } spin_lock_irqsave(&dwc->lock, flags); - - /* Assert channel is idle */ - if (dma_readl(dw, CH_EN) & dwc->mask) { - dev_err(chan2dev(&dwc->chan), - "%s: BUG: Attempted to start non-idle channel\n", - __func__); - dwc_dump_chan_regs(dwc); - spin_unlock_irqrestore(&dwc->lock, flags); - return -EBUSY; - } - - dma_writel(dw, CLEAR.ERROR, dwc->mask); - dma_writel(dw, CLEAR.XFER, dwc->mask); - - /* Setup DMAC channel registers */ - channel_writel(dwc, LLP, dwc->cdesc->desc[0]->txd.phys); - channel_writel(dwc, CTL_LO, DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN); - channel_writel(dwc, CTL_HI, 0); - - channel_set_bit(dw, CH_EN, dwc->mask); - + dwc_dostart(dwc, dwc->cdesc->desc[0]); spin_unlock_irqrestore(&dwc->lock, flags); return 0; From 66eba3dabc93f3404e3d011cd19a2eb45b87990f Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Mon, 11 Jan 2016 13:04:29 +0000 Subject: [PATCH 1729/2091] dmaengine: dw: fix cyclic transfer callbacks [ Upstream commit 2895b2cad6e7a95104cf396e5330054453382ae1 ] Cyclic transfer callbacks rely on block completion interrupts which were disabled in commit ff7b05f29fd4 ("dmaengine/dw_dmac: Don't handle block interrupts"). This re-enables block interrupts so the cyclic callbacks can work. Other transfer types are not affected as they set the INT_EN bit only on the last block. 
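Condensed from the hunks below, the restored flow is: unmask the per-block interrupt when the channel is initialised, and have the cyclic path acknowledge and report each completed block. This is an abbreviated sketch using the driver's names, not the full code:

	/* dwc_initialize(): unmask BLOCK alongside XFER and ERROR */
	channel_set_bit(dw, MASK.XFER, dwc->mask);
	channel_set_bit(dw, MASK.BLOCK, dwc->mask);
	channel_set_bit(dw, MASK.ERROR, dwc->mask);

	/* dwc_handle_cyclic(): one period callback per completed block */
	if (status_block & dwc->mask) {
		dma_writel(dw, CLEAR.BLOCK, dwc->mask);	/* ack the block irq */
		if (dwc->cdesc->period_callback)
			dwc->cdesc->period_callback(dwc->cdesc->period_callback_param);
	}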
Fixes: ff7b05f29fd4 ("dmaengine/dw_dmac: Don't handle block interrupts") Signed-off-by: Mans Rullgard Reviewed-by: Viresh Kumar Signed-off-by: Vinod Koul Cc: Signed-off-by: Sasha Levin --- drivers/dma/dw/core.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c index 115e5cff089ac..303d937d63c79 100644 --- a/drivers/dma/dw/core.c +++ b/drivers/dma/dw/core.c @@ -156,6 +156,7 @@ static void dwc_initialize(struct dw_dma_chan *dwc) /* Enable interrupts */ channel_set_bit(dw, MASK.XFER, dwc->mask); + channel_set_bit(dw, MASK.BLOCK, dwc->mask); channel_set_bit(dw, MASK.ERROR, dwc->mask); dwc->initialized = true; @@ -536,16 +537,17 @@ EXPORT_SYMBOL(dw_dma_get_dst_addr); /* Called with dwc->lock held and all DMAC interrupts disabled */ static void dwc_handle_cyclic(struct dw_dma *dw, struct dw_dma_chan *dwc, - u32 status_err, u32 status_xfer) + u32 status_block, u32 status_err, u32 status_xfer) { unsigned long flags; - if (dwc->mask) { + if (status_block & dwc->mask) { void (*callback)(void *param); void *callback_param; dev_vdbg(chan2dev(&dwc->chan), "new cyclic period llp 0x%08x\n", channel_readl(dwc, LLP)); + dma_writel(dw, CLEAR.BLOCK, dwc->mask); callback = dwc->cdesc->period_callback; callback_param = dwc->cdesc->period_callback_param; @@ -577,6 +579,7 @@ static void dwc_handle_cyclic(struct dw_dma *dw, struct dw_dma_chan *dwc, channel_writel(dwc, CTL_LO, 0); channel_writel(dwc, CTL_HI, 0); + dma_writel(dw, CLEAR.BLOCK, dwc->mask); dma_writel(dw, CLEAR.ERROR, dwc->mask); dma_writel(dw, CLEAR.XFER, dwc->mask); @@ -593,10 +596,12 @@ static void dw_dma_tasklet(unsigned long data) { struct dw_dma *dw = (struct dw_dma *)data; struct dw_dma_chan *dwc; + u32 status_block; u32 status_xfer; u32 status_err; int i; + status_block = dma_readl(dw, RAW.BLOCK); status_xfer = dma_readl(dw, RAW.XFER); status_err = dma_readl(dw, RAW.ERROR); @@ -605,7 +610,8 @@ static void dw_dma_tasklet(unsigned long data) for (i = 0; i < dw->dma.chancnt; i++) { dwc = &dw->chan[i]; if (test_bit(DW_DMA_IS_CYCLIC, &dwc->flags)) - dwc_handle_cyclic(dw, dwc, status_err, status_xfer); + dwc_handle_cyclic(dw, dwc, status_block, status_err, + status_xfer); else if (status_err & (1 << i)) dwc_handle_error(dw, dwc); else if (status_xfer & (1 << i)) @@ -616,6 +622,7 @@ static void dw_dma_tasklet(unsigned long data) * Re-enable interrupts. */ channel_set_bit(dw, MASK.XFER, dw->all_chan_mask); + channel_set_bit(dw, MASK.BLOCK, dw->all_chan_mask); channel_set_bit(dw, MASK.ERROR, dw->all_chan_mask); } @@ -635,6 +642,7 @@ static irqreturn_t dw_dma_interrupt(int irq, void *dev_id) * softirq handler. 
*/ channel_clear_bit(dw, MASK.XFER, dw->all_chan_mask); + channel_clear_bit(dw, MASK.BLOCK, dw->all_chan_mask); channel_clear_bit(dw, MASK.ERROR, dw->all_chan_mask); status = dma_readl(dw, STATUS_INT); @@ -645,6 +653,7 @@ static irqreturn_t dw_dma_interrupt(int irq, void *dev_id) /* Try to recover */ channel_clear_bit(dw, MASK.XFER, (1 << 8) - 1); + channel_clear_bit(dw, MASK.BLOCK, (1 << 8) - 1); channel_clear_bit(dw, MASK.SRC_TRAN, (1 << 8) - 1); channel_clear_bit(dw, MASK.DST_TRAN, (1 << 8) - 1); channel_clear_bit(dw, MASK.ERROR, (1 << 8) - 1); @@ -1111,6 +1120,7 @@ static void dw_dma_off(struct dw_dma *dw) dma_writel(dw, CFG, 0); channel_clear_bit(dw, MASK.XFER, dw->all_chan_mask); + channel_clear_bit(dw, MASK.BLOCK, dw->all_chan_mask); channel_clear_bit(dw, MASK.SRC_TRAN, dw->all_chan_mask); channel_clear_bit(dw, MASK.DST_TRAN, dw->all_chan_mask); channel_clear_bit(dw, MASK.ERROR, dw->all_chan_mask); @@ -1216,6 +1226,7 @@ static void dwc_free_chan_resources(struct dma_chan *chan) /* Disable interrupts */ channel_clear_bit(dw, MASK.XFER, dwc->mask); + channel_clear_bit(dw, MASK.BLOCK, dwc->mask); channel_clear_bit(dw, MASK.ERROR, dwc->mask); spin_unlock_irqrestore(&dwc->lock, flags); @@ -1458,6 +1469,7 @@ void dw_dma_cyclic_free(struct dma_chan *chan) dwc_chan_disable(dw, dwc); + dma_writel(dw, CLEAR.BLOCK, dwc->mask); dma_writel(dw, CLEAR.ERROR, dwc->mask); dma_writel(dw, CLEAR.XFER, dwc->mask); @@ -1548,9 +1560,6 @@ int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata) /* Force dma off, just in case */ dw_dma_off(dw); - /* Disable BLOCK interrupts as well */ - channel_clear_bit(dw, MASK.BLOCK, dw->all_chan_mask); - /* Create a pool of consistent memory blocks for hardware descriptors */ dw->desc_pool = dmam_pool_create("dw_dmac_desc_pool", chip->dev, sizeof(struct dw_desc), 4, 0); From 8a55af069755c320f9f69c29d6bba9856ba8a74a Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 4 Jan 2016 02:21:55 +0100 Subject: [PATCH 1730/2091] mmc: mmci: fix an ages old detection error [ Upstream commit 0bcb7efdff63564e80fe84dd36a9fbdfbf6697a4 ] commit 4956e10903fd ("ARM: 6244/1: mmci: add variant data and default MCICLOCK support") added variant data for ARM, U300 and Ux500 variants. The Nomadik NHK8815/8820 variant was erroneously labeled as a U300 variant, and when the proper Nomadik variant was later introduced in commit 34fd421349ff ("ARM: 7378/1: mmci: add support for the Nomadik MMCI variant") this was not fixes. Let's say this fixes the latter commit as there was no proper Nomadik support until then. 
Cc: stable@vger.kernel.org Fixes: 34fd421349ff ("ARM: 7378/1: mmci: add support for the Nomadik...") Signed-off-by: Linus Walleij Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin --- drivers/mmc/host/mmci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c index fb266745f8240..acece3299756e 100644 --- a/drivers/mmc/host/mmci.c +++ b/drivers/mmc/host/mmci.c @@ -1886,7 +1886,7 @@ static struct amba_id mmci_ids[] = { { .id = 0x00280180, .mask = 0x00ffffff, - .data = &variant_u300, + .data = &variant_nomadik, }, { .id = 0x00480180, From 6e1e9bd06060eedf56db7aa53909b52944fc3d06 Mon Sep 17 00:00:00 2001 From: Weijun Yang Date: Sun, 4 Oct 2015 12:04:11 +0000 Subject: [PATCH 1731/2091] mmc: core: enable CMD19 tuning for DDR50 mode [ Upstream commit 4324f6de6d2eb9b232410eb0d67bfafdde8ba711 ] As SD Specifications Part1 Physical Layer Specification Version 3.01 says, CMD19 tuning is available for unlocked cards in transfer state of 1.8V signaling mode. The small difference between v3.00 and 3.01 spec means that CMD19 tuning is also available for DDR50 mode. Signed-off-by: Weijun Yang Signed-off-by: Barry Song Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin --- drivers/mmc/core/sd.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c index 31a9ef256d065..51fe41b5bb76b 100644 --- a/drivers/mmc/core/sd.c +++ b/drivers/mmc/core/sd.c @@ -661,9 +661,25 @@ static int mmc_sd_init_uhs_card(struct mmc_card *card) * SDR104 mode SD-cards. Note that tuning is mandatory for SDR104. */ if (!mmc_host_is_spi(card->host) && - (card->sd_bus_speed == UHS_SDR50_BUS_SPEED || - card->sd_bus_speed == UHS_SDR104_BUS_SPEED)) + (card->sd_bus_speed == UHS_SDR50_BUS_SPEED || + card->sd_bus_speed == UHS_DDR50_BUS_SPEED || + card->sd_bus_speed == UHS_SDR104_BUS_SPEED)) { err = mmc_execute_tuning(card); + + /* + * As SD Specifications Part1 Physical Layer Specification + * Version 3.01 says, CMD19 tuning is available for unlocked + * cards in transfer state of 1.8V signaling mode. The small + * difference between v3.00 and 3.01 spec means that CMD19 + * tuning is also available for DDR50 mode. + */ + if (err && card->sd_bus_speed == UHS_DDR50_BUS_SPEED) { + pr_warn("%s: ddr50 tuning failed\n", + mmc_hostname(card->host)); + err = 0; + } + } + out: kfree(status); From 2a048a248fad2e7d49a3a5c8b9259f403182b5ac Mon Sep 17 00:00:00 2001 From: Carlo Caione Date: Wed, 13 Jan 2016 09:36:55 +0100 Subject: [PATCH 1732/2091] mmc: core: Enable tuning according to the actual timing [ Upstream commit e10c321977091f163eceedec0650e0ef4b3cf4bb ] While in sdhci_execute_tuning() the choice whether or not to enable the tuning is done on the actual timing, in the mmc_sdio_init_uhs_card() the check is done on the capability of the card. This difference is causing some issues with some SDIO cards in DDR50 mode where the CDM19 is wrongly issued. With this patch we modify the check in both mmc_(sd|sdio)_init_uhs_card() functions to take the proper decision only according to the actual timing specification. 
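Condensed from the SDIO hunk below (trimmed for readability), the change in the condition is:

	/* before: keyed off what the card advertises */
	if (!mmc_host_is_spi(card->host) &&
	    ((card->sw_caps.sd3_bus_mode & SD_MODE_UHS_SDR50) ||
	     (card->sw_caps.sd3_bus_mode & SD_MODE_UHS_SDR104)))
		err = mmc_execute_tuning(card);

	/* after: keyed off the timing that was actually negotiated */
	if (!mmc_host_is_spi(card->host) &&
	    ((card->host->ios.timing == MMC_TIMING_UHS_SDR50) ||
	     (card->host->ios.timing == MMC_TIMING_UHS_SDR104)))
		err = mmc_execute_tuning(card);

The SD path gets the equivalent treatment, with MMC_TIMING_UHS_DDR50 included as well.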
Cc: stable@vger.kernel.org Signed-off-by: Carlo Caione Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin --- drivers/mmc/core/sd.c | 8 ++++---- drivers/mmc/core/sdio.c | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c index 51fe41b5bb76b..ce3044883a428 100644 --- a/drivers/mmc/core/sd.c +++ b/drivers/mmc/core/sd.c @@ -661,9 +661,9 @@ static int mmc_sd_init_uhs_card(struct mmc_card *card) * SDR104 mode SD-cards. Note that tuning is mandatory for SDR104. */ if (!mmc_host_is_spi(card->host) && - (card->sd_bus_speed == UHS_SDR50_BUS_SPEED || - card->sd_bus_speed == UHS_DDR50_BUS_SPEED || - card->sd_bus_speed == UHS_SDR104_BUS_SPEED)) { + (card->host->ios.timing == MMC_TIMING_UHS_SDR50 || + card->host->ios.timing == MMC_TIMING_UHS_DDR50 || + card->host->ios.timing == MMC_TIMING_UHS_SDR104)) { err = mmc_execute_tuning(card); /* @@ -673,7 +673,7 @@ static int mmc_sd_init_uhs_card(struct mmc_card *card) * difference between v3.00 and 3.01 spec means that CMD19 * tuning is also available for DDR50 mode. */ - if (err && card->sd_bus_speed == UHS_DDR50_BUS_SPEED) { + if (err && card->host->ios.timing == MMC_TIMING_UHS_DDR50) { pr_warn("%s: ddr50 tuning failed\n", mmc_hostname(card->host)); err = 0; diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c index 3bd6a93602b68..941beb3b5fa27 100644 --- a/drivers/mmc/core/sdio.c +++ b/drivers/mmc/core/sdio.c @@ -566,8 +566,8 @@ static int mmc_sdio_init_uhs_card(struct mmc_card *card) * SDR104 mode SD-cards. Note that tuning is mandatory for SDR104. */ if (!mmc_host_is_spi(card->host) && - ((card->sw_caps.sd3_bus_mode & SD_MODE_UHS_SDR50) || - (card->sw_caps.sd3_bus_mode & SD_MODE_UHS_SDR104))) + ((card->host->ios.timing == MMC_TIMING_UHS_SDR50) || + (card->host->ios.timing == MMC_TIMING_UHS_SDR104))) err = mmc_execute_tuning(card); out: return err; From 5301c0647782e71150361a3854ccf3e004b79bb9 Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Sun, 27 Dec 2015 02:13:27 +0300 Subject: [PATCH 1733/2091] sparc64: fix incorrect sign extension in sys_sparc64_personality [ Upstream commit 525fd5a94e1be0776fa652df5c687697db508c91 ] The value returned by sys_personality has type "long int". It is saved to a variable of type "int", which is not a problem yet because the type of task_struct->pesonality is "unsigned int". The problem is the sign extension from "int" to "long int" that happens on return from sys_sparc64_personality. For example, a userspace call personality((unsigned) -EINVAL) will result to any subsequent personality call, including absolutely harmless read-only personality(0xffffffff) call, failing with errno set to EINVAL. Signed-off-by: Dmitry V. Levin Cc: Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- arch/sparc/kernel/sys_sparc_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index 30e7ddb27a3a9..c690c8e16a96e 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -413,7 +413,7 @@ SYSCALL_DEFINE6(sparc_ipc, unsigned int, call, int, first, unsigned long, second SYSCALL_DEFINE1(sparc64_personality, unsigned long, personality) { - int ret; + long ret; if (personality(current->personality) == PER_LINUX32 && personality(personality) == PER_LINUX) From ca7342a8a39704670c4079aacbd78e360beaf223 Mon Sep 17 00:00:00 2001 From: Jamie Bainbridge Date: Sat, 7 Nov 2015 22:13:49 +1000 Subject: [PATCH 1734/2091] cifs: Ratelimit kernel log messages [ Upstream commit ec7147a99e33a9e4abad6fc6e1b40d15df045d53 ] Under some conditions, CIFS can repeatedly call the cifs_dbg() logging wrapper. If done rapidly enough, the console framebuffer can softlockup or "rcu_sched self-detected stall". Apply the built-in log ratelimiters to prevent such hangs. Signed-off-by: Jamie Bainbridge Signed-off-by: Steve French CC: Stable Signed-off-by: Sasha Levin --- fs/cifs/cifs_debug.c | 2 +- fs/cifs/cifs_debug.h | 9 ++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index 7febcf2475c5a..50b2684833029 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -50,7 +50,7 @@ void cifs_vfs_err(const char *fmt, ...) vaf.fmt = fmt; vaf.va = &args; - pr_err("CIFS VFS: %pV", &vaf); + pr_err_ratelimited("CIFS VFS: %pV", &vaf); va_end(args); } diff --git a/fs/cifs/cifs_debug.h b/fs/cifs/cifs_debug.h index f40fbaca1b2a2..66cf0f9fff898 100644 --- a/fs/cifs/cifs_debug.h +++ b/fs/cifs/cifs_debug.h @@ -51,14 +51,13 @@ __printf(1, 2) void cifs_vfs_err(const char *fmt, ...); /* information message: e.g., configuration, major event */ #define cifs_dbg(type, fmt, ...) \ do { \ - if (type == FYI) { \ - if (cifsFYI & CIFS_INFO) { \ - pr_debug("%s: " fmt, __FILE__, ##__VA_ARGS__); \ - } \ + if (type == FYI && cifsFYI & CIFS_INFO) { \ + pr_debug_ratelimited("%s: " \ + fmt, __FILE__, ##__VA_ARGS__); \ } else if (type == VFS) { \ cifs_vfs_err(fmt, ##__VA_ARGS__); \ } else if (type == NOISY && type != 0) { \ - pr_debug(fmt, ##__VA_ARGS__); \ + pr_debug_ratelimited(fmt, ##__VA_ARGS__); \ } \ } while (0) From 99b79b15df4ed1a6293cff487b46efc86bd20ae5 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Wed, 23 Dec 2015 07:32:41 +0100 Subject: [PATCH 1735/2091] cifs: fix race between call_async() and reconnect() [ Upstream commit 820962dc700598ffe8cd21b967e30e7520c34748 ] cifs_call_async() queues the MID to the pending list and calls smb_send_rqst(). If smb_send_rqst() performs a partial send, it sets the tcpStatus to CifsNeedReconnect and returns an error code to cifs_call_async(). In this case, cifs_call_async() removes the MID from the list and returns to the caller. However, cifs_call_async() releases the server mutex _before_ removing the MID. This means that a cifs_reconnect() can race with this function and manage to remove the MID from the list and delete the entry before cifs_call_async() calls cifs_delete_mid(). This leads to various crashes due to the use after free in cifs_delete_mid(). 
Task1 Task2 cifs_call_async(): - rc = -EAGAIN - mutex_unlock(srv_mutex) cifs_reconnect(): - mutex_lock(srv_mutex) - mutex_unlock(srv_mutex) - list_delete(mid) - mid->callback() cifs_writev_callback(): - mutex_lock(srv_mutex) - delete(mid) - mutex_unlock(srv_mutex) - cifs_delete_mid(mid) <---- use after free Fix this by removing the MID in cifs_call_async() before releasing the srv_mutex. Also hold the srv_mutex in cifs_reconnect() until the MIDs are moved out of the pending list. Signed-off-by: Rabin Vincent Acked-by: Shirish Pargaonkar CC: Stable Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/cifs/connect.c | 2 +- fs/cifs/transport.c | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 8383d5ea42028..de626b939811b 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -357,7 +357,6 @@ cifs_reconnect(struct TCP_Server_Info *server) server->session_key.response = NULL; server->session_key.len = 0; server->lstrp = jiffies; - mutex_unlock(&server->srv_mutex); /* mark submitted MIDs for retry and issue callback */ INIT_LIST_HEAD(&retry_list); @@ -370,6 +369,7 @@ cifs_reconnect(struct TCP_Server_Info *server) list_move(&mid_entry->qhead, &retry_list); } spin_unlock(&GlobalMid_Lock); + mutex_unlock(&server->srv_mutex); cifs_dbg(FYI, "%s: issuing mid callbacks\n", __func__); list_for_each_safe(tmp, tmp2, &retry_list) { diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 126f46b887cc8..66106f6ed7b4d 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -576,14 +576,16 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst, cifs_in_send_dec(server); cifs_save_when_sent(mid); - if (rc < 0) + if (rc < 0) { server->sequence_number -= 2; + cifs_delete_mid(mid); + } + mutex_unlock(&server->srv_mutex); if (rc == 0) return 0; - cifs_delete_mid(mid); add_credits_and_wake_if(server, credits, optype); return rc; } From c5882812d2e1ab7db5bc71a1bca90b3a2d89dedd Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Thu, 14 Jan 2016 13:41:14 +0300 Subject: [PATCH 1736/2091] cifs_dbg() outputs an uninitialized buffer in cifs_readdir() [ Upstream commit 01b9b0b28626db4a47d7f48744d70abca9914ef1 ] In some cases tmp_buf may not be filled in cifs_filldir() and stays uninitialized, so printing it with the "%s" modifier can leak the content of kernel-space memory. If the old content of this buffer does not contain '\0', an access beyond the end of the allocated object can crash the host. Signed-off-by: Vasily Averin Signed-off-by: Steve French CC: Stable Signed-off-by: Sasha Levin --- fs/cifs/readdir.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index b1eede3678a91..3634c7adf7d20 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -847,6 +847,7 @@ int cifs_readdir(struct file *file, struct dir_context *ctx) * if buggy server returns . and .. late do we want to * check for that here? */ + *tmp_buf = 0; rc = cifs_filldir(current_entry, file, ctx, tmp_buf, max_len); if (rc) { From a5254ac06b9aaf6ef99bb260fe713f6a64371353 Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Thu, 14 Jan 2016 15:16:47 -0800 Subject: [PATCH 1737/2091] m32r: fix m32104ut_defconfig build fail [ Upstream commit 601f1db653217f205ffa5fb33514b4e1711e56d1 ] The build of m32104ut_defconfig for the m32r arch had been failing for a long time with the error: ERROR: "memory_start" [fs/udf/udf.ko] undefined! ERROR: "memory_end" [fs/udf/udf.ko] undefined! ERROR: "memory_end" [drivers/scsi/sg.ko] undefined!
ERROR: "memory_start" [drivers/scsi/sg.ko] undefined! ERROR: "memory_end" [drivers/i2c/i2c-dev.ko] undefined! ERROR: "memory_start" [drivers/i2c/i2c-dev.ko] undefined! As done in other architectures export the symbols to fix the error. Reported-by: Fengguang Wu Signed-off-by: Sudip Mukherjee Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- arch/m32r/kernel/setup.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/m32r/kernel/setup.c b/arch/m32r/kernel/setup.c index 0392112a5d702..a5ecef7188baa 100644 --- a/arch/m32r/kernel/setup.c +++ b/arch/m32r/kernel/setup.c @@ -81,7 +81,10 @@ static struct resource code_resource = { }; unsigned long memory_start; +EXPORT_SYMBOL(memory_start); + unsigned long memory_end; +EXPORT_SYMBOL(memory_end); void __init setup_arch(char **); int get_cpuinfo(char *); From 68cc9774de3e0ad8de67d6fbbdaf4a808cbc62a9 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Thu, 14 Jan 2016 15:16:50 -0800 Subject: [PATCH 1738/2091] dma-debug: switch check from _text to _stext [ Upstream commit ea535e418c01837d07b6c94e817540f50bfdadb0 ] In include/asm-generic/sections.h: /* * Usage guidelines: * _text, _data: architecture specific, don't use them in * arch-independent code * [_stext, _etext]: contains .text.* sections, may also contain * .rodata.* * and/or .init.* sections _text is not guaranteed across architectures. Architectures such as ARM may reuse parts which are not actually text and erroneously trigger a bug. Switch to using _stext which is guaranteed to contain text sections. Came out of https://lkml.kernel.org/g/<567B1176.4000106@redhat.com> Signed-off-by: Laura Abbott Reviewed-by: Kees Cook Cc: Russell King Cc: Arnd Bergmann Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- lib/dma-debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/dma-debug.c b/lib/dma-debug.c index dace71fe41f70..517a568f038dd 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -1181,7 +1181,7 @@ static inline bool overlap(void *addr, unsigned long len, void *start, void *end static void check_for_illegal_area(struct device *dev, void *addr, unsigned long len) { - if (overlap(addr, len, _text, _etext) || + if (overlap(addr, len, _stext, _etext) || overlap(addr, len, __start_rodata, __end_rodata)) err_printk(dev, NULL, "DMA-API: device driver maps memory from kernel text or rodata [addr=%p] [len=%lu]\n", addr, len); } From c91d339da50d0f1e1f61556fa6ef3374d6810c80 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Thu, 14 Jan 2016 15:16:53 -0800 Subject: [PATCH 1739/2091] scripts/bloat-o-meter: fix python3 syntax error [ Upstream commit 72214a24a7677d4c7501eecc9517ed681b5f2db2 ] In Python3+ print is a function so the old syntax is not correct anymore: $ ./scripts/bloat-o-meter vmlinux.o vmlinux.o.old File "./scripts/bloat-o-meter", line 61 print "add/remove: %s/%s grow/shrink: %s/%s up/down: %s/%s (%s)" % \ ^ SyntaxError: invalid syntax Fix by calling print as a function. 
Tested on python 2.7.11, 3.5.1 Signed-off-by: Sergey Senozhatsky Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- scripts/bloat-o-meter | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/bloat-o-meter b/scripts/bloat-o-meter index 23e78dcd12bf7..38b64f4873152 100755 --- a/scripts/bloat-o-meter +++ b/scripts/bloat-o-meter @@ -58,8 +58,8 @@ for name in common: delta.sort() delta.reverse() -print "add/remove: %s/%s grow/shrink: %s/%s up/down: %s/%s (%s)" % \ - (add, remove, grow, shrink, up, -down, up-down) -print "%-40s %7s %7s %+7s" % ("function", "old", "new", "delta") +print("add/remove: %s/%s grow/shrink: %s/%s up/down: %s/%s (%s)" % \ + (add, remove, grow, shrink, up, -down, up-down)) +print("%-40s %7s %7s %+7s" % ("function", "old", "new", "delta")) for d, n in delta: - if d: print "%-40s %7s %7s %+7d" % (n, old.get(n,"-"), new.get(n,"-"), d) + if d: print("%-40s %7s %7s %+7d" % (n, old.get(n,"-"), new.get(n,"-"), d)) From 8467278d965a3c2c2f2e95c8eb723f47b872ad5d Mon Sep 17 00:00:00 2001 From: xuejiufei Date: Thu, 14 Jan 2016 15:17:38 -0800 Subject: [PATCH 1740/2091] ocfs2/dlm: ignore cleaning the migration mle that is inuse [ Upstream commit bef5502de074b6f6fa647b94b73155d675694420 ] We have found that migration source will trigger a BUG that the refcount of mle is already zero before put when the target is down during migration. The situation is as follows: dlm_migrate_lockres dlm_add_migration_mle dlm_mark_lockres_migrating dlm_get_mle_inuse <<<<<< Now the refcount of the mle is 2. dlm_send_one_lockres and wait for the target to become the new master. <<<<<< o2hb detect the target down and clean the migration mle. Now the refcount is 1. dlm_migrate_lockres woken, and put the mle twice when found the target goes down which trigger the BUG with the following message: "ERROR: bad mle: ". Signed-off-by: Jiufei Xue Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/ocfs2/dlm/dlmmaster.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 482cfd34472d6..523e485a11b84 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -2518,6 +2518,11 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm, spin_lock(&dlm->master_lock); ret = dlm_add_migration_mle(dlm, res, mle, &oldmle, name, namelen, target, dlm->node_num); + /* get an extra reference on the mle. + * otherwise the assert_master from the new + * master will destroy this. + */ + dlm_get_mle_inuse(mle); spin_unlock(&dlm->master_lock); spin_unlock(&dlm->spinlock); @@ -2553,6 +2558,7 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm, if (mle_added) { dlm_mle_detach_hb_events(dlm, mle); dlm_put_mle(mle); + dlm_put_mle_inuse(mle); } else if (mle) { kmem_cache_free(dlm_mle_cache, mle); mle = NULL; @@ -2570,17 +2576,6 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm, * ensure that all assert_master work is flushed. */ flush_workqueue(dlm->dlm_worker); - /* get an extra reference on the mle. - * otherwise the assert_master from the new - * master will destroy this. 
- * also, make sure that all callers of dlm_get_mle - * take both dlm->spinlock and dlm->master_lock */ - spin_lock(&dlm->spinlock); - spin_lock(&dlm->master_lock); - dlm_get_mle_inuse(mle); - spin_unlock(&dlm->master_lock); - spin_unlock(&dlm->spinlock); - /* notify new node and send all lock state */ /* call send_one_lockres with migration flag. * this serves as notice to the target node that a @@ -3309,6 +3304,15 @@ void dlm_clean_master_list(struct dlm_ctxt *dlm, u8 dead_node) mle->new_master != dead_node) continue; + if (mle->new_master == dead_node && mle->inuse) { + mlog(ML_NOTICE, "%s: target %u died during " + "migration from %u, the MLE is " + "still keep used, ignore it!\n", + dlm->name, dead_node, + mle->master); + continue; + } + /* If we have reached this point, this mle needs to be * removed from the list and freed. */ dlm_clean_migration_mle(dlm, mle); From 39e51d967413e64c40eae6201430b1ffdb90ba08 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Mon, 7 Dec 2015 12:25:02 +0530 Subject: [PATCH 1741/2091] perf kvm record/report: 'unprocessable sample' error while recording/reporting guest data [ Upstream commit 3caeaa562733c4836e61086ec07666635006a787 ] While recording guest samples in host using perf kvm record, it will populate unprocessable sample error, though samples will be recorded properly. While generating report using perf kvm report, no samples will be processed and same error will populate. We have seen this behaviour with upstream perf(4.4-rc3) on x86 and ppc64 hardware. Reason behind this failure is, when it tries to fetch machine from rb_tree of machines, it fails. As a part of tracing a bug, we figured out that this code was incorrectly refactored in commit 54245fdc3576 ("perf session: Remove wrappers to machines__find"). This patch will change the functionality such that if it can't fetch machine in first trial, it will create one node of machine and add that to rb_tree. So next time when it tries to fetch same machine from rb_tree, it won't fail. Actually it was the case before refactoring of code in aforementioned commit. This patch is generated from acme perf/core branch. Below I've mention an example that demonstrate the behaviour before and after applying patch. Before applying patch: [Note: One needs to run guest before recording data in host] ravi@ravi-bangoria:~$ ./perf kvm record -a Warning: 5903 unprocessable samples recorded. Do you have a KVM guest running and not using 'perf kvm'? [ perf record: Captured and wrote 1.409 MB perf.data.guest (285 samples) ] ravi@ravi-bangoria:~$ ./perf kvm report --stdio Warning: 5903 unprocessable samples recorded. Do you have a KVM guest running and not using 'perf kvm'? # To display the perf.data header info, please use --header/--header-only options. # # Total Lost Samples: 0 # # Samples: 285 of event 'cycles' # Event count (approx.): 88715406 # # Overhead Command Shared Object Symbol # ........ ....... ............. ...... # # (For a higher level overview, try: perf report --sort comm,dso) # After applying patch: ravi@ravi-bangoria:~$ ./perf kvm record -a [ perf record: Captured and wrote 1.188 MB perf.data.guest (17 samples) ] ravi@ravi-bangoria:~$ ./perf kvm report --stdio # To display the perf.data header info, please use --header/--header-only options. # # Total Lost Samples: 0 # # Samples: 17 of event 'cycles' # Event count (approx.): 700746 # # Overhead Command Shared Object Symbol # ........ ....... ................ ...................... 
# 34.19% :5758 [unknown] [g] 0xffffffff818682ab 22.79% :5758 [unknown] [g] 0xffffffff812dc7f8 22.79% :5758 [unknown] [g] 0xffffffff818650d0 14.83% :5758 [unknown] [g] 0xffffffff8161a1b6 2.49% :5758 [unknown] [g] 0xffffffff818692bf 0.48% :5758 [unknown] [g] 0xffffffff81869253 0.05% :5758 [unknown] [g] 0xffffffff81869250 Signed-off-by: Ravi Bangoria Cc: Naveen N. Rao Cc: stable@vger.kernel.org # v3.19+ Fixes: 54245fdc3576 ("perf session: Remove wrappers to machines__find") Link: http://lkml.kernel.org/r/1449471302-11283-1-git-send-email-ravi.bangoria@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/session.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 0c74012575ac9..83054ef6c1a19 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -816,7 +816,7 @@ static struct machine *machines__find_for_cpumode(struct machines *machines, machine = machines__find(machines, pid); if (!machine) - machine = machines__find(machines, DEFAULT_GUEST_KERNEL_ID); + machine = machines__findnew(machines, DEFAULT_GUEST_KERNEL_ID); return machine; } From 664ecf4f243bac17065cd9878790d40a592e2f3d Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Thu, 14 Jan 2016 15:22:26 -0800 Subject: [PATCH 1742/2091] zram/zcomp: use GFP_NOIO to allocate streams [ Upstream commit 3d5fe03a3ea013060ebba2a811aeb0f23f56aefa ] We can end up allocating a new compression stream with GFP_KERNEL from within the IO path, which may result is nested (recursive) IO operations. That can introduce problems if the IO path in question is a reclaimer, holding some locks that will deadlock nested IOs. Allocate streams and working memory using GFP_NOIO flag, forbidding recursive IO and FS operations. An example: inconsistent {IN-RECLAIM_FS-W} -> {RECLAIM_FS-ON-W} usage. 
git/20158 [HC0[0]:SC0[0]:HE1:SE1] takes: (jbd2_handle){+.+.?.}, at: start_this_handle+0x4ca/0x555 {IN-RECLAIM_FS-W} state was registered at: __lock_acquire+0x8da/0x117b lock_acquire+0x10c/0x1a7 start_this_handle+0x52d/0x555 jbd2__journal_start+0xb4/0x237 __ext4_journal_start_sb+0x108/0x17e ext4_dirty_inode+0x32/0x61 __mark_inode_dirty+0x16b/0x60c iput+0x11e/0x274 __dentry_kill+0x148/0x1b8 shrink_dentry_list+0x274/0x44a prune_dcache_sb+0x4a/0x55 super_cache_scan+0xfc/0x176 shrink_slab.part.14.constprop.25+0x2a2/0x4d3 shrink_zone+0x74/0x140 kswapd+0x6b7/0x930 kthread+0x107/0x10f ret_from_fork+0x3f/0x70 irq event stamp: 138297 hardirqs last enabled at (138297): debug_check_no_locks_freed+0x113/0x12f hardirqs last disabled at (138296): debug_check_no_locks_freed+0x33/0x12f softirqs last enabled at (137818): __do_softirq+0x2d3/0x3e9 softirqs last disabled at (137813): irq_exit+0x41/0x95 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(jbd2_handle); lock(jbd2_handle); *** DEADLOCK *** 5 locks held by git/20158: #0: (sb_writers#7){.+.+.+}, at: [] mnt_want_write+0x24/0x4b #1: (&type->i_mutex_dir_key#2/1){+.+.+.}, at: [] lock_rename+0xd9/0xe3 #2: (&sb->s_type->i_mutex_key#11){+.+.+.}, at: [] lock_two_nondirectories+0x3f/0x6b #3: (&sb->s_type->i_mutex_key#11/4){+.+.+.}, at: [] lock_two_nondirectories+0x66/0x6b #4: (jbd2_handle){+.+.?.}, at: [] start_this_handle+0x4ca/0x555 stack backtrace: CPU: 2 PID: 20158 Comm: git Not tainted 4.1.0-rc7-next-20150615-dbg-00016-g8bdf555-dirty #211 Call Trace: dump_stack+0x4c/0x6e mark_lock+0x384/0x56d mark_held_locks+0x5f/0x76 lockdep_trace_alloc+0xb2/0xb5 kmem_cache_alloc_trace+0x32/0x1e2 zcomp_strm_alloc+0x25/0x73 [zram] zcomp_strm_multi_find+0xe7/0x173 [zram] zcomp_strm_find+0xc/0xe [zram] zram_bvec_rw+0x2ca/0x7e0 [zram] zram_make_request+0x1fa/0x301 [zram] generic_make_request+0x9c/0xdb submit_bio+0xf7/0x120 ext4_io_submit+0x2e/0x43 ext4_bio_write_page+0x1b7/0x300 mpage_submit_page+0x60/0x77 mpage_map_and_submit_buffers+0x10f/0x21d ext4_writepages+0xc8c/0xe1b do_writepages+0x23/0x2c __filemap_fdatawrite_range+0x84/0x8b filemap_flush+0x1c/0x1e ext4_alloc_da_blocks+0xb8/0x117 ext4_rename+0x132/0x6dc ? mark_held_locks+0x5f/0x76 ext4_rename2+0x29/0x2b vfs_rename+0x540/0x636 SyS_renameat2+0x359/0x44d SyS_rename+0x1e/0x20 entry_SYSCALL_64_fastpath+0x12/0x6f [minchan@kernel.org: add stable mark] Signed-off-by: Sergey Senozhatsky Acked-by: Minchan Kim Cc: Kyeongdon Kim Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- drivers/block/zram/zcomp.c | 4 ++-- drivers/block/zram/zcomp_lz4.c | 2 +- drivers/block/zram/zcomp_lzo.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c index 54d946a9eee60..6fbb10ca73b10 100644 --- a/drivers/block/zram/zcomp.c +++ b/drivers/block/zram/zcomp.c @@ -76,7 +76,7 @@ static void zcomp_strm_free(struct zcomp *comp, struct zcomp_strm *zstrm) */ static struct zcomp_strm *zcomp_strm_alloc(struct zcomp *comp) { - struct zcomp_strm *zstrm = kmalloc(sizeof(*zstrm), GFP_KERNEL); + struct zcomp_strm *zstrm = kmalloc(sizeof(*zstrm), GFP_NOIO); if (!zstrm) return NULL; @@ -85,7 +85,7 @@ static struct zcomp_strm *zcomp_strm_alloc(struct zcomp *comp) * allocate 2 pages. 
1 for compressed data, plus 1 extra for the * case when compressed size is larger than the original one */ - zstrm->buffer = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1); + zstrm->buffer = (void *)__get_free_pages(GFP_NOIO | __GFP_ZERO, 1); if (!zstrm->private || !zstrm->buffer) { zcomp_strm_free(comp, zstrm); zstrm = NULL; diff --git a/drivers/block/zram/zcomp_lz4.c b/drivers/block/zram/zcomp_lz4.c index f2afb7e988c37..ee44b51130a45 100644 --- a/drivers/block/zram/zcomp_lz4.c +++ b/drivers/block/zram/zcomp_lz4.c @@ -15,7 +15,7 @@ static void *zcomp_lz4_create(void) { - return kzalloc(LZ4_MEM_COMPRESS, GFP_KERNEL); + return kzalloc(LZ4_MEM_COMPRESS, GFP_NOIO); } static void zcomp_lz4_destroy(void *private) diff --git a/drivers/block/zram/zcomp_lzo.c b/drivers/block/zram/zcomp_lzo.c index da1bc47d588e9..683ce049e0704 100644 --- a/drivers/block/zram/zcomp_lzo.c +++ b/drivers/block/zram/zcomp_lzo.c @@ -15,7 +15,7 @@ static void *lzo_create(void) { - return kzalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL); + return kzalloc(LZO1X_MEM_COMPRESS, GFP_NOIO); } static void lzo_destroy(void *private) From 7332411bbb9f327e65467013ef10d1d0f4999fcb Mon Sep 17 00:00:00 2001 From: Kyeongdon Kim Date: Thu, 14 Jan 2016 15:22:29 -0800 Subject: [PATCH 1743/2091] zram: try vmalloc() after kmalloc() [ Upstream commit d913897abace843bba20249f3190167f7895e9c3 ] When using LZ4 multi compression streams for zram swap, we saw page allocation failure messages while running system tests, not just once but a few times (2 - 5 per test). Some of the failures kept hitting order-3 allocation attempts. To set up the per-stream compression private data we have to call kzalloc() with an order-2/3 allocation at runtime (lzo/lz4), and if no order-2/3 memory is available at that moment the page allocation fails. This patch uses vmalloc() as a fallback for kmalloc(), which prevents the page allocation failure warning. With this change we never saw the warning message again in testing, and it also reduced process startup latency by about 60-120ms in each case.
For reference a call trace : Binder_1: page allocation failure: order:3, mode:0x10c0d0 CPU: 0 PID: 424 Comm: Binder_1 Tainted: GW 3.10.49-perf-g991d02b-dirty #20 Call trace: dump_backtrace+0x0/0x270 show_stack+0x10/0x1c dump_stack+0x1c/0x28 warn_alloc_failed+0xfc/0x11c __alloc_pages_nodemask+0x724/0x7f0 __get_free_pages+0x14/0x5c kmalloc_order_trace+0x38/0xd8 zcomp_lz4_create+0x2c/0x38 zcomp_strm_alloc+0x34/0x78 zcomp_strm_multi_find+0x124/0x1ec zcomp_strm_find+0xc/0x18 zram_bvec_rw+0x2fc/0x780 zram_make_request+0x25c/0x2d4 generic_make_request+0x80/0xbc submit_bio+0xa4/0x15c __swap_writepage+0x218/0x230 swap_writepage+0x3c/0x4c shrink_page_list+0x51c/0x8d0 shrink_inactive_list+0x3f8/0x60c shrink_lruvec+0x33c/0x4cc shrink_zone+0x3c/0x100 try_to_free_pages+0x2b8/0x54c __alloc_pages_nodemask+0x514/0x7f0 __get_free_pages+0x14/0x5c proc_info_read+0x50/0xe4 vfs_read+0xa0/0x12c SyS_read+0x44/0x74 DMA: 3397*4kB (MC) 26*8kB (RC) 0*16kB 0*32kB 0*64kB 0*128kB 0*256kB 0*512kB 0*1024kB 0*2048kB 0*4096kB = 13796kB [minchan@kernel.org: change vmalloc gfp and adding comment about gfp] [sergey.senozhatsky@gmail.com: tweak comments and styles] Signed-off-by: Kyeongdon Kim Signed-off-by: Minchan Kim Acked-by: Sergey Senozhatsky Sergey Senozhatsky Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- drivers/block/zram/zcomp_lz4.c | 23 +++++++++++++++++++++-- drivers/block/zram/zcomp_lzo.c | 23 +++++++++++++++++++++-- 2 files changed, 42 insertions(+), 4 deletions(-) diff --git a/drivers/block/zram/zcomp_lz4.c b/drivers/block/zram/zcomp_lz4.c index ee44b51130a45..dd6083124276f 100644 --- a/drivers/block/zram/zcomp_lz4.c +++ b/drivers/block/zram/zcomp_lz4.c @@ -10,17 +10,36 @@ #include #include #include +#include +#include #include "zcomp_lz4.h" static void *zcomp_lz4_create(void) { - return kzalloc(LZ4_MEM_COMPRESS, GFP_NOIO); + void *ret; + + /* + * This function can be called in swapout/fs write path + * so we can't use GFP_FS|IO. And it assumes we already + * have at least one stream in zram initialization so we + * don't do best effort to allocate more stream in here. + * A default stream will work well without further multiple + * streams. That's why we use NORETRY | NOWARN. + */ + ret = kzalloc(LZ4_MEM_COMPRESS, GFP_NOIO | __GFP_NORETRY | + __GFP_NOWARN); + if (!ret) + ret = __vmalloc(LZ4_MEM_COMPRESS, + GFP_NOIO | __GFP_NORETRY | __GFP_NOWARN | + __GFP_ZERO | __GFP_HIGHMEM, + PAGE_KERNEL); + return ret; } static void zcomp_lz4_destroy(void *private) { - kfree(private); + kvfree(private); } static int zcomp_lz4_compress(const unsigned char *src, unsigned char *dst, diff --git a/drivers/block/zram/zcomp_lzo.c b/drivers/block/zram/zcomp_lzo.c index 683ce049e0704..edc549920fa06 100644 --- a/drivers/block/zram/zcomp_lzo.c +++ b/drivers/block/zram/zcomp_lzo.c @@ -10,17 +10,36 @@ #include #include #include +#include +#include #include "zcomp_lzo.h" static void *lzo_create(void) { - return kzalloc(LZO1X_MEM_COMPRESS, GFP_NOIO); + void *ret; + + /* + * This function can be called in swapout/fs write path + * so we can't use GFP_FS|IO. And it assumes we already + * have at least one stream in zram initialization so we + * don't do best effort to allocate more stream in here. + * A default stream will work well without further multiple + * streams. That's why we use NORETRY | NOWARN. 
+ */ + ret = kzalloc(LZO1X_MEM_COMPRESS, GFP_NOIO | __GFP_NORETRY | + __GFP_NOWARN); + if (!ret) + ret = __vmalloc(LZO1X_MEM_COMPRESS, + GFP_NOIO | __GFP_NORETRY | __GFP_NOWARN | + __GFP_ZERO | __GFP_HIGHMEM, + PAGE_KERNEL); + return ret; } static void lzo_destroy(void *private) { - kfree(private); + kvfree(private); } static int lzo_compress(const unsigned char *src, unsigned char *dst, From 4ae0f4a619e2071dc184e0792d90ace3fd18c280 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Fri, 15 Jan 2016 16:54:03 -0800 Subject: [PATCH 1744/2091] mm: soft-offline: check return value in second __get_any_page() call [ Upstream commit d96b339f453997f2f08c52da3f41423be48c978f ] I saw the following BUG_ON triggered in a testcase where a process calls madvise(MADV_SOFT_OFFLINE) on thps, along with a background process that calls migratepages command repeatedly (doing ping-pong among different NUMA nodes) for the first process: Soft offlining page 0x60000 at 0x700000600000 __get_any_page: 0x60000 free buddy page page:ffffea0001800000 count:0 mapcount:-127 mapping: (null) index:0x1 flags: 0x1fffc0000000000() page dumped because: VM_BUG_ON_PAGE(atomic_read(&page->_count) == 0) ------------[ cut here ]------------ kernel BUG at /src/linux-dev/include/linux/mm.h:342! invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC Modules linked in: cfg80211 rfkill crc32c_intel serio_raw virtio_balloon i2c_piix4 virtio_blk virtio_net ata_generic pata_acpi CPU: 3 PID: 3035 Comm: test_alloc_gene Tainted: G O 4.4.0-rc8-v4.4-rc8-160107-1501-00000-rc8+ #74 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 task: ffff88007c63d5c0 ti: ffff88007c210000 task.ti: ffff88007c210000 RIP: 0010:[] [] put_page+0x5c/0x60 RSP: 0018:ffff88007c213e00 EFLAGS: 00010246 Call Trace: put_hwpoison_page+0x4e/0x80 soft_offline_page+0x501/0x520 SyS_madvise+0x6bc/0x6f0 entry_SYSCALL_64_fastpath+0x12/0x6a Code: 8b fc ff ff 5b 5d c3 48 89 df e8 b0 fa ff ff 48 89 df 31 f6 e8 c6 7d ff ff 5b 5d c3 48 c7 c6 08 54 a2 81 48 89 df e8 a4 c5 01 00 <0f> 0b 66 90 66 66 66 66 90 55 48 89 e5 41 55 41 54 53 48 8b 47 RIP [] put_page+0x5c/0x60 RSP The root cause resides in get_any_page() which retries to get a refcount of the page to be soft-offlined. This function calls put_hwpoison_page(), expecting that the target page is putback to LRU list. But it can be also freed to buddy. So the second check need to care about such case. Fixes: af8fae7c0886 ("mm/memory-failure.c: clean up soft_offline_page()") Signed-off-by: Naoya Horiguchi Cc: Sasha Levin Cc: Aneesh Kumar K.V Cc: Vlastimil Babka Cc: Jerome Marchand Cc: Andrea Arcangeli Cc: Hugh Dickins Cc: Dave Hansen Cc: Mel Gorman Cc: Rik van Riel Cc: Steve Capper Cc: Johannes Weiner Cc: Michal Hocko Cc: Christoph Lameter Cc: David Rientjes Cc: [3.9+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/memory-failure.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 9f48145c884fc..e26bc59d7dffa 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1557,7 +1557,7 @@ static int get_any_page(struct page *page, unsigned long pfn, int flags) * Did it turn free? 
*/ ret = __get_any_page(page, pfn, 0); - if (!PageLRU(page)) { + if (ret == 1 && !PageLRU(page)) { /* Drop page reference which is from __get_any_page() */ put_page(page); pr_info("soft_offline: %#lx: unknown non LRU page type %lx\n", From 2c641f5b0c8e87d43235ce39890bcc4d0c7cd2fb Mon Sep 17 00:00:00 2001 From: Martijn Coenen Date: Fri, 15 Jan 2016 16:57:49 -0800 Subject: [PATCH 1745/2091] memcg: only free spare array when readers are done [ Upstream commit 6611d8d76132f86faa501de9451a89bf23fb2371 ] A spare array holding mem cgroup threshold events is kept around to make sure we can always safely deregister an event and have an array to store the new set of events in. In the scenario where we're going from 1 to 0 registered events, the pointer to the primary array containing 1 event is copied to the spare slot, and then the spare slot is freed because no events are left. However, it is freed before calling synchronize_rcu(), which means readers may still be accessing threshold->primary after it is freed. Fixed by only freeing after synchronize_rcu(). Signed-off-by: Martijn Coenen Cc: Johannes Weiner Acked-by: Michal Hocko Cc: Vladimir Davydov Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/memcontrol.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 68dea90334cb9..aac1c98a9bc7c 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3824,16 +3824,17 @@ static void __mem_cgroup_usage_unregister_event(struct mem_cgroup *memcg, swap_buffers: /* Swap primary and spare array */ thresholds->spare = thresholds->primary; - /* If all events are unregistered, free the spare array */ - if (!new) { - kfree(thresholds->spare); - thresholds->spare = NULL; - } rcu_assign_pointer(thresholds->primary, new); /* To be sure that nobody uses thresholds */ synchronize_rcu(); + + /* If all events are unregistered, free the spare array */ + if (!new) { + kfree(thresholds->spare); + thresholds->spare = NULL; + } unlock: mutex_unlock(&memcg->thresholds_lock); } From 94ecf566322c74e13509e834605e71b332d2469f Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Fri, 6 Nov 2015 16:32:58 -0800 Subject: [PATCH 1746/2091] panic: release stale console lock to always get the logbuf printed out [ Upstream commit 08d78658f393fefaa2e6507ea052c6f8ef4002a2 ] In some cases we may end up killing the CPU holding the console lock while still having valuable data in logbuf. E.g. I'm observing the following: - A crash is happening on one CPU and console_unlock() is being called on some other. - console_unlock() tries to print out the buffer before releasing the lock and on slow console it takes time. - in the meanwhile crashing CPU does lots of printk()-s with valuable data (which go to the logbuf) and sends IPIs to all other CPUs. - console_unlock() finishes printing previous chunk and enables interrupts before trying to print out the rest, the CPU catches the IPI and never releases console lock. This is not the only possible case: in VT/fb subsystems we have many other console_lock()/console_unlock() users. Non-masked interrupts (or receiving NMI in case of extreme slowness) will have the same result. Getting the whole console buffer printed out on crash should be top priority. [akpm@linux-foundation.org: tweak comment text] Signed-off-by: Vitaly Kuznetsov Cc: HATAYAMA Daisuke Cc: Masami Hiramatsu Cc: Jiri Kosina Cc: Baoquan He Cc: Prarit Bhargava Cc: Xie XiuQi Cc: Seth Jennings Cc: "K. Y. 
Srinivasan" Cc: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- kernel/panic.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/kernel/panic.c b/kernel/panic.c index 8136ad76e5fd3..dae109ea89eba 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -23,6 +23,7 @@ #include #include #include +#include #define PANIC_TIMER_STEP 100 #define PANIC_BLINK_SPD 18 @@ -146,6 +147,15 @@ void panic(const char *fmt, ...) bust_spinlocks(0); + /* + * We may have ended up stopping the CPU holding the lock (in + * smp_send_stop()) while still having some valuable data in the console + * buffer. Try to acquire the lock then release it regardless of the + * result. The release will also print the buffers out. + */ + console_trylock(); + console_unlock(); + if (!panic_blink) panic_blink = no_blink; From 0e19e24c3fe0abde8e2c5f4543616a251ccea6bf Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Fri, 20 Nov 2015 15:57:24 -0800 Subject: [PATCH 1747/2091] kernel/panic.c: turn off locks debug before releasing console lock [ Upstream commit 7625b3a0007decf2b135cb47ca67abc78a7b1bc1 ] Commit 08d78658f393 ("panic: release stale console lock to always get the logbuf printed out") introduced an unwanted bad unlock balance report when panic() is called directly and not from OOPS (e.g. from out_of_memory()). The difference is that in case of OOPS we disable locks debug in oops_enter() and on direct panic call nobody does that. Fixes: 08d78658f393 ("panic: release stale console lock to always get the logbuf printed out") Reported-by: kernel test robot Signed-off-by: Vitaly Kuznetsov Cc: HATAYAMA Daisuke Cc: Masami Hiramatsu Cc: Jiri Kosina Cc: Baoquan He Cc: Prarit Bhargava Cc: Xie XiuQi Cc: Seth Jennings Cc: "K. Y. Srinivasan" Cc: Jan Kara Cc: Petr Mladek Cc: Yasuaki Ishimatsu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- kernel/panic.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/panic.c b/kernel/panic.c index dae109ea89eba..466c16f8190d3 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -151,8 +151,11 @@ void panic(const char *fmt, ...) * We may have ended up stopping the CPU holding the lock (in * smp_send_stop()) while still having some valuable data in the console * buffer. Try to acquire the lock then release it regardless of the - * result. The release will also print the buffers out. + * result. The release will also print the buffers out. Locks debug + * should be disabled to avoid reporting bad unlock balance when + * panic() is not being callled from OOPS. */ + debug_locks_off(); console_trylock(); console_unlock(); From bc24ac15b0746172a8f603171352aa54abcf7c78 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 15 Jan 2016 16:58:24 -0800 Subject: [PATCH 1748/2091] printk: do cond_resched() between lines while outputting to consoles [ Upstream commit 8d91f8b15361dfb438ab6eb3b319e2ded43458ff ] @console_may_schedule tracks whether console_sem was acquired through lock or trylock. If the former, we're inside a sleepable context and console_conditional_schedule() performs cond_resched(). This allows console drivers which use console_lock for synchronization to yield while performing time-consuming operations such as scrolling. However, the actual console outputting is performed while holding irq-safe logbuf_lock, so console_unlock() clears @console_may_schedule before starting outputting lines. Also, only a few drivers call console_conditional_schedule() to begin with. 
This means that when a lot of lines need to be output by console_unlock(), for example on a console registration, the task doing console_unlock() may not yield for a long time on a non-preemptible kernel. If this happens with a slow console devices, for example a serial console, the outputting task may occupy the cpu for a very long time. Long enough to trigger softlockup and/or RCU stall warnings, which in turn pile more messages, sometimes enough to trigger the next cycle of warnings incapacitating the system. Fix it by making console_unlock() insert cond_resched() between lines if @console_may_schedule. Signed-off-by: Tejun Heo Reported-by: Calvin Owens Acked-by: Jan Kara Cc: Dave Jones Cc: Kyle McMartin Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- include/linux/console.h | 1 + kernel/panic.c | 3 +-- kernel/printk/printk.c | 35 ++++++++++++++++++++++++++++++++++- 3 files changed, 36 insertions(+), 3 deletions(-) diff --git a/include/linux/console.h b/include/linux/console.h index 9f50fb413c11c..901555a3886e9 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -149,6 +149,7 @@ extern int console_trylock(void); extern void console_unlock(void); extern void console_conditional_schedule(void); extern void console_unblank(void); +extern void console_flush_on_panic(void); extern struct tty_driver *console_device(int *); extern void console_stop(struct console *); extern void console_start(struct console *); diff --git a/kernel/panic.c b/kernel/panic.c index 466c16f8190d3..a4f7820f59302 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -156,8 +156,7 @@ void panic(const char *fmt, ...) * panic() is not being callled from OOPS. */ debug_locks_off(); - console_trylock(); - console_unlock(); + console_flush_on_panic(); if (!panic_blink) panic_blink = no_blink; diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index bff0169e1ad88..3c1aca0c35438 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -2173,13 +2173,24 @@ void console_unlock(void) static u64 seen_seq; unsigned long flags; bool wake_klogd = false; - bool retry; + bool do_cond_resched, retry; if (console_suspended) { up_console_sem(); return; } + /* + * Console drivers are called under logbuf_lock, so + * @console_may_schedule should be cleared before; however, we may + * end up dumping a lot of lines, for example, if called from + * console registration path, and should invoke cond_resched() + * between lines if allowable. Not doing so can cause a very long + * scheduling stall on a slow console leading to RCU stall and + * softlockup warnings which exacerbate the issue with more + * messages practically incapacitating the system. + */ + do_cond_resched = console_may_schedule; console_may_schedule = 0; /* flush buffered message fragment immediately to console */ @@ -2241,6 +2252,9 @@ void console_unlock(void) call_console_drivers(level, text, len); start_critical_timings(); local_irq_restore(flags); + + if (do_cond_resched) + cond_resched(); } console_locked = 0; @@ -2308,6 +2322,25 @@ void console_unblank(void) console_unlock(); } +/** + * console_flush_on_panic - flush console content on panic + * + * Immediately output all pending messages no matter what. + */ +void console_flush_on_panic(void) +{ + /* + * If someone else is holding the console lock, trylock will fail + * and may_schedule may be set. Ignore and proceed to unlock so + * that messages are flushed out. 
As this can be called from any + * context and we don't want to get preempted while flushing, + * ensure may_schedule is cleared. + */ + console_trylock(); + console_may_schedule = 0; + console_unlock(); +} + /* * Return the console tty driver structure and its associated index */ From 0571ba52a19e18a1c20469454231eef681cb1310 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 30 Dec 2015 11:47:53 +0800 Subject: [PATCH 1749/2091] crypto: af_alg - Disallow bind/setkey/... after accept(2) [ Upstream commit c840ac6af3f8713a71b4d2363419145760bd6044 ] Each af_alg parent socket obtained by socket(2) corresponds to a tfm object once bind(2) has succeeded. An accept(2) call on that parent socket creates a context which then uses the tfm object. Therefore as long as any child sockets created by accept(2) exist the parent socket must not be modified or freed. This patch guarantees this by using locks and a reference count on the parent socket. Any attempt to modify the parent socket will fail with EBUSY. Cc: stable@vger.kernel.org Reported-by: Dmitry Vyukov Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- crypto/af_alg.c | 35 ++++++++++++++++++++++++++++++++--- include/crypto/if_alg.h | 8 +++----- 2 files changed, 35 insertions(+), 8 deletions(-) diff --git a/crypto/af_alg.c b/crypto/af_alg.c index f22cc56fd1b38..2cf64ae17aa8a 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -125,6 +125,23 @@ int af_alg_release(struct socket *sock) } EXPORT_SYMBOL_GPL(af_alg_release); +void af_alg_release_parent(struct sock *sk) +{ + struct alg_sock *ask = alg_sk(sk); + bool last; + + sk = ask->parent; + ask = alg_sk(sk); + + lock_sock(sk); + last = !--ask->refcnt; + release_sock(sk); + + if (last) + sock_put(sk); +} +EXPORT_SYMBOL_GPL(af_alg_release_parent); + static int alg_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct sock *sk = sock->sk; @@ -132,6 +149,7 @@ static int alg_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) struct sockaddr_alg *sa = (void *)uaddr; const struct af_alg_type *type; void *private; + int err; if (sock->state == SS_CONNECTED) return -EINVAL; @@ -157,16 +175,22 @@ static int alg_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) return PTR_ERR(private); } + err = -EBUSY; lock_sock(sk); + if (ask->refcnt) + goto unlock; swap(ask->type, type); swap(ask->private, private); + err = 0; + +unlock: release_sock(sk); alg_do_release(type, private); - return 0; + return err; } static int alg_setkey(struct sock *sk, char __user *ukey, @@ -199,11 +223,15 @@ static int alg_setsockopt(struct socket *sock, int level, int optname, struct sock *sk = sock->sk; struct alg_sock *ask = alg_sk(sk); const struct af_alg_type *type; - int err = -ENOPROTOOPT; + int err = -EBUSY; lock_sock(sk); + if (ask->refcnt) + goto unlock; + type = ask->type; + err = -ENOPROTOOPT; if (level != SOL_ALG || !type) goto unlock; @@ -261,7 +289,8 @@ int af_alg_accept(struct sock *sk, struct socket *newsock) sk2->sk_family = PF_ALG; - sock_hold(sk); + if (!ask->refcnt++) + sock_hold(sk); alg_sk(sk2)->parent = sk; alg_sk(sk2)->type = type; diff --git a/include/crypto/if_alg.h b/include/crypto/if_alg.h index 018afb264ac26..589716f2ee8a4 100644 --- a/include/crypto/if_alg.h +++ b/include/crypto/if_alg.h @@ -30,6 +30,8 @@ struct alg_sock { struct sock *parent; + unsigned int refcnt; + const struct af_alg_type *type; void *private; }; @@ -67,6 +69,7 @@ int af_alg_register_type(const struct af_alg_type *type); int af_alg_unregister_type(const struct af_alg_type *type); int 
af_alg_release(struct socket *sock); +void af_alg_release_parent(struct sock *sk); int af_alg_accept(struct sock *sk, struct socket *newsock); int af_alg_make_sg(struct af_alg_sgl *sgl, struct iov_iter *iter, int len); @@ -83,11 +86,6 @@ static inline struct alg_sock *alg_sk(struct sock *sk) return (struct alg_sock *)sk; } -static inline void af_alg_release_parent(struct sock *sk) -{ - sock_put(alg_sk(sk)->parent); -} - static inline void af_alg_init_completion(struct af_alg_completion *completion) { init_completion(&completion->completion); From fa988b35c2e40f38e57388a1a3f48de056e81dd3 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 30 Dec 2015 20:24:17 +0800 Subject: [PATCH 1750/2091] crypto: af_alg - Fix socket double-free when accept fails [ Upstream commit a383292c86663bbc31ac62cc0c04fc77504636a6 ] When we fail an accept(2) call we will end up freeing the socket twice, once due to the direct sk_free call and once again through newsock. This patch fixes this by removing the sk_free call. Cc: stable@vger.kernel.org Reported-by: Dmitry Vyukov Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- crypto/af_alg.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/crypto/af_alg.c b/crypto/af_alg.c index 2cf64ae17aa8a..153dc85e04f1a 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -282,10 +282,8 @@ int af_alg_accept(struct sock *sk, struct socket *newsock) security_sk_clone(sk, sk2); err = type->accept(ask->private, sk2); - if (err) { - sk_free(sk2); + if (err) goto unlock; - } sk2->sk_family = PF_ALG; From 92d76b5b2c3a29e14bc01a5e96d075afd4c6644f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 4 Jan 2016 13:35:18 +0900 Subject: [PATCH 1751/2091] crypto: af_alg - Add nokey compatibility path [ Upstream commit 37766586c965d63758ad542325a96d5384f4a8c9 ] This patch adds a compatibility path to support old applications that do accept(2) before setkey.
Cc: stable@vger.kernel.org Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- crypto/af_alg.c | 13 ++++++++++++- include/crypto/if_alg.h | 2 ++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/crypto/af_alg.c b/crypto/af_alg.c index 153dc85e04f1a..fef296cc91241 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -76,6 +76,8 @@ int af_alg_register_type(const struct af_alg_type *type) goto unlock; type->ops->owner = THIS_MODULE; + if (type->ops_nokey) + type->ops_nokey->owner = THIS_MODULE; node->type = type; list_add(&node->list, &alg_types); err = 0; @@ -264,6 +266,7 @@ int af_alg_accept(struct sock *sk, struct socket *newsock) const struct af_alg_type *type; struct sock *sk2; int err; + bool nokey; lock_sock(sk); type = ask->type; @@ -282,12 +285,17 @@ int af_alg_accept(struct sock *sk, struct socket *newsock) security_sk_clone(sk, sk2); err = type->accept(ask->private, sk2); + + nokey = err == -ENOKEY; + if (nokey && type->accept_nokey) + err = type->accept_nokey(ask->private, sk2); + if (err) goto unlock; sk2->sk_family = PF_ALG; - if (!ask->refcnt++) + if (nokey || !ask->refcnt++) sock_hold(sk); alg_sk(sk2)->parent = sk; alg_sk(sk2)->type = type; @@ -295,6 +303,9 @@ int af_alg_accept(struct sock *sk, struct socket *newsock) newsock->ops = type->ops; newsock->state = SS_CONNECTED; + if (nokey) + newsock->ops = type->ops_nokey; + err = 0; unlock: diff --git a/include/crypto/if_alg.h b/include/crypto/if_alg.h index 589716f2ee8a4..df8284415c565 100644 --- a/include/crypto/if_alg.h +++ b/include/crypto/if_alg.h @@ -52,9 +52,11 @@ struct af_alg_type { void (*release)(void *private); int (*setkey)(void *private, const u8 *key, unsigned int keylen); int (*accept)(void *private, struct sock *sk); + int (*accept_nokey)(void *private, struct sock *sk); int (*setauthsize)(void *private, unsigned int authsize); struct proto_ops *ops; + struct proto_ops *ops_nokey; struct module *owner; char name[14]; }; From c409087e6db988257adffb614616764116c7337e Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 8 Jan 2016 21:28:26 +0800 Subject: [PATCH 1752/2091] crypto: hash - Add crypto_ahash_has_setkey [ Upstream commit a5596d6332787fd383b3b5427b41f94254430827 ] This patch adds a way for ahash users to determine whether a key is required by a crypto_ahash transform. 
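As a hint of how a caller might use the new helper (a hypothetical example, not taken from the patch; the following algif_hash patch in this series does the equivalent check):

#include <crypto/hash.h>
#include <linux/errno.h>

/* 'has_key' would be tracked by the caller whenever its setkey path runs. */
static int example_check_key(struct crypto_ahash *tfm, bool has_key)
{
	if (!has_key && crypto_ahash_has_setkey(tfm))
		return -ENOKEY;	/* e.g. an hmac transform with no key set */
	return 0;
}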
Cc: stable@vger.kernel.org Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- crypto/ahash.c | 5 ++++- crypto/shash.c | 4 +++- include/crypto/hash.h | 6 ++++++ 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/crypto/ahash.c b/crypto/ahash.c index 9c1dc8d6106a8..d19b52324cf52 100644 --- a/crypto/ahash.c +++ b/crypto/ahash.c @@ -451,6 +451,7 @@ static int crypto_ahash_init_tfm(struct crypto_tfm *tfm) struct ahash_alg *alg = crypto_ahash_alg(hash); hash->setkey = ahash_nosetkey; + hash->has_setkey = false; hash->export = ahash_no_export; hash->import = ahash_no_import; @@ -463,8 +464,10 @@ static int crypto_ahash_init_tfm(struct crypto_tfm *tfm) hash->finup = alg->finup ?: ahash_def_finup; hash->digest = alg->digest; - if (alg->setkey) + if (alg->setkey) { hash->setkey = alg->setkey; + hash->has_setkey = true; + } if (alg->export) hash->export = alg->export; if (alg->import) diff --git a/crypto/shash.c b/crypto/shash.c index 47c713954bf30..aa3e505045e0b 100644 --- a/crypto/shash.c +++ b/crypto/shash.c @@ -355,8 +355,10 @@ int crypto_init_shash_ops_async(struct crypto_tfm *tfm) crt->finup = shash_async_finup; crt->digest = shash_async_digest; - if (alg->setkey) + if (alg->setkey) { crt->setkey = shash_async_setkey; + crt->has_setkey = true; + } if (alg->export) crt->export = shash_async_export; if (alg->import) diff --git a/include/crypto/hash.h b/include/crypto/hash.h index 98abda9ed3aa8..bbc59bdd6395f 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -199,6 +199,7 @@ struct crypto_ahash { unsigned int keylen); unsigned int reqsize; + bool has_setkey; struct crypto_tfm base; }; @@ -356,6 +357,11 @@ static inline void *ahash_request_ctx(struct ahash_request *req) int crypto_ahash_setkey(struct crypto_ahash *tfm, const u8 *key, unsigned int keylen); +static inline bool crypto_ahash_has_setkey(struct crypto_ahash *tfm) +{ + return tfm->has_setkey; +} + /** * crypto_ahash_finup() - update and finalize message digest * @req: reference to the ahash_request handle that holds all information From e1ed9a4b4380fc33c558a0c1bd7cbf527e891151 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 8 Jan 2016 21:31:04 +0800 Subject: [PATCH 1753/2091] crypto: algif_hash - Require setkey before accept(2) [ Upstream commit 6de62f15b581f920ade22d758f4c338311c2f0d4 ] Hash implementations that require a key may crash if you use them without setting a key. This patch adds the necessary checks so that if you attempt to use them without a key we return -ENOKEY instead of proceeding. This patch also adds a compatibility path to support old applications that do accept(2) before setkey.
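To make the expected ordering concrete, a minimal userspace sketch (not part of the patch); the "hmac(sha256)" name, the all-zero key and the missing error handling are illustrative only, and the fallback #defines cover older toolchain headers. With this series, doing accept(2) before ALG_SET_KEY no longer crashes: operations on the child socket simply return ENOKEY until a key is set on the parent.

#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/if_alg.h>

#ifndef AF_ALG
#define AF_ALG 38
#endif
#ifndef SOL_ALG
#define SOL_ALG 279
#endif

int main(void)
{
	struct sockaddr_alg sa = {
		.salg_family = AF_ALG,
		.salg_type   = "hash",
		.salg_name   = "hmac(sha256)",
	};
	unsigned char key[32] = { 0 };		/* example key only */
	unsigned char digest[32];
	int tfmfd, opfd, i;

	tfmfd = socket(AF_ALG, SOCK_SEQPACKET, 0);
	bind(tfmfd, (struct sockaddr *)&sa, sizeof(sa));

	/* Set the key on the parent (tfm) socket before accept(2). */
	setsockopt(tfmfd, SOL_ALG, ALG_SET_KEY, key, sizeof(key));

	opfd = accept(tfmfd, NULL, 0);		/* operation socket */
	write(opfd, "hello", 5);
	read(opfd, digest, sizeof(digest));

	for (i = 0; i < (int)sizeof(digest); i++)
		printf("%02x", digest[i]);
	printf("\n");

	close(opfd);
	close(tfmfd);
	return 0;
}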
Cc: stable@vger.kernel.org Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- crypto/algif_hash.c | 201 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 193 insertions(+), 8 deletions(-) diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c index 1396ad0787fc6..b3df979f0ed1b 100644 --- a/crypto/algif_hash.c +++ b/crypto/algif_hash.c @@ -34,6 +34,11 @@ struct hash_ctx { struct ahash_request req; }; +struct algif_hash_tfm { + struct crypto_ahash *hash; + bool has_key; +}; + static int hash_sendmsg(struct socket *sock, struct msghdr *msg, size_t ignored) { @@ -227,22 +232,151 @@ static struct proto_ops algif_hash_ops = { .accept = hash_accept, }; +static int hash_check_key(struct socket *sock) +{ + int err; + struct sock *psk; + struct alg_sock *pask; + struct algif_hash_tfm *tfm; + struct sock *sk = sock->sk; + struct alg_sock *ask = alg_sk(sk); + + if (ask->refcnt) + return 0; + + psk = ask->parent; + pask = alg_sk(ask->parent); + tfm = pask->private; + + err = -ENOKEY; + lock_sock(psk); + if (!tfm->has_key) + goto unlock; + + if (!pask->refcnt++) + sock_hold(psk); + + ask->refcnt = 1; + sock_put(psk); + + err = 0; + +unlock: + release_sock(psk); + + return err; +} + +static int hash_sendmsg_nokey(struct socket *sock, struct msghdr *msg, + size_t size) +{ + int err; + + err = hash_check_key(sock); + if (err) + return err; + + return hash_sendmsg(sock, msg, size); +} + +static ssize_t hash_sendpage_nokey(struct socket *sock, struct page *page, + int offset, size_t size, int flags) +{ + int err; + + err = hash_check_key(sock); + if (err) + return err; + + return hash_sendpage(sock, page, offset, size, flags); +} + +static int hash_recvmsg_nokey(struct socket *sock, struct msghdr *msg, + size_t ignored, int flags) +{ + int err; + + err = hash_check_key(sock); + if (err) + return err; + + return hash_recvmsg(sock, msg, ignored, flags); +} + +static int hash_accept_nokey(struct socket *sock, struct socket *newsock, + int flags) +{ + int err; + + err = hash_check_key(sock); + if (err) + return err; + + return hash_accept(sock, newsock, flags); +} + +static struct proto_ops algif_hash_ops_nokey = { + .family = PF_ALG, + + .connect = sock_no_connect, + .socketpair = sock_no_socketpair, + .getname = sock_no_getname, + .ioctl = sock_no_ioctl, + .listen = sock_no_listen, + .shutdown = sock_no_shutdown, + .getsockopt = sock_no_getsockopt, + .mmap = sock_no_mmap, + .bind = sock_no_bind, + .setsockopt = sock_no_setsockopt, + .poll = sock_no_poll, + + .release = af_alg_release, + .sendmsg = hash_sendmsg_nokey, + .sendpage = hash_sendpage_nokey, + .recvmsg = hash_recvmsg_nokey, + .accept = hash_accept_nokey, +}; + static void *hash_bind(const char *name, u32 type, u32 mask) { - return crypto_alloc_ahash(name, type, mask); + struct algif_hash_tfm *tfm; + struct crypto_ahash *hash; + + tfm = kzalloc(sizeof(*tfm), GFP_KERNEL); + if (!tfm) + return ERR_PTR(-ENOMEM); + + hash = crypto_alloc_ahash(name, type, mask); + if (IS_ERR(hash)) { + kfree(tfm); + return ERR_CAST(hash); + } + + tfm->hash = hash; + + return tfm; } static void hash_release(void *private) { - crypto_free_ahash(private); + struct algif_hash_tfm *tfm = private; + + crypto_free_ahash(tfm->hash); + kfree(tfm); } static int hash_setkey(void *private, const u8 *key, unsigned int keylen) { - return crypto_ahash_setkey(private, key, keylen); + struct algif_hash_tfm *tfm = private; + int err; + + err = crypto_ahash_setkey(tfm->hash, key, keylen); + tfm->has_key = !err; + + return err; } -static void hash_sock_destruct(struct sock 
*sk) +static void hash_sock_destruct_common(struct sock *sk) { struct alg_sock *ask = alg_sk(sk); struct hash_ctx *ctx = ask->private; @@ -250,15 +384,40 @@ static void hash_sock_destruct(struct sock *sk) sock_kzfree_s(sk, ctx->result, crypto_ahash_digestsize(crypto_ahash_reqtfm(&ctx->req))); sock_kfree_s(sk, ctx, ctx->len); +} + +static void hash_sock_destruct(struct sock *sk) +{ + hash_sock_destruct_common(sk); af_alg_release_parent(sk); } -static int hash_accept_parent(void *private, struct sock *sk) +static void hash_release_parent_nokey(struct sock *sk) +{ + struct alg_sock *ask = alg_sk(sk); + + if (!ask->refcnt) { + sock_put(ask->parent); + return; + } + + af_alg_release_parent(sk); +} + +static void hash_sock_destruct_nokey(struct sock *sk) +{ + hash_sock_destruct_common(sk); + hash_release_parent_nokey(sk); +} + +static int hash_accept_parent_common(void *private, struct sock *sk) { struct hash_ctx *ctx; struct alg_sock *ask = alg_sk(sk); - unsigned len = sizeof(*ctx) + crypto_ahash_reqsize(private); - unsigned ds = crypto_ahash_digestsize(private); + struct algif_hash_tfm *tfm = private; + struct crypto_ahash *hash = tfm->hash; + unsigned len = sizeof(*ctx) + crypto_ahash_reqsize(hash); + unsigned ds = crypto_ahash_digestsize(hash); ctx = sock_kmalloc(sk, len, GFP_KERNEL); if (!ctx) @@ -278,7 +437,7 @@ static int hash_accept_parent(void *private, struct sock *sk) ask->private = ctx; - ahash_request_set_tfm(&ctx->req, private); + ahash_request_set_tfm(&ctx->req, hash); ahash_request_set_callback(&ctx->req, CRYPTO_TFM_REQ_MAY_BACKLOG, af_alg_complete, &ctx->completion); @@ -287,12 +446,38 @@ static int hash_accept_parent(void *private, struct sock *sk) return 0; } +static int hash_accept_parent(void *private, struct sock *sk) +{ + struct algif_hash_tfm *tfm = private; + + if (!tfm->has_key && crypto_ahash_has_setkey(tfm->hash)) + return -ENOKEY; + + return hash_accept_parent_common(private, sk); +} + +static int hash_accept_parent_nokey(void *private, struct sock *sk) +{ + int err; + + err = hash_accept_parent_common(private, sk); + if (err) + goto out; + + sk->sk_destruct = hash_sock_destruct_nokey; + +out: + return err; +} + static const struct af_alg_type algif_type_hash = { .bind = hash_bind, .release = hash_release, .setkey = hash_setkey, .accept = hash_accept_parent, + .accept_nokey = hash_accept_parent_nokey, .ops = &algif_hash_ops, + .ops_nokey = &algif_hash_ops_nokey, .name = "hash", .owner = THIS_MODULE }; From 99214a2ff7f6faef389659e93dafdf41ae86a72b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 13 Jan 2016 14:59:03 +0800 Subject: [PATCH 1754/2091] crypto: af_alg - Allow af_af_alg_release_parent to be called on nokey path [ Upstream commit 6a935170a980024dd29199e9dbb5c4da4767a1b9 ] This patch allows af_alg_release_parent to be called even for nokey sockets. 
Cc: stable@vger.kernel.org Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- crypto/af_alg.c | 9 ++++++++- include/crypto/if_alg.h | 1 + 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/crypto/af_alg.c b/crypto/af_alg.c index fef296cc91241..63acec501aaa0 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -133,6 +133,12 @@ void af_alg_release_parent(struct sock *sk) bool last; sk = ask->parent; + + if (ask->nokey_refcnt && !ask->refcnt) { + sock_put(sk); + return; + } + ask = alg_sk(sk); lock_sock(sk); @@ -265,8 +271,8 @@ int af_alg_accept(struct sock *sk, struct socket *newsock) struct alg_sock *ask = alg_sk(sk); const struct af_alg_type *type; struct sock *sk2; + unsigned int nokey; int err; - bool nokey; lock_sock(sk); type = ask->type; @@ -299,6 +305,7 @@ int af_alg_accept(struct sock *sk, struct socket *newsock) sock_hold(sk); alg_sk(sk2)->parent = sk; alg_sk(sk2)->type = type; + alg_sk(sk2)->nokey_refcnt = nokey; newsock->ops = type->ops; newsock->state = SS_CONNECTED; diff --git a/include/crypto/if_alg.h b/include/crypto/if_alg.h index df8284415c565..a2bfd7843f18f 100644 --- a/include/crypto/if_alg.h +++ b/include/crypto/if_alg.h @@ -31,6 +31,7 @@ struct alg_sock { struct sock *parent; unsigned int refcnt; + unsigned int nokey_refcnt; const struct af_alg_type *type; void *private; From 279792e1f4f66d719fe9ff7c720dd600d9880fbf Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 13 Jan 2016 15:00:36 +0800 Subject: [PATCH 1755/2091] crypto: algif_hash - Remove custom release parent function [ Upstream commit f1d84af1835846a5a2b827382c5848faf2bb0e75 ] This patch removes the custom release parent function as the generic af_alg_release_parent now works for nokey sockets too. Cc: stable@vger.kernel.org Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- crypto/algif_hash.c | 43 +++---------------------------------------- 1 file changed, 3 insertions(+), 40 deletions(-) diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c index b3df979f0ed1b..1867020987cfd 100644 --- a/crypto/algif_hash.c +++ b/crypto/algif_hash.c @@ -376,7 +376,7 @@ static int hash_setkey(void *private, const u8 *key, unsigned int keylen) return err; } -static void hash_sock_destruct_common(struct sock *sk) +static void hash_sock_destruct(struct sock *sk) { struct alg_sock *ask = alg_sk(sk); struct hash_ctx *ctx = ask->private; @@ -384,33 +384,10 @@ static void hash_sock_destruct_common(struct sock *sk) sock_kzfree_s(sk, ctx->result, crypto_ahash_digestsize(crypto_ahash_reqtfm(&ctx->req))); sock_kfree_s(sk, ctx, ctx->len); -} - -static void hash_sock_destruct(struct sock *sk) -{ - hash_sock_destruct_common(sk); - af_alg_release_parent(sk); -} - -static void hash_release_parent_nokey(struct sock *sk) -{ - struct alg_sock *ask = alg_sk(sk); - - if (!ask->refcnt) { - sock_put(ask->parent); - return; - } - af_alg_release_parent(sk); } -static void hash_sock_destruct_nokey(struct sock *sk) -{ - hash_sock_destruct_common(sk); - hash_release_parent_nokey(sk); -} - -static int hash_accept_parent_common(void *private, struct sock *sk) +static int hash_accept_parent_nokey(void *private, struct sock *sk) { struct hash_ctx *ctx; struct alg_sock *ask = alg_sk(sk); @@ -453,21 +430,7 @@ static int hash_accept_parent(void *private, struct sock *sk) if (!tfm->has_key && crypto_ahash_has_setkey(tfm->hash)) return -ENOKEY; - return hash_accept_parent_common(private, sk); -} - -static int hash_accept_parent_nokey(void *private, struct sock *sk) -{ - int err; - - err = hash_accept_parent_common(private, sk); - if 
(err) - goto out; - - sk->sk_destruct = hash_sock_destruct_nokey; - -out: - return err; + return hash_accept_parent_nokey(private, sk); } static const struct af_alg_type algif_type_hash = { From 8515819fff8fce56be067af985016b6356d1fe5e Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 13 Jan 2016 15:03:32 +0800 Subject: [PATCH 1756/2091] crypto: af_alg - Forbid bind(2) when nokey child sockets are present [ Upstream commit a6a48c565f6f112c6983e2a02b1602189ed6e26e ] This patch forbids the calling of bind(2) when there are child sockets created by accept(2) in existence, even if they are created on the nokey path. This is needed as those child sockets have references to the tfm object which bind(2) will destroy. Cc: stable@vger.kernel.org Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- crypto/af_alg.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/crypto/af_alg.c b/crypto/af_alg.c index 63acec501aaa0..9641b74b53ef1 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -130,19 +130,16 @@ EXPORT_SYMBOL_GPL(af_alg_release); void af_alg_release_parent(struct sock *sk) { struct alg_sock *ask = alg_sk(sk); - bool last; + unsigned int nokey = ask->nokey_refcnt; + bool last = nokey && !ask->refcnt; sk = ask->parent; - - if (ask->nokey_refcnt && !ask->refcnt) { - sock_put(sk); - return; - } - ask = alg_sk(sk); lock_sock(sk); - last = !--ask->refcnt; + ask->nokey_refcnt -= nokey; + if (!last) + last = !--ask->refcnt; release_sock(sk); if (last) @@ -185,7 +182,7 @@ static int alg_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) err = -EBUSY; lock_sock(sk); - if (ask->refcnt) + if (ask->refcnt | ask->nokey_refcnt) goto unlock; swap(ask->type, type); @@ -303,6 +300,7 @@ int af_alg_accept(struct sock *sk, struct socket *newsock) if (nokey || !ask->refcnt++) sock_hold(sk); + ask->nokey_refcnt += nokey; alg_sk(sk2)->parent = sk; alg_sk(sk2)->type = type; alg_sk(sk2)->nokey_refcnt = nokey; From 3a1e81ad84e4d880b00ecf7ad8d03b9b772ddfa7 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 15 Jan 2016 22:01:08 +0800 Subject: [PATCH 1757/2091] crypto: algif_hash - Fix race condition in hash_check_key [ Upstream commit ad46d7e33219218605ea619e32553daf4f346b9f ] We need to lock the child socket in hash_check_key as otherwise two simultaneous calls can cause the parent socket to be freed. 
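The fix below boils down to a lock-ordering rule: serialise callers on the child socket first, then take the parent socket lock with a nesting annotation. As a standalone sketch of that ordering (function and parameter names are invented, and the algif-specific key checks are elided):

  #include <net/sock.h>
  #include <linux/lockdep.h>

  /* Illustrative only: child lock first, then the parent sk lock with
   * SINGLE_DEPTH_NESTING so lockdep can tell the two instances apart. */
  static int example_check_parent_state(struct sock *child, struct sock *parent)
  {
          int err = 0;

          lock_sock(child);
          lock_sock_nested(parent, SINGLE_DEPTH_NESTING);

          /* ... inspect parent state (e.g. whether a key has been set) ... */

          release_sock(parent);
          release_sock(child);
          return err;
  }
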
Cc: stable@vger.kernel.org Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- crypto/algif_hash.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c index 1867020987cfd..7b5018b34c97c 100644 --- a/crypto/algif_hash.c +++ b/crypto/algif_hash.c @@ -234,22 +234,23 @@ static struct proto_ops algif_hash_ops = { static int hash_check_key(struct socket *sock) { - int err; + int err = 0; struct sock *psk; struct alg_sock *pask; struct algif_hash_tfm *tfm; struct sock *sk = sock->sk; struct alg_sock *ask = alg_sk(sk); + lock_sock(sk); if (ask->refcnt) - return 0; + goto unlock_child; psk = ask->parent; pask = alg_sk(ask->parent); tfm = pask->private; err = -ENOKEY; - lock_sock(psk); + lock_sock_nested(psk, SINGLE_DEPTH_NESTING); if (!tfm->has_key) goto unlock; @@ -263,6 +264,8 @@ static int hash_check_key(struct socket *sock) unlock: release_sock(psk); +unlock_child: + release_sock(sk); return err; } From 40ced0bb2cd8bd17c4f1061176c2ca1db71ec667 Mon Sep 17 00:00:00 2001 From: Songjun Wu Date: Mon, 18 Jan 2016 11:14:44 +0100 Subject: [PATCH 1758/2091] dmaengine: at_xdmac: fix resume for cyclic transfers [ Upstream commit 611dcadb01c89d1d3521450c05a4ded332e5a32d ] When having cyclic transfers, the channel was paused when performing suspend but was not correctly resumed. Signed-off-by: Songjun Wu Signed-off-by: Ludovic Desroches Fixes: e1f7c9eee707 ("dmaengine: at_xdmac: creation of the atmel eXtended DMA Controller driver") Cc: # 4.1 and later Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/at_xdmac.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index c89a7abb523fa..8d8c35623f2a8 100644 --- a/drivers/dma/at_xdmac.c +++ b/drivers/dma/at_xdmac.c @@ -1230,6 +1230,7 @@ static int at_xdmac_device_terminate_all(struct dma_chan *chan) list_for_each_entry_safe(desc, _desc, &atchan->xfers_list, xfer_node) at_xdmac_remove_xfer(atchan, desc); + clear_bit(AT_XDMAC_CHAN_IS_PAUSED, &atchan->status); clear_bit(AT_XDMAC_CHAN_IS_CYCLIC, &atchan->status); spin_unlock_irqrestore(&atchan->lock, flags); @@ -1362,6 +1363,8 @@ static int atmel_xdmac_resume(struct device *dev) atchan = to_at_xdmac_chan(chan); at_xdmac_chan_write(atchan, AT_XDMAC_CC, atchan->save_cc); if (at_xdmac_chan_is_cyclic(atchan)) { + if (at_xdmac_chan_is_paused(atchan)) + at_xdmac_device_resume(chan); at_xdmac_chan_write(atchan, AT_XDMAC_CNDA, atchan->save_cnda); at_xdmac_chan_write(atchan, AT_XDMAC_CNDC, atchan->save_cndc); at_xdmac_chan_write(atchan, AT_XDMAC_CIE, atchan->save_cim); From 34d751ee6985e545e5de6d2c626e33001ea5fc5b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 18 Jan 2016 10:45:00 +0100 Subject: [PATCH 1759/2091] ARM: debug-ll: fix BCM63xx entry for multiplatform [ Upstream commit 6c54809977de3c9e2ef9e9934a2c6625f7e161e7 ] During my randconfig build testing, I found that a kernel with DEBUG_AT91_UART and ARCH_BCM_63XX fails to build: arch/arm/include/debug/at91.S:18:0: error: "CONFIG_DEBUG_UART_VIRT" redefined [-Werror] It turns out that the DEBUG_UART_BCM63XX option is enabled whenever the ARCH_BCM_63XX is, and that breaks multiplatform kernels because we then end up using the UART address from BCM63XX rather than the one we actually configured (if any). This changes the BCM63XX options to only have one Kconfig option, and only enable that if the user explicitly turns it on. 
Signed-off-by: Arnd Bergmann Fixes: b51312bebfa4 ("ARM: BCM63XX: add low-level UART debug support") Cc: stable@vger.kernel.org Signed-off-by: Sasha Levin --- arch/arm/Kconfig.debug | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug index 0c12ffb155a23..f775d7161ffb9 100644 --- a/arch/arm/Kconfig.debug +++ b/arch/arm/Kconfig.debug @@ -161,10 +161,9 @@ choice mobile SoCs in the Kona family of chips (e.g. bcm28155, bcm11351, etc...) - config DEBUG_BCM63XX + config DEBUG_BCM63XX_UART bool "Kernel low-level debugging on BCM63XX UART" depends on ARCH_BCM_63XX - select DEBUG_UART_BCM63XX config DEBUG_BERLIN_UART bool "Marvell Berlin SoC Debug UART" @@ -1304,7 +1303,7 @@ config DEBUG_LL_INCLUDE default "debug/vf.S" if DEBUG_VF_UART default "debug/vt8500.S" if DEBUG_VT8500_UART0 default "debug/zynq.S" if DEBUG_ZYNQ_UART0 || DEBUG_ZYNQ_UART1 - default "debug/bcm63xx.S" if DEBUG_UART_BCM63XX + default "debug/bcm63xx.S" if DEBUG_BCM63XX_UART default "debug/digicolor.S" if DEBUG_DIGICOLOR_UA0 default "mach/debug-macro.S" @@ -1320,10 +1319,6 @@ config DEBUG_UART_8250 ARCH_IOP33X || ARCH_IXP4XX || \ ARCH_LPC32XX || ARCH_MV78XX0 || ARCH_ORION5X || ARCH_RPC -# Compatibility options for BCM63xx -config DEBUG_UART_BCM63XX - def_bool ARCH_BCM_63XX - config DEBUG_UART_PHYS hex "Physical base address of debug UART" default 0x00100a00 if DEBUG_NETX_UART @@ -1415,7 +1410,7 @@ config DEBUG_UART_PHYS default 0xfffb0000 if DEBUG_OMAP1UART1 || DEBUG_OMAP7XXUART1 default 0xfffb0800 if DEBUG_OMAP1UART2 || DEBUG_OMAP7XXUART2 default 0xfffb9800 if DEBUG_OMAP1UART3 || DEBUG_OMAP7XXUART3 - default 0xfffe8600 if DEBUG_UART_BCM63XX + default 0xfffe8600 if DEBUG_BCM63XX_UART default 0xfffff700 if ARCH_IOP33X depends on ARCH_EP93XX || \ DEBUG_LL_UART_8250 || DEBUG_LL_UART_PL01X || \ @@ -1427,7 +1422,7 @@ config DEBUG_UART_PHYS DEBUG_RCAR_GEN2_SCIF0 || DEBUG_RCAR_GEN2_SCIF2 || \ DEBUG_RMOBILE_SCIFA0 || DEBUG_RMOBILE_SCIFA1 || \ DEBUG_RMOBILE_SCIFA4 || DEBUG_S3C24XX_UART || \ - DEBUG_UART_BCM63XX || DEBUG_ASM9260_UART || \ + DEBUG_BCM63XX_UART || DEBUG_ASM9260_UART || \ DEBUG_SIRFSOC_UART || DEBUG_DIGICOLOR_UA0 config DEBUG_UART_VIRT @@ -1466,7 +1461,7 @@ config DEBUG_UART_VIRT default 0xfb009000 if DEBUG_REALVIEW_STD_PORT default 0xfb10c000 if DEBUG_REALVIEW_PB1176_PORT default 0xfc40ab00 if DEBUG_BRCMSTB_UART - default 0xfcfe8600 if DEBUG_UART_BCM63XX + default 0xfcfe8600 if DEBUG_BCM63XX_UART default 0xfd000000 if ARCH_SPEAR3XX || ARCH_SPEAR6XX default 0xfd000000 if ARCH_SPEAR13XX default 0xfd012000 if ARCH_MV78XX0 @@ -1516,7 +1511,7 @@ config DEBUG_UART_VIRT DEBUG_UART_8250 || DEBUG_UART_PL01X || DEBUG_MESON_UARTAO || \ DEBUG_NETX_UART || \ DEBUG_QCOM_UARTDM || DEBUG_S3C24XX_UART || \ - DEBUG_UART_BCM63XX || DEBUG_ASM9260_UART || \ + DEBUG_BCM63XX_UART || DEBUG_ASM9260_UART || \ DEBUG_SIRFSOC_UART || DEBUG_DIGICOLOR_UA0 config DEBUG_UART_8250_SHIFT From 4126014e7cd0d5e667953610139b20388910cbbd Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 19 Jan 2016 08:28:10 +1100 Subject: [PATCH 1760/2091] xfs: log mount failures don't wait for buffers to be released [ Upstream commit 85bec5460ad8e05e0a8d70fb0f6750eb719ad092 ] Recently I've been seeing xfs/051 fail on 1k block size filesystems. Trying to trace the events during the test lead to the problem going away, indicating that it was a race condition that lead to this ASSERT failure: XFS: Assertion failed: atomic_read(&pag->pag_ref) == 0, file: fs/xfs/xfs_mount.c, line: 156 ..... 
[] xfs_free_perag+0x87/0xb0 [] xfs_mountfs+0x4d9/0x900 [] xfs_fs_fill_super+0x3bf/0x4d0 [] mount_bdev+0x180/0x1b0 [] xfs_fs_mount+0x15/0x20 [] mount_fs+0x38/0x170 [] vfs_kern_mount+0x67/0x120 [] do_mount+0x218/0xd60 [] SyS_mount+0x8b/0xd0 When I finally caught it with tracing enabled, I saw that AG 2 had an elevated reference count and a buffer was responsible for it. I tracked down the specific buffer, and found that it was missing the final reference count release that would put it back on the LRU and hence be found by xfs_wait_buftarg() calls in the log mount failure handling. The last four traces for the buffer before the assert were (trimmed for relevance) kworker/0:1-5259 xfs_buf_iodone: hold 2 lock 0 flags ASYNC kworker/0:1-5259 xfs_buf_ioerror: hold 2 lock 0 error -5 mount-7163 xfs_buf_lock_done: hold 2 lock 0 flags ASYNC mount-7163 xfs_buf_unlock: hold 2 lock 1 flags ASYNC This is an async write that is completing, so there's nobody waiting for it directly. Hence we call xfs_buf_relse() once all the processing is complete. That does: static inline void xfs_buf_relse(xfs_buf_t *bp) { xfs_buf_unlock(bp); xfs_buf_rele(bp); } Now, it's clear that mount is waiting on the buffer lock, and that it has been released by xfs_buf_relse() and gained by mount. This is expected, because at this point the mount process is in xfs_buf_delwri_submit() waiting for all the IO it submitted to complete. The mount process, however, is waiting on the lock for the buffer because it is in xfs_buf_delwri_submit(). This waits for IO completion, but it doesn't wait for the buffer reference owned by the IO to go away. The mount process collects all the completions, fails the log recovery, and the higher level code then calls xfs_wait_buftarg() to free all the remaining buffers in the filesystem. The issue is that on unlocking the buffer, the scheduler has decided that the mount process has higher priority than the the kworker thread that is running the IO completion, and so immediately switched contexts to the mount process from the semaphore unlock code, hence preventing the kworker thread from finishing the IO completion and releasing the IO reference to the buffer. Hence by the time that xfs_wait_buftarg() is run, the buffer still has an active reference and so isn't on the LRU list that the function walks to free the remaining buffers. Hence we miss that buffer and continue onwards to tear down the mount structures, at which time we get find a stray reference count on the perag structure. On a non-debug kernel, this will be ignored and the structure torn down and freed. Hence when the kworker thread is then rescheduled and the buffer released and freed, it will access a freed perag structure. The problem here is that when the log mount fails, we still need to quiesce the log to ensure that the IO workqueues have returned to idle before we run xfs_wait_buftarg(). By synchronising the workqueues, we ensure that all IO completions are fully processed, not just to the point where buffers have been unlocked. This ensures we don't end up in the situation above. 
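In reduced form, the ordering the fix below establishes looks like this (a sketch only, with invented names and none of the XFS specifics; the structure is assumed to be set up elsewhere): the completion workqueue is drained before the teardown path walks a list that, by design, only holds unreferenced objects.

  #include <linux/workqueue.h>
  #include <linux/list.h>
  #include <linux/spinlock.h>
  #include <linux/slab.h>

  struct example_buf {
          struct list_head lru_node;
  };

  struct example_target {
          struct workqueue_struct *io_wq;   /* runs async I/O completions */
          struct list_head        lru;      /* unreferenced buffers only */
          spinlock_t              lru_lock;
  };

  static void example_wait_for_buffers(struct example_target *tgt)
  {
          struct example_buf *bp, *n;
          LIST_HEAD(dispose);

          /*
           * Waiting for I/O completion alone is not enough: the completion
           * worker still owns a reference after it drops the buffer lock.
           * Draining the workqueue guarantees every such reference has been
           * released and the buffer has made it onto the LRU.
           */
          drain_workqueue(tgt->io_wq);

          spin_lock(&tgt->lru_lock);
          list_splice_init(&tgt->lru, &dispose);
          spin_unlock(&tgt->lru_lock);

          list_for_each_entry_safe(bp, n, &dispose, lru_node) {
                  list_del(&bp->lru_node);
                  kfree(bp);
          }
  }
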
cc: # 3.18 Signed-off-by: Dave Chinner Reviewed-by: Brian Foster Signed-off-by: Dave Chinner Signed-off-by: Sasha Levin --- fs/xfs/xfs_buf.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 82938ac05ba1d..7dd64bf98c566 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1529,6 +1529,16 @@ xfs_wait_buftarg( LIST_HEAD(dispose); int loop = 0; + /* + * We need to flush the buffer workqueue to ensure that all IO + * completion processing is 100% done. Just waiting on buffer locks is + * not sufficient for async IO as the reference count held over IO is + * not released until after the buffer lock is dropped. Hence we need to + * ensure here that all reference counts have been dropped before we + * start walking the LRU list. + */ + drain_workqueue(btp->bt_mount->m_buf_workqueue); + /* loop until there is nothing left on the lru list. */ while (list_lru_count(&btp->bt_lru)) { list_lru_walk(&btp->bt_lru, xfs_buftarg_wait_rele, From 5d545a70cca8594f6120a9e3d70187920e62ce77 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Mon, 18 Jan 2016 17:06:05 +0100 Subject: [PATCH 1761/2091] crypto: crc32c - Fix crc32c soft dependency [ Upstream commit fd7f6727102a1ccf6b4c1dfcc631f9b546526b26 ] I don't think it makes sense for a module to have a soft dependency on itself. This seems quite cyclic by nature and I can't see what purpose it could serve. OTOH libcrc32c calls crypto_alloc_shash("crc32c", 0, 0) so it pretty much assumes that some incarnation of the "crc32c" hash algorithm has been loaded. Therefore it makes sense to have the soft dependency there (as crc-t10dif does.) Cc: stable@vger.kernel.org Cc: Tim Chen Cc: "David S. Miller" Signed-off-by: Jean Delvare Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- crypto/crc32c_generic.c | 1 - lib/libcrc32c.c | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/crc32c_generic.c b/crypto/crc32c_generic.c index 06f1b60f02b22..4c0a0e2718769 100644 --- a/crypto/crc32c_generic.c +++ b/crypto/crc32c_generic.c @@ -172,4 +172,3 @@ MODULE_DESCRIPTION("CRC32c (Castagnoli) calculations wrapper for lib/crc32c"); MODULE_LICENSE("GPL"); MODULE_ALIAS_CRYPTO("crc32c"); MODULE_ALIAS_CRYPTO("crc32c-generic"); -MODULE_SOFTDEP("pre: crc32c"); diff --git a/lib/libcrc32c.c b/lib/libcrc32c.c index 6a08ce7d6adc0..acf9da449f816 100644 --- a/lib/libcrc32c.c +++ b/lib/libcrc32c.c @@ -74,3 +74,4 @@ module_exit(libcrc32c_mod_fini); MODULE_AUTHOR("Clay Haapala "); MODULE_DESCRIPTION("CRC32c (Castagnoli) calculations"); MODULE_LICENSE("GPL"); +MODULE_SOFTDEP("pre: crc32c"); From dfaff0ef0343ba85ba3bac69a6547701425c0242 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Thu, 7 Jan 2016 16:44:10 -0500 Subject: [PATCH 1762/2091] IB/qib: fix mcast detach when qp not attached [ Upstream commit 09dc9cd6528f5b52bcbd3292a6312e762c85260f ] The code produces the following trace: [1750924.419007] general protection fault: 0000 [#3] SMP [1750924.420364] Modules linked in: nfnetlink autofs4 rpcsec_gss_krb5 nfsv4 dcdbas rfcomm bnep bluetooth nfsd auth_rpcgss nfs_acl dm_multipath nfs lockd scsi_dh sunrpc fscache radeon ttm drm_kms_helper drm serio_raw parport_pc ppdev i2c_algo_bit lpc_ich ipmi_si ib_mthca ib_qib dca lp parport ib_ipoib mac_hid ib_cm i3000_edac ib_sa ib_uverbs edac_core ib_umad ib_mad ib_core ib_addr tg3 ptp dm_mirror dm_region_hash dm_log psmouse pps_core [1750924.420364] CPU: 1 PID: 8401 Comm: python Tainted: G D 3.13.0-39-generic #66-Ubuntu [1750924.420364] Hardware name: Dell Computer 
Corporation PowerEdge 860/0XM089, BIOS A04 07/24/2007 [1750924.420364] task: ffff8800366a9800 ti: ffff88007af1c000 task.ti: ffff88007af1c000 [1750924.420364] RIP: 0010:[] [] qib_mcast_qp_free+0x11/0x50 [ib_qib] [1750924.420364] RSP: 0018:ffff88007af1dd70 EFLAGS: 00010246 [1750924.420364] RAX: 0000000000000001 RBX: ffff88007b822688 RCX: 000000000000000f [1750924.420364] RDX: ffff88007b822688 RSI: ffff8800366c15a0 RDI: 6764697200000000 [1750924.420364] RBP: ffff88007af1dd78 R08: 0000000000000001 R09: 0000000000000000 [1750924.420364] R10: 0000000000000011 R11: 0000000000000246 R12: ffff88007baa1d98 [1750924.420364] R13: ffff88003ecab000 R14: ffff88007b822660 R15: 0000000000000000 [1750924.420364] FS: 00007ffff7fd8740(0000) GS:ffff88007fc80000(0000) knlGS:0000000000000000 [1750924.420364] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1750924.420364] CR2: 00007ffff597c750 CR3: 000000006860b000 CR4: 00000000000007e0 [1750924.420364] Stack: [1750924.420364] ffff88007b822688 ffff88007af1ddf0 ffffffffa0132429 000000007af1de20 [1750924.420364] ffff88007baa1dc8 ffff88007baa0000 ffff88007af1de70 ffffffffa00cb313 [1750924.420364] 00007fffffffde88 0000000000000000 0000000000000008 ffff88003ecab000 [1750924.420364] Call Trace: [1750924.420364] [] qib_multicast_detach+0x1e9/0x350 [ib_qib] [1750924.568035] [] ? ib_uverbs_modify_qp+0x323/0x3d0 [ib_uverbs] [1750924.568035] [] ib_detach_mcast+0x31/0x50 [ib_core] [1750924.568035] [] ib_uverbs_detach_mcast+0x93/0x170 [ib_uverbs] [1750924.568035] [] ib_uverbs_write+0xc6/0x2c0 [ib_uverbs] [1750924.568035] [] ? apparmor_file_permission+0x18/0x20 [1750924.568035] [] ? security_file_permission+0x23/0xa0 [1750924.568035] [] vfs_write+0xb4/0x1f0 [1750924.568035] [] SyS_write+0x49/0xa0 [1750924.568035] [] system_call_fastpath+0x1a/0x1f [1750924.568035] Code: 66 2e 0f 1f 84 00 00 00 00 00 31 c0 5d c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 0f 1f 44 00 00 55 48 89 e5 53 48 89 fb 48 8b 7f 10 ff 8f 40 01 00 00 74 0e 48 89 df e8 8e f8 06 e1 5b 5d c3 0f [1750924.568035] RIP [] qib_mcast_qp_free+0x11/0x50 [ib_qib] [1750924.568035] RSP [1750924.650439] ---[ end trace 73d5d4b3f8ad4851 ] The fix is to note the qib_mcast_qp that was found. If none is found, then return EINVAL indicating the error. 
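Stripped of the IB details, the reworked detach path follows a common pattern; a standalone sketch with invented names (the entry's wait queue and refcount are assumed to be initialised by the attach path): remember the matching entry while the spinlock is held, and only sleep and free once the lock has been dropped, returning EINVAL when nothing matched.

  #include <linux/list.h>
  #include <linux/spinlock.h>
  #include <linux/wait.h>
  #include <linux/atomic.h>
  #include <linux/slab.h>
  #include <linux/errno.h>

  struct example_entry {
          struct list_head node;
          atomic_t refcount;
          wait_queue_head_t wait;
  };

  static int example_detach(spinlock_t *lock, struct list_head *head,
                            struct example_entry *target)
  {
          struct example_entry *e, *found = NULL;

          spin_lock_irq(lock);
          list_for_each_entry(e, head, node) {
                  if (e == target) {
                          list_del(&e->node);
                          found = e;
                          break;
                  }
          }
          spin_unlock_irq(lock);

          /* Nothing matched: the entry was never attached. */
          if (!found)
                  return -EINVAL;

          /* Sleeping is only safe now that the spinlock has been dropped. */
          wait_event(found->wait, atomic_read(&found->refcount) <= 1);
          kfree(found);
          return 0;
  }
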
Cc: Reviewed-by: Dennis Dalessandro Reported-by: Jason Gunthorpe Signed-off-by: Mike Marciniszyn Signed-off-by: Doug Ledford Signed-off-by: Sasha Levin --- drivers/infiniband/hw/qib/qib_verbs_mcast.c | 35 +++++++++------------ 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib_verbs_mcast.c b/drivers/infiniband/hw/qib/qib_verbs_mcast.c index f8ea069a3eafc..b2fb5286dbd98 100644 --- a/drivers/infiniband/hw/qib/qib_verbs_mcast.c +++ b/drivers/infiniband/hw/qib/qib_verbs_mcast.c @@ -286,15 +286,13 @@ int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) struct qib_ibdev *dev = to_idev(ibqp->device); struct qib_ibport *ibp = to_iport(ibqp->device, qp->port_num); struct qib_mcast *mcast = NULL; - struct qib_mcast_qp *p, *tmp; + struct qib_mcast_qp *p, *tmp, *delp = NULL; struct rb_node *n; int last = 0; int ret; - if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET) { - ret = -EINVAL; - goto bail; - } + if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET) + return -EINVAL; spin_lock_irq(&ibp->lock); @@ -303,8 +301,7 @@ int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) while (1) { if (n == NULL) { spin_unlock_irq(&ibp->lock); - ret = -EINVAL; - goto bail; + return -EINVAL; } mcast = rb_entry(n, struct qib_mcast, rb_node); @@ -328,6 +325,7 @@ int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) */ list_del_rcu(&p->list); mcast->n_attached--; + delp = p; /* If this was the last attached QP, remove the GID too. */ if (list_empty(&mcast->qp_list)) { @@ -338,15 +336,16 @@ int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) } spin_unlock_irq(&ibp->lock); + /* QP not attached */ + if (!delp) + return -EINVAL; + /* + * Wait for any list walkers to finish before freeing the + * list element. + */ + wait_event(mcast->wait, atomic_read(&mcast->refcount) <= 1); + qib_mcast_qp_free(delp); - if (p) { - /* - * Wait for any list walkers to finish before freeing the - * list element. - */ - wait_event(mcast->wait, atomic_read(&mcast->refcount) <= 1); - qib_mcast_qp_free(p); - } if (last) { atomic_dec(&mcast->refcount); wait_event(mcast->wait, !atomic_read(&mcast->refcount)); @@ -355,11 +354,7 @@ int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) dev->n_mcast_grps_allocated--; spin_unlock_irq(&dev->n_mcast_grps_lock); } - - ret = 0; - -bail: - return ret; + return 0; } int qib_mcast_tree_empty(struct qib_ibport *ibp) From 34d7d523ecaa8c7a6857f97fd251e95e628aa17e Mon Sep 17 00:00:00 2001 From: Vinit Agnihotri Date: Mon, 11 Jan 2016 12:57:25 -0500 Subject: [PATCH 1763/2091] IB/qib: Support creating qps with GFP_NOIO flag [ Upstream commit fbbeb8632bf0b46ab44cfcedc4654cd7831b7161 ] The current code is problematic when the QP creation and ipoib is used to support NFS and NFS desires to do IO for paging purposes. In that case, the GFP_KERNEL allocation in qib_qp.c causes a deadlock in tight memory situations. This fix adds support to create queue pair with GFP_NOIO flag for connected mode only to cleanly fail the create queue pair in those situations. 
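As a standalone illustration of the allocation-policy switch the patch below introduces (names are invented; the real change also covers the get_zeroed_page() and kzalloc() callers on the create path): the gfp mask is chosen once up front, and vmalloc-sized allocations go through __vmalloc() so the mask is honoured there as well.

  #include <linux/types.h>
  #include <linux/gfp.h>
  #include <linux/vmalloc.h>

  /*
   * Illustrative helper: callers that may be reached from the NFS/paging
   * path pass no_io = true so the allocation cannot recurse into I/O.
   */
  static void *example_alloc_queue(size_t bytes, bool no_io)
  {
          gfp_t gfp = no_io ? GFP_NOIO : GFP_KERNEL;

          /* vmalloc() would imply GFP_KERNEL, so call __vmalloc() directly. */
          return __vmalloc(bytes, gfp, PAGE_KERNEL);
  }
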
Cc: # 3.16+ Reviewed-by: Mike Marciniszyn Signed-off-by: Vinit Agnihotri Signed-off-by: Doug Ledford Signed-off-by: Sasha Levin --- drivers/infiniband/hw/qib/qib_qp.c | 46 +++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index 4fa88ba2963e6..131994382b220 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -100,9 +100,10 @@ static u32 credit_table[31] = { 32768 /* 1E */ }; -static void get_map_page(struct qib_qpn_table *qpt, struct qpn_map *map) +static void get_map_page(struct qib_qpn_table *qpt, struct qpn_map *map, + gfp_t gfp) { - unsigned long page = get_zeroed_page(GFP_KERNEL); + unsigned long page = get_zeroed_page(gfp); /* * Free the page if someone raced with us installing it. @@ -121,7 +122,7 @@ static void get_map_page(struct qib_qpn_table *qpt, struct qpn_map *map) * zero/one for QP type IB_QPT_SMI/IB_QPT_GSI. */ static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt, - enum ib_qp_type type, u8 port) + enum ib_qp_type type, u8 port, gfp_t gfp) { u32 i, offset, max_scan, qpn; struct qpn_map *map; @@ -151,7 +152,7 @@ static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt, max_scan = qpt->nmaps - !offset; for (i = 0;;) { if (unlikely(!map->page)) { - get_map_page(qpt, map); + get_map_page(qpt, map, gfp); if (unlikely(!map->page)) break; } @@ -983,13 +984,21 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd, size_t sz; size_t sg_list_sz; struct ib_qp *ret; + gfp_t gfp; + if (init_attr->cap.max_send_sge > ib_qib_max_sges || init_attr->cap.max_send_wr > ib_qib_max_qp_wrs || - init_attr->create_flags) { - ret = ERR_PTR(-EINVAL); - goto bail; - } + init_attr->create_flags & ~(IB_QP_CREATE_USE_GFP_NOIO)) + return ERR_PTR(-EINVAL); + + /* GFP_NOIO is applicable in RC QPs only */ + if (init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO && + init_attr->qp_type != IB_QPT_RC) + return ERR_PTR(-EINVAL); + + gfp = init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO ? + GFP_NOIO : GFP_KERNEL; /* Check receive queue parameters if no SRQ is specified. 
*/ if (!init_attr->srq) { @@ -1021,7 +1030,8 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd, sz = sizeof(struct qib_sge) * init_attr->cap.max_send_sge + sizeof(struct qib_swqe); - swq = vmalloc((init_attr->cap.max_send_wr + 1) * sz); + swq = __vmalloc((init_attr->cap.max_send_wr + 1) * sz, + gfp, PAGE_KERNEL); if (swq == NULL) { ret = ERR_PTR(-ENOMEM); goto bail; @@ -1037,13 +1047,13 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd, } else if (init_attr->cap.max_recv_sge > 1) sg_list_sz = sizeof(*qp->r_sg_list) * (init_attr->cap.max_recv_sge - 1); - qp = kzalloc(sz + sg_list_sz, GFP_KERNEL); + qp = kzalloc(sz + sg_list_sz, gfp); if (!qp) { ret = ERR_PTR(-ENOMEM); goto bail_swq; } RCU_INIT_POINTER(qp->next, NULL); - qp->s_hdr = kzalloc(sizeof(*qp->s_hdr), GFP_KERNEL); + qp->s_hdr = kzalloc(sizeof(*qp->s_hdr), gfp); if (!qp->s_hdr) { ret = ERR_PTR(-ENOMEM); goto bail_qp; @@ -1058,8 +1068,16 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd, qp->r_rq.max_sge = init_attr->cap.max_recv_sge; sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) + sizeof(struct qib_rwqe); - qp->r_rq.wq = vmalloc_user(sizeof(struct qib_rwq) + - qp->r_rq.size * sz); + if (gfp != GFP_NOIO) + qp->r_rq.wq = vmalloc_user( + sizeof(struct qib_rwq) + + qp->r_rq.size * sz); + else + qp->r_rq.wq = __vmalloc( + sizeof(struct qib_rwq) + + qp->r_rq.size * sz, + gfp, PAGE_KERNEL); + if (!qp->r_rq.wq) { ret = ERR_PTR(-ENOMEM); goto bail_qp; @@ -1090,7 +1108,7 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd, dev = to_idev(ibpd->device); dd = dd_from_dev(dev); err = alloc_qpn(dd, &dev->qpn_table, init_attr->qp_type, - init_attr->port_num); + init_attr->port_num, gfp); if (err < 0) { ret = ERR_PTR(err); vfree(qp->r_rq.wq); From c98b306e392edc0898a1dd715761998222c19676 Mon Sep 17 00:00:00 2001 From: Josh Boyer Date: Wed, 9 Dec 2015 21:12:52 -0500 Subject: [PATCH 1764/2091] ideapad-laptop: Add Lenovo ideapad Y700-17ISK to no_hw_rfkill dmi list [ Upstream commit edde316acb5f07c04abf09a92f59db5d2efd14e2 ] One of the newest ideapad models also lacks a physical hw rfkill switch, and trying to read the hw rfkill switch through the ideapad module causes it to always reported blocking breaking wifi. Fix it by adding this model to the DMI list. BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1286293 Cc: stable@vger.kernel.org Signed-off-by: Josh Boyer Signed-off-by: Darren Hart Signed-off-by: Sasha Levin --- drivers/platform/x86/ideapad-laptop.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index cd78f1166b33d..32b58b4ea2e6e 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -844,6 +844,13 @@ static const struct dmi_system_id no_hw_rfkill_list[] = { DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo G50-30"), }, }, + { + .ident = "Lenovo ideapad Y700-17ISK", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo ideapad Y700-17ISK"), + }, + }, { .ident = "Lenovo Yoga 2 11 / 13 / Pro", .matches = { From f3eaec38506470d74a642b9aa518ed1fd6849b57 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 19 Nov 2015 14:15:51 +0100 Subject: [PATCH 1765/2091] btrfs: put delayed item hook into inode [ Upstream commit 8089fe62c6603860f6796ca80519b92391292f21 ] Inodes for delayed iput allocate a trivial helper structure, let's place the list hook directly into the inode and save a kmalloc (killing a __GFP_NOFAIL as a bonus) at the cost of increasing size of btrfs_inode. 
The inode can be put into the delayed_iputs list more than once and we have to keep the count. This means we can't use the list_splice to process a bunch of inodes because we'd lost track of the count if the inode is put into the delayed iputs again while it's processed. Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/btrfs_inode.h | 4 +++ fs/btrfs/inode.c | 56 +++++++++++++++++++----------------------- 2 files changed, 29 insertions(+), 31 deletions(-) diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 0ef5cc13fae26..61205e3bbefac 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -192,6 +192,10 @@ struct btrfs_inode { /* File creation time. */ struct timespec i_otime; + /* Hook into fs_info->delayed_iputs */ + struct list_head delayed_iput; + long delayed_iput_count; + struct inode vfs_inode; }; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 5136c73b3dce7..7b898f5be77d5 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3080,55 +3080,47 @@ static int btrfs_readpage_end_io_hook(struct btrfs_io_bio *io_bio, start, (size_t)(end - start + 1)); } -struct delayed_iput { - struct list_head list; - struct inode *inode; -}; - -/* JDM: If this is fs-wide, why can't we add a pointer to - * btrfs_inode instead and avoid the allocation? */ void btrfs_add_delayed_iput(struct inode *inode) { struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; - struct delayed_iput *delayed; + struct btrfs_inode *binode = BTRFS_I(inode); if (atomic_add_unless(&inode->i_count, -1, 1)) return; - delayed = kmalloc(sizeof(*delayed), GFP_NOFS | __GFP_NOFAIL); - delayed->inode = inode; - spin_lock(&fs_info->delayed_iput_lock); - list_add_tail(&delayed->list, &fs_info->delayed_iputs); + if (binode->delayed_iput_count == 0) { + ASSERT(list_empty(&binode->delayed_iput)); + list_add_tail(&binode->delayed_iput, &fs_info->delayed_iputs); + } else { + binode->delayed_iput_count++; + } spin_unlock(&fs_info->delayed_iput_lock); } void btrfs_run_delayed_iputs(struct btrfs_root *root) { - LIST_HEAD(list); struct btrfs_fs_info *fs_info = root->fs_info; - struct delayed_iput *delayed; - int empty; - - spin_lock(&fs_info->delayed_iput_lock); - empty = list_empty(&fs_info->delayed_iputs); - spin_unlock(&fs_info->delayed_iput_lock); - if (empty) - return; down_read(&fs_info->delayed_iput_sem); - spin_lock(&fs_info->delayed_iput_lock); - list_splice_init(&fs_info->delayed_iputs, &list); - spin_unlock(&fs_info->delayed_iput_lock); - - while (!list_empty(&list)) { - delayed = list_entry(list.next, struct delayed_iput, list); - list_del(&delayed->list); - iput(delayed->inode); - kfree(delayed); + while (!list_empty(&fs_info->delayed_iputs)) { + struct btrfs_inode *inode; + + inode = list_first_entry(&fs_info->delayed_iputs, + struct btrfs_inode, delayed_iput); + if (inode->delayed_iput_count) { + inode->delayed_iput_count--; + list_move_tail(&inode->delayed_iput, + &fs_info->delayed_iputs); + } else { + list_del_init(&inode->delayed_iput); + } + spin_unlock(&fs_info->delayed_iput_lock); + iput(&inode->vfs_inode); + spin_lock(&fs_info->delayed_iput_lock); } - + spin_unlock(&fs_info->delayed_iput_lock); up_read(&root->fs_info->delayed_iput_sem); } @@ -8890,6 +8882,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) ei->dir_index = 0; ei->last_unlink_trans = 0; ei->last_log_commit = 0; + ei->delayed_iput_count = 0; spin_lock_init(&ei->lock); ei->outstanding_extents = 0; @@ -8914,6 +8907,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) 
mutex_init(&ei->delalloc_mutex); btrfs_ordered_inode_tree_init(&ei->ordered_tree); INIT_LIST_HEAD(&ei->delalloc_inodes); + INIT_LIST_HEAD(&ei->delayed_iput); RB_CLEAR_NODE(&ei->rb_node); return inode; From e004a097257612853c63b3f47726fe04981e94c2 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 15 Jan 2016 11:05:12 +0000 Subject: [PATCH 1766/2091] Btrfs: fix deadlock running delayed iputs at transaction commit time [ Upstream commit c2d6cb1636d235257086f939a8194ef0bf93af6e ] While running a stress test I ran into a deadlock when running the delayed iputs at transaction time, which produced the following report and trace: [ 886.399989] ============================================= [ 886.400871] [ INFO: possible recursive locking detected ] [ 886.401663] 4.4.0-rc6-btrfs-next-18+ #1 Not tainted [ 886.402384] --------------------------------------------- [ 886.403182] fio/8277 is trying to acquire lock: [ 886.403568] (&fs_info->delayed_iput_sem){++++..}, at: [] btrfs_run_delayed_iputs+0x36/0xbf [btrfs] [ 886.403568] [ 886.403568] but task is already holding lock: [ 886.403568] (&fs_info->delayed_iput_sem){++++..}, at: [] btrfs_run_delayed_iputs+0x36/0xbf [btrfs] [ 886.403568] [ 886.403568] other info that might help us debug this: [ 886.403568] Possible unsafe locking scenario: [ 886.403568] [ 886.403568] CPU0 [ 886.403568] ---- [ 886.403568] lock(&fs_info->delayed_iput_sem); [ 886.403568] lock(&fs_info->delayed_iput_sem); [ 886.403568] [ 886.403568] *** DEADLOCK *** [ 886.403568] [ 886.403568] May be due to missing lock nesting notation [ 886.403568] [ 886.403568] 3 locks held by fio/8277: [ 886.403568] #0: (sb_writers#11){.+.+.+}, at: [] __sb_start_write+0x5f/0xb0 [ 886.403568] #1: (&sb->s_type->i_mutex_key#15){+.+.+.}, at: [] btrfs_file_write_iter+0x73/0x408 [btrfs] [ 886.403568] #2: (&fs_info->delayed_iput_sem){++++..}, at: [] btrfs_run_delayed_iputs+0x36/0xbf [btrfs] [ 886.403568] [ 886.403568] stack backtrace: [ 886.403568] CPU: 6 PID: 8277 Comm: fio Not tainted 4.4.0-rc6-btrfs-next-18+ #1 [ 886.403568] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS by qemu-project.org 04/01/2014 [ 886.403568] 0000000000000000 ffff88009f80f770 ffffffff8125d4fd ffffffff82af1fc0 [ 886.403568] ffff88009f80f830 ffffffff8108e5f9 0000000200000000 ffff88009fd92290 [ 886.403568] 0000000000000000 ffffffff82af1fc0 ffffffff829cfb01 00042b216d008804 [ 886.403568] Call Trace: [ 886.403568] [] dump_stack+0x4e/0x79 [ 886.403568] [] __lock_acquire+0xd42/0xf0b [ 886.403568] [] ? __module_address+0xdf/0x108 [ 886.403568] [] lock_acquire+0x10d/0x194 [ 886.403568] [] ? lock_acquire+0x10d/0x194 [ 886.403568] [] ? btrfs_run_delayed_iputs+0x36/0xbf [btrfs] [ 886.489542] [] down_read+0x3e/0x4d [ 886.489542] [] ? btrfs_run_delayed_iputs+0x36/0xbf [btrfs] [ 886.489542] [] btrfs_run_delayed_iputs+0x36/0xbf [btrfs] [ 886.489542] [] btrfs_commit_transaction+0x8f5/0x96e [btrfs] [ 886.489542] [] flush_space+0x435/0x44a [btrfs] [ 886.489542] [] ? reserve_metadata_bytes+0x26a/0x384 [btrfs] [ 886.489542] [] reserve_metadata_bytes+0x28d/0x384 [btrfs] [ 886.489542] [] ? btrfs_block_rsv_refill+0x58/0x96 [btrfs] [ 886.489542] [] btrfs_block_rsv_refill+0x70/0x96 [btrfs] [ 886.489542] [] btrfs_evict_inode+0x394/0x55a [btrfs] [ 886.489542] [] evict+0xa7/0x15c [ 886.489542] [] iput+0x1d3/0x266 [ 886.489542] [] btrfs_run_delayed_iputs+0x8f/0xbf [btrfs] [ 886.489542] [] btrfs_commit_transaction+0x8f5/0x96e [btrfs] [ 886.489542] [] ? 
signal_pending_state+0x31/0x31 [ 886.489542] [] btrfs_alloc_data_chunk_ondemand+0x1d7/0x288 [btrfs] [ 886.489542] [] btrfs_check_data_free_space+0x40/0x59 [btrfs] [ 886.489542] [] btrfs_delalloc_reserve_space+0x1e/0x4e [btrfs] [ 886.489542] [] btrfs_direct_IO+0x10c/0x27e [btrfs] [ 886.489542] [] generic_file_direct_write+0xb3/0x128 [ 886.489542] [] btrfs_file_write_iter+0x229/0x408 [btrfs] [ 886.489542] [] ? __lock_is_held+0x38/0x50 [ 886.489542] [] __vfs_write+0x7c/0xa5 [ 886.489542] [] vfs_write+0xa0/0xe4 [ 886.489542] [] SyS_write+0x50/0x7e [ 886.489542] [] entry_SYSCALL_64_fastpath+0x12/0x6f [ 1081.852335] INFO: task fio:8244 blocked for more than 120 seconds. [ 1081.854348] Not tainted 4.4.0-rc6-btrfs-next-18+ #1 [ 1081.857560] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 1081.863227] fio D ffff880213f9bb28 0 8244 8240 0x00000000 [ 1081.868719] ffff880213f9bb28 00ffffff810fc6b0 ffffffff0000000a ffff88023ed55240 [ 1081.872499] ffff880206b5d400 ffff880213f9c000 ffff88020a4d5318 ffff880206b5d400 [ 1081.876834] ffffffff00000001 ffff880206b5d400 ffff880213f9bb40 ffffffff81482ba4 [ 1081.880782] Call Trace: [ 1081.881793] [] schedule+0x7f/0x97 [ 1081.883340] [] rwsem_down_write_failed+0x2d5/0x325 [ 1081.895525] [] ? trace_hardirqs_on_caller+0x16/0x1ab [ 1081.897419] [] call_rwsem_down_write_failed+0x13/0x20 [ 1081.899251] [] ? call_rwsem_down_write_failed+0x13/0x20 [ 1081.901063] [] ? __down_write_nested.isra.0+0x1f/0x21 [ 1081.902365] [] down_write+0x43/0x57 [ 1081.903846] [] ? btrfs_alloc_data_chunk_ondemand+0x1f6/0x288 [btrfs] [ 1081.906078] [] btrfs_alloc_data_chunk_ondemand+0x1f6/0x288 [btrfs] [ 1081.908846] [] ? mark_held_locks+0x56/0x6c [ 1081.910409] [] btrfs_check_data_free_space+0x40/0x59 [btrfs] [ 1081.912482] [] btrfs_delalloc_reserve_space+0x1e/0x4e [btrfs] [ 1081.914597] [] btrfs_direct_IO+0x10c/0x27e [btrfs] [ 1081.919037] [] generic_file_direct_write+0xb3/0x128 [ 1081.920754] [] btrfs_file_write_iter+0x229/0x408 [btrfs] [ 1081.922496] [] ? __lock_is_held+0x38/0x50 [ 1081.923922] [] __vfs_write+0x7c/0xa5 [ 1081.925275] [] vfs_write+0xa0/0xe4 [ 1081.926584] [] SyS_write+0x50/0x7e [ 1081.927968] [] entry_SYSCALL_64_fastpath+0x12/0x6f [ 1081.985293] INFO: lockdep is turned off. [ 1081.986132] INFO: task fio:8249 blocked for more than 120 seconds. [ 1081.987434] Not tainted 4.4.0-rc6-btrfs-next-18+ #1 [ 1081.988534] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 1081.990147] fio D ffff880218febbb8 0 8249 8240 0x00000000 [ 1081.991626] ffff880218febbb8 00ffffff81486b8e ffff88020000000b ffff88023ed75240 [ 1081.993258] ffff8802120a9a00 ffff880218fec000 ffff88020a4d5318 ffff8802120a9a00 [ 1081.994850] ffffffff00000001 ffff8802120a9a00 ffff880218febbd0 ffffffff81482ba4 [ 1081.996485] Call Trace: [ 1081.997037] [] schedule+0x7f/0x97 [ 1081.998017] [] rwsem_down_write_failed+0x2d5/0x325 [ 1081.999241] [] ? finish_wait+0x6d/0x76 [ 1082.000306] [] call_rwsem_down_write_failed+0x13/0x20 [ 1082.001533] [] ? call_rwsem_down_write_failed+0x13/0x20 [ 1082.002776] [] ? __down_write_nested.isra.0+0x1f/0x21 [ 1082.003995] [] down_write+0x43/0x57 [ 1082.005000] [] ? btrfs_alloc_data_chunk_ondemand+0x1f6/0x288 [btrfs] [ 1082.007403] [] btrfs_alloc_data_chunk_ondemand+0x1f6/0x288 [btrfs] [ 1082.008988] [] btrfs_fallocate+0x7c1/0xc2f [btrfs] [ 1082.010193] [] ? percpu_down_read+0x4e/0x77 [ 1082.011280] [] ? __sb_start_write+0x5f/0xb0 [ 1082.012265] [] ? 
__sb_start_write+0x5f/0xb0 [ 1082.013021] [] vfs_fallocate+0x170/0x1ff [ 1082.013738] [] ioctl_preallocate+0x89/0x9b [ 1082.014778] [] do_vfs_ioctl+0x40a/0x4ea [ 1082.015778] [] ? SYSC_newfstat+0x25/0x2e [ 1082.016806] [] ? __fget_light+0x4d/0x71 [ 1082.017789] [] SyS_ioctl+0x57/0x79 [ 1082.018706] [] entry_SYSCALL_64_fastpath+0x12/0x6f This happens because we can recursively acquire the semaphore fs_info->delayed_iput_sem when attempting to allocate space to satisfy a file write request as shown in the first trace above - when committing a transaction we acquire (down_read) the semaphore before running the delayed iputs, and when running a delayed iput() we can end up calling an inode's eviction handler, which in turn commits another transaction and attempts to acquire (down_read) again the semaphore to run more delayed iput operations. This results in a deadlock because if a task acquires multiple times a semaphore it should invoke down_read_nested() with a different lockdep class for each level of recursion. Fix this by simplifying the implementation and use a mutex instead that is acquired by the cleaner kthread before it runs the delayed iputs instead of always acquiring a semaphore before delayed references are run from anywhere. Fixes: d7c151717a1e (btrfs: Fix NO_SPACE bug caused by delayed-iput) Cc: stable@vger.kernel.org # 4.1+ Signed-off-by: Filipe Manana Signed-off-by: Chris Mason Signed-off-by: Sasha Levin --- fs/btrfs/ctree.h | 2 +- fs/btrfs/disk-io.c | 5 ++++- fs/btrfs/extent-tree.c | 9 +++++---- fs/btrfs/inode.c | 2 -- 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 6f364e1d8d3d1..699944a074919 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1544,7 +1544,7 @@ struct btrfs_fs_info { spinlock_t delayed_iput_lock; struct list_head delayed_iputs; - struct rw_semaphore delayed_iput_sem; + struct mutex cleaner_delayed_iput_mutex; /* this protects tree_mod_seq_list */ spinlock_t tree_mod_seq_lock; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 2ef9a4b72d06e..99e8f60c79620 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1772,8 +1772,11 @@ static int cleaner_kthread(void *arg) goto sleep; } + mutex_lock(&root->fs_info->cleaner_delayed_iput_mutex); btrfs_run_delayed_iputs(root); btrfs_delete_unused_bgs(root->fs_info); + mutex_unlock(&root->fs_info->cleaner_delayed_iput_mutex); + again = btrfs_clean_one_deleted_snapshot(root); mutex_unlock(&root->fs_info->cleaner_mutex); @@ -2491,8 +2494,8 @@ int open_ctree(struct super_block *sb, mutex_init(&fs_info->unused_bg_unpin_mutex); mutex_init(&fs_info->reloc_mutex); mutex_init(&fs_info->delalloc_root_mutex); + mutex_init(&fs_info->cleaner_delayed_iput_mutex); seqlock_init(&fs_info->profiles_lock); - init_rwsem(&fs_info->delayed_iput_sem); init_completion(&fs_info->kobj_unregister); INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 0ec3acd14cbf5..3c1938000a5dc 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3985,11 +3985,12 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes, u64 write_bytes) if (ret) return ret; /* - * make sure that all running delayed iput are - * done + * The cleaner kthread might still be doing iput + * operations. Wait for it to finish so that + * more space is released. 
*/ - down_write(&root->fs_info->delayed_iput_sem); - up_write(&root->fs_info->delayed_iput_sem); + mutex_lock(&root->fs_info->cleaner_delayed_iput_mutex); + mutex_unlock(&root->fs_info->cleaner_delayed_iput_mutex); goto again; } else { btrfs_end_transaction(trans, root); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 7b898f5be77d5..df4e0462976e3 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3102,7 +3102,6 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root) { struct btrfs_fs_info *fs_info = root->fs_info; - down_read(&fs_info->delayed_iput_sem); spin_lock(&fs_info->delayed_iput_lock); while (!list_empty(&fs_info->delayed_iputs)) { struct btrfs_inode *inode; @@ -3121,7 +3120,6 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root) spin_lock(&fs_info->delayed_iput_lock); } spin_unlock(&fs_info->delayed_iput_lock); - up_read(&root->fs_info->delayed_iput_sem); } /* From 5446a444ae9e1976a77cb8faeab8198e44168daa Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Tue, 19 Jan 2016 16:15:27 -0800 Subject: [PATCH 1767/2091] iscsi-target: Fix potential dead-lock during node acl delete [ Upstream commit 26a99c19f810b2593410899a5b304b21b47428a6 ] This patch is a iscsi-target specific bug-fix for a dead-lock that can occur during explicit struct se_node_acl->acl_group se_session deletion via configfs rmdir(2), when iscsi-target time2retain timer is still active. It changes iscsi-target to obtain se_portal_group->session_lock internally using spin_in_locked() to check for the specific se_node_acl configfs shutdown rmdir(2) case. Note this patch is intended for stable, and the subsequent v4.5-rc patch converts target_core_tpg.c to use proper se_sess->sess_kref reference counting for both se_node_acl deletion + se_node_acl->queue_depth se_session restart. Reported-by:: Sagi Grimberg Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Andy Grover Cc: Mike Christie Cc: stable@vger.kernel.org # 3.10+ Signed-off-by: Nicholas Bellinger Signed-off-by: Sasha Levin --- drivers/target/iscsi/iscsi_target_configfs.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c index 6f2fb546477e8..5a8add721741a 100644 --- a/drivers/target/iscsi/iscsi_target_configfs.c +++ b/drivers/target/iscsi/iscsi_target_configfs.c @@ -1907,7 +1907,8 @@ static void lio_tpg_release_fabric_acl( } /* - * Called with spin_lock_bh(struct se_portal_group->session_lock) held.. + * Called with spin_lock_irq(struct se_portal_group->session_lock) held + * or not held. * * Also, this function calls iscsit_inc_session_usage_count() on the * struct iscsi_session in question. 
@@ -1915,19 +1916,32 @@ static void lio_tpg_release_fabric_acl( static int lio_tpg_shutdown_session(struct se_session *se_sess) { struct iscsi_session *sess = se_sess->fabric_sess_ptr; + struct se_portal_group *se_tpg = se_sess->se_tpg; + bool local_lock = false; + + if (!spin_is_locked(&se_tpg->session_lock)) { + spin_lock_irq(&se_tpg->session_lock); + local_lock = true; + } spin_lock(&sess->conn_lock); if (atomic_read(&sess->session_fall_back_to_erl0) || atomic_read(&sess->session_logout) || (sess->time2retain_timer_flags & ISCSI_TF_EXPIRED)) { spin_unlock(&sess->conn_lock); + if (local_lock) + spin_unlock_irq(&sess->conn_lock); return 0; } atomic_set(&sess->session_reinstatement, 1); spin_unlock(&sess->conn_lock); iscsit_stop_time2retain_timer(sess); + spin_unlock_irq(&se_tpg->session_lock); + iscsit_stop_session(sess, 1, 1); + if (!local_lock) + spin_lock_irq(&se_tpg->session_lock); return 1; } From a9c56fd066f9b5d222e0813b732908dabd721867 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 19 Jan 2016 21:23:57 +0800 Subject: [PATCH 1768/2091] crypto: algif_skcipher - sendmsg SG marking is off by one [ Upstream commit 202736d99b7f29279db9da61587f11a08a04a9c6 ] We mark the end of the SG list in sendmsg and sendpage and unmark it on the next send call. Unfortunately the unmarking in sendmsg is off-by-one, leading to an SG list that is too short. Fixes: 0f477b655a52 ("crypto: algif - Mark sgl end at the end of data") Cc: stable@vger.kernel.org Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- crypto/algif_skcipher.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index 945075292bc95..5bc42f9b23f0d 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -387,7 +387,8 @@ static int skcipher_sendmsg(struct socket *sock, struct msghdr *msg, sgl = list_entry(ctx->tsgl.prev, struct skcipher_sg_list, list); sg = sgl->sg; - sg_unmark_end(sg + sgl->cur); + if (sgl->cur) + sg_unmark_end(sg + sgl->cur - 1); do { i = sgl->cur; plen = min_t(int, len, PAGE_SIZE); From 3730198645d95e2ac438ea9276402d3908b9449d Mon Sep 17 00:00:00 2001 From: Junil Lee Date: Wed, 20 Jan 2016 14:58:18 -0800 Subject: [PATCH 1769/2091] zsmalloc: fix migrate_zspage-zs_free race condition [ Upstream commit c102f07ca0b04f2cb49cfc161c83f6239d17f491 ] record_obj() in migrate_zspage() does not preserve handle's HANDLE_PIN_BIT, set by find_aloced_obj()->trypin_tag(), and implicitly (accidentally) un-pins the handle, while migrate_zspage() still performs an explicit unpin_tag() on the that handle. This additional explicit unpin_tag() introduces a race condition with zs_free(), which can pin that handle by this time, so the handle becomes un-pinned. 
Schematically, it goes like this: CPU0 CPU1 migrate_zspage find_alloced_obj trypin_tag set HANDLE_PIN_BIT zs_free() pin_tag() obj_malloc() -- new object, no tag record_obj() -- remove HANDLE_PIN_BIT set HANDLE_PIN_BIT unpin_tag() -- remove zs_free's HANDLE_PIN_BIT The race condition may result in a NULL pointer dereference: Unable to handle kernel NULL pointer dereference at virtual address 00000000 CPU: 0 PID: 19001 Comm: CookieMonsterCl Tainted: PC is at get_zspage_mapping+0x0/0x24 LR is at obj_free.isra.22+0x64/0x128 Call trace: get_zspage_mapping+0x0/0x24 zs_free+0x88/0x114 zram_free_page+0x64/0xcc zram_slot_free_notify+0x90/0x108 swap_entry_free+0x278/0x294 free_swap_and_cache+0x38/0x11c unmap_single_vma+0x480/0x5c8 unmap_vmas+0x44/0x60 exit_mmap+0x50/0x110 mmput+0x58/0xe0 do_exit+0x320/0x8dc do_group_exit+0x44/0xa8 get_signal+0x538/0x580 do_signal+0x98/0x4b8 do_notify_resume+0x14/0x5c This patch keeps the lock bit in migration path and update value atomically. Signed-off-by: Junil Lee Signed-off-by: Minchan Kim Acked-by: Vlastimil Babka Cc: Sergey Senozhatsky Cc: [4.1+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/zsmalloc.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index a8b5e749e84e7..fb1ec10ce449d 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -306,7 +306,12 @@ static void free_handle(struct zs_pool *pool, unsigned long handle) static void record_obj(unsigned long handle, unsigned long obj) { - *(unsigned long *)handle = obj; + /* + * lsb of @obj represents handle lock while other bits + * represent object value the handle is pointing so + * updating shouldn't do store tearing. + */ + WRITE_ONCE(*(unsigned long *)handle, obj); } /* zpool driver */ @@ -1641,6 +1646,13 @@ static int migrate_zspage(struct zs_pool *pool, struct size_class *class, free_obj = obj_malloc(d_page, class, handle); zs_object_copy(used_obj, free_obj, class); index++; + /* + * record_obj updates handle's value to free_obj and it will + * invalidate lock bit(ie, HANDLE_PIN_BIT) of handle, which + * breaks synchronization using pin_tag(e,g, zs_free) so + * let's keep the lock bit. + */ + free_obj |= BIT(HANDLE_PIN_BIT); record_obj(handle, free_obj); unpin_tag(handle); obj_free(pool, class, used_obj); From 4aba5827e764110bd1462eecf79209e656090f9c Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Thu, 17 Sep 2015 16:01:51 -0700 Subject: [PATCH 1770/2091] lib/string_helpers.c: fix infinite loop in string_get_size() [ Upstream commit 62bef58a55dfa8ada2a22b2496c6340468ecd98a ] Some string_get_size() calls (e.g.: string_get_size(1, 512, STRING_UNITS_10, ..., ...) string_get_size(15, 64, STRING_UNITS_10, ..., ...) ) result in an infinite loop. The problem is that if size is equal to divisor[units]/blk_size and is smaller than divisor[units] we'll end up with size == 0 when we start doing sf_cap calculations: For string_get_size(1, 512, STRING_UNITS_10, ..., ...) case: ... remainder = do_div(size, divisor[units]); -> size is 0, remainder is 1 remainder *= blk_size; -> remainder is 512 ... size *= blk_size; -> size is still 0 size += remainder / divisor[units]; -> size is still 0 The caller causing the issue is sd_read_capacity(), the problem was noticed on Hyper-V, such weird size was reported by host when scanning collides with device removal. This is probably a separate issue worth fixing, this patch is intended to prevent the library routine from infinite looping. 
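For reference, a call of the shape quoted above, harmless after this fix but previously an endless spin because size reaches zero before the precision loop runs (sketch only; the buffer size is an arbitrary choice large enough for the formatted string):

  #include <linux/string_helpers.h>

  static void example_report_capacity(void)
  {
          char buf[16];

          /* 1 block of 512 bytes, reported in SI units: yields "512 B" */
          string_get_size(1, 512, STRING_UNITS_10, buf, sizeof(buf));
  }
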
Signed-off-by: Vitaly Kuznetsov Acked-by: James Bottomley Cc: Andy Shevchenko Cc: Rasmus Villemoes Cc: "K. Y. Srinivasan" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- lib/string_helpers.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/string_helpers.c b/lib/string_helpers.c index c98ae818eb4ee..bbf1bef7d7ee4 100644 --- a/lib/string_helpers.c +++ b/lib/string_helpers.c @@ -59,7 +59,11 @@ void string_get_size(u64 size, u64 blk_size, const enum string_size_units units, } exp = divisor[units] / (u32)blk_size; - if (size >= exp) { + /* + * size must be strictly greater than exp here to ensure that remainder + * is greater than divisor[units] coming out of the if below. + */ + if (size > exp) { remainder = do_div(size, divisor[units]); remainder *= blk_size; i++; From c81f4f0331deb1b482d93b3ac8da5f04ff20ad48 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Wed, 20 Jan 2016 14:58:29 -0800 Subject: [PATCH 1771/2091] string_helpers: fix precision loss for some inputs [ Upstream commit 564b026fbd0d28e9f70fb3831293d2922bb7855b ] It was noticed that we lose precision in the final calculation for some inputs. The most egregious example is size=3000 blk_size=1900 in units of 10 should yield 5.70 MB but in fact yields 3.00 MB (oops). This is because the current algorithm doesn't correctly account for all the remainders in the logarithms. Fix this by doing a correct calculation in the remainders based on napier's algorithm. Additionally, now we have the correct result, we have to account for arithmetic rounding because we're printing 3 digits of precision. This means that if the fourth digit is five or greater, we have to round up, so add a section to ensure correct rounding. Finally account for all possible inputs correctly, including zero for block size. Fixes: b9f28d863594c429e1df35a0474d2663ca28b307 Signed-off-by: James Bottomley Reported-by: Vitaly Kuznetsov Cc: [delay until after 4.4 release] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- lib/string_helpers.c | 63 ++++++++++++++++++++++++++++++-------------- 1 file changed, 43 insertions(+), 20 deletions(-) diff --git a/lib/string_helpers.c b/lib/string_helpers.c index bbf1bef7d7ee4..33e79b5eea779 100644 --- a/lib/string_helpers.c +++ b/lib/string_helpers.c @@ -43,50 +43,73 @@ void string_get_size(u64 size, u64 blk_size, const enum string_size_units units, [STRING_UNITS_10] = 1000, [STRING_UNITS_2] = 1024, }; - int i, j; - u32 remainder = 0, sf_cap, exp; + static const unsigned int rounding[] = { 500, 50, 5 }; + int i = 0, j; + u32 remainder = 0, sf_cap; char tmp[8]; const char *unit; tmp[0] = '\0'; - i = 0; - if (!size) + + if (blk_size == 0) + size = 0; + if (size == 0) goto out; - while (blk_size >= divisor[units]) { - remainder = do_div(blk_size, divisor[units]); + /* This is Napier's algorithm. Reduce the original block size to + * + * coefficient * divisor[units]^i + * + * we do the reduction so both coefficients are just under 32 bits so + * that multiplying them together won't overflow 64 bits and we keep + * as much precision as possible in the numbers. + * + * Note: it's safe to throw away the remainders here because all the + * precision is in the coefficients. + */ + while (blk_size >> 32) { + do_div(blk_size, divisor[units]); i++; } - exp = divisor[units] / (u32)blk_size; - /* - * size must be strictly greater than exp here to ensure that remainder - * is greater than divisor[units] coming out of the if below. 
- */ - if (size > exp) { - remainder = do_div(size, divisor[units]); - remainder *= blk_size; + while (size >> 32) { + do_div(size, divisor[units]); i++; - } else { - remainder *= size; } + /* now perform the actual multiplication keeping i as the sum of the + * two logarithms */ size *= blk_size; - size += remainder / divisor[units]; - remainder %= divisor[units]; + /* and logarithmically reduce it until it's just under the divisor */ while (size >= divisor[units]) { remainder = do_div(size, divisor[units]); i++; } + /* work out in j how many digits of precision we need from the + * remainder */ sf_cap = size; for (j = 0; sf_cap*10 < 1000; j++) sf_cap *= 10; - if (j) { + if (units == STRING_UNITS_2) { + /* express the remainder as a decimal. It's currently the + * numerator of a fraction whose denominator is + * divisor[units], which is 1 << 10 for STRING_UNITS_2 */ remainder *= 1000; - remainder /= divisor[units]; + remainder >>= 10; + } + + /* add a 5 to the digit below what will be printed to ensure + * an arithmetical round up and carry it through to size */ + remainder += rounding[j]; + if (remainder >= 1000) { + remainder -= 1000; + size += 1; + } + + if (j) { snprintf(tmp, sizeof(tmp), ".%03u", remainder); tmp[j+1] = '\0'; } From 92d4a19886bcce4cca4c9f993a7324ac607d142c Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Wed, 20 Jan 2016 15:01:02 -0800 Subject: [PATCH 1772/2091] prctl: take mmap sem for writing to protect against others [ Upstream commit ddf1d398e517e660207e2c807f76a90df543a217 ] An unprivileged user can trigger an oops on a kernel with CONFIG_CHECKPOINT_RESTORE. proc_pid_cmdline_read takes mmap_sem for reading and obtains args + env start/end values. These get sanity checked as follows: BUG_ON(arg_start > arg_end); BUG_ON(env_start > env_end); These can be changed by prctl_set_mm. Turns out also takes the semaphore for reading, effectively rendering it useless. This results in: kernel BUG at fs/proc/base.c:240! invalid opcode: 0000 [#1] SMP Modules linked in: virtio_net CPU: 0 PID: 925 Comm: a.out Not tainted 4.4.0-rc8-next-20160105dupa+ #71 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 task: ffff880077a68000 ti: ffff8800784d0000 task.ti: ffff8800784d0000 RIP: proc_pid_cmdline_read+0x520/0x530 RSP: 0018:ffff8800784d3db8 EFLAGS: 00010206 RAX: ffff880077c5b6b0 RBX: ffff8800784d3f18 RCX: 0000000000000000 RDX: 0000000000000002 RSI: 00007f78e8857000 RDI: 0000000000000246 RBP: ffff8800784d3e40 R08: 0000000000000008 R09: 0000000000000001 R10: 0000000000000000 R11: 0000000000000001 R12: 0000000000000050 R13: 00007f78e8857800 R14: ffff88006fcef000 R15: ffff880077c5b600 FS: 00007f78e884a740(0000) GS:ffff88007b200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 00007f78e8361770 CR3: 00000000790a5000 CR4: 00000000000006f0 Call Trace: __vfs_read+0x37/0x100 vfs_read+0x82/0x130 SyS_read+0x58/0xd0 entry_SYSCALL_64_fastpath+0x12/0x76 Code: 4c 8b 7d a8 eb e9 48 8b 9d 78 ff ff ff 4c 8b 7d 90 48 8b 03 48 39 45 a8 0f 87 f0 fe ff ff e9 d1 fe ff ff 4c 8b 7d 90 eb c6 0f 0b <0f> 0b 0f 0b 66 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 RIP proc_pid_cmdline_read+0x520/0x530 ---[ end trace 97882617ae9c6818 ]--- Turns out there are instances where the code just reads aformentioned values without locking whatsoever - namely environ_read and get_cmdline. Interestingly these functions look quite resilient against bogus values, but I don't believe this should be relied upon. The first patch gets rid of the oops bug by grabbing mmap_sem for writing. 
The second patch is optional and puts locking around aformentioned consumers for safety. Consumers of other fields don't seem to benefit from similar treatment and are left untouched. This patch (of 2): The code was taking the semaphore for reading, which does not protect against readers nor concurrent modifications. The problem could cause a sanity checks to fail in procfs's cmdline reader, resulting in an OOPS. Note that some functions perform an unlocked read of various mm fields, but they seem to be fine despite possible modificaton. Signed-off-by: Mateusz Guzik Acked-by: Cyrill Gorcunov Cc: Alexey Dobriyan Cc: Jarod Wilson Cc: Jan Stancek Cc: Al Viro Cc: Anshuman Khandual Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- kernel/sys.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/kernel/sys.c b/kernel/sys.c index a4e372b798a5f..25ae8d2e65e2d 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1854,11 +1854,13 @@ static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data user_auxv[AT_VECTOR_SIZE - 1] = AT_NULL; } - if (prctl_map.exe_fd != (u32)-1) + if (prctl_map.exe_fd != (u32)-1) { error = prctl_set_mm_exe_file(mm, prctl_map.exe_fd); - down_read(&mm->mmap_sem); - if (error) - goto out; + if (error) + return error; + } + + down_write(&mm->mmap_sem); /* * We don't validate if these members are pointing to @@ -1895,10 +1897,8 @@ static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data if (prctl_map.auxv_size) memcpy(mm->saved_auxv, user_auxv, sizeof(user_auxv)); - error = 0; -out: - up_read(&mm->mmap_sem); - return error; + up_write(&mm->mmap_sem); + return 0; } #endif /* CONFIG_CHECKPOINT_RESTORE */ @@ -1930,7 +1930,7 @@ static int prctl_set_mm(int opt, unsigned long addr, error = -EINVAL; - down_read(&mm->mmap_sem); + down_write(&mm->mmap_sem); vma = find_vma(mm, addr); switch (opt) { @@ -2033,7 +2033,7 @@ static int prctl_set_mm(int opt, unsigned long addr, error = 0; out: - up_read(&mm->mmap_sem); + up_write(&mm->mmap_sem); return error; } From 65a7633357c158ed164878d97e8a7fc09fb75946 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 28 Dec 2015 13:18:34 +0300 Subject: [PATCH 1773/2091] libceph: fix ceph_msg_revoke() [ Upstream commit 67645d7619738e51c668ca69f097cb90b5470422 ] There are a number of problems with revoking a "was sending" message: (1) We never make any attempt to revoke data - only kvecs contibute to con->out_skip. However, once the header (envelope) is written to the socket, our peer learns data_len and sets itself to expect at least data_len bytes to follow front or front+middle. If ceph_msg_revoke() is called while the messenger is sending message's data portion, anything we send after that call is counted by the OSD towards the now revoked message's data portion. The effects vary, the most common one is the eventual hang - higher layers get stuck waiting for the reply to the message that was sent out after ceph_msg_revoke() returned and treated by the OSD as a bunch of data bytes. This is what Matt ran into. (2) Flat out zeroing con->out_kvec_bytes worth of bytes to handle kvecs is wrong. If ceph_msg_revoke() is called before the tag is sent out or while the messenger is sending the header, we will get a connection reset, either due to a bad tag (0 is not a valid tag) or a bad header CRC, which kind of defeats the purpose of revoke. 
Currently the kernel client refuses to work with header CRCs disabled, but that will likely change in the future, making this even worse. (3) con->out_skip is not reset on connection reset, leading to one or more spurious connection resets if we happen to get a real one between con->out_skip is set in ceph_msg_revoke() and before it's cleared in write_partial_skip(). Fixing (1) and (3) is trivial. The idea behind fixing (2) is to never zero the tag or the header, i.e. send out tag+header regardless of when ceph_msg_revoke() is called. That way the header is always correct, no unnecessary resets are induced and revoke stands ready for disabled CRCs. Since ceph_msg_revoke() rips out con->out_msg, introduce a new "message out temp" and copy the header into it before sending. Cc: stable@vger.kernel.org # 4.0+ Reported-by: Matt Conner Signed-off-by: Ilya Dryomov Tested-by: Matt Conner Reviewed-by: Sage Weil Signed-off-by: Sasha Levin --- include/linux/ceph/messenger.h | 2 +- net/ceph/messenger.c | 76 ++++++++++++++++++++++++++-------- 2 files changed, 59 insertions(+), 19 deletions(-) diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index e15499422fdcc..e91c6f15f6e81 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -224,6 +224,7 @@ struct ceph_connection { struct ceph_entity_addr actual_peer_addr; /* message out temps */ + struct ceph_msg_header out_hdr; struct ceph_msg *out_msg; /* sending message (== tail of out_sent) */ bool out_msg_done; @@ -233,7 +234,6 @@ struct ceph_connection { int out_kvec_left; /* kvec's left in out_kvec */ int out_skip; /* skip this many bytes */ int out_kvec_bytes; /* total bytes left */ - bool out_kvec_is_msg; /* kvec refers to out_msg */ int out_more; /* there is more data after the kvecs */ __le64 out_temp_ack; /* for writing an ack */ diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 967080a9f0436..e51af69c61bfe 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -675,6 +675,8 @@ static void reset_connection(struct ceph_connection *con) } con->in_seq = 0; con->in_seq_acked = 0; + + con->out_skip = 0; } /* @@ -774,6 +776,8 @@ static u32 get_global_seq(struct ceph_messenger *msgr, u32 gt) static void con_out_kvec_reset(struct ceph_connection *con) { + BUG_ON(con->out_skip); + con->out_kvec_left = 0; con->out_kvec_bytes = 0; con->out_kvec_cur = &con->out_kvec[0]; @@ -782,9 +786,9 @@ static void con_out_kvec_reset(struct ceph_connection *con) static void con_out_kvec_add(struct ceph_connection *con, size_t size, void *data) { - int index; + int index = con->out_kvec_left; - index = con->out_kvec_left; + BUG_ON(con->out_skip); BUG_ON(index >= ARRAY_SIZE(con->out_kvec)); con->out_kvec[index].iov_len = size; @@ -793,6 +797,27 @@ static void con_out_kvec_add(struct ceph_connection *con, con->out_kvec_bytes += size; } +/* + * Chop off a kvec from the end. Return residual number of bytes for + * that kvec, i.e. how many bytes would have been written if the kvec + * hadn't been nuked. 
+ */ +static int con_out_kvec_skip(struct ceph_connection *con) +{ + int off = con->out_kvec_cur - con->out_kvec; + int skip = 0; + + if (con->out_kvec_bytes > 0) { + skip = con->out_kvec[off + con->out_kvec_left - 1].iov_len; + BUG_ON(con->out_kvec_bytes < skip); + BUG_ON(!con->out_kvec_left); + con->out_kvec_bytes -= skip; + con->out_kvec_left--; + } + + return skip; +} + #ifdef CONFIG_BLOCK /* @@ -1200,7 +1225,6 @@ static void prepare_write_message_footer(struct ceph_connection *con) m->footer.flags |= CEPH_MSG_FOOTER_COMPLETE; dout("prepare_write_message_footer %p\n", con); - con->out_kvec_is_msg = true; con->out_kvec[v].iov_base = &m->footer; if (con->peer_features & CEPH_FEATURE_MSG_AUTH) { if (con->ops->sign_message) @@ -1228,7 +1252,6 @@ static void prepare_write_message(struct ceph_connection *con) u32 crc; con_out_kvec_reset(con); - con->out_kvec_is_msg = true; con->out_msg_done = false; /* Sneak an ack in there first? If we can get it into the same @@ -1268,18 +1291,19 @@ static void prepare_write_message(struct ceph_connection *con) /* tag + hdr + front + middle */ con_out_kvec_add(con, sizeof (tag_msg), &tag_msg); - con_out_kvec_add(con, sizeof (m->hdr), &m->hdr); + con_out_kvec_add(con, sizeof(con->out_hdr), &con->out_hdr); con_out_kvec_add(con, m->front.iov_len, m->front.iov_base); if (m->middle) con_out_kvec_add(con, m->middle->vec.iov_len, m->middle->vec.iov_base); - /* fill in crc (except data pages), footer */ + /* fill in hdr crc and finalize hdr */ crc = crc32c(0, &m->hdr, offsetof(struct ceph_msg_header, crc)); con->out_msg->hdr.crc = cpu_to_le32(crc); - con->out_msg->footer.flags = 0; + memcpy(&con->out_hdr, &con->out_msg->hdr, sizeof(con->out_hdr)); + /* fill in front and middle crc, footer */ crc = crc32c(0, m->front.iov_base, m->front.iov_len); con->out_msg->footer.front_crc = cpu_to_le32(crc); if (m->middle) { @@ -1291,6 +1315,7 @@ static void prepare_write_message(struct ceph_connection *con) dout("%s front_crc %u middle_crc %u\n", __func__, le32_to_cpu(con->out_msg->footer.front_crc), le32_to_cpu(con->out_msg->footer.middle_crc)); + con->out_msg->footer.flags = 0; /* is there a data payload? */ con->out_msg->footer.data_crc = 0; @@ -1485,7 +1510,6 @@ static int write_partial_kvec(struct ceph_connection *con) } } con->out_kvec_left = 0; - con->out_kvec_is_msg = false; ret = 1; out: dout("write_partial_kvec %p %d left in %d kvecs ret = %d\n", con, @@ -1577,6 +1601,7 @@ static int write_partial_skip(struct ceph_connection *con) { int ret; + dout("%s %p %d left\n", __func__, con, con->out_skip); while (con->out_skip > 0) { size_t size = min(con->out_skip, (int) PAGE_CACHE_SIZE); @@ -2493,13 +2518,13 @@ static int try_write(struct ceph_connection *con) more_kvec: /* kvec data queued? 
*/ - if (con->out_skip) { - ret = write_partial_skip(con); + if (con->out_kvec_left) { + ret = write_partial_kvec(con); if (ret <= 0) goto out; } - if (con->out_kvec_left) { - ret = write_partial_kvec(con); + if (con->out_skip) { + ret = write_partial_skip(con); if (ret <= 0) goto out; } @@ -3026,16 +3051,31 @@ void ceph_msg_revoke(struct ceph_msg *msg) ceph_msg_put(msg); } if (con->out_msg == msg) { - dout("%s %p msg %p - was sending\n", __func__, con, msg); - con->out_msg = NULL; - if (con->out_kvec_is_msg) { - con->out_skip = con->out_kvec_bytes; - con->out_kvec_is_msg = false; + BUG_ON(con->out_skip); + /* footer */ + if (con->out_msg_done) { + con->out_skip += con_out_kvec_skip(con); + } else { + BUG_ON(!msg->data_length); + if (con->peer_features & CEPH_FEATURE_MSG_AUTH) + con->out_skip += sizeof(msg->footer); + else + con->out_skip += sizeof(msg->old_footer); } + /* data, middle, front */ + if (msg->data_length) + con->out_skip += msg->cursor.total_resid; + if (msg->middle) + con->out_skip += con_out_kvec_skip(con); + con->out_skip += con_out_kvec_skip(con); + + dout("%s %p msg %p - was sending, will write %d skip %d\n", + __func__, con, msg, con->out_kvec_bytes, con->out_skip); msg->hdr.seq = 0; - + con->out_msg = NULL; ceph_msg_put(msg); } + mutex_unlock(&con->mutex); } From d4dc115d9b67b7ec9769efd9b55d5bfce8d9ef20 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 21 Jan 2016 16:40:27 -0800 Subject: [PATCH 1774/2091] mm: fix mlock accouting [ Upstream commit 7162a1e87b3e380133dadc7909081bb70d0a7041 ] Tetsuo Handa reported underflow of NR_MLOCK on munlock. Testcase: #include #include #include #define BASE ((void *)0x400000000000) #define SIZE (1UL << 21) int main(int argc, char *argv[]) { void *addr; system("grep Mlocked /proc/meminfo"); addr = mmap(BASE, SIZE, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE | MAP_LOCKED | MAP_FIXED, -1, 0); if (addr == MAP_FAILED) printf("mmap() failed\n"), exit(1); munmap(addr, SIZE); system("grep Mlocked /proc/meminfo"); return 0; } It happens on munlock_vma_page() due to unfortunate choice of nr_pages data type: __mod_zone_page_state(zone, NR_MLOCK, -nr_pages); For unsigned int nr_pages, implicitly casted to long in __mod_zone_page_state(), it becomes something around UINT_MAX. munlock_vma_page() usually called for THP as small pages go though pagevec. Let's make nr_pages signed int. Similar fixes in 6cdb18ad98a4 ("mm/vmstat: fix overflow in mod_zone_page_state()") used `long' type, but `int' here is OK for a count of the number of sub-pages in a huge page. Fixes: ff6a6da60b89 ("mm: accelerate munlock() treatment of THP pages") Signed-off-by: Kirill A. 
Shutemov Reported-by: Tetsuo Handa Tested-by: Tetsuo Handa Cc: Michel Lespinasse Acked-by: Michal Hocko Cc: [4.4+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/mlock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/mlock.c b/mm/mlock.c index 6fd2cf15e8687..3d3ee6cad7767 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -172,7 +172,7 @@ static void __munlock_isolation_failed(struct page *page) */ unsigned int munlock_vma_page(struct page *page) { - unsigned int nr_pages; + int nr_pages; struct zone *zone = page_zone(page); /* For try_to_munlock() and to serialize with page migration */ From b085ab71a7e748a9edb77a36ceadbab514ce0947 Mon Sep 17 00:00:00 2001 From: Tariq Saeed Date: Thu, 21 Jan 2016 16:40:39 -0800 Subject: [PATCH 1775/2091] ocfs2: NFS hangs in __ocfs2_cluster_lock due to race with ocfs2_unblock_lock [ Upstream commit b1b1e15ef6b80facf76d6757649dfd7295eda29f ] NFS on a 2 node ocfs2 cluster each node exporting dir. The lock causing the hang is the global bit map inode lock. Node 1 is master, has the lock granted in PR mode; Node 2 is in the converting list (PR -> EX). There are no holders of the lock on the master node so it should downconvert to NL and grant EX to node 2 but that does not happen. BLOCKED + QUEUED in lock res are set and it is on osb blocked list. Threads are waiting in __ocfs2_cluster_lock on BLOCKED. One thread wants EX, rest want PR. So it is as though the downconvert thread needs to be kicked to complete the conv. The hang is caused by an EX req coming into __ocfs2_cluster_lock on the heels of a PR req after it sets BUSY (drops l_lock, releasing EX thread), forcing the incoming EX to wait on BUSY without doing anything. PR has called ocfs2_dlm_lock, which sets the node 1 lock from NL -> PR, queues ast. At this time, upconvert (PR ->EX) arrives from node 2, finds conflict with node 1 lock in PR, so the lock res is put on dlm thread's dirty listt. After ret from ocf2_dlm_lock, PR thread now waits behind EX on BUSY till awoken by ast. Now it is dlm_thread that serially runs dlm_shuffle_lists, ast, bast, in that order. dlm_shuffle_lists ques a bast on behalf of node 2 (which will be run by dlm_thread right after the ast). ast does its part, sets UPCONVERT_FINISHING, clears BUSY and wakes its waiters. Next, dlm_thread runs bast. It sets BLOCKED and kicks dc thread. dc thread runs ocfs2_unblock_lock, but since UPCONVERT_FINISHING set, skips doing anything and reques. Inside of __ocfs2_cluster_lock, since EX has been waiting on BUSY ahead of PR, it wakes up first, finds BLOCKED set and skips doing anything but clearing UPCONVERT_FINISHING (which was actually "meant" for the PR thread), and this time waits on BLOCKED. Next, the PR thread comes out of wait but since UPCONVERT_FINISHING is not set, it skips updating the l_ro_holders and goes straight to wait on BLOCKED. So there, we have a hang! Threads in __ocfs2_cluster_lock wait on BLOCKED, lock res in osb blocked list. Only when dc thread is awoken, it will run ocfs2_unblock_lock and things will unhang. 
One way to fix this is to wake the dc thread on the flag after clearing UPCONVERT_FINISHING Orabug: 20933419 Signed-off-by: Tariq Saeed Signed-off-by: Santosh Shilimkar Reviewed-by: Wengang Wang Reviewed-by: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Reviewed-by: Joseph Qi Cc: Eric Ren Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/ocfs2/dlmglue.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 23157e40dd740..3623ab6fa97f2 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -1390,6 +1390,7 @@ static int __ocfs2_cluster_lock(struct ocfs2_super *osb, unsigned int gen; int noqueue_attempted = 0; int dlm_locked = 0; + int kick_dc = 0; if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED)) { mlog_errno(-EINVAL); @@ -1524,7 +1525,12 @@ static int __ocfs2_cluster_lock(struct ocfs2_super *osb, unlock: lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING); + /* ocfs2_unblock_lock reques on seeing OCFS2_LOCK_UPCONVERT_FINISHING */ + kick_dc = (lockres->l_flags & OCFS2_LOCK_BLOCKED); + spin_unlock_irqrestore(&lockres->l_lock, flags); + if (kick_dc) + ocfs2_wake_downconvert_thread(osb); out: /* * This is helping work around a lock inversion between the page lock From 9295dee2df7af7ea4233b664a8d4d4d057748e2f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 21 Jan 2016 15:39:40 -0500 Subject: [PATCH 1776/2091] pNFS/flexfiles: Fix an XDR encoding bug in layoutreturn [ Upstream commit 082fa37d1351a41afc491d44a1d095cb8d919aa2 ] We must not skip encoding the statistics, or the server will see an XDR encoding error. Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org # 4.0+ Signed-off-by: Sasha Levin --- fs/nfs/flexfilelayout/flexfilelayout.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index fecd9201dbadf..c2abdc7db6c33 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1484,11 +1484,9 @@ ff_layout_encode_layoutreturn(struct pnfs_layout_hdr *lo, start = xdr_reserve_space(xdr, 4); BUG_ON(!start); - if (ff_layout_encode_ioerr(flo, xdr, args)) - goto out; - + ff_layout_encode_ioerr(flo, xdr, args); ff_layout_encode_iostats(flo, xdr, args); -out: + *start = cpu_to_be32((xdr->p - start - 1) * 4); dprintk("%s: Return\n", __func__); } From 085c2863e0f3e3107ff50eb79d226462b7f4fd78 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 9 Nov 2015 17:09:05 +0100 Subject: [PATCH 1777/2091] ideapad-laptop: Add Lenovo Yoga 900 to no_hw_rfkill dmi list [ Upstream commit f71c882dd4cfe4aa88ea07b1402ddd43605d4aef ] Like some of the other Yoga models the Lenovo Yoga 900 does not have a hw rfkill switch, and trying to read the hw rfkill switch through the ideapad module causes it to always reported blocking breaking wifi. This commit adds the Lenovo Yoga 900 to the no_hw_rfkill dmi list, fixing the wifi breakage. 
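For illustration, a small userspace analogue of how such a quirk list is consulted; the simplified string table and helper names below are stand-ins, while the real driver uses DMI_MATCH() entries checked via dmi_check_system().

/* Illustrative userspace analogue of a no_hw_rfkill-style quirk table:
 * entries are matched on DMI vendor/product strings and a hit means
 * "do not trust the hardware rfkill reading on this machine".
 */
#include <stdio.h>
#include <string.h>

struct dmi_quirk {
	const char *ident;
	const char *sys_vendor;
	const char *product_version;
};

static const struct dmi_quirk no_hw_rfkill[] = {
	{ "Lenovo Yoga 3 Pro", "LENOVO", "Lenovo YOGA 3 Pro-1370" },
	{ "Lenovo Yoga 900",   "LENOVO", "Lenovo YOGA 900" },
	{ NULL, NULL, NULL }
};

static const struct dmi_quirk *dmi_match(const char *vendor, const char *version)
{
	const struct dmi_quirk *q;

	for (q = no_hw_rfkill; q->ident; q++)
		if (!strcmp(q->sys_vendor, vendor) &&
		    !strcmp(q->product_version, version))
			return q;
	return NULL;
}

int main(void)
{
	const struct dmi_quirk *hit = dmi_match("LENOVO", "Lenovo YOGA 900");

	if (hit)
		printf("matched \"%s\": ignore the hw rfkill switch\n", hit->ident);
	else
		printf("no quirk: hw rfkill state can be used\n");
	return 0;
}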
BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1275490 Cc: stable@vger.kernel.org Reported-and-tested-by: Kevin Fenzi Signed-off-by: Hans de Goede Signed-off-by: Darren Hart Signed-off-by: Sasha Levin --- drivers/platform/x86/ideapad-laptop.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index 32b58b4ea2e6e..b69fd9415418d 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -872,6 +872,13 @@ static const struct dmi_system_id no_hw_rfkill_list[] = { DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo YOGA 3 Pro-1370"), }, }, + { + .ident = "Lenovo Yoga 900", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo YOGA 900"), + }, + }, {} }; From f75f1b25fb42f28b59d6e050cbdd4440797ac9ad Mon Sep 17 00:00:00 2001 From: Josh Boyer Date: Sun, 24 Jan 2016 10:46:42 -0500 Subject: [PATCH 1778/2091] ideapad-laptop: Add Lenovo Yoga 700 to no_hw_rfkill dmi list [ Upstream commit 6b31de3e698582fe0b8f7f4bab15831b73204800 ] Like the Yoga 900 models the Lenovo Yoga 700 does not have a hw rfkill switch, and trying to read the hw rfkill switch through the ideapad module causes it to always reported blocking breaking wifi. This commit adds the Lenovo Yoga 700 to the no_hw_rfkill dmi list, fixing the wifi breakage. BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1295272 Tested-by: Cc: stable@vger.kernel.org Signed-off-by: Josh Boyer Signed-off-by: Darren Hart Signed-off-by: Sasha Levin --- drivers/platform/x86/ideapad-laptop.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index b69fd9415418d..9a92d13e39178 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -872,6 +872,13 @@ static const struct dmi_system_id no_hw_rfkill_list[] = { DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo YOGA 3 Pro-1370"), }, }, + { + .ident = "Lenovo Yoga 700", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo YOGA 700"), + }, + }, { .ident = "Lenovo Yoga 900", .matches = { From 6cdc037a9b649209051e3c982d5fcaf31274cb55 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 30 Dec 2015 12:59:08 +0800 Subject: [PATCH 1779/2091] usb: cdc-acm: handle unlinked urb in acm read callback [ Upstream commit 19454462acb1bdef80542061bdc9b410e4ed1ff6 ] In current acm driver, the bulk-in callback function ignores the URBs unlinked in usb core. This causes unexpected data loss in some cases. For example, runtime suspend entry will unlinked all urbs and set urb->status to -ENOENT even those urbs might have data not processed yet. Hence, data loss occurs. This patch lets bulk-in callback function handle unlinked urbs to avoid data loss. 
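A minimal userspace sketch of the decision the fixed callback makes; plain integers stand in for urb->status and urb->actual_length, and keep_data() is an illustrative name rather than a driver function.

/* Userspace sketch of the completion-callback decision after the fix:
 * data is still processed when the URB was unlinked (-ENOENT) but had
 * already received bytes; every other error still drops the buffer.
 */
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

static bool keep_data(int status, unsigned int actual_length)
{
	if (status == 0)
		return true;		/* normal completion */
	/* mirrors: if ((status != -ENOENT) || (actual_length == 0)) return; */
	return status == -ENOENT && actual_length > 0;
}

int main(void)
{
	struct { int status; unsigned int len; } cases[] = {
		{ 0, 64 },		/* completed normally         */
		{ -ENOENT, 64 },	/* unlinked, data present     */
		{ -ENOENT, 0 },		/* unlinked, nothing received */
		{ -EPROTO, 64 },	/* real transfer error        */
	};

	for (unsigned int i = 0; i < sizeof(cases) / sizeof(cases[0]); i++)
		printf("status=%4d len=%2u -> %s\n", cases[i].status, cases[i].len,
		       keep_data(cases[i].status, cases[i].len) ? "process" : "drop");
	return 0;
}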
Signed-off-by: Tang Jian Qiang Signed-off-by: Lu Baolu Cc: stable@vger.kernel.org Acked-by: Oliver Neukum Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/class/cdc-acm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 0fe15aec7ed07..2bf51b27959b5 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -432,7 +432,8 @@ static void acm_read_bulk_callback(struct urb *urb) set_bit(rb->index, &acm->read_urbs_free); dev_dbg(&acm->data->dev, "%s - non-zero urb status: %d\n", __func__, status); - return; + if ((status != -ENOENT) || (urb->actual_length == 0)) + return; } usb_mark_last_busy(acm->dev); From a7b1bf8b60e4fd4deb414cabc9f218eff244d4f9 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 6 Jan 2016 15:10:04 +0800 Subject: [PATCH 1780/2091] usb: cdc-acm: send zero packet for intel 7260 modem [ Upstream commit ffdb1e369a73b380fce95b05f8498d92c43842b4 ] For Intel 7260 modem, it is needed for host side to send zero packet if the BULK OUT size is equal to USB endpoint max packet length. Otherwise, modem side may still wait for more data and cannot give response to host side. Signed-off-by: Konrad Leszczynski Signed-off-by: Lu Baolu Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/class/cdc-acm.c | 6 ++++++ drivers/usb/class/cdc-acm.h | 1 + 2 files changed, 7 insertions(+) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 2bf51b27959b5..8bfcf208c6197 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -1415,6 +1415,8 @@ static int acm_probe(struct usb_interface *intf, usb_sndbulkpipe(usb_dev, epwrite->bEndpointAddress), NULL, acm->writesize, acm_write_bulk, snd); snd->urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; + if (quirks & SEND_ZERO_PACKET) + snd->urb->transfer_flags |= URB_ZERO_PACKET; snd->instance = acm; } @@ -1872,6 +1874,10 @@ static const struct usb_device_id acm_ids[] = { { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM, USB_CDC_ACM_PROTO_AT_CDMA) }, + { USB_DEVICE(0x1519, 0x0452), /* Intel 7260 modem */ + .driver_info = SEND_ZERO_PACKET, + }, + { } }; diff --git a/drivers/usb/class/cdc-acm.h b/drivers/usb/class/cdc-acm.h index b3b6c9db6fe5b..ac830e0ae38ba 100644 --- a/drivers/usb/class/cdc-acm.h +++ b/drivers/usb/class/cdc-acm.h @@ -134,3 +134,4 @@ struct acm { #define IGNORE_DEVICE BIT(5) #define QUIRK_CONTROL_LINE_STATE BIT(6) #define CLEAR_HALT_CONDITIONS BIT(7) +#define SEND_ZERO_PACKET BIT(8) From 08d717c38d856465804b1b1d3fea19b64d8214e7 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Mon, 18 Jan 2016 15:45:18 +0100 Subject: [PATCH 1781/2091] cdc-acm:exclude Samsung phone 04e8:685d [ Upstream commit e912e685f372ab62a2405a1acd923597f524e94a ] This phone needs to be handled by a specialised firmware tool and is reported to crash irrevocably if cdc-acm takes it. 
Signed-off-by: Oliver Neukum CC: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/class/cdc-acm.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 8bfcf208c6197..df3deb000a808 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -1851,6 +1851,11 @@ static const struct usb_device_id acm_ids[] = { }, #endif + /*Samsung phone in firmware update mode */ + { USB_DEVICE(0x04e8, 0x685d), + .driver_info = IGNORE_DEVICE, + }, + /* Exclude Infineon Flash Loader utility */ { USB_DEVICE(0x058b, 0x0041), .driver_info = IGNORE_DEVICE, From 522dc09ca0451d118795e8a05c7b3717f3d1ad8b Mon Sep 17 00:00:00 2001 From: "Du, Changbin" Date: Mon, 18 Jan 2016 21:02:42 +0800 Subject: [PATCH 1782/2091] usb: hub: do not clear BOS field during reset device [ Upstream commit d8f00cd685f5c8e0def8593e520a7fef12c22407 ] In function usb_reset_and_verify_device, the old BOS descriptor may still be used before allocating a new one. (usb_unlocked_disable_lpm function uses it under the situation that it fails to disable lpm.) So we cannot set the udev->bos to NULL before that, just keep what it was. It will be overwrite when allocating a new one. Crash log: BUG: unable to handle kernel NULL pointer dereference at 0000000000000010 IP: [] usb_enable_link_state+0x2d/0x2f0 Call Trace: [] ? usb_set_lpm_timeout+0x12b/0x140 [] usb_enable_lpm+0x81/0xa0 [] usb_disable_lpm+0xa8/0xc0 [] usb_unlocked_disable_lpm+0x2c/0x50 [] usb_reset_and_verify_device+0xc3/0x710 [] ? usb_sg_wait+0x13d/0x190 [] usb_reset_device+0x133/0x280 [] usb_stor_port_reset+0x61/0x70 [] usb_stor_invoke_transport+0x88/0x520 Signed-off-by: Du, Changbin Cc: stable Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/core/hub.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index ee11b301f3dad..e56ad83b35a46 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -5346,7 +5346,6 @@ static int usb_reset_and_verify_device(struct usb_device *udev) } bos = udev->bos; - udev->bos = NULL; for (i = 0; i < SET_CONFIG_TRIES; ++i) { @@ -5439,8 +5438,11 @@ static int usb_reset_and_verify_device(struct usb_device *udev) usb_set_usb2_hardware_lpm(udev, 1); usb_unlocked_enable_lpm(udev); usb_enable_ltm(udev); - usb_release_bos_descriptor(udev); - udev->bos = bos; + /* release the new BOS descriptor allocated by hub_port_init() */ + if (udev->bos != bos) { + usb_release_bos_descriptor(udev); + udev->bos = bos; + } return 0; re_enumerate: From 1c47f2c5aff5993647468e8476bb6bbc6e09a939 Mon Sep 17 00:00:00 2001 From: Peter Dedecker Date: Fri, 8 Jan 2016 12:34:41 +0100 Subject: [PATCH 1783/2091] USB: cp210x: add ID for IAI USB to RS485 adaptor [ Upstream commit f487c54ddd544e1c9172cd510954f697b77b76e3 ] Added the USB serial console device ID for IAI Corp. RCB-CV-USB USB to RS485 adaptor. 
Signed-off-by: Peter Dedecker Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Sasha Levin --- drivers/usb/serial/cp210x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index 59b2126b21a39..1dd9919081f80 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -98,6 +98,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0x81AC) }, /* MSD Dash Hawk */ { USB_DEVICE(0x10C4, 0x81AD) }, /* INSYS USB Modem */ { USB_DEVICE(0x10C4, 0x81C8) }, /* Lipowsky Industrie Elektronik GmbH, Baby-JTAG */ + { USB_DEVICE(0x10C4, 0x81D7) }, /* IAI Corp. RCB-CV-USB USB to RS485 Adaptor */ { USB_DEVICE(0x10C4, 0x81E2) }, /* Lipowsky Industrie Elektronik GmbH, Baby-LIN */ { USB_DEVICE(0x10C4, 0x81E7) }, /* Aerocomm Radio */ { USB_DEVICE(0x10C4, 0x81E8) }, /* Zephyr Bioharness */ From 5c9cad0a726131934408f3a9e66fc381204a9ba2 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 12 Jan 2016 12:05:20 +0100 Subject: [PATCH 1784/2091] USB: visor: fix null-deref at probe [ Upstream commit cac9b50b0d75a1d50d6c056ff65c005f3224c8e0 ] Fix null-pointer dereference at probe should a (malicious) Treo device lack the expected endpoints. Specifically, the Treo port-setup hack was dereferencing the bulk-in and interrupt-in urbs without first making sure they had been allocated by core. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Sasha Levin --- drivers/usb/serial/visor.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/serial/visor.c b/drivers/usb/serial/visor.c index 60afb39eb73c0..c53fbb3e0b8c6 100644 --- a/drivers/usb/serial/visor.c +++ b/drivers/usb/serial/visor.c @@ -544,6 +544,11 @@ static int treo_attach(struct usb_serial *serial) (serial->num_interrupt_in == 0)) return 0; + if (serial->num_bulk_in < 2 || serial->num_interrupt_in < 2) { + dev_err(&serial->interface->dev, "missing endpoints\n"); + return -ENODEV; + } + /* * It appears that Treos and Kyoceras want to use the * 1st bulk in endpoint to communicate with the 2nd bulk out endpoint, From 85491ceb50c4bc446127776714b41d2b9ca627f1 Mon Sep 17 00:00:00 2001 From: Vladis Dronov Date: Tue, 12 Jan 2016 15:10:50 +0100 Subject: [PATCH 1785/2091] USB: serial: visor: fix crash on detecting device without write_urbs [ Upstream commit cb3232138e37129e88240a98a1d2aba2187ff57c ] The visor driver crashes in clie_5_attach() when a specially crafted USB device without bulk-out endpoint is detected. This fix adds a check that the device has proper configuration expected by the driver. 
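A minimal userspace sketch of the sanity-check pattern used by both visor fixes; struct fake_serial and treo_like_attach() are illustrative stand-ins for the driver types, not real kernel API.

/* Userspace sketch of the "validate endpoint counts before touching
 * port N" pattern: the probe-time check turns a would-be NULL/out-of-
 * bounds access caused by a crafted descriptor into a clean -ENODEV.
 */
#include <errno.h>
#include <stdio.h>

struct fake_serial {
	int num_bulk_in;
	int num_bulk_out;
	int num_interrupt_in;
};

static int treo_like_attach(const struct fake_serial *s)
{
	if (s->num_bulk_in < 2 || s->num_interrupt_in < 2) {
		fprintf(stderr, "missing endpoints\n");
		return -ENODEV;	/* refuse to bind instead of crashing */
	}
	/* only now is it safe to touch the urbs of port[0] and port[1] */
	return 0;
}

int main(void)
{
	struct fake_serial ok  = { .num_bulk_in = 2, .num_bulk_out = 2, .num_interrupt_in = 2 };
	struct fake_serial bad = { .num_bulk_in = 1, .num_bulk_out = 0, .num_interrupt_in = 0 };

	printf("well-formed device: %d\n", treo_like_attach(&ok));
	printf("crafted device:     %d\n", treo_like_attach(&bad));
	return 0;
}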
Reported-by: Ralf Spenneberg Signed-off-by: Vladis Dronov Fixes: cfb8da8f69b8 ("USB: visor: fix initialisation of UX50/TH55 devices") Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Sasha Levin --- drivers/usb/serial/visor.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/usb/serial/visor.c b/drivers/usb/serial/visor.c index c53fbb3e0b8c6..337a0be89fcf0 100644 --- a/drivers/usb/serial/visor.c +++ b/drivers/usb/serial/visor.c @@ -602,8 +602,10 @@ static int clie_5_attach(struct usb_serial *serial) */ /* some sanity check */ - if (serial->num_ports < 2) - return -1; + if (serial->num_bulk_out < 2) { + dev_err(&serial->interface->dev, "missing bulk out endpoints\n"); + return -ENODEV; + } /* port 0 now uses the modified endpoint Address */ port = serial->port[0]; From a498ef85e10d1ad035499be0bdcef38bdc791c1a Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Tue, 12 Jan 2016 17:22:06 +0100 Subject: [PATCH 1786/2091] USB: serial: option: Adding support for Telit LE922 [ Upstream commit ff4e2494dc17b173468e1713fdf6237fd8578bc7 ] This patch adds support for two PIDs of LE922. Signed-off-by: Daniele Palmas Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Sasha Levin --- drivers/usb/serial/option.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 4021846139c93..42016408bc58c 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -271,6 +271,8 @@ static void option_instat_callback(struct urb *urb); #define TELIT_PRODUCT_CC864_SINGLE 0x1006 #define TELIT_PRODUCT_DE910_DUAL 0x1010 #define TELIT_PRODUCT_UE910_V2 0x1012 +#define TELIT_PRODUCT_LE922_USBCFG0 0x1042 +#define TELIT_PRODUCT_LE922_USBCFG3 0x1043 #define TELIT_PRODUCT_LE920 0x1200 #define TELIT_PRODUCT_LE910 0x1201 @@ -623,6 +625,16 @@ static const struct option_blacklist_info sierra_mc73xx_blacklist = { .reserved = BIT(8) | BIT(10) | BIT(11), }; +static const struct option_blacklist_info telit_le922_blacklist_usbcfg0 = { + .sendsetup = BIT(2), + .reserved = BIT(0) | BIT(1) | BIT(3), +}; + +static const struct option_blacklist_info telit_le922_blacklist_usbcfg3 = { + .sendsetup = BIT(0), + .reserved = BIT(1) | BIT(2) | BIT(3), +}; + static const struct usb_device_id option_ids[] = { { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_COLT) }, { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_RICOLA) }, @@ -1172,6 +1184,10 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_CC864_SINGLE) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_DE910_DUAL) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_UE910_V2) }, + { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE922_USBCFG0), + .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg0 }, + { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE922_USBCFG3), + .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg3 }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910), .driver_info = (kernel_ulong_t)&telit_le910_blacklist }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920), From 16116a3de550c2341933f4b0c54d7e52ceee4edf Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 25 Jan 2016 11:01:47 +0100 Subject: [PATCH 1787/2091] ALSA: seq: Fix incorrect sanity check at snd_seq_oss_synth_cleanup() [ Upstream commit 599151336638d57b98d92338aa59c048e3a3e97d ] ALSA sequencer OSS emulation code has a sanity check for currently opened devices, but there is a thinko there, eventually it spews warnings and skips the operation 
wrongly like: WARNING: CPU: 1 PID: 7573 at sound/core/seq/oss/seq_oss_synth.c:311 Fix this off-by-one error. Reported-by: Dmitry Vyukov Cc: Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/core/seq/oss/seq_oss_synth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/core/seq/oss/seq_oss_synth.c b/sound/core/seq/oss/seq_oss_synth.c index 48e4fe1b68abb..f38cf91b4faf3 100644 --- a/sound/core/seq/oss/seq_oss_synth.c +++ b/sound/core/seq/oss/seq_oss_synth.c @@ -308,7 +308,7 @@ snd_seq_oss_synth_cleanup(struct seq_oss_devinfo *dp) struct seq_oss_synth *rec; struct seq_oss_synthinfo *info; - if (snd_BUG_ON(dp->max_synthdev >= SNDRV_SEQ_OSS_MAX_SYNTH_DEVS)) + if (snd_BUG_ON(dp->max_synthdev > SNDRV_SEQ_OSS_MAX_SYNTH_DEVS)) return; for (i = 0; i < dp->max_synthdev; i++) { info = &dp->synths[i]; From 14a2303b85c63299dad236f0d2df9143d22b9242 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 25 Jan 2016 11:24:56 +0100 Subject: [PATCH 1788/2091] ALSA: seq: Degrade the error message for too many opens [ Upstream commit da10816e3d923565b470fec78a674baba794ed33 ] ALSA OSS sequencer spews a kernel error message ("ALSA: seq_oss: too many applications") when user-space tries to open more than the limit. This means that it can easily fill the log buffer. Since it's merely a normal error, it's safe to suppress it via pr_debug() instead. Cc: Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/core/seq/oss/seq_oss_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/core/seq/oss/seq_oss_init.c b/sound/core/seq/oss/seq_oss_init.c index 2de3feff70d06..dad5b1123e46e 100644 --- a/sound/core/seq/oss/seq_oss_init.c +++ b/sound/core/seq/oss/seq_oss_init.c @@ -202,7 +202,7 @@ snd_seq_oss_open(struct file *file, int level) dp->index = i; if (i >= SNDRV_SEQ_OSS_MAX_CLIENTS) { - pr_err("ALSA: seq_oss: too many applications\n"); + pr_debug("ALSA: seq_oss: too many applications\n"); rc = -ENOMEM; goto _error; } From 5ebf917025ec0d6f62f7d8411744d16efe9745d0 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 19 Jan 2016 23:43:13 -0800 Subject: [PATCH 1789/2091] USB: serial: ftdi_sio: add support for Yaesu SCU-18 cable [ Upstream commit e03cdf22a2727c60307be6a729233edab3bfda9c ] Harald Linden reports that the ftdi_sio driver works properly for the Yaesu SCU-18 cable if the device ids are added to the driver. So let's add them. 
Reported-by: Harald Linden Cc: stable Signed-off-by: Greg Kroah-Hartman Signed-off-by: Johan Hovold Signed-off-by: Sasha Levin --- drivers/usb/serial/ftdi_sio.c | 1 + drivers/usb/serial/ftdi_sio_ids.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index a5a0376bbd48c..8c660ae401d82 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -824,6 +824,7 @@ static const struct usb_device_id id_table_combined[] = { { USB_DEVICE(FTDI_VID, FTDI_TURTELIZER_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(RATOC_VENDOR_ID, RATOC_PRODUCT_ID_USB60F) }, + { USB_DEVICE(RATOC_VENDOR_ID, RATOC_PRODUCT_ID_SCU18) }, { USB_DEVICE(FTDI_VID, FTDI_REU_TINY_PID) }, /* Papouch devices based on FTDI chip */ diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index 2943b97b2a836..7850071c0ae18 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -615,6 +615,7 @@ */ #define RATOC_VENDOR_ID 0x0584 #define RATOC_PRODUCT_ID_USB60F 0xb020 +#define RATOC_PRODUCT_ID_SCU18 0xb03a /* * Infineon Technologies From be630626eccf643f4c2c7e276e1f25b9928ec2e4 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Wed, 13 Jan 2016 14:50:03 +0000 Subject: [PATCH 1790/2091] arm64: kernel: fix architected PMU registers unconditional access [ Upstream commit f436b2ac90a095746beb6729b8ee8ed87c9eaede ] The Performance Monitors extension is an optional feature of the AArch64 architecture, therefore, in order to access Performance Monitors registers safely, the kernel should detect the architected PMU unit presence through the ID_AA64DFR0_EL1 register PMUVer field before accessing them. This patch implements a guard by reading the ID_AA64DFR0_EL1 register PMUVer field to detect the architected PMU presence and prevent accessing PMU system registers if the Performance Monitors extension is not implemented in the core. 
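A userspace sketch of the same guard in C: it performs the signed 4-bit field extract that the sbfx instruction does and skips PMU setup when PMUVer is below 1. The register values passed in main() are made-up examples.

/* Userspace sketch of the PMUVer guard: extract the signed 4-bit
 * ID_AA64DFR0_EL1.PMUVer field (bits [11:8], as sbfx does in head.S)
 * and only touch PMU registers when the value is >= 1.
 */
#include <stdint.h>
#include <stdio.h>

static int pmuver(uint64_t id_aa64dfr0)
{
	/* signed bit-field extract of bits [11:8] */
	return (int)((int64_t)(id_aa64dfr0 << (64 - 12)) >> 60);
}

static void maybe_init_pmu(uint64_t id_aa64dfr0)
{
	int ver = pmuver(id_aa64dfr0);

	if (ver < 1) {		/* 0: no PMU; -1 (0xf): IMPLEMENTATION DEFINED */
		printf("PMUVer=%d: skip PMU register setup\n", ver);
		return;
	}
	printf("PMUVer=%d: safe to touch PMCR_EL0 / MDCR_EL2\n", ver);
}

int main(void)
{
	maybe_init_pmu(0x0000000000000106ULL);	/* PMUVer = 1: architected PMU   */
	maybe_init_pmu(0x0000000000000006ULL);	/* PMUVer = 0: not implemented   */
	maybe_init_pmu(0x0000000000000f06ULL);	/* PMUVer = 0xf: impl. defined   */
	return 0;
}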
Cc: Peter Maydell Cc: Mark Rutland Cc: Fixes: 60792ad349f3 ("arm64: kernel: enforce pmuserenr_el0 initialization and restore") Signed-off-by: Lorenzo Pieralisi Reported-by: Guenter Roeck Tested-by: Guenter Roeck Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- arch/arm64/kernel/head.S | 5 +++++ arch/arm64/mm/proc-macros.S | 12 ++++++++++++ arch/arm64/mm/proc.S | 4 ++-- 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 36aa31ff2c060..cc7435c9676ec 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -566,9 +566,14 @@ CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems #endif /* EL2 debug */ + mrs x0, id_aa64dfr0_el1 // Check ID_AA64DFR0_EL1 PMUVer + sbfx x0, x0, #8, #4 + cmp x0, #1 + b.lt 4f // Skip if no PMU present mrs x0, pmcr_el0 // Disable debug access traps ubfx x0, x0, #11, #5 // to EL2 and allow access to msr mdcr_el2, x0 // all PMU counters from EL1 +4: /* Stage-2 translation */ msr vttbr_el2, xzr diff --git a/arch/arm64/mm/proc-macros.S b/arch/arm64/mm/proc-macros.S index 4c4d93c4bf65b..d69dffffaa899 100644 --- a/arch/arm64/mm/proc-macros.S +++ b/arch/arm64/mm/proc-macros.S @@ -62,3 +62,15 @@ bfi \valreg, \tmpreg, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH #endif .endm + +/* + * reset_pmuserenr_el0 - reset PMUSERENR_EL0 if PMUv3 present + */ + .macro reset_pmuserenr_el0, tmpreg + mrs \tmpreg, id_aa64dfr0_el1 // Check ID_AA64DFR0_EL1 PMUVer + sbfx \tmpreg, \tmpreg, #8, #4 + cmp \tmpreg, #1 // Skip if no PMU present + b.lt 9000f + msr pmuserenr_el0, xzr // Disable PMU access from EL0 +9000: + .endm diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 55b3f1400b3e7..d253908a988d5 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -165,7 +165,7 @@ ENTRY(cpu_do_resume) */ ubfx x11, x11, #1, #1 msr oslar_el1, x11 - msr pmuserenr_el0, xzr // Disable PMU access from EL0 + reset_pmuserenr_el0 x0 // Disable PMU access from EL0 mov x0, x12 dsb nsh // Make sure local tlb invalidation completed isb @@ -205,7 +205,7 @@ ENTRY(__cpu_setup) msr cpacr_el1, x0 // Enable FP/ASIMD mov x0, #1 << 12 // Reset mdscr_el1 and disable msr mdscr_el1, x0 // access to the DCC from EL0 - msr pmuserenr_el0, xzr // Disable PMU access from EL0 + reset_pmuserenr_el0 x0 // Disable PMU access from EL0 /* * Memory region attributes for LPAE: * From 3f0333c42683452b2315dec5963a5b997e213387 Mon Sep 17 00:00:00 2001 From: John Ernberg Date: Mon, 25 Jan 2016 12:27:17 +0000 Subject: [PATCH 1791/2091] USB: option: fix Cinterion AHxx enumeration [ Upstream commit 4152b387da81617c80cb2946b2d56e3958906b3e ] In certain kernel configurations where the cdc_ether and option drivers are compiled as modules there can occur a race condition in enumeration. This causes the option driver to enumerate the ethernet(wwan) interface as usb-serial interfaces. usb-devices output for the modem: T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 5 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=1e2d ProdID=0055 Rev=00.00 S: Manufacturer=Cinterion S: Product=AHx C: #Ifs= 6 Cfg#= 1 Atr=e0 MxPwr=10mA I: If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option I: If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option I: If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) 
Sub=ff Prot=ff Driver=option I: If#= 4 Alt= 0 #EPs= 1 Cls=02(commc) Sub=06 Prot=00 Driver=cdc_ether I: If#= 5 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=cdc_ether Signed-off-by: John Ernberg Fixes: 1941138e1c02 ("USB: added support for Cinterion's products...") Cc: stable # v3.9: 8ff10bdb14a52 Signed-off-by: Johan Hovold Signed-off-by: Sasha Levin --- drivers/usb/serial/option.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 42016408bc58c..88540596973f1 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1707,7 +1707,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_EU3_P) }, { USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_PH8), .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, - { USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_AHXX) }, + { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_AHXX, 0xff) }, { USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_PLXX), .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, { USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_HC28_MDM) }, From 488c5719648811d5b9bb174f2c77fe2f09dcd799 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 25 Jan 2016 13:59:21 +0100 Subject: [PATCH 1792/2091] ALSA: compress: Disable GET_CODEC_CAPS ioctl for some architectures [ Upstream commit 462b3f161beb62eeb290f4ec52f5ead29a2f8ac7 ] Some architectures like PowerPC can handle the maximum struct size in an ioctl only up to 13 bits, and struct snd_compr_codec_caps used by SNDRV_COMPRESS_GET_CODEC_CAPS ioctl overflows this limit. This problem was revealed recently by a powerpc change, as it's now treated as a fatal build error. This patch is a stop-gap for that: for architectures with less than 14 bit ioctl struct size, get rid of the handling of the relevant ioctl. We should provide an alternative equivalent ioctl code later, but for now just paper over it. Luckily, the compress API hasn't been used on such architectures, so the impact must be effectively zero. Reviewed-by: Mark Brown Acked-by: Sudip Mukherjee Cc: Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/core/compress_offload.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/sound/core/compress_offload.c b/sound/core/compress_offload.c index b123c42e7dc89..b554d7f9e3be1 100644 --- a/sound/core/compress_offload.c +++ b/sound/core/compress_offload.c @@ -44,6 +44,13 @@ #include #include +/* struct snd_compr_codec_caps overflows the ioctl bit size for some + * architectures, so we need to disable the relevant ioctls. 
+ */ +#if _IOC_SIZEBITS < 14 +#define COMPR_CODEC_CAPS_OVERFLOW +#endif + /* TODO: * - add substream support for multiple devices in case of * SND_DYNAMIC_MINORS is not used @@ -438,6 +445,7 @@ snd_compr_get_caps(struct snd_compr_stream *stream, unsigned long arg) return retval; } +#ifndef COMPR_CODEC_CAPS_OVERFLOW static int snd_compr_get_codec_caps(struct snd_compr_stream *stream, unsigned long arg) { @@ -461,6 +469,7 @@ snd_compr_get_codec_caps(struct snd_compr_stream *stream, unsigned long arg) kfree(caps); return retval; } +#endif /* !COMPR_CODEC_CAPS_OVERFLOW */ /* revisit this with snd_pcm_preallocate_xxx */ static int snd_compr_allocate_buffer(struct snd_compr_stream *stream, @@ -799,9 +808,11 @@ static long snd_compr_ioctl(struct file *f, unsigned int cmd, unsigned long arg) case _IOC_NR(SNDRV_COMPRESS_GET_CAPS): retval = snd_compr_get_caps(stream, arg); break; +#ifndef COMPR_CODEC_CAPS_OVERFLOW case _IOC_NR(SNDRV_COMPRESS_GET_CODEC_CAPS): retval = snd_compr_get_codec_caps(stream, arg); break; +#endif case _IOC_NR(SNDRV_COMPRESS_SET_PARAMS): retval = snd_compr_set_params(stream, arg); break; From 465cc6de211a88a8af978fd8e1d07e6a453a31c4 Mon Sep 17 00:00:00 2001 From: Guillaume Fougnies Date: Tue, 26 Jan 2016 00:28:27 +0100 Subject: [PATCH 1793/2091] ALSA: usb-audio: Fix TEAC UD-501/UD-503/NT-503 usb delay [ Upstream commit 5a4ff9ec8d6edd2ab1cfe8ce6a080d6e57cbea9a ] TEAC UD-501/UD-503/NT-503 fail to switch properly between different rate/format. Similar to 'Playback Design', this patch corrects the invalid clock source error for TEAC products and avoids complete freeze of the usb interface of 503 series. Signed-off-by: Guillaume Fougnies Cc: Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/usb/quirks.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index fb9a8a5787a66..5585cfd7f63e3 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1202,8 +1202,12 @@ void snd_usb_set_interface_quirk(struct usb_device *dev) * "Playback Design" products need a 50ms delay after setting the * USB interface. */ - if (le16_to_cpu(dev->descriptor.idVendor) == 0x23ba) + switch (le16_to_cpu(dev->descriptor.idVendor)) { + case 0x23ba: /* Playback Design */ + case 0x0644: /* TEAC Corp. */ mdelay(50); + break; + } } void snd_usb_ctl_msg_quirk(struct usb_device *dev, unsigned int pipe, @@ -1218,6 +1222,14 @@ void snd_usb_ctl_msg_quirk(struct usb_device *dev, unsigned int pipe, (requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS) mdelay(20); + /* + * "TEAC Corp." products need a 20ms delay after each + * class compliant request + */ + if ((le16_to_cpu(dev->descriptor.idVendor) == 0x0644) && + (requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS) + mdelay(20); + /* Marantz/Denon devices with USB DAC functionality need a delay * after each class compliant request */ From 593bd1058b30eb4f0d6949afdfd9c2659d94ae1e Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 14 Jan 2016 16:00:41 +0200 Subject: [PATCH 1794/2091] virtio_pci: fix use after free on release [ Upstream commit 2989be09a8a9d62a785137586ad941f916e08f83 ] KASan detected a use-after-free error in virtio-pci remove code. In virtio_pci_remove(), vp_dev is still used after being freed in unregister_virtio_device() (in virtio_pci_release_dev() more precisely). To fix, keep a reference until cleanup is done. 
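A userspace sketch of the underlying pattern, pinning a refcounted object with an extra get before teardown so it stays valid until cleanup finishes; struct obj and its helpers are illustrative stand-ins for get_device()/put_device(), not kernel code.

/* Userspace sketch of the "pin the object across teardown" pattern:
 * an extra get before unregister keeps the refcounted structure alive
 * until every cleanup step that still dereferences it has finished.
 */
#include <stdio.h>
#include <stdlib.h>

struct obj {
	int refcount;
	int id;
};

static struct obj *obj_get(struct obj *o) { o->refcount++; return o; }

static void obj_put(struct obj *o)
{
	if (--o->refcount == 0) {
		printf("releasing obj %d\n", o->id);
		free(o);
	}
}

/* Drops the reference owned by the registration, much as
 * unregister_virtio_device() can end in the release callback. */
static void unregister_obj(struct obj *o) { obj_put(o); }

int main(void)
{
	struct obj *o = calloc(1, sizeof(*o));

	if (!o)
		return 1;
	o->refcount = 1;		/* reference owned by "registration" */
	o->id = 42;

	struct obj *pin = obj_get(o);	/* extra ref, as the fix takes on &vdev.dev */
	unregister_obj(o);		/* may drop the last registration ref */

	/* still safe: the pin keeps *o valid for the remaining cleanup */
	printf("finishing cleanup for obj %d\n", pin->id);
	obj_put(pin);			/* now the object is really released */
	return 0;
}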
Fixes: 63bd62a08ca4 ("virtio_pci: defer kfree until release callback") Reported-by: Jerome Marchand Cc: stable@vger.kernel.org Cc: Sasha Levin Signed-off-by: Michael S. Tsirkin Tested-by: Jerome Marchand Signed-off-by: Sasha Levin --- drivers/virtio/virtio_pci_common.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index eba1b7ac72945..14f767e8e5c55 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -554,6 +554,7 @@ static int virtio_pci_probe(struct pci_dev *pci_dev, static void virtio_pci_remove(struct pci_dev *pci_dev) { struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev); + struct device *dev = get_device(&vp_dev->vdev.dev); unregister_virtio_device(&vp_dev->vdev); @@ -564,6 +565,7 @@ static void virtio_pci_remove(struct pci_dev *pci_dev) pci_release_regions(pci_dev); pci_disable_device(pci_dev); + put_device(dev); } static struct pci_driver virtio_pci_driver = { From 2c68386075ac61d3bedbc34491a0ed828ff832b9 Mon Sep 17 00:00:00 2001 From: Lucas Tanure Date: Mon, 25 Jan 2016 19:30:23 -0200 Subject: [PATCH 1795/2091] ALSA: bebob: Use a signed return type for get_formation_index [ Upstream commit 07905298e4d5777eb58516cdc242f7ac1ca387a2 ] The return type "unsigned int" was used by the get_formation_index function despite of the aspect that it will eventually return a negative error code. So, change to signed int and get index by reference in the parameters. Done with the help of Coccinelle. [Fix the missing braces suggested by Julia Lawall -- tiwai] Signed-off-by: Lucas Tanure Reviewed-by: Takashi Sakamoto Tested-by: Takashi Sakamoto Cc: Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/firewire/bebob/bebob_stream.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/sound/firewire/bebob/bebob_stream.c b/sound/firewire/bebob/bebob_stream.c index 98e4fc8121a1f..5e547cb199f0e 100644 --- a/sound/firewire/bebob/bebob_stream.c +++ b/sound/firewire/bebob/bebob_stream.c @@ -47,14 +47,16 @@ static const unsigned int bridgeco_freq_table[] = { [6] = 0x07, }; -static unsigned int -get_formation_index(unsigned int rate) +static int +get_formation_index(unsigned int rate, unsigned int *index) { unsigned int i; for (i = 0; i < ARRAY_SIZE(snd_bebob_rate_table); i++) { - if (snd_bebob_rate_table[i] == rate) - return i; + if (snd_bebob_rate_table[i] == rate) { + *index = i; + return 0; + } } return -EINVAL; } @@ -367,7 +369,9 @@ make_both_connections(struct snd_bebob *bebob, unsigned int rate) goto end; /* confirm params for both streams */ - index = get_formation_index(rate); + err = get_formation_index(rate, &index); + if (err < 0) + goto end; pcm_channels = bebob->tx_stream_formations[index].pcm; midi_channels = bebob->tx_stream_formations[index].midi; amdtp_stream_set_parameters(&bebob->tx_stream, From daab58866c29c4e9af4d3df2175f2dc485057e46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mika=20Penttil=C3=A4?= Date: Tue, 26 Jan 2016 15:47:25 +0000 Subject: [PATCH 1796/2091] arm64: mm: avoid calling apply_to_page_range on empty range MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 57adec866c0440976c96a4b8f5b59fb411b1cacb ] Calling apply_to_page_range with an empty range results in a BUG_ON from the core code. This can be triggered by trying to load the st_drv module with CONFIG_DEBUG_SET_MODULE_RONX enabled: kernel BUG at mm/memory.c:1874! 
Internal error: Oops - BUG: 0 [#1] PREEMPT SMP Modules linked in: CPU: 3 PID: 1764 Comm: insmod Not tainted 4.5.0-rc1+ #2 Hardware name: ARM Juno development board (r0) (DT) task: ffffffc9763b8000 ti: ffffffc975af8000 task.ti: ffffffc975af8000 PC is at apply_to_page_range+0x2cc/0x2d0 LR is at change_memory_common+0x80/0x108 This patch fixes the issue by making change_memory_common (called by the set_memory_* functions) a NOP when numpages == 0, therefore avoiding the erroneous call to apply_to_page_range and bringing us into line with x86 and s390. Cc: Reviewed-by: Laura Abbott Acked-by: David Rientjes Signed-off-by: Mika Penttilä Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- arch/arm64/mm/pageattr.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c index e47ed1c5dce1b..545710f854f81 100644 --- a/arch/arm64/mm/pageattr.c +++ b/arch/arm64/mm/pageattr.c @@ -57,6 +57,9 @@ static int change_memory_common(unsigned long addr, int numpages, if (end < MODULES_VADDR || end >= MODULES_END) return -EINVAL; + if (!numpages) + return 0; + data.set_mask = set_mask; data.clear_mask = clear_mask; From bd6d701606119bbbc3b55a064a53c6789a1acab6 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 26 Jan 2016 04:15:18 -0700 Subject: [PATCH 1797/2091] x86/mm: Fix types used in pgprot cacheability flags translations [ Upstream commit 3625c2c234ef66acf21a72d47a5ffa94f6c5ebf2 ] For PAE kernels "unsigned long" is not suitable to hold page protection flags, since _PAGE_NX doesn't fit there. This is the reason for quite a few W+X pages getting reported as insecure during boot (observed namely for the entire initrd range). Fixes: 281d4078be ("x86: Make page cache mode a real type") Signed-off-by: Jan Beulich Reviewed-by: Juergen Gross Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/56A7635602000078000CAFF1@prv-mh.provo.novell.com Signed-off-by: Thomas Gleixner Signed-off-by: Sasha Levin --- arch/x86/include/asm/pgtable_types.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 78f0c8cbe316f..74fcdf3f1534c 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -337,20 +337,18 @@ static inline enum page_cache_mode pgprot2cachemode(pgprot_t pgprot) } static inline pgprot_t pgprot_4k_2_large(pgprot_t pgprot) { + pgprotval_t val = pgprot_val(pgprot); pgprot_t new; - unsigned long val; - val = pgprot_val(pgprot); pgprot_val(new) = (val & ~(_PAGE_PAT | _PAGE_PAT_LARGE)) | ((val & _PAGE_PAT) << (_PAGE_BIT_PAT_LARGE - _PAGE_BIT_PAT)); return new; } static inline pgprot_t pgprot_large_2_4k(pgprot_t pgprot) { + pgprotval_t val = pgprot_val(pgprot); pgprot_t new; - unsigned long val; - val = pgprot_val(pgprot); pgprot_val(new) = (val & ~(_PAGE_PAT | _PAGE_PAT_LARGE)) | ((val & _PAGE_PAT_LARGE) >> (_PAGE_BIT_PAT_LARGE - _PAGE_BIT_PAT)); From 329d4fd5f2af659d62fa6f6cea72233c7e033e9a Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 2 Dec 2015 16:25:32 +1100 Subject: [PATCH 1798/2091] powerpc/eeh: Fix PE location code [ Upstream commit 7e56f627768da4e6480986b5145dc3422bc448a5 ] In eeh_pe_loc_get(), the PE location code is retrieved from the "ibm,loc-code" property of the device node for the bridge of the PE's primary bus. It's not correct because the property indicates the parent PE's location code. 
This reads the correct PE location code from "ibm,io-base-loc-code" or "ibm,slot-location-code" property of PE parent bus's device node. Cc: stable@vger.kernel.org # v3.16+ Fixes: 357b2f3dd9b7 ("powerpc/eeh: Dump PE location code") Signed-off-by: Gavin Shan Tested-by: Russell Currey Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin --- arch/powerpc/kernel/eeh_pe.c | 33 +++++++++++++++------------------ 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index 35f0b62259bbd..22f6d954ef89e 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -861,32 +861,29 @@ void eeh_pe_restore_bars(struct eeh_pe *pe) const char *eeh_pe_loc_get(struct eeh_pe *pe) { struct pci_bus *bus = eeh_pe_bus_get(pe); - struct device_node *dn = pci_bus_to_OF_node(bus); + struct device_node *dn; const char *loc = NULL; - if (!dn) - goto out; + while (bus) { + dn = pci_bus_to_OF_node(bus); + if (!dn) { + bus = bus->parent; + continue; + } - /* PHB PE or root PE ? */ - if (pci_is_root_bus(bus)) { - loc = of_get_property(dn, "ibm,loc-code", NULL); - if (!loc) + if (pci_is_root_bus(bus)) loc = of_get_property(dn, "ibm,io-base-loc-code", NULL); + else + loc = of_get_property(dn, "ibm,slot-location-code", + NULL); + if (loc) - goto out; + return loc; - /* Check the root port */ - dn = dn->child; - if (!dn) - goto out; + bus = bus->parent; } - loc = of_get_property(dn, "ibm,loc-code", NULL); - if (!loc) - loc = of_get_property(dn, "ibm,slot-location-code", NULL); - -out: - return loc ? loc : "N/A"; + return "N/A"; } /** From 91e65860a7fe55d3d8c150104084b54f2760bc3d Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Wed, 20 Jan 2016 11:26:01 -0500 Subject: [PATCH 1799/2091] SCSI: fix crashes in sd and sr runtime PM [ Upstream commit 13b4389143413a1f18127c07f72c74cad5b563e8 ] Runtime suspend during driver probe and removal can cause problems. The driver's runtime_suspend or runtime_resume callbacks may invoked before the driver has finished binding to the device or after the driver has unbound from the device. This problem shows up with the sd and sr drivers, and can cause disk or CD/DVD drives to become unusable as a result. The fix is simple. The drivers store a pointer to the scsi_disk or scsi_cd structure as their private device data when probing is finished, so we simply have to be sure to clear the private data during removal and test it during runtime suspend/resume. This fixes . 
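The guard pattern the fix relies on is compact enough to sketch in full. The demo_* names below are invented for illustration only; the real hunks touch sd_suspend_common(), sd_resume(), sr_runtime_suspend() and sr_remove():

  #include <linux/device.h>

  struct demo_disk { int opened; };        /* stand-in for scsi_disk / scsi_cd */

  static int demo_runtime_suspend(struct device *dev)
  {
          struct demo_disk *dsk = dev_get_drvdata(dev);

          if (!dsk)       /* runtime PM ran before probe finished or after remove */
                  return 0;
          /* ... suspend work that dereferences dsk ... */
          return 0;
  }

  static void demo_remove(struct device *dev)
  {
          /* ... tear down ... */
          dev_set_drvdata(dev, NULL);     /* later PM callbacks see NULL and bail out */
  }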
Signed-off-by: Alan Stern Reported-by: Paul Menzel Reported-by: Erich Schubert Reported-by: Alexandre Rossi Tested-by: Paul Menzel Tested-by: Erich Schubert CC: Signed-off-by: James Bottomley Signed-off-by: Sasha Levin --- drivers/scsi/sd.c | 7 +++++-- drivers/scsi/sr.c | 4 ++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 11ea52b2c36b3..c66fd23b3c13d 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3141,8 +3141,8 @@ static int sd_suspend_common(struct device *dev, bool ignore_stop_errors) struct scsi_disk *sdkp = dev_get_drvdata(dev); int ret = 0; - if (!sdkp) - return 0; /* this can happen */ + if (!sdkp) /* E.g.: runtime suspend following sd_remove() */ + return 0; if (sdkp->WCE && sdkp->media_present) { sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n"); @@ -3181,6 +3181,9 @@ static int sd_resume(struct device *dev) { struct scsi_disk *sdkp = dev_get_drvdata(dev); + if (!sdkp) /* E.g.: runtime resume at the start of sd_probe() */ + return 0; + if (!sdkp->device->manage_start_stop) return 0; diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index 8bd54a64efd6a..64c867405ad4f 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -144,6 +144,9 @@ static int sr_runtime_suspend(struct device *dev) { struct scsi_cd *cd = dev_get_drvdata(dev); + if (!cd) /* E.g.: runtime suspend following sr_remove() */ + return 0; + if (cd->media_present) return -EBUSY; else @@ -985,6 +988,7 @@ static int sr_remove(struct device *dev) scsi_autopm_get_device(cd->device); del_gendisk(cd->disk); + dev_set_drvdata(dev, NULL); mutex_lock(&sr_ref_mutex); kref_put(&cd->kref, sr_kref_release); From 27055738c910ee29a9de4b496e198e17b38b0eed Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sun, 10 Jan 2016 22:40:55 -0800 Subject: [PATCH 1800/2091] tty: Fix unsafe ldisc reference via ioctl(TIOCGETD) [ Upstream commit 5c17c861a357e9458001f021a7afa7aab9937439 ] ioctl(TIOCGETD) retrieves the line discipline id directly from the ldisc because the line discipline id (c_line) in termios is untrustworthy; userspace may have set termios via ioctl(TCSETS*) without actually changing the line discipline via ioctl(TIOCSETD). However, directly accessing the current ldisc via tty->ldisc is unsafe; the ldisc ptr dereferenced may be stale if the line discipline is changing via ioctl(TIOCSETD) or hangup. Wait for the line discipline reference (just like read() or write()) to retrieve the "current" line discipline id. Cc: Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/tty_io.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 5a5c1ab5a3756..be96970646a96 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -2669,6 +2669,28 @@ static int tiocsetd(struct tty_struct *tty, int __user *p) return ret; } +/** + * tiocgetd - get line discipline + * @tty: tty device + * @p: pointer to user data + * + * Retrieves the line discipline id directly from the ldisc. 
+ * + * Locking: waits for ldisc reference (in case the line discipline + * is changing or the tty is being hungup) + */ + +static int tiocgetd(struct tty_struct *tty, int __user *p) +{ + struct tty_ldisc *ld; + int ret; + + ld = tty_ldisc_ref_wait(tty); + ret = put_user(ld->ops->num, p); + tty_ldisc_deref(ld); + return ret; +} + /** * send_break - performed time break * @tty: device to break on @@ -2895,7 +2917,7 @@ long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case TIOCGSID: return tiocgsid(tty, real_tty, p); case TIOCGETD: - return put_user(tty->ldisc->ops->num, (int __user *)p); + return tiocgetd(tty, p); case TIOCSETD: return tiocsetd(tty, p); case TIOCVHANGUP: From 28b824966ab269de94e0ca640b7c61aedc6a83de Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sun, 10 Jan 2016 22:40:56 -0800 Subject: [PATCH 1801/2091] n_tty: Fix unsafe reference to "other" ldisc [ Upstream commit 6d27a63caad3f13e96cf065d2d96828c2006be6b ] Although n_tty_check_unthrottle() has a valid ldisc reference (since the tty core gets the ldisc ref in tty_read() before calling the line discipline read() method), it does not have a valid ldisc reference to the "other" pty of a pty pair. Since getting an ldisc reference for tty->link essentially open-codes tty_wakeup(), just replace with the equivalent tty_wakeup(). Cc: Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/n_tty.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index e5edf45e9d4cd..33088c70ef3b1 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -258,16 +258,13 @@ static void n_tty_check_throttle(struct tty_struct *tty) static void n_tty_check_unthrottle(struct tty_struct *tty) { - if (tty->driver->type == TTY_DRIVER_TYPE_PTY && - tty->link->ldisc->ops->write_wakeup == n_tty_write_wakeup) { + if (tty->driver->type == TTY_DRIVER_TYPE_PTY) { if (chars_in_buffer(tty) > TTY_THRESHOLD_UNTHROTTLE) return; if (!tty->count) return; n_tty_kick_worker(tty); - n_tty_write_wakeup(tty->link); - if (waitqueue_active(&tty->link->write_wait)) - wake_up_interruptible_poll(&tty->link->write_wait, POLLOUT); + tty_wakeup(tty->link); return; } From d515d710883d98ee77819fe9b4848bfdcfd0a324 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sun, 10 Jan 2016 22:40:58 -0800 Subject: [PATCH 1802/2091] staging/speakup: Use tty_ldisc_ref() for paste kworker [ Upstream commit f4f9edcf9b5289ed96113e79fa65a7bf27ecb096 ] As the function documentation for tty_ldisc_ref_wait() notes, it is only callable from a tty file_operations routine; otherwise there is no guarantee the ref won't be NULL. The key difference with the VT's paste_selection() is that is an ioctl, where __speakup_paste_selection() is completely async kworker, kicked off from interrupt context. 
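The distinction between the two reference helpers is the heart of this change, so a minimal sketch of the safe pattern for asynchronous (non-file_operations) context may help. demo_async_work() is a made-up name; tty_ldisc_ref() and tty_ldisc_deref() are the real helpers the patch switches to:

  #include <linux/tty.h>

  static void demo_async_work(struct tty_struct *tty)
  {
          struct tty_ldisc *ld = tty_ldisc_ref(tty);  /* non-blocking, may fail */

          if (!ld)        /* ldisc changing or tty hanging up: just bail */
                  return;
          /* ... use ld->ops ... */
          tty_ldisc_deref(ld);
  }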
Fixes: 28a821c30688 ("Staging: speakup: Update __speakup_paste_selection() tty (ab)usage to match vt") Cc: Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/staging/speakup/selection.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/staging/speakup/selection.c b/drivers/staging/speakup/selection.c index a0315701c7d96..ed68b2cfe0311 100644 --- a/drivers/staging/speakup/selection.c +++ b/drivers/staging/speakup/selection.c @@ -141,7 +141,9 @@ static void __speakup_paste_selection(struct work_struct *work) struct tty_ldisc *ld; DECLARE_WAITQUEUE(wait, current); - ld = tty_ldisc_ref_wait(tty); + ld = tty_ldisc_ref(tty); + if (!ld) + goto tty_unref; tty_buffer_lock_exclusive(&vc->port); add_wait_queue(&vc->paste_wait, &wait); @@ -161,6 +163,7 @@ static void __speakup_paste_selection(struct work_struct *work) tty_buffer_unlock_exclusive(&vc->port); tty_ldisc_deref(ld); +tty_unref: tty_kref_put(tty); } From aa54b6de2b2ffbd6bb3086dbebd8fea61701202b Mon Sep 17 00:00:00 2001 From: Milo Kim Date: Wed, 13 Jan 2016 16:19:50 +0900 Subject: [PATCH 1803/2091] irqchip/atmel-aic: Fix wrong bit operation for IRQ priority [ Upstream commit 49f34134aea74f19ca016f055d25ee55ec359dee ] Atmel AIC has common structure for SMR (Source Mode Register). bit[6:5] Interrupt source type bit[2:0] Priority level Other bits are unused. To update new priority value, bit[2:0] should be cleared first and then new priority level can be written. However, aic_common_set_priority() helper clears source type bits instead of priority bits. This patch fixes wrong mask bit operation. Fixes: b1479ebb7720 "irqchip: atmel-aic: Add atmel AIC/AIC5 drivers" Signed-off-by: Milo Kim Acked-by: Boris Brezillon Cc: Jason Cooper Cc: Marc Zyngier Cc: Ludovic Desroches Cc: Nicholas Ferre Cc: stable@vger.kernel.org #v3.17+ Link: http://lkml.kernel.org/r/1452669592-3401-2-git-send-email-milo.kim@ti.com Signed-off-by: Thomas Gleixner Signed-off-by: Sasha Levin --- drivers/irqchip/irq-atmel-aic-common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-atmel-aic-common.c b/drivers/irqchip/irq-atmel-aic-common.c index 63cd031b2c28d..869d01dd40630 100644 --- a/drivers/irqchip/irq-atmel-aic-common.c +++ b/drivers/irqchip/irq-atmel-aic-common.c @@ -86,7 +86,7 @@ int aic_common_set_priority(int priority, unsigned *val) priority > AT91_AIC_IRQ_MAX_PRIORITY) return -EINVAL; - *val &= AT91_AIC_PRIOR; + *val &= ~AT91_AIC_PRIOR; *val |= priority; return 0; From 988590966531b9ab4d7c6101f02a6f065c5df7a5 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Sat, 26 Dec 2015 06:00:48 +0100 Subject: [PATCH 1804/2091] seccomp: always propagate NO_NEW_PRIVS on tsync [ Upstream commit 103502a35cfce0710909da874f092cb44823ca03 ] Before this patch, a process with some permissive seccomp filter that was applied by root without NO_NEW_PRIVS was able to add more filters to itself without setting NO_NEW_PRIVS by setting the new filter from a throwaway thread with NO_NEW_PRIVS. 
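To make the described bypass concrete, here is a hedged userspace sketch of the pre-fix behaviour. The trivial allow-all filter and the function names are invented for illustration; PR_SET_NO_NEW_PRIVS, SECCOMP_SET_MODE_FILTER and SECCOMP_FILTER_FLAG_TSYNC are the real interfaces involved:

  #include <linux/filter.h>
  #include <linux/seccomp.h>
  #include <pthread.h>
  #include <sys/prctl.h>
  #include <sys/syscall.h>
  #include <unistd.h>

  static struct sock_filter allow_all[] = {
          BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
  };
  static struct sock_fprog prog = { .len = 1, .filter = allow_all };

  static void *throwaway(void *arg)
  {
          (void)arg;
          /* Only this short-lived thread sets NO_NEW_PRIVS ... */
          prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
          /* ... but TSYNC attaches the new filter to every sibling thread. */
          syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER,
                  SECCOMP_FILTER_FLAG_TSYNC, &prog);
          return NULL;    /* before the fix, the siblings kept NNP unset */
  }

  int main(void)
  {
          pthread_t t;

          pthread_create(&t, NULL, throwaway, NULL);
          pthread_join(t, NULL);
          /* main() is now filtered, yet still free of NO_NEW_PRIVS */
          return 0;
  }

With the patch applied, the TSYNC path also propagates NO_NEW_PRIVS to every thread that receives the filter, closing this loophole.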
Signed-off-by: Jann Horn Cc: stable@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: Sasha Levin --- kernel/seccomp.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 4f44028943e66..30c682adcdeb8 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -317,24 +317,24 @@ static inline void seccomp_sync_threads(void) put_seccomp_filter(thread); smp_store_release(&thread->seccomp.filter, caller->seccomp.filter); + + /* + * Don't let an unprivileged task work around + * the no_new_privs restriction by creating + * a thread that sets it up, enters seccomp, + * then dies. + */ + if (task_no_new_privs(caller)) + task_set_no_new_privs(thread); + /* * Opt the other thread into seccomp if needed. * As threads are considered to be trust-realm * equivalent (see ptrace_may_access), it is safe to * allow one thread to transition the other. */ - if (thread->seccomp.mode == SECCOMP_MODE_DISABLED) { - /* - * Don't let an unprivileged task work around - * the no_new_privs restriction by creating - * a thread that sets it up, enters seccomp, - * then dies. - */ - if (task_no_new_privs(caller)) - task_set_no_new_privs(thread); - + if (thread->seccomp.mode == SECCOMP_MODE_DISABLED) seccomp_assign_mode(thread, SECCOMP_MODE_FILTER); - } } } From 9646e177106ce8154bf87cde52188f015250e8ed Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 24 Jul 2015 00:42:02 -0400 Subject: [PATCH 1805/2091] drm/radeon: rework audio modeset to handle non-audio hdmi features [ Upstream commit 7726e72b3d6879ee5fc743a230eb6f5afa12844b ] Need to setup the deep color and avi packets regardless of audio setup. Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/radeon/atombios_encoders.c | 3 +- drivers/gpu/drm/radeon/radeon_audio.c | 67 +++++++++++++--------- 2 files changed, 41 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c index 5be50ef2b30ef..bb292143997ee 100644 --- a/drivers/gpu/drm/radeon/atombios_encoders.c +++ b/drivers/gpu/drm/radeon/atombios_encoders.c @@ -2310,8 +2310,7 @@ radeon_atom_encoder_mode_set(struct drm_encoder *encoder, encoder_mode = atombios_get_encoder_mode(encoder); if (connector && (radeon_audio != 0) && ((encoder_mode == ATOM_ENCODER_MODE_HDMI) || - (ENCODER_MODE_IS_DP(encoder_mode) && - drm_detect_monitor_audio(radeon_connector_edid(connector))))) + ENCODER_MODE_IS_DP(encoder_mode))) radeon_audio_mode_set(encoder, adjusted_mode); } diff --git a/drivers/gpu/drm/radeon/radeon_audio.c b/drivers/gpu/drm/radeon/radeon_audio.c index d77dd1430d58b..2c02e99b5f95a 100644 --- a/drivers/gpu/drm/radeon/radeon_audio.c +++ b/drivers/gpu/drm/radeon/radeon_audio.c @@ -698,26 +698,37 @@ static void radeon_audio_hdmi_mode_set(struct drm_encoder *encoder, { struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv; + struct drm_connector *connector = radeon_get_connector_for_encoder(encoder); if (!dig || !dig->afmt) return; - radeon_audio_set_mute(encoder, true); + if (!connector) + return; - radeon_audio_write_speaker_allocation(encoder); - radeon_audio_write_sad_regs(encoder); - radeon_audio_write_latency_fields(encoder, mode); - radeon_audio_set_dto(encoder, mode->clock); - radeon_audio_set_vbi_packet(encoder); - radeon_hdmi_set_color_depth(encoder); - radeon_audio_update_acr(encoder, mode->clock); - radeon_audio_set_audio_packet(encoder); - 
radeon_audio_select_pin(encoder); + if (drm_detect_monitor_audio(radeon_connector_edid(connector))) { + radeon_audio_set_mute(encoder, true); - if (radeon_audio_set_avi_packet(encoder, mode) < 0) - return; + radeon_audio_write_speaker_allocation(encoder); + radeon_audio_write_sad_regs(encoder); + radeon_audio_write_latency_fields(encoder, mode); + radeon_audio_set_dto(encoder, mode->clock); + radeon_audio_set_vbi_packet(encoder); + radeon_hdmi_set_color_depth(encoder); + radeon_audio_update_acr(encoder, mode->clock); + radeon_audio_set_audio_packet(encoder); + radeon_audio_select_pin(encoder); + + if (radeon_audio_set_avi_packet(encoder, mode) < 0) + return; + + radeon_audio_set_mute(encoder, false); + } else { + radeon_hdmi_set_color_depth(encoder); - radeon_audio_set_mute(encoder, false); + if (radeon_audio_set_avi_packet(encoder, mode) < 0) + return; + } } static void radeon_audio_dp_mode_set(struct drm_encoder *encoder, @@ -732,24 +743,26 @@ static void radeon_audio_dp_mode_set(struct drm_encoder *encoder, struct radeon_connector_atom_dig *dig_connector = radeon_connector->con_priv; - if (!connector) - return; - if (!dig || !dig->afmt) return; - radeon_audio_write_speaker_allocation(encoder); - radeon_audio_write_sad_regs(encoder); - radeon_audio_write_latency_fields(encoder, mode); - if (rdev->clock.dp_extclk || ASIC_IS_DCE5(rdev)) - radeon_audio_set_dto(encoder, rdev->clock.default_dispclk * 10); - else - radeon_audio_set_dto(encoder, dig_connector->dp_clock); - radeon_audio_set_audio_packet(encoder); - radeon_audio_select_pin(encoder); - - if (radeon_audio_set_avi_packet(encoder, mode) < 0) + if (!connector) return; + + if (drm_detect_monitor_audio(radeon_connector_edid(connector))) { + radeon_audio_write_speaker_allocation(encoder); + radeon_audio_write_sad_regs(encoder); + radeon_audio_write_latency_fields(encoder, mode); + if (rdev->clock.dp_extclk || ASIC_IS_DCE5(rdev)) + radeon_audio_set_dto(encoder, rdev->clock.default_dispclk * 10); + else + radeon_audio_set_dto(encoder, dig_connector->dp_clock); + radeon_audio_set_audio_packet(encoder); + radeon_audio_select_pin(encoder); + + if (radeon_audio_set_avi_packet(encoder, mode) < 0) + return; + } } void radeon_audio_mode_set(struct drm_encoder *encoder, From 7fb7275ff91c477cc1a8fbbd9667343f8261998e Mon Sep 17 00:00:00 2001 From: Slava Grigorev Date: Tue, 26 Jan 2016 16:45:10 -0500 Subject: [PATCH 1806/2091] drm/radeon: cleaned up VCO output settings for DP audio MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit c9a392eac18409f51a071520cf508c0b4ad990e2 ] This is preparation for the fixes in the following patches. 
Signed-off-by: Slava Grigorev Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Sasha Levin --- drivers/gpu/drm/radeon/dce6_afmt.c | 2 +- drivers/gpu/drm/radeon/radeon.h | 2 +- drivers/gpu/drm/radeon/radeon_atombios.c | 12 +++++++----- drivers/gpu/drm/radeon/radeon_audio.c | 8 +------- 4 files changed, 10 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/radeon/dce6_afmt.c b/drivers/gpu/drm/radeon/dce6_afmt.c index 883dce66e2dcf..abd72ee7a24db 100644 --- a/drivers/gpu/drm/radeon/dce6_afmt.c +++ b/drivers/gpu/drm/radeon/dce6_afmt.c @@ -296,7 +296,7 @@ void dce6_dp_audio_set_dto(struct radeon_device *rdev, div = 0; if (div) - clock = rdev->clock.gpupll_outputfreq * 10 / div; + clock /= div; WREG32(DCE8_DCCG_AUDIO_DTO1_PHASE, 24000); WREG32(DCE8_DCCG_AUDIO_DTO1_MODULE, clock); diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 95770d02a6797..4bca29c5abfa1 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -268,7 +268,7 @@ struct radeon_clock { uint32_t current_dispclk; uint32_t dp_extclk; uint32_t max_pixel_clock; - uint32_t gpupll_outputfreq; + uint32_t vco_freq; }; /* diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c index 08fc1b5effa8a..9a9363a7e5b9a 100644 --- a/drivers/gpu/drm/radeon/radeon_atombios.c +++ b/drivers/gpu/drm/radeon/radeon_atombios.c @@ -1257,12 +1257,14 @@ bool radeon_atom_get_clock_info(struct drm_device *dev) rdev->mode_info.firmware_flags = le16_to_cpu(firmware_info->info.usFirmwareCapability.susAccess); - if (ASIC_IS_DCE8(rdev)) { - rdev->clock.gpupll_outputfreq = + if (ASIC_IS_DCE8(rdev)) + rdev->clock.vco_freq = le32_to_cpu(firmware_info->info_22.ulGPUPLL_OutputFreq); - if (rdev->clock.gpupll_outputfreq == 0) - rdev->clock.gpupll_outputfreq = 360000; /* 3.6 GHz */ - } + else + rdev->clock.vco_freq = rdev->clock.current_dispclk; + + if (rdev->clock.vco_freq == 0) + rdev->clock.vco_freq = 360000; /* 3.6 GHz */ return true; } diff --git a/drivers/gpu/drm/radeon/radeon_audio.c b/drivers/gpu/drm/radeon/radeon_audio.c index 2c02e99b5f95a..85e1c234f0203 100644 --- a/drivers/gpu/drm/radeon/radeon_audio.c +++ b/drivers/gpu/drm/radeon/radeon_audio.c @@ -739,9 +739,6 @@ static void radeon_audio_dp_mode_set(struct drm_encoder *encoder, struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv; struct drm_connector *connector = radeon_get_connector_for_encoder(encoder); - struct radeon_connector *radeon_connector = to_radeon_connector(connector); - struct radeon_connector_atom_dig *dig_connector = - radeon_connector->con_priv; if (!dig || !dig->afmt) return; @@ -753,10 +750,7 @@ static void radeon_audio_dp_mode_set(struct drm_encoder *encoder, radeon_audio_write_speaker_allocation(encoder); radeon_audio_write_sad_regs(encoder); radeon_audio_write_latency_fields(encoder, mode); - if (rdev->clock.dp_extclk || ASIC_IS_DCE5(rdev)) - radeon_audio_set_dto(encoder, rdev->clock.default_dispclk * 10); - else - radeon_audio_set_dto(encoder, dig_connector->dp_clock); + radeon_audio_set_dto(encoder, rdev->clock.vco_freq * 10); radeon_audio_set_audio_packet(encoder); radeon_audio_select_pin(encoder); From 2652823a09685084d3c1dc0abe17a2fa6596d19a Mon Sep 17 00:00:00 2001 From: Slava Grigorev Date: Tue, 26 Jan 2016 16:56:25 -0500 Subject: [PATCH 1807/2091] drm/radeon: Add a common function for DFS handling MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit a64c9dab1c4d05c87ec8a1cb9b48915816462143 ] Move encoding of DFS (digital frequency synthesizer) divider into a separate function and improve calculation precision. Signed-off-by: Slava Grigorev Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Sasha Levin --- drivers/gpu/drm/radeon/dce6_afmt.c | 12 ++---------- drivers/gpu/drm/radeon/radeon_audio.c | 12 ++++++++++++ drivers/gpu/drm/radeon/radeon_audio.h | 1 + 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/radeon/dce6_afmt.c b/drivers/gpu/drm/radeon/dce6_afmt.c index abd72ee7a24db..848b1ffd5cc43 100644 --- a/drivers/gpu/drm/radeon/dce6_afmt.c +++ b/drivers/gpu/drm/radeon/dce6_afmt.c @@ -285,18 +285,10 @@ void dce6_dp_audio_set_dto(struct radeon_device *rdev, unsigned int div = (RREG32(DENTIST_DISPCLK_CNTL) & DENTIST_DPREFCLK_WDIVIDER_MASK) >> DENTIST_DPREFCLK_WDIVIDER_SHIFT; - - if (div < 128 && div >= 96) - div -= 64; - else if (div >= 64) - div = div / 2 - 16; - else if (div >= 8) - div /= 4; - else - div = 0; + div = radeon_audio_decode_dfs_div(div); if (div) - clock /= div; + clock = clock * 100 / div; WREG32(DCE8_DCCG_AUDIO_DTO1_PHASE, 24000); WREG32(DCE8_DCCG_AUDIO_DTO1_MODULE, clock); diff --git a/drivers/gpu/drm/radeon/radeon_audio.c b/drivers/gpu/drm/radeon/radeon_audio.c index 85e1c234f0203..b214663b370da 100644 --- a/drivers/gpu/drm/radeon/radeon_audio.c +++ b/drivers/gpu/drm/radeon/radeon_audio.c @@ -775,3 +775,15 @@ void radeon_audio_dpms(struct drm_encoder *encoder, int mode) if (radeon_encoder->audio && radeon_encoder->audio->dpms) radeon_encoder->audio->dpms(encoder, mode == DRM_MODE_DPMS_ON); } + +unsigned int radeon_audio_decode_dfs_div(unsigned int div) +{ + if (div >= 8 && div < 64) + return (div - 8) * 25 + 200; + else if (div >= 64 && div < 96) + return (div - 64) * 50 + 1600; + else if (div >= 96 && div < 128) + return (div - 96) * 100 + 3200; + else + return 0; +} diff --git a/drivers/gpu/drm/radeon/radeon_audio.h b/drivers/gpu/drm/radeon/radeon_audio.h index 059cc3012062a..5c70cceaa4a6c 100644 --- a/drivers/gpu/drm/radeon/radeon_audio.h +++ b/drivers/gpu/drm/radeon/radeon_audio.h @@ -79,5 +79,6 @@ void radeon_audio_fini(struct radeon_device *rdev); void radeon_audio_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode); void radeon_audio_dpms(struct drm_encoder *encoder, int mode); +unsigned int radeon_audio_decode_dfs_div(unsigned int div); #endif From 2fbdb6c658f4a10c649fd57c42a5199d2a8032d8 Mon Sep 17 00:00:00 2001 From: Slava Grigorev Date: Tue, 26 Jan 2016 17:35:57 -0500 Subject: [PATCH 1808/2091] drm/radeon: fix DP audio support for APU with DCE4.1 display engine MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit fe6fc1f132b4300c1f6defd43a5d673eb60a820d ] Properly setup the DFS divider for DP audio for DCE4.1. 
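As a worked illustration of the helper introduced in the previous patch (a host-side sketch only; decode_dfs_div() simply mirrors radeon_audio_decode_dfs_div()), the register field maps to a divider scaled by 100, which is what lets the callers keep two decimal digits of precision via clock * 100 / div:

  #include <stdio.h>

  static unsigned int decode_dfs_div(unsigned int div)
  {
          if (div >= 8 && div < 64)
                  return (div - 8) * 25 + 200;     /* dividers 2.00 .. 15.75 */
          else if (div >= 64 && div < 96)
                  return (div - 64) * 50 + 1600;   /* dividers 16.00 .. 31.50 */
          else if (div >= 96 && div < 128)
                  return (div - 96) * 100 + 3200;  /* dividers 32.00 .. 63.00 */
          return 0;
  }

  int main(void)
  {
          unsigned int raw = 10;                   /* example register value */
          unsigned int div = decode_dfs_div(raw);  /* 250, i.e. divide by 2.50 */

          printf("raw %u -> divider %u.%02u\n", raw, div / 100, div % 100);
          /* the old integer-only path computed 10 / 4 = 2, losing the .50 */
          return 0;
  }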
Signed-off-by: Slava Grigorev Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Sasha Levin --- drivers/gpu/drm/radeon/evergreen_hdmi.c | 10 +++++++ drivers/gpu/drm/radeon/evergreend.h | 5 ++++ drivers/gpu/drm/radeon/radeon_atombios.c | 37 +++++++++++++++++++----- 3 files changed, 44 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/radeon/evergreen_hdmi.c b/drivers/gpu/drm/radeon/evergreen_hdmi.c index 9953356fe2637..3cf04a2f44bbb 100644 --- a/drivers/gpu/drm/radeon/evergreen_hdmi.c +++ b/drivers/gpu/drm/radeon/evergreen_hdmi.c @@ -289,6 +289,16 @@ void dce4_dp_audio_set_dto(struct radeon_device *rdev, * number (coefficient of two integer numbers. DCCG_AUDIO_DTOx_PHASE * is the numerator, DCCG_AUDIO_DTOx_MODULE is the denominator */ + if (ASIC_IS_DCE41(rdev)) { + unsigned int div = (RREG32(DCE41_DENTIST_DISPCLK_CNTL) & + DENTIST_DPREFCLK_WDIVIDER_MASK) >> + DENTIST_DPREFCLK_WDIVIDER_SHIFT; + div = radeon_audio_decode_dfs_div(div); + + if (div) + clock = 100 * clock / div; + } + WREG32(DCCG_AUDIO_DTO1_PHASE, 24000); WREG32(DCCG_AUDIO_DTO1_MODULE, clock); } diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h index 4aa5f755572b1..13b6029d65cc5 100644 --- a/drivers/gpu/drm/radeon/evergreend.h +++ b/drivers/gpu/drm/radeon/evergreend.h @@ -511,6 +511,11 @@ #define DCCG_AUDIO_DTO1_CNTL 0x05cc # define DCCG_AUDIO_DTO1_USE_512FBR_DTO (1 << 3) +#define DCE41_DENTIST_DISPCLK_CNTL 0x049c +# define DENTIST_DPREFCLK_WDIVIDER(x) (((x) & 0x7f) << 24) +# define DENTIST_DPREFCLK_WDIVIDER_MASK (0x7f << 24) +# define DENTIST_DPREFCLK_WDIVIDER_SHIFT 24 + /* DCE 4.0 AFMT */ #define HDMI_CONTROL 0x7030 # define HDMI_KEEPOUT_MODE (1 << 0) diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c index 9a9363a7e5b9a..de9a2ffcf5f76 100644 --- a/drivers/gpu/drm/radeon/radeon_atombios.c +++ b/drivers/gpu/drm/radeon/radeon_atombios.c @@ -1106,6 +1106,31 @@ union firmware_info { ATOM_FIRMWARE_INFO_V2_2 info_22; }; +union igp_info { + struct _ATOM_INTEGRATED_SYSTEM_INFO info; + struct _ATOM_INTEGRATED_SYSTEM_INFO_V2 info_2; + struct _ATOM_INTEGRATED_SYSTEM_INFO_V6 info_6; + struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_7 info_7; + struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_8 info_8; +}; + +static void radeon_atombios_get_dentist_vco_freq(struct radeon_device *rdev) +{ + struct radeon_mode_info *mode_info = &rdev->mode_info; + int index = GetIndexIntoMasterTable(DATA, IntegratedSystemInfo); + union igp_info *igp_info; + u8 frev, crev; + u16 data_offset; + + if (atom_parse_data_header(mode_info->atom_context, index, NULL, + &frev, &crev, &data_offset)) { + igp_info = (union igp_info *)(mode_info->atom_context->bios + + data_offset); + rdev->clock.vco_freq = + le32_to_cpu(igp_info->info_6.ulDentistVCOFreq); + } +} + bool radeon_atom_get_clock_info(struct drm_device *dev) { struct radeon_device *rdev = dev->dev_private; @@ -1260,6 +1285,10 @@ bool radeon_atom_get_clock_info(struct drm_device *dev) if (ASIC_IS_DCE8(rdev)) rdev->clock.vco_freq = le32_to_cpu(firmware_info->info_22.ulGPUPLL_OutputFreq); + else if (ASIC_IS_DCE5(rdev)) + rdev->clock.vco_freq = rdev->clock.current_dispclk; + else if (ASIC_IS_DCE41(rdev)) + radeon_atombios_get_dentist_vco_freq(rdev); else rdev->clock.vco_freq = rdev->clock.current_dispclk; @@ -1272,14 +1301,6 @@ bool radeon_atom_get_clock_info(struct drm_device *dev) return false; } -union igp_info { - struct _ATOM_INTEGRATED_SYSTEM_INFO info; - struct 
_ATOM_INTEGRATED_SYSTEM_INFO_V2 info_2; - struct _ATOM_INTEGRATED_SYSTEM_INFO_V6 info_6; - struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_7 info_7; - struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_8 info_8; -}; - bool radeon_atombios_sideport_present(struct radeon_device *rdev) { struct radeon_mode_info *mode_info = &rdev->mode_info; From f6b4419806e546fb02f91bed03572362e2623879 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 28 Jan 2016 07:54:16 +0100 Subject: [PATCH 1809/2091] ALSA: dummy: Disable switching timer backend via sysfs [ Upstream commit 7ee96216c31aabe1eb42fb91ff50dae9fcd014b2 ] ALSA dummy driver can switch the timer backend between system timer and hrtimer via its hrtimer module option. This can be also switched dynamically via sysfs, but it may lead to a memory corruption when switching is done while a PCM stream is running; the stream instance for the newly switched timer method tries to access the memory that was allocated by another timer method although the sizes differ. As the simplest fix, this patch just disables the switch via sysfs by dropping the writable bit. BugLink: http://lkml.kernel.org/r/CACT4Y+ZGEeEBntHW5WHn2GoeE0G_kRrCmUh6=dWyy-wfzvuJLg@mail.gmail.com Reported-by: Dmitry Vyukov Cc: Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/drivers/dummy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/drivers/dummy.c b/sound/drivers/dummy.c index d11baaf0f0b4a..96592d5ba7bfe 100644 --- a/sound/drivers/dummy.c +++ b/sound/drivers/dummy.c @@ -87,7 +87,7 @@ MODULE_PARM_DESC(pcm_substreams, "PCM substreams # (1-128) for dummy driver."); module_param(fake_buffer, bool, 0444); MODULE_PARM_DESC(fake_buffer, "Fake buffer allocations."); #ifdef CONFIG_HIGH_RES_TIMERS -module_param(hrtimer, bool, 0644); +module_param(hrtimer, bool, 0444); MODULE_PARM_DESC(hrtimer, "Use hrtimer as the timer source."); #endif From c4bbd2785e0ed7ed3e8c16776194ea9181fc3454 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 15 Oct 2014 15:00:47 -0400 Subject: [PATCH 1810/2091] drm/vmwgfx: respect 'nomodeset' [ Upstream commit 96c5d076f0a5e2023ecdb44d8261f87641ee71e0 ] Signed-off-by: Rob Clark Reviewed-by: Thomas Hellstrom . Cc: stable@vger.kernel.org Signed-off-by: Dave Airlie Signed-off-by: Sasha Levin --- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 15a8d7746fd2c..2aa0e927d4907 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -25,6 +25,7 @@ * **************************************************************************/ #include +#include #include #include "vmwgfx_drv.h" @@ -1447,6 +1448,12 @@ static int vmw_probe(struct pci_dev *pdev, const struct pci_device_id *ent) static int __init vmwgfx_init(void) { int ret; + +#ifdef CONFIG_VGA_CONSOLE + if (vgacon_text_force()) + return -EINVAL; +#endif + ret = drm_pci_init(&driver, &vmw_pci_driver); if (ret) DRM_ERROR("Failed initializing DRM.\n"); From b143adc3f446be7dbb5a0d4c63089078d7718f06 Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Fri, 15 Jan 2016 00:47:41 +0100 Subject: [PATCH 1811/2091] Staging: speakup: Fix getting port information [ Upstream commit 327b882d3bcc1fba82dbd39b5cf5a838c81218e2 ] Commit f79b0d9c223c ("staging: speakup: Fixed warning instead of ") broke the port information in the speakup driver: SERIAL_PORT_DFNS only gets defined if asm/serial.h is included, and no other header includes asm/serial.h. 
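The failure mode is easiest to see from the declarations in serialio.c, sketched here from the context shown in the hunk below (treat it as an illustration rather than a verbatim copy):

  #ifndef SERIAL_PORT_DFNS
  #define SERIAL_PORT_DFNS                /* silently expands to nothing */
  #endif

  static const struct old_serial_port rs_table[] = {
          SERIAL_PORT_DFNS                /* empty table when <asm/serial.h> is missing */
  };

With the table empty, the old "rs_table + index" arithmetic in spk_serial_init() read past the array, which is also why the fix adds the ARRAY_SIZE() bounds check.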
We here make sure serialio.c does get the arch-specific definition of SERIAL_PORT_DFNS from asm/serial.h, if any. Along the way, this makes sure that we do have information for the requested serial port number (index) Fixes: f79b0d9c223c ("staging: speakup: Fixed warning instead of ") Signed-off-by: Samuel Thibault Cc: stable # 3.18 Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/staging/speakup/serialio.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/staging/speakup/serialio.c b/drivers/staging/speakup/serialio.c index 1d9d51bdf5171..f41a7da1949dc 100644 --- a/drivers/staging/speakup/serialio.c +++ b/drivers/staging/speakup/serialio.c @@ -6,6 +6,11 @@ #include "spk_priv.h" #include "serialio.h" +#include +/* WARNING: Do not change this to without testing that + * SERIAL_PORT_DFNS does get defined to the appropriate value. */ +#include + #ifndef SERIAL_PORT_DFNS #define SERIAL_PORT_DFNS #endif @@ -23,9 +28,15 @@ const struct old_serial_port *spk_serial_init(int index) int baud = 9600, quot = 0; unsigned int cval = 0; int cflag = CREAD | HUPCL | CLOCAL | B9600 | CS8; - const struct old_serial_port *ser = rs_table + index; + const struct old_serial_port *ser; int err; + if (index >= ARRAY_SIZE(rs_table)) { + pr_info("no port info for ttyS%d\n", index); + return NULL; + } + ser = rs_table + index; + /* Divisor, bytesize and parity */ quot = ser->baud_base / baud; cval = cflag & (CSIZE | CSTOPB); From 3704a44fc2706841b7af21235e928e0be0bb251d Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 29 Jan 2016 11:36:10 +0000 Subject: [PATCH 1812/2091] x86/mm/pat: Avoid truncation when converting cpa->numpages to address MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 742563777e8da62197d6cb4b99f4027f59454735 ] There are a couple of nasty truncation bugs lurking in the pageattr code that can be triggered when mapping EFI regions, e.g. when we pass a cpa->pgd pointer. Because cpa->numpages is a 32-bit value, shifting left by PAGE_SHIFT will truncate the resultant address to 32-bits. Viorel-Cătălin managed to trigger this bug on his Dell machine that provides a ~5GB EFI region which requires 1236992 pages to be mapped. When calling populate_pud() the end of the region gets calculated incorrectly in the following buggy expression, end = start + (cpa->numpages << PAGE_SHIFT); And only 188416 pages are mapped. Next, populate_pud() gets invoked for a second time because of the loop in __change_page_attr_set_clr(), only this time no pages get mapped because shifting the remaining number of pages (1048576) by PAGE_SHIFT is zero. At which point the loop in __change_page_attr_set_clr() spins forever because we fail to map progress. Hitting this bug depends very much on the virtual address we pick to map the large region at and how many pages we map on the initial run through the loop. This explains why this issue was only recently hit with the introduction of commit a5caa209ba9c ("x86/efi: Fix boot crash by mapping EFI memmap entries bottom-up at runtime, instead of top-down") It's interesting to note that safe uses of cpa->numpages do exist in the pageattr code. If instead of shifting ->numpages we multiply by PAGE_SIZE, no truncation occurs because PAGE_SIZE is a UL value, and so the result is unsigned long. 
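The arithmetic is worth seeing in isolation. A stand-alone, user-space demonstration (assuming an LP64 build and 4 KiB pages, and using unsigned types so the wrap-around is well defined) reproduces the 188416 figure from the report:

  #include <stdio.h>

  int main(void)
  {
          unsigned int numpages = 1236992;          /* the ~5 GiB EFI region */
          unsigned long page_size = 4096UL;

          /* The shift is evaluated in 32 bits and only then widened: it wraps. */
          unsigned long by_shift = numpages << 12;
          /* Multiplying by an unsigned long widens numpages first: no wrap. */
          unsigned long by_mul = numpages * page_size;

          printf("shift:    %lu bytes = %lu pages\n", by_shift, by_shift >> 12);
          printf("multiply: %lu bytes = %lu pages\n", by_mul, by_mul >> 12);
          return 0;
  }

The first line prints 188416 pages, the second the full 1236992.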
To avoid surprises when users try to convert very large cpa->numpages values to addresses, change the data type from 'int' to 'unsigned long', thereby making it suitable for shifting by PAGE_SHIFT without any type casting. The alternative would be to make liberal use of casting, but that is far more likely to cause problems in the future when someone adds more code and fails to cast properly; this bug was difficult enough to track down in the first place. Reported-and-tested-by: Viorel-Cătălin Răpițeanu Acked-by: Borislav Petkov Cc: Sai Praneeth Prakhya Cc: Signed-off-by: Matt Fleming Link: https://bugzilla.kernel.org/show_bug.cgi?id=110131 Link: http://lkml.kernel.org/r/1454067370-10374-1-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Thomas Gleixner Signed-off-by: Sasha Levin --- arch/x86/mm/pageattr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 89af288ec6740..2dd9b3ad3bb58 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -33,7 +33,7 @@ struct cpa_data { pgd_t *pgd; pgprot_t mask_set; pgprot_t mask_clr; - int numpages; + unsigned long numpages; int flags; unsigned long pfn; unsigned force_split : 1; @@ -1324,7 +1324,7 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias) * CPA operation. Either a large page has been * preserved or a single page update happened. */ - BUG_ON(cpa->numpages > numpages); + BUG_ON(cpa->numpages > numpages || !cpa->numpages); numpages -= cpa->numpages; if (cpa->flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) cpa->curpage++; From 156e0fd0ecf2f78b732584253a2f21c56a7e79ba Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Fri, 29 Jan 2016 16:49:47 +0200 Subject: [PATCH 1813/2091] serial: 8250_pci: Add Intel Broadwell ports [ Upstream commit 6c55d9b98335f7f6bd5f061866ff1633401f3a44 ] Some recent (early 2015) macbooks have Intel Broadwell where LPSS UARTs are PCI enumerated instead of ACPI. The LPSS UART block is pretty much same as used on Intel Baytrail so we can reuse the existing Baytrail setup code. Add both Broadwell LPSS UART ports to the list of supported devices. 
Signed-off-by: Leif Liddy Signed-off-by: Mika Westerberg Reviewed-by: Andy Shevchenko Reviewed-by: Heikki Krogerus Cc: stable Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/8250/8250_pci.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index eb8adc2e68c15..2fd163b756653 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -1380,6 +1380,9 @@ ce4100_serial_setup(struct serial_private *priv, #define PCI_DEVICE_ID_INTEL_BSW_UART1 0x228a #define PCI_DEVICE_ID_INTEL_BSW_UART2 0x228c +#define PCI_DEVICE_ID_INTEL_BDW_UART1 0x9ce3 +#define PCI_DEVICE_ID_INTEL_BDW_UART2 0x9ce4 + #define BYT_PRV_CLK 0x800 #define BYT_PRV_CLK_EN (1 << 0) #define BYT_PRV_CLK_M_VAL_SHIFT 1 @@ -1458,11 +1461,13 @@ byt_serial_setup(struct serial_private *priv, switch (pdev->device) { case PCI_DEVICE_ID_INTEL_BYT_UART1: case PCI_DEVICE_ID_INTEL_BSW_UART1: + case PCI_DEVICE_ID_INTEL_BDW_UART1: rx_param->src_id = 3; tx_param->dst_id = 2; break; case PCI_DEVICE_ID_INTEL_BYT_UART2: case PCI_DEVICE_ID_INTEL_BSW_UART2: + case PCI_DEVICE_ID_INTEL_BDW_UART2: rx_param->src_id = 5; tx_param->dst_id = 4; break; @@ -2154,6 +2159,20 @@ static struct pci_serial_quirk pci_serial_quirks[] __refdata = { .subdevice = PCI_ANY_ID, .setup = byt_serial_setup, }, + { + .vendor = PCI_VENDOR_ID_INTEL, + .device = PCI_DEVICE_ID_INTEL_BDW_UART1, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .setup = byt_serial_setup, + }, + { + .vendor = PCI_VENDOR_ID_INTEL, + .device = PCI_DEVICE_ID_INTEL_BDW_UART2, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .setup = byt_serial_setup, + }, /* * ITE */ @@ -5603,6 +5622,16 @@ static struct pci_device_id serial_pci_tbl[] = { PCI_CLASS_COMMUNICATION_SERIAL << 8, 0xff0000, pbn_byt }, + /* Intel Broadwell */ + { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BDW_UART1, + PCI_ANY_ID, PCI_ANY_ID, + PCI_CLASS_COMMUNICATION_SERIAL << 8, 0xff0000, + pbn_byt }, + { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BDW_UART2, + PCI_ANY_ID, PCI_ANY_ID, + PCI_CLASS_COMMUNICATION_SERIAL << 8, 0xff0000, + pbn_byt }, + /* * Intel Penwell */ From 4470ba0e8a7f353cf3868eca475272d49605f823 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 21 Jan 2016 21:09:52 +0800 Subject: [PATCH 1814/2091] MIPS: Fix some missing CONFIG_CPU_MIPSR6 #ifdefs [ Upstream commit 4f33f6c522948fffc345261896042b58dea23754 ] Commit be0c37c985eddc4 (MIPS: Rearrange PTE bits into fixed positions.) defines fixed PTE bits for MIPS R2. Then, commit d7b631419b3d230a4d383 (MIPS: pgtable-bits: Fix XPA damage to R6 definitions.) adds the MIPS R6 definitions in the same way as MIPS R2. But some R6 #ifdefs in the later commit are missing, so in this patch I fix that. Signed-off-by: Huacai Chen Cc: Aurelien Jarno Cc: Steven J. 
Hill Cc: Fuxin Zhang Cc: Zhangjin Wu Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/12164/ Signed-off-by: Ralf Baechle Signed-off-by: Sasha Levin --- arch/mips/include/asm/pgtable.h | 4 ++-- arch/mips/mm/tlbex.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h index 70f6e7f073b04..7fe24aef7fdc6 100644 --- a/arch/mips/include/asm/pgtable.h +++ b/arch/mips/include/asm/pgtable.h @@ -353,7 +353,7 @@ static inline pte_t pte_mkdirty(pte_t pte) static inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= _PAGE_ACCESSED; -#ifdef CONFIG_CPU_MIPSR2 +#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6) if (!(pte_val(pte) & _PAGE_NO_READ)) pte_val(pte) |= _PAGE_SILENT_READ; else @@ -558,7 +558,7 @@ static inline pmd_t pmd_mkyoung(pmd_t pmd) { pmd_val(pmd) |= _PAGE_ACCESSED; -#ifdef CONFIG_CPU_MIPSR2 +#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6) if (!(pmd_val(pmd) & _PAGE_NO_READ)) pmd_val(pmd) |= _PAGE_SILENT_READ; else diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c index 97c87027c17f8..90b0e83167903 100644 --- a/arch/mips/mm/tlbex.c +++ b/arch/mips/mm/tlbex.c @@ -242,7 +242,7 @@ static void output_pgtable_bits_defines(void) pr_define("_PAGE_HUGE_SHIFT %d\n", _PAGE_HUGE_SHIFT); pr_define("_PAGE_SPLITTING_SHIFT %d\n", _PAGE_SPLITTING_SHIFT); #endif -#ifdef CONFIG_CPU_MIPSR2 +#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6) if (cpu_has_rixi) { #ifdef _PAGE_NO_EXEC_SHIFT pr_define("_PAGE_NO_EXEC_SHIFT %d\n", _PAGE_NO_EXEC_SHIFT); From 9f4bab04f3788cd66b203c501fe30d093fe4d8d7 Mon Sep 17 00:00:00 2001 From: Insu Yun Date: Sat, 23 Jan 2016 15:44:19 -0500 Subject: [PATCH 1815/2091] ACPI / PCI / hotplug: unlock in error path in acpiphp_enable_slot() [ Upstream commit 2c3033a0664dfae91e1dee7fabac10f24354b958 ] In acpiphp_enable_slot(), there is a missing unlock path when error occurred. It needs to be unlocked before returning an error. Signed-off-by: Insu Yun Cc: All applicable Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/pci/hotplug/acpiphp_glue.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index bcb90e4888dd8..b60309ee80ed0 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -954,8 +954,10 @@ int acpiphp_enable_slot(struct acpiphp_slot *slot) { pci_lock_rescan_remove(); - if (slot->flags & SLOT_IS_GOING_AWAY) + if (slot->flags & SLOT_IS_GOING_AWAY) { + pci_unlock_rescan_remove(); return -ENODEV; + } /* configure all functions */ if (!(slot->flags & SLOT_ENABLED)) From 01c7c52c66428c45484b59dac950b613e38cc7dc Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Thu, 14 Jan 2016 12:20:47 -0800 Subject: [PATCH 1816/2091] ARM: OMAP2+: Fix wait_dll_lock_timed for rodata [ Upstream commit d9db59103305eb5ec2a86369f32063e9921b6ac5 ] We don't want to be writing to .text so it can be set rodata. Fix error "Unable to handle kernel paging request at virtual address c012396c" in wait_dll_lock_timed if CONFIG_DEBUG_RODATA is selected. As these counters are for debugging only and unused, we can just remove them. 
Cc: Kees Cook Cc: Laura Abbott Cc: Nishanth Menon Cc: Richard Woodruff Cc: Russell King Cc: Tero Kristo Acked-by: Nicolas Pitre Cc: stable@vger.kernel.org # v4.0+ Fixes: 1e6b48116a95 ("ARM: mm: allow non-text sections to be non-executable") Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin --- arch/arm/mach-omap2/sleep34xx.S | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/arch/arm/mach-omap2/sleep34xx.S b/arch/arm/mach-omap2/sleep34xx.S index eafd120b53f1b..787cfda7a9b3e 100644 --- a/arch/arm/mach-omap2/sleep34xx.S +++ b/arch/arm/mach-omap2/sleep34xx.S @@ -289,12 +289,6 @@ wait_sdrc_ready: bic r5, r5, #0x40 str r5, [r4] -/* - * PC-relative stores lead to undefined behaviour in Thumb-2: use a r7 as a - * base instead. - * Be careful not to clobber r7 when maintaing this code. - */ - is_dll_in_lock_mode: /* Is dll in lock mode? */ ldr r4, sdrc_dlla_ctrl @@ -302,11 +296,7 @@ is_dll_in_lock_mode: tst r5, #0x4 bne exit_nonoff_modes @ Return if locked /* wait till dll locks */ - adr r7, kick_counter wait_dll_lock_timed: - ldr r4, wait_dll_lock_counter - add r4, r4, #1 - str r4, [r7, #wait_dll_lock_counter - kick_counter] ldr r4, sdrc_dlla_status /* Wait 20uS for lock */ mov r6, #8 @@ -330,9 +320,6 @@ kick_dll: orr r6, r6, #(1<<3) @ enable dll str r6, [r4] dsb - ldr r4, kick_counter - add r4, r4, #1 - str r4, [r7] @ kick_counter b wait_dll_lock_timed exit_nonoff_modes: @@ -360,15 +347,6 @@ sdrc_dlla_status: .word SDRC_DLLA_STATUS_V sdrc_dlla_ctrl: .word SDRC_DLLA_CTRL_V - /* - * When exporting to userspace while the counters are in SRAM, - * these 2 words need to be at the end to facilitate retrival! - */ -kick_counter: - .word 0 -wait_dll_lock_counter: - .word 0 - ENTRY(omap3_do_wfi_sz) .word . - omap3_do_wfi From 2c0fae21283fe4d8ca6475e40f56603f7426613d Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Thu, 14 Jan 2016 12:20:47 -0800 Subject: [PATCH 1817/2091] ARM: OMAP2+: Fix l2dis_3630 for rodata [ Upstream commit eeaf9646aca89d097861caa24d9818434e48810e ] We don't want to write to .text section. Let's move l2dis_3630 to .data and access it via a pointer. For calculating the offset, let's optimize out the add and do it in ldr/str as suggested by Nicolas Pitre . 
Cc: Kees Cook Cc: Laura Abbott Cc: Nishanth Menon Cc: Richard Woodruff Cc: Russell King Cc: Tero Kristo Cc: stable@vger.kernel.org # v4.0+ Acked-by: Nicolas Pitre Cc: stable@vger.kernel.org # v4.0+ Fixes: 1e6b48116a95 ("ARM: mm: allow non-text sections to be non-executable") Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin --- arch/arm/mach-omap2/sleep34xx.S | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/arch/arm/mach-omap2/sleep34xx.S b/arch/arm/mach-omap2/sleep34xx.S index 787cfda7a9b3e..5659d96b78d5e 100644 --- a/arch/arm/mach-omap2/sleep34xx.S +++ b/arch/arm/mach-omap2/sleep34xx.S @@ -86,8 +86,9 @@ ENTRY(enable_omap3630_toggle_l2_on_restore) stmfd sp!, {lr} @ save registers on stack /* Setup so that we will disable and enable l2 */ mov r1, #0x1 - adrl r2, l2dis_3630 @ may be too distant for plain adr - str r1, [r2] + adrl r3, l2dis_3630_offset @ may be too distant for plain adr + ldr r2, [r3] @ value for offset + str r1, [r2, r3] @ write to l2dis_3630 ldmfd sp!, {pc} @ restore regs and return ENDPROC(enable_omap3630_toggle_l2_on_restore) @@ -415,7 +416,9 @@ ENTRY(omap3_restore) cmp r2, #0x0 @ Check if target power state was OFF or RET bne logic_l1_restore - ldr r0, l2dis_3630 + adr r1, l2dis_3630_offset @ address for offset + ldr r0, [r1] @ value for offset + ldr r0, [r1, r0] @ value at l2dis_3630 cmp r0, #0x1 @ should we disable L2 on 3630? bne skipl2dis mrc p15, 0, r0, c1, c0, 1 @@ -484,7 +487,9 @@ l2_inv_gp: mov r12, #0x2 smc #0 @ Call SMI monitor (smieq) logic_l1_restore: - ldr r1, l2dis_3630 + adr r0, l2dis_3630_offset @ adress for offset + ldr r1, [r0] @ value for offset + ldr r1, [r0, r1] @ value at l2dis_3630 cmp r1, #0x1 @ Test if L2 re-enable needed on 3630 bne skipl2reen mrc p15, 0, r1, c1, c0, 1 @@ -513,6 +518,10 @@ control_stat: .word CONTROL_STAT control_mem_rta: .word CONTROL_MEM_RTA_CTRL +l2dis_3630_offset: + .long l2dis_3630 - . + + .data l2dis_3630: .word 0 From b6204db55d575691f837a64fbc31eb9a4bcb3c52 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Thu, 14 Jan 2016 12:20:47 -0800 Subject: [PATCH 1818/2091] ARM: OMAP2+: Fix save_secure_ram_context for rodata [ Upstream commit a5311d4d13df80bd71a9e47f9ecaf327f478fab1 ] We don't want to write to .text and we can move save_secure_ram_context into .data as it all gets copied into SRAM anyways. Cc: Kees Cook Cc: Laura Abbott Cc: Nishanth Menon Cc: Richard Woodruff Cc: Russell King Cc: Sergei Shtylyov Cc: Tero Kristo Acked-by: Nicolas Pitre Cc: stable@vger.kernel.org # v4.0+ Fixes: 1e6b48116a95 ("ARM: mm: allow non-text sections to be non-executable") Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin --- arch/arm/mach-omap2/sleep34xx.S | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-omap2/sleep34xx.S b/arch/arm/mach-omap2/sleep34xx.S index 5659d96b78d5e..8e2a7acb823b6 100644 --- a/arch/arm/mach-omap2/sleep34xx.S +++ b/arch/arm/mach-omap2/sleep34xx.S @@ -92,8 +92,12 @@ ENTRY(enable_omap3630_toggle_l2_on_restore) ldmfd sp!, {pc} @ restore regs and return ENDPROC(enable_omap3630_toggle_l2_on_restore) - .text -/* Function to call rom code to save secure ram context */ +/* + * Function to call rom code to save secure ram context. This gets + * relocated to SRAM, so it can be all in .data section. Otherwise + * we need to initialize api_params separately. 
+ */ + .data .align 3 ENTRY(save_secure_ram_context) stmfd sp!, {r4 - r11, lr} @ save registers on stack @@ -127,6 +131,8 @@ ENDPROC(save_secure_ram_context) ENTRY(save_secure_ram_context_sz) .word . - save_secure_ram_context + .text + /* * ====================== * == Idle entry point == From 22186aa8b53c158418f8c1f70b1a556a78ed1012 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Thu, 14 Jan 2016 12:20:48 -0800 Subject: [PATCH 1819/2091] ARM: OMAP2+: Fix ppa_zero_params and ppa_por_params for rodata [ Upstream commit 4da597d16602d14405b71a18d45e1c59f28f0fd2 ] We don't want to write to .text so let's move ppa_zero_params and ppa_por_params to .data and access them via pointers. Note that I have not been able to test as we I don't have a HS omap4 to test with. The code has been changed in similar way as for omap3 though. Cc: Kees Cook Cc: Laura Abbott Cc: Nishanth Menon Cc: Richard Woodruff Cc: Russell King Cc: Tero Kristo Acked-by: Nicolas Pitre Cc: stable@vger.kernel.org # v4.0+ Fixes: 1e6b48116a95 ("ARM: mm: allow non-text sections to be non-executable") Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin --- arch/arm/mach-omap2/sleep44xx.S | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/arch/arm/mach-omap2/sleep44xx.S b/arch/arm/mach-omap2/sleep44xx.S index ad1bb9431e941..5373a32817798 100644 --- a/arch/arm/mach-omap2/sleep44xx.S +++ b/arch/arm/mach-omap2/sleep44xx.S @@ -29,12 +29,6 @@ dsb .endm -ppa_zero_params: - .word 0x0 - -ppa_por_params: - .word 1, 0 - #ifdef CONFIG_ARCH_OMAP4 /* @@ -266,7 +260,9 @@ ENTRY(omap4_cpu_resume) beq skip_ns_smp_enable ppa_actrl_retry: mov r0, #OMAP4_PPA_CPU_ACTRL_SMP_INDEX - adr r3, ppa_zero_params @ Pointer to parameters + adr r1, ppa_zero_params_offset + ldr r3, [r1] + add r3, r3, r1 @ Pointer to ppa_zero_params mov r1, #0x0 @ Process ID mov r2, #0x4 @ Flag mov r6, #0xff @@ -303,7 +299,9 @@ skip_ns_smp_enable: ldr r0, =OMAP4_PPA_L2_POR_INDEX ldr r1, =OMAP44XX_SAR_RAM_BASE ldr r4, [r1, #L2X0_PREFETCH_CTRL_OFFSET] - adr r3, ppa_por_params + adr r1, ppa_por_params_offset + ldr r3, [r1] + add r3, r3, r1 @ Pointer to ppa_por_params str r4, [r3, #0x04] mov r1, #0x0 @ Process ID mov r2, #0x4 @ Flag @@ -328,6 +326,8 @@ skip_l2en: #endif b cpu_resume @ Jump to generic resume +ppa_por_params_offset: + .long ppa_por_params - . ENDPROC(omap4_cpu_resume) #endif /* CONFIG_ARCH_OMAP4 */ @@ -382,4 +382,13 @@ ENTRY(omap_do_wfi) nop ldmfd sp!, {pc} +ppa_zero_params_offset: + .long ppa_zero_params - . ENDPROC(omap_do_wfi) + + .data +ppa_zero_params: + .word 0 + +ppa_por_params: + .word 1, 0 From 8448609ec0b8b36759338905a7a5f620555623d8 Mon Sep 17 00:00:00 2001 From: Sachin Kulkarni Date: Tue, 12 Jan 2016 14:30:19 +0530 Subject: [PATCH 1820/2091] mac80211: Requeue work after scan complete for all VIF types. [ Upstream commit 4fa11ec726a32ea6dd768dbb2e2af3453a98ec0a ] During a sw scan ieee80211_iface_work ignores work items for all vifs. However after the scan complete work is requeued only for STA, ADHOC and MESH iftypes. This occasionally results in event processing getting delayed/not processed for iftype AP when it coexists with a STA. This can result in data halt and eventually disconnection on the AP interface. 
Cc: stable@vger.kernel.org Signed-off-by: Sachin Kulkarni Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/ibss.c | 1 - net/mac80211/mesh.c | 11 ----------- net/mac80211/mesh.h | 4 ---- net/mac80211/mlme.c | 2 -- net/mac80211/scan.c | 12 +++++++++++- 5 files changed, 11 insertions(+), 19 deletions(-) diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index a9c9d961f039c..41adfc898a18e 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -1727,7 +1727,6 @@ void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local) if (sdata->vif.type != NL80211_IFTYPE_ADHOC) continue; sdata->u.ibss.last_scan_completed = jiffies; - ieee80211_queue_work(&local->hw, &sdata->work); } mutex_unlock(&local->iflist_mtx); } diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 817098add1d67..afcc67a157fd8 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -1299,17 +1299,6 @@ void ieee80211_mesh_work(struct ieee80211_sub_if_data *sdata) sdata_unlock(sdata); } -void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local) -{ - struct ieee80211_sub_if_data *sdata; - - rcu_read_lock(); - list_for_each_entry_rcu(sdata, &local->interfaces, list) - if (ieee80211_vif_is_mesh(&sdata->vif) && - ieee80211_sdata_running(sdata)) - ieee80211_queue_work(&local->hw, &sdata->work); - rcu_read_unlock(); -} void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata) { diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index 50c8473cf9dc5..472bdc73e950b 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -358,14 +358,10 @@ static inline bool mesh_path_sel_is_hwmp(struct ieee80211_sub_if_data *sdata) return sdata->u.mesh.mesh_pp_id == IEEE80211_PATH_PROTOCOL_HWMP; } -void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local); - void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata); void mesh_sync_adjust_tbtt(struct ieee80211_sub_if_data *sdata); void ieee80211s_stop(void); #else -static inline void -ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local) {} static inline bool mesh_path_sel_is_hwmp(struct ieee80211_sub_if_data *sdata) { return false; } static inline void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index a93906103f8b0..8448258299928 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -4002,8 +4002,6 @@ static void ieee80211_restart_sta_timer(struct ieee80211_sub_if_data *sdata) if (!(flags & IEEE80211_HW_CONNECTION_MONITOR)) ieee80211_queue_work(&sdata->local->hw, &sdata->u.mgd.monitor_work); - /* and do all the other regular work too */ - ieee80211_queue_work(&sdata->local->hw, &sdata->work); } } diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 7bb6a9383f58e..ee9351affa5bd 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -310,6 +310,7 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) bool was_scanning = local->scanning; struct cfg80211_scan_request *scan_req; struct ieee80211_sub_if_data *scan_sdata; + struct ieee80211_sub_if_data *sdata; lockdep_assert_held(&local->mtx); @@ -369,7 +370,16 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) ieee80211_mlme_notify_scan_completed(local); ieee80211_ibss_notify_scan_completed(local); - ieee80211_mesh_notify_scan_completed(local); + + /* Requeue all the work that might have been ignored while + * the scan was in progress; if there was none this will + * just be a no-op for 
the particular interface. + */ + list_for_each_entry_rcu(sdata, &local->interfaces, list) { + if (ieee80211_sdata_running(sdata)) + ieee80211_queue_work(&sdata->local->hw, &sdata->work); + } + if (was_scanning) ieee80211_start_next_roc(local); } From 5abd25f01c03b52d8aa39813656782917aa10d9b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 26 Jan 2016 11:29:03 +0100 Subject: [PATCH 1821/2091] rfkill: fix rfkill_fop_read wait_event usage [ Upstream commit 6736fde9672ff6717ac576e9bba2fd5f3dfec822 ] The code within wait_event_interruptible() is called with !TASK_RUNNING, so mustn't call any functions that can sleep, like mutex_lock(). Since we re-check the list_empty() in a loop after the wait, it's safe to simply use list_empty() without locking. This bug has existed forever, but was only discovered now because all userspace implementations, including the default 'rfkill' tool, use poll() or select() to get a readable fd before attempting to read. Cc: stable@vger.kernel.org Fixes: c64fb01627e24 ("rfkill: create useful userspace interface") Reported-by: Dmitry Vyukov Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/rfkill/core.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/net/rfkill/core.c b/net/rfkill/core.c index fa7cd792791cb..a97bb7332607a 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -1081,17 +1081,6 @@ static unsigned int rfkill_fop_poll(struct file *file, poll_table *wait) return res; } -static bool rfkill_readable(struct rfkill_data *data) -{ - bool r; - - mutex_lock(&data->mtx); - r = !list_empty(&data->events); - mutex_unlock(&data->mtx); - - return r; -} - static ssize_t rfkill_fop_read(struct file *file, char __user *buf, size_t count, loff_t *pos) { @@ -1108,8 +1097,11 @@ static ssize_t rfkill_fop_read(struct file *file, char __user *buf, goto out; } mutex_unlock(&data->mtx); + /* since we re-check and it just compares pointers, + * using !list_empty() without locking isn't a problem + */ ret = wait_event_interruptible(data->read_wait, - rfkill_readable(data)); + !list_empty(&data->events)); mutex_lock(&data->mtx); if (ret) From d54ed290a5011beeeeb38082aa4323af4c60e7be Mon Sep 17 00:00:00 2001 From: Mohamed Jamsheeth Hajanajubudeen Date: Fri, 11 Dec 2015 17:06:26 +0530 Subject: [PATCH 1822/2091] ARM: dts: at91: sama5d4: fix instance id of DBGU [ Upstream commit 929e883f2bfdf68d4bd3aec43912e956417005c7 ] Change instance id of DBGU to 45. 
Signed-off-by: Mohamed Jamsheeth Hajanajubudeen Fixes: 7c661394c56c ("ARM: at91: dt: add device tree file for SAMA5D4 SoC") Cc: stable@vger.kernel.org # 3.18+ Signed-off-by: Nicolas Ferre Signed-off-by: Sasha Levin --- arch/arm/boot/dts/sama5d4.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/sama5d4.dtsi b/arch/arm/boot/dts/sama5d4.dtsi index 9cf0ab62db7d2..cf11660f35a17 100644 --- a/arch/arm/boot/dts/sama5d4.dtsi +++ b/arch/arm/boot/dts/sama5d4.dtsi @@ -1219,7 +1219,7 @@ dbgu: serial@fc069000 { compatible = "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart"; reg = <0xfc069000 0x200>; - interrupts = <2 IRQ_TYPE_LEVEL_HIGH 7>; + interrupts = <45 IRQ_TYPE_LEVEL_HIGH 7>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_dbgu>; clocks = <&dbgu_clk>; From 06fa2c9ac1aa1dfe3d7e55644c23e96214067040 Mon Sep 17 00:00:00 2001 From: Wenyou Yang Date: Wed, 27 Jan 2016 13:16:24 +0800 Subject: [PATCH 1823/2091] ARM: dts: at91: sama5d4ek: add phy address and IRQ for macb0 [ Upstream commit aae6b18f5c95b9dc78de66d1e27e8afeee2763b7 ] On SAMA5D4EK board, the Ethernet doesn't work after resuming from the suspend state. Signed-off-by: Wenyou Yang [nicolas.ferre@atmel.com: adapt to newer kernel] Fixes: 38153a017896 ("ARM: at91/dt: sama5d4: add dts for sama5d4 xplained board") Cc: # 4.1+ Signed-off-by: Nicolas Ferre Signed-off-by: Sasha Levin --- arch/arm/boot/dts/at91-sama5d4ek.dts | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/arm/boot/dts/at91-sama5d4ek.dts b/arch/arm/boot/dts/at91-sama5d4ek.dts index 45e7761b7a295..d4d24a0814048 100644 --- a/arch/arm/boot/dts/at91-sama5d4ek.dts +++ b/arch/arm/boot/dts/at91-sama5d4ek.dts @@ -141,8 +141,15 @@ }; macb0: ethernet@f8020000 { + pinctrl-0 = <&pinctrl_macb0_rmii &pinctrl_macb0_phy_irq>; phy-mode = "rmii"; status = "okay"; + + ethernet-phy@1 { + reg = <0x1>; + interrupt-parent = <&pioE>; + interrupts = <1 IRQ_TYPE_LEVEL_LOW>; + }; }; mmc1: mmc@fc000000 { @@ -174,6 +181,10 @@ pinctrl@fc06a000 { board { + pinctrl_macb0_phy_irq: macb0_phy_irq { + atmel,pins = + ; + }; pinctrl_mmc0_cd: mmc0_cd { atmel,pins = ; From 505a9f25a6c33070eb51ea50aabf168b8aafa6d1 Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Wed, 27 Jan 2016 11:03:02 +0100 Subject: [PATCH 1824/2091] ARM: dts: at91: sama5d4 xplained: fix phy0 IRQ type [ Upstream commit e873cc022ce5e2c04bbc53b5874494b657e29d3f ] For phy0 KSZ8081, the type of GPIO IRQ should be "level low" instead of "edge falling". Signed-off-by: Nicolas Ferre Fixes: 38153a017896 ("ARM: at91/dt: sama5d4: add dts for sama5d4 xplained board") Cc: # 4.1+ Signed-off-by: Sasha Levin --- arch/arm/boot/dts/at91-sama5d4_xplained.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/at91-sama5d4_xplained.dts b/arch/arm/boot/dts/at91-sama5d4_xplained.dts index c740e1a2a3a5c..4f29968076ceb 100644 --- a/arch/arm/boot/dts/at91-sama5d4_xplained.dts +++ b/arch/arm/boot/dts/at91-sama5d4_xplained.dts @@ -98,7 +98,7 @@ phy0: ethernet-phy@1 { interrupt-parent = <&pioE>; - interrupts = <1 IRQ_TYPE_EDGE_FALLING>; + interrupts = <1 IRQ_TYPE_LEVEL_LOW>; reg = <1>; }; }; From 73f876ae98498f5a929053e4d402062a5516aa73 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 27 Jan 2016 00:16:37 +0800 Subject: [PATCH 1825/2091] crypto: shash - Fix has_key setting [ Upstream commit 00420a65fa2beb3206090ead86942484df2275f3 ] The has_key logic is wrong for shash algorithms as they always have a setkey function. So we should instead be testing against shash_no_setkey. 
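For illustration only, a minimal standalone sketch of the pattern the fix relies on (hypothetical names and plain userspace C, not the kernel crypto API): when every instance gets a default stub installed, the "is this operation really implemented?" test has to compare against that stub rather than against NULL, which is the shape of the change below.

  #include <errno.h>
  #include <stdbool.h>

  struct hash_ops {
          int (*setkey)(const void *key, unsigned int keylen);
  };

  /* default stub installed for algorithms without a real setkey,
   * playing the role of shash_no_setkey in this sketch
   */
  static int no_setkey(const void *key, unsigned int keylen)
  {
          (void)key;
          (void)keylen;
          return -ENOSYS;
  }

  static bool has_real_setkey(const struct hash_ops *ops)
  {
          /* ops->setkey is never NULL here, so a NULL check proves nothing */
          return ops->setkey != no_setkey;
  }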
Fixes: a5596d633278 ("crypto: hash - Add crypto_ahash_has_setkey") Cc: stable@vger.kernel.org Reported-by: Stephan Mueller Signed-off-by: Herbert Xu Tested-by: Stephan Mueller Signed-off-by: Sasha Levin --- crypto/shash.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/crypto/shash.c b/crypto/shash.c index aa3e505045e0b..03fbcd4a82c44 100644 --- a/crypto/shash.c +++ b/crypto/shash.c @@ -354,11 +354,10 @@ int crypto_init_shash_ops_async(struct crypto_tfm *tfm) crt->final = shash_async_final; crt->finup = shash_async_finup; crt->digest = shash_async_digest; + crt->setkey = shash_async_setkey; + + crt->has_setkey = alg->setkey != shash_no_setkey; - if (alg->setkey) { - crt->setkey = shash_async_setkey; - crt->has_setkey = true; - } if (alg->export) crt->export = shash_async_export; if (alg->import) From 43293f825820f7228871ff16b6ff34260113b62e Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 13 Jan 2016 16:35:20 +0200 Subject: [PATCH 1826/2091] drm/i915/dp: fall back to 18 bpp when sink capability is unknown MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 5efd407674068dede403551bea3b0b134c32513a ] Per DP spec, the source device should fall back to 18 bpp, VESA range RGB when the sink capability is unknown. Fix the color depth clamping. 18 bpp color depth should ensure full color range in automatic mode. The clamping has been HDMI specific since its introduction in commit 996a2239f93b03c5972923f04b097f65565c5bed Author: Daniel Vetter Date: Fri Apr 19 11:24:34 2013 +0200 drm/i915: Disable high-bpc on pre-1.4 EDID screens Cc: stable@vger.kernel.org Reported-and-tested-by: Dihan Wickremasuriya Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=105331 Reviewed-by: Ville Syrjälä Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1452695720-7076-1-git-send-email-jani.nikula@intel.com (cherry picked from commit 013dd9e038723bbd2aa67be51847384b75be8253) Signed-off-by: Sasha Levin --- drivers/gpu/drm/i915/intel_display.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 7b27a114b030c..b103773df2a34 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -10391,11 +10391,21 @@ connected_sink_compute_bpp(struct intel_connector *connector, pipe_config->pipe_bpp = connector->base.display_info.bpc*3; } - /* Clamp bpp to 8 on screens without EDID 1.4 */ - if (connector->base.display_info.bpc == 0 && bpp > 24) { - DRM_DEBUG_KMS("clamping display bpp (was %d) to default limit of 24\n", - bpp); - pipe_config->pipe_bpp = 24; + /* Clamp bpp to default limit on screens without EDID 1.4 */ + if (connector->base.display_info.bpc == 0) { + int type = connector->base.connector_type; + int clamp_bpp = 24; + + /* Fall back to 18 bpp when DP sink capability is unknown. 
*/ + if (type == DRM_MODE_CONNECTOR_DisplayPort || + type == DRM_MODE_CONNECTOR_eDP) + clamp_bpp = 18; + + if (bpp > clamp_bpp) { + DRM_DEBUG_KMS("clamping display bpp (was %d) to default limit of %d\n", + bpp, clamp_bpp); + pipe_config->pipe_bpp = clamp_bpp; + } } } From a16d95b6a648c5d4761ec5988923421b4c0ecabb Mon Sep 17 00:00:00 2001 From: Jurgen Kramer Date: Fri, 29 Jan 2016 14:49:55 +0100 Subject: [PATCH 1827/2091] ALSA: usb-audio: Fix OPPO HA-1 vendor ID [ Upstream commit 5327d6ba975042fd3da50ac6e94d1e9551ebeaec ] In my patch adding native DSD support for the Oppo HA-1, the wrong vendor ID got through. This patch fixes the vendor ID and aligns the comment. Fixes: a4eae3a506ea ('ALSA: usb: Add native DSD support for Oppo HA-1') Signed-off-by: Jurgen Kramer Cc: Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/usb/quirks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 5585cfd7f63e3..d7c7bcdbad0f8 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1278,7 +1278,7 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip, case USB_ID(0x20b1, 0x3008): /* iFi Audio micro/nano iDSD */ case USB_ID(0x20b1, 0x2008): /* Matrix Audio X-Sabre */ case USB_ID(0x20b1, 0x300a): /* Matrix Audio Mini-i Pro */ - case USB_ID(0x22d8, 0x0416): /* OPPO HA-1*/ + case USB_ID(0x22d9, 0x0416): /* OPPO HA-1 */ if (fp->altsetting == 2) return SNDRV_PCM_FMTBIT_DSD_U32_BE; break; From 29eccdaa46023d0c57c691ce047f9f424114c2b6 Mon Sep 17 00:00:00 2001 From: Jurgen Kramer Date: Fri, 29 Jan 2016 14:59:25 +0100 Subject: [PATCH 1828/2091] ALSA: usb-audio: Add native DSD support for PS Audio NuWave DAC [ Upstream commit ad678b4ccd41aa51cf5f142c0e8cffe9d61fc2bf ] This patch adds native DSD support for the PS Audio NuWave DAC. Signed-off-by: Jurgen Kramer Cc: Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/usb/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index d7c7bcdbad0f8..c61107867e9bd 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1287,6 +1287,7 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip, case USB_ID(0x20b1, 0x2009): /* DIYINHK DSD DXD 384kHz USB to I2S/DSD */ case USB_ID(0x20b1, 0x2023): /* JLsounds I2SoverUSB */ case USB_ID(0x20b1, 0x3023): /* Aune X1S 32BIT/384 DSD DAC */ + case USB_ID(0x2616, 0x0106): /* PS Audio NuWave DAC */ if (fp->altsetting == 3) return SNDRV_PCM_FMTBIT_DSD_U32_BE; break; From caec5c97583b7ea44c57be91a19d28b0ac447fe4 Mon Sep 17 00:00:00 2001 From: Lev Lybin Date: Fri, 29 Jan 2016 22:55:11 +0700 Subject: [PATCH 1829/2091] ALSA: usb-audio: Add quirk for Microsoft LifeCam HD-6000 [ Upstream commit 1b3c993a699bed282e47c3f7c49d539c331dae04 ] Microsoft LifeCam HD-6000 (045e:076f) requires the similar quirk for avoiding the stall due to the invalid sample rate reads. 
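As an aside, a simplified sketch of how such device quirks are keyed (plain C, not the driver source; it assumes the usual vendor-in-high-half, product-in-low-half packing behind the USB_ID() convention seen in the table below):

  #include <stdbool.h>
  #include <stdint.h>

  /* pack vendor:product into a single 32-bit key (assumed layout) */
  #define USB_ID(vendor, product) (((uint32_t)(vendor) << 16) | (product))

  static bool needs_sample_rate_quirk(uint32_t usb_id)
  {
          switch (usb_id) {
          case USB_ID(0x045E, 0x076F):    /* MS LifeCam HD-6000 */
                  return true;            /* don't read the rate back, it stalls */
          default:
                  return false;
          }
  }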
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=111491 Signed-off-by: Lev Lybin Cc: Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/usb/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index c61107867e9bd..37d8ababfc043 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1118,6 +1118,7 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip) switch (chip->usb_id) { case USB_ID(0x045E, 0x075D): /* MS Lifecam Cinema */ case USB_ID(0x045E, 0x076D): /* MS Lifecam HD-5000 */ + case USB_ID(0x045E, 0x076F): /* MS Lifecam HD-6000 */ case USB_ID(0x045E, 0x0772): /* MS Lifecam Studio */ case USB_ID(0x045E, 0x0779): /* MS Lifecam HD-3000 */ case USB_ID(0x04D8, 0xFEEA): /* Benchmark DAC1 Pre */ From bd92b10c95b37a4d6c043ad76d51d13dee45c477 Mon Sep 17 00:00:00 2001 From: "Wang, Rui Y" Date: Wed, 27 Jan 2016 17:08:37 +0800 Subject: [PATCH 1830/2091] crypto: algif_hash - wait for crypto_ahash_init() to complete [ Upstream commit fe09786178f9df713a4b2dd6b93c0a722346bf5e ] hash_sendmsg/sendpage() need to wait for the completion of crypto_ahash_init() otherwise it can cause panic. Cc: stable@vger.kernel.org Signed-off-by: Rui Wang Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- crypto/algif_hash.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c index 7b5018b34c97c..d7a3435280d89 100644 --- a/crypto/algif_hash.c +++ b/crypto/algif_hash.c @@ -54,7 +54,8 @@ static int hash_sendmsg(struct socket *sock, struct msghdr *msg, lock_sock(sk); if (!ctx->more) { - err = crypto_ahash_init(&ctx->req); + err = af_alg_wait_for_completion(crypto_ahash_init(&ctx->req), + &ctx->completion); if (err) goto unlock; } @@ -125,6 +126,7 @@ static ssize_t hash_sendpage(struct socket *sock, struct page *page, } else { if (!ctx->more) { err = crypto_ahash_init(&ctx->req); + err = af_alg_wait_for_completion(err, &ctx->completion); if (err) goto unlock; } From 81bce29eec26617db18ff21543c7a091b5c10c92 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 1 Feb 2016 12:06:42 +0100 Subject: [PATCH 1831/2091] ALSA: seq: Fix race at closing in virmidi driver [ Upstream commit 2d1b5c08366acd46c35a2e9aba5d650cb5bf5c19 ] The virmidi driver has an open race at closing its assigned rawmidi device, and this may lead to use-after-free in snd_seq_deliver_single_event(). Plug the hole by properly protecting the linked list deletion and calling in the right order in snd_virmidi_input_close(). 
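For illustration, a minimal sketch of the teardown ordering described above (hypothetical list node and userspace rwlock, not the virmidi code): unlink under the writer lock first, and free per-instance resources only once the object can no longer be reached by a concurrent reader.

  #include <pthread.h>
  #include <stdlib.h>

  struct node {
          struct node *prev, *next;       /* linked into a shared circular list */
          void *parser;                   /* per-instance resource */
  };

  static void close_instance(pthread_rwlock_t *list_lock, struct node *n)
  {
          /* 1. make the node unreachable, under the writer lock */
          pthread_rwlock_wrlock(list_lock);
          n->prev->next = n->next;
          n->next->prev = n->prev;
          pthread_rwlock_unlock(list_lock);

          /* 2. only now tear down what readers might have dereferenced */
          free(n->parser);
          free(n);
  }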
BugLink: http://lkml.kernel.org/r/CACT4Y+Zd66+w12fNN85-425cVQT=K23kWbhnCEcMB8s3us-Frw@mail.gmail.com Reported-by: Dmitry Vyukov Tested-by: Dmitry Vyukov Cc: Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/core/seq/seq_virmidi.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sound/core/seq/seq_virmidi.c b/sound/core/seq/seq_virmidi.c index 56e0f4cd3f829..f2975927551e9 100644 --- a/sound/core/seq/seq_virmidi.c +++ b/sound/core/seq/seq_virmidi.c @@ -254,9 +254,13 @@ static int snd_virmidi_output_open(struct snd_rawmidi_substream *substream) */ static int snd_virmidi_input_close(struct snd_rawmidi_substream *substream) { + struct snd_virmidi_dev *rdev = substream->rmidi->private_data; struct snd_virmidi *vmidi = substream->runtime->private_data; - snd_midi_event_free(vmidi->parser); + + write_lock_irq(&rdev->filelist_lock); list_del(&vmidi->list); + write_unlock_irq(&rdev->filelist_lock); + snd_midi_event_free(vmidi->parser); substream->runtime->private_data = NULL; kfree(vmidi); return 0; From 216b6d4f24b88ab70c58e0574e30b8da1b2b053b Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 1 Feb 2016 12:04:55 +0100 Subject: [PATCH 1832/2091] ALSA: rawmidi: Remove kernel WARNING for NULL user-space buffer check [ Upstream commit cc85f7a634cfaf9f0713c6aa06d08817424db37a ] NULL user-space buffer can be passed even in a normal path, thus it's not good to spew a kernel warning with stack trace at each time. Just drop snd_BUG_ON() macro usage there. BugLink: http://lkml.kernel.org/r/CACT4Y+YfVJ3L+q0i-4vyQVyyPD7V=OMX0PWPi29x9Bo3QaBLdw@mail.gmail.com Reported-by: Dmitry Vyukov Cc: Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/core/rawmidi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c index a7759846fbaad..f75d1656272c8 100644 --- a/sound/core/rawmidi.c +++ b/sound/core/rawmidi.c @@ -1178,7 +1178,7 @@ static long snd_rawmidi_kernel_write1(struct snd_rawmidi_substream *substream, long count1, result; struct snd_rawmidi_runtime *runtime = substream->runtime; - if (snd_BUG_ON(!kernelbuf && !userbuf)) + if (!kernelbuf && !userbuf) return -EINVAL; if (snd_BUG_ON(!runtime->buffer)) return -EINVAL; From 666f365e85209f799819793d475cd7cde98a2be2 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 31 Jan 2016 10:32:37 +0100 Subject: [PATCH 1833/2091] ALSA: pcm: Fix potential deadlock in OSS emulation [ Upstream commit b248371628aad599a48540962f6b85a21a8a0c3f ] There are potential deadlocks in PCM OSS emulation code while accessing read/write and mmap concurrently. This comes from the infamous mmap_sem usage in copy_from/to_user(). Namely, snd_pcm_oss_write() -> &runtime->oss.params_lock -> copy_to_user() -> &mm->mmap_sem mmap() -> &mm->mmap_sem -> snd_pcm_oss_mmap() -> &runtime->oss.params_lock Since we can't avoid taking params_lock from mmap code path, use trylock variant and aborts with -EAGAIN as a workaround of this AB/BA deadlock. 
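A minimal sketch of the AB/BA situation and the trylock escape (generic userspace mutexes, not the ALSA or mm locks): one path takes A then B, the other is forced to take B first but refuses to block on A and reports -EAGAIN instead of risking the deadlock.

  #include <errno.h>
  #include <pthread.h>

  /* path 1: lock(A) -> lock(B);  path 2 (below): lock(B) -> trylock(A) */
  static int path2(pthread_mutex_t *a, pthread_mutex_t *b)
  {
          pthread_mutex_lock(b);
          if (pthread_mutex_trylock(a) != 0) {
                  pthread_mutex_unlock(b);
                  return -EAGAIN;         /* caller may retry later */
          }
          /* ... work that needs both locks ... */
          pthread_mutex_unlock(a);
          pthread_mutex_unlock(b);
          return 0;
  }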
BugLink: http://lkml.kernel.org/r/CACT4Y+bVrBKDG0G2_AcUgUQa+X91VKTeS4v+wN7BSHwHtqn3kQ@mail.gmail.com Reported-by: Dmitry Vyukov Cc: Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/core/oss/pcm_oss.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c index 58550cc93f280..33e72c809e50e 100644 --- a/sound/core/oss/pcm_oss.c +++ b/sound/core/oss/pcm_oss.c @@ -834,7 +834,8 @@ static int choose_rate(struct snd_pcm_substream *substream, return snd_pcm_hw_param_near(substream, params, SNDRV_PCM_HW_PARAM_RATE, best_rate, NULL); } -static int snd_pcm_oss_change_params(struct snd_pcm_substream *substream) +static int snd_pcm_oss_change_params(struct snd_pcm_substream *substream, + bool trylock) { struct snd_pcm_runtime *runtime = substream->runtime; struct snd_pcm_hw_params *params, *sparams; @@ -848,7 +849,10 @@ static int snd_pcm_oss_change_params(struct snd_pcm_substream *substream) struct snd_mask sformat_mask; struct snd_mask mask; - if (mutex_lock_interruptible(&runtime->oss.params_lock)) + if (trylock) { + if (!(mutex_trylock(&runtime->oss.params_lock))) + return -EAGAIN; + } else if (mutex_lock_interruptible(&runtime->oss.params_lock)) return -EINTR; sw_params = kmalloc(sizeof(*sw_params), GFP_KERNEL); params = kmalloc(sizeof(*params), GFP_KERNEL); @@ -1092,7 +1096,7 @@ static int snd_pcm_oss_get_active_substream(struct snd_pcm_oss_file *pcm_oss_fil if (asubstream == NULL) asubstream = substream; if (substream->runtime->oss.params) { - err = snd_pcm_oss_change_params(substream); + err = snd_pcm_oss_change_params(substream, false); if (err < 0) return err; } @@ -1132,7 +1136,7 @@ static int snd_pcm_oss_make_ready(struct snd_pcm_substream *substream) return 0; runtime = substream->runtime; if (runtime->oss.params) { - err = snd_pcm_oss_change_params(substream); + err = snd_pcm_oss_change_params(substream, false); if (err < 0) return err; } @@ -2163,7 +2167,7 @@ static int snd_pcm_oss_get_space(struct snd_pcm_oss_file *pcm_oss_file, int stre runtime = substream->runtime; if (runtime->oss.params && - (err = snd_pcm_oss_change_params(substream)) < 0) + (err = snd_pcm_oss_change_params(substream, false)) < 0) return err; info.fragsize = runtime->oss.period_bytes; @@ -2800,7 +2804,12 @@ static int snd_pcm_oss_mmap(struct file *file, struct vm_area_struct *area) return -EIO; if (runtime->oss.params) { - if ((err = snd_pcm_oss_change_params(substream)) < 0) + /* use mutex_trylock() for params_lock for avoiding a deadlock + * between mmap_sem and params_lock taken by + * copy_from/to_user() in snd_pcm_oss_write/read() + */ + err = snd_pcm_oss_change_params(substream, true); + if (err < 0) return err; } #ifdef CONFIG_SND_PCM_OSS_PLUGINS From b55a0342e88343fbca10e8a1ac5f6aa1c001f40f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sat, 30 Jan 2016 23:30:25 +0100 Subject: [PATCH 1834/2091] ALSA: seq: Fix yet another races among ALSA timer accesses [ Upstream commit 2cdc7b636d55cbcf42e1e6c8accd85e62d3e9ae8 ] ALSA sequencer may open/close and control ALSA timer instance dynamically either via sequencer events or direct ioctls. These are done mostly asynchronously, and it may call still some timer action like snd_timer_start() while another is calling snd_timer_close(). Since the instance gets removed by snd_timer_close(), it may lead to a use-after-free. 
This patch tries to address such a race by protecting each snd_timer_*() call via the existing spinlock and also by avoiding the access to timer during close call. BugLink: http://lkml.kernel.org/r/CACT4Y+Z6RzW5MBr-HUdV-8zwg71WQfKTdPpYGvOeS7v4cyurNQ@mail.gmail.com Reported-by: Dmitry Vyukov Cc: Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/core/seq/seq_timer.c | 87 +++++++++++++++++++++++++++++--------- 1 file changed, 67 insertions(+), 20 deletions(-) diff --git a/sound/core/seq/seq_timer.c b/sound/core/seq/seq_timer.c index 186f1611103c5..a2468f1101d16 100644 --- a/sound/core/seq/seq_timer.c +++ b/sound/core/seq/seq_timer.c @@ -90,6 +90,9 @@ void snd_seq_timer_delete(struct snd_seq_timer **tmr) void snd_seq_timer_defaults(struct snd_seq_timer * tmr) { + unsigned long flags; + + spin_lock_irqsave(&tmr->lock, flags); /* setup defaults */ tmr->ppq = 96; /* 96 PPQ */ tmr->tempo = 500000; /* 120 BPM */ @@ -105,21 +108,25 @@ void snd_seq_timer_defaults(struct snd_seq_timer * tmr) tmr->preferred_resolution = seq_default_timer_resolution; tmr->skew = tmr->skew_base = SKEW_BASE; + spin_unlock_irqrestore(&tmr->lock, flags); } -void snd_seq_timer_reset(struct snd_seq_timer * tmr) +static void seq_timer_reset(struct snd_seq_timer *tmr) { - unsigned long flags; - - spin_lock_irqsave(&tmr->lock, flags); - /* reset time & songposition */ tmr->cur_time.tv_sec = 0; tmr->cur_time.tv_nsec = 0; tmr->tick.cur_tick = 0; tmr->tick.fraction = 0; +} + +void snd_seq_timer_reset(struct snd_seq_timer *tmr) +{ + unsigned long flags; + spin_lock_irqsave(&tmr->lock, flags); + seq_timer_reset(tmr); spin_unlock_irqrestore(&tmr->lock, flags); } @@ -138,8 +145,11 @@ static void snd_seq_timer_interrupt(struct snd_timer_instance *timeri, tmr = q->timer; if (tmr == NULL) return; - if (!tmr->running) + spin_lock_irqsave(&tmr->lock, flags); + if (!tmr->running) { + spin_unlock_irqrestore(&tmr->lock, flags); return; + } resolution *= ticks; if (tmr->skew != tmr->skew_base) { @@ -148,8 +158,6 @@ static void snd_seq_timer_interrupt(struct snd_timer_instance *timeri, (((resolution & 0xffff) * tmr->skew) >> 16); } - spin_lock_irqsave(&tmr->lock, flags); - /* update timer */ snd_seq_inc_time_nsec(&tmr->cur_time, resolution); @@ -296,26 +304,30 @@ int snd_seq_timer_open(struct snd_seq_queue *q) t->callback = snd_seq_timer_interrupt; t->callback_data = q; t->flags |= SNDRV_TIMER_IFLG_AUTO; + spin_lock_irq(&tmr->lock); tmr->timeri = t; + spin_unlock_irq(&tmr->lock); return 0; } int snd_seq_timer_close(struct snd_seq_queue *q) { struct snd_seq_timer *tmr; + struct snd_timer_instance *t; tmr = q->timer; if (snd_BUG_ON(!tmr)) return -EINVAL; - if (tmr->timeri) { - snd_timer_stop(tmr->timeri); - snd_timer_close(tmr->timeri); - tmr->timeri = NULL; - } + spin_lock_irq(&tmr->lock); + t = tmr->timeri; + tmr->timeri = NULL; + spin_unlock_irq(&tmr->lock); + if (t) + snd_timer_close(t); return 0; } -int snd_seq_timer_stop(struct snd_seq_timer * tmr) +static int seq_timer_stop(struct snd_seq_timer *tmr) { if (! 
tmr->timeri) return -EINVAL; @@ -326,6 +338,17 @@ int snd_seq_timer_stop(struct snd_seq_timer * tmr) return 0; } +int snd_seq_timer_stop(struct snd_seq_timer *tmr) +{ + unsigned long flags; + int err; + + spin_lock_irqsave(&tmr->lock, flags); + err = seq_timer_stop(tmr); + spin_unlock_irqrestore(&tmr->lock, flags); + return err; +} + static int initialize_timer(struct snd_seq_timer *tmr) { struct snd_timer *t; @@ -358,13 +381,13 @@ static int initialize_timer(struct snd_seq_timer *tmr) return 0; } -int snd_seq_timer_start(struct snd_seq_timer * tmr) +static int seq_timer_start(struct snd_seq_timer *tmr) { if (! tmr->timeri) return -EINVAL; if (tmr->running) - snd_seq_timer_stop(tmr); - snd_seq_timer_reset(tmr); + seq_timer_stop(tmr); + seq_timer_reset(tmr); if (initialize_timer(tmr) < 0) return -EINVAL; snd_timer_start(tmr->timeri, tmr->ticks); @@ -373,14 +396,25 @@ int snd_seq_timer_start(struct snd_seq_timer * tmr) return 0; } -int snd_seq_timer_continue(struct snd_seq_timer * tmr) +int snd_seq_timer_start(struct snd_seq_timer *tmr) +{ + unsigned long flags; + int err; + + spin_lock_irqsave(&tmr->lock, flags); + err = seq_timer_start(tmr); + spin_unlock_irqrestore(&tmr->lock, flags); + return err; +} + +static int seq_timer_continue(struct snd_seq_timer *tmr) { if (! tmr->timeri) return -EINVAL; if (tmr->running) return -EBUSY; if (! tmr->initialized) { - snd_seq_timer_reset(tmr); + seq_timer_reset(tmr); if (initialize_timer(tmr) < 0) return -EINVAL; } @@ -390,11 +424,24 @@ int snd_seq_timer_continue(struct snd_seq_timer * tmr) return 0; } +int snd_seq_timer_continue(struct snd_seq_timer *tmr) +{ + unsigned long flags; + int err; + + spin_lock_irqsave(&tmr->lock, flags); + err = seq_timer_continue(tmr); + spin_unlock_irqrestore(&tmr->lock, flags); + return err; +} + /* return current 'real' time. use timeofday() to get better granularity. */ snd_seq_real_time_t snd_seq_timer_get_cur_time(struct snd_seq_timer *tmr) { snd_seq_real_time_t cur_time; + unsigned long flags; + spin_lock_irqsave(&tmr->lock, flags); cur_time = tmr->cur_time; if (tmr->running) { struct timeval tm; @@ -410,7 +457,7 @@ snd_seq_real_time_t snd_seq_timer_get_cur_time(struct snd_seq_timer *tmr) } snd_seq_sanity_real_time(&cur_time); } - + spin_unlock_irqrestore(&tmr->lock, flags); return cur_time; } From aef523ec3812f822fdd256f64c0d941a0588647f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 14 Jan 2016 17:01:46 +0100 Subject: [PATCH 1835/2091] ALSA: timer: Code cleanup [ Upstream commit c3b1681375dc6e71d89a3ae00cc3ce9e775a8917 ] This is a minor code cleanup without any functional changes: - Kill keep_flag argument from _snd_timer_stop(), as all callers pass only it false. - Remove redundant NULL check in _snd_timer_stop(). 
Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/core/timer.c | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/sound/core/timer.c b/sound/core/timer.c index a419878901c46..5187780612f39 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -305,8 +305,7 @@ int snd_timer_open(struct snd_timer_instance **ti, return 0; } -static int _snd_timer_stop(struct snd_timer_instance *timeri, - int keep_flag, int event); +static int _snd_timer_stop(struct snd_timer_instance *timeri, int event); /* * close a timer instance @@ -348,7 +347,7 @@ int snd_timer_close(struct snd_timer_instance *timeri) spin_unlock_irq(&timer->lock); mutex_lock(®ister_mutex); list_del(&timeri->open_list); - if (timer && list_empty(&timer->open_list_head) && + if (list_empty(&timer->open_list_head) && timer->hw.close) timer->hw.close(timer); /* remove slave links */ @@ -493,8 +492,7 @@ int snd_timer_start(struct snd_timer_instance *timeri, unsigned int ticks) return result; } -static int _snd_timer_stop(struct snd_timer_instance * timeri, - int keep_flag, int event) +static int _snd_timer_stop(struct snd_timer_instance *timeri, int event) { struct snd_timer *timer; unsigned long flags; @@ -503,13 +501,11 @@ static int _snd_timer_stop(struct snd_timer_instance * timeri, return -ENXIO; if (timeri->flags & SNDRV_TIMER_IFLG_SLAVE) { - if (!keep_flag) { - spin_lock_irqsave(&slave_active_lock, flags); - timeri->flags &= ~SNDRV_TIMER_IFLG_RUNNING; - list_del_init(&timeri->ack_list); - list_del_init(&timeri->active_list); - spin_unlock_irqrestore(&slave_active_lock, flags); - } + spin_lock_irqsave(&slave_active_lock, flags); + timeri->flags &= ~SNDRV_TIMER_IFLG_RUNNING; + list_del_init(&timeri->ack_list); + list_del_init(&timeri->active_list); + spin_unlock_irqrestore(&slave_active_lock, flags); goto __end; } timer = timeri->timer; @@ -534,9 +530,7 @@ static int _snd_timer_stop(struct snd_timer_instance * timeri, } } } - if (!keep_flag) - timeri->flags &= - ~(SNDRV_TIMER_IFLG_RUNNING | SNDRV_TIMER_IFLG_START); + timeri->flags &= ~(SNDRV_TIMER_IFLG_RUNNING | SNDRV_TIMER_IFLG_START); spin_unlock_irqrestore(&timer->lock, flags); __end: if (event != SNDRV_TIMER_EVENT_RESOLUTION) @@ -555,7 +549,7 @@ int snd_timer_stop(struct snd_timer_instance *timeri) unsigned long flags; int err; - err = _snd_timer_stop(timeri, 0, SNDRV_TIMER_EVENT_STOP); + err = _snd_timer_stop(timeri, SNDRV_TIMER_EVENT_STOP); if (err < 0) return err; timer = timeri->timer; @@ -601,7 +595,7 @@ int snd_timer_continue(struct snd_timer_instance *timeri) */ int snd_timer_pause(struct snd_timer_instance * timeri) { - return _snd_timer_stop(timeri, 0, SNDRV_TIMER_EVENT_PAUSE); + return _snd_timer_stop(timeri, SNDRV_TIMER_EVENT_PAUSE); } /* From b99c635faf56fa3cbd1cf61632418b06543dc145 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sat, 30 Jan 2016 23:09:08 +0100 Subject: [PATCH 1836/2091] ALSA: timer: Fix link corruption due to double start or stop [ Upstream commit f784beb75ce82f4136f8a0960d3ee872f7109e09 ] Although ALSA timer code got hardening for races, it still causes use-after-free error. This is however rather a corrupted linked list, not actually the concurrent accesses. Namely, when timer start is triggered twice, list_add_tail() is called twice, too. This ends up with the link corruption and triggers KASAN error. The simplest fix would be replacing list_add_tail() with list_move_tail(), but fundamentally it's the problem that we don't check the double start/stop correctly. 
So, the right fix here is to add the proper checks to snd_timer_start() and snd_timer_stop() (and their variants). BugLink: http://lkml.kernel.org/r/CACT4Y+ZyPRoMQjmawbvmCEDrkBD2BQuH7R09=eOkf5ESK8kJAw@mail.gmail.com Reported-by: Dmitry Vyukov Cc: Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/core/timer.c | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/sound/core/timer.c b/sound/core/timer.c index 5187780612f39..357e86f7804a3 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -451,6 +451,10 @@ static int snd_timer_start_slave(struct snd_timer_instance *timeri) unsigned long flags; spin_lock_irqsave(&slave_active_lock, flags); + if (timeri->flags & SNDRV_TIMER_IFLG_RUNNING) { + spin_unlock_irqrestore(&slave_active_lock, flags); + return -EBUSY; + } timeri->flags |= SNDRV_TIMER_IFLG_RUNNING; if (timeri->master && timeri->timer) { spin_lock(&timeri->timer->lock); @@ -475,7 +479,8 @@ int snd_timer_start(struct snd_timer_instance *timeri, unsigned int ticks) return -EINVAL; if (timeri->flags & SNDRV_TIMER_IFLG_SLAVE) { result = snd_timer_start_slave(timeri); - snd_timer_notify1(timeri, SNDRV_TIMER_EVENT_START); + if (result >= 0) + snd_timer_notify1(timeri, SNDRV_TIMER_EVENT_START); return result; } timer = timeri->timer; @@ -484,11 +489,18 @@ int snd_timer_start(struct snd_timer_instance *timeri, unsigned int ticks) if (timer->card && timer->card->shutdown) return -ENODEV; spin_lock_irqsave(&timer->lock, flags); + if (timeri->flags & (SNDRV_TIMER_IFLG_RUNNING | + SNDRV_TIMER_IFLG_START)) { + result = -EBUSY; + goto unlock; + } timeri->ticks = timeri->cticks = ticks; timeri->pticks = 0; result = snd_timer_start1(timer, timeri, ticks); + unlock: spin_unlock_irqrestore(&timer->lock, flags); - snd_timer_notify1(timeri, SNDRV_TIMER_EVENT_START); + if (result >= 0) + snd_timer_notify1(timeri, SNDRV_TIMER_EVENT_START); return result; } @@ -502,6 +514,10 @@ static int _snd_timer_stop(struct snd_timer_instance *timeri, int event) if (timeri->flags & SNDRV_TIMER_IFLG_SLAVE) { spin_lock_irqsave(&slave_active_lock, flags); + if (!(timeri->flags & SNDRV_TIMER_IFLG_RUNNING)) { + spin_unlock_irqrestore(&slave_active_lock, flags); + return -EBUSY; + } timeri->flags &= ~SNDRV_TIMER_IFLG_RUNNING; list_del_init(&timeri->ack_list); list_del_init(&timeri->active_list); @@ -512,6 +528,11 @@ static int _snd_timer_stop(struct snd_timer_instance *timeri, int event) if (!timer) return -EINVAL; spin_lock_irqsave(&timer->lock, flags); + if (!(timeri->flags & (SNDRV_TIMER_IFLG_RUNNING | + SNDRV_TIMER_IFLG_START))) { + spin_unlock_irqrestore(&timer->lock, flags); + return -EBUSY; + } list_del_init(&timeri->ack_list); list_del_init(&timeri->active_list); if (timer->card && timer->card->shutdown) { @@ -581,10 +602,15 @@ int snd_timer_continue(struct snd_timer_instance *timeri) if (timer->card && timer->card->shutdown) return -ENODEV; spin_lock_irqsave(&timer->lock, flags); + if (timeri->flags & SNDRV_TIMER_IFLG_RUNNING) { + result = -EBUSY; + goto unlock; + } if (!timeri->cticks) timeri->cticks = 1; timeri->pticks = 0; result = snd_timer_start1(timer, timeri, timer->sticks); + unlock: spin_unlock_irqrestore(&timer->lock, flags); snd_timer_notify1(timeri, SNDRV_TIMER_EVENT_CONTINUE); return result; From 2322974c5d55e8be4be4b7cbf8cf996daf72b0bd Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sat, 30 Jan 2016 07:59:32 +0200 Subject: [PATCH 1837/2091] drm: add helper to check for wc memory support MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 4b0e4e4af6c6dc8354dcb72182d52c1bc55f12fc ] Reviewed-by: Christian König Reviewed-by: Michel Dänzer Signed-off-by: Dave Airlie Signed-off-by: Oded Gabbay Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Sasha Levin --- include/drm/drm_cache.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/include/drm/drm_cache.h b/include/drm/drm_cache.h index 7bfb063029d83..461a0558bca4d 100644 --- a/include/drm/drm_cache.h +++ b/include/drm/drm_cache.h @@ -35,4 +35,13 @@ void drm_clflush_pages(struct page *pages[], unsigned long num_pages); +static inline bool drm_arch_can_wc_memory(void) +{ +#if defined(CONFIG_PPC) && !defined(CONFIG_NOT_COHERENT_CACHE) + return false; +#else + return true; +#endif +} + #endif From 5e92a4058e7035fde9a623b5ca36d862e7677ea6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Thu, 5 Nov 2015 17:25:27 +0900 Subject: [PATCH 1838/2091] drm/radeon: Always disable RADEON_GEM_GTT_UC along with RADEON_GEM_GTT_WC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit a28bbd5824d4a2af98de45b300ab8d8fb39739fc ] Write-combining is a CPU feature. From the GPU POV, these both simply mean no GPU<->CPU cache coherency. Reviewed-by: Christian König Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/radeon/radeon_object.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index 676362769b8db..c12eea5e58750 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -225,7 +225,7 @@ int radeon_bo_create(struct radeon_device *rdev, /* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit * See https://bugs.freedesktop.org/show_bug.cgi?id=84627 */ - bo->flags &= ~RADEON_GEM_GTT_WC; + bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC); #elif defined(CONFIG_X86) && !defined(CONFIG_X86_PAT) /* Don't try to enable write-combining when it can't work, or things * may be slow @@ -237,7 +237,7 @@ int radeon_bo_create(struct radeon_device *rdev, DRM_INFO_ONCE("Please enable CONFIG_MTRR and CONFIG_X86_PAT for " "better performance thanks to write-combining\n"); - bo->flags &= ~RADEON_GEM_GTT_WC; + bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC); #endif radeon_ttm_placement_from_domain(bo, domain); From 2b374ca06b3bba78733da7968b82330fad7b60ce Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Sat, 30 Jan 2016 07:59:33 +0200 Subject: [PATCH 1839/2091] drm/radeon: mask out WC from BO on unsupported arches MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit c5244987394648913ae1a03879c58058a2fc2cee ] Reviewed-by: Christian König Reviewed-by: Michel Dänzer Signed-off-by: Oded Gabbay Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Sasha Levin --- drivers/gpu/drm/radeon/radeon_object.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index c12eea5e58750..741065bd14b34 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -33,6 +33,7 @@ #include #include #include +#include #include "radeon.h" #include "radeon_trace.h" @@ -238,6 +239,12 @@ int radeon_bo_create(struct radeon_device *rdev, DRM_INFO_ONCE("Please enable CONFIG_MTRR and 
CONFIG_X86_PAT for " "better performance thanks to write-combining\n"); bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC); +#else + /* For architectures that don't support WC memory, + * mask out the WC flag from the BO + */ + if (!drm_arch_can_wc_memory()) + bo->flags &= ~RADEON_GEM_GTT_WC; #endif radeon_ttm_placement_from_domain(bo, domain); From d84572ebdb7320ab597f933d8f65df9a9c7f9537 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 3 Feb 2016 12:32:51 +0100 Subject: [PATCH 1840/2091] ALSA: hda - Add fixup for Mac Mini 7,1 model [ Upstream commit 2154cc0e2d4ae15132d005d17e473327c70c9a06 ] Mac Mini 7,1 model with CS4208 codec reports the headphone jack detection wrongly in an inverted way. Moreover, the advertised pins for the audio input and SPDIF output have actually no jack detection. This patch addresses these issues. The inv_jack_detect flag is set for fixing the headphone jack detection, and the pin configs for audio input and SPDIF output are marked as non-detectable. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=105161 Report-and-tested-by: moosotc@gmail.com Cc: Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/hda/patch_cirrus.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/sound/pci/hda/patch_cirrus.c b/sound/pci/hda/patch_cirrus.c index b791529bf31cd..8f50a257a80d6 100644 --- a/sound/pci/hda/patch_cirrus.c +++ b/sound/pci/hda/patch_cirrus.c @@ -614,6 +614,7 @@ enum { CS4208_MAC_AUTO, CS4208_MBA6, CS4208_MBP11, + CS4208_MACMINI, CS4208_GPIO0, }; @@ -621,6 +622,7 @@ static const struct hda_model_fixup cs4208_models[] = { { .id = CS4208_GPIO0, .name = "gpio0" }, { .id = CS4208_MBA6, .name = "mba6" }, { .id = CS4208_MBP11, .name = "mbp11" }, + { .id = CS4208_MACMINI, .name = "macmini" }, {} }; @@ -632,6 +634,7 @@ static const struct snd_pci_quirk cs4208_fixup_tbl[] = { /* codec SSID matching */ static const struct snd_pci_quirk cs4208_mac_fixup_tbl[] = { SND_PCI_QUIRK(0x106b, 0x5e00, "MacBookPro 11,2", CS4208_MBP11), + SND_PCI_QUIRK(0x106b, 0x6c00, "MacMini 7,1", CS4208_MACMINI), SND_PCI_QUIRK(0x106b, 0x7100, "MacBookAir 6,1", CS4208_MBA6), SND_PCI_QUIRK(0x106b, 0x7200, "MacBookAir 6,2", CS4208_MBA6), SND_PCI_QUIRK(0x106b, 0x7b00, "MacBookPro 12,1", CS4208_MBP11), @@ -666,6 +669,24 @@ static void cs4208_fixup_mac(struct hda_codec *codec, snd_hda_apply_fixup(codec, action); } +/* MacMini 7,1 has the inverted jack detection */ +static void cs4208_fixup_macmini(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + static const struct hda_pintbl pincfgs[] = { + { 0x18, 0x00ab9150 }, /* mic (audio-in) jack: disable detect */ + { 0x21, 0x004be140 }, /* SPDIF: disable detect */ + { } + }; + + if (action == HDA_FIXUP_ACT_PRE_PROBE) { + /* HP pin (0x10) has an inverted detection */ + codec->inv_jack_detect = 1; + /* disable the bogus Mic and SPDIF jack detections */ + snd_hda_apply_pincfgs(codec, pincfgs); + } +} + static int cs4208_spdif_sw_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { @@ -709,6 +730,12 @@ static const struct hda_fixup cs4208_fixups[] = { .chained = true, .chain_id = CS4208_GPIO0, }, + [CS4208_MACMINI] = { + .type = HDA_FIXUP_FUNC, + .v.func = cs4208_fixup_macmini, + .chained = true, + .chain_id = CS4208_GPIO0, + }, [CS4208_GPIO0] = { .type = HDA_FIXUP_FUNC, .v.func = cs4208_fixup_gpio0, From 8bd8e5389ea62cdefa03d383daccb6474fef93a6 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 31 Jan 2016 11:57:41 +0100 Subject: [PATCH 1841/2091] ALSA: rawmidi: 
Make snd_rawmidi_transmit() race-free [ Upstream commit 06ab30034ed9c200a570ab13c017bde248ddb2a6 ] A kernel WARNING in snd_rawmidi_transmit_ack() is triggered by syzkaller fuzzer: WARNING: CPU: 1 PID: 20739 at sound/core/rawmidi.c:1136 Call Trace: [< inline >] __dump_stack lib/dump_stack.c:15 [] dump_stack+0x6f/0xa2 lib/dump_stack.c:50 [] warn_slowpath_common+0xd9/0x140 kernel/panic.c:482 [] warn_slowpath_null+0x29/0x30 kernel/panic.c:515 [] snd_rawmidi_transmit_ack+0x275/0x400 sound/core/rawmidi.c:1136 [] snd_virmidi_output_trigger+0x4b1/0x5a0 sound/core/seq/seq_virmidi.c:163 [< inline >] snd_rawmidi_output_trigger sound/core/rawmidi.c:150 [] snd_rawmidi_kernel_write1+0x549/0x780 sound/core/rawmidi.c:1223 [] snd_rawmidi_write+0x543/0xb30 sound/core/rawmidi.c:1273 [] __vfs_write+0x113/0x480 fs/read_write.c:528 [] vfs_write+0x167/0x4a0 fs/read_write.c:577 [< inline >] SYSC_write fs/read_write.c:624 [] SyS_write+0x111/0x220 fs/read_write.c:616 [] entry_SYSCALL_64_fastpath+0x16/0x7a arch/x86/entry/entry_64.S:185 Also a similar warning is found but in another path: Call Trace: [< inline >] __dump_stack lib/dump_stack.c:15 [] dump_stack+0x6f/0xa2 lib/dump_stack.c:50 [] warn_slowpath_common+0xd9/0x140 kernel/panic.c:482 [] warn_slowpath_null+0x29/0x30 kernel/panic.c:515 [] rawmidi_transmit_ack+0x24a/0x3b0 sound/core/rawmidi.c:1133 [] snd_rawmidi_transmit_ack+0x51/0x80 sound/core/rawmidi.c:1163 [] snd_virmidi_output_trigger+0x2b6/0x570 sound/core/seq/seq_virmidi.c:185 [< inline >] snd_rawmidi_output_trigger sound/core/rawmidi.c:150 [] snd_rawmidi_kernel_write1+0x4bb/0x760 sound/core/rawmidi.c:1252 [] snd_rawmidi_write+0x543/0xb30 sound/core/rawmidi.c:1302 [] __vfs_write+0x113/0x480 fs/read_write.c:528 [] vfs_write+0x167/0x4a0 fs/read_write.c:577 [< inline >] SYSC_write fs/read_write.c:624 [] SyS_write+0x111/0x220 fs/read_write.c:616 [] entry_SYSCALL_64_fastpath+0x16/0x7a arch/x86/entry/entry_64.S:185 In the former case, the reason is that virmidi has an open code calling snd_rawmidi_transmit_ack() with the value calculated outside the spinlock. We may use snd_rawmidi_transmit() in a loop just for consuming the input data, but even there, there is a race between snd_rawmidi_transmit_peek() and snd_rawmidi_tranmit_ack(). Similarly in the latter case, it calls snd_rawmidi_transmit_peek() and snd_rawmidi_tranmit_ack() separately without protection, so they are racy as well. The patch tries to address these issues by the following ways: - Introduce the unlocked versions of snd_rawmidi_transmit_peek() and snd_rawmidi_transmit_ack() to be called inside the explicit lock. - Rewrite snd_rawmidi_transmit() to be race-free (the former case). - Make the split calls (the latter case) protected in the rawmidi spin lock. 
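For illustration, a minimal ring-buffer sketch of the scheme in the list above (hypothetical structure and userspace spinlock, not the rawmidi API): the double-underscore helpers assume the caller already holds the lock, and the public transmit() takes it once around both the peek and the ack so no concurrent caller can slip in between them.

  #include <pthread.h>
  #include <stddef.h>

  struct ring {
          pthread_spinlock_t lock;
          unsigned char buf[256];
          size_t hw_ptr;                  /* next byte to transmit */
          size_t avail;                   /* bytes pending in buf */
  };

  /* caller must hold r->lock */
  static size_t __peek(const struct ring *r, unsigned char *dst, size_t n)
  {
          size_t todo = n < r->avail ? n : r->avail;

          for (size_t i = 0; i < todo; i++)
                  dst[i] = r->buf[(r->hw_ptr + i) % sizeof(r->buf)];
          return todo;
  }

  /* caller must hold r->lock */
  static void __ack(struct ring *r, size_t n)
  {
          r->hw_ptr = (r->hw_ptr + n) % sizeof(r->buf);
          r->avail -= n;
  }

  static size_t transmit(struct ring *r, unsigned char *dst, size_t n)
  {
          pthread_spin_lock(&r->lock);
          size_t done = __peek(r, dst, n);        /* peek and ... */
          __ack(r, done);                         /* ... ack under one lock */
          pthread_spin_unlock(&r->lock);
          return done;
  }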
BugLink: http://lkml.kernel.org/r/CACT4Y+YPq1+cYLkadwjWa5XjzF1_Vki1eHnVn-Lm0hzhSpu5PA@mail.gmail.com BugLink: http://lkml.kernel.org/r/CACT4Y+acG4iyphdOZx47Nyq_VHGbpJQK-6xNpiqUjaZYqsXOGw@mail.gmail.com Reported-by: Dmitry Vyukov Tested-by: Dmitry Vyukov Cc: Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- include/sound/rawmidi.h | 4 ++ sound/core/rawmidi.c | 98 ++++++++++++++++++++++++++---------- sound/core/seq/seq_virmidi.c | 17 +++++-- 3 files changed, 88 insertions(+), 31 deletions(-) diff --git a/include/sound/rawmidi.h b/include/sound/rawmidi.h index f6cbef78db620..3b91ad5d51158 100644 --- a/include/sound/rawmidi.h +++ b/include/sound/rawmidi.h @@ -167,6 +167,10 @@ int snd_rawmidi_transmit_peek(struct snd_rawmidi_substream *substream, int snd_rawmidi_transmit_ack(struct snd_rawmidi_substream *substream, int count); int snd_rawmidi_transmit(struct snd_rawmidi_substream *substream, unsigned char *buffer, int count); +int __snd_rawmidi_transmit_peek(struct snd_rawmidi_substream *substream, + unsigned char *buffer, int count); +int __snd_rawmidi_transmit_ack(struct snd_rawmidi_substream *substream, + int count); /* main midi functions */ diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c index f75d1656272c8..26ca022488857 100644 --- a/sound/core/rawmidi.c +++ b/sound/core/rawmidi.c @@ -1055,23 +1055,16 @@ int snd_rawmidi_transmit_empty(struct snd_rawmidi_substream *substream) EXPORT_SYMBOL(snd_rawmidi_transmit_empty); /** - * snd_rawmidi_transmit_peek - copy data from the internal buffer + * __snd_rawmidi_transmit_peek - copy data from the internal buffer * @substream: the rawmidi substream * @buffer: the buffer pointer * @count: data size to transfer * - * Copies data from the internal output buffer to the given buffer. - * - * Call this in the interrupt handler when the midi output is ready, - * and call snd_rawmidi_transmit_ack() after the transmission is - * finished. - * - * Return: The size of copied data, or a negative error code on failure. + * This is a variant of snd_rawmidi_transmit_peek() without spinlock. */ -int snd_rawmidi_transmit_peek(struct snd_rawmidi_substream *substream, +int __snd_rawmidi_transmit_peek(struct snd_rawmidi_substream *substream, unsigned char *buffer, int count) { - unsigned long flags; int result, count1; struct snd_rawmidi_runtime *runtime = substream->runtime; @@ -1081,7 +1074,6 @@ int snd_rawmidi_transmit_peek(struct snd_rawmidi_substream *substream, return -EINVAL; } result = 0; - spin_lock_irqsave(&runtime->lock, flags); if (runtime->avail >= runtime->buffer_size) { /* warning: lowlevel layer MUST trigger down the hardware */ goto __skip; @@ -1106,25 +1098,47 @@ int snd_rawmidi_transmit_peek(struct snd_rawmidi_substream *substream, } } __skip: + return result; +} +EXPORT_SYMBOL(__snd_rawmidi_transmit_peek); + +/** + * snd_rawmidi_transmit_peek - copy data from the internal buffer + * @substream: the rawmidi substream + * @buffer: the buffer pointer + * @count: data size to transfer + * + * Copies data from the internal output buffer to the given buffer. + * + * Call this in the interrupt handler when the midi output is ready, + * and call snd_rawmidi_transmit_ack() after the transmission is + * finished. + * + * Return: The size of copied data, or a negative error code on failure. 
+ */ +int snd_rawmidi_transmit_peek(struct snd_rawmidi_substream *substream, + unsigned char *buffer, int count) +{ + struct snd_rawmidi_runtime *runtime = substream->runtime; + int result; + unsigned long flags; + + spin_lock_irqsave(&runtime->lock, flags); + result = __snd_rawmidi_transmit_peek(substream, buffer, count); spin_unlock_irqrestore(&runtime->lock, flags); return result; } EXPORT_SYMBOL(snd_rawmidi_transmit_peek); /** - * snd_rawmidi_transmit_ack - acknowledge the transmission + * __snd_rawmidi_transmit_ack - acknowledge the transmission * @substream: the rawmidi substream * @count: the transferred count * - * Advances the hardware pointer for the internal output buffer with - * the given size and updates the condition. - * Call after the transmission is finished. - * - * Return: The advanced size if successful, or a negative error code on failure. + * This is a variant of __snd_rawmidi_transmit_ack() without spinlock. */ -int snd_rawmidi_transmit_ack(struct snd_rawmidi_substream *substream, int count) +int __snd_rawmidi_transmit_ack(struct snd_rawmidi_substream *substream, int count) { - unsigned long flags; struct snd_rawmidi_runtime *runtime = substream->runtime; if (runtime->buffer == NULL) { @@ -1132,7 +1146,6 @@ int snd_rawmidi_transmit_ack(struct snd_rawmidi_substream *substream, int count) "snd_rawmidi_transmit_ack: output is not active!!!\n"); return -EINVAL; } - spin_lock_irqsave(&runtime->lock, flags); snd_BUG_ON(runtime->avail + count > runtime->buffer_size); runtime->hw_ptr += count; runtime->hw_ptr %= runtime->buffer_size; @@ -1142,9 +1155,32 @@ int snd_rawmidi_transmit_ack(struct snd_rawmidi_substream *substream, int count) if (runtime->drain || snd_rawmidi_ready(substream)) wake_up(&runtime->sleep); } - spin_unlock_irqrestore(&runtime->lock, flags); return count; } +EXPORT_SYMBOL(__snd_rawmidi_transmit_ack); + +/** + * snd_rawmidi_transmit_ack - acknowledge the transmission + * @substream: the rawmidi substream + * @count: the transferred count + * + * Advances the hardware pointer for the internal output buffer with + * the given size and updates the condition. + * Call after the transmission is finished. + * + * Return: The advanced size if successful, or a negative error code on failure. 
+ */ +int snd_rawmidi_transmit_ack(struct snd_rawmidi_substream *substream, int count) +{ + struct snd_rawmidi_runtime *runtime = substream->runtime; + int result; + unsigned long flags; + + spin_lock_irqsave(&runtime->lock, flags); + result = __snd_rawmidi_transmit_ack(substream, count); + spin_unlock_irqrestore(&runtime->lock, flags); + return result; +} EXPORT_SYMBOL(snd_rawmidi_transmit_ack); /** @@ -1160,12 +1196,22 @@ EXPORT_SYMBOL(snd_rawmidi_transmit_ack); int snd_rawmidi_transmit(struct snd_rawmidi_substream *substream, unsigned char *buffer, int count) { + struct snd_rawmidi_runtime *runtime = substream->runtime; + int result; + unsigned long flags; + + spin_lock_irqsave(&runtime->lock, flags); if (!substream->opened) - return -EBADFD; - count = snd_rawmidi_transmit_peek(substream, buffer, count); - if (count < 0) - return count; - return snd_rawmidi_transmit_ack(substream, count); + result = -EBADFD; + else { + count = __snd_rawmidi_transmit_peek(substream, buffer, count); + if (count <= 0) + result = count; + else + result = __snd_rawmidi_transmit_ack(substream, count); + } + spin_unlock_irqrestore(&runtime->lock, flags); + return result; } EXPORT_SYMBOL(snd_rawmidi_transmit); diff --git a/sound/core/seq/seq_virmidi.c b/sound/core/seq/seq_virmidi.c index f2975927551e9..81134e067184f 100644 --- a/sound/core/seq/seq_virmidi.c +++ b/sound/core/seq/seq_virmidi.c @@ -155,21 +155,26 @@ static void snd_virmidi_output_trigger(struct snd_rawmidi_substream *substream, struct snd_virmidi *vmidi = substream->runtime->private_data; int count, res; unsigned char buf[32], *pbuf; + unsigned long flags; if (up) { vmidi->trigger = 1; if (vmidi->seq_mode == SNDRV_VIRMIDI_SEQ_DISPATCH && !(vmidi->rdev->flags & SNDRV_VIRMIDI_SUBSCRIBE)) { - snd_rawmidi_transmit_ack(substream, substream->runtime->buffer_size - substream->runtime->avail); - return; /* ignored */ + while (snd_rawmidi_transmit(substream, buf, + sizeof(buf)) > 0) { + /* ignored */ + } + return; } if (vmidi->event.type != SNDRV_SEQ_EVENT_NONE) { if (snd_seq_kernel_client_dispatch(vmidi->client, &vmidi->event, in_atomic(), 0) < 0) return; vmidi->event.type = SNDRV_SEQ_EVENT_NONE; } + spin_lock_irqsave(&substream->runtime->lock, flags); while (1) { - count = snd_rawmidi_transmit_peek(substream, buf, sizeof(buf)); + count = __snd_rawmidi_transmit_peek(substream, buf, sizeof(buf)); if (count <= 0) break; pbuf = buf; @@ -179,16 +184,18 @@ static void snd_virmidi_output_trigger(struct snd_rawmidi_substream *substream, snd_midi_event_reset_encode(vmidi->parser); continue; } - snd_rawmidi_transmit_ack(substream, res); + __snd_rawmidi_transmit_ack(substream, res); pbuf += res; count -= res; if (vmidi->event.type != SNDRV_SEQ_EVENT_NONE) { if (snd_seq_kernel_client_dispatch(vmidi->client, &vmidi->event, in_atomic(), 0) < 0) - return; + goto out; vmidi->event.type = SNDRV_SEQ_EVENT_NONE; } } } + out: + spin_unlock_irqrestore(&substream->runtime->lock, flags); } else { vmidi->trigger = 0; } From a2c892c31423bdef823c6f6b2d54c159be8f4e20 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 3 Feb 2016 14:41:22 +0100 Subject: [PATCH 1842/2091] ALSA: rawmidi: Fix race at copying & updating the position [ Upstream commit 81f577542af15640cbcb6ef68baa4caa610cbbfc ] The rawmidi read and write functions manage runtime stream status such as runtime->appl_ptr and runtime->avail. These point where to copy the new data and how many bytes have been copied (or to be read). 
The problem is that rawmidi read/write call copy_from_user() or copy_to_user(), and the runtime spinlock is temporarily unlocked and relocked while copying user-space. Since the current code advances and updates the runtime status after the spin unlock/relock, the copy and the update may be asynchronous, and eventually runtime->avail might go to a negative value when many concurrent accesses are done. This may lead to memory corruption in the end. For fixing this race, in this patch, the status update code is performed in the same lock before the temporary unlock. Also, the spinlock is now taken more widely in snd_rawmidi_kernel_read1() for protecting more properly during the whole operation. BugLink: http://lkml.kernel.org/r/CACT4Y+b-dCmNf1GpgPKfDO0ih+uZCL2JV4__j-r1kdhPLSgQCQ@mail.gmail.com Reported-by: Dmitry Vyukov Tested-by: Dmitry Vyukov Cc: Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/core/rawmidi.c | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c index 26ca022488857..795437b100820 100644 --- a/sound/core/rawmidi.c +++ b/sound/core/rawmidi.c @@ -942,31 +942,36 @@ static long snd_rawmidi_kernel_read1(struct snd_rawmidi_substream *substream, unsigned long flags; long result = 0, count1; struct snd_rawmidi_runtime *runtime = substream->runtime; + unsigned long appl_ptr; + spin_lock_irqsave(&runtime->lock, flags); while (count > 0 && runtime->avail) { count1 = runtime->buffer_size - runtime->appl_ptr; if (count1 > count) count1 = count; - spin_lock_irqsave(&runtime->lock, flags); if (count1 > (int)runtime->avail) count1 = runtime->avail; + + /* update runtime->appl_ptr before unlocking for userbuf */ + appl_ptr = runtime->appl_ptr; + runtime->appl_ptr += count1; + runtime->appl_ptr %= runtime->buffer_size; + runtime->avail -= count1; + if (kernelbuf) - memcpy(kernelbuf + result, runtime->buffer + runtime->appl_ptr, count1); + memcpy(kernelbuf + result, runtime->buffer + appl_ptr, count1); if (userbuf) { spin_unlock_irqrestore(&runtime->lock, flags); if (copy_to_user(userbuf + result, - runtime->buffer + runtime->appl_ptr, count1)) { + runtime->buffer + appl_ptr, count1)) { return result > 0 ? 
result : -EFAULT; } spin_lock_irqsave(&runtime->lock, flags); } - runtime->appl_ptr += count1; - runtime->appl_ptr %= runtime->buffer_size; - runtime->avail -= count1; - spin_unlock_irqrestore(&runtime->lock, flags); result += count1; count -= count1; } + spin_unlock_irqrestore(&runtime->lock, flags); return result; } @@ -1223,6 +1228,7 @@ static long snd_rawmidi_kernel_write1(struct snd_rawmidi_substream *substream, unsigned long flags; long count1, result; struct snd_rawmidi_runtime *runtime = substream->runtime; + unsigned long appl_ptr; if (!kernelbuf && !userbuf) return -EINVAL; @@ -1243,12 +1249,19 @@ static long snd_rawmidi_kernel_write1(struct snd_rawmidi_substream *substream, count1 = count; if (count1 > (long)runtime->avail) count1 = runtime->avail; + + /* update runtime->appl_ptr before unlocking for userbuf */ + appl_ptr = runtime->appl_ptr; + runtime->appl_ptr += count1; + runtime->appl_ptr %= runtime->buffer_size; + runtime->avail -= count1; + if (kernelbuf) - memcpy(runtime->buffer + runtime->appl_ptr, + memcpy(runtime->buffer + appl_ptr, kernelbuf + result, count1); else if (userbuf) { spin_unlock_irqrestore(&runtime->lock, flags); - if (copy_from_user(runtime->buffer + runtime->appl_ptr, + if (copy_from_user(runtime->buffer + appl_ptr, userbuf + result, count1)) { spin_lock_irqsave(&runtime->lock, flags); result = result > 0 ? result : -EFAULT; @@ -1256,9 +1269,6 @@ static long snd_rawmidi_kernel_write1(struct snd_rawmidi_substream *substream, } spin_lock_irqsave(&runtime->lock, flags); } - runtime->appl_ptr += count1; - runtime->appl_ptr %= runtime->buffer_size; - runtime->avail -= count1; result += count1; count -= count1; } From 44660dacb478f79d7fbc7cec0ef342414b563b6a Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 3 Feb 2016 08:32:44 +0100 Subject: [PATCH 1843/2091] ALSA: seq: Fix lockdep warnings due to double mutex locks [ Upstream commit 7f0973e973cd74aa40747c9d38844560cd184ee8 ] The port subscription code uses double mutex locks for source and destination ports, and this may become racy once when wrongly set up. It leads to lockdep warning splat, typically triggered by fuzzer like syzkaller, although the actual deadlock hasn't been seen, so far. This patch simplifies the handling by reducing to two single locks, so that no lockdep warning will be trigger any longer. By splitting to two actions, a still-in-progress element shall be added in one list while handling another. For ignoring this element, a new check is added in deliver_to_subscribers(). Along with it, the code to add/remove the subscribers list element was cleaned up and refactored. BugLink: http://lkml.kernel.org/r/CACT4Y+aKQXV7xkBW9hpQbzaDO7LrUvohxWh-UwMxXjDy-yBD=A@mail.gmail.com Reported-by: Dmitry Vyukov Tested-by: Dmitry Vyukov Cc: Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/core/seq/seq_clientmgr.c | 3 + sound/core/seq/seq_ports.c | 233 ++++++++++++++++++--------------- 2 files changed, 133 insertions(+), 103 deletions(-) diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c index bd4741442909a..ce6703ecfcefc 100644 --- a/sound/core/seq/seq_clientmgr.c +++ b/sound/core/seq/seq_clientmgr.c @@ -678,6 +678,9 @@ static int deliver_to_subscribers(struct snd_seq_client *client, else down_read(&grp->list_mutex); list_for_each_entry(subs, &grp->list_head, src_list) { + /* both ports ready? 
*/ + if (atomic_read(&subs->ref_count) != 2) + continue; event->dest = subs->info.dest; if (subs->info.flags & SNDRV_SEQ_PORT_SUBS_TIMESTAMP) /* convert time according to flag with subscription */ diff --git a/sound/core/seq/seq_ports.c b/sound/core/seq/seq_ports.c index 55170a20ae723..921fb2bd8fadb 100644 --- a/sound/core/seq/seq_ports.c +++ b/sound/core/seq/seq_ports.c @@ -173,10 +173,6 @@ struct snd_seq_client_port *snd_seq_create_port(struct snd_seq_client *client, } /* */ -enum group_type { - SRC_LIST, DEST_LIST -}; - static int subscribe_port(struct snd_seq_client *client, struct snd_seq_client_port *port, struct snd_seq_port_subs_info *grp, @@ -203,6 +199,20 @@ static struct snd_seq_client_port *get_client_port(struct snd_seq_addr *addr, return NULL; } +static void delete_and_unsubscribe_port(struct snd_seq_client *client, + struct snd_seq_client_port *port, + struct snd_seq_subscribers *subs, + bool is_src, bool ack); + +static inline struct snd_seq_subscribers * +get_subscriber(struct list_head *p, bool is_src) +{ + if (is_src) + return list_entry(p, struct snd_seq_subscribers, src_list); + else + return list_entry(p, struct snd_seq_subscribers, dest_list); +} + /* * remove all subscribers on the list * this is called from port_delete, for each src and dest list. @@ -210,7 +220,7 @@ static struct snd_seq_client_port *get_client_port(struct snd_seq_addr *addr, static void clear_subscriber_list(struct snd_seq_client *client, struct snd_seq_client_port *port, struct snd_seq_port_subs_info *grp, - int grptype) + int is_src) { struct list_head *p, *n; @@ -219,15 +229,13 @@ static void clear_subscriber_list(struct snd_seq_client *client, struct snd_seq_client *c; struct snd_seq_client_port *aport; - if (grptype == SRC_LIST) { - subs = list_entry(p, struct snd_seq_subscribers, src_list); + subs = get_subscriber(p, is_src); + if (is_src) aport = get_client_port(&subs->info.dest, &c); - } else { - subs = list_entry(p, struct snd_seq_subscribers, dest_list); + else aport = get_client_port(&subs->info.sender, &c); - } - list_del(p); - unsubscribe_port(client, port, grp, &subs->info, 0); + delete_and_unsubscribe_port(client, port, subs, is_src, false); + if (!aport) { /* looks like the connected port is being deleted. * we decrease the counter, and when both ports are deleted @@ -235,21 +243,14 @@ static void clear_subscriber_list(struct snd_seq_client *client, */ if (atomic_dec_and_test(&subs->ref_count)) kfree(subs); - } else { - /* ok we got the connected port */ - struct snd_seq_port_subs_info *agrp; - agrp = (grptype == SRC_LIST) ? 
&aport->c_dest : &aport->c_src; - down_write(&agrp->list_mutex); - if (grptype == SRC_LIST) - list_del(&subs->dest_list); - else - list_del(&subs->src_list); - up_write(&agrp->list_mutex); - unsubscribe_port(c, aport, agrp, &subs->info, 1); - kfree(subs); - snd_seq_port_unlock(aport); - snd_seq_client_unlock(c); + continue; } + + /* ok we got the connected port */ + delete_and_unsubscribe_port(c, aport, subs, !is_src, true); + kfree(subs); + snd_seq_port_unlock(aport); + snd_seq_client_unlock(c); } } @@ -262,8 +263,8 @@ static int port_delete(struct snd_seq_client *client, snd_use_lock_sync(&port->use_lock); /* clear subscribers info */ - clear_subscriber_list(client, port, &port->c_src, SRC_LIST); - clear_subscriber_list(client, port, &port->c_dest, DEST_LIST); + clear_subscriber_list(client, port, &port->c_src, true); + clear_subscriber_list(client, port, &port->c_dest, false); if (port->private_free) port->private_free(port->private_data); @@ -479,85 +480,120 @@ static int match_subs_info(struct snd_seq_port_subscribe *r, return 0; } - -/* connect two ports */ -int snd_seq_port_connect(struct snd_seq_client *connector, - struct snd_seq_client *src_client, - struct snd_seq_client_port *src_port, - struct snd_seq_client *dest_client, - struct snd_seq_client_port *dest_port, - struct snd_seq_port_subscribe *info) +static int check_and_subscribe_port(struct snd_seq_client *client, + struct snd_seq_client_port *port, + struct snd_seq_subscribers *subs, + bool is_src, bool exclusive, bool ack) { - struct snd_seq_port_subs_info *src = &src_port->c_src; - struct snd_seq_port_subs_info *dest = &dest_port->c_dest; - struct snd_seq_subscribers *subs, *s; - int err, src_called = 0; - unsigned long flags; - int exclusive; + struct snd_seq_port_subs_info *grp; + struct list_head *p; + struct snd_seq_subscribers *s; + int err; - subs = kzalloc(sizeof(*subs), GFP_KERNEL); - if (! subs) - return -ENOMEM; - - subs->info = *info; - atomic_set(&subs->ref_count, 2); - - down_write(&src->list_mutex); - down_write_nested(&dest->list_mutex, SINGLE_DEPTH_NESTING); - - exclusive = info->flags & SNDRV_SEQ_PORT_SUBS_EXCLUSIVE ? 1 : 0; + grp = is_src ? &port->c_src : &port->c_dest; err = -EBUSY; + down_write(&grp->list_mutex); if (exclusive) { - if (! list_empty(&src->list_head) || ! 
list_empty(&dest->list_head)) + if (!list_empty(&grp->list_head)) goto __error; } else { - if (src->exclusive || dest->exclusive) + if (grp->exclusive) goto __error; /* check whether already exists */ - list_for_each_entry(s, &src->list_head, src_list) { - if (match_subs_info(info, &s->info)) - goto __error; - } - list_for_each_entry(s, &dest->list_head, dest_list) { - if (match_subs_info(info, &s->info)) + list_for_each(p, &grp->list_head) { + s = get_subscriber(p, is_src); + if (match_subs_info(&subs->info, &s->info)) goto __error; } } - if ((err = subscribe_port(src_client, src_port, src, info, - connector->number != src_client->number)) < 0) - goto __error; - src_called = 1; - - if ((err = subscribe_port(dest_client, dest_port, dest, info, - connector->number != dest_client->number)) < 0) + err = subscribe_port(client, port, grp, &subs->info, ack); + if (err < 0) { + grp->exclusive = 0; goto __error; + } /* add to list */ - write_lock_irqsave(&src->list_lock, flags); - // write_lock(&dest->list_lock); // no other lock yet - list_add_tail(&subs->src_list, &src->list_head); - list_add_tail(&subs->dest_list, &dest->list_head); - // write_unlock(&dest->list_lock); // no other lock yet - write_unlock_irqrestore(&src->list_lock, flags); + write_lock_irq(&grp->list_lock); + if (is_src) + list_add_tail(&subs->src_list, &grp->list_head); + else + list_add_tail(&subs->dest_list, &grp->list_head); + grp->exclusive = exclusive; + atomic_inc(&subs->ref_count); + write_unlock_irq(&grp->list_lock); + err = 0; + + __error: + up_write(&grp->list_mutex); + return err; +} - src->exclusive = dest->exclusive = exclusive; +static void delete_and_unsubscribe_port(struct snd_seq_client *client, + struct snd_seq_client_port *port, + struct snd_seq_subscribers *subs, + bool is_src, bool ack) +{ + struct snd_seq_port_subs_info *grp; + + grp = is_src ? 
&port->c_src : &port->c_dest; + down_write(&grp->list_mutex); + write_lock_irq(&grp->list_lock); + if (is_src) + list_del(&subs->src_list); + else + list_del(&subs->dest_list); + grp->exclusive = 0; + write_unlock_irq(&grp->list_lock); + up_write(&grp->list_mutex); + + unsubscribe_port(client, port, grp, &subs->info, ack); +} + +/* connect two ports */ +int snd_seq_port_connect(struct snd_seq_client *connector, + struct snd_seq_client *src_client, + struct snd_seq_client_port *src_port, + struct snd_seq_client *dest_client, + struct snd_seq_client_port *dest_port, + struct snd_seq_port_subscribe *info) +{ + struct snd_seq_subscribers *subs; + bool exclusive; + int err; + + subs = kzalloc(sizeof(*subs), GFP_KERNEL); + if (!subs) + return -ENOMEM; + + subs->info = *info; + atomic_set(&subs->ref_count, 0); + INIT_LIST_HEAD(&subs->src_list); + INIT_LIST_HEAD(&subs->dest_list); + + exclusive = !!(info->flags & SNDRV_SEQ_PORT_SUBS_EXCLUSIVE); + + err = check_and_subscribe_port(src_client, src_port, subs, true, + exclusive, + connector->number != src_client->number); + if (err < 0) + goto error; + err = check_and_subscribe_port(dest_client, dest_port, subs, false, + exclusive, + connector->number != dest_client->number); + if (err < 0) + goto error_dest; - up_write(&dest->list_mutex); - up_write(&src->list_mutex); return 0; - __error: - if (src_called) - unsubscribe_port(src_client, src_port, src, info, - connector->number != src_client->number); + error_dest: + delete_and_unsubscribe_port(src_client, src_port, subs, true, + connector->number != src_client->number); + error: kfree(subs); - up_write(&dest->list_mutex); - up_write(&src->list_mutex); return err; } - /* remove the connection */ int snd_seq_port_disconnect(struct snd_seq_client *connector, struct snd_seq_client *src_client, @@ -567,37 +603,28 @@ int snd_seq_port_disconnect(struct snd_seq_client *connector, struct snd_seq_port_subscribe *info) { struct snd_seq_port_subs_info *src = &src_port->c_src; - struct snd_seq_port_subs_info *dest = &dest_port->c_dest; struct snd_seq_subscribers *subs; int err = -ENOENT; - unsigned long flags; down_write(&src->list_mutex); - down_write_nested(&dest->list_mutex, SINGLE_DEPTH_NESTING); - /* look for the connection */ list_for_each_entry(subs, &src->list_head, src_list) { if (match_subs_info(info, &subs->info)) { - write_lock_irqsave(&src->list_lock, flags); - // write_lock(&dest->list_lock); // no lock yet - list_del(&subs->src_list); - list_del(&subs->dest_list); - // write_unlock(&dest->list_lock); - write_unlock_irqrestore(&src->list_lock, flags); - src->exclusive = dest->exclusive = 0; - unsubscribe_port(src_client, src_port, src, info, - connector->number != src_client->number); - unsubscribe_port(dest_client, dest_port, dest, info, - connector->number != dest_client->number); - kfree(subs); + atomic_dec(&subs->ref_count); /* mark as not ready */ err = 0; break; } } - - up_write(&dest->list_mutex); up_write(&src->list_mutex); - return err; + if (err < 0) + return err; + + delete_and_unsubscribe_port(src_client, src_port, subs, true, + connector->number != src_client->number); + delete_and_unsubscribe_port(dest_client, dest_port, subs, false, + connector->number != dest_client->number); + kfree(subs); + return 0; } From 46aef54fb8feb8158bfe0237ecec754ce5003f41 Mon Sep 17 00:00:00 2001 From: "Kirill A. 
Shutemov" Date: Tue, 2 Feb 2016 16:57:35 -0800 Subject: [PATCH 1844/2091] drivers/scsi/sg.c: mark VMA as VM_IO to prevent migration [ Upstream commit 461c7fa126794157484dca48e88effa4963e3af3 ] Reduced testcase: #include #include #include #include #define SIZE 0x2000 int main() { int fd; void *p; fd = open("/dev/sg0", O_RDWR); p = mmap(NULL, SIZE, PROT_EXEC, MAP_PRIVATE | MAP_LOCKED, fd, 0); mbind(p, SIZE, 0, NULL, 0, MPOL_MF_MOVE); return 0; } We shouldn't try to migrate pages in sg VMA as we don't have a way to update Sg_scatter_hold::pages accordingly from mm core. Let's mark the VMA as VM_IO to indicate to mm core that the VMA is not migratable. Signed-off-by: Kirill A. Shutemov Reported-by: Dmitry Vyukov Acked-by: Vlastimil Babka Cc: Doug Gilbert Cc: David Rientjes Cc: Naoya Horiguchi Cc: "Kirill A. Shutemov" Cc: Shiraz Hashim Cc: Hugh Dickins Cc: Sasha Levin Cc: syzkaller Cc: Kostya Serebryany Cc: Alexander Potapenko Cc: James Bottomley Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- drivers/scsi/sg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 9d7b7db75e4b9..3bbf4853733cd 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -1255,7 +1255,7 @@ sg_mmap(struct file *filp, struct vm_area_struct *vma) } sfp->mmap_called = 1; - vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; + vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP; vma->vm_private_data = sfp; vma->vm_ops = &sg_mmap_vm_ops; return 0; From cd3c40afce6cd0209432e6690e60c2156a27c68d Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 2 Feb 2016 16:57:52 -0800 Subject: [PATCH 1845/2091] radix-tree: fix race in gang lookup [ Upstream commit 46437f9a554fbe3e110580ca08ab703b59f2f95a ] If the indirect_ptr bit is set on a slot, that indicates we need to redo the lookup. Introduce a new function radix_tree_iter_retry() which forces the loop to retry the lookup by setting 'slot' to NULL and turning the iterator back to point at the problematic entry. This is a pretty rare problem to hit at the moment; the lookup has to race with a grow of the radix tree from a height of 0. The consequences of hitting this race are that gang lookup could return a pointer to a radix_tree_node instead of a pointer to whatever the user had inserted in the tree. Fixes: cebbd29e1c2f ("radix-tree: rewrite gang lookup using iterator") Signed-off-by: Matthew Wilcox Cc: Hugh Dickins Cc: Ohad Ben-Cohen Cc: Konstantin Khlebnikov Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- include/linux/radix-tree.h | 16 ++++++++++++++++ lib/radix-tree.c | 12 ++++++++++-- 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 33170dbd9db40..1a2b2276ffb3a 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -369,6 +369,22 @@ radix_tree_iter_init(struct radix_tree_iter *iter, unsigned long start) void **radix_tree_next_chunk(struct radix_tree_root *root, struct radix_tree_iter *iter, unsigned flags); +/** + * radix_tree_iter_retry - retry this chunk of the iteration + * @iter: iterator state + * + * If we iterate over a tree protected only by the RCU lock, a race + * against deletion or creation may result in seeing a slot for which + * radix_tree_deref_retry() returns true. If so, call this function + * and continue the iteration. 
+ */ +static inline __must_check +void **radix_tree_iter_retry(struct radix_tree_iter *iter) +{ + iter->next_index = iter->index; + return NULL; +} + /** * radix_tree_chunk_size - get current chunk size * diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 3d2aa27b845b5..8399002aa0f02 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -1014,9 +1014,13 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results, return 0; radix_tree_for_each_slot(slot, root, &iter, first_index) { - results[ret] = indirect_to_ptr(rcu_dereference_raw(*slot)); + results[ret] = rcu_dereference_raw(*slot); if (!results[ret]) continue; + if (radix_tree_is_indirect_ptr(results[ret])) { + slot = radix_tree_iter_retry(&iter); + continue; + } if (++ret == max_items) break; } @@ -1093,9 +1097,13 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results, return 0; radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) { - results[ret] = indirect_to_ptr(rcu_dereference_raw(*slot)); + results[ret] = rcu_dereference_raw(*slot); if (!results[ret]) continue; + if (radix_tree_is_indirect_ptr(results[ret])) { + slot = radix_tree_iter_retry(&iter); + continue; + } if (++ret == max_items) break; } From bba8f524da11a915f37835925cc602eb64383b56 Mon Sep 17 00:00:00 2001 From: Rajmohan Mani Date: Tue, 21 Jul 2015 17:20:26 +0300 Subject: [PATCH 1846/2091] xhci: Workaround to get D3 working in Intel xHCI [ Upstream commit abce329c27b315cfc01be1a305ee976ee13ed4cf ] The xHCI in Intel CherryView / Braswell Platform requires a driver workaround to get xHCI D3 working. Without this workaround, xHCI might not enter D3. Workaround is to configure SSIC PORT as "unused" before D3 entry and "used" after D3 exit. This is done through a vendor specific register (PORT2_SSIC_CONFIG_REG2 at offset 0x883c), in xhci suspend / resume callbacks. Verified xHCI D3 works fine in CherryView / Braswell platform. Signed-off-by: Rajmohan Mani Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/host/xhci-pci.c | 40 ++++++++++++++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 7e5c90eebb9c0..033e23a3ab6f4 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -27,6 +27,10 @@ #include "xhci.h" #include "xhci-trace.h" +#define PORT2_SSIC_CONFIG_REG2 0x883c +#define PROG_DONE (1 << 30) +#define SSIC_PORT_UNUSED (1 << 31) + /* Device for a quirk */ #define PCI_VENDOR_ID_FRESCO_LOGIC 0x1b73 #define PCI_DEVICE_ID_FRESCO_LOGIC_PDK 0x1000 @@ -170,14 +174,44 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) } /* + * In some Intel xHCI controllers, in order to get D3 working, + * through a vendor specific SSIC CONFIG register at offset 0x883c, + * SSIC PORT need to be marked as "unused" before putting xHCI + * into D3. After D3 exit, the SSIC port need to be marked as "used". + * Without this change, xHCI might not enter D3 state. 
* Make sure PME works on some Intel xHCI controllers by writing 1 to clear * the Internal PME flag bit in vendor specific PMCTRL register at offset 0x80a4 */ -static void xhci_pme_quirk(struct xhci_hcd *xhci) +static void xhci_pme_quirk(struct usb_hcd *hcd, bool suspend) { + struct xhci_hcd *xhci = hcd_to_xhci(hcd); + struct pci_dev *pdev = to_pci_dev(hcd->self.controller); u32 val; void __iomem *reg; + if (pdev->vendor == PCI_VENDOR_ID_INTEL && + pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI) { + + reg = (void __iomem *) xhci->cap_regs + PORT2_SSIC_CONFIG_REG2; + + /* Notify SSIC that SSIC profile programming is not done */ + val = readl(reg) & ~PROG_DONE; + writel(val, reg); + + /* Mark SSIC port as unused(suspend) or used(resume) */ + val = readl(reg); + if (suspend) + val |= SSIC_PORT_UNUSED; + else + val &= ~SSIC_PORT_UNUSED; + writel(val, reg); + + /* Notify SSIC that SSIC profile programming is done */ + val = readl(reg) | PROG_DONE; + writel(val, reg); + readl(reg); + } + reg = (void __iomem *) xhci->cap_regs + 0x80a4; val = readl(reg); writel(val | BIT(28), reg); @@ -309,7 +343,7 @@ static int xhci_pci_suspend(struct usb_hcd *hcd, bool do_wakeup) pdev->no_d3cold = true; if (xhci->quirks & XHCI_PME_STUCK_QUIRK) - xhci_pme_quirk(xhci); + xhci_pme_quirk(hcd, true); return xhci_suspend(xhci, do_wakeup); } @@ -342,7 +376,7 @@ static int xhci_pci_resume(struct usb_hcd *hcd, bool hibernated) usb_enable_intel_xhci_ports(pdev); if (xhci->quirks & XHCI_PME_STUCK_QUIRK) - xhci_pme_quirk(xhci); + xhci_pme_quirk(hcd, false); retval = xhci_resume(xhci, hibernated); return retval; From 5d3a10799e1199acef123e57a014b491a12044d4 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Tue, 21 Jul 2015 17:20:25 +0300 Subject: [PATCH 1847/2091] xhci: call BIOS workaround to enable runtime suspend on Intel Braswell [ Upstream commit c3c5819a350952439c3198aa46581f9e4c46557f ] Intel xhci hw that require XHCI_PME_STUCK quirk have as default disabled xhci from going to D3 state in runtime suspend. Driver needs to verify it can deal with the hw by calling an ACPI _DSM method to get D3 enabled. 
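For orientation only (this note is not part of the patch): the 16-byte array passed to acpi_evaluate_dsm() in the hunk below is the ACPI packed, mixed-endian encoding of the _DSM GUID. Reading the first three fields as little-endian gives the conventional spelling, and the remaining arguments select revision 3, function index 1, with no package argument:

    b7 0c 34 ac  01 e9  bf 45  b7 e6  2b 34 ec 93 1e 23
        -> ac340cb7-e901-45bf-b7e6-2b34ec931e23

    acpi_evaluate_dsm(ACPI_HANDLE(&dev->dev), intel_dsm_uuid,
                      3 /* revision */, 1 /* function index */,
                      NULL /* no arguments package */);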
Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/host/xhci-pci.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 033e23a3ab6f4..0228e97cd09e3 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -23,6 +23,7 @@ #include #include #include +#include #include "xhci.h" #include "xhci-trace.h" @@ -218,6 +219,19 @@ static void xhci_pme_quirk(struct usb_hcd *hcd, bool suspend) readl(reg); } +#ifdef CONFIG_ACPI +static void xhci_pme_acpi_rtd3_enable(struct pci_dev *dev) +{ + static const u8 intel_dsm_uuid[] = { + 0xb7, 0x0c, 0x34, 0xac, 0x01, 0xe9, 0xbf, 0x45, + 0xb7, 0xe6, 0x2b, 0x34, 0xec, 0x93, 0x1e, 0x23, + }; + acpi_evaluate_dsm(ACPI_HANDLE(&dev->dev), intel_dsm_uuid, 3, 1, NULL); +} +#else + static void xhci_pme_acpi_rtd3_enable(struct pci_dev *dev) { } +#endif /* CONFIG_ACPI */ + /* called during probe() after chip reset completes */ static int xhci_pci_setup(struct usb_hcd *hcd) { @@ -297,6 +311,9 @@ static int xhci_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) HCC_MAX_PSA(xhci->hcc_params) >= 4) xhci->shared_hcd->can_do_streams = 1; + if (xhci->quirks & XHCI_PME_STUCK_QUIRK) + xhci_pme_acpi_rtd3_enable(dev); + /* USB-2 and USB-3 roothubs initialized, allow runtime pm suspend */ pm_runtime_put_noidle(&dev->dev); From fa91182ea7c3ea6476cf24ced5cb9878216d5f9e Mon Sep 17 00:00:00 2001 From: Tomer Barletz Date: Mon, 21 Sep 2015 17:46:11 +0300 Subject: [PATCH 1848/2091] xhci: Move xhci_pme_quirk() behind #ifdef CONFIG_PM [ Upstream commit 2b7627b73e81e5d23d5ae1490fe8e690af86e053 ] xhci_pme_quirk() is only used when CONFIG_PM is defined. Compiling a kernel without PM complains about this function [reworded commit message -Mathias] Cc: Signed-off-by: Tomer Barletz Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/host/xhci-pci.c | 90 ++++++++++++++++++------------------- 1 file changed, 45 insertions(+), 45 deletions(-) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 0228e97cd09e3..dd41053afd6fe 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -174,51 +174,6 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) "QUIRK: Resetting on resume"); } -/* - * In some Intel xHCI controllers, in order to get D3 working, - * through a vendor specific SSIC CONFIG register at offset 0x883c, - * SSIC PORT need to be marked as "unused" before putting xHCI - * into D3. After D3 exit, the SSIC port need to be marked as "used". - * Without this change, xHCI might not enter D3 state. 
- * Make sure PME works on some Intel xHCI controllers by writing 1 to clear - * the Internal PME flag bit in vendor specific PMCTRL register at offset 0x80a4 - */ -static void xhci_pme_quirk(struct usb_hcd *hcd, bool suspend) -{ - struct xhci_hcd *xhci = hcd_to_xhci(hcd); - struct pci_dev *pdev = to_pci_dev(hcd->self.controller); - u32 val; - void __iomem *reg; - - if (pdev->vendor == PCI_VENDOR_ID_INTEL && - pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI) { - - reg = (void __iomem *) xhci->cap_regs + PORT2_SSIC_CONFIG_REG2; - - /* Notify SSIC that SSIC profile programming is not done */ - val = readl(reg) & ~PROG_DONE; - writel(val, reg); - - /* Mark SSIC port as unused(suspend) or used(resume) */ - val = readl(reg); - if (suspend) - val |= SSIC_PORT_UNUSED; - else - val &= ~SSIC_PORT_UNUSED; - writel(val, reg); - - /* Notify SSIC that SSIC profile programming is done */ - val = readl(reg) | PROG_DONE; - writel(val, reg); - readl(reg); - } - - reg = (void __iomem *) xhci->cap_regs + 0x80a4; - val = readl(reg); - writel(val | BIT(28), reg); - readl(reg); -} - #ifdef CONFIG_ACPI static void xhci_pme_acpi_rtd3_enable(struct pci_dev *dev) { @@ -347,6 +302,51 @@ static void xhci_pci_remove(struct pci_dev *dev) } #ifdef CONFIG_PM +/* + * In some Intel xHCI controllers, in order to get D3 working, + * through a vendor specific SSIC CONFIG register at offset 0x883c, + * SSIC PORT need to be marked as "unused" before putting xHCI + * into D3. After D3 exit, the SSIC port need to be marked as "used". + * Without this change, xHCI might not enter D3 state. + * Make sure PME works on some Intel xHCI controllers by writing 1 to clear + * the Internal PME flag bit in vendor specific PMCTRL register at offset 0x80a4 + */ +static void xhci_pme_quirk(struct usb_hcd *hcd, bool suspend) +{ + struct xhci_hcd *xhci = hcd_to_xhci(hcd); + struct pci_dev *pdev = to_pci_dev(hcd->self.controller); + u32 val; + void __iomem *reg; + + if (pdev->vendor == PCI_VENDOR_ID_INTEL && + pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI) { + + reg = (void __iomem *) xhci->cap_regs + PORT2_SSIC_CONFIG_REG2; + + /* Notify SSIC that SSIC profile programming is not done */ + val = readl(reg) & ~PROG_DONE; + writel(val, reg); + + /* Mark SSIC port as unused(suspend) or used(resume) */ + val = readl(reg); + if (suspend) + val |= SSIC_PORT_UNUSED; + else + val &= ~SSIC_PORT_UNUSED; + writel(val, reg); + + /* Notify SSIC that SSIC profile programming is done */ + val = readl(reg) | PROG_DONE; + writel(val, reg); + readl(reg); + } + + reg = (void __iomem *) xhci->cap_regs + 0x80a4; + val = readl(reg); + writel(val | BIT(28), reg); + readl(reg); +} + static int xhci_pci_suspend(struct usb_hcd *hcd, bool do_wakeup) { struct xhci_hcd *xhci = hcd_to_xhci(hcd); From 23159bb315f95c245ccfa9a248f8ecb786dbdc1e Mon Sep 17 00:00:00 2001 From: "Lu, Baolu" Date: Fri, 9 Oct 2015 13:30:10 +0300 Subject: [PATCH 1849/2091] usb: xhci: Makefile: move xhci-pci and xhci-plat-hcd after xhci-hcd [ Upstream commit 8451a34ff6c7c756e9e0f0094a3ba856c9734e5d ] Module xhci-pci and xhci-plat-hcd depend on xhci-hcd. Module xhci-hcd should be put at a place before xhci-pci and xhci-plat-hcd. Otherwise, xhci_hcd_init() might be executed after other functions in xhci-hcd if they are all selected to be built in. 
Signed-off-by: Lu Baolu Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/host/Makefile | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/Makefile b/drivers/usb/host/Makefile index 65b0b6a585997..da03d8b258dda 100644 --- a/drivers/usb/host/Makefile +++ b/drivers/usb/host/Makefile @@ -26,9 +26,6 @@ obj-$(CONFIG_USB_WHCI_HCD) += whci/ obj-$(CONFIG_PCI) += pci-quirks.o -obj-$(CONFIG_USB_XHCI_PCI) += xhci-pci.o -obj-$(CONFIG_USB_XHCI_PLATFORM) += xhci-plat-hcd.o - obj-$(CONFIG_USB_EHCI_HCD) += ehci-hcd.o obj-$(CONFIG_USB_EHCI_PCI) += ehci-pci.o obj-$(CONFIG_USB_EHCI_HCD_PLATFORM) += ehci-platform.o @@ -63,6 +60,8 @@ obj-$(CONFIG_USB_OHCI_HCD_PXA27X) += ohci-pxa27x.o obj-$(CONFIG_USB_UHCI_HCD) += uhci-hcd.o obj-$(CONFIG_USB_FHCI_HCD) += fhci.o obj-$(CONFIG_USB_XHCI_HCD) += xhci-hcd.o +obj-$(CONFIG_USB_XHCI_PCI) += xhci-pci.o +obj-$(CONFIG_USB_XHCI_PLATFORM) += xhci-plat-hcd.o obj-$(CONFIG_USB_SL811_HCD) += sl811-hcd.o obj-$(CONFIG_USB_SL811_CS) += sl811_cs.o obj-$(CONFIG_USB_U132_HCD) += u132-hcd.o From 1c0232f6415b1c1c89f71fd3d98e2b497eaf988b Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Fri, 9 Oct 2015 13:30:08 +0300 Subject: [PATCH 1850/2091] xhci: create one unified function to calculate TRB TD remainder. [ Upstream commit c840d6ce772d47c777070ca4bbbfbf21d8d727a3 ] xhci versions 1.0 and later report the untransferred data remaining in a TD a bit differently than older hosts. We used to have separate functions for these, and needed to check host version before calling the right function. Now Mediatek host has an additional quirk on how it uses the TD Size field for remaining data. To prevent yet another function for calculating remainder we instead want to make one quirk friendly unified function. Tested-by: Chunfeng Yun Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/host/xhci-ring.c | 106 +++++++++++++++-------------------- drivers/usb/host/xhci.h | 2 + 2 files changed, 46 insertions(+), 62 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 41d7a05f8af4a..e6d858a49d04d 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -3000,21 +3000,6 @@ int xhci_queue_intr_tx(struct xhci_hcd *xhci, gfp_t mem_flags, return xhci_queue_bulk_tx(xhci, mem_flags, urb, slot_id, ep_index); } -/* - * The TD size is the number of bytes remaining in the TD (including this TRB), - * right shifted by 10. - * It must fit in bits 21:17, so it can't be bigger than 31. - */ -static u32 xhci_td_remainder(unsigned int remainder) -{ - u32 max = (1 << (21 - 17 + 1)) - 1; - - if ((remainder >> 10) >= max) - return max << 17; - else - return (remainder >> 10) << 17; -} - /* * For xHCI 1.0 host controllers, TD size is the number of max packet sized * packets remaining in the TD (*not* including this TRB). @@ -3027,30 +3012,36 @@ static u32 xhci_td_remainder(unsigned int remainder) * * TD size = total_packet_count - packets_transferred * - * It must fit in bits 21:17, so it can't be bigger than 31. + * For xHCI 0.96 and older, TD size field should be the remaining bytes + * including this TRB, right shifted by 10 + * + * For all hosts it must fit in bits 21:17, so it can't be bigger than 31. + * This is taken care of in the TRB_TD_SIZE() macro + * * The last TRB in a TD must have the TD size set to zero. 
*/ -static u32 xhci_v1_0_td_remainder(int running_total, int trb_buff_len, - unsigned int total_packet_count, struct urb *urb, - unsigned int num_trbs_left) +static u32 xhci_td_remainder(struct xhci_hcd *xhci, int transferred, + int trb_buff_len, unsigned int td_total_len, + struct urb *urb, unsigned int num_trbs_left) { - int packets_transferred; + u32 maxp, total_packet_count; + + if (xhci->hci_version < 0x100) + return ((td_total_len - transferred) >> 10); + + maxp = GET_MAX_PACKET(usb_endpoint_maxp(&urb->ep->desc)); + total_packet_count = DIV_ROUND_UP(td_total_len, maxp); /* One TRB with a zero-length data packet. */ - if (num_trbs_left == 0 || (running_total == 0 && trb_buff_len == 0)) + if (num_trbs_left == 0 || (transferred == 0 && trb_buff_len == 0) || + trb_buff_len == td_total_len) return 0; - /* All the TRB queueing functions don't count the current TRB in - * running_total. - */ - packets_transferred = (running_total + trb_buff_len) / - GET_MAX_PACKET(usb_endpoint_maxp(&urb->ep->desc)); - - if ((total_packet_count - packets_transferred) > 31) - return 31 << 17; - return (total_packet_count - packets_transferred) << 17; + /* Queueing functions don't count the current TRB into transferred */ + return (total_packet_count - ((transferred + trb_buff_len) / maxp)); } + static int queue_bulk_sg_tx(struct xhci_hcd *xhci, gfp_t mem_flags, struct urb *urb, int slot_id, unsigned int ep_index) { @@ -3172,17 +3163,12 @@ static int queue_bulk_sg_tx(struct xhci_hcd *xhci, gfp_t mem_flags, } /* Set the TRB length, TD size, and interrupter fields. */ - if (xhci->hci_version < 0x100) { - remainder = xhci_td_remainder( - urb->transfer_buffer_length - - running_total); - } else { - remainder = xhci_v1_0_td_remainder(running_total, - trb_buff_len, total_packet_count, urb, - num_trbs - 1); - } + remainder = xhci_td_remainder(xhci, running_total, trb_buff_len, + urb->transfer_buffer_length, + urb, num_trbs - 1); + length_field = TRB_LEN(trb_buff_len) | - remainder | + TRB_TD_SIZE(remainder) | TRB_INTR_TARGET(0); if (num_trbs > 1) @@ -3345,17 +3331,12 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags, field |= TRB_ISP; /* Set the TRB length, TD size, and interrupter fields. 
*/ - if (xhci->hci_version < 0x100) { - remainder = xhci_td_remainder( - urb->transfer_buffer_length - - running_total); - } else { - remainder = xhci_v1_0_td_remainder(running_total, - trb_buff_len, total_packet_count, urb, - num_trbs - 1); - } + remainder = xhci_td_remainder(xhci, running_total, trb_buff_len, + urb->transfer_buffer_length, + urb, num_trbs - 1); + length_field = TRB_LEN(trb_buff_len) | - remainder | + TRB_TD_SIZE(remainder) | TRB_INTR_TARGET(0); if (num_trbs > 1) @@ -3393,7 +3374,7 @@ int xhci_queue_ctrl_tx(struct xhci_hcd *xhci, gfp_t mem_flags, struct usb_ctrlrequest *setup; struct xhci_generic_trb *start_trb; int start_cycle; - u32 field, length_field; + u32 field, length_field, remainder; struct urb_priv *urb_priv; struct xhci_td *td; @@ -3466,9 +3447,15 @@ int xhci_queue_ctrl_tx(struct xhci_hcd *xhci, gfp_t mem_flags, else field = TRB_TYPE(TRB_DATA); + remainder = xhci_td_remainder(xhci, 0, + urb->transfer_buffer_length, + urb->transfer_buffer_length, + urb, 1); + length_field = TRB_LEN(urb->transfer_buffer_length) | - xhci_td_remainder(urb->transfer_buffer_length) | + TRB_TD_SIZE(remainder) | TRB_INTR_TARGET(0); + if (urb->transfer_buffer_length > 0) { if (setup->bRequestType & USB_DIR_IN) field |= TRB_DIR_IN; @@ -3691,17 +3678,12 @@ static int xhci_queue_isoc_tx(struct xhci_hcd *xhci, gfp_t mem_flags, trb_buff_len = td_remain_len; /* Set the TRB length, TD size, & interrupter fields. */ - if (xhci->hci_version < 0x100) { - remainder = xhci_td_remainder( - td_len - running_total); - } else { - remainder = xhci_v1_0_td_remainder( - running_total, trb_buff_len, - total_packet_count, urb, - (trbs_per_td - j - 1)); - } + remainder = xhci_td_remainder(xhci, running_total, + trb_buff_len, td_len, + urb, trbs_per_td - j - 1); + length_field = TRB_LEN(trb_buff_len) | - remainder | + TRB_TD_SIZE(remainder) | TRB_INTR_TARGET(0); queue_trb(xhci, ep_ring, more_trbs_coming, diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 0f26dd2697b6e..925928e9ffc91 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1130,6 +1130,8 @@ enum xhci_setup_dev { /* Normal TRB fields */ /* transfer_len bitmasks - bits 0:16 */ #define TRB_LEN(p) ((p) & 0x1ffff) +/* TD Size, packets remaining in this TD, bits 21:17 (5 bits, so max 31) */ +#define TRB_TD_SIZE(p) (min((p), (u32)31) << 17) /* Interrupter Target - which MSI-X vector to target the completion event at */ #define TRB_INTR_TARGET(p) (((p) & 0x3ff) << 22) #define GET_INTR_TARGET(p) (((p) >> 22) & 0x3ff) From 69b8883209004c05eac885e53f67f37d6b0f8e11 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 26 Jan 2016 17:50:05 +0200 Subject: [PATCH 1851/2091] usb: xhci: handle both SSIC ports in PME stuck quirk [ Upstream commit fa89537783cb442263fa5a14df6c7693eaf32f11 ] Commit abce329c27b3 ("xhci: Workaround to get D3 working in Intel xHCI") adds a workaround for a limitation of PME storm caused by SSIC port in some Intel SoCs. This commit only handled one SSIC port, while there are actually two SSIC ports in the chips. This patch handles both SSIC ports. Without this fix, users still see PME storm. 
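For orientation (not part of the patch), the register addresses produced by the new defines in the diff below line up with the single register the original workaround touched:

    SSIC_PORT_CFG2 + 0 * SSIC_PORT_CFG2_OFFSET = 0x880c + 0x00 = 0x880c
    SSIC_PORT_CFG2 + 1 * SSIC_PORT_CFG2_OFFSET = 0x880c + 0x30 = 0x883c

so the i = 1 iteration hits the old PORT2_SSIC_CONFIG_REG2 (0x883c) and the i = 0 iteration covers the port that was previously missed.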
Cc: stable@vger.kernel.org # v4.2+ Signed-off-by: Zhuang Jin Can Signed-off-by: Lu Baolu Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/host/xhci-pci.c | 48 ++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 19 deletions(-) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index dd41053afd6fe..0c59cf11d88d3 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -28,7 +28,9 @@ #include "xhci.h" #include "xhci-trace.h" -#define PORT2_SSIC_CONFIG_REG2 0x883c +#define SSIC_PORT_NUM 2 +#define SSIC_PORT_CFG2 0x880c +#define SSIC_PORT_CFG2_OFFSET 0x30 #define PROG_DONE (1 << 30) #define SSIC_PORT_UNUSED (1 << 31) @@ -317,28 +319,36 @@ static void xhci_pme_quirk(struct usb_hcd *hcd, bool suspend) struct pci_dev *pdev = to_pci_dev(hcd->self.controller); u32 val; void __iomem *reg; + int i; if (pdev->vendor == PCI_VENDOR_ID_INTEL && pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI) { - reg = (void __iomem *) xhci->cap_regs + PORT2_SSIC_CONFIG_REG2; - - /* Notify SSIC that SSIC profile programming is not done */ - val = readl(reg) & ~PROG_DONE; - writel(val, reg); - - /* Mark SSIC port as unused(suspend) or used(resume) */ - val = readl(reg); - if (suspend) - val |= SSIC_PORT_UNUSED; - else - val &= ~SSIC_PORT_UNUSED; - writel(val, reg); - - /* Notify SSIC that SSIC profile programming is done */ - val = readl(reg) | PROG_DONE; - writel(val, reg); - readl(reg); + for (i = 0; i < SSIC_PORT_NUM; i++) { + reg = (void __iomem *) xhci->cap_regs + + SSIC_PORT_CFG2 + + i * SSIC_PORT_CFG2_OFFSET; + + /* + * Notify SSIC that SSIC profile programming + * is not done. + */ + val = readl(reg) & ~PROG_DONE; + writel(val, reg); + + /* Mark SSIC port as unused(suspend) or used(resume) */ + val = readl(reg); + if (suspend) + val |= SSIC_PORT_UNUSED; + else + val &= ~SSIC_PORT_UNUSED; + writel(val, reg); + + /* Notify SSIC that SSIC profile programming is done */ + val = readl(reg) | PROG_DONE; + writel(val, reg); + readl(reg); + } } reg = (void __iomem *) xhci->cap_regs + 0x80a4; From 1f8ad5557c9910b9853830ef202d092aa43806c9 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 26 Jan 2016 17:50:06 +0200 Subject: [PATCH 1852/2091] usb: xhci: add a quirk bit for ssic port unused [ Upstream commit 7e70cbffe236721051bbaff965e477df06dcb190 ] Two workarounds introduced by commit b8cb91e058cd ("xhci: Workaround for PME stuck issues in Intel xhci") and commit abce329c27b3 ("xhci: Workaround to get D3 working in Intel xHCI") share a single quirk bit XHCI_PME_STUCK_QUIRK. These two workarounds actually are different and might happen on different hardwares. Need to separate them by adding a quirk bit for the later. 
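Condensed from the diff below (abridged, not a complete function), the suspend path ends up applying the two workarounds independently once each has its own quirk bit:

    if (xhci->quirks & XHCI_PME_STUCK_QUIRK)
            xhci_pme_quirk(hcd);                    /* clear the stuck internal PME flag */

    if (xhci->quirks & XHCI_SSIC_PORT_UNUSED)
            xhci_ssic_port_unused_quirk(hcd, true); /* mark the SSIC ports unused for D3 */

    return xhci_suspend(xhci, do_wakeup);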
Cc: stable@vger.kernel.org Signed-off-by: Lu Baolu Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/host/xhci-pci.c | 79 +++++++++++++++++++++---------------- drivers/usb/host/xhci.h | 1 + 2 files changed, 46 insertions(+), 34 deletions(-) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 0c59cf11d88d3..2fefc4abf1fd0 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -150,6 +150,10 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI)) { xhci->quirks |= XHCI_PME_STUCK_QUIRK; } + if (pdev->vendor == PCI_VENDOR_ID_INTEL && + pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI) { + xhci->quirks |= XHCI_SSIC_PORT_UNUSED; + } if (pdev->vendor == PCI_VENDOR_ID_ETRON && pdev->device == PCI_DEVICE_ID_EJ168) { xhci->quirks |= XHCI_RESET_ON_RESUME; @@ -310,46 +314,47 @@ static void xhci_pci_remove(struct pci_dev *dev) * SSIC PORT need to be marked as "unused" before putting xHCI * into D3. After D3 exit, the SSIC port need to be marked as "used". * Without this change, xHCI might not enter D3 state. - * Make sure PME works on some Intel xHCI controllers by writing 1 to clear - * the Internal PME flag bit in vendor specific PMCTRL register at offset 0x80a4 */ -static void xhci_pme_quirk(struct usb_hcd *hcd, bool suspend) +static void xhci_ssic_port_unused_quirk(struct usb_hcd *hcd, bool suspend) { struct xhci_hcd *xhci = hcd_to_xhci(hcd); - struct pci_dev *pdev = to_pci_dev(hcd->self.controller); u32 val; void __iomem *reg; int i; - if (pdev->vendor == PCI_VENDOR_ID_INTEL && - pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI) { - - for (i = 0; i < SSIC_PORT_NUM; i++) { - reg = (void __iomem *) xhci->cap_regs + - SSIC_PORT_CFG2 + - i * SSIC_PORT_CFG2_OFFSET; - - /* - * Notify SSIC that SSIC profile programming - * is not done. - */ - val = readl(reg) & ~PROG_DONE; - writel(val, reg); - - /* Mark SSIC port as unused(suspend) or used(resume) */ - val = readl(reg); - if (suspend) - val |= SSIC_PORT_UNUSED; - else - val &= ~SSIC_PORT_UNUSED; - writel(val, reg); - - /* Notify SSIC that SSIC profile programming is done */ - val = readl(reg) | PROG_DONE; - writel(val, reg); - readl(reg); - } + for (i = 0; i < SSIC_PORT_NUM; i++) { + reg = (void __iomem *) xhci->cap_regs + + SSIC_PORT_CFG2 + + i * SSIC_PORT_CFG2_OFFSET; + + /* Notify SSIC that SSIC profile programming is not done. 
*/ + val = readl(reg) & ~PROG_DONE; + writel(val, reg); + + /* Mark SSIC port as unused(suspend) or used(resume) */ + val = readl(reg); + if (suspend) + val |= SSIC_PORT_UNUSED; + else + val &= ~SSIC_PORT_UNUSED; + writel(val, reg); + + /* Notify SSIC that SSIC profile programming is done */ + val = readl(reg) | PROG_DONE; + writel(val, reg); + readl(reg); } +} + +/* + * Make sure PME works on some Intel xHCI controllers by writing 1 to clear + * the Internal PME flag bit in vendor specific PMCTRL register at offset 0x80a4 + */ +static void xhci_pme_quirk(struct usb_hcd *hcd) +{ + struct xhci_hcd *xhci = hcd_to_xhci(hcd); + void __iomem *reg; + u32 val; reg = (void __iomem *) xhci->cap_regs + 0x80a4; val = readl(reg); @@ -370,7 +375,10 @@ static int xhci_pci_suspend(struct usb_hcd *hcd, bool do_wakeup) pdev->no_d3cold = true; if (xhci->quirks & XHCI_PME_STUCK_QUIRK) - xhci_pme_quirk(hcd, true); + xhci_pme_quirk(hcd); + + if (xhci->quirks & XHCI_SSIC_PORT_UNUSED) + xhci_ssic_port_unused_quirk(hcd, true); return xhci_suspend(xhci, do_wakeup); } @@ -402,8 +410,11 @@ static int xhci_pci_resume(struct usb_hcd *hcd, bool hibernated) if (pdev->vendor == PCI_VENDOR_ID_INTEL) usb_enable_intel_xhci_ports(pdev); + if (xhci->quirks & XHCI_SSIC_PORT_UNUSED) + xhci_ssic_port_unused_quirk(hcd, false); + if (xhci->quirks & XHCI_PME_STUCK_QUIRK) - xhci_pme_quirk(hcd, false); + xhci_pme_quirk(hcd); retval = xhci_resume(xhci, hibernated); return retval; diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 925928e9ffc91..f18cdf0ec795b 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1570,6 +1570,7 @@ struct xhci_hcd { /* For controllers with a broken beyond repair streams implementation */ #define XHCI_BROKEN_STREAMS (1 << 19) #define XHCI_PME_STUCK_QUIRK (1 << 20) +#define XHCI_SSIC_PORT_UNUSED (1 << 22) unsigned int num_active_eps; unsigned int limit_active_eps; /* There are two roothubs to keep track of bus suspend info for */ From c6fd1e6a4ffcfaf6fd6409e8fc3ada2e0cbd247d Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 26 Jan 2016 17:50:07 +0200 Subject: [PATCH 1853/2091] usb: xhci: set SSIC port unused only if xhci_suspend succeeds [ Upstream commit 92149c930cce1865d0d4aca2ab07c2b4b197b418 ] XHCI_SSIC_PORT_UNUSED quirk was applied to the xHCI host controllers in some Intel SoC chips. With this quirk applied, SSIC port is set to "unused" prior to xhci_suspend(). This may cause problem if host fails to suspend. In this case, the port is set to unused without host further entering D3, and the port will not be usable anymore. 
Cc: stable@vger.kernel.org Signed-off-by: Zhuang Jin Can Signed-off-by: Lu Baolu Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/host/xhci-pci.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 2fefc4abf1fd0..c3442eed1e5d9 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -366,6 +366,7 @@ static int xhci_pci_suspend(struct usb_hcd *hcd, bool do_wakeup) { struct xhci_hcd *xhci = hcd_to_xhci(hcd); struct pci_dev *pdev = to_pci_dev(hcd->self.controller); + int ret; /* * Systems with the TI redriver that loses port status change events @@ -380,7 +381,11 @@ static int xhci_pci_suspend(struct usb_hcd *hcd, bool do_wakeup) if (xhci->quirks & XHCI_SSIC_PORT_UNUSED) xhci_ssic_port_unused_quirk(hcd, true); - return xhci_suspend(xhci, do_wakeup); + ret = xhci_suspend(xhci, do_wakeup); + if (ret && (xhci->quirks & XHCI_SSIC_PORT_UNUSED)) + xhci_ssic_port_unused_quirk(hcd, false); + + return ret; } static int xhci_pci_resume(struct usb_hcd *hcd, bool hibernated) From 4c54b01e7221db1d14ef2506ac0b81f9ff69e26f Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 26 Jan 2016 17:50:08 +0200 Subject: [PATCH 1854/2091] usb: xhci: apply XHCI_PME_STUCK_QUIRK to Intel Broxton-M platforms [ Upstream commit ccc04afb72cddbdf7c0e1c17e92886405a71b754 ] Intel Broxton M was verifed to require XHCI_PME_STUCK_QUIRK quirk as well. Cc: stable@vger.kernel.org Signed-off-by: Lu Baolu Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/host/xhci-pci.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index c3442eed1e5d9..3ff5fcc7c94bd 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -47,6 +47,7 @@ #define PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI 0x22b5 #define PCI_DEVICE_ID_INTEL_SUNRISEPOINT_H_XHCI 0xa12f #define PCI_DEVICE_ID_INTEL_SUNRISEPOINT_LP_XHCI 0x9d2f +#define PCI_DEVICE_ID_INTEL_BROXTON_M_XHCI 0x0aa8 static const char hcd_name[] = "xhci_hcd"; @@ -147,7 +148,8 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) if (pdev->vendor == PCI_VENDOR_ID_INTEL && (pdev->device == PCI_DEVICE_ID_INTEL_SUNRISEPOINT_LP_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_SUNRISEPOINT_H_XHCI || - pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI)) { + pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI || + pdev->device == PCI_DEVICE_ID_INTEL_BROXTON_M_XHCI)) { xhci->quirks |= XHCI_PME_STUCK_QUIRK; } if (pdev->vendor == PCI_VENDOR_ID_INTEL && From 8552bea30320cc866c7fb5d9a687c3e6d15fb5c1 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Tue, 26 Jan 2016 17:50:12 +0200 Subject: [PATCH 1855/2091] xhci: Fix list corruption in urb dequeue at host removal [ Upstream commit 5c82171167adb8e4ac77b91a42cd49fb211a81a0 ] xhci driver frees data for all devices, both usb2 and and usb3 the first time usb_remove_hcd() is called, including td_list and and xhci_ring structures. When usb_remove_hcd() is called a second time for the second xhci bus it will try to dequeue all pending urbs, and touches td_list which is already freed for that endpoint. 
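The one-line guard in the diff below is easier to read with the reasoning spelled out (abridged; the comment is an inference from the commit message, not text from the patch):

    /* xhci->devs[slot_id] is cleared when the first usb_remove_hcd() call frees
     * the per-device rings, so stop before touching the already-freed td_list. */
    for (i = urb_priv->td_cnt;
         i < urb_priv->length && xhci->devs[urb->dev->slot_id];
         i++)
            ...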
Cc: Reported-by: Joe Lawrence Tested-by: Joe Lawrence Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/host/xhci.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index f6bb118e4501f..910f7fac031f8 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -1559,7 +1559,9 @@ int xhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status) xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb, "HW died, freeing TD."); urb_priv = urb->hcpriv; - for (i = urb_priv->td_cnt; i < urb_priv->length; i++) { + for (i = urb_priv->td_cnt; + i < urb_priv->length && xhci->devs[urb->dev->slot_id]; + i++) { td = urb_priv->td[i]; if (!list_empty(&td->td_list)) list_del_init(&td->td_list); From b35f45f2ebf224e06836ced6ccebb05ec700a6a9 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 3 Feb 2016 17:33:48 -0200 Subject: [PATCH 1856/2091] [media] tda1004x: only update the frontend properties if locked [ Upstream commit e8beb02343e7582980c6705816cd957cf4f74c7a ] The tda1004x was updating the properties cache before locking. If the device is not locked, the data at the registers are just random values with no real meaning. This caused the driver to fail with libdvbv5, as such library calls GET_PROPERTY from time to time, in order to return the DVB stats. Tested with a saa7134 card 78: ASUSTeK P7131 Dual, vendor PCI ID: 1043:4862 Cc: stable@vger.kernel.org Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/dvb-frontends/tda1004x.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/media/dvb-frontends/tda1004x.c b/drivers/media/dvb-frontends/tda1004x.c index a2631be7ffac9..08e0f0dd8728b 100644 --- a/drivers/media/dvb-frontends/tda1004x.c +++ b/drivers/media/dvb-frontends/tda1004x.c @@ -903,9 +903,18 @@ static int tda1004x_get_fe(struct dvb_frontend *fe) { struct dtv_frontend_properties *fe_params = &fe->dtv_property_cache; struct tda1004x_state* state = fe->demodulator_priv; + int status; dprintk("%s\n", __func__); + status = tda1004x_read_byte(state, TDA1004X_STATUS_CD); + if (status == -1) + return -EIO; + + /* Only update the properties cache if device is locked */ + if (!(status & 8)) + return 0; + // inversion status fe_params->inversion = INVERSION_OFF; if (tda1004x_read_byte(state, TDA1004X_CONFC1) & 0x20) From 451e9d9111a2c83d46cdd48d5eea2fa8e8d72d9a Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 4 Feb 2016 17:06:13 +0100 Subject: [PATCH 1857/2091] ALSA: timer: Fix leftover link at closing [ Upstream commit 094fd3be87b0f102589e2d5c3fa5d06b7e20496d ] In ALSA timer core, the active timer instance is managed in active_list linked list. Each element is added / removed dynamically at timer start, stop and in timer interrupt. The problem is that snd_timer_interrupt() has a thinko and leaves the element in active_list when it's the last opened element. This eventually leads to list corruption or use-after-free error. This hasn't been revealed because we used to delete the list forcibly in snd_timer_stop() in the past. However, the recent fix avoids the double-stop behavior (in commit [f784beb75ce8: ALSA: timer: Fix link corruption due to double start or stop]), and this leak hits reality. This patch fixes the link management in snd_timer_interrupt(). Now it simply unlinks no matter which stream is. 
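The thinko is easiest to see side by side (abridged from the diff below):

    /* before: when running drops to zero, the condition is false and the last
     * instance is never unlinked, leaving a stale entry on active_list */
    if (--timer->running)
            list_del_init(&ti->active_list);

    /* after: keep the counter in sync, but always unlink the instance */
    --timer->running;
    list_del_init(&ti->active_list);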
BugLink: http://lkml.kernel.org/r/CACT4Y+Yy2aukHP-EDp8-ziNqNNmb-NTf=jDWXMP7jB8HDa2vng@mail.gmail.com Reported-by: Dmitry Vyukov Cc: Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/core/timer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/core/timer.c b/sound/core/timer.c index 357e86f7804a3..00e8c5f4de177 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -744,8 +744,8 @@ void snd_timer_interrupt(struct snd_timer * timer, unsigned long ticks_left) ti->cticks = ti->ticks; } else { ti->flags &= ~SNDRV_TIMER_IFLG_RUNNING; - if (--timer->running) - list_del_init(&ti->active_list); + --timer->running; + list_del_init(&ti->active_list); } if ((timer->hw.flags & SNDRV_TIMER_HW_TASKLET) || (ti->flags & SNDRV_TIMER_IFLG_FAST)) From aec01beb5eed22fed5291dc73f9b5f70c4d34218 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 4 Feb 2016 15:59:43 -0200 Subject: [PATCH 1858/2091] [media] saa7134-alsa: Only frees registered sound cards [ Upstream commit ac75fe5d8fe4a0bf063be18fb29684405279e79e ] That prevents this bug: [ 2382.269496] BUG: unable to handle kernel NULL pointer dereference at 0000000000000540 [ 2382.270013] IP: [] snd_card_free+0x36/0x70 [snd] [ 2382.270013] PGD 0 [ 2382.270013] Oops: 0002 [#1] SMP [ 2382.270013] Modules linked in: saa7134_alsa(-) tda1004x saa7134_dvb videobuf2_dvb dvb_core tda827x tda8290 tuner saa7134 tveeprom videobuf2_dma_sg videobuf2_memops videobuf2_v4l2 videobuf2_core v4l2_common videodev media auth_rpcgss nfsv4 dns_resolver nfs lockd grace sunrpc tun bridge stp llc ebtables ip6table_filter ip6_tables nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack it87 hwmon_vid snd_hda_codec_idt snd_hda_codec_generic iTCO_wdt iTCO_vendor_support snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_seq pcspkr i2c_i801 snd_seq_device snd_pcm snd_timer lpc_ich snd mfd_core soundcore binfmt_misc i915 video i2c_algo_bit drm_kms_helper drm r8169 ata_generic serio_raw pata_acpi mii i2c_core [last unloaded: videobuf2_memops] [ 2382.270013] CPU: 0 PID: 4899 Comm: rmmod Not tainted 4.5.0-rc1+ #4 [ 2382.270013] Hardware name: PCCHIPS P17G/P17G, BIOS 080012 05/14/2008 [ 2382.270013] task: ffff880039c38000 ti: ffff88003c764000 task.ti: ffff88003c764000 [ 2382.270013] RIP: 0010:[] [] snd_card_free+0x36/0x70 [snd] [ 2382.270013] RSP: 0018:ffff88003c767ea0 EFLAGS: 00010286 [ 2382.270013] RAX: ffff88003c767eb8 RBX: 0000000000000000 RCX: 0000000000006260 [ 2382.270013] RDX: ffffffffa020a060 RSI: ffffffffa0206de1 RDI: ffff88003c767eb0 [ 2382.270013] RBP: ffff88003c767ed8 R08: 0000000000019960 R09: ffffffff811a5412 [ 2382.270013] R10: ffffea0000d7c200 R11: 0000000000000000 R12: ffff88003c767ea8 [ 2382.270013] R13: 00007ffe760617f7 R14: 0000000000000000 R15: 0000557625d7f1e0 [ 2382.270013] FS: 00007f80bb1c0700(0000) GS:ffff88003f400000(0000) knlGS:0000000000000000 [ 2382.270013] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [ 2382.270013] CR2: 0000000000000540 CR3: 000000003c00f000 CR4: 00000000000006f0 [ 2382.270013] Stack: [ 2382.270013] 000000003c767ed8 ffffffff00000000 ffff880000000000 ffff88003c767eb8 [ 2382.270013] ffff88003c767eb8 ffffffffa049a890 00007ffe76060060 ffff88003c767ef0 [ 2382.270013] ffffffffa049889d ffffffffa049a500 ffff88003c767f48 ffffffff8111079c [ 2382.270013] Call Trace: [ 2382.270013] [] saa7134_alsa_exit+0x1d/0x780 [saa7134_alsa] [ 2382.270013] [] SyS_delete_module+0x19c/0x1f0 [ 2382.270013] [] entry_SYSCALL_64_fastpath+0x12/0x71 [ 2382.270013] Code: 20 a0 48 c7 c6 e1 6d 20 a0 48 89 e5 41 54 
53 4c 8d 65 d0 48 89 fb 48 83 ec 28 c7 45 d0 00 00 00 00 49 8d 7c 24 08 e8 7a 55 ed e0 <4c> 89 a3 40 05 00 00 48 89 df e8 eb fd ff ff 85 c0 75 1a 48 8d [ 2382.270013] RIP [] snd_card_free+0x36/0x70 [snd] [ 2382.270013] RSP [ 2382.270013] CR2: 0000000000000540 Cc: stable@vger.kernel.org Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/pci/saa7134/saa7134-alsa.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/media/pci/saa7134/saa7134-alsa.c b/drivers/media/pci/saa7134/saa7134-alsa.c index ac3cd74e824e0..067db727e685f 100644 --- a/drivers/media/pci/saa7134/saa7134-alsa.c +++ b/drivers/media/pci/saa7134/saa7134-alsa.c @@ -1218,6 +1218,8 @@ static int alsa_device_init(struct saa7134_dev *dev) static int alsa_device_exit(struct saa7134_dev *dev) { + if (!snd_saa7134_cards[dev->nr]) + return 1; snd_card_free(snd_saa7134_cards[dev->nr]); snd_saa7134_cards[dev->nr] = NULL; @@ -1267,7 +1269,8 @@ static void saa7134_alsa_exit(void) int idx; for (idx = 0; idx < SNDRV_CARDS; idx++) { - snd_card_free(snd_saa7134_cards[idx]); + if (snd_saa7134_cards[idx]) + snd_card_free(snd_saa7134_cards[idx]); } saa7134_dmasound_init = NULL; From df2aeb317e3e6d1da2de045748064df63bc11944 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 1 Feb 2016 14:18:57 +0100 Subject: [PATCH 1859/2091] ARM: nomadik: fix up SD/MMC DT settings [ Upstream commit 418d5516568b3fdbc4e7b53677dd78aed8514565 ] The DTSI file for the Nomadik does not properly specify how the PL180 levelshifter is connected: the Nomadik actually needs all the five st,sig-dir-* flags set to properly control all lines out. Further this board supports full power cycling of the card, and since this variant has no hardware clock gating, it needs a ridiculously low frequency setting to keep up with the ever overflowing FIFO. The pin configuration set-up is a bit of a mystery, because of course these pins are a mix of inputs and outputs. However the reference implementation sets all pins to "output" with unspecified initial value, so let's do that here as well. 
Cc: stable@vger.kernel.org Signed-off-by: Linus Walleij Acked-by: Ulf Hansson Signed-off-by: Olof Johansson Signed-off-by: Sasha Levin --- arch/arm/boot/dts/ste-nomadik-stn8815.dtsi | 37 ++++++++++++---------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi b/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi index f182f6538e902..89ed9b45d5337 100644 --- a/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi +++ b/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi @@ -122,22 +122,14 @@ }; mmcsd_default_mode: mmcsd_default { mmcsd_default_cfg1 { - /* MCCLK */ - pins = "GPIO8_B10"; - ste,output = <0>; - }; - mmcsd_default_cfg2 { - /* MCCMDDIR, MCDAT0DIR, MCDAT31DIR, MCDATDIR2 */ - pins = "GPIO10_C11", "GPIO15_A12", - "GPIO16_C13", "GPIO23_D15"; - ste,output = <1>; - }; - mmcsd_default_cfg3 { - /* MCCMD, MCDAT3-0, MCMSFBCLK */ - pins = "GPIO9_A10", "GPIO11_B11", - "GPIO12_A11", "GPIO13_C12", - "GPIO14_B12", "GPIO24_C15"; - ste,input = <1>; + /* + * MCCLK, MCCMDDIR, MCDAT0DIR, MCDAT31DIR, MCDATDIR2 + * MCCMD, MCDAT3-0, MCMSFBCLK + */ + pins = "GPIO8_B10", "GPIO9_A10", "GPIO10_C11", "GPIO11_B11", + "GPIO12_A11", "GPIO13_C12", "GPIO14_B12", "GPIO15_A12", + "GPIO16_C13", "GPIO23_D15", "GPIO24_C15"; + ste,output = <2>; }; }; }; @@ -802,10 +794,21 @@ clock-names = "mclk", "apb_pclk"; interrupt-parent = <&vica>; interrupts = <22>; - max-frequency = <48000000>; + max-frequency = <400000>; bus-width = <4>; cap-mmc-highspeed; cap-sd-highspeed; + full-pwr-cycle; + /* + * The STw4811 circuit used with the Nomadik strictly + * requires that all of these signal direction pins be + * routed and used for its 4-bit levelshifter. + */ + st,sig-dir-dat0; + st,sig-dir-dat2; + st,sig-dir-dat31; + st,sig-dir-cmd; + st,sig-pin-fbclk; pinctrl-names = "default"; pinctrl-0 = <&mmcsd_default_mux>, <&mmcsd_default_mode>; vmmc-supply = <&vmmc_regulator>; From 81c5fe348992a0896473724c556b6632b8acd4a7 Mon Sep 17 00:00:00 2001 From: Insu Yun Date: Mon, 1 Feb 2016 11:08:29 -0500 Subject: [PATCH 1860/2091] drm: fix missing reference counting decrease [ Upstream commit dabe19540af9e563d526113bb102e1b9b9fa73f9 ] In drm_dp_mst_allocate_vcpi, it returns true in two paths, but in one path, there is no reference couting decrease. Signed-off-by: Insu Yun Cc: stable@vger.kernel.org Signed-off-by: Dave Airlie Signed-off-by: Sasha Levin --- drivers/gpu/drm/drm_dp_mst_topology.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index 2d3bad43ad907..253339158acd4 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -2396,6 +2396,7 @@ bool drm_dp_mst_allocate_vcpi(struct drm_dp_mst_topology_mgr *mgr, struct drm_dp DRM_DEBUG_KMS("payload: vcpi %d already allocated for pbn %d - requested pbn %d\n", port->vcpi.vcpi, port->vcpi.pbn, pbn); if (pbn == port->vcpi.pbn) { *slots = port->vcpi.num_slots; + drm_dp_put_port(port); return true; } } From bd31f0f4dfa04f1afe2ac88b32a745dad80718e2 Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Fri, 22 Jan 2016 17:07:25 -0500 Subject: [PATCH 1861/2091] drm: Add drm_fixp_from_fraction and drm_fixp2int_ceil [ Upstream commit 64566b5e767f9bc3161055ca1b443a51afb52aad ] drm_fixp_from_fraction allows us to create a fixed point directly from a fraction, rather than creating fixed point values and dividing later. This avoids overflow of our 64 bit value for large numbers. drm_fixp2int_ceil allows us to return the ceiling of our fixed point value. 
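A rough sense of the scale involved (the figures are borrowed from the PBN calculation in the following patch, they are not stated in this commit): with DRM_FIXED_POINT at 32, converting a value to fixed point means shifting it left by 32 bits, so only the final quotient, never the large numerator, can live in the 31.32 format:

    458929152000 << 32                       /* ~1.97e21, cannot be held in an s64 (~9.22e18 max) */
    458929152000 / 432000000 = 1062.336...   /* the quotient itself fits 31.32 with room to spare */

    drm_fixp_from_fraction(458929152000LL, 432000000LL); /* divides first, no shift of the input  */
    /* drm_fixp2int_ceil() then rounds the result up, giving 1063 in this case */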
[airlied: squash Jordan's fix] 32-bit-build-fix: Jordan Lazare Signed-off-by: Harry Wentland Cc: stable@vger.kernel.org Reviewed-by: Alex Deucher Signed-off-by: Dave Airlie Signed-off-by: Sasha Levin --- include/drm/drm_fixed.h | 53 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 51 insertions(+), 2 deletions(-) diff --git a/include/drm/drm_fixed.h b/include/drm/drm_fixed.h index d639049a613df..553210c02ee0f 100644 --- a/include/drm/drm_fixed.h +++ b/include/drm/drm_fixed.h @@ -73,18 +73,28 @@ static inline u32 dfixed_div(fixed20_12 A, fixed20_12 B) #define DRM_FIXED_ONE (1ULL << DRM_FIXED_POINT) #define DRM_FIXED_DECIMAL_MASK (DRM_FIXED_ONE - 1) #define DRM_FIXED_DIGITS_MASK (~DRM_FIXED_DECIMAL_MASK) +#define DRM_FIXED_EPSILON 1LL +#define DRM_FIXED_ALMOST_ONE (DRM_FIXED_ONE - DRM_FIXED_EPSILON) static inline s64 drm_int2fixp(int a) { return ((s64)a) << DRM_FIXED_POINT; } -static inline int drm_fixp2int(int64_t a) +static inline int drm_fixp2int(s64 a) { return ((s64)a) >> DRM_FIXED_POINT; } -static inline unsigned drm_fixp_msbset(int64_t a) +static inline int drm_fixp2int_ceil(s64 a) +{ + if (a > 0) + return drm_fixp2int(a + DRM_FIXED_ALMOST_ONE); + else + return drm_fixp2int(a - DRM_FIXED_ALMOST_ONE); +} + +static inline unsigned drm_fixp_msbset(s64 a) { unsigned shift, sign = (a >> 63) & 1; @@ -136,6 +146,45 @@ static inline s64 drm_fixp_div(s64 a, s64 b) return result; } +static inline s64 drm_fixp_from_fraction(s64 a, s64 b) +{ + s64 res; + bool a_neg = a < 0; + bool b_neg = b < 0; + u64 a_abs = a_neg ? -a : a; + u64 b_abs = b_neg ? -b : b; + u64 rem; + + /* determine integer part */ + u64 res_abs = div64_u64_rem(a_abs, b_abs, &rem); + + /* determine fractional part */ + { + u32 i = DRM_FIXED_POINT; + + do { + rem <<= 1; + res_abs <<= 1; + if (rem >= b_abs) { + res_abs |= 1; + rem -= b_abs; + } + } while (--i != 0); + } + + /* round up LSB */ + { + u64 summand = (rem << 1) >= b_abs; + + res_abs += summand; + } + + res = (s64) res_abs; + if (a_neg ^ b_neg) + res = -res; + return res; +} + static inline s64 drm_fixp_exp(s64 x) { s64 tolerance = div64_s64(DRM_FIXED_ONE, 1000000); From 95b098789dd20a09885c288fe8db200c26490c18 Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Fri, 22 Jan 2016 17:07:26 -0500 Subject: [PATCH 1862/2091] drm/dp/mst: Calculate MST PBN with 31.32 fixed point [ Upstream commit a9ebb3e46c7ef6112c0da466ef0954673ad36832 ] Our PBN value overflows the 20 bits integer part of the 20.12 fixed point. We need to use 31.32 fixed point to avoid this. This happens with display clocks larger than 293122 (at 24 bpp), which we see with the Sharp (and similar) 4k tiled displays. 
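Working the new formula through a case that used to overflow (the 297000 kHz, 24 bpp figures come from the self-test added in the diff below):

    kbps = 297000 * 24                 = 7128000
    kbps * 64 * 1006                   = 458929152000
    PBN  = ceil(458929152000 / (54 * 8 * 1000 * 1000))
         = ceil(458929152000 / 432000000)
         = ceil(1062.336...)           = 1063

For comparison, the old 20.12 path had to fit the intermediate clock * bpp/8 * 64/54 * 1.006, roughly 1062336 here, into a 20-bit integer part whose maximum is 1048575, which is why clocks above roughly 293 MHz at 24 bpp broke.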
Signed-off-by: Harry Wentland Cc: stable@vger.kernel.org Reviewed-by: Alex Deucher Signed-off-by: Dave Airlie Signed-off-by: Sasha Levin --- drivers/gpu/drm/drm_dp_mst_topology.c | 67 ++++++++++++++++----------- 1 file changed, 39 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index 253339158acd4..6010976a62ec8 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -2556,32 +2556,31 @@ EXPORT_SYMBOL(drm_dp_check_act_status); */ int drm_dp_calc_pbn_mode(int clock, int bpp) { - fixed20_12 pix_bw; - fixed20_12 fbpp; - fixed20_12 result; - fixed20_12 margin, tmp; - u32 res; - - pix_bw.full = dfixed_const(clock); - fbpp.full = dfixed_const(bpp); - tmp.full = dfixed_const(8); - fbpp.full = dfixed_div(fbpp, tmp); - - result.full = dfixed_mul(pix_bw, fbpp); - margin.full = dfixed_const(54); - tmp.full = dfixed_const(64); - margin.full = dfixed_div(margin, tmp); - result.full = dfixed_div(result, margin); - - margin.full = dfixed_const(1006); - tmp.full = dfixed_const(1000); - margin.full = dfixed_div(margin, tmp); - result.full = dfixed_mul(result, margin); - - result.full = dfixed_div(result, tmp); - result.full = dfixed_ceil(result); - res = dfixed_trunc(result); - return res; + u64 kbps; + s64 peak_kbps; + u32 numerator; + u32 denominator; + + kbps = clock * bpp; + + /* + * margin 5300ppm + 300ppm ~ 0.6% as per spec, factor is 1.006 + * The unit of 54/64Mbytes/sec is an arbitrary unit chosen based on + * common multiplier to render an integer PBN for all link rate/lane + * counts combinations + * calculate + * peak_kbps *= (1006/1000) + * peak_kbps *= (64/54) + * peak_kbps *= 8 convert to bytes + */ + + numerator = 64 * 1006; + denominator = 54 * 8 * 1000 * 1000; + + kbps *= numerator; + peak_kbps = drm_fixp_from_fraction(kbps, denominator); + + return drm_fixp2int_ceil(peak_kbps); } EXPORT_SYMBOL(drm_dp_calc_pbn_mode); @@ -2589,11 +2588,23 @@ static int test_calc_pbn_mode(void) { int ret; ret = drm_dp_calc_pbn_mode(154000, 30); - if (ret != 689) + if (ret != 689) { + DRM_ERROR("PBN calculation test failed - clock %d, bpp %d, expected PBN %d, actual PBN %d.\n", + 154000, 30, 689, ret); return -EINVAL; + } ret = drm_dp_calc_pbn_mode(234000, 30); - if (ret != 1047) + if (ret != 1047) { + DRM_ERROR("PBN calculation test failed - clock %d, bpp %d, expected PBN %d, actual PBN %d.\n", + 234000, 30, 1047, ret); return -EINVAL; + } + ret = drm_dp_calc_pbn_mode(297000, 24); + if (ret != 1063) { + DRM_ERROR("PBN calculation test failed - clock %d, bpp %d, expected PBN %d, actual PBN %d.\n", + 297000, 24, 1063, ret); + return -EINVAL; + } return 0; } From 7d1dc93a85c3593f35985b78ab5325085c2fa2e6 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Tue, 11 Aug 2015 09:54:29 +0200 Subject: [PATCH 1863/2091] drm/dp/mst: Remove port after removing connector. [ Upstream commit 4772ff03df8094fd99d28de5fcf5df3a3e9c68bb ] The port is removed synchronously, but the connector delayed. This causes a use after free which can cause a kernel BUG with slug_debug=FPZU. This is fixed by freeing the port after the connector. This fixes a regression introduced with 6b8eeca65b18ae77e175cc2b6571731f0ee413bf "drm/dp/mst: close deadlock in connector destruction." 
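Condensed from the hunk below (sketch only; the real worker also handles an empty destroy list), the teardown order the worker now follows is:

  mgr->cbs->destroy_connector(mgr, port->connector);   /* connector first */

  drm_dp_port_teardown_pdt(port, port->pdt);
  if (!port->input && port->vcpi.vcpi > 0)
          drm_dp_mst_put_payload_id(mgr, port->vcpi.vcpi);
  kfree(port);                                          /* port freed last */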
Cc: stable@vger.kernel.org Cc: Dave Airlie Signed-off-by: Maarten Lankhorst Reviewed-by: Daniel Vetter Signed-off-by: Jani Nikula Signed-off-by: Sasha Levin --- drivers/gpu/drm/drm_dp_mst_topology.c | 19 +++++++++++++------ include/drm/drm_crtc.h | 2 -- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index 6010976a62ec8..d7d70e873488b 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -871,9 +871,10 @@ static void drm_dp_destroy_port(struct kref *kref) from an EDID retrieval */ if (port->connector) { mutex_lock(&mgr->destroy_connector_lock); - list_add(&port->connector->destroy_list, &mgr->destroy_connector_list); + list_add(&port->next, &mgr->destroy_connector_list); mutex_unlock(&mgr->destroy_connector_lock); schedule_work(&mgr->destroy_connector_work); + return; } drm_dp_port_teardown_pdt(port, port->pdt); @@ -2738,7 +2739,7 @@ static void drm_dp_tx_work(struct work_struct *work) static void drm_dp_destroy_connector_work(struct work_struct *work) { struct drm_dp_mst_topology_mgr *mgr = container_of(work, struct drm_dp_mst_topology_mgr, destroy_connector_work); - struct drm_connector *connector; + struct drm_dp_mst_port *port; /* * Not a regular list traverse as we have to drop the destroy @@ -2747,15 +2748,21 @@ static void drm_dp_destroy_connector_work(struct work_struct *work) */ for (;;) { mutex_lock(&mgr->destroy_connector_lock); - connector = list_first_entry_or_null(&mgr->destroy_connector_list, struct drm_connector, destroy_list); - if (!connector) { + port = list_first_entry_or_null(&mgr->destroy_connector_list, struct drm_dp_mst_port, next); + if (!port) { mutex_unlock(&mgr->destroy_connector_lock); break; } - list_del(&connector->destroy_list); + list_del(&port->next); mutex_unlock(&mgr->destroy_connector_lock); - mgr->cbs->destroy_connector(mgr, connector); + mgr->cbs->destroy_connector(mgr, port->connector); + + drm_dp_port_teardown_pdt(port, port->pdt); + + if (!port->input && port->vcpi.vcpi > 0) + drm_dp_mst_put_payload_id(mgr, port->vcpi.vcpi); + kfree(port); } } diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index 54233583c6cb9..ca71c03143d1b 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -731,8 +731,6 @@ struct drm_connector { uint8_t num_h_tile, num_v_tile; uint8_t tile_h_loc, tile_v_loc; uint16_t tile_h_size, tile_v_size; - - struct list_head destroy_list; }; /** From 06f4d7cebea5027bd8773df0557c6bbaaf2c77a2 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 16 Sep 2015 10:37:28 +1000 Subject: [PATCH 1864/2091] drm/dp/mst: fixup handling hotplug on port removal. [ Upstream commit df4839fdc9b3c922586b945f062f38cbbda022bb ] output ports should always have a connector, unless in the rare case connector allocation fails in the driver. In this case we only need to teardown the pdt, and free the struct, and there is no need to send a hotplug msg. In the case were we add the port to the destroy list we need to send a hotplug if we destroy any connectors, so userspace knows to reprobe stuff. this patch also handles port->connector allocation failing which should be a rare event, but makes the code consistent. 
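For an output port, the destroy path ends up shaped roughly like this (condensed; see the hunk below for the exact code):

  if (port->connector) {
          /* defer to the worker, which sends a single hotplug after
           * the connectors it destroys */
          mutex_lock(&mgr->destroy_connector_lock);
          list_add(&port->next, &mgr->destroy_connector_list);
          mutex_unlock(&mgr->destroy_connector_lock);
          schedule_work(&mgr->destroy_connector_work);
          return;
  }
  /* connector allocation failed earlier: tear down the pdt and free
   * the struct, no hotplug message needed */
  drm_dp_port_teardown_pdt(port, port->pdt);
  kfree(port);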
Reviewed-by: Daniel Vetter Cc: stable@vger.kernel.org Signed-off-by: Dave Airlie Signed-off-by: Sasha Levin --- drivers/gpu/drm/drm_dp_mst_topology.c | 36 +++++++++++++++++++-------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index d7d70e873488b..0ec9ad50ba7c3 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -861,29 +861,33 @@ static void drm_dp_destroy_port(struct kref *kref) { struct drm_dp_mst_port *port = container_of(kref, struct drm_dp_mst_port, kref); struct drm_dp_mst_topology_mgr *mgr = port->mgr; + if (!port->input) { port->vcpi.num_slots = 0; kfree(port->cached_edid); - /* we can't destroy the connector here, as - we might be holding the mode_config.mutex - from an EDID retrieval */ + /* + * The only time we don't have a connector + * on an output port is if the connector init + * fails. + */ if (port->connector) { + /* we can't destroy the connector here, as + * we might be holding the mode_config.mutex + * from an EDID retrieval */ + mutex_lock(&mgr->destroy_connector_lock); list_add(&port->next, &mgr->destroy_connector_list); mutex_unlock(&mgr->destroy_connector_lock); schedule_work(&mgr->destroy_connector_work); return; } + /* no need to clean up vcpi + * as if we have no connector we never setup a vcpi */ drm_dp_port_teardown_pdt(port, port->pdt); - - if (!port->input && port->vcpi.vcpi > 0) - drm_dp_mst_put_payload_id(mgr, port->vcpi.vcpi); } kfree(port); - - (*mgr->cbs->hotplug)(mgr); } static void drm_dp_put_port(struct drm_dp_mst_port *port) @@ -1113,12 +1117,21 @@ static void drm_dp_add_port(struct drm_dp_mst_branch *mstb, char proppath[255]; build_mst_prop_path(port, mstb, proppath, sizeof(proppath)); port->connector = (*mstb->mgr->cbs->add_connector)(mstb->mgr, port, proppath); - + if (!port->connector) { + /* remove it from the port list */ + mutex_lock(&mstb->mgr->lock); + list_del(&port->next); + mutex_unlock(&mstb->mgr->lock); + /* drop port list reference */ + drm_dp_put_port(port); + goto out; + } if (port->port_num >= 8) { port->cached_edid = drm_get_edid(port->connector, &port->aux.ddc); } } +out: /* put reference to this port */ drm_dp_put_port(port); } @@ -2740,7 +2753,7 @@ static void drm_dp_destroy_connector_work(struct work_struct *work) { struct drm_dp_mst_topology_mgr *mgr = container_of(work, struct drm_dp_mst_topology_mgr, destroy_connector_work); struct drm_dp_mst_port *port; - + bool send_hotplug = false; /* * Not a regular list traverse as we have to drop the destroy * connector lock before destroying the connector, to avoid AB->BA @@ -2763,7 +2776,10 @@ static void drm_dp_destroy_connector_work(struct work_struct *work) if (!port->input && port->vcpi.vcpi > 0) drm_dp_mst_put_payload_id(mgr, port->vcpi.vcpi); kfree(port); + send_hotplug = true; } + if (send_hotplug) + (*mgr->cbs->hotplug)(mgr); } /** From fde3cda03b3f681ecf3d92a56f045104174493e3 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 9 Jul 2015 23:44:32 +0200 Subject: [PATCH 1865/2091] drm/radeon: Take all modeset locks for DP MST hotplug [ Upstream commit 2ee6bcdcfa4d8b56b20bc6308cd5f9bced5b5324 ] Similar with the i915 take all modeset locks for mst hotplug. This is needed to make sure radeon holds both mode_config.mutex and mode_config.connection_mutex when updating the connector_list, which is the new (interim) locking regime we want for that. 
Reviewed-by: Maarten Lankhorst Signed-off-by: Daniel Vetter Signed-off-by: Sasha Levin --- drivers/gpu/drm/radeon/radeon_dp_mst.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_dp_mst.c b/drivers/gpu/drm/radeon/radeon_dp_mst.c index 42986130cc63a..c9ff4cf4c4e70 100644 --- a/drivers/gpu/drm/radeon/radeon_dp_mst.c +++ b/drivers/gpu/drm/radeon/radeon_dp_mst.c @@ -287,9 +287,9 @@ static struct drm_connector *radeon_dp_add_mst_connector(struct drm_dp_mst_topol drm_mode_connector_set_path_property(connector, pathprop); drm_reinit_primary_mode_group(dev); - mutex_lock(&dev->mode_config.mutex); + drm_modeset_lock_all(dev); radeon_fb_add_connector(rdev, connector); - mutex_unlock(&dev->mode_config.mutex); + drm_modeset_unlock_all(dev); drm_connector_register(connector); return connector; @@ -304,12 +304,12 @@ static void radeon_dp_destroy_mst_connector(struct drm_dp_mst_topology_mgr *mgr, drm_connector_unregister(connector); /* need to nuke the connector */ - mutex_lock(&dev->mode_config.mutex); + drm_modeset_lock_all(dev); /* dpms off */ radeon_fb_remove_connector(rdev, connector); drm_connector_cleanup(connector); - mutex_unlock(&dev->mode_config.mutex); + drm_modeset_unlock_all(dev); drm_reinit_primary_mode_group(dev); From 9e3ba11cb727f20a2aa74e90f1858c7c49ed8e64 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 9 Jul 2015 23:44:31 +0200 Subject: [PATCH 1866/2091] drm/i915: Take all modeset locks for DP MST hotplug [ Upstream commit 8bb4da1df54a20d68c34427356e34315ba122c0f ] While auditing various users of the connector/encoder lists I realized that the atomic code is a very prolific user of them. And it only ever grabs the mode_config->connection_mutex, but not the mode_config->mutex like all the other code walking encoder/connector lists. The problem is that we can't grab the mode_config.mutex late in atomic code since that would lead to locking inversions. And we don't want to grab it unconditionally like the legacy set_config modeset path since that would render all the fine-grained locking moot. Instead just grab more locks in the dp mst hotplug code. Note that drm_connector_init (which is the one adding the connector to these lists) already uses drm_modeset_lock_all. The other reason for grabbing all locks is that the dpms off in the unplug function amounts to a modeset, so better to take all required locks for that. 
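The resulting pattern in both MST connector hotplug paths (radeon above, i915 below) is simply the wider lock; sketched here with the i915 helper:

  /* before: only the connector-list mutex */
  mutex_lock(&dev->mode_config.mutex);
  intel_connector_add_to_fbdev(intel_connector);
  mutex_unlock(&dev->mode_config.mutex);

  /* after: all modeset locks (mode_config.mutex, connection_mutex and
   * the CRTC locks), so atomic code holding only connection_mutex
   * still sees a stable connector list */
  drm_modeset_lock_all(dev);
  intel_connector_add_to_fbdev(intel_connector);
  drm_modeset_unlock_all(dev);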
Reviewed-by: Maarten Lankhorst Signed-off-by: Daniel Vetter Signed-off-by: Sasha Levin --- drivers/gpu/drm/i915/intel_dp_mst.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c index 5cb47482d29fb..88c557551b897 100644 --- a/drivers/gpu/drm/i915/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/intel_dp_mst.c @@ -439,9 +439,9 @@ static struct drm_connector *intel_dp_add_mst_connector(struct drm_dp_mst_topolo drm_mode_connector_set_path_property(connector, pathprop); drm_reinit_primary_mode_group(dev); - mutex_lock(&dev->mode_config.mutex); + drm_modeset_lock_all(dev); intel_connector_add_to_fbdev(intel_connector); - mutex_unlock(&dev->mode_config.mutex); + drm_modeset_unlock_all(dev); drm_connector_register(&intel_connector->base); return connector; } @@ -452,16 +452,16 @@ static void intel_dp_destroy_mst_connector(struct drm_dp_mst_topology_mgr *mgr, struct intel_connector *intel_connector = to_intel_connector(connector); struct drm_device *dev = connector->dev; /* need to nuke the connector */ - mutex_lock(&dev->mode_config.mutex); + drm_modeset_lock_all(dev); intel_connector_dpms(connector, DRM_MODE_DPMS_OFF); - mutex_unlock(&dev->mode_config.mutex); + drm_modeset_unlock_all(dev); intel_connector->unregister(intel_connector); - mutex_lock(&dev->mode_config.mutex); + drm_modeset_lock_all(dev); intel_connector_remove_from_fbdev(intel_connector); drm_connector_cleanup(connector); - mutex_unlock(&dev->mode_config.mutex); + drm_modeset_unlock_all(dev); drm_reinit_primary_mode_group(dev); From 8a4ebc74857af2377d9b3f756714ded686e01b66 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 5 Feb 2016 15:36:16 -0800 Subject: [PATCH 1867/2091] dump_stack: avoid potential deadlocks [ Upstream commit d7ce36924344ace0dbdc855b1206cacc46b36d45 ] Some servers experienced fatal deadlocks because of a combination of bugs, leading to multiple cpus calling dump_stack(). The checksumming bug was fixed in commit 34ae6a1aa054 ("ipv6: update skb->csum when CE mark is propagated"). The second problem is a faulty locking in dump_stack() CPU1 runs in process context and calls dump_stack(), grabs dump_lock. CPU2 receives a TCP packet under softirq, grabs socket spinlock, and call dump_stack() from netdev_rx_csum_fault(). dump_stack() spins on atomic_cmpxchg(&dump_lock, -1, 2), since dump_lock is owned by CPU1 While dumping its stack, CPU1 is interrupted by a softirq, and happens to process a packet for the TCP socket locked by CPU2. CPU1 spins forever in spin_lock() : deadlock Stack trace on CPU1 looked like : NMI backtrace for cpu 1 RIP: _raw_spin_lock+0x25/0x30 ... 
Call Trace: tcp_v6_rcv+0x243/0x620 ip6_input_finish+0x11f/0x330 ip6_input+0x38/0x40 ip6_rcv_finish+0x3c/0x90 ipv6_rcv+0x2a9/0x500 process_backlog+0x461/0xaa0 net_rx_action+0x147/0x430 __do_softirq+0x167/0x2d0 call_softirq+0x1c/0x30 do_softirq+0x3f/0x80 irq_exit+0x6e/0xc0 smp_call_function_single_interrupt+0x35/0x40 call_function_single_interrupt+0x6a/0x70 printk+0x4d/0x4f printk_address+0x31/0x33 print_trace_address+0x33/0x3c print_context_stack+0x7f/0x119 dump_trace+0x26b/0x28e show_trace_log_lvl+0x4f/0x5c show_stack_log_lvl+0x104/0x113 show_stack+0x42/0x44 dump_stack+0x46/0x58 netdev_rx_csum_fault+0x38/0x3c __skb_checksum_complete_head+0x6e/0x80 __skb_checksum_complete+0x11/0x20 tcp_rcv_established+0x2bd5/0x2fd0 tcp_v6_do_rcv+0x13c/0x620 sk_backlog_rcv+0x15/0x30 release_sock+0xd2/0x150 tcp_recvmsg+0x1c1/0xfc0 inet_recvmsg+0x7d/0x90 sock_recvmsg+0xaf/0xe0 ___sys_recvmsg+0x111/0x3b0 SyS_recvmsg+0x5c/0xb0 system_call_fastpath+0x16/0x1b Fixes: b58d977432c8 ("dump_stack: serialize the output from dump_stack()") Signed-off-by: Eric Dumazet Cc: Alex Thorlton Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- lib/dump_stack.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/lib/dump_stack.c b/lib/dump_stack.c index 6745c6230db34..c30d07e99dba4 100644 --- a/lib/dump_stack.c +++ b/lib/dump_stack.c @@ -25,6 +25,7 @@ static atomic_t dump_lock = ATOMIC_INIT(-1); asmlinkage __visible void dump_stack(void) { + unsigned long flags; int was_locked; int old; int cpu; @@ -33,9 +34,8 @@ asmlinkage __visible void dump_stack(void) * Permit this cpu to perform nested stack dumps while serialising * against other CPUs */ - preempt_disable(); - retry: + local_irq_save(flags); cpu = smp_processor_id(); old = atomic_cmpxchg(&dump_lock, -1, cpu); if (old == -1) { @@ -43,6 +43,7 @@ asmlinkage __visible void dump_stack(void) } else if (old == cpu) { was_locked = 1; } else { + local_irq_restore(flags); cpu_relax(); goto retry; } @@ -52,7 +53,7 @@ asmlinkage __visible void dump_stack(void) if (!was_locked) atomic_set(&dump_lock, -1); - preempt_enable(); + local_irq_restore(flags); } #else asmlinkage __visible void dump_stack(void) From ff5ecd2953a635bf70101dac550fcb01ced863e9 Mon Sep 17 00:00:00 2001 From: xuejiufei Date: Fri, 5 Feb 2016 15:36:47 -0800 Subject: [PATCH 1868/2091] ocfs2/dlm: clear refmap bit of recovery lock while doing local recovery cleanup [ Upstream commit c95a51807b730e4681e2ecbdfd669ca52601959e ] When recovery master down, dlm_do_local_recovery_cleanup() only remove the $RECOVERY lock owned by dead node, but do not clear the refmap bit. Which will make umount thread falling in dead loop migrating $RECOVERY to the dead node. 
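Condensed, the cleanup loop now also drops the dead node's reference on the $RECOVERY resource (sketch of the added call in context):

  /* under res->spinlock, after the $RECOVERY lock owned by dead_node
   * has been removed */
  dlm_lockres_clear_refmap_bit(dlm, res, dead_node);
  spin_unlock(&res->spinlock);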
Signed-off-by: xuejiufei Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/ocfs2/dlm/dlmrecovery.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 3d90ad7ff91fe..f25ff5d3a2f99 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -2360,6 +2360,8 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node) break; } } + dlm_lockres_clear_refmap_bit(dlm, res, + dead_node); spin_unlock(&res->spinlock); continue; } From 4b5eaa857d64ea737c3bed1eb9b3dd201dd2cecf Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Fri, 5 Feb 2016 15:36:50 -0800 Subject: [PATCH 1869/2091] mm: replace vma_lock_anon_vma with anon_vma_lock_read/write [ Upstream commit 12352d3cae2cebe18805a91fab34b534d7444231 ] Sequence vma_lock_anon_vma() - vma_unlock_anon_vma() isn't safe if anon_vma appeared between lock and unlock. We have to check anon_vma first or call anon_vma_prepare() to be sure that it's here. There are only few users of these legacy helpers. Let's get rid of them. This patch fixes anon_vma lock imbalance in validate_mm(). Write lock isn't required here, read lock is enough. And reorders expand_downwards/expand_upwards: security_mmap_addr() and wrapping-around check don't have to be under anon vma lock. Link: https://lkml.kernel.org/r/CACT4Y+Y908EjM2z=706dv4rV6dWtxTLK9nFg9_7DhRMLppBo2g@mail.gmail.com Signed-off-by: Konstantin Khlebnikov Reported-by: Dmitry Vyukov Acked-by: Kirill A. Shutemov Cc: Andrea Arcangeli Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- include/linux/rmap.h | 14 ----------- mm/mmap.c | 55 ++++++++++++++++++++------------------------ 2 files changed, 25 insertions(+), 44 deletions(-) diff --git a/include/linux/rmap.h b/include/linux/rmap.h index c89c53a113a8d..6f48ddc4b2b55 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -105,20 +105,6 @@ static inline void put_anon_vma(struct anon_vma *anon_vma) __put_anon_vma(anon_vma); } -static inline void vma_lock_anon_vma(struct vm_area_struct *vma) -{ - struct anon_vma *anon_vma = vma->anon_vma; - if (anon_vma) - down_write(&anon_vma->root->rwsem); -} - -static inline void vma_unlock_anon_vma(struct vm_area_struct *vma) -{ - struct anon_vma *anon_vma = vma->anon_vma; - if (anon_vma) - up_write(&anon_vma->root->rwsem); -} - static inline void anon_vma_lock_write(struct anon_vma *anon_vma) { down_write(&anon_vma->root->rwsem); diff --git a/mm/mmap.c b/mm/mmap.c index bb50cacc3ea57..b639fa2721d8d 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -440,12 +440,16 @@ static void validate_mm(struct mm_struct *mm) struct vm_area_struct *vma = mm->mmap; while (vma) { + struct anon_vma *anon_vma = vma->anon_vma; struct anon_vma_chain *avc; - vma_lock_anon_vma(vma); - list_for_each_entry(avc, &vma->anon_vma_chain, same_vma) - anon_vma_interval_tree_verify(avc); - vma_unlock_anon_vma(vma); + if (anon_vma) { + anon_vma_lock_read(anon_vma); + list_for_each_entry(avc, &vma->anon_vma_chain, same_vma) + anon_vma_interval_tree_verify(avc); + anon_vma_unlock_read(anon_vma); + } + highest_address = vma->vm_end; vma = vma->vm_next; i++; @@ -2141,32 +2145,27 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns */ int expand_upwards(struct vm_area_struct *vma, unsigned long address) { - int error; + int error = 0; if (!(vma->vm_flags & VM_GROWSUP)) return 
-EFAULT; - /* - * We must make sure the anon_vma is allocated - * so that the anon_vma locking is not a noop. - */ + /* Guard against wrapping around to address 0. */ + if (address < PAGE_ALIGN(address+4)) + address = PAGE_ALIGN(address+4); + else + return -ENOMEM; + + /* We must make sure the anon_vma is allocated. */ if (unlikely(anon_vma_prepare(vma))) return -ENOMEM; - vma_lock_anon_vma(vma); /* * vma->vm_start/vm_end cannot change under us because the caller * is required to hold the mmap_sem in read mode. We need the * anon_vma lock to serialize against concurrent expand_stacks. - * Also guard against wrapping around to address 0. */ - if (address < PAGE_ALIGN(address+4)) - address = PAGE_ALIGN(address+4); - else { - vma_unlock_anon_vma(vma); - return -ENOMEM; - } - error = 0; + anon_vma_lock_write(vma->anon_vma); /* Somebody else might have raced and expanded it already */ if (address > vma->vm_end) { @@ -2184,7 +2183,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address) * updates, but we only hold a shared mmap_sem * lock here, so we need to protect against * concurrent vma expansions. - * vma_lock_anon_vma() doesn't help here, as + * anon_vma_lock_write() doesn't help here, as * we don't guarantee that all growable vmas * in a mm share the same root anon vma. * So, we reuse mm->page_table_lock to guard @@ -2204,7 +2203,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address) } } } - vma_unlock_anon_vma(vma); + anon_vma_unlock_write(vma->anon_vma); khugepaged_enter_vma_merge(vma, vma->vm_flags); validate_mm(vma->vm_mm); return error; @@ -2219,25 +2218,21 @@ int expand_downwards(struct vm_area_struct *vma, { int error; - /* - * We must make sure the anon_vma is allocated - * so that the anon_vma locking is not a noop. - */ - if (unlikely(anon_vma_prepare(vma))) - return -ENOMEM; - address &= PAGE_MASK; error = security_mmap_addr(address); if (error) return error; - vma_lock_anon_vma(vma); + /* We must make sure the anon_vma is allocated. */ + if (unlikely(anon_vma_prepare(vma))) + return -ENOMEM; /* * vma->vm_start/vm_end cannot change under us because the caller * is required to hold the mmap_sem in read mode. We need the * anon_vma lock to serialize against concurrent expand_stacks. */ + anon_vma_lock_write(vma->anon_vma); /* Somebody else might have raced and expanded it already */ if (address < vma->vm_start) { @@ -2255,7 +2250,7 @@ int expand_downwards(struct vm_area_struct *vma, * updates, but we only hold a shared mmap_sem * lock here, so we need to protect against * concurrent vma expansions. - * vma_lock_anon_vma() doesn't help here, as + * anon_vma_lock_write() doesn't help here, as * we don't guarantee that all growable vmas * in a mm share the same root anon vma. * So, we reuse mm->page_table_lock to guard @@ -2273,7 +2268,7 @@ int expand_downwards(struct vm_area_struct *vma, } } } - vma_unlock_anon_vma(vma); + anon_vma_unlock_write(vma->anon_vma); khugepaged_enter_vma_merge(vma, vma->vm_flags); validate_mm(vma->vm_mm); return error; From 42362e1f5a73d36fcaf3eb4950dfc17f633ea0d3 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Fri, 5 Feb 2016 15:37:01 -0800 Subject: [PATCH 1870/2091] radix-tree: fix oops after radix_tree_iter_retry [ Upstream commit 732042821cfa106b3c20b9780e4c60fee9d68900 ] Helper radix_tree_iter_retry() resets next_index to the current index. In following radix_tree_next_slot current chunk size becomes zero. This isn't checked and it tries to dereference null pointer in slot. 
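Illustrative sketch of the two loop variants (condensed from the hunk below; the real loop also advances iter->index and handles the contiguous-iteration flag):

  /* before: an empty chunk underflows the unsigned count */
  unsigned size = radix_tree_chunk_size(iter) - 1;  /* 0 - 1 == UINT_MAX */
  while (size--) {                                  /* entered anyway */
          slot++;
          if (*slot)                                /* slot was NULL after the retry: oops */
                  return slot;
  }

  /* after: signed count, tested before anything is dereferenced */
  long size = radix_tree_chunk_size(iter);          /* 0 */
  while (--size > 0)                                /* false immediately */
          slot++;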
Tagged iterator is fine because retry happens only at slot 0 where tag bitmask in iter->tags is filled with single bit. Fixes: 46437f9a554f ("radix-tree: fix race in gang lookup") Signed-off-by: Konstantin Khlebnikov Cc: Matthew Wilcox Cc: Hugh Dickins Cc: Ohad Ben-Cohen Cc: Jeremiah Mahler Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- include/linux/radix-tree.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 1a2b2276ffb3a..5d5174b59802a 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -391,7 +391,7 @@ void **radix_tree_iter_retry(struct radix_tree_iter *iter) * @iter: pointer to radix tree iterator * Returns: current chunk size */ -static __always_inline unsigned +static __always_inline long radix_tree_chunk_size(struct radix_tree_iter *iter) { return iter->next_index - iter->index; @@ -425,9 +425,9 @@ radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags) return slot + offset + 1; } } else { - unsigned size = radix_tree_chunk_size(iter) - 1; + long size = radix_tree_chunk_size(iter); - while (size--) { + while (--size > 0) { slot++; iter->index++; if (likely(*slot)) From 0479b826457382c39d369acc95347621a347414e Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 25 Sep 2015 16:31:46 +0100 Subject: [PATCH 1871/2091] X.509: Don't strip leading 00's from key ID when constructing key description [ Upstream commit e7c87bef7de2417b219d4dbfe8d33a0098a8df54 ] Don't strip leading zeros from the crypto key ID when using it to construct the struct key description as the signature in kernels up to and including 4.2 matched this aspect of the key. This means that 1 in 256 keys won't actually match if their key ID begins with 00. The key ID is stored in the module signature as binary and so must be converted to text in order to invoke request_key() - but it isn't stripped at this point. Something like this is likely to be observed in dmesg when the key is loaded: [ 1.572423] Loaded X.509 cert 'Build time autogenerated kernel key: 62a7c3d2da278be024da4af8652c071f3fea33' followed by this when we try and use it: [ 1.646153] Request for unknown module key 'Build time autogenerated kernel key: 0062a7c3d2da278be024da4af8652c071f3fea33' err -11 The 'Loaded' line should show an extra '00' on the front of the hex string. This problem should not affect 4.3-rc1 and onwards because there the key should be matched on one of its auxiliary identities rather than the key struct's description string. 
Reported-by: Arjan van de Ven Reported-by: Andy Whitcroft Signed-off-by: David Howells Signed-off-by: Sasha Levin --- crypto/asymmetric_keys/x509_public_key.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/crypto/asymmetric_keys/x509_public_key.c b/crypto/asymmetric_keys/x509_public_key.c index 24f17e6c59048..4c850ac474e20 100644 --- a/crypto/asymmetric_keys/x509_public_key.c +++ b/crypto/asymmetric_keys/x509_public_key.c @@ -307,10 +307,6 @@ static int x509_key_preparse(struct key_preparsed_payload *prep) srlen = cert->raw_serial_size; q = cert->raw_serial; } - if (srlen > 1 && *q == 0) { - srlen--; - q++; - } ret = -ENOMEM; desc = kmalloc(sulen + 2 + srlen * 2 + 1, GFP_KERNEL); From 83fdace666f72dbfc4a7681a04e3689b61dae3b9 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Mon, 15 Feb 2016 15:46:24 -0500 Subject: [PATCH 1872/2091] Linux 4.1.18 Signed-off-by: Sasha Levin --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index d398dd440bc90..001375cfd815e 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 1 -SUBLEVEL = 17 +SUBLEVEL = 18 EXTRAVERSION = NAME = Series 4800 From 789a2e5a69bc3dd5197b8f51451359d06c4ac8a1 Mon Sep 17 00:00:00 2001 From: CQ Tang Date: Wed, 13 Jan 2016 21:15:03 +0000 Subject: [PATCH 1873/2091] iommu/vt-d: Fix 64-bit accesses to 32-bit DMAR_GSTS_REG [ Upstream commit fda3bec12d0979aae3f02ee645913d66fbc8a26e ] This is a 32-bit register. Apparently harmless on real hardware, but causing justified warnings in simulation. Signed-off-by: CQ Tang Signed-off-by: David Woodhouse Cc: stable@vger.kernel.org Signed-off-by: Sasha Levin --- drivers/iommu/dmar.c | 2 +- drivers/iommu/intel_irq_remapping.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 9847613085e15..5a2ec39e1fd9b 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -1342,7 +1342,7 @@ void dmar_disable_qi(struct intel_iommu *iommu) raw_spin_lock_irqsave(&iommu->register_lock, flags); - sts = dmar_readq(iommu->reg + DMAR_GSTS_REG); + sts = readl(iommu->reg + DMAR_GSTS_REG); if (!(sts & DMA_GSTS_QIES)) goto end; diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index 5709ae9c3e771..04b39be8f1f36 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -544,7 +544,7 @@ static void iommu_disable_irq_remapping(struct intel_iommu *iommu) raw_spin_lock_irqsave(&iommu->register_lock, flags); - sts = dmar_readq(iommu->reg + DMAR_GSTS_REG); + sts = readl(iommu->reg + DMAR_GSTS_REG); if (!(sts & DMA_GSTS_IRES)) goto end; From 69cc9251b85d6ddfd8e94b6be60aacdceec98b22 Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Thu, 21 Jan 2016 13:13:40 +0800 Subject: [PATCH 1874/2091] ASoC: rt5645: fix the shift bit of IN1 boost [ Upstream commit b28785fa9cede0d4f47310ca0dd2a4e1d50478b5 ] The shift bit of IN1 boost gain control is 12. 
Signed-off-by: Bard Liao Signed-off-by: Mark Brown Cc: stable@vger.kernel.org Signed-off-by: Sasha Levin --- sound/soc/codecs/rt5645.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index 2ee44abd56a6b..6cbd03a5e53db 100644 --- a/sound/soc/codecs/rt5645.c +++ b/sound/soc/codecs/rt5645.c @@ -487,7 +487,7 @@ static const struct snd_kcontrol_new rt5645_snd_controls[] = { /* IN1/IN2 Control */ SOC_SINGLE_TLV("IN1 Boost", RT5645_IN1_CTRL1, - RT5645_BST_SFT1, 8, 0, bst_tlv), + RT5645_BST_SFT1, 12, 0, bst_tlv), SOC_SINGLE_TLV("IN2 Boost", RT5645_IN2_CTRL, RT5645_BST_SFT2, 8, 0, bst_tlv), From 13a16b9e3907607e4a324373362701c26f6e1cea Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 13 May 2015 15:38:40 -0400 Subject: [PATCH 1875/2091] cgroup: separate out include/linux/cgroup-defs.h [ Upstream commit b4a04ab7a37b490cad48e69abfe14288cacb669c ] From 2d728f74bfc071df06773e2fd7577dd5dab6425d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 13 May 2015 15:37:01 -0400 This patch separates out cgroup-defs.h from cgroup.h which has grown a lot of dependencies. cgroup-defs.h currently only contains constant and type definitions and can be used to break circular include dependency. While moving, definitions are reordered so that cgroup-defs.h has consistent logical structure. This patch is pure reorganization. Signed-off-by: Tejun Heo Signed-off-by: Sasha Levin --- include/linux/cgroup-defs.h | 464 ++++++++++++++++++++++++++++++++++++ include/linux/cgroup.h | 455 +---------------------------------- 2 files changed, 466 insertions(+), 453 deletions(-) create mode 100644 include/linux/cgroup-defs.h diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h new file mode 100644 index 0000000000000..55f3120fb9527 --- /dev/null +++ b/include/linux/cgroup-defs.h @@ -0,0 +1,464 @@ +/* + * linux/cgroup-defs.h - basic definitions for cgroup + * + * This file provides basic type and interface. Include this file directly + * only if necessary to avoid cyclic dependencies. + */ +#ifndef _LINUX_CGROUP_DEFS_H +#define _LINUX_CGROUP_DEFS_H + +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_CGROUPS + +struct cgroup; +struct cgroup_root; +struct cgroup_subsys; +struct cgroup_taskset; +struct kernfs_node; +struct kernfs_ops; +struct kernfs_open_file; + +#define MAX_CGROUP_TYPE_NAMELEN 32 +#define MAX_CGROUP_ROOT_NAMELEN 64 +#define MAX_CFTYPE_NAME 64 + +/* define the enumeration of all cgroup subsystems */ +#define SUBSYS(_x) _x ## _cgrp_id, +enum cgroup_subsys_id { +#include + CGROUP_SUBSYS_COUNT, +}; +#undef SUBSYS + +/* bits in struct cgroup_subsys_state flags field */ +enum { + CSS_NO_REF = (1 << 0), /* no reference counting for this css */ + CSS_ONLINE = (1 << 1), /* between ->css_online() and ->css_offline() */ + CSS_RELEASED = (1 << 2), /* refcnt reached zero, released */ +}; + +/* bits in struct cgroup flags field */ +enum { + /* Control Group requires release notifications to userspace */ + CGRP_NOTIFY_ON_RELEASE, + /* + * Clone the parent's configuration when creating a new child + * cpuset cgroup. For historical reasons, this option can be + * specified at mount time and thus is implemented here. 
+ */ + CGRP_CPUSET_CLONE_CHILDREN, +}; + +/* cgroup_root->flags */ +enum { + CGRP_ROOT_SANE_BEHAVIOR = (1 << 0), /* __DEVEL__sane_behavior specified */ + CGRP_ROOT_NOPREFIX = (1 << 1), /* mounted subsystems have no named prefix */ + CGRP_ROOT_XATTR = (1 << 2), /* supports extended attributes */ +}; + +/* cftype->flags */ +enum { + CFTYPE_ONLY_ON_ROOT = (1 << 0), /* only create on root cgrp */ + CFTYPE_NOT_ON_ROOT = (1 << 1), /* don't create on root cgrp */ + CFTYPE_NO_PREFIX = (1 << 3), /* (DON'T USE FOR NEW FILES) no subsys prefix */ + + /* internal flags, do not use outside cgroup core proper */ + __CFTYPE_ONLY_ON_DFL = (1 << 16), /* only on default hierarchy */ + __CFTYPE_NOT_ON_DFL = (1 << 17), /* not on default hierarchy */ +}; + +/* + * Per-subsystem/per-cgroup state maintained by the system. This is the + * fundamental structural building block that controllers deal with. + * + * Fields marked with "PI:" are public and immutable and may be accessed + * directly without synchronization. + */ +struct cgroup_subsys_state { + /* PI: the cgroup that this css is attached to */ + struct cgroup *cgroup; + + /* PI: the cgroup subsystem that this css is attached to */ + struct cgroup_subsys *ss; + + /* reference count - access via css_[try]get() and css_put() */ + struct percpu_ref refcnt; + + /* PI: the parent css */ + struct cgroup_subsys_state *parent; + + /* siblings list anchored at the parent's ->children */ + struct list_head sibling; + struct list_head children; + + /* + * PI: Subsys-unique ID. 0 is unused and root is always 1. The + * matching css can be looked up using css_from_id(). + */ + int id; + + unsigned int flags; + + /* + * Monotonically increasing unique serial number which defines a + * uniform order among all csses. It's guaranteed that all + * ->children lists are in the ascending order of ->serial_nr and + * used to allow interrupting and resuming iterations. + */ + u64 serial_nr; + + /* percpu_ref killing and RCU release */ + struct rcu_head rcu_head; + struct work_struct destroy_work; +}; + +/* + * A css_set is a structure holding pointers to a set of + * cgroup_subsys_state objects. This saves space in the task struct + * object and speeds up fork()/exit(), since a single inc/dec and a + * list_add()/del() can bump the reference count on the entire cgroup + * set for a task. + */ +struct css_set { + /* Reference count */ + atomic_t refcount; + + /* + * List running through all cgroup groups in the same hash + * slot. Protected by css_set_lock + */ + struct hlist_node hlist; + + /* + * Lists running through all tasks using this cgroup group. + * mg_tasks lists tasks which belong to this cset but are in the + * process of being migrated out or in. Protected by + * css_set_rwsem, but, during migration, once tasks are moved to + * mg_tasks, it can be read safely while holding cgroup_mutex. + */ + struct list_head tasks; + struct list_head mg_tasks; + + /* + * List of cgrp_cset_links pointing at cgroups referenced from this + * css_set. Protected by css_set_lock. + */ + struct list_head cgrp_links; + + /* the default cgroup associated with this css_set */ + struct cgroup *dfl_cgrp; + + /* + * Set of subsystem states, one for each subsystem. This array is + * immutable after creation apart from the init_css_set during + * subsystem registration (at boot time). + */ + struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; + + /* + * List of csets participating in the on-going migration either as + * source or destination. Protected by cgroup_mutex. 
+ */ + struct list_head mg_preload_node; + struct list_head mg_node; + + /* + * If this cset is acting as the source of migration the following + * two fields are set. mg_src_cgrp is the source cgroup of the + * on-going migration and mg_dst_cset is the destination cset the + * target tasks on this cset should be migrated to. Protected by + * cgroup_mutex. + */ + struct cgroup *mg_src_cgrp; + struct css_set *mg_dst_cset; + + /* + * On the default hierarhcy, ->subsys[ssid] may point to a css + * attached to an ancestor instead of the cgroup this css_set is + * associated with. The following node is anchored at + * ->subsys[ssid]->cgroup->e_csets[ssid] and provides a way to + * iterate through all css's attached to a given cgroup. + */ + struct list_head e_cset_node[CGROUP_SUBSYS_COUNT]; + + /* For RCU-protected deletion */ + struct rcu_head rcu_head; +}; + +struct cgroup { + /* self css with NULL ->ss, points back to this cgroup */ + struct cgroup_subsys_state self; + + unsigned long flags; /* "unsigned long" so bitops work */ + + /* + * idr allocated in-hierarchy ID. + * + * ID 0 is not used, the ID of the root cgroup is always 1, and a + * new cgroup will be assigned with a smallest available ID. + * + * Allocating/Removing ID must be protected by cgroup_mutex. + */ + int id; + + /* + * If this cgroup contains any tasks, it contributes one to + * populated_cnt. All children with non-zero popuplated_cnt of + * their own contribute one. The count is zero iff there's no task + * in this cgroup or its subtree. + */ + int populated_cnt; + + struct kernfs_node *kn; /* cgroup kernfs entry */ + struct kernfs_node *populated_kn; /* kn for "cgroup.subtree_populated" */ + + /* + * The bitmask of subsystems enabled on the child cgroups. + * ->subtree_control is the one configured through + * "cgroup.subtree_control" while ->child_subsys_mask is the + * effective one which may have more subsystems enabled. + * Controller knobs are made available iff it's enabled in + * ->subtree_control. + */ + unsigned int subtree_control; + unsigned int child_subsys_mask; + + /* Private pointers for each registered subsystem */ + struct cgroup_subsys_state __rcu *subsys[CGROUP_SUBSYS_COUNT]; + + struct cgroup_root *root; + + /* + * List of cgrp_cset_links pointing at css_sets with tasks in this + * cgroup. Protected by css_set_lock. + */ + struct list_head cset_links; + + /* + * On the default hierarchy, a css_set for a cgroup with some + * susbsys disabled will point to css's which are associated with + * the closest ancestor which has the subsys enabled. The + * following lists all css_sets which point to this cgroup's css + * for the given subsystem. + */ + struct list_head e_csets[CGROUP_SUBSYS_COUNT]; + + /* + * list of pidlists, up to two for each namespace (one for procs, one + * for tasks); created on demand. + */ + struct list_head pidlists; + struct mutex pidlist_mutex; + + /* used to wait for offlining of csses */ + wait_queue_head_t offline_waitq; + + /* used to schedule release agent */ + struct work_struct release_agent_work; +}; + +/* + * A cgroup_root represents the root of a cgroup hierarchy, and may be + * associated with a kernfs_root to form an active hierarchy. This is + * internal to cgroup core. Don't access directly from controllers. + */ +struct cgroup_root { + struct kernfs_root *kf_root; + + /* The bitmask of subsystems attached to this hierarchy */ + unsigned int subsys_mask; + + /* Unique id for this hierarchy. */ + int hierarchy_id; + + /* The root cgroup. 
Root is destroyed on its release. */ + struct cgroup cgrp; + + /* Number of cgroups in the hierarchy, used only for /proc/cgroups */ + atomic_t nr_cgrps; + + /* A list running through the active hierarchies */ + struct list_head root_list; + + /* Hierarchy-specific flags */ + unsigned int flags; + + /* IDs for cgroups in this hierarchy */ + struct idr cgroup_idr; + + /* The path to use for release notifications. */ + char release_agent_path[PATH_MAX]; + + /* The name for this hierarchy - may be empty */ + char name[MAX_CGROUP_ROOT_NAMELEN]; +}; + +/* + * struct cftype: handler definitions for cgroup control files + * + * When reading/writing to a file: + * - the cgroup to use is file->f_path.dentry->d_parent->d_fsdata + * - the 'cftype' of the file is file->f_path.dentry->d_fsdata + */ +struct cftype { + /* + * By convention, the name should begin with the name of the + * subsystem, followed by a period. Zero length string indicates + * end of cftype array. + */ + char name[MAX_CFTYPE_NAME]; + int private; + /* + * If not 0, file mode is set to this value, otherwise it will + * be figured out automatically + */ + umode_t mode; + + /* + * The maximum length of string, excluding trailing nul, that can + * be passed to write. If < PAGE_SIZE-1, PAGE_SIZE-1 is assumed. + */ + size_t max_write_len; + + /* CFTYPE_* flags */ + unsigned int flags; + + /* + * Fields used for internal bookkeeping. Initialized automatically + * during registration. + */ + struct cgroup_subsys *ss; /* NULL for cgroup core files */ + struct list_head node; /* anchored at ss->cfts */ + struct kernfs_ops *kf_ops; + + /* + * read_u64() is a shortcut for the common case of returning a + * single integer. Use it in place of read() + */ + u64 (*read_u64)(struct cgroup_subsys_state *css, struct cftype *cft); + /* + * read_s64() is a signed version of read_u64() + */ + s64 (*read_s64)(struct cgroup_subsys_state *css, struct cftype *cft); + + /* generic seq_file read interface */ + int (*seq_show)(struct seq_file *sf, void *v); + + /* optional ops, implement all or none */ + void *(*seq_start)(struct seq_file *sf, loff_t *ppos); + void *(*seq_next)(struct seq_file *sf, void *v, loff_t *ppos); + void (*seq_stop)(struct seq_file *sf, void *v); + + /* + * write_u64() is a shortcut for the common case of accepting + * a single integer (as parsed by simple_strtoull) from + * userspace. Use in place of write(); return 0 or error. + */ + int (*write_u64)(struct cgroup_subsys_state *css, struct cftype *cft, + u64 val); + /* + * write_s64() is a signed version of write_u64() + */ + int (*write_s64)(struct cgroup_subsys_state *css, struct cftype *cft, + s64 val); + + /* + * write() is the generic write callback which maps directly to + * kernfs write operation and overrides all other operations. + * Maximum write size is determined by ->max_write_len. Use + * of_css/cft() to access the associated css and cft. + */ + ssize_t (*write)(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off); + +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lock_class_key lockdep_key; +#endif +}; + +/* + * Control Group subsystem type. 
+ * See Documentation/cgroups/cgroups.txt for details + */ +struct cgroup_subsys { + struct cgroup_subsys_state *(*css_alloc)(struct cgroup_subsys_state *parent_css); + int (*css_online)(struct cgroup_subsys_state *css); + void (*css_offline)(struct cgroup_subsys_state *css); + void (*css_released)(struct cgroup_subsys_state *css); + void (*css_free)(struct cgroup_subsys_state *css); + void (*css_reset)(struct cgroup_subsys_state *css); + void (*css_e_css_changed)(struct cgroup_subsys_state *css); + + int (*can_attach)(struct cgroup_subsys_state *css, + struct cgroup_taskset *tset); + void (*cancel_attach)(struct cgroup_subsys_state *css, + struct cgroup_taskset *tset); + void (*attach)(struct cgroup_subsys_state *css, + struct cgroup_taskset *tset); + void (*fork)(struct task_struct *task); + void (*exit)(struct cgroup_subsys_state *css, + struct cgroup_subsys_state *old_css, + struct task_struct *task); + void (*bind)(struct cgroup_subsys_state *root_css); + + int disabled; + int early_init; + + /* + * If %false, this subsystem is properly hierarchical - + * configuration, resource accounting and restriction on a parent + * cgroup cover those of its children. If %true, hierarchy support + * is broken in some ways - some subsystems ignore hierarchy + * completely while others are only implemented half-way. + * + * It's now disallowed to create nested cgroups if the subsystem is + * broken and cgroup core will emit a warning message on such + * cases. Eventually, all subsystems will be made properly + * hierarchical and this will go away. + */ + bool broken_hierarchy; + bool warned_broken_hierarchy; + + /* the following two fields are initialized automtically during boot */ + int id; + const char *name; + + /* link to parent, protected by cgroup_lock() */ + struct cgroup_root *root; + + /* idr for css->id */ + struct idr css_idr; + + /* + * List of cftypes. Each entry is the first entry of an array + * terminated by zero length name. + */ + struct list_head cfts; + + /* + * Base cftypes which are automatically registered. The two can + * point to the same array. + */ + struct cftype *dfl_cftypes; /* for the default hierarchy */ + struct cftype *legacy_cftypes; /* for the legacy hierarchies */ + + /* + * A subsystem may depend on other subsystems. When such subsystem + * is enabled on a cgroup, the depended-upon subsystems are enabled + * together if available. Subsystems enabled due to dependency are + * not visible to userland until explicitly enabled. The following + * specifies the mask of subsystems that this one depends on. 
+ */ + unsigned int depends_on; +}; + +#endif /* CONFIG_CGROUPS */ +#endif /* _LINUX_CGROUP_DEFS_H */ diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index b9cb94c3102a4..96a2ecd5aa696 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -11,23 +11,16 @@ #include #include #include -#include #include #include #include -#include -#include #include -#include #include #include -#include -#ifdef CONFIG_CGROUPS +#include -struct cgroup_root; -struct cgroup_subsys; -struct cgroup; +#ifdef CONFIG_CGROUPS extern int cgroup_init_early(void); extern int cgroup_init(void); @@ -40,66 +33,6 @@ extern int cgroupstats_build(struct cgroupstats *stats, extern int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *tsk); -/* define the enumeration of all cgroup subsystems */ -#define SUBSYS(_x) _x ## _cgrp_id, -enum cgroup_subsys_id { -#include - CGROUP_SUBSYS_COUNT, -}; -#undef SUBSYS - -/* - * Per-subsystem/per-cgroup state maintained by the system. This is the - * fundamental structural building block that controllers deal with. - * - * Fields marked with "PI:" are public and immutable and may be accessed - * directly without synchronization. - */ -struct cgroup_subsys_state { - /* PI: the cgroup that this css is attached to */ - struct cgroup *cgroup; - - /* PI: the cgroup subsystem that this css is attached to */ - struct cgroup_subsys *ss; - - /* reference count - access via css_[try]get() and css_put() */ - struct percpu_ref refcnt; - - /* PI: the parent css */ - struct cgroup_subsys_state *parent; - - /* siblings list anchored at the parent's ->children */ - struct list_head sibling; - struct list_head children; - - /* - * PI: Subsys-unique ID. 0 is unused and root is always 1. The - * matching css can be looked up using css_from_id(). - */ - int id; - - unsigned int flags; - - /* - * Monotonically increasing unique serial number which defines a - * uniform order among all csses. It's guaranteed that all - * ->children lists are in the ascending order of ->serial_nr and - * used to allow interrupting and resuming iterations. - */ - u64 serial_nr; - - /* percpu_ref killing and RCU release */ - struct rcu_head rcu_head; - struct work_struct destroy_work; -}; - -/* bits in struct cgroup_subsys_state flags field */ -enum { - CSS_NO_REF = (1 << 0), /* no reference counting for this css */ - CSS_ONLINE = (1 << 1), /* between ->css_online() and ->css_offline() */ - CSS_RELEASED = (1 << 2), /* refcnt reached zero, released */ -}; - /** * css_get - obtain a reference on the specified css * @css: target css @@ -185,307 +118,6 @@ static inline void css_put_many(struct cgroup_subsys_state *css, unsigned int n) percpu_ref_put_many(&css->refcnt, n); } -/* bits in struct cgroup flags field */ -enum { - /* Control Group requires release notifications to userspace */ - CGRP_NOTIFY_ON_RELEASE, - /* - * Clone the parent's configuration when creating a new child - * cpuset cgroup. For historical reasons, this option can be - * specified at mount time and thus is implemented here. - */ - CGRP_CPUSET_CLONE_CHILDREN, -}; - -struct cgroup { - /* self css with NULL ->ss, points back to this cgroup */ - struct cgroup_subsys_state self; - - unsigned long flags; /* "unsigned long" so bitops work */ - - /* - * idr allocated in-hierarchy ID. - * - * ID 0 is not used, the ID of the root cgroup is always 1, and a - * new cgroup will be assigned with a smallest available ID. - * - * Allocating/Removing ID must be protected by cgroup_mutex. 
- */ - int id; - - /* - * If this cgroup contains any tasks, it contributes one to - * populated_cnt. All children with non-zero popuplated_cnt of - * their own contribute one. The count is zero iff there's no task - * in this cgroup or its subtree. - */ - int populated_cnt; - - struct kernfs_node *kn; /* cgroup kernfs entry */ - struct kernfs_node *populated_kn; /* kn for "cgroup.subtree_populated" */ - - /* - * The bitmask of subsystems enabled on the child cgroups. - * ->subtree_control is the one configured through - * "cgroup.subtree_control" while ->child_subsys_mask is the - * effective one which may have more subsystems enabled. - * Controller knobs are made available iff it's enabled in - * ->subtree_control. - */ - unsigned int subtree_control; - unsigned int child_subsys_mask; - - /* Private pointers for each registered subsystem */ - struct cgroup_subsys_state __rcu *subsys[CGROUP_SUBSYS_COUNT]; - - struct cgroup_root *root; - - /* - * List of cgrp_cset_links pointing at css_sets with tasks in this - * cgroup. Protected by css_set_lock. - */ - struct list_head cset_links; - - /* - * On the default hierarchy, a css_set for a cgroup with some - * susbsys disabled will point to css's which are associated with - * the closest ancestor which has the subsys enabled. The - * following lists all css_sets which point to this cgroup's css - * for the given subsystem. - */ - struct list_head e_csets[CGROUP_SUBSYS_COUNT]; - - /* - * list of pidlists, up to two for each namespace (one for procs, one - * for tasks); created on demand. - */ - struct list_head pidlists; - struct mutex pidlist_mutex; - - /* used to wait for offlining of csses */ - wait_queue_head_t offline_waitq; - - /* used to schedule release agent */ - struct work_struct release_agent_work; -}; - -#define MAX_CGROUP_ROOT_NAMELEN 64 - -/* cgroup_root->flags */ -enum { - CGRP_ROOT_SANE_BEHAVIOR = (1 << 0), /* __DEVEL__sane_behavior specified */ - CGRP_ROOT_NOPREFIX = (1 << 1), /* mounted subsystems have no named prefix */ - CGRP_ROOT_XATTR = (1 << 2), /* supports extended attributes */ -}; - -/* - * A cgroup_root represents the root of a cgroup hierarchy, and may be - * associated with a kernfs_root to form an active hierarchy. This is - * internal to cgroup core. Don't access directly from controllers. - */ -struct cgroup_root { - struct kernfs_root *kf_root; - - /* The bitmask of subsystems attached to this hierarchy */ - unsigned int subsys_mask; - - /* Unique id for this hierarchy. */ - int hierarchy_id; - - /* The root cgroup. Root is destroyed on its release. */ - struct cgroup cgrp; - - /* Number of cgroups in the hierarchy, used only for /proc/cgroups */ - atomic_t nr_cgrps; - - /* A list running through the active hierarchies */ - struct list_head root_list; - - /* Hierarchy-specific flags */ - unsigned int flags; - - /* IDs for cgroups in this hierarchy */ - struct idr cgroup_idr; - - /* The path to use for release notifications. */ - char release_agent_path[PATH_MAX]; - - /* The name for this hierarchy - may be empty */ - char name[MAX_CGROUP_ROOT_NAMELEN]; -}; - -/* - * A css_set is a structure holding pointers to a set of - * cgroup_subsys_state objects. This saves space in the task struct - * object and speeds up fork()/exit(), since a single inc/dec and a - * list_add()/del() can bump the reference count on the entire cgroup - * set for a task. - */ - -struct css_set { - - /* Reference count */ - atomic_t refcount; - - /* - * List running through all cgroup groups in the same hash - * slot. 
Protected by css_set_lock - */ - struct hlist_node hlist; - - /* - * Lists running through all tasks using this cgroup group. - * mg_tasks lists tasks which belong to this cset but are in the - * process of being migrated out or in. Protected by - * css_set_rwsem, but, during migration, once tasks are moved to - * mg_tasks, it can be read safely while holding cgroup_mutex. - */ - struct list_head tasks; - struct list_head mg_tasks; - - /* - * List of cgrp_cset_links pointing at cgroups referenced from this - * css_set. Protected by css_set_lock. - */ - struct list_head cgrp_links; - - /* the default cgroup associated with this css_set */ - struct cgroup *dfl_cgrp; - - /* - * Set of subsystem states, one for each subsystem. This array is - * immutable after creation apart from the init_css_set during - * subsystem registration (at boot time). - */ - struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; - - /* - * List of csets participating in the on-going migration either as - * source or destination. Protected by cgroup_mutex. - */ - struct list_head mg_preload_node; - struct list_head mg_node; - - /* - * If this cset is acting as the source of migration the following - * two fields are set. mg_src_cgrp is the source cgroup of the - * on-going migration and mg_dst_cset is the destination cset the - * target tasks on this cset should be migrated to. Protected by - * cgroup_mutex. - */ - struct cgroup *mg_src_cgrp; - struct css_set *mg_dst_cset; - - /* - * On the default hierarhcy, ->subsys[ssid] may point to a css - * attached to an ancestor instead of the cgroup this css_set is - * associated with. The following node is anchored at - * ->subsys[ssid]->cgroup->e_csets[ssid] and provides a way to - * iterate through all css's attached to a given cgroup. - */ - struct list_head e_cset_node[CGROUP_SUBSYS_COUNT]; - - /* For RCU-protected deletion */ - struct rcu_head rcu_head; -}; - -/* - * struct cftype: handler definitions for cgroup control files - * - * When reading/writing to a file: - * - the cgroup to use is file->f_path.dentry->d_parent->d_fsdata - * - the 'cftype' of the file is file->f_path.dentry->d_fsdata - */ - -/* cftype->flags */ -enum { - CFTYPE_ONLY_ON_ROOT = (1 << 0), /* only create on root cgrp */ - CFTYPE_NOT_ON_ROOT = (1 << 1), /* don't create on root cgrp */ - CFTYPE_NO_PREFIX = (1 << 3), /* (DON'T USE FOR NEW FILES) no subsys prefix */ - - /* internal flags, do not use outside cgroup core proper */ - __CFTYPE_ONLY_ON_DFL = (1 << 16), /* only on default hierarchy */ - __CFTYPE_NOT_ON_DFL = (1 << 17), /* not on default hierarchy */ -}; - -#define MAX_CFTYPE_NAME 64 - -struct cftype { - /* - * By convention, the name should begin with the name of the - * subsystem, followed by a period. Zero length string indicates - * end of cftype array. - */ - char name[MAX_CFTYPE_NAME]; - int private; - /* - * If not 0, file mode is set to this value, otherwise it will - * be figured out automatically - */ - umode_t mode; - - /* - * The maximum length of string, excluding trailing nul, that can - * be passed to write. If < PAGE_SIZE-1, PAGE_SIZE-1 is assumed. - */ - size_t max_write_len; - - /* CFTYPE_* flags */ - unsigned int flags; - - /* - * Fields used for internal bookkeeping. Initialized automatically - * during registration. - */ - struct cgroup_subsys *ss; /* NULL for cgroup core files */ - struct list_head node; /* anchored at ss->cfts */ - struct kernfs_ops *kf_ops; - - /* - * read_u64() is a shortcut for the common case of returning a - * single integer. 
Use it in place of read() - */ - u64 (*read_u64)(struct cgroup_subsys_state *css, struct cftype *cft); - /* - * read_s64() is a signed version of read_u64() - */ - s64 (*read_s64)(struct cgroup_subsys_state *css, struct cftype *cft); - - /* generic seq_file read interface */ - int (*seq_show)(struct seq_file *sf, void *v); - - /* optional ops, implement all or none */ - void *(*seq_start)(struct seq_file *sf, loff_t *ppos); - void *(*seq_next)(struct seq_file *sf, void *v, loff_t *ppos); - void (*seq_stop)(struct seq_file *sf, void *v); - - /* - * write_u64() is a shortcut for the common case of accepting - * a single integer (as parsed by simple_strtoull) from - * userspace. Use in place of write(); return 0 or error. - */ - int (*write_u64)(struct cgroup_subsys_state *css, struct cftype *cft, - u64 val); - /* - * write_s64() is a signed version of write_u64() - */ - int (*write_s64)(struct cgroup_subsys_state *css, struct cftype *cft, - s64 val); - - /* - * write() is the generic write callback which maps directly to - * kernfs write operation and overrides all other operations. - * Maximum write size is determined by ->max_write_len. Use - * of_css/cft() to access the associated css and cft. - */ - ssize_t (*write)(struct kernfs_open_file *of, - char *buf, size_t nbytes, loff_t off); - -#ifdef CONFIG_DEBUG_LOCK_ALLOC - struct lock_class_key lockdep_key; -#endif -}; - extern struct cgroup_root cgrp_dfl_root; extern struct css_set init_css_set; @@ -612,11 +244,6 @@ int cgroup_rm_cftypes(struct cftype *cfts); bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor); -/* - * Control Group taskset, used to pass around set of tasks to cgroup_subsys - * methods. - */ -struct cgroup_taskset; struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset); struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset); @@ -629,84 +256,6 @@ struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset); for ((task) = cgroup_taskset_first((tset)); (task); \ (task) = cgroup_taskset_next((tset))) -/* - * Control Group subsystem type. - * See Documentation/cgroups/cgroups.txt for details - */ - -struct cgroup_subsys { - struct cgroup_subsys_state *(*css_alloc)(struct cgroup_subsys_state *parent_css); - int (*css_online)(struct cgroup_subsys_state *css); - void (*css_offline)(struct cgroup_subsys_state *css); - void (*css_released)(struct cgroup_subsys_state *css); - void (*css_free)(struct cgroup_subsys_state *css); - void (*css_reset)(struct cgroup_subsys_state *css); - void (*css_e_css_changed)(struct cgroup_subsys_state *css); - - int (*can_attach)(struct cgroup_subsys_state *css, - struct cgroup_taskset *tset); - void (*cancel_attach)(struct cgroup_subsys_state *css, - struct cgroup_taskset *tset); - void (*attach)(struct cgroup_subsys_state *css, - struct cgroup_taskset *tset); - void (*fork)(struct task_struct *task); - void (*exit)(struct cgroup_subsys_state *css, - struct cgroup_subsys_state *old_css, - struct task_struct *task); - void (*bind)(struct cgroup_subsys_state *root_css); - - int disabled; - int early_init; - - /* - * If %false, this subsystem is properly hierarchical - - * configuration, resource accounting and restriction on a parent - * cgroup cover those of its children. If %true, hierarchy support - * is broken in some ways - some subsystems ignore hierarchy - * completely while others are only implemented half-way. 
- * - * It's now disallowed to create nested cgroups if the subsystem is - * broken and cgroup core will emit a warning message on such - * cases. Eventually, all subsystems will be made properly - * hierarchical and this will go away. - */ - bool broken_hierarchy; - bool warned_broken_hierarchy; - - /* the following two fields are initialized automtically during boot */ - int id; -#define MAX_CGROUP_TYPE_NAMELEN 32 - const char *name; - - /* link to parent, protected by cgroup_lock() */ - struct cgroup_root *root; - - /* idr for css->id */ - struct idr css_idr; - - /* - * List of cftypes. Each entry is the first entry of an array - * terminated by zero length name. - */ - struct list_head cfts; - - /* - * Base cftypes which are automatically registered. The two can - * point to the same array. - */ - struct cftype *dfl_cftypes; /* for the default hierarchy */ - struct cftype *legacy_cftypes; /* for the legacy hierarchies */ - - /* - * A subsystem may depend on other subsystems. When such subsystem - * is enabled on a cgroup, the depended-upon subsystems are enabled - * together if available. Subsystems enabled due to dependency are - * not visible to userland until explicitly enabled. The following - * specifies the mask of subsystems that this one depends on. - */ - unsigned int depends_on; -}; - #define SUBSYS(_x) extern struct cgroup_subsys _x ## _cgrp_subsys; #include #undef SUBSYS From 2b65014b4a1083a8715f3425b292723d9729e3d0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 21 Jan 2016 15:31:11 -0500 Subject: [PATCH 1876/2091] cgroup: make sure a parent css isn't offlined before its children [ Upstream commit aa226ff4a1ce79f229c6b7a4c0a14e17fececd01 ] There are three subsystem callbacks in css shutdown path - css_offline(), css_released() and css_free(). Except for css_released(), cgroup core didn't guarantee the order of invocation. css_offline() or css_free() could be called on a parent css before its children. This behavior is unexpected and led to bugs in cpu and memory controller. This patch updates offline path so that a parent css is never offlined before its children. Each css keeps online_cnt which reaches zero iff itself and all its children are offline and offline_css() is invoked only after online_cnt reaches zero. This fixes the memory controller bug and allows the fix for cpu controller. Signed-off-by: Tejun Heo Reported-and-tested-by: Christian Borntraeger Reported-by: Brian Christiansen Link: http://lkml.kernel.org/g/5698A023.9070703@de.ibm.com Link: http://lkml.kernel.org/g/CAKB58ikDkzc8REt31WBkD99+hxNzjK4+FBmhkgS+NVrC9vjMSg@mail.gmail.com Cc: Heiko Carstens Cc: Peter Zijlstra Cc: stable@vger.kernel.org Signed-off-by: Sasha Levin --- include/linux/cgroup-defs.h | 6 ++++++ kernel/cgroup.c | 22 +++++++++++++++++----- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 55f3120fb9527..8d9c7e7a64323 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -115,6 +115,12 @@ struct cgroup_subsys_state { */ u64 serial_nr; + /* + * Incremented by online self and children. Used to guarantee that + * parents are not offlined before their children. 
+ */ + atomic_t online_cnt; + /* percpu_ref killing and RCU release */ struct rcu_head rcu_head; struct work_struct destroy_work; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 4d65b66ae60d4..359da3abb004f 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -4481,6 +4481,7 @@ static void init_and_link_css(struct cgroup_subsys_state *css, INIT_LIST_HEAD(&css->sibling); INIT_LIST_HEAD(&css->children); css->serial_nr = css_serial_nr_next++; + atomic_set(&css->online_cnt, 0); if (cgroup_parent(cgrp)) { css->parent = cgroup_css(cgroup_parent(cgrp), ss); @@ -4503,6 +4504,10 @@ static int online_css(struct cgroup_subsys_state *css) if (!ret) { css->flags |= CSS_ONLINE; rcu_assign_pointer(css->cgroup->subsys[ss->id], css); + + atomic_inc(&css->online_cnt); + if (css->parent) + atomic_inc(&css->parent->online_cnt); } return ret; } @@ -4740,10 +4745,15 @@ static void css_killed_work_fn(struct work_struct *work) container_of(work, struct cgroup_subsys_state, destroy_work); mutex_lock(&cgroup_mutex); - offline_css(css); - mutex_unlock(&cgroup_mutex); - css_put(css); + do { + offline_css(css); + css_put(css); + /* @css can't go away while we're holding cgroup_mutex */ + css = css->parent; + } while (css && atomic_dec_and_test(&css->online_cnt)); + + mutex_unlock(&cgroup_mutex); } /* css kill confirmation processing requires process context, bounce */ @@ -4752,8 +4762,10 @@ static void css_killed_ref_fn(struct percpu_ref *ref) struct cgroup_subsys_state *css = container_of(ref, struct cgroup_subsys_state, refcnt); - INIT_WORK(&css->destroy_work, css_killed_work_fn); - queue_work(cgroup_destroy_wq, &css->destroy_work); + if (atomic_dec_and_test(&css->online_cnt)) { + INIT_WORK(&css->destroy_work, css_killed_work_fn); + queue_work(cgroup_destroy_wq, &css->destroy_work); + } } /** From 1e7516689817b56f78dc60497a4dd7c80319417e Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 25 Jan 2016 10:08:00 -0600 Subject: [PATCH 1877/2091] PCI/AER: Flush workqueue on device remove to avoid use-after-free [ Upstream commit 4ae2182b1e3407de369f8c5d799543b7db74221b ] A Root Port's AER structure (rpc) contains a queue of events. aer_irq() enqueues AER status information and schedules aer_isr() to dequeue and process it. When we remove a device, aer_remove() waits for the queue to be empty, then frees the rpc struct. But aer_isr() references the rpc struct after dequeueing and possibly emptying the queue, which can cause a use-after-free error as in the following scenario with two threads, aer_isr() on the left and a concurrent aer_remove() on the right: Thread A Thread B -------- -------- aer_irq(): rpc->prod_idx++ aer_remove(): wait_event(rpc->prod_idx == rpc->cons_idx) # now blocked until queue becomes empty aer_isr(): # ... rpc->cons_idx++ # unblocked because queue is now empty ... kfree(rpc) mutex_unlock(&rpc->rpc_mutex) To prevent this problem, use flush_work() to wait until the last scheduled instance of aer_isr() has completed before freeing the rpc struct in aer_remove(). I reproduced this use-after-free by flashing a device FPGA and re-enumerating the bus to find the new device. 
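For reference, the teardown ordering described above reduces to the sketch below. It is illustrative only: it paraphrases the post-patch remove path using the identifiers from the hunks further down, and is not meant to build standalone.

	static void aer_remove(struct pcie_device *dev)
	{
		struct aer_rpc *rpc = get_service_data(dev);

		if (rpc->isr)
			free_irq(dev->irq, dev);

		/* Wait for any queued aer_isr() instance to finish before
		 * freeing rpc.  Waiting for the event queue to drain is not
		 * enough: the queue can be empty while aer_isr() is still
		 * running and still dereferencing rpc. */
		flush_work(&rpc->dpc_handler);

		aer_disable_rootport(rpc);
		kfree(rpc);
		set_service_data(dev, NULL);
	}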
With SLUB debug, this crashes with 0x6b bytes (POISON_FREE, the use-after-free magic number) in GPR25: pcieport 0000:00:00.0: AER: Multiple Corrected error received: id=0000 Unable to handle kernel paging request for data at address 0x27ef9e3e Workqueue: events aer_isr GPR24: dd6aa000 6b6b6b6b 605f8378 605f8360 d99b12c0 604fc674 606b1704 d99b12c0 NIP [602f5328] pci_walk_bus+0xd4/0x104 [bhelgaas: changelog, stable tag] Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Bjorn Helgaas CC: stable@vger.kernel.org Signed-off-by: Sasha Levin --- drivers/pci/pcie/aer/aerdrv.c | 4 +--- drivers/pci/pcie/aer/aerdrv.h | 1 - drivers/pci/pcie/aer/aerdrv_core.c | 2 -- 3 files changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c index 0bf82a20a0fb4..48d21e0edd568 100644 --- a/drivers/pci/pcie/aer/aerdrv.c +++ b/drivers/pci/pcie/aer/aerdrv.c @@ -262,7 +262,6 @@ static struct aer_rpc *aer_alloc_rpc(struct pcie_device *dev) rpc->rpd = dev; INIT_WORK(&rpc->dpc_handler, aer_isr); mutex_init(&rpc->rpc_mutex); - init_waitqueue_head(&rpc->wait_release); /* Use PCIe bus function to store rpc into PCIe device */ set_service_data(dev, rpc); @@ -285,8 +284,7 @@ static void aer_remove(struct pcie_device *dev) if (rpc->isr) free_irq(dev->irq, dev); - wait_event(rpc->wait_release, rpc->prod_idx == rpc->cons_idx); - + flush_work(&rpc->dpc_handler); aer_disable_rootport(rpc); kfree(rpc); set_service_data(dev, NULL); diff --git a/drivers/pci/pcie/aer/aerdrv.h b/drivers/pci/pcie/aer/aerdrv.h index 84420b7c9456e..945c939a86c5c 100644 --- a/drivers/pci/pcie/aer/aerdrv.h +++ b/drivers/pci/pcie/aer/aerdrv.h @@ -72,7 +72,6 @@ struct aer_rpc { * recovery on the same * root port hierarchy */ - wait_queue_head_t wait_release; }; struct aer_broadcast_data { diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index 5653ea94547fc..b60a325234c52 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -784,8 +784,6 @@ void aer_isr(struct work_struct *work) while (get_e_source(rpc, &e_src)) aer_isr_one_error(p_device, &e_src); mutex_unlock(&rpc->rpc_mutex); - - wake_up(&rpc->wait_release); } /** From c77c01982668678a5fafe3036be4e757b7eddbaf Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 15 Jan 2016 15:13:05 -0500 Subject: [PATCH 1878/2091] libata: disable forced PORTS_IMPL for >= AHCI 1.3 [ Upstream commit 566d1827df2ef0cbe921d3d6946ac3007b1a6938 ] Some early controllers incorrectly reported zero ports in PORTS_IMPL register and the ahci driver fabricates PORTS_IMPL from the number of ports in those cases. This hasn't mattered but with the new nvme controllers there are cases where zero PORTS_IMPL is valid and should be honored. Disable the workaround for >= AHCI 1.3. 
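In code, the gated fallback amounts to the sketch below. The register reads are paraphrased from the surrounding driver code for context and are not part of this hunk; only the version test in the if-condition is what this patch changes.

	u32 vers     = readl(mmio + HOST_VERSION);
	u32 cap      = readl(mmio + HOST_CAP);
	u32 port_map = readl(mmio + HOST_PORTS_IMPL);

	/* Only pre-1.3 controllers are known to report a bogus zero
	 * PORTS_IMPL.  On AHCI 1.3+ an empty implemented-port mask can be
	 * legitimate, so honor it instead of fabricating one. */
	if (!port_map && vers < 0x10300) {
		port_map = (1 << ahci_nr_ports(cap)) - 1;
		dev_warn(dev, "forcing PORTS_IMPL to 0x%x\n", port_map);
	}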
Signed-off-by: Tejun Heo Reported-by: Andy Lutomirski Link: http://lkml.kernel.org/g/CALCETrU7yMvXEDhjAUShoHEhDwifJGapdw--BKxsP0jmjKGmRw@mail.gmail.com Cc: Sergei Shtylyov Cc: stable@vger.kernel.org Signed-off-by: Sasha Levin --- drivers/ata/libahci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c index 287c4ba0219f7..49840264dd578 100644 --- a/drivers/ata/libahci.c +++ b/drivers/ata/libahci.c @@ -495,8 +495,8 @@ void ahci_save_initial_config(struct device *dev, struct ahci_host_priv *hpriv) } } - /* fabricate port_map from cap.nr_ports */ - if (!port_map) { + /* fabricate port_map from cap.nr_ports for < AHCI 1.3 */ + if (!port_map && vers < 0x10300) { port_map = (1 << ahci_nr_ports(cap)) - 1; dev_warn(dev, "forcing PORTS_IMPL to 0x%x\n", port_map); From f5fa85b4e599a45e6d056d44eb920eb42ed63849 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sat, 16 Jan 2016 10:04:49 -0800 Subject: [PATCH 1879/2091] Input: vmmouse - fix absolute device registration [ Upstream commit d4f1b06d685d11ebdaccf11c0db1cb3c78736862 ] We should set device's capabilities first, and then register it, otherwise various handlers already present in the kernel will not be able to connect to the device. Reported-by: Lauri Kasanen Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/mouse/vmmouse.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/input/mouse/vmmouse.c b/drivers/input/mouse/vmmouse.c index e272f06258cef..a3f0f5a47490e 100644 --- a/drivers/input/mouse/vmmouse.c +++ b/drivers/input/mouse/vmmouse.c @@ -458,8 +458,6 @@ int vmmouse_init(struct psmouse *psmouse) priv->abs_dev = abs_dev; psmouse->private = priv; - input_set_capability(rel_dev, EV_REL, REL_WHEEL); - /* Set up and register absolute device */ snprintf(priv->phys, sizeof(priv->phys), "%s/input1", psmouse->ps2dev.serio->phys); @@ -475,10 +473,6 @@ int vmmouse_init(struct psmouse *psmouse) abs_dev->id.version = psmouse->model; abs_dev->dev.parent = &psmouse->ps2dev.serio->dev; - error = input_register_device(priv->abs_dev); - if (error) - goto init_fail; - /* Set absolute device capabilities */ input_set_capability(abs_dev, EV_KEY, BTN_LEFT); input_set_capability(abs_dev, EV_KEY, BTN_RIGHT); @@ -488,6 +482,13 @@ int vmmouse_init(struct psmouse *psmouse) input_set_abs_params(abs_dev, ABS_X, 0, VMMOUSE_MAX_X, 0, 0); input_set_abs_params(abs_dev, ABS_Y, 0, VMMOUSE_MAX_Y, 0, 0); + error = input_register_device(priv->abs_dev); + if (error) + goto init_fail; + + /* Add wheel capability to the relative device */ + input_set_capability(rel_dev, EV_REL, REL_WHEEL); + psmouse->protocol_handler = vmmouse_process_byte; psmouse->disconnect = vmmouse_disconnect; psmouse->reconnect = vmmouse_reconnect; From ea116368a3680fc4ea10196736adae2109f96301 Mon Sep 17 00:00:00 2001 From: Swapnil Nagle Date: Tue, 14 Jul 2015 16:00:43 -0400 Subject: [PATCH 1880/2091] qla2xxx: cleanup cmd in qla workqueue before processing TMR [ Upstream commit 8b2f5ff3d05c2c48b722c3cc67b8226f1601042b ] Since cmds go into qla_tgt_wq and TMRs don't, it's possible that TMR like TASK_ABORT can be queued over the cmd for which it was meant. To avoid this race, use a per-port list to keep track of cmds that are enqueued to qla_tgt_wq but not yet processed. When a TMR arrives, iterate through this list and remove any cmds that match the TMR. This patch supports TASK_ABORT and LUN_RESET. 
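The tracking scheme boils down to the sketch below, condensed from the hunks that follow (error handling and the parallel sess_op list are omitted): a command is put on a per-port list before it is queued to qla_tgt_wq, the worker removes it from the list as its first action, and a TMR can therefore find and flag any command that has not been picked up yet.

	/* submission path */
	spin_lock(&vha->cmd_list_lock);
	list_add_tail(&cmd->cmd_list, &vha->qla_cmd_list);
	spin_unlock(&vha->cmd_list_lock);
	INIT_WORK(&cmd->work, qlt_do_work);
	queue_work(qla_tgt_wq, &cmd->work);

	/* TASK_ABORT path: flag a still-queued command by exchange tag */
	spin_lock(&vha->cmd_list_lock);
	list_for_each_entry(cmd, &vha->qla_cmd_list, cmd_list) {
		if (tag == cmd->atio.u.isp24.exchange_addr) {
			cmd->state = QLA_TGT_STATE_ABORTED;
			break;
		}
	}
	spin_unlock(&vha->cmd_list_lock);

	/* worker: drop flagged commands instead of executing them */
	spin_lock_irqsave(&vha->cmd_list_lock, flags);
	list_del(&cmd->cmd_list);
	spin_unlock_irqrestore(&vha->cmd_list_lock, flags);
	if (cmd->state == QLA_TGT_STATE_ABORTED)
		goto out_term;

Taking the command off the list before __qlt_do_work() runs is what closes the race: once the worker owns the command, the TMR path can no longer see it on the list, so a command is handled exactly once, either executed or dropped.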
Cc: # v3.18+ Signed-off-by: Swapnil Nagle Signed-off-by: Alexei Potashnik Acked-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Nicholas Bellinger Signed-off-by: Sasha Levin --- drivers/scsi/qla2xxx/qla_dbg.c | 2 +- drivers/scsi/qla2xxx/qla_def.h | 5 ++ drivers/scsi/qla2xxx/qla_os.c | 3 + drivers/scsi/qla2xxx/qla_target.c | 123 +++++++++++++++++++++++++++-- drivers/scsi/qla2xxx/qla_target.h | 12 +++ drivers/scsi/qla2xxx/tcm_qla2xxx.c | 8 +- 6 files changed, 140 insertions(+), 13 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_dbg.c b/drivers/scsi/qla2xxx/qla_dbg.c index e9ae6b924c705..e63aa0780b528 100644 --- a/drivers/scsi/qla2xxx/qla_dbg.c +++ b/drivers/scsi/qla2xxx/qla_dbg.c @@ -68,7 +68,7 @@ * | | | 0xd101-0xd1fe | * | | | 0xd214-0xd2fe | * | Target Mode | 0xe079 | | - * | Target Mode Management | 0xf080 | 0xf002 | + * | Target Mode Management | 0xf083 | 0xf002 | * | | | 0xf046-0xf049 | * | Target Mode Task Management | 0x1000b | | * ---------------------------------------------------------------------- diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index e86201d3b8c6d..9340c7f741ad9 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -3579,6 +3579,11 @@ typedef struct scsi_qla_host { uint16_t fcoe_fcf_idx; uint8_t fcoe_vn_port_mac[6]; + /* list of commands waiting on workqueue */ + struct list_head qla_cmd_list; + struct list_head qla_sess_op_cmd_list; + spinlock_t cmd_list_lock; + uint32_t vp_abort_cnt; struct fc_vport *fc_vport; /* holds fc_vport * for each vport */ diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 7462dd70b1506..abfea56296630 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -3763,8 +3763,11 @@ struct scsi_qla_host *qla2x00_create_host(struct scsi_host_template *sht, INIT_LIST_HEAD(&vha->vp_fcports); INIT_LIST_HEAD(&vha->work_list); INIT_LIST_HEAD(&vha->list); + INIT_LIST_HEAD(&vha->qla_cmd_list); + INIT_LIST_HEAD(&vha->qla_sess_op_cmd_list); spin_lock_init(&vha->work_lock); + spin_lock_init(&vha->cmd_list_lock); sprintf(vha->host_str, "%s_%ld", QLA2XXX_DRIVER_NAME, vha->host_no); ql_dbg(ql_dbg_init, vha, 0x0041, diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 496a733d0ca38..73a21322505c9 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -1170,6 +1170,70 @@ static void qlt_24xx_retry_term_exchange(struct scsi_qla_host *vha, FCP_TMF_CMPL, true); } +static int abort_cmd_for_tag(struct scsi_qla_host *vha, uint32_t tag) +{ + struct qla_tgt_sess_op *op; + struct qla_tgt_cmd *cmd; + + spin_lock(&vha->cmd_list_lock); + + list_for_each_entry(op, &vha->qla_sess_op_cmd_list, cmd_list) { + if (tag == op->atio.u.isp24.exchange_addr) { + op->aborted = true; + spin_unlock(&vha->cmd_list_lock); + return 1; + } + } + + list_for_each_entry(cmd, &vha->qla_cmd_list, cmd_list) { + if (tag == cmd->atio.u.isp24.exchange_addr) { + cmd->state = QLA_TGT_STATE_ABORTED; + spin_unlock(&vha->cmd_list_lock); + return 1; + } + } + + spin_unlock(&vha->cmd_list_lock); + return 0; +} + +/* drop cmds for the given lun + * XXX only looks for cmds on the port through which lun reset was recieved + * XXX does not go through the list of other port (which may have cmds + * for the same lun) + */ +static void abort_cmds_for_lun(struct scsi_qla_host *vha, + uint32_t lun, uint8_t *s_id) +{ + struct qla_tgt_sess_op *op; + struct qla_tgt_cmd *cmd; + uint32_t key; + + key = sid_to_key(s_id); 
+ spin_lock(&vha->cmd_list_lock); + list_for_each_entry(op, &vha->qla_sess_op_cmd_list, cmd_list) { + uint32_t op_key; + uint32_t op_lun; + + op_key = sid_to_key(op->atio.u.isp24.fcp_hdr.s_id); + op_lun = scsilun_to_int( + (struct scsi_lun *)&op->atio.u.isp24.fcp_cmnd.lun); + if (op_key == key && op_lun == lun) + op->aborted = true; + } + list_for_each_entry(cmd, &vha->qla_cmd_list, cmd_list) { + uint32_t cmd_key; + uint32_t cmd_lun; + + cmd_key = sid_to_key(cmd->atio.u.isp24.fcp_hdr.s_id); + cmd_lun = scsilun_to_int( + (struct scsi_lun *)&cmd->atio.u.isp24.fcp_cmnd.lun); + if (cmd_key == key && cmd_lun == lun) + cmd->state = QLA_TGT_STATE_ABORTED; + } + spin_unlock(&vha->cmd_list_lock); +} + /* ha->hardware_lock supposed to be held on entry */ static int __qlt_24xx_handle_abts(struct scsi_qla_host *vha, struct abts_recv_from_24xx *abts, struct qla_tgt_sess *sess) @@ -1194,8 +1258,19 @@ static int __qlt_24xx_handle_abts(struct scsi_qla_host *vha, } spin_unlock(&se_sess->sess_cmd_lock); - if (!found_lun) - return -ENOENT; + /* cmd not in LIO lists, look in qla list */ + if (!found_lun) { + if (abort_cmd_for_tag(vha, abts->exchange_addr_to_abort)) { + /* send TASK_ABORT response immediately */ + qlt_24xx_send_abts_resp(vha, abts, FCP_TMF_CMPL, false); + return 0; + } else { + ql_dbg(ql_dbg_tgt_mgt, vha, 0xf081, + "unable to find cmd in driver or LIO for tag 0x%x\n", + abts->exchange_addr_to_abort); + return -ENOENT; + } + } ql_dbg(ql_dbg_tgt_mgt, vha, 0xf00f, "qla_target(%d): task abort (tag=%d)\n", @@ -3265,6 +3340,13 @@ static void __qlt_do_work(struct qla_tgt_cmd *cmd) if (tgt->tgt_stop) goto out_term; + if (cmd->state == QLA_TGT_STATE_ABORTED) { + ql_dbg(ql_dbg_tgt_mgt, vha, 0xf082, + "cmd with tag %u is aborted\n", + cmd->atio.u.isp24.exchange_addr); + goto out_term; + } + cdb = &atio->u.isp24.fcp_cmnd.cdb[0]; cmd->tag = atio->u.isp24.exchange_addr; cmd->unpacked_lun = scsilun_to_int( @@ -3318,6 +3400,12 @@ static void __qlt_do_work(struct qla_tgt_cmd *cmd) static void qlt_do_work(struct work_struct *work) { struct qla_tgt_cmd *cmd = container_of(work, struct qla_tgt_cmd, work); + scsi_qla_host_t *vha = cmd->vha; + unsigned long flags; + + spin_lock_irqsave(&vha->cmd_list_lock, flags); + list_del(&cmd->cmd_list); + spin_unlock_irqrestore(&vha->cmd_list_lock, flags); __qlt_do_work(cmd); } @@ -3369,14 +3457,25 @@ static void qlt_create_sess_from_atio(struct work_struct *work) unsigned long flags; uint8_t *s_id = op->atio.u.isp24.fcp_hdr.s_id; + spin_lock_irqsave(&vha->cmd_list_lock, flags); + list_del(&op->cmd_list); + spin_unlock_irqrestore(&vha->cmd_list_lock, flags); + + if (op->aborted) { + ql_dbg(ql_dbg_tgt_mgt, vha, 0xf083, + "sess_op with tag %u is aborted\n", + op->atio.u.isp24.exchange_addr); + goto out_term; + } + ql_dbg(ql_dbg_tgt_mgt, vha, 0xf022, - "qla_target(%d): Unable to find wwn login" - " (s_id %x:%x:%x), trying to create it manually\n", - vha->vp_idx, s_id[0], s_id[1], s_id[2]); + "qla_target(%d): Unable to find wwn login" + " (s_id %x:%x:%x), trying to create it manually\n", + vha->vp_idx, s_id[0], s_id[1], s_id[2]); if (op->atio.u.raw.entry_count > 1) { ql_dbg(ql_dbg_tgt_mgt, vha, 0xf023, - "Dropping multy entry atio %p\n", &op->atio); + "Dropping multy entry atio %p\n", &op->atio); goto out_term; } @@ -3441,6 +3540,11 @@ static int qlt_handle_cmd_for_atio(struct scsi_qla_host *vha, memcpy(&op->atio, atio, sizeof(*atio)); op->vha = vha; + + spin_lock(&vha->cmd_list_lock); + list_add_tail(&op->cmd_list, &vha->qla_sess_op_cmd_list); + spin_unlock(&vha->cmd_list_lock); 
+ INIT_WORK(&op->work, qlt_create_sess_from_atio); queue_work(qla_tgt_wq, &op->work); return 0; @@ -3460,6 +3564,11 @@ static int qlt_handle_cmd_for_atio(struct scsi_qla_host *vha, cmd->cmd_in_wq = 1; cmd->cmd_flags |= BIT_0; + + spin_lock(&vha->cmd_list_lock); + list_add_tail(&cmd->cmd_list, &vha->qla_cmd_list); + spin_unlock(&vha->cmd_list_lock); + INIT_WORK(&cmd->work, qlt_do_work); queue_work(qla_tgt_wq, &cmd->work); return 0; @@ -3473,6 +3582,7 @@ static int qlt_issue_task_mgmt(struct qla_tgt_sess *sess, uint32_t lun, struct scsi_qla_host *vha = sess->vha; struct qla_hw_data *ha = vha->hw; struct qla_tgt_mgmt_cmd *mcmd; + struct atio_from_isp *a = (struct atio_from_isp *)iocb; int res; uint8_t tmr_func; @@ -3513,6 +3623,7 @@ static int qlt_issue_task_mgmt(struct qla_tgt_sess *sess, uint32_t lun, ql_dbg(ql_dbg_tgt_tmr, vha, 0x10002, "qla_target(%d): LUN_RESET received\n", sess->vha->vp_idx); tmr_func = TMR_LUN_RESET; + abort_cmds_for_lun(vha, lun, a->u.isp24.fcp_hdr.s_id); break; case QLA_TGT_CLEAR_TS: diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h index 332086776dfe9..e5304639f2432 100644 --- a/drivers/scsi/qla2xxx/qla_target.h +++ b/drivers/scsi/qla2xxx/qla_target.h @@ -874,6 +874,8 @@ struct qla_tgt_sess_op { struct scsi_qla_host *vha; struct atio_from_isp atio; struct work_struct work; + struct list_head cmd_list; + bool aborted; }; /* @@ -1074,6 +1076,16 @@ static inline void qla_reverse_ini_mode(struct scsi_qla_host *ha) ha->host->active_mode |= MODE_INITIATOR; } +static inline uint32_t sid_to_key(const uint8_t *s_id) +{ + uint32_t key; + + key = (((unsigned long)s_id[0] << 16) | + ((unsigned long)s_id[1] << 8) | + (unsigned long)s_id[2]); + return key; +} + /* * Exported symbols from qla_target.c LLD logic used by qla2xxx code.. 
*/ diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index 5c9e680aa375a..e736bde1aba00 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -1323,9 +1323,7 @@ static struct qla_tgt_sess *tcm_qla2xxx_find_sess_by_s_id( return NULL; } - key = (((unsigned long)s_id[0] << 16) | - ((unsigned long)s_id[1] << 8) | - (unsigned long)s_id[2]); + key = sid_to_key(s_id); pr_debug("find_sess_by_s_id: 0x%06x\n", key); se_nacl = btree_lookup32(&lport->lport_fcport_map, key); @@ -1360,9 +1358,7 @@ static void tcm_qla2xxx_set_sess_by_s_id( void *slot; int rc; - key = (((unsigned long)s_id[0] << 16) | - ((unsigned long)s_id[1] << 8) | - (unsigned long)s_id[2]); + key = sid_to_key(s_id); pr_debug("set_sess_by_s_id: %06x\n", key); slot = btree_lookup32(&lport->lport_fcport_map, key); From f89ace487f87aa90b7426ec94872cc38ee11fd1e Mon Sep 17 00:00:00 2001 From: Alexei Potashnik Date: Tue, 14 Jul 2015 16:00:44 -0400 Subject: [PATCH 1881/2091] qla2xxx: delay plogi/prli ack until existing sessions are deleted [ Upstream commit a6ca88780dd66b0700d89419abd17b6b4bb49483 ] - keep qla_tgt_sess object on the session list until it's freed - modify use of sess->deleted flag to differentiate delayed session deletion that can be cancelled from irreversible one: QLA_SESS_DELETION_PENDING vs QLA_SESS_DELETION_IN_PROGRESS - during IN_PROGRESS deletion all newly arrived commands and TMRs will be rejected, existing commands and TMRs will be terminated when given by the core to the fabric or simply dropped if session logout has already happened (logout terminates all existing exchanges) - new PLOGI will initiate deletion of the following sessions (unless deletion is already IN_PROGRESS): - with the same port_name (with logout) - different port_name, different loop_id but the same port_id (with logout) - different port_name, different port_id, but the same loop_id (without logout) - additionally each new PLOGI will store imm notify iocb in the same port_name session being deleted. When deletion process completes this iocb will be acked. Only the most recent PLOGI iocb is stored. The older ones will be terminated when replaced. 
- new PRLI will initiate deletion of the following sessions (unless deletion is already IN_PROGRESS): - different port_name, different port_id, but the same loop_id (without logout) Cc: # v3.18+ Signed-off-by: Alexei Potashnik Acked-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Nicholas Bellinger Signed-off-by: Sasha Levin --- drivers/scsi/qla2xxx/qla_dbg.c | 6 +- drivers/scsi/qla2xxx/qla_def.h | 2 + drivers/scsi/qla2xxx/qla_init.c | 7 +- drivers/scsi/qla2xxx/qla_iocb.c | 3 + drivers/scsi/qla2xxx/qla_target.c | 438 +++++++++++++++++++++++++++-- drivers/scsi/qla2xxx/qla_target.h | 43 ++- drivers/scsi/qla2xxx/tcm_qla2xxx.c | 4 + 7 files changed, 481 insertions(+), 22 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_dbg.c b/drivers/scsi/qla2xxx/qla_dbg.c index e63aa0780b528..05793b7199a4f 100644 --- a/drivers/scsi/qla2xxx/qla_dbg.c +++ b/drivers/scsi/qla2xxx/qla_dbg.c @@ -67,10 +67,10 @@ * | | | 0xd031-0xd0ff | * | | | 0xd101-0xd1fe | * | | | 0xd214-0xd2fe | - * | Target Mode | 0xe079 | | - * | Target Mode Management | 0xf083 | 0xf002 | + * | Target Mode | 0xe080 | | + * | Target Mode Management | 0xf091 | 0xf002 | * | | | 0xf046-0xf049 | - * | Target Mode Task Management | 0x1000b | | + * | Target Mode Task Management | 0x1000d | | * ---------------------------------------------------------------------- */ diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 9340c7f741ad9..54c201d06ba2f 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -274,6 +274,7 @@ #define RESPONSE_ENTRY_CNT_FX00 256 /* Number of response entries.*/ struct req_que; +struct qla_tgt_sess; /* * (sd.h is not exported, hence local inclusion) @@ -2026,6 +2027,7 @@ typedef struct fc_port { uint16_t port_id; unsigned long retry_delay_timestamp; + struct qla_tgt_sess *tgt_session; } fc_port_t; #include "qla_mr.h" diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 998498e2341b3..ec179aa358807 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -115,6 +115,8 @@ qla2x00_async_iocb_timeout(void *data) QLA_LOGIO_LOGIN_RETRIED : 0; qla2x00_post_async_login_done_work(fcport->vha, fcport, lio->u.logio.data); + } else if (sp->type == SRB_LOGOUT_CMD) { + qlt_logo_completion_handler(fcport, QLA_FUNCTION_TIMEOUT); } } @@ -497,7 +499,10 @@ void qla2x00_async_logout_done(struct scsi_qla_host *vha, fc_port_t *fcport, uint16_t *data) { - qla2x00_mark_device_lost(vha, fcport, 1, 0); + /* Don't re-login in target mode */ + if (!fcport->tgt_session) + qla2x00_mark_device_lost(vha, fcport, 1, 0); + qlt_logo_completion_handler(fcport, data[0]); return; } diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c index a1ab25fca8742..dc96f31a88315 100644 --- a/drivers/scsi/qla2xxx/qla_iocb.c +++ b/drivers/scsi/qla2xxx/qla_iocb.c @@ -1943,6 +1943,9 @@ qla24xx_logout_iocb(srb_t *sp, struct logio_entry_24xx *logio) logio->entry_type = LOGINOUT_PORT_IOCB_TYPE; logio->control_flags = cpu_to_le16(LCF_COMMAND_LOGO|LCF_IMPL_LOGO); + if (!sp->fcport->tgt_session || + !sp->fcport->tgt_session->keep_nport_handle) + logio->control_flags |= cpu_to_le16(LCF_FREE_NPORT); logio->nport_handle = cpu_to_le16(sp->fcport->loop_id); logio->port_id[0] = sp->fcport->d_id.b.al_pa; logio->port_id[1] = sp->fcport->d_id.b.area; diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 73a21322505c9..e3e4f0a142ac4 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ 
b/drivers/scsi/qla2xxx/qla_target.c @@ -114,6 +114,10 @@ static void qlt_alloc_qfull_cmd(struct scsi_qla_host *vha, struct atio_from_isp *atio, uint16_t status, int qfull); static void qlt_disable_vha(struct scsi_qla_host *vha); static void qlt_clear_tgt_db(struct qla_tgt *tgt); +static void qlt_send_notify_ack(struct scsi_qla_host *vha, + struct imm_ntfy_from_isp *ntfy, + uint32_t add_flags, uint16_t resp_code, int resp_code_valid, + uint16_t srr_flags, uint16_t srr_reject_code, uint8_t srr_explan); /* * Global Variables */ @@ -382,14 +386,73 @@ static void qlt_free_session_done(struct work_struct *work) struct qla_tgt *tgt = sess->tgt; struct scsi_qla_host *vha = sess->vha; struct qla_hw_data *ha = vha->hw; + unsigned long flags; + bool logout_started = false; + fc_port_t fcport; + + ql_dbg(ql_dbg_tgt_mgt, vha, 0xf084, + "%s: se_sess %p / sess %p from port %8phC loop_id %#04x" + " s_id %02x:%02x:%02x logout %d keep %d plogi %d\n", + __func__, sess->se_sess, sess, sess->port_name, sess->loop_id, + sess->s_id.b.domain, sess->s_id.b.area, sess->s_id.b.al_pa, + sess->logout_on_delete, sess->keep_nport_handle, + sess->plogi_ack_needed); BUG_ON(!tgt); + + if (sess->logout_on_delete) { + int rc; + + memset(&fcport, 0, sizeof(fcport)); + fcport.loop_id = sess->loop_id; + fcport.d_id = sess->s_id; + memcpy(fcport.port_name, sess->port_name, WWN_SIZE); + fcport.vha = vha; + fcport.tgt_session = sess; + + rc = qla2x00_post_async_logout_work(vha, &fcport, NULL); + if (rc != QLA_SUCCESS) + ql_log(ql_log_warn, vha, 0xf085, + "Schedule logo failed sess %p rc %d\n", + sess, rc); + else + logout_started = true; + } + /* * Release the target session for FC Nexus from fabric module code. */ if (sess->se_sess != NULL) ha->tgt.tgt_ops->free_session(sess); + if (logout_started) { + bool traced = false; + + while (!ACCESS_ONCE(sess->logout_completed)) { + if (!traced) { + ql_dbg(ql_dbg_tgt_mgt, vha, 0xf086, + "%s: waiting for sess %p logout\n", + __func__, sess); + traced = true; + } + msleep(100); + } + + ql_dbg(ql_dbg_tgt_mgt, vha, 0xf087, + "%s: sess %p logout completed\n", + __func__, sess); + } + + spin_lock_irqsave(&ha->hardware_lock, flags); + + if (sess->plogi_ack_needed) + qlt_send_notify_ack(vha, &sess->tm_iocb, + 0, 0, 0, 0, 0, 0); + + list_del(&sess->sess_list_entry); + + spin_unlock_irqrestore(&ha->hardware_lock, flags); + ql_dbg(ql_dbg_tgt_mgt, vha, 0xf001, "Unregistration of sess %p finished\n", sess); @@ -410,9 +473,9 @@ void qlt_unreg_sess(struct qla_tgt_sess *sess) vha->hw->tgt.tgt_ops->clear_nacl_from_fcport_map(sess); - list_del(&sess->sess_list_entry); - if (sess->deleted) - list_del(&sess->del_list_entry); + if (!list_empty(&sess->del_list_entry)) + list_del_init(&sess->del_list_entry); + sess->deleted = QLA_SESS_DELETION_IN_PROGRESS; INIT_WORK(&sess->free_work, qlt_free_session_done); schedule_work(&sess->free_work); @@ -490,27 +553,36 @@ static void qlt_schedule_sess_for_deletion(struct qla_tgt_sess *sess, struct qla_tgt *tgt = sess->tgt; uint32_t dev_loss_tmo = tgt->ha->port_down_retry_count + 5; - if (sess->deleted) - return; + if (sess->deleted) { + /* Upgrade to unconditional deletion in case it was temporary */ + if (immediate && sess->deleted == QLA_SESS_DELETION_PENDING) + list_del(&sess->del_list_entry); + else + return; + } ql_dbg(ql_dbg_tgt, sess->vha, 0xe001, "Scheduling sess %p for deletion\n", sess); - list_add_tail(&sess->del_list_entry, &tgt->del_sess_list); - sess->deleted = 1; - if (immediate) + if (immediate) { dev_loss_tmo = 0; + sess->deleted = 
QLA_SESS_DELETION_IN_PROGRESS; + list_add(&sess->del_list_entry, &tgt->del_sess_list); + } else { + sess->deleted = QLA_SESS_DELETION_PENDING; + list_add_tail(&sess->del_list_entry, &tgt->del_sess_list); + } sess->expires = jiffies + dev_loss_tmo * HZ; ql_dbg(ql_dbg_tgt, sess->vha, 0xe048, "qla_target(%d): session for port %8phC (loop ID %d) scheduled for " - "deletion in %u secs (expires: %lu) immed: %d\n", + "deletion in %u secs (expires: %lu) immed: %d, logout: %d\n", sess->vha->vp_idx, sess->port_name, sess->loop_id, dev_loss_tmo, - sess->expires, immediate); + sess->expires, immediate, sess->logout_on_delete); if (immediate) - schedule_delayed_work(&tgt->sess_del_work, 0); + mod_delayed_work(system_wq, &tgt->sess_del_work, 0); else schedule_delayed_work(&tgt->sess_del_work, sess->expires - jiffies); @@ -579,9 +651,9 @@ static int qla24xx_get_loop_id(struct scsi_qla_host *vha, const uint8_t *s_id, /* ha->hardware_lock supposed to be held on entry */ static void qlt_undelete_sess(struct qla_tgt_sess *sess) { - BUG_ON(!sess->deleted); + BUG_ON(sess->deleted != QLA_SESS_DELETION_PENDING); - list_del(&sess->del_list_entry); + list_del_init(&sess->del_list_entry); sess->deleted = 0; } @@ -600,7 +672,9 @@ static void qlt_del_sess_work_fn(struct delayed_work *work) del_list_entry); elapsed = jiffies; if (time_after_eq(elapsed, sess->expires)) { - qlt_undelete_sess(sess); + /* No turning back */ + list_del_init(&sess->del_list_entry); + sess->deleted = QLA_SESS_DELETION_IN_PROGRESS; ql_dbg(ql_dbg_tgt_mgt, vha, 0xf004, "Timeout: sess %p about to be deleted\n", @@ -644,6 +718,13 @@ static struct qla_tgt_sess *qlt_create_sess( fcport->d_id.b.al_pa, fcport->d_id.b.area, fcport->loop_id); + /* Cannot undelete at this point */ + if (sess->deleted == QLA_SESS_DELETION_IN_PROGRESS) { + spin_unlock_irqrestore(&ha->hardware_lock, + flags); + return NULL; + } + if (sess->deleted) qlt_undelete_sess(sess); @@ -674,6 +755,14 @@ static struct qla_tgt_sess *qlt_create_sess( sess->s_id = fcport->d_id; sess->loop_id = fcport->loop_id; sess->local = local; + INIT_LIST_HEAD(&sess->del_list_entry); + + /* Under normal circumstances we want to logout from firmware when + * session eventually ends and release corresponding nport handle. + * In the exception cases (e.g. when new PLOGI is waiting) corresponding + * code will adjust these flags as necessary. */ + sess->logout_on_delete = 1; + sess->keep_nport_handle = 0; ql_dbg(ql_dbg_tgt_mgt, vha, 0xf006, "Adding sess %p to tgt %p via ->check_initiator_node_acl()\n", @@ -751,6 +840,10 @@ void qlt_fc_port_added(struct scsi_qla_host *vha, fc_port_t *fcport) mutex_unlock(&vha->vha_tgt.tgt_mutex); spin_lock_irqsave(&ha->hardware_lock, flags); + } else if (sess->deleted == QLA_SESS_DELETION_IN_PROGRESS) { + /* Point of no return */ + spin_unlock_irqrestore(&ha->hardware_lock, flags); + return; } else { kref_get(&sess->se_sess->sess_kref); @@ -2373,6 +2466,19 @@ int qlt_xmit_response(struct qla_tgt_cmd *cmd, int xmit_type, unsigned long flags = 0; int res; + spin_lock_irqsave(&ha->hardware_lock, flags); + if (cmd->sess && cmd->sess->deleted == QLA_SESS_DELETION_IN_PROGRESS) { + cmd->state = QLA_TGT_STATE_PROCESSED; + if (cmd->sess->logout_completed) + /* no need to terminate. FW already freed exchange. 
*/ + qlt_abort_cmd_on_host_reset(cmd->vha, cmd); + else + qlt_send_term_exchange(vha, cmd, &cmd->atio, 1); + spin_unlock_irqrestore(&ha->hardware_lock, flags); + return 0; + } + spin_unlock_irqrestore(&ha->hardware_lock, flags); + memset(&prm, 0, sizeof(prm)); qlt_check_srr_debug(cmd, &xmit_type); @@ -2534,7 +2640,8 @@ int qlt_rdy_to_xfer(struct qla_tgt_cmd *cmd) spin_lock_irqsave(&ha->hardware_lock, flags); - if (qla2x00_reset_active(vha) || cmd->reset_count != ha->chip_reset) { + if (qla2x00_reset_active(vha) || (cmd->reset_count != ha->chip_reset) || + (cmd->sess && cmd->sess->deleted == QLA_SESS_DELETION_IN_PROGRESS)) { /* * Either a chip reset is active or this request was from * previous life, just abort the processing. @@ -2725,6 +2832,89 @@ qlt_handle_dif_error(struct scsi_qla_host *vha, struct qla_tgt_cmd *cmd, } +/* If hardware_lock held on entry, might drop it, then reaquire */ +/* This function sends the appropriate CTIO to ISP 2xxx or 24xx */ +static int __qlt_send_term_imm_notif(struct scsi_qla_host *vha, + struct imm_ntfy_from_isp *ntfy) +{ + struct nack_to_isp *nack; + struct qla_hw_data *ha = vha->hw; + request_t *pkt; + int ret = 0; + + ql_dbg(ql_dbg_tgt_tmr, vha, 0xe01c, + "Sending TERM ELS CTIO (ha=%p)\n", ha); + + pkt = (request_t *)qla2x00_alloc_iocbs_ready(vha, NULL); + if (pkt == NULL) { + ql_dbg(ql_dbg_tgt, vha, 0xe080, + "qla_target(%d): %s failed: unable to allocate " + "request packet\n", vha->vp_idx, __func__); + return -ENOMEM; + } + + pkt->entry_type = NOTIFY_ACK_TYPE; + pkt->entry_count = 1; + pkt->handle = QLA_TGT_SKIP_HANDLE | CTIO_COMPLETION_HANDLE_MARK; + + nack = (struct nack_to_isp *)pkt; + nack->ox_id = ntfy->ox_id; + + nack->u.isp24.nport_handle = ntfy->u.isp24.nport_handle; + if (le16_to_cpu(ntfy->u.isp24.status) == IMM_NTFY_ELS) { + nack->u.isp24.flags = ntfy->u.isp24.flags & + __constant_cpu_to_le32(NOTIFY24XX_FLAGS_PUREX_IOCB); + } + + /* terminate */ + nack->u.isp24.flags |= + __constant_cpu_to_le16(NOTIFY_ACK_FLAGS_TERMINATE); + + nack->u.isp24.srr_rx_id = ntfy->u.isp24.srr_rx_id; + nack->u.isp24.status = ntfy->u.isp24.status; + nack->u.isp24.status_subcode = ntfy->u.isp24.status_subcode; + nack->u.isp24.fw_handle = ntfy->u.isp24.fw_handle; + nack->u.isp24.exchange_address = ntfy->u.isp24.exchange_address; + nack->u.isp24.srr_rel_offs = ntfy->u.isp24.srr_rel_offs; + nack->u.isp24.srr_ui = ntfy->u.isp24.srr_ui; + nack->u.isp24.vp_index = ntfy->u.isp24.vp_index; + + qla2x00_start_iocbs(vha, vha->req); + return ret; +} + +static void qlt_send_term_imm_notif(struct scsi_qla_host *vha, + struct imm_ntfy_from_isp *imm, int ha_locked) +{ + unsigned long flags = 0; + int rc; + + if (qlt_issue_marker(vha, ha_locked) < 0) + return; + + if (ha_locked) { + rc = __qlt_send_term_imm_notif(vha, imm); + +#if 0 /* Todo */ + if (rc == -ENOMEM) + qlt_alloc_qfull_cmd(vha, imm, 0, 0); +#endif + goto done; + } + + spin_lock_irqsave(&vha->hw->hardware_lock, flags); + rc = __qlt_send_term_imm_notif(vha, imm); + +#if 0 /* Todo */ + if (rc == -ENOMEM) + qlt_alloc_qfull_cmd(vha, imm, 0, 0); +#endif + +done: + if (!ha_locked) + spin_unlock_irqrestore(&vha->hw->hardware_lock, flags); +} + /* If hardware_lock held on entry, might drop it, then reaquire */ /* This function sends the appropriate CTIO to ISP 2xxx or 24xx */ static int __qlt_send_term_exchange(struct scsi_qla_host *vha, @@ -3777,22 +3967,237 @@ static int qlt_abort_task(struct scsi_qla_host *vha, return __qlt_abort_task(vha, iocb, sess); } +void qlt_logo_completion_handler(fc_port_t *fcport, int rc) +{ + if 
(fcport->tgt_session) { + if (rc != MBS_COMMAND_COMPLETE) { + ql_dbg(ql_dbg_tgt_mgt, fcport->vha, 0xf088, + "%s: se_sess %p / sess %p from" + " port %8phC loop_id %#04x s_id %02x:%02x:%02x" + " LOGO failed: %#x\n", + __func__, + fcport->tgt_session->se_sess, + fcport->tgt_session, + fcport->port_name, fcport->loop_id, + fcport->d_id.b.domain, fcport->d_id.b.area, + fcport->d_id.b.al_pa, rc); + } + + fcport->tgt_session->logout_completed = 1; + } +} + +static void qlt_swap_imm_ntfy_iocb(struct imm_ntfy_from_isp *a, + struct imm_ntfy_from_isp *b) +{ + struct imm_ntfy_from_isp tmp; + memcpy(&tmp, a, sizeof(struct imm_ntfy_from_isp)); + memcpy(a, b, sizeof(struct imm_ntfy_from_isp)); + memcpy(b, &tmp, sizeof(struct imm_ntfy_from_isp)); +} + +/* +* ha->hardware_lock supposed to be held on entry (to protect tgt->sess_list) +* +* Schedules sessions with matching port_id/loop_id but different wwn for +* deletion. Returns existing session with matching wwn if present. +* Null otherwise. +*/ +static struct qla_tgt_sess * +qlt_find_sess_invalidate_other(struct qla_tgt *tgt, uint64_t wwn, + port_id_t port_id, uint16_t loop_id) +{ + struct qla_tgt_sess *sess = NULL, *other_sess; + uint64_t other_wwn; + + list_for_each_entry(other_sess, &tgt->sess_list, sess_list_entry) { + + other_wwn = wwn_to_u64(other_sess->port_name); + + if (wwn == other_wwn) { + WARN_ON(sess); + sess = other_sess; + continue; + } + + /* find other sess with nport_id collision */ + if (port_id.b24 == other_sess->s_id.b24) { + if (loop_id != other_sess->loop_id) { + ql_dbg(ql_dbg_tgt_tmr, tgt->vha, 0x1000c, + "Invalidating sess %p loop_id %d wwn %llx.\n", + other_sess, other_sess->loop_id, other_wwn); + + /* + * logout_on_delete is set by default, but another + * session that has the same s_id/loop_id combo + * might have cleared it when requested this session + * deletion, so don't touch it + */ + qlt_schedule_sess_for_deletion(other_sess, true); + } else { + /* + * Another wwn used to have our s_id/loop_id + * combo - kill the session, but don't log out + */ + sess->logout_on_delete = 0; + qlt_schedule_sess_for_deletion(other_sess, + true); + } + continue; + } + + /* find other sess with nport handle collision */ + if (loop_id == other_sess->loop_id) { + ql_dbg(ql_dbg_tgt_tmr, tgt->vha, 0x1000d, + "Invalidating sess %p loop_id %d wwn %llx.\n", + other_sess, other_sess->loop_id, other_wwn); + + /* Same loop_id but different s_id + * Ok to kill and logout */ + qlt_schedule_sess_for_deletion(other_sess, true); + } + } + + return sess; +} + /* * ha->hardware_lock supposed to be held on entry. Might drop it, then reaquire */ static int qlt_24xx_handle_els(struct scsi_qla_host *vha, struct imm_ntfy_from_isp *iocb) { + struct qla_tgt *tgt = vha->vha_tgt.qla_tgt; + struct qla_tgt_sess *sess = NULL; + uint64_t wwn; + port_id_t port_id; + uint16_t loop_id; + uint16_t wd3_lo; int res = 0; + wwn = wwn_to_u64(iocb->u.isp24.port_name); + + port_id.b.domain = iocb->u.isp24.port_id[2]; + port_id.b.area = iocb->u.isp24.port_id[1]; + port_id.b.al_pa = iocb->u.isp24.port_id[0]; + port_id.b.rsvd_1 = 0; + + loop_id = le16_to_cpu(iocb->u.isp24.nport_handle); + ql_dbg(ql_dbg_tgt_mgt, vha, 0xf026, "qla_target(%d): Port ID: 0x%3phC ELS opcode: 0x%02x\n", vha->vp_idx, iocb->u.isp24.port_id, iocb->u.isp24.status_subcode); + /* res = 1 means ack at the end of thread + * res = 0 means ack async/later. 
+ */ switch (iocb->u.isp24.status_subcode) { case ELS_PLOGI: - case ELS_FLOGI: + + if (wwn) + sess = qlt_find_sess_invalidate_other(tgt, wwn, + port_id, loop_id); + + if (!sess || IS_SW_RESV_ADDR(sess->s_id)) { + res = 1; + break; + } + + if (sess->plogi_ack_needed) { + /* + * Initiator sent another PLOGI before last PLOGI could + * finish. Swap plogi iocbs and terminate old one + * without acking, new one will get acked when session + * deletion completes. + */ + ql_log(ql_log_warn, sess->vha, 0xf089, + "sess %p received double plogi.\n", sess); + + qlt_swap_imm_ntfy_iocb(iocb, &sess->tm_iocb); + + qlt_send_term_imm_notif(vha, iocb, 1); + + res = 0; + break; + } + + res = 0; + + /* + * Save immediate Notif IOCB for Ack when sess is done + * and being deleted. + */ + memcpy(&sess->tm_iocb, iocb, sizeof(sess->tm_iocb)); + sess->plogi_ack_needed = 1; + + /* + * Under normal circumstances we want to release nport handle + * during LOGO process to avoid nport handle leaks inside FW. + * The exception is when LOGO is done while another PLOGI with + * the same nport handle is waiting as might be the case here. + * Note: there is always a possibily of a race where session + * deletion has already started for other reasons (e.g. ACL + * removal) and now PLOGI arrives: + * 1. if PLOGI arrived in FW after nport handle has been freed, + * FW must have assigned this PLOGI a new/same handle and we + * can proceed ACK'ing it as usual when session deletion + * completes. + * 2. if PLOGI arrived in FW before LOGO with LCF_FREE_NPORT + * bit reached it, the handle has now been released. We'll + * get an error when we ACK this PLOGI. Nothing will be sent + * back to initiator. Initiator should eventually retry + * PLOGI and situation will correct itself. + */ + sess->keep_nport_handle = ((sess->loop_id == loop_id) && + (sess->s_id.b24 == port_id.b24)); + qlt_schedule_sess_for_deletion(sess, true); + break; + case ELS_PRLI: + wd3_lo = le16_to_cpu(iocb->u.isp24.u.prli.wd3_lo); + + if (wwn) + sess = qlt_find_sess_invalidate_other(tgt, wwn, port_id, + loop_id); + + if (sess != NULL) { + if (sess->deleted) { + /* + * Impatient initiator sent PRLI before last + * PLOGI could finish. Will force him to re-try, + * while last one finishes. + */ + ql_log(ql_log_warn, sess->vha, 0xf090, + "sess %p PRLI received, before plogi ack.\n", + sess); + qlt_send_term_imm_notif(vha, iocb, 1); + res = 0; + break; + } + + /* + * This shouldn't happen under normal circumstances, + * since we have deleted the old session during PLOGI + */ + ql_dbg(ql_dbg_tgt_mgt, vha, 0xf091, + "PRLI (loop_id %#04x) for existing sess %p (loop_id %#04x)\n", + sess->loop_id, sess, iocb->u.isp24.nport_handle); + + sess->local = 0; + sess->loop_id = loop_id; + sess->s_id = port_id; + + if (wd3_lo & BIT_7) + sess->conf_compl_supported = 1; + + res = 1; + } else { + /* todo: else - create sess here. 
*/ + res = 1; /* send notify ack */ + } + + break; + case ELS_LOGO: case ELS_PRLO: res = qlt_reset(vha, iocb, QLA_TGT_NEXUS_LOSS_SESS); @@ -3810,6 +4215,7 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha, break; } + case ELS_FLOGI: /* should never happen */ default: ql_dbg(ql_dbg_tgt_mgt, vha, 0xf061, "qla_target(%d): Unsupported ELS command %x " diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h index e5304639f2432..13b3b0d9b8199 100644 --- a/drivers/scsi/qla2xxx/qla_target.h +++ b/drivers/scsi/qla2xxx/qla_target.h @@ -167,7 +167,24 @@ struct imm_ntfy_from_isp { uint32_t srr_rel_offs; uint16_t srr_ui; uint16_t srr_ox_id; - uint8_t reserved_4[19]; + union { + struct { + uint8_t node_name[8]; + } plogi; /* PLOGI/ADISC/PDISC */ + struct { + /* PRLI word 3 bit 0-15 */ + uint16_t wd3_lo; + uint8_t resv0[6]; + } prli; + struct { + uint8_t port_id[3]; + uint8_t resv1; + uint16_t nport_handle; + uint16_t resv2; + } req_els; + } u; + uint8_t port_name[8]; + uint8_t resv3[3]; uint8_t vp_index; uint32_t reserved_5; uint8_t port_id[3]; @@ -234,6 +251,7 @@ struct nack_to_isp { uint8_t reserved[2]; uint16_t ox_id; } __packed; +#define NOTIFY_ACK_FLAGS_TERMINATE BIT_3 #define NOTIFY_ACK_SRR_FLAGS_ACCEPT 0 #define NOTIFY_ACK_SRR_FLAGS_REJECT 1 @@ -878,6 +896,13 @@ struct qla_tgt_sess_op { bool aborted; }; +enum qla_sess_deletion { + QLA_SESS_DELETION_NONE = 0, + QLA_SESS_DELETION_PENDING = 1, /* hopefully we can get rid of + * this one */ + QLA_SESS_DELETION_IN_PROGRESS = 2, +}; + /* * Equivilant to IT Nexus (Initiator-Target) */ @@ -886,8 +911,13 @@ struct qla_tgt_sess { port_id_t s_id; unsigned int conf_compl_supported:1; - unsigned int deleted:1; + unsigned int deleted:2; unsigned int local:1; + unsigned int logout_on_delete:1; + unsigned int plogi_ack_needed:1; + unsigned int keep_nport_handle:1; + + unsigned char logout_completed; struct se_session *se_sess; struct scsi_qla_host *vha; @@ -899,6 +929,10 @@ struct qla_tgt_sess { uint8_t port_name[WWN_SIZE]; struct work_struct free_work; + + union { + struct imm_ntfy_from_isp tm_iocb; + }; }; struct qla_tgt_cmd { @@ -1029,6 +1063,10 @@ struct qla_tgt_srr_ctio { struct qla_tgt_cmd *cmd; }; +/* Check for Switch reserved address */ +#define IS_SW_RESV_ADDR(_s_id) \ + ((_s_id.b.domain == 0xff) && (_s_id.b.area == 0xfc)) + #define QLA_TGT_XMIT_DATA 1 #define QLA_TGT_XMIT_STATUS 2 #define QLA_TGT_XMIT_ALL (QLA_TGT_XMIT_STATUS|QLA_TGT_XMIT_DATA) @@ -1122,5 +1160,6 @@ extern void qlt_stop_phase2(struct qla_tgt *); extern irqreturn_t qla83xx_msix_atio_q(int, void *); extern void qlt_83xx_iospace_config(struct qla_hw_data *); extern int qlt_free_qfull_cmds(struct scsi_qla_host *); +extern void qlt_logo_completion_handler(fc_port_t *, int); #endif /* __QLA_TARGET_H */ diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index e736bde1aba00..12b58efb126bc 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -1714,6 +1714,10 @@ static void tcm_qla2xxx_update_sess(struct qla_tgt_sess *sess, port_id_t s_id, } sess->conf_compl_supported = conf_compl_supported; + + /* Reset logout parameters to default */ + sess->logout_on_delete = 1; + sess->keep_nport_handle = 0; } /* From 772d56fecb6dde547473cf7b556f457555897da7 Mon Sep 17 00:00:00 2001 From: Alexei Potashnik Date: Tue, 14 Jul 2015 16:00:48 -0400 Subject: [PATCH 1882/2091] qla2xxx: drop cmds/tmrs arrived while session is being deleted [ Upstream commit e52a8b45b9c782937f74b701f8c656d4e5619eb5 ] If a new 
initiator (different WWN) shows up on the same fcport, old initiator's session is scheduled for deletion. But there is a small window between it being marked with QLA_SESS_DELETION_IN_PROGRESS and qlt_unret_sess getting called when new session's commands will keep finding old session in the fcport map. This patch drops cmds/tmrs if they find session in the progress of being deleted. Cc: # v3.18+ Signed-off-by: Alexei Potashnik Acked-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Nicholas Bellinger Signed-off-by: Sasha Levin --- drivers/scsi/qla2xxx/qla_target.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index e3e4f0a142ac4..71cc3818fbe51 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -1447,6 +1447,11 @@ static void qlt_24xx_handle_abts(struct scsi_qla_host *vha, return; } + if (sess->deleted == QLA_SESS_DELETION_IN_PROGRESS) { + qlt_24xx_send_abts_resp(vha, abts, FCP_TMF_REJECTED, false); + return; + } + rc = __qlt_24xx_handle_abts(vha, abts, sess); if (rc != 0) { ql_dbg(ql_dbg_tgt_mgt, vha, 0xf054, @@ -3739,6 +3744,16 @@ static int qlt_handle_cmd_for_atio(struct scsi_qla_host *vha, queue_work(qla_tgt_wq, &op->work); return 0; } + + /* Another WWN used to have our s_id. Our PLOGI scheduled its + * session deletion, but it's still in sess_del_work wq */ + if (sess->deleted == QLA_SESS_DELETION_IN_PROGRESS) { + ql_dbg(ql_dbg_io, vha, 0x3061, + "New command while old session %p is being deleted\n", + sess); + return -EFAULT; + } + /* * Do kref_get() before returning + dropping qla_hw_data->hardware_lock. */ @@ -3902,6 +3917,9 @@ static int qlt_handle_task_mgmt(struct scsi_qla_host *vha, void *iocb) sizeof(struct atio_from_isp)); } + if (sess->deleted == QLA_SESS_DELETION_IN_PROGRESS) + return -EFAULT; + return qlt_issue_task_mgmt(sess, unpacked_lun, fn, iocb, 0); } @@ -5533,6 +5551,11 @@ static void qlt_abort_work(struct qla_tgt *tgt, if (!sess) goto out_term; } else { + if (sess->deleted == QLA_SESS_DELETION_IN_PROGRESS) { + sess = NULL; + goto out_term; + } + kref_get(&sess->se_sess->sess_kref); } @@ -5587,6 +5610,11 @@ static void qlt_tmr_work(struct qla_tgt *tgt, if (!sess) goto out_term; } else { + if (sess->deleted == QLA_SESS_DELETION_IN_PROGRESS) { + sess = NULL; + goto out_term; + } + kref_get(&sess->se_sess->sess_kref); } From 1226aefc12eef8b339bdaff0bfe11d72eb4978df Mon Sep 17 00:00:00 2001 From: Alexei Potashnik Date: Tue, 14 Jul 2015 16:00:45 -0400 Subject: [PATCH 1883/2091] qla2xxx: Abort stale cmds on qla_tgt_wq when plogi arrives [ Upstream commit daddf5cf9b5c68b81b2bb7133f1dd0fda4552d0b ] cancel any commands from initiator's s_id that are still waiting on qla_tgt_wq when PLOGI arrives. 
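In code, the cancellation is a straight scan of the per-port lists introduced earlier in this series, keyed on the 24-bit FC source ID. A condensed sketch (the sess_op list and the hit counter are omitted; see the hunk below for the full version):

	/* pack the 3-byte s_id into one comparable key, as sid_to_key() does */
	key = (((u32)s_id->b.domain << 16) |
	       ((u32)s_id->b.area   <<  8) |
		(u32)s_id->b.al_pa);

	spin_lock(&vha->cmd_list_lock);
	list_for_each_entry(cmd, &vha->qla_cmd_list, cmd_list) {
		if (sid_to_key(cmd->atio.u.isp24.fcp_hdr.s_id) == key)
			cmd->state = QLA_TGT_STATE_ABORTED;	/* worker drops it */
	}
	spin_unlock(&vha->cmd_list_lock);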
Cc: # v3.18+ Signed-off-by: Alexei Potashnik Acked-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Nicholas Bellinger Signed-off-by: Sasha Levin --- drivers/scsi/qla2xxx/qla_target.c | 35 +++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 71cc3818fbe51..f4db50653fe9d 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -4079,6 +4079,38 @@ qlt_find_sess_invalidate_other(struct qla_tgt *tgt, uint64_t wwn, return sess; } +/* Abort any commands for this s_id waiting on qla_tgt_wq workqueue */ +static int abort_cmds_for_s_id(struct scsi_qla_host *vha, port_id_t *s_id) +{ + struct qla_tgt_sess_op *op; + struct qla_tgt_cmd *cmd; + uint32_t key; + int count = 0; + + key = (((u32)s_id->b.domain << 16) | + ((u32)s_id->b.area << 8) | + ((u32)s_id->b.al_pa)); + + spin_lock(&vha->cmd_list_lock); + list_for_each_entry(op, &vha->qla_sess_op_cmd_list, cmd_list) { + uint32_t op_key = sid_to_key(op->atio.u.isp24.fcp_hdr.s_id); + if (op_key == key) { + op->aborted = true; + count++; + } + } + list_for_each_entry(cmd, &vha->qla_cmd_list, cmd_list) { + uint32_t cmd_key = sid_to_key(cmd->atio.u.isp24.fcp_hdr.s_id); + if (cmd_key == key) { + cmd->state = QLA_TGT_STATE_ABORTED; + count++; + } + } + spin_unlock(&vha->cmd_list_lock); + + return count; +} + /* * ha->hardware_lock supposed to be held on entry. Might drop it, then reaquire */ @@ -4112,6 +4144,9 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha, switch (iocb->u.isp24.status_subcode) { case ELS_PLOGI: + /* Mark all stale commands in qla_tgt_wq for deletion */ + abort_cmds_for_s_id(vha, &port_id); + if (wwn) sess = qlt_find_sess_invalidate_other(tgt, wwn, port_id, loop_id); From eaaecc5e28b49dc00c4c36f1f5fd1acf1f4f4be1 Mon Sep 17 00:00:00 2001 From: Alexei Potashnik Date: Tue, 14 Jul 2015 16:00:46 -0400 Subject: [PATCH 1884/2091] qla2xxx: added sess generations to detect RSCN update races [ Upstream commit df673274fa4896f25f0bf348d2a3535d74b4cbec ] RSCN processing in qla2xxx driver can run in parallel with ELS/IO processing. As such the decision to remove disappeared fc port's session could be stale, because a new login sequence has occurred since and created a brand new session. Previous mechanism of dealing with this by delaying deletion request was prone to erroneous deletions if the event that was supposed to cancel the deletion never arrived or has been delayed in processing. New mechanism relies on a time-like generation counter to serialize RSCN updates relative to ELS/IO updates. 
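The serialization mechanism itself is small; a condensed sketch of its two halves, taken from the hunks below (a monotonic per-host tick sampled before slow discovery starts, and a staleness check when the deferred deletion finally runs):

	/* take a generation snapshot; callers store it with their request */
	void qlt_do_generation_tick(struct scsi_qla_host *vha, int *dest)
	{
		scsi_qla_host_t *base_vha = pci_get_drvdata(vha->hw->pdev);

		*dest = atomic_inc_return(&base_vha->generation_tick);
		wmb();	/* publish before the caller acts on the value */
	}

	/* in qlt_fc_port_deleted(): ignore a request older than the session */
	if (max_gen - sess->generation < 0) {
		ql_dbg(ql_dbg_tgt_mgt, vha, 0xf092,
		    "Ignoring stale deletion request for sess %p\n", sess);
		return;
	}

The signed subtraction keeps the comparison valid across counter wraparound, the same idiom the kernel uses in time_after().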
Cc: # v3.18+ Signed-off-by: Alexei Potashnik Signed-off-by: Himanshu Madhani Signed-off-by: Nicholas Bellinger Signed-off-by: Sasha Levin --- drivers/scsi/qla2xxx/qla_dbg.c | 2 +- drivers/scsi/qla2xxx/qla_def.h | 5 +++ drivers/scsi/qla2xxx/qla_init.c | 32 ++++++++++------- drivers/scsi/qla2xxx/qla_os.c | 5 ++- drivers/scsi/qla2xxx/qla_target.c | 60 +++++++++++++++++++++++++------ drivers/scsi/qla2xxx/qla_target.h | 5 ++- 6 files changed, 83 insertions(+), 26 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_dbg.c b/drivers/scsi/qla2xxx/qla_dbg.c index 05793b7199a4f..8b011aef12bd5 100644 --- a/drivers/scsi/qla2xxx/qla_dbg.c +++ b/drivers/scsi/qla2xxx/qla_dbg.c @@ -68,7 +68,7 @@ * | | | 0xd101-0xd1fe | * | | | 0xd214-0xd2fe | * | Target Mode | 0xe080 | | - * | Target Mode Management | 0xf091 | 0xf002 | + * | Target Mode Management | 0xf096 | 0xf002 | * | | | 0xf046-0xf049 | * | Target Mode Task Management | 0x1000d | | * ---------------------------------------------------------------------- diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 54c201d06ba2f..90d926ca12000 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -3586,6 +3586,11 @@ typedef struct scsi_qla_host { struct list_head qla_sess_op_cmd_list; spinlock_t cmd_list_lock; + /* Counter to detect races between ELS and RSCN events */ + atomic_t generation_tick; + /* Time when global fcport update has been scheduled */ + int total_fcport_update_gen; + uint32_t vp_abort_cnt; struct fc_vport *fc_vport; /* holds fc_vport * for each vport */ diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index ec179aa358807..b323ad04ffb18 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -2927,24 +2927,14 @@ qla2x00_rport_del(void *data) { fc_port_t *fcport = data; struct fc_rport *rport; - scsi_qla_host_t *vha = fcport->vha; unsigned long flags; - unsigned long vha_flags; spin_lock_irqsave(fcport->vha->host->host_lock, flags); rport = fcport->drport ? fcport->drport: fcport->rport; fcport->drport = NULL; spin_unlock_irqrestore(fcport->vha->host->host_lock, flags); - if (rport) { + if (rport) fc_remote_port_delete(rport); - /* - * Release the target mode FC NEXUS in qla_target.c code - * if target mod is enabled. - */ - spin_lock_irqsave(&vha->hw->hardware_lock, vha_flags); - qlt_fc_port_deleted(vha, fcport); - spin_unlock_irqrestore(&vha->hw->hardware_lock, vha_flags); - } } /** @@ -3384,6 +3374,7 @@ qla2x00_configure_fabric(scsi_qla_host_t *vha) LIST_HEAD(new_fcports); struct qla_hw_data *ha = vha->hw; struct scsi_qla_host *base_vha = pci_get_drvdata(ha->pdev); + int discovery_gen; /* If FL port exists, then SNS is present */ if (IS_FWI2_CAPABLE(ha)) @@ -3454,6 +3445,14 @@ qla2x00_configure_fabric(scsi_qla_host_t *vha) fcport->scan_state = QLA_FCPORT_SCAN; } + /* Mark the time right before querying FW for connected ports. + * This process is long, asynchronous and by the time it's done, + * collected information might not be accurate anymore. E.g. + * disconnected port might have re-connected and a brand new + * session has been created. In this case session's generation + * will be newer than discovery_gen. 
*/ + qlt_do_generation_tick(vha, &discovery_gen); + rval = qla2x00_find_all_fabric_devs(vha, &new_fcports); if (rval != QLA_SUCCESS) break; @@ -3505,7 +3504,8 @@ qla2x00_configure_fabric(scsi_qla_host_t *vha) atomic_read(&fcport->state), fcport->flags, fcport->fc4_type, fcport->scan_state); - qlt_fc_port_deleted(vha, fcport); + qlt_fc_port_deleted(vha, fcport, + discovery_gen); } } } @@ -4282,6 +4282,14 @@ qla2x00_update_fcports(scsi_qla_host_t *base_vha) atomic_read(&fcport->state) != FCS_UNCONFIGURED) { spin_unlock_irqrestore(&ha->vport_slock, flags); qla2x00_rport_del(fcport); + + /* + * Release the target mode FC NEXUS in + * qla_target.c, if target mod is enabled. + */ + qlt_fc_port_deleted(vha, fcport, + base_vha->total_fcport_update_gen); + spin_lock_irqsave(&ha->vport_slock, flags); } } diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index abfea56296630..e7a97a57a1ee1 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -3229,11 +3229,14 @@ qla2x00_schedule_rport_del(struct scsi_qla_host *vha, fc_port_t *fcport, spin_lock_irqsave(vha->host->host_lock, flags); fcport->drport = rport; spin_unlock_irqrestore(vha->host->host_lock, flags); + qlt_do_generation_tick(vha, &base_vha->total_fcport_update_gen); set_bit(FCPORT_UPDATE_NEEDED, &base_vha->dpc_flags); qla2xxx_wake_dpc(base_vha); } else { + int now; fc_remote_port_delete(rport); - qlt_fc_port_deleted(vha, fcport); + qlt_do_generation_tick(vha, &now); + qlt_fc_port_deleted(vha, fcport, now); } } diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index f4db50653fe9d..e2c201982ec75 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -127,6 +127,16 @@ static struct workqueue_struct *qla_tgt_wq; static DEFINE_MUTEX(qla_tgt_mutex); static LIST_HEAD(qla_tgt_glist); +/* This API intentionally takes dest as a parameter, rather than returning + * int value to avoid caller forgetting to issue wmb() after the store */ +void qlt_do_generation_tick(struct scsi_qla_host *vha, int *dest) +{ + scsi_qla_host_t *base_vha = pci_get_drvdata(vha->hw->pdev); + *dest = atomic_inc_return(&base_vha->generation_tick); + /* memory barrier */ + wmb(); +} + /* ha->hardware_lock supposed to be held on entry (to protect tgt->sess_list) */ static struct qla_tgt_sess *qlt_find_sess_by_port_name( struct qla_tgt *tgt, @@ -576,10 +586,12 @@ static void qlt_schedule_sess_for_deletion(struct qla_tgt_sess *sess, sess->expires = jiffies + dev_loss_tmo * HZ; ql_dbg(ql_dbg_tgt, sess->vha, 0xe048, - "qla_target(%d): session for port %8phC (loop ID %d) scheduled for " - "deletion in %u secs (expires: %lu) immed: %d, logout: %d\n", - sess->vha->vp_idx, sess->port_name, sess->loop_id, dev_loss_tmo, - sess->expires, immediate, sess->logout_on_delete); + "qla_target(%d): session for port %8phC (loop ID %d s_id %02x:%02x:%02x)" + " scheduled for deletion in %u secs (expires: %lu) immed: %d, logout: %d, gen: %#x\n", + sess->vha->vp_idx, sess->port_name, sess->loop_id, + sess->s_id.b.domain, sess->s_id.b.area, sess->s_id.b.al_pa, + dev_loss_tmo, sess->expires, immediate, sess->logout_on_delete, + sess->generation); if (immediate) mod_delayed_work(system_wq, &tgt->sess_del_work, 0); @@ -734,6 +746,9 @@ static struct qla_tgt_sess *qlt_create_sess( if (sess->local && !local) sess->local = 0; + + qlt_do_generation_tick(vha, &sess->generation); + spin_unlock_irqrestore(&ha->hardware_lock, flags); return sess; @@ -795,6 +810,7 @@ static struct qla_tgt_sess 
*qlt_create_sess( spin_lock_irqsave(&ha->hardware_lock, flags); list_add_tail(&sess->sess_list_entry, &vha->vha_tgt.qla_tgt->sess_list); vha->vha_tgt.qla_tgt->sess_count++; + qlt_do_generation_tick(vha, &sess->generation); spin_unlock_irqrestore(&ha->hardware_lock, flags); ql_dbg(ql_dbg_tgt_mgt, vha, 0xf04b, @@ -808,7 +824,7 @@ static struct qla_tgt_sess *qlt_create_sess( } /* - * Called from drivers/scsi/qla2xxx/qla_init.c:qla2x00_reg_remote_port() + * Called from qla2x00_reg_remote_port() */ void qlt_fc_port_added(struct scsi_qla_host *vha, fc_port_t *fcport) { @@ -874,7 +890,12 @@ void qlt_fc_port_added(struct scsi_qla_host *vha, fc_port_t *fcport) spin_unlock_irqrestore(&ha->hardware_lock, flags); } -void qlt_fc_port_deleted(struct scsi_qla_host *vha, fc_port_t *fcport) +/* + * max_gen - specifies maximum session generation + * at which this deletion requestion is still valid + */ +void +qlt_fc_port_deleted(struct scsi_qla_host *vha, fc_port_t *fcport, int max_gen) { struct qla_tgt *tgt = vha->vha_tgt.qla_tgt; struct qla_tgt_sess *sess; @@ -893,6 +914,15 @@ void qlt_fc_port_deleted(struct scsi_qla_host *vha, fc_port_t *fcport) return; } + if (max_gen - sess->generation < 0) { + ql_dbg(ql_dbg_tgt_mgt, vha, 0xf092, + "Ignoring stale deletion request for se_sess %p / sess %p" + " for port %8phC, req_gen %d, sess_gen %d\n", + sess->se_sess, sess, sess->port_name, max_gen, + sess->generation); + return; + } + ql_dbg(ql_dbg_tgt_mgt, vha, 0xf008, "qla_tgt_fc_port_deleted %p", sess); sess->local = 1; @@ -3989,7 +4019,7 @@ void qlt_logo_completion_handler(fc_port_t *fcport, int rc) { if (fcport->tgt_session) { if (rc != MBS_COMMAND_COMPLETE) { - ql_dbg(ql_dbg_tgt_mgt, fcport->vha, 0xf088, + ql_dbg(ql_dbg_tgt_mgt, fcport->vha, 0xf093, "%s: se_sess %p / sess %p from" " port %8phC loop_id %#04x s_id %02x:%02x:%02x" " LOGO failed: %#x\n", @@ -4118,6 +4148,7 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha, struct imm_ntfy_from_isp *iocb) { struct qla_tgt *tgt = vha->vha_tgt.qla_tgt; + struct qla_hw_data *ha = vha->hw; struct qla_tgt_sess *sess = NULL; uint64_t wwn; port_id_t port_id; @@ -4163,7 +4194,7 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha, * without acking, new one will get acked when session * deletion completes. */ - ql_log(ql_log_warn, sess->vha, 0xf089, + ql_log(ql_log_warn, sess->vha, 0xf094, "sess %p received double plogi.\n", sess); qlt_swap_imm_ntfy_iocb(iocb, &sess->tm_iocb); @@ -4220,7 +4251,7 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha, * PLOGI could finish. Will force him to re-try, * while last one finishes. 
*/ - ql_log(ql_log_warn, sess->vha, 0xf090, + ql_log(ql_log_warn, sess->vha, 0xf095, "sess %p PRLI received, before plogi ack.\n", sess); qlt_send_term_imm_notif(vha, iocb, 1); @@ -4232,7 +4263,7 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha, * This shouldn't happen under normal circumstances, * since we have deleted the old session during PLOGI */ - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf091, + ql_dbg(ql_dbg_tgt_mgt, vha, 0xf096, "PRLI (loop_id %#04x) for existing sess %p (loop_id %#04x)\n", sess->loop_id, sess, iocb->u.isp24.nport_handle); @@ -4243,7 +4274,14 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha, if (wd3_lo & BIT_7) sess->conf_compl_supported = 1; - res = 1; + } + res = 1; /* send notify ack */ + + /* Make session global (not used in fabric mode) */ + if (ha->current_topology != ISP_CFG_F) { + set_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags); + set_bit(LOCAL_LOOP_UPDATE, &vha->dpc_flags); + qla2xxx_wake_dpc(vha); } else { /* todo: else - create sess here. */ res = 1; /* send notify ack */ diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h index 13b3b0d9b8199..dfc60c78102d9 100644 --- a/drivers/scsi/qla2xxx/qla_target.h +++ b/drivers/scsi/qla2xxx/qla_target.h @@ -919,6 +919,8 @@ struct qla_tgt_sess { unsigned char logout_completed; + int generation; + struct se_session *se_sess; struct scsi_qla_host *vha; struct qla_tgt *tgt; @@ -1084,7 +1086,7 @@ extern int qlt_lport_register(void *, u64, u64, u64, extern void qlt_lport_deregister(struct scsi_qla_host *); extern void qlt_unreg_sess(struct qla_tgt_sess *); extern void qlt_fc_port_added(struct scsi_qla_host *, fc_port_t *); -extern void qlt_fc_port_deleted(struct scsi_qla_host *, fc_port_t *); +extern void qlt_fc_port_deleted(struct scsi_qla_host *, fc_port_t *, int); extern int __init qlt_init(void); extern void qlt_exit(void); extern void qlt_update_vp_map(struct scsi_qla_host *, int); @@ -1161,5 +1163,6 @@ extern irqreturn_t qla83xx_msix_atio_q(int, void *); extern void qlt_83xx_iospace_config(struct qla_hw_data *); extern int qlt_free_qfull_cmds(struct scsi_qla_host *); extern void qlt_logo_completion_handler(fc_port_t *, int); +extern void qlt_do_generation_tick(struct scsi_qla_host *, int *); #endif /* __QLA_TARGET_H */ From ce4f0721444eea05c31c072981a165750b8b243b Mon Sep 17 00:00:00 2001 From: Alexei Potashnik Date: Tue, 14 Jul 2015 16:00:49 -0400 Subject: [PATCH 1885/2091] qla2xxx: terminate exchange when command is aborted by LIO [ Upstream commit 7359df25a53386dd33c223672bbd12cb49d0ce4f ] The newly introduced aborted_task TFO callback has to terminate exchange with QLogic driver, since command is being deleted and no status will be queued to the driver at a later point. This patch also moves the burden of releasing one cmd refcount to the aborted_task handler. Changed iSCSI aborted_task logic to satisfy the above requirement. 
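As a rough picture of the contract change described above: once the core aborts and deletes a command, no status will ever be queued, so the fabric's aborted_task callback both terminates the exchange on the wire and drops the reference that the status path would otherwise have released. The snippet below is only a reduced illustration of that shape with invented types and names, not the tcm_qla2xxx/qla_target code.

#include <stdio.h>

/* Invented miniature of the command lifecycle described above. */
struct cmd { int state; int refcount; };

enum { STATE_NORMAL, STATE_ABORTED };

static void hw_terminate_exchange(struct cmd *cmd)
{
	/* placeholder for telling the HBA that no status will follow */
	(void)cmd;
}

static void put_cmd(struct cmd *cmd)
{
	cmd->refcount--;
}

/* Fabric aborted_task callback: the core deletes the command and will never
 * queue status, so tear down the exchange here and drop the reference the
 * status path would otherwise have dropped. */
static void aborted_task(struct cmd *cmd)
{
	cmd->state = STATE_ABORTED;
	hw_terminate_exchange(cmd);
	put_cmd(cmd);
}

int main(void)
{
	struct cmd c = { .state = STATE_NORMAL, .refcount = 2 };

	aborted_task(&c);
	printf("state=%d refcount=%d\n", c.state, c.refcount);
	return 0;
}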
Cc: # v3.18+ Signed-off-by: Alexei Potashnik Acked-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Nicholas Bellinger Signed-off-by: Sasha Levin --- drivers/scsi/qla2xxx/qla_target.c | 36 +++++++++++++++--------------- drivers/scsi/qla2xxx/qla_target.h | 9 +------- drivers/scsi/qla2xxx/tcm_qla2xxx.c | 11 +-------- 3 files changed, 20 insertions(+), 36 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index e2c201982ec75..df6193b481773 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -1924,21 +1924,6 @@ static int qlt_pre_xmit_response(struct qla_tgt_cmd *cmd, struct qla_hw_data *ha = vha->hw; struct se_cmd *se_cmd = &cmd->se_cmd; - if (unlikely(cmd->aborted)) { - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf014, - "qla_target(%d): terminating exchange " - "for aborted cmd=%p (se_cmd=%p, tag=%d)", vha->vp_idx, cmd, - se_cmd, cmd->tag); - - cmd->state = QLA_TGT_STATE_ABORTED; - cmd->cmd_flags |= BIT_6; - - qlt_send_term_exchange(vha, cmd, &cmd->atio, 0); - - /* !! At this point cmd could be already freed !! */ - return QLA_TGT_PRE_XMIT_RESP_CMD_ABORTED; - } - prm->cmd = cmd; prm->tgt = tgt; prm->rq_result = scsi_status; @@ -2526,9 +2511,6 @@ int qlt_xmit_response(struct qla_tgt_cmd *cmd, int xmit_type, res = qlt_pre_xmit_response(cmd, &prm, xmit_type, scsi_status, &full_req_cnt); if (unlikely(res != 0)) { - if (res == QLA_TGT_PRE_XMIT_RESP_CMD_ABORTED) - return 0; - return res; } @@ -3094,6 +3076,24 @@ static void qlt_chk_exch_leak_thresh_hold(struct scsi_qla_host *vha) } +void qlt_abort_cmd(struct qla_tgt_cmd *cmd) +{ + struct qla_tgt *tgt = cmd->tgt; + struct scsi_qla_host *vha = tgt->vha; + struct se_cmd *se_cmd = &cmd->se_cmd; + + ql_dbg(ql_dbg_tgt_mgt, vha, 0xf014, + "qla_target(%d): terminating exchange for aborted cmd=%p " + "(se_cmd=%p, tag=%llu)", vha->vp_idx, cmd, &cmd->se_cmd, + cmd->tag); + + cmd->state = QLA_TGT_STATE_ABORTED; + cmd->cmd_flags |= BIT_6; + + qlt_send_term_exchange(vha, cmd, &cmd->atio, 0); +} +EXPORT_SYMBOL(qlt_abort_cmd); + void qlt_free_cmd(struct qla_tgt_cmd *cmd) { struct qla_tgt_sess *sess = cmd->sess; diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h index dfc60c78102d9..d30c60a1d522a 100644 --- a/drivers/scsi/qla2xxx/qla_target.h +++ b/drivers/scsi/qla2xxx/qla_target.h @@ -808,13 +808,6 @@ int qla2x00_wait_for_hba_online(struct scsi_qla_host *); #define FC_TM_REJECT 4 #define FC_TM_FAILED 5 -/* - * Error code of qlt_pre_xmit_response() meaning that cmd's exchange was - * terminated, so no more actions is needed and success should be returned - * to target. 
- */ -#define QLA_TGT_PRE_XMIT_RESP_CMD_ABORTED 0x1717 - #if (BITS_PER_LONG > 32) || defined(CONFIG_HIGHMEM64G) #define pci_dma_lo32(a) (a & 0xffffffff) #define pci_dma_hi32(a) ((((a) >> 16)>>16) & 0xffffffff) @@ -950,7 +943,6 @@ struct qla_tgt_cmd { unsigned int conf_compl_supported:1; unsigned int sg_mapped:1; unsigned int free_sg:1; - unsigned int aborted:1; /* Needed in case of SRR */ unsigned int write_data_transferred:1; unsigned int ctx_dsd_alloced:1; unsigned int q_full:1; @@ -1132,6 +1124,7 @@ static inline uint32_t sid_to_key(const uint8_t *s_id) extern void qlt_response_pkt_all_vps(struct scsi_qla_host *, response_t *); extern int qlt_rdy_to_xfer(struct qla_tgt_cmd *); extern int qlt_xmit_response(struct qla_tgt_cmd *, int, uint8_t); +extern void qlt_abort_cmd(struct qla_tgt_cmd *); extern void qlt_xmit_tm_rsp(struct qla_tgt_mgmt_cmd *); extern void qlt_free_mcmd(struct qla_tgt_mgmt_cmd *); extern void qlt_free_cmd(struct qla_tgt_cmd *cmd); diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index 12b58efb126bc..e37e430ccd5f1 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -669,7 +669,6 @@ static int tcm_qla2xxx_queue_data_in(struct se_cmd *se_cmd) cmd->cmd_flags |= BIT_4; cmd->bufflen = se_cmd->data_length; cmd->dma_data_direction = target_reverse_dma_direction(se_cmd); - cmd->aborted = (se_cmd->transport_state & CMD_T_ABORTED); cmd->sg_cnt = se_cmd->t_data_nents; cmd->sg = se_cmd->t_data_sg; @@ -699,7 +698,6 @@ static int tcm_qla2xxx_queue_status(struct se_cmd *se_cmd) cmd->sg_cnt = 0; cmd->offset = 0; cmd->dma_data_direction = target_reverse_dma_direction(se_cmd); - cmd->aborted = (se_cmd->transport_state & CMD_T_ABORTED); if (cmd->cmd_flags & BIT_5) { pr_crit("Bit_5 already set for cmd = %p.\n", cmd); dump_stack(); @@ -764,14 +762,7 @@ static void tcm_qla2xxx_aborted_task(struct se_cmd *se_cmd) { struct qla_tgt_cmd *cmd = container_of(se_cmd, struct qla_tgt_cmd, se_cmd); - struct scsi_qla_host *vha = cmd->vha; - struct qla_hw_data *ha = vha->hw; - - if (!cmd->sg_mapped) - return; - - pci_unmap_sg(ha->pdev, cmd->sg, cmd->sg_cnt, cmd->dma_data_direction); - cmd->sg_mapped = 0; + qlt_abort_cmd(cmd); } static void tcm_qla2xxx_clear_sess_lookup(struct tcm_qla2xxx_lport *, From abd62631a46c8a27fc607ea5525d3034e8d69781 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 26 Jan 2016 12:24:25 +0300 Subject: [PATCH 1886/2091] intel_scu_ipcutil: underflow in scu_reg_access() [ Upstream commit b1d353ad3d5835b16724653b33c05124e1b5acf1 ] "count" is controlled by the user and it can be negative. Let's prevent that by making it unsigned. You have to have CAP_SYS_RAWIO to call this function so the bug is not as serious as it could be. 
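The bug class is easy to reproduce in isolation: with a signed count, a negative user-supplied value passes a "count > 4" style range check and later feeds a size calculation, while declaring the variable unsigned makes the same check reject it. The standalone program below illustrates the difference; it is not the driver code and the function names are invented.

#include <stdio.h>

/* Mimics the shape of the range check in the ioctl path. */
static int check_signed(int count)
{
	if (count == 0 || count == 3 || count > 4)
		return -1;		/* -EINVAL */
	return 0;			/* count later scales a copy size */
}

static int check_unsigned(unsigned int count)
{
	if (count == 0 || count == 3 || count > 4)
		return -1;
	return 0;
}

int main(void)
{
	int user_value = -2;		/* attacker-controlled 32-bit value */

	/* signed: -2 > 4 is false, so the bogus value slips through */
	printf("signed check:   %d\n", check_signed(user_value));
	/* unsigned: -2 becomes 4294967294, which is > 4 and is rejected */
	printf("unsigned check: %d\n", check_unsigned((unsigned int)user_value));
	return 0;
}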
Fixes: 5369c02d951a ('intel_scu_ipc: Utility driver for intel scu ipc') Signed-off-by: Dan Carpenter Cc: stable@vger.kernel.org Signed-off-by: Darren Hart Signed-off-by: Sasha Levin --- drivers/platform/x86/intel_scu_ipcutil.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/intel_scu_ipcutil.c b/drivers/platform/x86/intel_scu_ipcutil.c index 02bc5a6343c3f..aa454241489c9 100644 --- a/drivers/platform/x86/intel_scu_ipcutil.c +++ b/drivers/platform/x86/intel_scu_ipcutil.c @@ -49,7 +49,7 @@ struct scu_ipc_data { static int scu_reg_access(u32 cmd, struct scu_ipc_data *data) { - int count = data->count; + unsigned int count = data->count; if (count == 0 || count == 3 || count > 4) return -EINVAL; From 3fa9638c67f6e7f3c4a242ecb21eaacda9dcff91 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 1 Feb 2016 11:33:21 -0500 Subject: [PATCH 1887/2091] libata: fix sff host state machine locking while polling [ Upstream commit 8eee1d3ed5b6fc8e14389567c9a6f53f82bb7224 ] The bulk of ATA host state machine is implemented by ata_sff_hsm_move(). The function is called from either the interrupt handler or, if polling, a work item. Unlike from the interrupt path, the polling path calls the function without holding the host lock and ata_sff_hsm_move() selectively grabs the lock. This is completely broken. If an IRQ triggers while polling is in progress, the two can easily race and end up accessing the hardware and updating state machine state at the same time. This can put the state machine in an illegal state and lead to a crash like the following. kernel BUG at drivers/ata/libata-sff.c:1302! invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN Modules linked in: CPU: 1 PID: 10679 Comm: syz-executor Not tainted 4.5.0-rc1+ #300 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 task: ffff88002bd00000 ti: ffff88002e048000 task.ti: ffff88002e048000 RIP: 0010:[] [] ata_sff_hsm_move+0x619/0x1c60 ... Call Trace: [] __ata_sff_port_intr+0x1e1/0x3a0 drivers/ata/libata-sff.c:1584 [] ata_bmdma_port_intr+0x71/0x400 drivers/ata/libata-sff.c:2877 [< inline >] __ata_sff_interrupt drivers/ata/libata-sff.c:1629 [] ata_bmdma_interrupt+0x253/0x580 drivers/ata/libata-sff.c:2902 [] handle_irq_event_percpu+0x108/0x7e0 kernel/irq/handle.c:157 [] handle_irq_event+0xa7/0x140 kernel/irq/handle.c:205 [] handle_edge_irq+0x1e3/0x8d0 kernel/irq/chip.c:623 [< inline >] generic_handle_irq_desc include/linux/irqdesc.h:146 [] handle_irq+0x10c/0x2a0 arch/x86/kernel/irq_64.c:78 [] do_IRQ+0x7d/0x1a0 arch/x86/kernel/irq.c:240 [] common_interrupt+0x8c/0x8c arch/x86/entry/entry_64.S:520 [< inline >] rcu_lock_acquire include/linux/rcupdate.h:490 [< inline >] rcu_read_lock include/linux/rcupdate.h:874 [] filemap_map_pages+0x131/0xba0 mm/filemap.c:2145 [< inline >] do_fault_around mm/memory.c:2943 [< inline >] do_read_fault mm/memory.c:2962 [< inline >] do_fault mm/memory.c:3133 [< inline >] handle_pte_fault mm/memory.c:3308 [< inline >] __handle_mm_fault mm/memory.c:3418 [] handle_mm_fault+0x2516/0x49a0 mm/memory.c:3447 [] __do_page_fault+0x376/0x960 arch/x86/mm/fault.c:1238 [] trace_do_page_fault+0xe8/0x420 arch/x86/mm/fault.c:1331 [] do_async_page_fault+0x14/0xd0 arch/x86/kernel/kvm.c:264 [] async_page_fault+0x28/0x30 arch/x86/entry/entry_64.S:986 Fix it by ensuring that the polling path is holding the host lock before entering ata_sff_hsm_move() so that all hardware accesses and state updates are performed under the host lock. 
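The locking rule the fix enforces can be shown generically: every path that drives a shared state machine, whether the interrupt handler or the polling worker, takes the same lock for the whole transition, and the polling path drops the lock only around sleeps. The sketch below is a userspace analogue with pthreads, not libata code; the state machine and names are invented.

#include <pthread.h>
#include <unistd.h>

/* Invented stand-ins for the host lock and host state machine. */
static pthread_mutex_t host_lock = PTHREAD_MUTEX_INITIALIZER;
static int hsm_state;

static void hsm_move(void)
{
	/* caller must hold host_lock, the same rule lockdep_assert_held()
	 * documents in the real driver */
	hsm_state++;
}

static void *irq_path(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&host_lock);
	hsm_move();
	pthread_mutex_unlock(&host_lock);
	return NULL;
}

static void *polling_path(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&host_lock);
	hsm_move();
	/* never sleep with the lock held: drop it, wait, retake it */
	pthread_mutex_unlock(&host_lock);
	usleep(2000);
	pthread_mutex_lock(&host_lock);
	hsm_move();
	pthread_mutex_unlock(&host_lock);
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, irq_path, NULL);
	pthread_create(&b, NULL, polling_path, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	return 0;
}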
Signed-off-by: Tejun Heo Reported-and-tested-by: Dmitry Vyukov Link: http://lkml.kernel.org/g/CACT4Y+b_JsOxJu2EZyEf+mOXORc_zid5V1-pLZSroJVxyWdSpw@mail.gmail.com Cc: stable@vger.kernel.org Signed-off-by: Sasha Levin --- drivers/ata/libata-sff.c | 32 +++++++++++--------------------- 1 file changed, 11 insertions(+), 21 deletions(-) diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c index cdf6215a9a22b..7dbba387d12a7 100644 --- a/drivers/ata/libata-sff.c +++ b/drivers/ata/libata-sff.c @@ -997,12 +997,9 @@ static inline int ata_hsm_ok_in_wq(struct ata_port *ap, static void ata_hsm_qc_complete(struct ata_queued_cmd *qc, int in_wq) { struct ata_port *ap = qc->ap; - unsigned long flags; if (ap->ops->error_handler) { if (in_wq) { - spin_lock_irqsave(ap->lock, flags); - /* EH might have kicked in while host lock is * released. */ @@ -1014,8 +1011,6 @@ static void ata_hsm_qc_complete(struct ata_queued_cmd *qc, int in_wq) } else ata_port_freeze(ap); } - - spin_unlock_irqrestore(ap->lock, flags); } else { if (likely(!(qc->err_mask & AC_ERR_HSM))) ata_qc_complete(qc); @@ -1024,10 +1019,8 @@ static void ata_hsm_qc_complete(struct ata_queued_cmd *qc, int in_wq) } } else { if (in_wq) { - spin_lock_irqsave(ap->lock, flags); ata_sff_irq_on(ap); ata_qc_complete(qc); - spin_unlock_irqrestore(ap->lock, flags); } else ata_qc_complete(qc); } @@ -1048,9 +1041,10 @@ int ata_sff_hsm_move(struct ata_port *ap, struct ata_queued_cmd *qc, { struct ata_link *link = qc->dev->link; struct ata_eh_info *ehi = &link->eh_info; - unsigned long flags = 0; int poll_next; + lockdep_assert_held(ap->lock); + WARN_ON_ONCE((qc->flags & ATA_QCFLAG_ACTIVE) == 0); /* Make sure ata_sff_qc_issue() does not throw things @@ -1112,14 +1106,6 @@ int ata_sff_hsm_move(struct ata_port *ap, struct ata_queued_cmd *qc, } } - /* Send the CDB (atapi) or the first data block (ata pio out). - * During the state transition, interrupt handler shouldn't - * be invoked before the data transfer is complete and - * hsm_task_state is changed. Hence, the following locking. - */ - if (in_wq) - spin_lock_irqsave(ap->lock, flags); - if (qc->tf.protocol == ATA_PROT_PIO) { /* PIO data out protocol. * send first data block. @@ -1135,9 +1121,6 @@ int ata_sff_hsm_move(struct ata_port *ap, struct ata_queued_cmd *qc, /* send CDB */ atapi_send_cdb(ap, qc); - if (in_wq) - spin_unlock_irqrestore(ap->lock, flags); - /* if polling, ata_sff_pio_task() handles the rest. * otherwise, interrupt handler takes over from here. 
*/ @@ -1361,12 +1344,14 @@ static void ata_sff_pio_task(struct work_struct *work) u8 status; int poll_next; + spin_lock_irq(ap->lock); + BUG_ON(ap->sff_pio_task_link == NULL); /* qc can be NULL if timeout occurred */ qc = ata_qc_from_tag(ap, link->active_tag); if (!qc) { ap->sff_pio_task_link = NULL; - return; + goto out_unlock; } fsm_start: @@ -1381,11 +1366,14 @@ static void ata_sff_pio_task(struct work_struct *work) */ status = ata_sff_busy_wait(ap, ATA_BUSY, 5); if (status & ATA_BUSY) { + spin_unlock_irq(ap->lock); ata_msleep(ap, 2); + spin_lock_irq(ap->lock); + status = ata_sff_busy_wait(ap, ATA_BUSY, 10); if (status & ATA_BUSY) { ata_sff_queue_pio_task(link, ATA_SHORT_PAUSE); - return; + goto out_unlock; } } @@ -1402,6 +1390,8 @@ static void ata_sff_pio_task(struct work_struct *work) */ if (poll_next) goto fsm_start; +out_unlock: + spin_unlock_irq(ap->lock); } /** From 35eacd10a054fb9658f8c15edd5cac19a476db9d Mon Sep 17 00:00:00 2001 From: James Hogan Date: Mon, 25 Jan 2016 20:32:03 +0000 Subject: [PATCH 1888/2091] MIPS: Fix buffer overflow in syscall_get_arguments() [ Upstream commit f4dce1ffd2e30fa31756876ef502ce6d2324be35 ] Since commit 4c21b8fd8f14 ("MIPS: seccomp: Handle indirect system calls (o32)"), syscall_get_arguments() attempts to handle o32 indirect syscall arguments by incrementing both the start argument number and the number of arguments to fetch. However only the start argument number needs to be incremented. The number of arguments does not change, they're just shifted up by one, and in fact the output array is provided by the caller and is likely only n entries long, so reading more arguments overflows the output buffer. In the case of seccomp, this results in it fetching 7 arguments starting at the 2nd one, which overflows the unsigned long args[6] in populate_seccomp_data(). This clobbers the $s0 register from syscall_trace_enter() which __seccomp_phase1_filter() saved onto the stack, into which syscall_trace_enter() had placed its syscall number argument. This caused Chromium to crash. Credit goes to Milko for tracking it down as far as $s0 being clobbered. Fixes: 4c21b8fd8f14 ("MIPS: seccomp: Handle indirect system calls (o32)") Reported-by: Milko Leporis Signed-off-by: James Hogan Cc: linux-mips@linux-mips.org Cc: # 3.15- Patchwork: https://patchwork.linux-mips.org/patch/12213/ Signed-off-by: Ralf Baechle Signed-off-by: Sasha Levin --- arch/mips/include/asm/syscall.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/mips/include/asm/syscall.h b/arch/mips/include/asm/syscall.h index 6499d93ae68d7..47bc45a67e9ba 100644 --- a/arch/mips/include/asm/syscall.h +++ b/arch/mips/include/asm/syscall.h @@ -101,10 +101,8 @@ static inline void syscall_get_arguments(struct task_struct *task, /* O32 ABI syscall() - Either 64-bit with O32 or 32-bit */ if ((config_enabled(CONFIG_32BIT) || test_tsk_thread_flag(task, TIF_32BIT_REGS)) && - (regs->regs[2] == __NR_syscall)) { + (regs->regs[2] == __NR_syscall)) i++; - n++; - } while (n--) ret |= mips_get_syscall_arg(args++, task, regs, i++); From 992eab6ecc8255a005c6bfc9505eca9a8d08206a Mon Sep 17 00:00:00 2001 From: zengtao Date: Tue, 2 Feb 2016 11:38:34 +0800 Subject: [PATCH 1889/2091] cputime: Prevent 32bit overflow in time[val|spec]_to_cputime() [ Upstream commit 0f26922fe5dc5724b1adbbd54b21bad03590b4f3 ] The datatype __kernel_time_t is u32 on 32bit platform, so its subject to overflows in the timeval/timespec to cputime conversion. Currently the following functions are affected: 1. setitimer() 2. 
timer_create/timer_settime() 3. sys_clock_nanosleep This can happen on MIPS32 and ARM32 with "Full dynticks CPU time accounting" enabled, which is required for CONFIG_NO_HZ_FULL. Enforce u64 conversion to prevent the overflow. Fixes: 31c1fc818715 ("ARM: Kconfig: allow full nohz CPU accounting") Signed-off-by: zengtao Reviewed-by: Arnd Bergmann Cc: Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/1454384314-154784-1-git-send-email-prime.zeng@huawei.com Signed-off-by: Thomas Gleixner Signed-off-by: Sasha Levin --- include/asm-generic/cputime_nsecs.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/asm-generic/cputime_nsecs.h b/include/asm-generic/cputime_nsecs.h index 0419485891f2a..0f1c6f315cdc5 100644 --- a/include/asm-generic/cputime_nsecs.h +++ b/include/asm-generic/cputime_nsecs.h @@ -75,7 +75,7 @@ typedef u64 __nocast cputime64_t; */ static inline cputime_t timespec_to_cputime(const struct timespec *val) { - u64 ret = val->tv_sec * NSEC_PER_SEC + val->tv_nsec; + u64 ret = (u64)val->tv_sec * NSEC_PER_SEC + val->tv_nsec; return (__force cputime_t) ret; } static inline void cputime_to_timespec(const cputime_t ct, struct timespec *val) @@ -91,7 +91,8 @@ static inline void cputime_to_timespec(const cputime_t ct, struct timespec *val) */ static inline cputime_t timeval_to_cputime(const struct timeval *val) { - u64 ret = val->tv_sec * NSEC_PER_SEC + val->tv_usec * NSEC_PER_USEC; + u64 ret = (u64)val->tv_sec * NSEC_PER_SEC + + val->tv_usec * NSEC_PER_USEC; return (__force cputime_t) ret; } static inline void cputime_to_timeval(const cputime_t ct, struct timeval *val) From 988ba6758fbc851c71505c3df18eb8af1e730fa7 Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Mon, 1 Feb 2016 22:26:40 +0530 Subject: [PATCH 1890/2091] ASoC: dpcm: fix the BE state on hw_free [ Upstream commit 5e82d2be6ee53275c72e964507518d7964c82753 ] While performing hw_free, DPCM checks the BE state but leaves out the suspend state. The suspend state needs to be checked as well, as we might be suspended and then usermode closes rather than resuming the audio stream. This was found by a stress testing of system with playback in loop and killed after few seconds running in background and second script running suspend-resume test in loop Signed-off-by: Vinod Koul Acked-by: Liam Girdwood Signed-off-by: Mark Brown Cc: stable@vger.kernel.org Signed-off-by: Sasha Levin --- sound/soc/soc-pcm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index 35fe58f4fa862..52fe7eb2dea1f 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -1661,7 +1661,8 @@ int dpcm_be_dai_hw_free(struct snd_soc_pcm_runtime *fe, int stream) (be->dpcm[stream].state != SND_SOC_DPCM_STATE_PREPARE) && (be->dpcm[stream].state != SND_SOC_DPCM_STATE_HW_FREE) && (be->dpcm[stream].state != SND_SOC_DPCM_STATE_PAUSED) && - (be->dpcm[stream].state != SND_SOC_DPCM_STATE_STOP)) + (be->dpcm[stream].state != SND_SOC_DPCM_STATE_STOP) && + (be->dpcm[stream].state != SND_SOC_DPCM_STATE_SUSPEND)) continue; dev_dbg(be->dev, "ASoC: hw_free BE %s\n", From 6f04c5131ca6a50b8b7747eed79bf5efe9c51e8b Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 27 Apr 2015 13:52:36 +0200 Subject: [PATCH 1891/2091] target: Remove first argument of target_{get,put}_sess_cmd() [ Upstream commit afc16604c06414223478df3e42301ab630b9960a ] The first argument of these two functions is always identical to se_cmd->se_sess. Hence remove the first argument. 
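The API change is purely mechanical: since the session is always reachable as se_cmd->se_sess, the function can derive it internally and callers pass one argument fewer, which also removes any chance of passing a mismatched session. A reduced sketch of that refactoring pattern, with invented miniature types rather than the target-core structures:

#include <stdio.h>

/* Invented miniature of the se_cmd/se_session relationship. */
struct session { int refcount; };
struct cmd     { struct session *sess; };

/* Before: callers passed both the session and the command, although
 * cmd->sess was the only valid choice.  After: derive it internally. */
static void put_sess_cmd(struct cmd *cmd)
{
	struct session *sess = cmd->sess;

	if (!sess)
		return;
	sess->refcount--;
}

int main(void)
{
	struct session s = { .refcount = 1 };
	struct cmd c = { .sess = &s };

	put_sess_cmd(&c);		/* old call shape: put_sess_cmd(&s, &c) */
	printf("refcount = %d\n", s.refcount);
	return 0;
}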
Signed-off-by: Bart Van Assche Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Cc: Andy Grover Cc: Cc: Felipe Balbi Cc: Michael S. Tsirkin Signed-off-by: Nicholas Bellinger Signed-off-by: Sasha Levin --- drivers/infiniband/ulp/isert/ib_isert.c | 6 +++--- drivers/infiniband/ulp/srpt/ib_srpt.c | 10 +++++----- drivers/scsi/qla2xxx/tcm_qla2xxx.c | 2 +- drivers/target/iscsi/iscsi_target.c | 15 +++++++-------- drivers/target/iscsi/iscsi_target_configfs.c | 2 +- drivers/target/iscsi/iscsi_target_util.c | 4 ++-- drivers/target/target_core_tmr.c | 2 +- drivers/target/target_core_transport.c | 20 ++++++++++---------- drivers/vhost/scsi.c | 2 +- include/target/target_core_fabric.h | 4 ++-- 10 files changed, 33 insertions(+), 34 deletions(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index c32a934f76936..353e2ab090eef 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -1349,7 +1349,7 @@ isert_handle_scsi_cmd(struct isert_conn *isert_conn, if (!rc && dump_payload == false && unsol_data) iscsit_set_unsoliticed_dataout(cmd); else if (dump_payload && imm_data) - target_put_sess_cmd(conn->sess->se_sess, &cmd->se_cmd); + target_put_sess_cmd(&cmd->se_cmd); return 0; } @@ -1774,7 +1774,7 @@ isert_put_cmd(struct isert_cmd *isert_cmd, bool comp_err) cmd->se_cmd.t_state == TRANSPORT_WRITE_PENDING) { struct se_cmd *se_cmd = &cmd->se_cmd; - target_put_sess_cmd(se_cmd->se_sess, se_cmd); + target_put_sess_cmd(se_cmd); } } @@ -1947,7 +1947,7 @@ isert_completion_rdma_read(struct iser_tx_desc *tx_desc, spin_unlock_bh(&cmd->istate_lock); if (ret) { - target_put_sess_cmd(se_cmd->se_sess, se_cmd); + target_put_sess_cmd(se_cmd); transport_send_check_condition_and_sense(se_cmd, se_cmd->pi_err, 0); } else { diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 9b84b4c0a000a..6fbc7bc824d29 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -1334,7 +1334,7 @@ static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx) BUG_ON(ch->sess == NULL); - target_put_sess_cmd(ch->sess, &ioctx->cmd); + target_put_sess_cmd(&ioctx->cmd); goto out; } @@ -1365,11 +1365,11 @@ static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx) * not been received in time. 
*/ srpt_unmap_sg_to_ib_sge(ioctx->ch, ioctx); - target_put_sess_cmd(ioctx->ch->sess, &ioctx->cmd); + target_put_sess_cmd(&ioctx->cmd); break; case SRPT_STATE_MGMT_RSP_SENT: srpt_set_cmd_state(ioctx, SRPT_STATE_DONE); - target_put_sess_cmd(ioctx->ch->sess, &ioctx->cmd); + target_put_sess_cmd(&ioctx->cmd); break; default: WARN(1, "Unexpected command state (%d)", state); @@ -1679,7 +1679,7 @@ static int srpt_check_stop_free(struct se_cmd *cmd) struct srpt_send_ioctx *ioctx = container_of(cmd, struct srpt_send_ioctx, cmd); - return target_put_sess_cmd(ioctx->ch->sess, &ioctx->cmd); + return target_put_sess_cmd(&ioctx->cmd); } /** @@ -3074,7 +3074,7 @@ static void srpt_queue_response(struct se_cmd *cmd) ioctx->tag); srpt_unmap_sg_to_ib_sge(ch, ioctx); srpt_set_cmd_state(ioctx, SRPT_STATE_DONE); - target_put_sess_cmd(ioctx->ch->sess, &ioctx->cmd); + target_put_sess_cmd(&ioctx->cmd); } } diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index e37e430ccd5f1..fdad875ca7778 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -429,7 +429,7 @@ static int tcm_qla2xxx_check_stop_free(struct se_cmd *se_cmd) cmd->cmd_flags |= BIT_14; } - return target_put_sess_cmd(se_cmd->se_sess, se_cmd); + return target_put_sess_cmd(se_cmd); } /* tcm_qla2xxx_release_cmd - Callback from TCM Core to release underlying diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 330bbe8310665..2e58279fab609 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -712,7 +712,7 @@ static int iscsit_add_reject_from_cmd( */ if (cmd->se_cmd.se_tfo != NULL) { pr_debug("iscsi reject: calling target_put_sess_cmd >>>>>>\n"); - target_put_sess_cmd(conn->sess->se_sess, &cmd->se_cmd); + target_put_sess_cmd(&cmd->se_cmd); } return -1; } @@ -998,7 +998,7 @@ int iscsit_setup_scsi_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, hdr->cmdsn, be32_to_cpu(hdr->data_length), payload_length, conn->cid); - target_get_sess_cmd(conn->sess->se_sess, &cmd->se_cmd, true); + target_get_sess_cmd(&cmd->se_cmd, true); cmd->sense_reason = transport_lookup_cmd_lun(&cmd->se_cmd, scsilun_to_int(&hdr->lun)); @@ -1064,7 +1064,7 @@ int iscsit_process_scsi_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, if (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER) return -1; else if (cmdsn_ret == CMDSN_LOWER_THAN_EXP) { - target_put_sess_cmd(conn->sess->se_sess, &cmd->se_cmd); + target_put_sess_cmd(&cmd->se_cmd); return 0; } } @@ -1080,7 +1080,7 @@ int iscsit_process_scsi_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, if (!cmd->sense_reason) return 0; - target_put_sess_cmd(conn->sess->se_sess, &cmd->se_cmd); + target_put_sess_cmd(&cmd->se_cmd); return 0; } @@ -1111,7 +1111,6 @@ static int iscsit_get_immediate_data(struct iscsi_cmd *cmd, struct iscsi_scsi_req *hdr, bool dump_payload) { - struct iscsi_conn *conn = cmd->conn; int cmdsn_ret = 0, immed_ret = IMMEDIATE_DATA_NORMAL_OPERATION; /* * Special case for Unsupported SAM WRITE Opcodes and ImmediateData=Yes. 
@@ -1138,7 +1137,7 @@ iscsit_get_immediate_data(struct iscsi_cmd *cmd, struct iscsi_scsi_req *hdr, rc = iscsit_dump_data_payload(cmd->conn, cmd->first_burst_len, 1); - target_put_sess_cmd(conn->sess->se_sess, &cmd->se_cmd); + target_put_sess_cmd(&cmd->se_cmd); return rc; } else if (cmd->unsolicited_data) iscsit_set_unsoliticed_dataout(cmd); @@ -1807,7 +1806,7 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, conn->sess->se_sess, 0, DMA_NONE, TCM_SIMPLE_TAG, cmd->sense_buffer + 2); - target_get_sess_cmd(conn->sess->se_sess, &cmd->se_cmd, true); + target_get_sess_cmd(&cmd->se_cmd, true); sess_ref = true; switch (function) { @@ -1949,7 +1948,7 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, */ if (sess_ref) { pr_debug("Handle TMR, using sess_ref=true check\n"); - target_put_sess_cmd(conn->sess->se_sess, &cmd->se_cmd); + target_put_sess_cmd(&cmd->se_cmd); } iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state); diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c index 5a8add721741a..83bb55b944348 100644 --- a/drivers/target/iscsi/iscsi_target_configfs.c +++ b/drivers/target/iscsi/iscsi_target_configfs.c @@ -1981,7 +1981,7 @@ static void lio_set_default_node_attributes(struct se_node_acl *se_acl) static int lio_check_stop_free(struct se_cmd *se_cmd) { - return target_put_sess_cmd(se_cmd->se_sess, se_cmd); + return target_put_sess_cmd(se_cmd); } static void lio_release_cmd(struct se_cmd *se_cmd) diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c index b18edda3e8af8..231e2e0e58945 100644 --- a/drivers/target/iscsi/iscsi_target_util.c +++ b/drivers/target/iscsi/iscsi_target_util.c @@ -746,7 +746,7 @@ void iscsit_free_cmd(struct iscsi_cmd *cmd, bool shutdown) rc = transport_generic_free_cmd(&cmd->se_cmd, shutdown); if (!rc && shutdown && se_cmd && se_cmd->se_sess) { __iscsit_free_cmd(cmd, true, shutdown); - target_put_sess_cmd(se_cmd->se_sess, se_cmd); + target_put_sess_cmd(se_cmd); } break; case ISCSI_OP_REJECT: @@ -762,7 +762,7 @@ void iscsit_free_cmd(struct iscsi_cmd *cmd, bool shutdown) rc = transport_generic_free_cmd(&cmd->se_cmd, shutdown); if (!rc && shutdown && se_cmd->se_sess) { __iscsit_free_cmd(cmd, true, shutdown); - target_put_sess_cmd(se_cmd->se_sess, se_cmd); + target_put_sess_cmd(se_cmd); } break; } diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c index 315ec3458eebc..b2e169fba3c68 100644 --- a/drivers/target/target_core_tmr.c +++ b/drivers/target/target_core_tmr.c @@ -153,7 +153,7 @@ void core_tmr_abort_task( cancel_work_sync(&se_cmd->work); transport_wait_for_tasks(se_cmd); - target_put_sess_cmd(se_sess, se_cmd); + target_put_sess_cmd(se_cmd); transport_cmd_finish_abort(se_cmd, true); printk("ABORT_TASK: Sending TMR_FUNCTION_COMPLETE for" diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 675f2d9d1f14c..7a948943efc9c 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -1419,7 +1419,7 @@ int target_submit_cmd_map_sgls(struct se_cmd *se_cmd, struct se_session *se_sess * for fabrics using TARGET_SCF_ACK_KREF that expect a second * kref_put() to happen during fabric packet acknowledgement. 
*/ - ret = target_get_sess_cmd(se_sess, se_cmd, (flags & TARGET_SCF_ACK_KREF)); + ret = target_get_sess_cmd(se_cmd, flags & TARGET_SCF_ACK_KREF); if (ret) return ret; /* @@ -1433,7 +1433,7 @@ int target_submit_cmd_map_sgls(struct se_cmd *se_cmd, struct se_session *se_sess rc = transport_lookup_cmd_lun(se_cmd, unpacked_lun); if (rc) { transport_send_check_condition_and_sense(se_cmd, rc, 0); - target_put_sess_cmd(se_sess, se_cmd); + target_put_sess_cmd(se_cmd); return 0; } @@ -1584,7 +1584,7 @@ int target_submit_tmr(struct se_cmd *se_cmd, struct se_session *se_sess, se_cmd->se_tmr_req->ref_task_tag = tag; /* See target_submit_cmd for commentary */ - ret = target_get_sess_cmd(se_sess, se_cmd, (flags & TARGET_SCF_ACK_KREF)); + ret = target_get_sess_cmd(se_cmd, flags & TARGET_SCF_ACK_KREF); if (ret) { core_tmr_release_req(se_cmd->se_tmr_req); return ret; @@ -2227,7 +2227,7 @@ static int transport_release_cmd(struct se_cmd *cmd) * If this cmd has been setup with target_get_sess_cmd(), drop * the kref and call ->release_cmd() in kref callback. */ - return target_put_sess_cmd(cmd->se_sess, cmd); + return target_put_sess_cmd(cmd); } /** @@ -2471,13 +2471,12 @@ int transport_generic_free_cmd(struct se_cmd *cmd, int wait_for_tasks) EXPORT_SYMBOL(transport_generic_free_cmd); /* target_get_sess_cmd - Add command to active ->sess_cmd_list - * @se_sess: session to reference * @se_cmd: command descriptor to add * @ack_kref: Signal that fabric will perform an ack target_put_sess_cmd() */ -int target_get_sess_cmd(struct se_session *se_sess, struct se_cmd *se_cmd, - bool ack_kref) +int target_get_sess_cmd(struct se_cmd *se_cmd, bool ack_kref) { + struct se_session *se_sess = se_cmd->se_sess; unsigned long flags; int ret = 0; @@ -2499,7 +2498,7 @@ int target_get_sess_cmd(struct se_session *se_sess, struct se_cmd *se_cmd, spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); if (ret && ack_kref) - target_put_sess_cmd(se_sess, se_cmd); + target_put_sess_cmd(se_cmd); return ret; } @@ -2528,11 +2527,12 @@ static void target_release_cmd_kref(struct kref *kref) } /* target_put_sess_cmd - Check for active I/O shutdown via kref_put - * @se_sess: session to reference * @se_cmd: command descriptor to drop */ -int target_put_sess_cmd(struct se_session *se_sess, struct se_cmd *se_cmd) +int target_put_sess_cmd(struct se_cmd *se_cmd) { + struct se_session *se_sess = se_cmd->se_sess; + if (!se_sess) { se_cmd->se_tfo->release_cmd(se_cmd); return 1; diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index ea32b386797f5..636435b412936 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -607,7 +607,7 @@ static void vhost_scsi_free_cmd(struct vhost_scsi_cmd *cmd) static int vhost_scsi_check_stop_free(struct se_cmd *se_cmd) { - return target_put_sess_cmd(se_cmd->se_sess, se_cmd); + return target_put_sess_cmd(se_cmd); } static void diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h index 0f4dc3768587b..24c8d9d0d9463 100644 --- a/include/target/target_core_fabric.h +++ b/include/target/target_core_fabric.h @@ -155,8 +155,8 @@ bool transport_wait_for_tasks(struct se_cmd *); int transport_check_aborted_status(struct se_cmd *, int); int transport_send_check_condition_and_sense(struct se_cmd *, sense_reason_t, int); -int target_get_sess_cmd(struct se_session *, struct se_cmd *, bool); -int target_put_sess_cmd(struct se_session *, struct se_cmd *); +int target_get_sess_cmd(struct se_cmd *, bool); +int target_put_sess_cmd(struct se_cmd *); void 
target_sess_cmd_list_set_waiting(struct se_session *); void target_wait_for_sess_cmds(struct se_session *); From 422d2ab7e8cc67ed57d79a297f6832c74c0a0d80 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Mon, 11 Jan 2016 21:53:05 -0800 Subject: [PATCH 1892/2091] target: Fix LUN_RESET active TMR descriptor handling [ Upstream commit a6d9bb1c9605cd4f44e2d8290dc4d0e88f20292d ] This patch fixes a NULL pointer se_cmd->cmd_kref < 0 refcount bug during TMR LUN_RESET with active TMRs, triggered during se_cmd + se_tmr_req descriptor shutdown + release via core_tmr_drain_tmr_list(). To address this bug, go ahead and obtain a local kref_get_unless_zero(&se_cmd->cmd_kref) for active I/O to set CMD_T_ABORTED, and transport_wait_for_tasks() followed by the final target_put_sess_cmd() to drop the local ->cmd_kref. Also add two new checks within target_tmr_work() to avoid CMD_T_ABORTED -> TFO->queue_tm_rsp() callbacks ahead of invoking the backend -> fabric put in transport_cmd_check_stop_to_fabric(). For good measure, also change core_tmr_release_req() to use list_del_init() ahead of se_tmr_req memory free. Reviewed-by: Quinn Tran Cc: Himanshu Madhani Cc: Sagi Grimberg Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Andy Grover Cc: Mike Christie Cc: stable@vger.kernel.org # 3.10+ Signed-off-by: Nicholas Bellinger Signed-off-by: Sasha Levin --- drivers/target/target_core_tmr.c | 22 +++++++++++++++++++++- drivers/target/target_core_transport.c | 17 +++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c index b2e169fba3c68..adb8016955c49 100644 --- a/drivers/target/target_core_tmr.c +++ b/drivers/target/target_core_tmr.c @@ -71,7 +71,7 @@ void core_tmr_release_req(struct se_tmr_req *tmr) if (dev) { spin_lock_irqsave(&dev->se_tmr_lock, flags); - list_del(&tmr->tmr_list); + list_del_init(&tmr->tmr_list); spin_unlock_irqrestore(&dev->se_tmr_lock, flags); } @@ -175,9 +175,11 @@ static void core_tmr_drain_tmr_list( struct list_head *preempt_and_abort_list) { LIST_HEAD(drain_tmr_list); + struct se_session *sess; struct se_tmr_req *tmr_p, *tmr_pp; struct se_cmd *cmd; unsigned long flags; + bool rc; /* * Release all pending and outgoing TMRs aside from the received * LUN_RESET tmr.. @@ -203,17 +205,31 @@ static void core_tmr_drain_tmr_list( if (target_check_cdb_and_preempt(preempt_and_abort_list, cmd)) continue; + sess = cmd->se_sess; + if (WARN_ON_ONCE(!sess)) + continue; + + spin_lock(&sess->sess_cmd_lock); spin_lock(&cmd->t_state_lock); if (!(cmd->transport_state & CMD_T_ACTIVE)) { spin_unlock(&cmd->t_state_lock); + spin_unlock(&sess->sess_cmd_lock); continue; } if (cmd->t_state == TRANSPORT_ISTATE_PROCESSING) { spin_unlock(&cmd->t_state_lock); + spin_unlock(&sess->sess_cmd_lock); continue; } + cmd->transport_state |= CMD_T_ABORTED; spin_unlock(&cmd->t_state_lock); + rc = kref_get_unless_zero(&cmd->cmd_kref); + spin_unlock(&sess->sess_cmd_lock); + if (!rc) { + printk("LUN_RESET TMR: non-zero kref_get_unless_zero\n"); + continue; + } list_move_tail(&tmr_p->tmr_list, &drain_tmr_list); } spin_unlock_irqrestore(&dev->se_tmr_lock, flags); @@ -227,7 +243,11 @@ static void core_tmr_drain_tmr_list( (preempt_and_abort_list) ? 
"Preempt" : "", tmr_p, tmr_p->function, tmr_p->response, cmd->t_state); + cancel_work_sync(&cmd->work); + transport_wait_for_tasks(cmd); + transport_cmd_finish_abort(cmd, 1); + target_put_sess_cmd(cmd); } } diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 7a948943efc9c..3881504b40d83 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -3025,8 +3025,17 @@ static void target_tmr_work(struct work_struct *work) struct se_cmd *cmd = container_of(work, struct se_cmd, work); struct se_device *dev = cmd->se_dev; struct se_tmr_req *tmr = cmd->se_tmr_req; + unsigned long flags; int ret; + spin_lock_irqsave(&cmd->t_state_lock, flags); + if (cmd->transport_state & CMD_T_ABORTED) { + tmr->response = TMR_FUNCTION_REJECTED; + spin_unlock_irqrestore(&cmd->t_state_lock, flags); + goto check_stop; + } + spin_unlock_irqrestore(&cmd->t_state_lock, flags); + switch (tmr->function) { case TMR_ABORT_TASK: core_tmr_abort_task(dev, tmr, cmd->se_sess); @@ -3054,9 +3063,17 @@ static void target_tmr_work(struct work_struct *work) break; } + spin_lock_irqsave(&cmd->t_state_lock, flags); + if (cmd->transport_state & CMD_T_ABORTED) { + spin_unlock_irqrestore(&cmd->t_state_lock, flags); + goto check_stop; + } cmd->t_state = TRANSPORT_ISTATE_PROCESSING; + spin_unlock_irqrestore(&cmd->t_state_lock, flags); + cmd->se_tfo->queue_tm_rsp(cmd); +check_stop: transport_cmd_check_stop_to_fabric(cmd); } From 79179a68458a7fddcf35adb16d1f7a7880aa41e7 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 3 Feb 2016 19:17:27 +0000 Subject: [PATCH 1893/2091] Btrfs: fix hang on extent buffer lock caused by the inode_paths ioctl [ Upstream commit 0c0fe3b0fa45082cd752553fdb3a4b42503a118e ] While doing some tests I ran into an hang on an extent buffer's rwlock that produced the following trace: [39389.800012] NMI watchdog: BUG: soft lockup - CPU#15 stuck for 22s! [fdm-stress:32166] [39389.800016] NMI watchdog: BUG: soft lockup - CPU#14 stuck for 22s! 
[fdm-stress:32165] [39389.800016] Modules linked in: btrfs dm_mod ppdev xor sha256_generic hmac raid6_pq drbg ansi_cprng aesni_intel i2c_piix4 acpi_cpufreq aes_x86_64 ablk_helper tpm_tis parport_pc i2c_core sg cryptd evdev psmouse lrw tpm parport gf128mul serio_raw pcspkr glue_helper processor button loop autofs4 ext4 crc16 mbcache jbd2 sd_mod sr_mod cdrom ata_generic virtio_scsi ata_piix libata virtio_pci virtio_ring crc32c_intel scsi_mod e1000 virtio floppy [last unloaded: btrfs] [39389.800016] irq event stamp: 0 [39389.800016] hardirqs last enabled at (0): [< (null)>] (null) [39389.800016] hardirqs last disabled at (0): [] copy_process+0x638/0x1a35 [39389.800016] softirqs last enabled at (0): [] copy_process+0x638/0x1a35 [39389.800016] softirqs last disabled at (0): [< (null)>] (null) [39389.800016] CPU: 14 PID: 32165 Comm: fdm-stress Not tainted 4.4.0-rc6-btrfs-next-18+ #1 [39389.800016] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS by qemu-project.org 04/01/2014 [39389.800016] task: ffff880175b1ca40 ti: ffff8800a185c000 task.ti: ffff8800a185c000 [39389.800016] RIP: 0010:[] [] queued_spin_lock_slowpath+0x57/0x158 [39389.800016] RSP: 0018:ffff8800a185fb80 EFLAGS: 00000202 [39389.800016] RAX: 0000000000000101 RBX: ffff8801710c4e9c RCX: 0000000000000101 [39389.800016] RDX: 0000000000000100 RSI: 0000000000000001 RDI: 0000000000000001 [39389.800016] RBP: ffff8800a185fb98 R08: 0000000000000001 R09: 0000000000000000 [39389.800016] R10: ffff8800a185fb68 R11: 6db6db6db6db6db7 R12: ffff8801710c4e98 [39389.800016] R13: ffff880175b1ca40 R14: ffff8800a185fc10 R15: ffff880175b1ca40 [39389.800016] FS: 00007f6d37fff700(0000) GS:ffff8802be9c0000(0000) knlGS:0000000000000000 [39389.800016] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [39389.800016] CR2: 00007f6d300019b8 CR3: 0000000037c93000 CR4: 00000000001406e0 [39389.800016] Stack: [39389.800016] ffff8801710c4e98 ffff8801710c4e98 ffff880175b1ca40 ffff8800a185fbb0 [39389.800016] ffffffff81091e11 ffff8801710c4e98 ffff8800a185fbc8 ffffffff81091895 [39389.800016] ffff8801710c4e98 ffff8800a185fbe8 ffffffff81486c5c ffffffffa067288c [39389.800016] Call Trace: [39389.800016] [] queued_read_lock_slowpath+0x46/0x60 [39389.800016] [] do_raw_read_lock+0x3e/0x41 [39389.800016] [] _raw_read_lock+0x3d/0x44 [39389.800016] [] ? btrfs_tree_read_lock+0x54/0x125 [btrfs] [39389.800016] [] btrfs_tree_read_lock+0x54/0x125 [btrfs] [39389.800016] [] ? btrfs_find_item+0xa7/0xd2 [btrfs] [39389.800016] [] btrfs_ref_to_path+0xd6/0x174 [btrfs] [39389.800016] [] inode_to_path+0x53/0xa2 [btrfs] [39389.800016] [] paths_from_inode+0x117/0x2ec [btrfs] [39389.800016] [] btrfs_ioctl+0xd5b/0x2793 [btrfs] [39389.800016] [] ? arch_local_irq_save+0x9/0xc [39389.800016] [] ? __this_cpu_preempt_check+0x13/0x15 [39389.800016] [] ? arch_local_irq_save+0x9/0xc [39389.800016] [] ? rcu_read_unlock+0x3e/0x5d [39389.800016] [] do_vfs_ioctl+0x42b/0x4ea [39389.800016] [] ? 
__fget_light+0x62/0x71 [39389.800016] [] SyS_ioctl+0x57/0x79 [39389.800016] [] entry_SYSCALL_64_fastpath+0x12/0x6f [39389.800016] Code: b9 01 01 00 00 f7 c6 00 ff ff ff 75 32 83 fe 01 89 ca 89 f0 0f 45 d7 f0 0f b1 13 39 f0 74 04 89 c6 eb e2 ff ca 0f 84 fa 00 00 00 <8b> 03 84 c0 74 04 f3 90 eb f6 66 c7 03 01 00 e9 e6 00 00 00 e8 [39389.800012] Modules linked in: btrfs dm_mod ppdev xor sha256_generic hmac raid6_pq drbg ansi_cprng aesni_intel i2c_piix4 acpi_cpufreq aes_x86_64 ablk_helper tpm_tis parport_pc i2c_core sg cryptd evdev psmouse lrw tpm parport gf128mul serio_raw pcspkr glue_helper processor button loop autofs4 ext4 crc16 mbcache jbd2 sd_mod sr_mod cdrom ata_generic virtio_scsi ata_piix libata virtio_pci virtio_ring crc32c_intel scsi_mod e1000 virtio floppy [last unloaded: btrfs] [39389.800012] irq event stamp: 0 [39389.800012] hardirqs last enabled at (0): [< (null)>] (null) [39389.800012] hardirqs last disabled at (0): [] copy_process+0x638/0x1a35 [39389.800012] softirqs last enabled at (0): [] copy_process+0x638/0x1a35 [39389.800012] softirqs last disabled at (0): [< (null)>] (null) [39389.800012] CPU: 15 PID: 32166 Comm: fdm-stress Tainted: G L 4.4.0-rc6-btrfs-next-18+ #1 [39389.800012] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS by qemu-project.org 04/01/2014 [39389.800012] task: ffff880179294380 ti: ffff880034a60000 task.ti: ffff880034a60000 [39389.800012] RIP: 0010:[] [] queued_write_lock_slowpath+0x62/0x72 [39389.800012] RSP: 0018:ffff880034a639f0 EFLAGS: 00000206 [39389.800012] RAX: 0000000000000101 RBX: ffff8801710c4e98 RCX: 0000000000000000 [39389.800012] RDX: 00000000000000ff RSI: 0000000000000000 RDI: ffff8801710c4e9c [39389.800012] RBP: ffff880034a639f8 R08: 0000000000000001 R09: 0000000000000000 [39389.800012] R10: ffff880034a639b0 R11: 0000000000001000 R12: ffff8801710c4e98 [39389.800012] R13: 0000000000000001 R14: ffff880172cbc000 R15: ffff8801710c4e00 [39389.800012] FS: 00007f6d377fe700(0000) GS:ffff8802be9e0000(0000) knlGS:0000000000000000 [39389.800012] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [39389.800012] CR2: 00007f6d3d3c1000 CR3: 0000000037c93000 CR4: 00000000001406e0 [39389.800012] Stack: [39389.800012] ffff8801710c4e98 ffff880034a63a10 ffffffff81091963 ffff8801710c4e98 [39389.800012] ffff880034a63a30 ffffffff81486f1b ffffffffa0672cb3 ffff8801710c4e00 [39389.800012] ffff880034a63a78 ffffffffa0672cb3 ffff8801710c4e00 ffff880034a63a58 [39389.800012] Call Trace: [39389.800012] [] do_raw_write_lock+0x72/0x8c [39389.800012] [] _raw_write_lock+0x3a/0x41 [39389.800012] [] ? btrfs_tree_lock+0x119/0x251 [btrfs] [39389.800012] [] btrfs_tree_lock+0x119/0x251 [btrfs] [39389.800012] [] ? rcu_read_unlock+0x5b/0x5d [btrfs] [39389.800012] [] ? btrfs_root_node+0xda/0xe6 [btrfs] [39389.800012] [] btrfs_lock_root_node+0x22/0x42 [btrfs] [39389.800012] [] btrfs_search_slot+0x1b8/0x758 [btrfs] [39389.800012] [] ? time_hardirqs_on+0x15/0x28 [39389.800012] [] btrfs_lookup_inode+0x31/0x95 [btrfs] [39389.800012] [] ? trace_hardirqs_on+0xd/0xf [39389.800012] [] ? mutex_lock_nested+0x397/0x3bc [39389.800012] [] __btrfs_update_delayed_inode+0x59/0x1c0 [btrfs] [39389.800012] [] __btrfs_commit_inode_delayed_items+0x194/0x5aa [btrfs] [39389.800012] [] ? 
_raw_spin_unlock+0x31/0x44 [39389.800012] [] __btrfs_run_delayed_items+0xa4/0x15c [btrfs] [39389.800012] [] btrfs_run_delayed_items+0x11/0x13 [btrfs] [39389.800012] [] btrfs_commit_transaction+0x234/0x96e [btrfs] [39389.800012] [] btrfs_sync_fs+0x145/0x1ad [btrfs] [39389.800012] [] btrfs_ioctl+0x11d2/0x2793 [btrfs] [39389.800012] [] ? arch_local_irq_save+0x9/0xc [39389.800012] [] ? __might_fault+0x4c/0xa7 [39389.800012] [] ? __might_fault+0x4c/0xa7 [39389.800012] [] ? arch_local_irq_save+0x9/0xc [39389.800012] [] ? rcu_read_unlock+0x3e/0x5d [39389.800012] [] do_vfs_ioctl+0x42b/0x4ea [39389.800012] [] ? __fget_light+0x62/0x71 [39389.800012] [] SyS_ioctl+0x57/0x79 [39389.800012] [] entry_SYSCALL_64_fastpath+0x12/0x6f [39389.800012] Code: f0 0f b1 13 85 c0 75 ef eb 2a f3 90 8a 03 84 c0 75 f8 f0 0f b0 13 84 c0 75 f0 ba ff 00 00 00 eb 0a f0 0f b1 13 ff c8 74 0b f3 90 <8b> 03 83 f8 01 75 f7 eb ed c6 43 04 00 5b 5d c3 0f 1f 44 00 00 This happens because in the code path executed by the inode_paths ioctl we end up nesting two calls to read lock a leaf's rwlock when after the first call to read_lock() and before the second call to read_lock(), another task (running the delayed items as part of a transaction commit) has already called write_lock() against the leaf's rwlock. This situation is illustrated by the following diagram: Task A Task B btrfs_ref_to_path() btrfs_commit_transaction() read_lock(&eb->lock); btrfs_run_delayed_items() __btrfs_commit_inode_delayed_items() __btrfs_update_delayed_inode() btrfs_lookup_inode() write_lock(&eb->lock); --> task waits for lock read_lock(&eb->lock); --> makes this task hang forever (and task B too of course) So fix this by avoiding doing the nested read lock, which is easily avoidable. This issue does not happen if task B calls write_lock() after task A does the second call to read_lock(), however there does not seem to exist anything in the documentation that mentions what is the expected behaviour for recursive locking of rwlocks (leaving the idea that doing so is not a good usage of rwlocks). Also, as a side effect necessary for this fix, make sure we do not needlessly read lock extent buffers when the input path has skip_locking set (used when called from send). 
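The deadlock in the diagram is not btrfs-specific: with writer-fair rwlocks, a second read acquisition by the same task queues behind a blocked writer, which in turn waits for the first read to be released. The userspace sketch below shows the safe pattern the fix moves to (copy what is needed and drop the read lock before anything that might take it again) and keeps the broken nesting only in a comment; names and data are invented and rwlock fairness varies between implementations.

#include <pthread.h>
#include <string.h>
#include <stdio.h>

static pthread_rwlock_t eb_lock = PTHREAD_RWLOCK_INITIALIZER;
static char leaf_data[64] = "inode ref payload";

static void lookup_path_safely(char *out, size_t len)
{
	/* SAFE pattern: copy under the read lock, then release it before
	 * any step that might acquire it again (directly or via a helper). */
	pthread_rwlock_rdlock(&eb_lock);
	strncpy(out, leaf_data, len - 1);
	out[len - 1] = '\0';
	pthread_rwlock_unlock(&eb_lock);

	/* UNSAFE pattern (what the bug did):
	 *   rdlock(&eb_lock);       first read lock held
	 *   ... another task queues wrlock(&eb_lock) ...
	 *   rdlock(&eb_lock);       second read lock blocks behind the
	 *                           writer -> both tasks hang forever
	 */
}

int main(void)
{
	char buf[64];

	lookup_path_safely(buf, sizeof(buf));
	printf("%s\n", buf);
	return 0;
}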
Cc: stable@vger.kernel.org Signed-off-by: Filipe Manana Signed-off-by: Sasha Levin --- fs/btrfs/backref.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 723470850b946..30bc9fa763bd5 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -1369,7 +1369,8 @@ char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, read_extent_buffer(eb, dest + bytes_left, name_off, name_len); if (eb != eb_in) { - btrfs_tree_read_unlock_blocking(eb); + if (!path->skip_locking) + btrfs_tree_read_unlock_blocking(eb); free_extent_buffer(eb); } ret = btrfs_find_item(fs_root, path, parent, 0, @@ -1389,9 +1390,10 @@ char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, eb = path->nodes[0]; /* make sure we can use eb after releasing the path */ if (eb != eb_in) { - atomic_inc(&eb->refs); - btrfs_tree_read_lock(eb); - btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); + if (!path->skip_locking) + btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); + path->nodes[0] = NULL; + path->locks[0] = 0; } btrfs_release_path(path); iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref); From d1d1545f542c5ab95d2bbec49f7b425fcf1f4166 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Fri, 22 Jan 2016 15:42:41 +0100 Subject: [PATCH 1894/2091] scsi_dh_rdac: always retry MODE SELECT on command lock violation [ Upstream commit d2d06d4fe0f2cc2df9b17fefec96e6e1a1271d91 ] If MODE SELECT returns with sense '05/91/36' (command lock violation) it should always be retried without counting the number of retries. During an HBA upgrade or similar circumstances one might see a flood of MODE SELECT command from various HBAs, which will easily trigger the sense code and exceed the retry count. Cc: Signed-off-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/device_handler/scsi_dh_rdac.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/device_handler/scsi_dh_rdac.c b/drivers/scsi/device_handler/scsi_dh_rdac.c index b46ace3d4bf0c..dd0c133aa3127 100644 --- a/drivers/scsi/device_handler/scsi_dh_rdac.c +++ b/drivers/scsi/device_handler/scsi_dh_rdac.c @@ -568,7 +568,7 @@ static int mode_select_handle_sense(struct scsi_device *sdev, /* * Command Lock contention */ - err = SCSI_DH_RETRY; + err = SCSI_DH_IMM_RETRY; break; default: break; @@ -618,6 +618,8 @@ static void send_mode_select(struct work_struct *work) err = mode_select_handle_sense(sdev, h->sense); if (err == SCSI_DH_RETRY && retry_cnt--) goto retry; + if (err == SCSI_DH_IMM_RETRY) + goto retry; } if (err == SCSI_DH_OK) { h->state = RDAC_STATE_ACTIVE; From 75b546e5147b81fe064454ba8a06ee58590f0f0d Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Wed, 27 Jan 2016 16:19:13 +0200 Subject: [PATCH 1895/2091] SCSI: Add Marvell Console to VPD blacklist [ Upstream commit 82c43310508eb19eb41fe7862e89afeb74030b84 ] I have a Marvell 88SE9230 SATA Controller that has some sort of integrated console SCSI device attached to one of the ports. 
ata14: SATA link up 1.5 Gbps (SStatus 113 SControl 300) ata14.00: ATAPI: MARVELL VIRTUALL, 1.09, max UDMA/66 ata14.00: configured for UDMA/66 scsi 13:0:0:0: Processor Marvell Console 1.01 PQ: 0 ANSI: 5 Sending it VPD INQUIRY command seem to always fail with following error: ata14.00: exception Emask 0x0 SAct 0x0 SErr 0x0 action 0x6 ata14.00: irq_stat 0x40000001 ata14.00: cmd a0/01:00:00:00:01/00:00:00:00:00/a0 tag 2 dma 16640 in Inquiry 12 01 00 00 ff 00res 00/00:00:00:00:00/00:00:00:00:00/00 Emask 0x3 (HSM violation) ata14: hard resetting link This has been minor annoyance (only error printed on dmesg) until commit 09e2b0b14690 ("scsi: rescan VPD attributes") added call to scsi_attach_vpd() in scsi_rescan_device(). The commit causes the system to splat out following errors continuously without ever reaching the UI: ata14.00: configured for UDMA/66 ata14: EH complete ata14.00: exception Emask 0x0 SAct 0x0 SErr 0x0 action 0x6 ata14.00: irq_stat 0x40000001 ata14.00: cmd a0/01:00:00:00:01/00:00:00:00:00/a0 tag 6 dma 16640 in Inquiry 12 01 00 00 ff 00res 00/00:00:00:00:00/00:00:00:00:00/00 Emask 0x3 (HSM violation) ata14: hard resetting link ata14: SATA link up 1.5 Gbps (SStatus 113 SControl 300) ata14.00: configured for UDMA/66 ata14: EH complete ata14.00: exception Emask 0x0 SAct 0x0 SErr 0x0 action 0x6 ata14.00: irq_stat 0x40000001 ata14.00: cmd a0/01:00:00:00:01/00:00:00:00:00/a0 tag 7 dma 16640 in Inquiry 12 01 00 00 ff 00res 00/00:00:00:00:00/00:00:00:00:00/00 Emask 0x3 (HSM violation) Without in-depth understanding of SCSI layer and the Marvell controller, I suspect this happens because when the link goes down (because of an error) we schedule scsi_rescan_device() which again fails to read VPD data... ad infinitum. Since VPD data cannot be read from the device anyway we prevent the SCSI layer from even trying by blacklisting the device. This gets away the error and the system starts up normally. [mkp: Widened the match to all revisions of this device] Cc: Signed-off-by: Mika Westerberg Reported-by: Kirill A. Shutemov Reported-by: Alexander Duyck Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/scsi_devinfo.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c index 64ed88a67e6e9..ac418e73536d7 100644 --- a/drivers/scsi/scsi_devinfo.c +++ b/drivers/scsi/scsi_devinfo.c @@ -205,6 +205,7 @@ static struct { {"Intel", "Multi-Flex", NULL, BLIST_NO_RSOC}, {"iRiver", "iFP Mass Driver", NULL, BLIST_NOT_LOCKABLE | BLIST_INQUIRY_36}, {"LASOUND", "CDX7405", "3.10", BLIST_MAX5LUN | BLIST_SINGLELUN}, + {"Marvell", "Console", NULL, BLIST_SKIP_VPD_PAGES}, {"MATSHITA", "PD-1", NULL, BLIST_FORCELUN | BLIST_SINGLELUN}, {"MATSHITA", "DMC-LC5", NULL, BLIST_NOT_LOCKABLE | BLIST_INQUIRY_36}, {"MATSHITA", "DMC-LC40", NULL, BLIST_NOT_LOCKABLE | BLIST_INQUIRY_36}, From 6cf9d4c32e8e2716eb8e9cb578dbcb2b2cad04b3 Mon Sep 17 00:00:00 2001 From: David Henningsson Date: Fri, 5 Feb 2016 09:05:41 +0100 Subject: [PATCH 1896/2091] ALSA: hda - Fix static checker warning in patch_hdmi.c [ Upstream commit 360a8245680053619205a3ae10e6bfe624a5da1d ] The static checker warning is: sound/pci/hda/patch_hdmi.c:460 hdmi_eld_ctl_get() error: __memcpy() 'eld->eld_buffer' too small (256 vs 512) I have a hard time figuring out if this can ever cause an information leak (I don't think so), but nonetheless it does not hurt to increase the robustness of the code. 
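The hardening amounts to the usual "check both sides of a copy" rule: before memcpy'ing a variable-length blob into a fixed control payload, reject sizes that exceed either the destination or the source buffer capacity. A minimal standalone version follows; sizes, struct layout, and names are invented for illustration and do not mirror the HDA code.

#include <stdio.h>
#include <string.h>

#define ELD_MAX_SIZE   256	/* capacity of the source buffer   */
#define CTL_BYTES_SIZE 512	/* capacity of the destination     */

struct eld { int size; unsigned char buf[ELD_MAX_SIZE]; };

static int copy_eld(unsigned char *dst, size_t dst_len, const struct eld *eld)
{
	/* bound the copy by BOTH buffers, not just the destination */
	if (eld->size < 0 ||
	    (size_t)eld->size > dst_len ||
	    (size_t)eld->size > sizeof(eld->buf))
		return -1;		/* -EINVAL in the driver */

	memset(dst, 0, dst_len);
	memcpy(dst, eld->buf, (size_t)eld->size);
	return 0;
}

int main(void)
{
	unsigned char payload[CTL_BYTES_SIZE];
	struct eld e = { .size = 300 };		/* would over-read buf[256] */

	printf("copy: %d\n", copy_eld(payload, sizeof(payload), &e));
	return 0;
}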
Fixes: 68e03de98507 ('ALSA: hda - hdmi: Do not expose eld data when eld is invalid') Reported-by: Dan Carpenter Signed-off-by: David Henningsson Cc: # v3.9+ Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/hda/patch_hdmi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index d02eccd51f6e8..063d82345bb45 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -433,7 +433,8 @@ static int hdmi_eld_ctl_get(struct snd_kcontrol *kcontrol, eld = &per_pin->sink_eld; mutex_lock(&per_pin->lock); - if (eld->eld_size > ARRAY_SIZE(ucontrol->value.bytes.data)) { + if (eld->eld_size > ARRAY_SIZE(ucontrol->value.bytes.data) || + eld->eld_size > ELD_MAX_SIZE) { mutex_unlock(&per_pin->lock); snd_BUG(); return -EINVAL; From 0951360a3c8e4633016836a1adec7c4b1f090f3c Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 5 Feb 2016 20:12:24 +0100 Subject: [PATCH 1897/2091] Revert "ALSA: hda - Fix noise on Gigabyte Z170X mobo" [ Upstream commit 6c361d10e0eb859233c71954abcd20d2d8700587 ] This reverts commit 0c25ad80408e95e0a4fbaf0056950206e95f726f. The original commit disabled the aamixer path due to the noise problem, but it turned out that some mobo with the same PCI SSID doesn't suffer from the issue, and the disabled function (analog loopback) is still demanded by users. Since the recent commit [e7fdd52779a6: ALSA: hda - Implement loopback control switch for Realtek and other codecs], we have the dynamic mixer switch to enable/disable the aamix path, and we don't have to disable the path statically any longer. So, let's revert the disablement, so that only the user suffering from the noise problem can turn off the aamix on the fly. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=108301 Reported-by: Cc: Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/hda/patch_realtek.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 8189f02f84462..e589d5ff1d0bb 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -1792,7 +1792,6 @@ enum { ALC882_FIXUP_NO_PRIMARY_HP, ALC887_FIXUP_ASUS_BASS, ALC887_FIXUP_BASS_CHMAP, - ALC882_FIXUP_DISABLE_AAMIX, }; static void alc889_fixup_coef(struct hda_codec *codec, @@ -1954,8 +1953,6 @@ static void alc882_fixup_no_primary_hp(struct hda_codec *codec, static void alc_fixup_bass_chmap(struct hda_codec *codec, const struct hda_fixup *fix, int action); -static void alc_fixup_disable_aamix(struct hda_codec *codec, - const struct hda_fixup *fix, int action); static const struct hda_fixup alc882_fixups[] = { [ALC882_FIXUP_ABIT_AW9D_MAX] = { @@ -2193,10 +2190,6 @@ static const struct hda_fixup alc882_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc_fixup_bass_chmap, }, - [ALC882_FIXUP_DISABLE_AAMIX] = { - .type = HDA_FIXUP_FUNC, - .v.func = alc_fixup_disable_aamix, - }, }; static const struct snd_pci_quirk alc882_fixup_tbl[] = { @@ -2264,7 +2257,6 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK(0x1462, 0x7350, "MSI-7350", ALC889_FIXUP_CD), SND_PCI_QUIRK_VENDOR(0x1462, "MSI", ALC882_FIXUP_GPIO3), SND_PCI_QUIRK(0x1458, 0xa002, "Gigabyte EP45-DS3/Z87X-UD3H", ALC889_FIXUP_FRONT_HP_NO_PRESENCE), - SND_PCI_QUIRK(0x1458, 0xa182, "Gigabyte Z170X-UD3", ALC882_FIXUP_DISABLE_AAMIX), SND_PCI_QUIRK(0x147b, 0x107a, "Abit AW9D-MAX", ALC882_FIXUP_ABIT_AW9D_MAX), SND_PCI_QUIRK_VENDOR(0x1558, "Clevo laptop", ALC882_FIXUP_EAPD), 
SND_PCI_QUIRK(0x161f, 0x2054, "Medion laptop", ALC883_FIXUP_EAPD), From 09b09ced1e4e3afb2767ee96edc9d1d57fc1c55b Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Mon, 1 Feb 2016 14:27:30 +0100 Subject: [PATCH 1898/2091] crypto: user - lock crypto_alg_list on alg dump [ Upstream commit 63e41ebc6630f39422d87f8a4bade1e793f37a01 ] We miss to take the crypto_alg_sem semaphore when traversing the crypto_alg_list for CRYPTO_MSG_GETALG dumps. This allows a race with crypto_unregister_alg() removing algorithms from the list while we're still traversing it, thereby leading to a use-after-free as show below: [ 3482.071639] general protection fault: 0000 [#1] SMP [ 3482.075639] Modules linked in: aes_x86_64 glue_helper lrw ablk_helper cryptd gf128mul ipv6 pcspkr serio_raw virtio_net microcode virtio_pci virtio_ring virtio sr_mod cdrom [last unloaded: aesni_intel] [ 3482.075639] CPU: 1 PID: 11065 Comm: crconf Not tainted 4.3.4-grsec+ #126 [ 3482.075639] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.7.5-20140531_083030-gandalf 04/01/2014 [ 3482.075639] task: ffff88001cd41a40 ti: ffff88001cd422c8 task.ti: ffff88001cd422c8 [ 3482.075639] RIP: 0010:[] [] strncpy+0x13/0x30 [ 3482.075639] RSP: 0018:ffff88001f713b60 EFLAGS: 00010202 [ 3482.075639] RAX: ffff88001f6c4430 RBX: ffff88001f6c43a0 RCX: ffff88001f6c4430 [ 3482.075639] RDX: 0000000000000040 RSI: fefefefefefeff16 RDI: ffff88001f6c4430 [ 3482.075639] RBP: ffff88001f713b60 R08: ffff88001f6c4470 R09: ffff88001f6c4480 [ 3482.075639] R10: 0000000000000002 R11: 0000000000000246 R12: ffff88001ce2aa28 [ 3482.075639] R13: ffff880000093700 R14: ffff88001f5e4bf8 R15: 0000000000003b20 [ 3482.075639] FS: 0000033826fa2700(0000) GS:ffff88001e900000(0000) knlGS:0000000000000000 [ 3482.075639] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 3482.075639] CR2: ffffffffff600400 CR3: 00000000139ec000 CR4: 00000000001606f0 [ 3482.075639] Stack: [ 3482.075639] ffff88001f713bd8 ffffffff936ccd00 ffff88001e5c4200 ffff880000093700 [ 3482.075639] ffff88001f713bd0 ffffffff938ef4bf 0000000000000000 0000000000003b20 [ 3482.075639] ffff88001f5e4bf8 ffff88001f5e4848 0000000000000000 0000000000003b20 [ 3482.075639] Call Trace: [ 3482.075639] [] crypto_report_alg+0xc0/0x3e0 [ 3482.075639] [] ? __alloc_skb+0x16f/0x300 [ 3482.075639] [] crypto_dump_report+0x6a/0x90 [ 3482.075639] [] netlink_dump+0x147/0x2e0 [ 3482.075639] [] __netlink_dump_start+0x159/0x190 [ 3482.075639] [] crypto_user_rcv_msg+0xc3/0x130 [ 3482.075639] [] ? crypto_report_alg+0x3e0/0x3e0 [ 3482.075639] [] ? alg_test_crc32c+0x120/0x120 [ 3482.075639] [] ? __netlink_lookup+0xd5/0x120 [ 3482.075639] [] ? crypto_add_alg+0x1d0/0x1d0 [ 3482.075639] [] netlink_rcv_skb+0xe1/0x130 [ 3482.075639] [] crypto_netlink_rcv+0x28/0x40 [ 3482.075639] [] netlink_unicast+0x108/0x180 [ 3482.075639] [] netlink_sendmsg+0x541/0x770 [ 3482.075639] [] sock_sendmsg+0x21/0x40 [ 3482.075639] [] SyS_sendto+0xf3/0x130 [ 3482.075639] [] ? bad_area_nosemaphore+0x13/0x20 [ 3482.075639] [] ? 
__do_page_fault+0x80/0x3a0 [ 3482.075639] [] entry_SYSCALL_64_fastpath+0x12/0x6e [ 3482.075639] Code: 88 4a ff 75 ed 5d 48 0f ba 2c 24 3f c3 66 66 2e 0f 1f 84 00 00 00 00 00 55 48 85 d2 48 89 f8 48 89 f9 4c 8d 04 17 48 89 e5 74 15 <0f> b6 16 80 fa 01 88 11 48 83 de ff 48 83 c1 01 4c 39 c1 75 eb [ 3482.075639] RIP [] strncpy+0x13/0x30 To trigger the race run the following loops simultaneously for a while: $ while : ; do modprobe aesni-intel; rmmod aesni-intel; done $ while : ; do crconf show all > /dev/null; done Fix the race by taking the crypto_alg_sem read lock, thereby preventing crypto_unregister_alg() from modifying the algorithm list during the dump. This bug has been detected by the PaX memory sanitize feature. Cc: stable@vger.kernel.org Signed-off-by: Mathias Krause Cc: Steffen Klassert Cc: PaX Team Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- crypto/crypto_user.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/crypto/crypto_user.c b/crypto/crypto_user.c index edf2e3ea17407..6a050e12fcdf1 100644 --- a/crypto/crypto_user.c +++ b/crypto/crypto_user.c @@ -499,6 +499,7 @@ static int crypto_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (link->dump == NULL) return -EINVAL; + down_read(&crypto_alg_sem); list_for_each_entry(alg, &crypto_alg_list, cra_list) dump_alloc += CRYPTO_REPORT_MAXSIZE; @@ -508,8 +509,11 @@ static int crypto_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) .done = link->done, .min_dump_alloc = dump_alloc, }; - return netlink_dump_start(crypto_nlsk, skb, nlh, &c); + err = netlink_dump_start(crypto_nlsk, skb, nlh, &c); } + up_read(&crypto_alg_sem); + + return err; } err = nlmsg_parse(nlh, crypto_msg_min[type], attrs, CRYPTOCFGA_MAX, From 762330b161c49c6d88ab689a0ee2a1a959dc5b6b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 26 Feb 2016 12:44:08 +0100 Subject: [PATCH 1899/2091] Backport fix for crypto: algif_skcipher - Require setkey before accept(2) commit dd504589577d8e8e70f51f997ad487a4cb6c026f upstream. Some cipher implementations will crash if you try to use them without calling setkey first. This patch adds a check so that the accept(2) call will fail with -ENOKEY if setkey hasn't been done on the socket yet. 
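For illustration, a minimal userspace sequence that satisfies the new requirement could look like the sketch below (error handling omitted; the helper name, cipher and key length are arbitrary examples):

    #include <sys/socket.h>
    #include <linux/if_alg.h>

    #ifndef SOL_ALG
    #define SOL_ALG 279
    #endif

    int open_cbc_aes(const unsigned char key[16])
    {
            struct sockaddr_alg sa = {
                    .salg_family = AF_ALG,
                    .salg_type   = "skcipher",
                    .salg_name   = "cbc(aes)",
            };
            int tfmfd = socket(AF_ALG, SOCK_SEQPACKET, 0);

            bind(tfmfd, (struct sockaddr *)&sa, sizeof(sa));
            /* the key must be set on the tfm socket first... */
            setsockopt(tfmfd, SOL_ALG, ALG_SET_KEY, key, 16);
            /* ...otherwise this accept(2) now fails with ENOKEY */
            return accept(tfmfd, NULL, 0);
    }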
Cc: stable@vger.kernel.org Reported-by: Dmitry Vyukov Signed-off-by: Herbert Xu Tested-by: Dmitry Vyukov [backported to 4.1 by Milan Broz ] Signed-off-by: Sasha Levin --- crypto/algif_skcipher.c | 48 +++++++++++++++++++++++++++++++++++------ 1 file changed, 41 insertions(+), 7 deletions(-) diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index 5bc42f9b23f0d..1c9879d4a46da 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -31,6 +31,11 @@ struct skcipher_sg_list { struct scatterlist sg[0]; }; +struct skcipher_tfm { + struct crypto_ablkcipher *skcipher; + bool has_key; +}; + struct skcipher_ctx { struct list_head tsgl; struct af_alg_sgl rsgl; @@ -752,17 +757,41 @@ static struct proto_ops algif_skcipher_ops = { static void *skcipher_bind(const char *name, u32 type, u32 mask) { - return crypto_alloc_ablkcipher(name, type, mask); + struct skcipher_tfm *tfm; + struct crypto_ablkcipher *skcipher; + + tfm = kzalloc(sizeof(*tfm), GFP_KERNEL); + if (!tfm) + return ERR_PTR(-ENOMEM); + + skcipher = crypto_alloc_ablkcipher(name, type, mask); + if (IS_ERR(skcipher)) { + kfree(tfm); + return ERR_CAST(skcipher); + } + + tfm->skcipher = skcipher; + + return tfm; } static void skcipher_release(void *private) { - crypto_free_ablkcipher(private); + struct skcipher_tfm *tfm = private; + + crypto_free_ablkcipher(tfm->skcipher); + kfree(tfm); } static int skcipher_setkey(void *private, const u8 *key, unsigned int keylen) { - return crypto_ablkcipher_setkey(private, key, keylen); + struct skcipher_tfm *tfm = private; + int err; + + err = crypto_ablkcipher_setkey(tfm->skcipher, key, keylen); + tfm->has_key = !err; + + return err; } static void skcipher_wait(struct sock *sk) @@ -794,20 +823,25 @@ static int skcipher_accept_parent(void *private, struct sock *sk) { struct skcipher_ctx *ctx; struct alg_sock *ask = alg_sk(sk); - unsigned int len = sizeof(*ctx) + crypto_ablkcipher_reqsize(private); + struct skcipher_tfm *tfm = private; + struct crypto_ablkcipher *skcipher = tfm->skcipher; + unsigned int len = sizeof(*ctx) + crypto_ablkcipher_reqsize(skcipher); + + if (!tfm->has_key) + return -ENOKEY; ctx = sock_kmalloc(sk, len, GFP_KERNEL); if (!ctx) return -ENOMEM; - ctx->iv = sock_kmalloc(sk, crypto_ablkcipher_ivsize(private), + ctx->iv = sock_kmalloc(sk, crypto_ablkcipher_ivsize(skcipher), GFP_KERNEL); if (!ctx->iv) { sock_kfree_s(sk, ctx, len); return -ENOMEM; } - memset(ctx->iv, 0, crypto_ablkcipher_ivsize(private)); + memset(ctx->iv, 0, crypto_ablkcipher_ivsize(skcipher)); INIT_LIST_HEAD(&ctx->tsgl); ctx->len = len; @@ -820,7 +854,7 @@ static int skcipher_accept_parent(void *private, struct sock *sk) ask->private = ctx; - ablkcipher_request_set_tfm(&ctx->req, private); + ablkcipher_request_set_tfm(&ctx->req, skcipher); ablkcipher_request_set_callback(&ctx->req, CRYPTO_TFM_REQ_MAY_BACKLOG, af_alg_complete, &ctx->completion); From 1ce5c14e78a7c44a6f9c6af3548748dbe22a80af Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 26 Feb 2016 12:44:09 +0100 Subject: [PATCH 1900/2091] Backport fix for crypto: algif_skcipher - Add nokey compatibility path commit a0fa2d037129a9849918a92d91b79ed6c7bd2818 upstream. This patch adds a compatibility path to support old applications that do acept(2) before setkey. 
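To make the supported ordering concrete, a sketch of what such old applications do (reusing the sockaddr_alg setup from the previous example, error handling omitted) and what this patch keeps working:

    int tfmfd = socket(AF_ALG, SOCK_SEQPACKET, 0);
    bind(tfmfd, (struct sockaddr *)&sa, sizeof(sa));
    int opfd = accept(tfmfd, NULL, 0);                 /* accepted even without a key */
    setsockopt(tfmfd, SOL_ALG, ALG_SET_KEY, key, 16);  /* key is set afterwards */
    /* actual I/O on opfd still requires the key: until it is set, the
     * *_nokey wrappers below return -ENOKEY from sendmsg/recvmsg */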
Cc: stable@vger.kernel.org Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- crypto/algif_skcipher.c | 149 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 144 insertions(+), 5 deletions(-) diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index 1c9879d4a46da..566df2c9991f4 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -755,6 +755,99 @@ static struct proto_ops algif_skcipher_ops = { .poll = skcipher_poll, }; +static int skcipher_check_key(struct socket *sock) +{ + int err; + struct sock *psk; + struct alg_sock *pask; + struct skcipher_tfm *tfm; + struct sock *sk = sock->sk; + struct alg_sock *ask = alg_sk(sk); + + if (ask->refcnt) + return 0; + + psk = ask->parent; + pask = alg_sk(ask->parent); + tfm = pask->private; + + err = -ENOKEY; + lock_sock(psk); + if (!tfm->has_key) + goto unlock; + + if (!pask->refcnt++) + sock_hold(psk); + + ask->refcnt = 1; + sock_put(psk); + + err = 0; + +unlock: + release_sock(psk); + + return err; +} + +static int skcipher_sendmsg_nokey(struct socket *sock, struct msghdr *msg, + size_t size) +{ + int err; + + err = skcipher_check_key(sock); + if (err) + return err; + + return skcipher_sendmsg(sock, msg, size); +} + +static ssize_t skcipher_sendpage_nokey(struct socket *sock, struct page *page, + int offset, size_t size, int flags) +{ + int err; + + err = skcipher_check_key(sock); + if (err) + return err; + + return skcipher_sendpage(sock, page, offset, size, flags); +} + +static int skcipher_recvmsg_nokey(struct socket *sock, struct msghdr *msg, + size_t ignored, int flags) +{ + int err; + + err = skcipher_check_key(sock); + if (err) + return err; + + return skcipher_recvmsg(sock, msg, ignored, flags); +} + +static struct proto_ops algif_skcipher_ops_nokey = { + .family = PF_ALG, + + .connect = sock_no_connect, + .socketpair = sock_no_socketpair, + .getname = sock_no_getname, + .ioctl = sock_no_ioctl, + .listen = sock_no_listen, + .shutdown = sock_no_shutdown, + .getsockopt = sock_no_getsockopt, + .mmap = sock_no_mmap, + .bind = sock_no_bind, + .accept = sock_no_accept, + .setsockopt = sock_no_setsockopt, + + .release = af_alg_release, + .sendmsg = skcipher_sendmsg_nokey, + .sendpage = skcipher_sendpage_nokey, + .recvmsg = skcipher_recvmsg_nokey, + .poll = skcipher_poll, +}; + static void *skcipher_bind(const char *name, u32 type, u32 mask) { struct skcipher_tfm *tfm; @@ -804,7 +897,7 @@ static void skcipher_wait(struct sock *sk) msleep(100); } -static void skcipher_sock_destruct(struct sock *sk) +static void skcipher_sock_destruct_common(struct sock *sk) { struct alg_sock *ask = alg_sk(sk); struct skcipher_ctx *ctx = ask->private; @@ -816,10 +909,33 @@ static void skcipher_sock_destruct(struct sock *sk) skcipher_free_sgl(sk); sock_kzfree_s(sk, ctx->iv, crypto_ablkcipher_ivsize(tfm)); sock_kfree_s(sk, ctx, ctx->len); +} + +static void skcipher_sock_destruct(struct sock *sk) +{ + skcipher_sock_destruct_common(sk); af_alg_release_parent(sk); } -static int skcipher_accept_parent(void *private, struct sock *sk) +static void skcipher_release_parent_nokey(struct sock *sk) +{ + struct alg_sock *ask = alg_sk(sk); + + if (!ask->refcnt) { + sock_put(ask->parent); + return; + } + + af_alg_release_parent(sk); +} + +static void skcipher_sock_destruct_nokey(struct sock *sk) +{ + skcipher_sock_destruct_common(sk); + skcipher_release_parent_nokey(sk); +} + +static int skcipher_accept_parent_common(void *private, struct sock *sk) { struct skcipher_ctx *ctx; struct alg_sock *ask = alg_sk(sk); @@ -827,9 +943,6 @@ static int 
skcipher_accept_parent(void *private, struct sock *sk) struct crypto_ablkcipher *skcipher = tfm->skcipher; unsigned int len = sizeof(*ctx) + crypto_ablkcipher_reqsize(skcipher); - if (!tfm->has_key) - return -ENOKEY; - ctx = sock_kmalloc(sk, len, GFP_KERNEL); if (!ctx) return -ENOMEM; @@ -863,12 +976,38 @@ static int skcipher_accept_parent(void *private, struct sock *sk) return 0; } +static int skcipher_accept_parent(void *private, struct sock *sk) +{ + struct skcipher_tfm *tfm = private; + + if (!tfm->has_key) + return -ENOKEY; + + return skcipher_accept_parent_common(private, sk); +} + +static int skcipher_accept_parent_nokey(void *private, struct sock *sk) +{ + int err; + + err = skcipher_accept_parent_common(private, sk); + if (err) + goto out; + + sk->sk_destruct = skcipher_sock_destruct_nokey; + +out: + return err; +} + static const struct af_alg_type algif_type_skcipher = { .bind = skcipher_bind, .release = skcipher_release, .setkey = skcipher_setkey, .accept = skcipher_accept_parent, + .accept_nokey = skcipher_accept_parent_nokey, .ops = &algif_skcipher_ops, + .ops_nokey = &algif_skcipher_ops_nokey, .name = "skcipher", .owner = THIS_MODULE }; From 7327b23abeb498c103125f234c7698115db5970b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 26 Feb 2016 12:44:10 +0100 Subject: [PATCH 1901/2091] Backport fix for crypto: algif_skcipher - Remove custom release parent function commit d7b65aee1e7b4c87922b0232eaba56a8a143a4a0 upstream. This patch removes the custom release parent function as the generic af_alg_release_parent now works for nokey sockets too. Cc: stable@vger.kernel.org Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- crypto/algif_skcipher.c | 43 +++-------------------------------------- 1 file changed, 3 insertions(+), 40 deletions(-) diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index 566df2c9991f4..83bcf75b358e0 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -897,7 +897,7 @@ static void skcipher_wait(struct sock *sk) msleep(100); } -static void skcipher_sock_destruct_common(struct sock *sk) +static void skcipher_sock_destruct(struct sock *sk) { struct alg_sock *ask = alg_sk(sk); struct skcipher_ctx *ctx = ask->private; @@ -909,33 +909,10 @@ static void skcipher_sock_destruct_common(struct sock *sk) skcipher_free_sgl(sk); sock_kzfree_s(sk, ctx->iv, crypto_ablkcipher_ivsize(tfm)); sock_kfree_s(sk, ctx, ctx->len); -} - -static void skcipher_sock_destruct(struct sock *sk) -{ - skcipher_sock_destruct_common(sk); - af_alg_release_parent(sk); -} - -static void skcipher_release_parent_nokey(struct sock *sk) -{ - struct alg_sock *ask = alg_sk(sk); - - if (!ask->refcnt) { - sock_put(ask->parent); - return; - } - af_alg_release_parent(sk); } -static void skcipher_sock_destruct_nokey(struct sock *sk) -{ - skcipher_sock_destruct_common(sk); - skcipher_release_parent_nokey(sk); -} - -static int skcipher_accept_parent_common(void *private, struct sock *sk) +static int skcipher_accept_parent_nokey(void *private, struct sock *sk) { struct skcipher_ctx *ctx; struct alg_sock *ask = alg_sk(sk); @@ -983,21 +960,7 @@ static int skcipher_accept_parent(void *private, struct sock *sk) if (!tfm->has_key) return -ENOKEY; - return skcipher_accept_parent_common(private, sk); -} - -static int skcipher_accept_parent_nokey(void *private, struct sock *sk) -{ - int err; - - err = skcipher_accept_parent_common(private, sk); - if (err) - goto out; - - sk->sk_destruct = skcipher_sock_destruct_nokey; - -out: - return err; + return 
skcipher_accept_parent_nokey(private, sk); } static const struct af_alg_type algif_type_skcipher = { From 64c4131f16acba869e5280ee46e7dbf93fd8b960 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 26 Feb 2016 12:44:11 +0100 Subject: [PATCH 1902/2091] Backport fix for crypto: algif_skcipher - Fix race condition in skcipher_check_key commit 1822793a523e5d5730b19cc21160ff1717421bc8 upstream. We need to lock the child socket in skcipher_check_key as otherwise two simultaneous calls can cause the parent socket to be freed. Cc: stable@vger.kernel.org Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- crypto/algif_skcipher.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index 83bcf75b358e0..c0f03562a1457 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -757,22 +757,23 @@ static struct proto_ops algif_skcipher_ops = { static int skcipher_check_key(struct socket *sock) { - int err; + int err = 0; struct sock *psk; struct alg_sock *pask; struct skcipher_tfm *tfm; struct sock *sk = sock->sk; struct alg_sock *ask = alg_sk(sk); + lock_sock(sk); if (ask->refcnt) - return 0; + goto unlock_child; psk = ask->parent; pask = alg_sk(ask->parent); tfm = pask->private; err = -ENOKEY; - lock_sock(psk); + lock_sock_nested(psk, SINGLE_DEPTH_NESTING); if (!tfm->has_key) goto unlock; @@ -786,6 +787,8 @@ static int skcipher_check_key(struct socket *sock) unlock: release_sock(psk); +unlock_child: + release_sock(sk); return err; } From 5955bf64814d8663117e9397373fb20f9f71dff0 Mon Sep 17 00:00:00 2001 From: LABBE Corentin Date: Mon, 12 Oct 2015 19:47:03 +0200 Subject: [PATCH 1903/2091] crypto: atmel - use devm_xxx() managed function [ Upstream commit b0e8b3417a620e6e0a91fd526fbc6db78714198e ] Using the devm_xxx() managed function to stripdown the error and remove code. 
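As a rough illustration of the devm_*() pattern (the foo_* names below are placeholders, not the driver's real symbols): every resource is bound to the device's lifetime, so the hand-written error unwinding and most of the remove() body disappear:

    static int foo_probe(struct platform_device *pdev)
    {
            struct resource *res;
            struct foo_dev *dd;
            int irq, err;

            dd = devm_kzalloc(&pdev->dev, sizeof(*dd), GFP_KERNEL);
            if (!dd)
                    return -ENOMEM;

            res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
            dd->io_base = devm_ioremap_resource(&pdev->dev, res);
            if (IS_ERR(dd->io_base))
                    return PTR_ERR(dd->io_base);

            irq = platform_get_irq(pdev, 0);
            if (irq < 0)
                    return irq;
            err = devm_request_irq(&pdev->dev, irq, foo_irq, IRQF_SHARED,
                                   dev_name(&pdev->dev), dd);
            if (err)
                    return err;

            dd->iclk = devm_clk_get(&pdev->dev, "foo_clk");
            if (IS_ERR(dd->iclk))
                    return PTR_ERR(dd->iclk);

            /* no iounmap()/free_irq()/clk_put() needed here or in remove() */
            return 0;
    }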
Signed-off-by: LABBE Corentin Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/crypto/atmel-aes.c | 38 +++++++++---------------------------- drivers/crypto/atmel-sha.c | 27 +++++++++----------------- drivers/crypto/atmel-tdes.c | 29 +++++++--------------------- 3 files changed, 25 insertions(+), 69 deletions(-) diff --git a/drivers/crypto/atmel-aes.c b/drivers/crypto/atmel-aes.c index 0f9a9dc06a830..6178c1640ca99 100644 --- a/drivers/crypto/atmel-aes.c +++ b/drivers/crypto/atmel-aes.c @@ -1320,7 +1320,6 @@ static int atmel_aes_probe(struct platform_device *pdev) struct crypto_platform_data *pdata; struct device *dev = &pdev->dev; struct resource *aes_res; - unsigned long aes_phys_size; int err; pdata = pdev->dev.platform_data; @@ -1337,7 +1336,7 @@ static int atmel_aes_probe(struct platform_device *pdev) goto aes_dd_err; } - aes_dd = kzalloc(sizeof(struct atmel_aes_dev), GFP_KERNEL); + aes_dd = devm_kzalloc(&pdev->dev, sizeof(*aes_dd), GFP_KERNEL); if (aes_dd == NULL) { dev_err(dev, "unable to alloc data struct.\n"); err = -ENOMEM; @@ -1368,36 +1367,35 @@ static int atmel_aes_probe(struct platform_device *pdev) goto res_err; } aes_dd->phys_base = aes_res->start; - aes_phys_size = resource_size(aes_res); /* Get the IRQ */ aes_dd->irq = platform_get_irq(pdev, 0); if (aes_dd->irq < 0) { dev_err(dev, "no IRQ resource info\n"); err = aes_dd->irq; - goto aes_irq_err; + goto res_err; } - err = request_irq(aes_dd->irq, atmel_aes_irq, IRQF_SHARED, "atmel-aes", - aes_dd); + err = devm_request_irq(&pdev->dev, aes_dd->irq, atmel_aes_irq, + IRQF_SHARED, "atmel-aes", aes_dd); if (err) { dev_err(dev, "unable to request aes irq.\n"); - goto aes_irq_err; + goto res_err; } /* Initializing the clock */ - aes_dd->iclk = clk_get(&pdev->dev, "aes_clk"); + aes_dd->iclk = devm_clk_get(&pdev->dev, "aes_clk"); if (IS_ERR(aes_dd->iclk)) { dev_err(dev, "clock initialization failed.\n"); err = PTR_ERR(aes_dd->iclk); - goto clk_err; + goto res_err; } - aes_dd->io_base = ioremap(aes_dd->phys_base, aes_phys_size); + aes_dd->io_base = devm_ioremap_resource(&pdev->dev, aes_res); if (!aes_dd->io_base) { dev_err(dev, "can't ioremap\n"); err = -ENOMEM; - goto aes_io_err; + goto res_err; } atmel_aes_hw_version_init(aes_dd); @@ -1434,17 +1432,9 @@ static int atmel_aes_probe(struct platform_device *pdev) err_aes_dma: atmel_aes_buff_cleanup(aes_dd); err_aes_buff: - iounmap(aes_dd->io_base); -aes_io_err: - clk_put(aes_dd->iclk); -clk_err: - free_irq(aes_dd->irq, aes_dd); -aes_irq_err: res_err: tasklet_kill(&aes_dd->done_task); tasklet_kill(&aes_dd->queue_task); - kfree(aes_dd); - aes_dd = NULL; aes_dd_err: dev_err(dev, "initialization failed.\n"); @@ -1469,16 +1459,6 @@ static int atmel_aes_remove(struct platform_device *pdev) atmel_aes_dma_cleanup(aes_dd); - iounmap(aes_dd->io_base); - - clk_put(aes_dd->iclk); - - if (aes_dd->irq > 0) - free_irq(aes_dd->irq, aes_dd); - - kfree(aes_dd); - aes_dd = NULL; - return 0; } diff --git a/drivers/crypto/atmel-sha.c b/drivers/crypto/atmel-sha.c index 5b35433c5399b..5f9da90db0605 100644 --- a/drivers/crypto/atmel-sha.c +++ b/drivers/crypto/atmel-sha.c @@ -1345,11 +1345,9 @@ static int atmel_sha_probe(struct platform_device *pdev) struct crypto_platform_data *pdata; struct device *dev = &pdev->dev; struct resource *sha_res; - unsigned long sha_phys_size; int err; - sha_dd = devm_kzalloc(&pdev->dev, sizeof(struct atmel_sha_dev), - GFP_KERNEL); + sha_dd = devm_kzalloc(&pdev->dev, sizeof(*sha_dd), GFP_KERNEL); if (sha_dd == NULL) { dev_err(dev, "unable to alloc data struct.\n"); 
err = -ENOMEM; @@ -1378,7 +1376,6 @@ static int atmel_sha_probe(struct platform_device *pdev) goto res_err; } sha_dd->phys_base = sha_res->start; - sha_phys_size = resource_size(sha_res); /* Get the IRQ */ sha_dd->irq = platform_get_irq(pdev, 0); @@ -1388,26 +1385,26 @@ static int atmel_sha_probe(struct platform_device *pdev) goto res_err; } - err = request_irq(sha_dd->irq, atmel_sha_irq, IRQF_SHARED, "atmel-sha", - sha_dd); + err = devm_request_irq(&pdev->dev, sha_dd->irq, atmel_sha_irq, + IRQF_SHARED, "atmel-sha", sha_dd); if (err) { dev_err(dev, "unable to request sha irq.\n"); goto res_err; } /* Initializing the clock */ - sha_dd->iclk = clk_get(&pdev->dev, "sha_clk"); + sha_dd->iclk = devm_clk_get(&pdev->dev, "sha_clk"); if (IS_ERR(sha_dd->iclk)) { dev_err(dev, "clock initialization failed.\n"); err = PTR_ERR(sha_dd->iclk); - goto clk_err; + goto res_err; } - sha_dd->io_base = ioremap(sha_dd->phys_base, sha_phys_size); + sha_dd->io_base = devm_ioremap_resource(&pdev->dev, sha_res); if (!sha_dd->io_base) { dev_err(dev, "can't ioremap\n"); err = -ENOMEM; - goto sha_io_err; + goto res_err; } atmel_sha_hw_version_init(sha_dd); @@ -1421,12 +1418,12 @@ static int atmel_sha_probe(struct platform_device *pdev) if (IS_ERR(pdata)) { dev_err(&pdev->dev, "platform data not available\n"); err = PTR_ERR(pdata); - goto err_pdata; + goto res_err; } } if (!pdata->dma_slave) { err = -ENXIO; - goto err_pdata; + goto res_err; } err = atmel_sha_dma_init(sha_dd, pdata); if (err) @@ -1457,12 +1454,6 @@ static int atmel_sha_probe(struct platform_device *pdev) if (sha_dd->caps.has_dma) atmel_sha_dma_cleanup(sha_dd); err_sha_dma: -err_pdata: - iounmap(sha_dd->io_base); -sha_io_err: - clk_put(sha_dd->iclk); -clk_err: - free_irq(sha_dd->irq, sha_dd); res_err: tasklet_kill(&sha_dd->done_task); sha_dd_err: diff --git a/drivers/crypto/atmel-tdes.c b/drivers/crypto/atmel-tdes.c index ca2999709eb4c..aaae20e724773 100644 --- a/drivers/crypto/atmel-tdes.c +++ b/drivers/crypto/atmel-tdes.c @@ -1355,7 +1355,6 @@ static int atmel_tdes_probe(struct platform_device *pdev) struct crypto_platform_data *pdata; struct device *dev = &pdev->dev; struct resource *tdes_res; - unsigned long tdes_phys_size; int err; tdes_dd = devm_kmalloc(&pdev->dev, sizeof(*tdes_dd), GFP_KERNEL); @@ -1389,7 +1388,6 @@ static int atmel_tdes_probe(struct platform_device *pdev) goto res_err; } tdes_dd->phys_base = tdes_res->start; - tdes_phys_size = resource_size(tdes_res); /* Get the IRQ */ tdes_dd->irq = platform_get_irq(pdev, 0); @@ -1399,26 +1397,26 @@ static int atmel_tdes_probe(struct platform_device *pdev) goto res_err; } - err = request_irq(tdes_dd->irq, atmel_tdes_irq, IRQF_SHARED, - "atmel-tdes", tdes_dd); + err = devm_request_irq(&pdev->dev, tdes_dd->irq, atmel_tdes_irq, + IRQF_SHARED, "atmel-tdes", tdes_dd); if (err) { dev_err(dev, "unable to request tdes irq.\n"); - goto tdes_irq_err; + goto res_err; } /* Initializing the clock */ - tdes_dd->iclk = clk_get(&pdev->dev, "tdes_clk"); + tdes_dd->iclk = devm_clk_get(&pdev->dev, "tdes_clk"); if (IS_ERR(tdes_dd->iclk)) { dev_err(dev, "clock initialization failed.\n"); err = PTR_ERR(tdes_dd->iclk); - goto clk_err; + goto res_err; } - tdes_dd->io_base = ioremap(tdes_dd->phys_base, tdes_phys_size); + tdes_dd->io_base = devm_ioremap_resource(&pdev->dev, tdes_res); if (!tdes_dd->io_base) { dev_err(dev, "can't ioremap\n"); err = -ENOMEM; - goto tdes_io_err; + goto res_err; } atmel_tdes_hw_version_init(tdes_dd); @@ -1474,12 +1472,6 @@ static int atmel_tdes_probe(struct platform_device *pdev) err_pdata: 
atmel_tdes_buff_cleanup(tdes_dd); err_tdes_buff: - iounmap(tdes_dd->io_base); -tdes_io_err: - clk_put(tdes_dd->iclk); -clk_err: - free_irq(tdes_dd->irq, tdes_dd); -tdes_irq_err: res_err: tasklet_kill(&tdes_dd->done_task); tasklet_kill(&tdes_dd->queue_task); @@ -1510,13 +1502,6 @@ static int atmel_tdes_remove(struct platform_device *pdev) atmel_tdes_buff_cleanup(tdes_dd); - iounmap(tdes_dd->io_base); - - clk_put(tdes_dd->iclk); - - if (tdes_dd->irq >= 0) - free_irq(tdes_dd->irq, tdes_dd); - return 0; } From 617c364c29ce8eefa9f0122fd01829adc2ac5d83 Mon Sep 17 00:00:00 2001 From: LABBE Corentin Date: Fri, 2 Oct 2015 14:12:58 +0200 Subject: [PATCH 1904/2091] crypto: atmel - Check for clk_prepare_enable() return value [ Upstream commit 9d83d299549d0e121245d56954242750d0c14338 ] clk_prepare_enable() can fail so add a check for this and return the error code if it fails. Signed-off-by: LABBE Corentin Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/crypto/atmel-aes.c | 6 +++++- drivers/crypto/atmel-sha.c | 6 +++++- drivers/crypto/atmel-tdes.c | 6 +++++- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/crypto/atmel-aes.c b/drivers/crypto/atmel-aes.c index 6178c1640ca99..fb16d812c8f55 100644 --- a/drivers/crypto/atmel-aes.c +++ b/drivers/crypto/atmel-aes.c @@ -260,7 +260,11 @@ static struct atmel_aes_dev *atmel_aes_find_dev(struct atmel_aes_ctx *ctx) static int atmel_aes_hw_init(struct atmel_aes_dev *dd) { - clk_prepare_enable(dd->iclk); + int err; + + err = clk_prepare_enable(dd->iclk); + if (err) + return err; if (!(dd->flags & AES_FLAGS_INIT)) { atmel_aes_write(dd, AES_CR, AES_CR_SWRST); diff --git a/drivers/crypto/atmel-sha.c b/drivers/crypto/atmel-sha.c index 5f9da90db0605..660d8c06540b9 100644 --- a/drivers/crypto/atmel-sha.c +++ b/drivers/crypto/atmel-sha.c @@ -794,7 +794,11 @@ static void atmel_sha_finish_req(struct ahash_request *req, int err) static int atmel_sha_hw_init(struct atmel_sha_dev *dd) { - clk_prepare_enable(dd->iclk); + int err; + + err = clk_prepare_enable(dd->iclk); + if (err) + return err; if (!(SHA_FLAGS_INIT & dd->flags)) { atmel_sha_write(dd, SHA_CR, SHA_CR_SWRST); diff --git a/drivers/crypto/atmel-tdes.c b/drivers/crypto/atmel-tdes.c index aaae20e724773..2c7a628d0375f 100644 --- a/drivers/crypto/atmel-tdes.c +++ b/drivers/crypto/atmel-tdes.c @@ -218,7 +218,11 @@ static struct atmel_tdes_dev *atmel_tdes_find_dev(struct atmel_tdes_ctx *ctx) static int atmel_tdes_hw_init(struct atmel_tdes_dev *dd) { - clk_prepare_enable(dd->iclk); + int err; + + err = clk_prepare_enable(dd->iclk); + if (err) + return err; if (!(dd->flags & TDES_FLAGS_INIT)) { atmel_tdes_write(dd, TDES_CR, TDES_CR_SWRST); From 0a86238d2e2ad9b74329622c72e99cacbbaf6485 Mon Sep 17 00:00:00 2001 From: Cyrille Pitchen Date: Fri, 5 Feb 2016 13:45:13 +0100 Subject: [PATCH 1905/2091] crypto: atmel-sha - remove calls of clk_prepare() from atomic contexts [ Upstream commit ee36c87a655325a7b5e442a9650a782db4ea20d2 ] commit c033042aa8f69894df37dabcaa0231594834a4e4 upstream. clk_prepare()/clk_unprepare() must not be called within atomic context. This patch calls clk_prepare() once for all from atmel_sha_probe() and clk_unprepare() from atmel_sha_remove(). Then calls of clk_prepare_enable()/clk_disable_unprepare() were replaced by calls of clk_enable()/clk_disable(). 
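A minimal sketch of the resulting split (names are placeholders): the sleepable clk_prepare()/clk_unprepare() pair brackets the device's lifetime, while the atomic-safe clk_enable()/clk_disable() pair brackets each request:

    /* probe(), process context: may sleep */
    err = clk_prepare(dd->iclk);
    if (err)
            return err;

    /* per-request init, possibly reached from atomic context */
    static int foo_hw_init(struct foo_dev *dd)
    {
            int err = clk_enable(dd->iclk);   /* never sleeps */

            if (err)
                    return err;
            /* ... program the hardware ... */
            return 0;
    }

    /* request completion path */
    clk_disable(dd->iclk);

    /* remove(), process context again */
    clk_unprepare(dd->iclk);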
Signed-off-by: Cyrille Pitchen Reported-by: Matthias Mayr Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/crypto/atmel-sha.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/crypto/atmel-sha.c b/drivers/crypto/atmel-sha.c index 660d8c06540b9..a71c97c03c393 100644 --- a/drivers/crypto/atmel-sha.c +++ b/drivers/crypto/atmel-sha.c @@ -783,7 +783,7 @@ static void atmel_sha_finish_req(struct ahash_request *req, int err) dd->flags &= ~(SHA_FLAGS_BUSY | SHA_FLAGS_FINAL | SHA_FLAGS_CPU | SHA_FLAGS_DMA_READY | SHA_FLAGS_OUTPUT_READY); - clk_disable_unprepare(dd->iclk); + clk_disable(dd->iclk); if (req->base.complete) req->base.complete(&req->base, err); @@ -796,7 +796,7 @@ static int atmel_sha_hw_init(struct atmel_sha_dev *dd) { int err; - err = clk_prepare_enable(dd->iclk); + err = clk_enable(dd->iclk); if (err) return err; @@ -823,7 +823,7 @@ static void atmel_sha_hw_version_init(struct atmel_sha_dev *dd) dev_info(dd->dev, "version: 0x%x\n", dd->hw_version); - clk_disable_unprepare(dd->iclk); + clk_disable(dd->iclk); } static int atmel_sha_handle_queue(struct atmel_sha_dev *dd, @@ -1411,6 +1411,10 @@ static int atmel_sha_probe(struct platform_device *pdev) goto res_err; } + err = clk_prepare(sha_dd->iclk); + if (err) + goto res_err; + atmel_sha_hw_version_init(sha_dd); atmel_sha_get_cap(sha_dd); @@ -1422,12 +1426,12 @@ static int atmel_sha_probe(struct platform_device *pdev) if (IS_ERR(pdata)) { dev_err(&pdev->dev, "platform data not available\n"); err = PTR_ERR(pdata); - goto res_err; + goto iclk_unprepare; } } if (!pdata->dma_slave) { err = -ENXIO; - goto res_err; + goto iclk_unprepare; } err = atmel_sha_dma_init(sha_dd, pdata); if (err) @@ -1458,6 +1462,8 @@ static int atmel_sha_probe(struct platform_device *pdev) if (sha_dd->caps.has_dma) atmel_sha_dma_cleanup(sha_dd); err_sha_dma: +iclk_unprepare: + clk_unprepare(sha_dd->iclk); res_err: tasklet_kill(&sha_dd->done_task); sha_dd_err: @@ -1484,6 +1490,8 @@ static int atmel_sha_remove(struct platform_device *pdev) if (sha_dd->caps.has_dma) atmel_sha_dma_cleanup(sha_dd); + clk_unprepare(sha_dd->iclk); + iounmap(sha_dd->io_base); clk_put(sha_dd->iclk); From 8b6349ec0eefde22c3348b65804e343be1245a59 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Thu, 4 Feb 2016 11:45:16 -0500 Subject: [PATCH 1906/2091] qla2xxx: Use pci_enable_msix_range() instead of pci_enable_msix() [ Upstream commit 84e32a06f4f8756ce9ec3c8dc7e97896575f0771 ] As result of deprecation of MSI-X/MSI enablement functions pci_enable_msix() and pci_enable_msi_block() all drivers using these two interfaces need to be updated to use the new pci_enable_msi_range() or pci_enable_msi_exact() and pci_enable_msix_range() or pci_enable_msix_exact() interfaces. Log message code 0x00c6 preserved, although it is reported after successful call to pci_enable_msix_range(), not before possibly unsuccessful call to pci_enable_msix(). Consumers of the error code should not notice the difference. 
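For reference, the replacement call returns the number of vectors actually granted (at least minvec) or a negative errno, so callers no longer implement the old "retry with fewer vectors" loop themselves; a minimal usage sketch (MAX_VECS is a placeholder):

    struct msix_entry entries[MAX_VECS];
    int i, nvec;

    for (i = 0; i < MAX_VECS; i++)
            entries[i].entry = i;

    /* accept anything between 2 vectors and MAX_VECS */
    nvec = pci_enable_msix_range(pdev, entries, 2, MAX_VECS);
    if (nvec < 0)
            return nvec;    /* not even the minimum was available */

    /* size per-vector resources from nvec, not from the requested maximum */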
Signed-off-by: Alexander Gordeev Acked-by: Chad Dupuis Cc: qla2xxx-upstream@qlogic.com Cc: linux-scsi@vger.kernel.org Cc: linux-pci@vger.kernel.org Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- drivers/scsi/qla2xxx/qla_init.c | 10 +++++----- drivers/scsi/qla2xxx/qla_isr.c | 4 ++-- drivers/scsi/qla2xxx/qla_mid.c | 4 ++-- drivers/scsi/qla2xxx/qla_os.c | 6 ++++++ drivers/scsi/qla2xxx/qla_tmpl.c | 16 ++++++++++++++++ 5 files changed, 31 insertions(+), 9 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index b323ad04ffb18..60f9651f26432 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -2194,7 +2194,7 @@ qla2x00_init_rings(scsi_qla_host_t *vha) /* Clear outstanding commands array. */ for (que = 0; que < ha->max_req_queues; que++) { req = ha->req_q_map[que]; - if (!req) + if (!req || !test_bit(que, ha->req_qid_map)) continue; req->out_ptr = (void *)(req->ring + req->length); *req->out_ptr = 0; @@ -2211,7 +2211,7 @@ qla2x00_init_rings(scsi_qla_host_t *vha) for (que = 0; que < ha->max_rsp_queues; que++) { rsp = ha->rsp_q_map[que]; - if (!rsp) + if (!rsp || !test_bit(que, ha->rsp_qid_map)) continue; rsp->in_ptr = (void *)(rsp->ring + rsp->length); *rsp->in_ptr = 0; @@ -4957,7 +4957,7 @@ qla25xx_init_queues(struct qla_hw_data *ha) for (i = 1; i < ha->max_rsp_queues; i++) { rsp = ha->rsp_q_map[i]; - if (rsp) { + if (rsp && test_bit(i, ha->rsp_qid_map)) { rsp->options &= ~BIT_0; ret = qla25xx_init_rsp_que(base_vha, rsp); if (ret != QLA_SUCCESS) @@ -4972,8 +4972,8 @@ qla25xx_init_queues(struct qla_hw_data *ha) } for (i = 1; i < ha->max_req_queues; i++) { req = ha->req_q_map[i]; - if (req) { - /* Clear outstanding commands array. */ + if (req && test_bit(i, ha->req_qid_map)) { + /* Clear outstanding commands array. 
*/ req->options &= ~BIT_0; ret = qla25xx_init_req_que(base_vha, req); if (ret != QLA_SUCCESS) diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index 6dc14cd782b2a..1f3991ba75805 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -2992,9 +2992,9 @@ qla24xx_enable_msix(struct qla_hw_data *ha, struct rsp_que *rsp) "MSI-X: Failed to enable support " "-- %d/%d\n Retry with %d vectors.\n", ha->msix_count, ret, ret); + ha->msix_count = ret; + ha->max_rsp_queues = ha->msix_count - 1; } - ha->msix_count = ret; - ha->max_rsp_queues = ha->msix_count - 1; ha->msix_entries = kzalloc(sizeof(struct qla_msix_entry) * ha->msix_count, GFP_KERNEL); if (!ha->msix_entries) { diff --git a/drivers/scsi/qla2xxx/qla_mid.c b/drivers/scsi/qla2xxx/qla_mid.c index cc94192511cf5..63abed122adf5 100644 --- a/drivers/scsi/qla2xxx/qla_mid.c +++ b/drivers/scsi/qla2xxx/qla_mid.c @@ -601,7 +601,7 @@ qla25xx_delete_queues(struct scsi_qla_host *vha) /* Delete request queues */ for (cnt = 1; cnt < ha->max_req_queues; cnt++) { req = ha->req_q_map[cnt]; - if (req) { + if (req && test_bit(cnt, ha->req_qid_map)) { ret = qla25xx_delete_req_que(vha, req); if (ret != QLA_SUCCESS) { ql_log(ql_log_warn, vha, 0x00ea, @@ -615,7 +615,7 @@ qla25xx_delete_queues(struct scsi_qla_host *vha) /* Delete response queues */ for (cnt = 1; cnt < ha->max_rsp_queues; cnt++) { rsp = ha->rsp_q_map[cnt]; - if (rsp) { + if (rsp && test_bit(cnt, ha->rsp_qid_map)) { ret = qla25xx_delete_rsp_que(vha, rsp); if (ret != QLA_SUCCESS) { ql_log(ql_log_warn, vha, 0x00eb, diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index e7a97a57a1ee1..d007255745770 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -398,6 +398,9 @@ static void qla2x00_free_queues(struct qla_hw_data *ha) int cnt; for (cnt = 0; cnt < ha->max_req_queues; cnt++) { + if (!test_bit(cnt, ha->req_qid_map)) + continue; + req = ha->req_q_map[cnt]; qla2x00_free_req_que(ha, req); } @@ -405,6 +408,9 @@ static void qla2x00_free_queues(struct qla_hw_data *ha) ha->req_q_map = NULL; for (cnt = 0; cnt < ha->max_rsp_queues; cnt++) { + if (!test_bit(cnt, ha->rsp_qid_map)) + continue; + rsp = ha->rsp_q_map[cnt]; qla2x00_free_rsp_que(ha, rsp); } diff --git a/drivers/scsi/qla2xxx/qla_tmpl.c b/drivers/scsi/qla2xxx/qla_tmpl.c index 962cb89fe0ae9..af806fdb0dbcd 100644 --- a/drivers/scsi/qla2xxx/qla_tmpl.c +++ b/drivers/scsi/qla2xxx/qla_tmpl.c @@ -395,6 +395,10 @@ qla27xx_fwdt_entry_t263(struct scsi_qla_host *vha, if (ent->t263.queue_type == T263_QUEUE_TYPE_REQ) { for (i = 0; i < vha->hw->max_req_queues; i++) { struct req_que *req = vha->hw->req_q_map[i]; + + if (!test_bit(i, vha->hw->req_qid_map)) + continue; + if (req || !buf) { length = req ? req->length : REQUEST_ENTRY_CNT_24XX; @@ -408,6 +412,10 @@ qla27xx_fwdt_entry_t263(struct scsi_qla_host *vha, } else if (ent->t263.queue_type == T263_QUEUE_TYPE_RSP) { for (i = 0; i < vha->hw->max_rsp_queues; i++) { struct rsp_que *rsp = vha->hw->rsp_q_map[i]; + + if (!test_bit(i, vha->hw->rsp_qid_map)) + continue; + if (rsp || !buf) { length = rsp ? 
rsp->length : RESPONSE_ENTRY_CNT_MQ; @@ -634,6 +642,10 @@ qla27xx_fwdt_entry_t274(struct scsi_qla_host *vha, if (ent->t274.queue_type == T274_QUEUE_TYPE_REQ_SHAD) { for (i = 0; i < vha->hw->max_req_queues; i++) { struct req_que *req = vha->hw->req_q_map[i]; + + if (!test_bit(i, vha->hw->req_qid_map)) + continue; + if (req || !buf) { qla27xx_insert16(i, buf, len); qla27xx_insert16(1, buf, len); @@ -645,6 +657,10 @@ qla27xx_fwdt_entry_t274(struct scsi_qla_host *vha, } else if (ent->t274.queue_type == T274_QUEUE_TYPE_RSP_SHAD) { for (i = 0; i < vha->hw->max_rsp_queues; i++) { struct rsp_que *rsp = vha->hw->rsp_q_map[i]; + + if (!test_bit(i, vha->hw->rsp_qid_map)) + continue; + if (rsp || !buf) { qla27xx_insert16(i, buf, len); qla27xx_insert16(1, buf, len); From 56819934d6148c68c85eb181f6153b36f028f99c Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Tue, 12 Jan 2016 15:14:46 -0800 Subject: [PATCH 1907/2091] serial: omap: Prevent DoS using unprivileged ioctl(TIOCSRS485) [ Upstream commit 308bbc9ab838d0ace0298268c7970ba9513e2c65 ] The omap-serial driver emulates RS485 delays using software timers, but neglects to clamp the input values from the unprivileged ioctl(TIOCSRS485). Because the software implementation busy-waits, malicious userspace could stall the cpu for ~49 days. Clamp the input values to < 100ms. Fixes: 4a0ac0f55b18 ("OMAP: add RS485 support") Cc: # 3.12+ Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/omap-serial.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/omap-serial.c b/drivers/tty/serial/omap-serial.c index 7f49172ccd867..0a88693cd8ca0 100644 --- a/drivers/tty/serial/omap-serial.c +++ b/drivers/tty/serial/omap-serial.c @@ -1368,7 +1368,7 @@ static inline void serial_omap_add_console_port(struct uart_omap_port *up) /* Enable or disable the rs485 support */ static int -serial_omap_config_rs485(struct uart_port *port, struct serial_rs485 *rs485conf) +serial_omap_config_rs485(struct uart_port *port, struct serial_rs485 *rs485) { struct uart_omap_port *up = to_uart_omap_port(port); unsigned int mode; @@ -1381,8 +1381,12 @@ serial_omap_config_rs485(struct uart_port *port, struct serial_rs485 *rs485conf) up->ier = 0; serial_out(up, UART_IER, 0); + /* Clamp the delays to [0, 100ms] */ + rs485->delay_rts_before_send = min(rs485->delay_rts_before_send, 100U); + rs485->delay_rts_after_send = min(rs485->delay_rts_after_send, 100U); + /* store new config */ - port->rs485 = *rs485conf; + port->rs485 = *rs485; /* * Just as a precaution, only allow rs485 From 6ca45550112e975be2298b0feda49b686029cc32 Mon Sep 17 00:00:00 2001 From: Jeremy McNicoll Date: Tue, 2 Feb 2016 13:00:45 -0800 Subject: [PATCH 1908/2091] tty: Add support for PCIe WCH382 2S multi-IO card [ Upstream commit 7dde55787b43a8f2b4021916db38d90c03a2ec64 ] WCH382 2S board is a PCIe card with 2 DB9 COM ports detected as Serial controller: Device 1c00:3253 (rev 10) (prog-if 05 [16850]) Signed-off-by: Jeremy McNicoll Cc: stable Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/8250/8250_pci.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index 2fd163b756653..b82b2a0f82a35 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -2002,6 +2002,7 @@ pci_wch_ch38x_setup(struct serial_private *priv, #define PCIE_VENDOR_ID_WCH 0x1c00 #define 
PCIE_DEVICE_ID_WCH_CH382_2S1P 0x3250 #define PCIE_DEVICE_ID_WCH_CH384_4S 0x3470 +#define PCIE_DEVICE_ID_WCH_CH382_2S 0x3253 #define PCI_DEVICE_ID_EXAR_XR17V4358 0x4358 #define PCI_DEVICE_ID_EXAR_XR17V8358 0x8358 @@ -2729,6 +2730,14 @@ static struct pci_serial_quirk pci_serial_quirks[] __refdata = { .subdevice = PCI_ANY_ID, .setup = pci_wch_ch353_setup, }, + /* WCH CH382 2S card (16850 clone) */ + { + .vendor = PCIE_VENDOR_ID_WCH, + .device = PCIE_DEVICE_ID_WCH_CH382_2S, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .setup = pci_wch_ch38x_setup, + }, /* WCH CH382 2S1P card (16850 clone) */ { .vendor = PCIE_VENDOR_ID_WCH, @@ -3049,6 +3058,7 @@ enum pci_board_num_t { pbn_fintek_4, pbn_fintek_8, pbn_fintek_12, + pbn_wch382_2, pbn_wch384_4, pbn_pericom_PI7C9X7951, pbn_pericom_PI7C9X7952, @@ -3879,6 +3889,13 @@ static struct pciserial_board pci_boards[] = { .base_baud = 115200, .first_offset = 0x40, }, + [pbn_wch382_2] = { + .flags = FL_BASE0, + .num_ports = 2, + .base_baud = 115200, + .uart_offset = 8, + .first_offset = 0xC0, + }, [pbn_wch384_4] = { .flags = FL_BASE0, .num_ports = 4, @@ -5691,6 +5708,10 @@ static struct pci_device_id serial_pci_tbl[] = { PCI_ANY_ID, PCI_ANY_ID, 0, 0, pbn_b0_bt_2_115200 }, + { PCIE_VENDOR_ID_WCH, PCIE_DEVICE_ID_WCH_CH382_2S, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, pbn_wch382_2 }, + { PCIE_VENDOR_ID_WCH, PCIE_DEVICE_ID_WCH_CH384_4S, PCI_ANY_ID, PCI_ANY_ID, 0, 0, pbn_wch384_4 }, From 614f8734d11ad22ee17a5faecf355b70756904ef Mon Sep 17 00:00:00 2001 From: "Herton R. Krzesinski" Date: Mon, 11 Jan 2016 12:07:43 -0200 Subject: [PATCH 1909/2091] pty: fix possible use after free of tty->driver_data [ Upstream commit 2831c89f42dcde440cfdccb9fee9f42d54bbc1ef ] This change fixes a bug for a corner case where we have the the last release from a pty master/slave coming from a previously opened /dev/tty file. When this happens, the tty->driver_data can be stale, due to all ptmx or pts/N files having already been closed before (and thus the inode related to these files, which tty->driver_data points to, being already freed/destroyed). The fix here is to keep a reference on the opened master ptmx inode. We maintain the inode referenced until the final pty_unix98_shutdown, and only pass this inode to devpts_kill_index. Signed-off-by: Herton R. 
Krzesinski Cc: # 2.6.29+ Reviewed-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/pty.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c index 4d5e8409769c3..5c260c872af26 100644 --- a/drivers/tty/pty.c +++ b/drivers/tty/pty.c @@ -672,7 +672,14 @@ static void pty_unix98_remove(struct tty_driver *driver, struct tty_struct *tty) /* this is called once with whichever end is closed last */ static void pty_unix98_shutdown(struct tty_struct *tty) { - devpts_kill_index(tty->driver_data, tty->index); + struct inode *ptmx_inode; + + if (tty->driver->subtype == PTY_TYPE_MASTER) + ptmx_inode = tty->driver_data; + else + ptmx_inode = tty->link->driver_data; + devpts_kill_index(ptmx_inode, tty->index); + iput(ptmx_inode); /* drop reference we acquired at ptmx_open */ } static const struct tty_operations ptm_unix98_ops = { @@ -764,6 +771,15 @@ static int ptmx_open(struct inode *inode, struct file *filp) set_bit(TTY_PTY_LOCK, &tty->flags); /* LOCK THE SLAVE */ tty->driver_data = inode; + /* + * In the case where all references to ptmx inode are dropped and we + * still have /dev/tty opened pointing to the master/slave pair (ptmx + * is closed/released before /dev/tty), we must make sure that the inode + * is still valid when we call the final pty_unix98_shutdown, thus we + * hold an additional reference to the ptmx inode + */ + ihold(inode); + tty_add_file(tty, filp); slave_inode = devpts_pty_new(inode, From 38bdf7e3fb81c3d9cbbacab6f96e9fd28ff6c704 Mon Sep 17 00:00:00 2001 From: "Herton R. Krzesinski" Date: Thu, 14 Jan 2016 17:56:58 -0200 Subject: [PATCH 1910/2091] pty: make sure super_block is still valid in final /dev/tty close [ Upstream commit 1f55c718c290616889c04946864a13ef30f64929 ] Considering current pty code and multiple devpts instances, it's possible to umount a devpts file system while a program still has /dev/tty opened pointing to a previosuly closed pty pair in that instance. In the case all ptmx and pts/N files are closed, umount can be done. If the program closes /dev/tty after umount is done, devpts_kill_index will use now an invalid super_block, which was already destroyed in the umount operation after running ->kill_sb. This is another "use after free" type of issue, but now related to the allocated super_block instance. To avoid the problem (warning at ida_remove and potential crashes) for this specific case, I added two functions in devpts which grabs additional references to the super_block, which pty code now uses so it makes sure the super block structure is still valid until pty shutdown is done. I also moved the additional inode references to the same functions, which also covered similar case with inode being freed before /dev/tty final close/shutdown. Signed-off-by: Herton R. 
Krzesinski Cc: stable@vger.kernel.org # 2.6.29+ Reviewed-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/pty.c | 9 ++++++--- fs/devpts/inode.c | 20 ++++++++++++++++++++ include/linux/devpts_fs.h | 4 ++++ 3 files changed, 30 insertions(+), 3 deletions(-) diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c index 5c260c872af26..254c183a5efed 100644 --- a/drivers/tty/pty.c +++ b/drivers/tty/pty.c @@ -679,7 +679,7 @@ static void pty_unix98_shutdown(struct tty_struct *tty) else ptmx_inode = tty->link->driver_data; devpts_kill_index(ptmx_inode, tty->index); - iput(ptmx_inode); /* drop reference we acquired at ptmx_open */ + devpts_del_ref(ptmx_inode); } static const struct tty_operations ptm_unix98_ops = { @@ -776,9 +776,12 @@ static int ptmx_open(struct inode *inode, struct file *filp) * still have /dev/tty opened pointing to the master/slave pair (ptmx * is closed/released before /dev/tty), we must make sure that the inode * is still valid when we call the final pty_unix98_shutdown, thus we - * hold an additional reference to the ptmx inode + * hold an additional reference to the ptmx inode. For the same /dev/tty + * last close case, we also need to make sure the super_block isn't + * destroyed (devpts instance unmounted), before /dev/tty is closed and + * on its release devpts_kill_index is called. */ - ihold(inode); + devpts_add_ref(inode); tty_add_file(tty, filp); diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index add566303c684..91360444adf5e 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -569,6 +569,26 @@ void devpts_kill_index(struct inode *ptmx_inode, int idx) mutex_unlock(&allocated_ptys_lock); } +/* + * pty code needs to hold extra references in case of last /dev/tty close + */ + +void devpts_add_ref(struct inode *ptmx_inode) +{ + struct super_block *sb = pts_sb_from_inode(ptmx_inode); + + atomic_inc(&sb->s_active); + ihold(ptmx_inode); +} + +void devpts_del_ref(struct inode *ptmx_inode) +{ + struct super_block *sb = pts_sb_from_inode(ptmx_inode); + + iput(ptmx_inode); + deactivate_super(sb); +} + /** * devpts_pty_new -- create a new inode in /dev/pts/ * @ptmx_inode: inode of the master diff --git a/include/linux/devpts_fs.h b/include/linux/devpts_fs.h index 251a2090a5544..e0ee0b3000b2d 100644 --- a/include/linux/devpts_fs.h +++ b/include/linux/devpts_fs.h @@ -19,6 +19,8 @@ int devpts_new_index(struct inode *ptmx_inode); void devpts_kill_index(struct inode *ptmx_inode, int idx); +void devpts_add_ref(struct inode *ptmx_inode); +void devpts_del_ref(struct inode *ptmx_inode); /* mknod in devpts */ struct inode *devpts_pty_new(struct inode *ptmx_inode, dev_t device, int index, void *priv); @@ -32,6 +34,8 @@ void devpts_pty_kill(struct inode *inode); /* Dummy stubs in the no-pty case */ static inline int devpts_new_index(struct inode *ptmx_inode) { return -EINVAL; } static inline void devpts_kill_index(struct inode *ptmx_inode, int idx) { } +static inline void devpts_add_ref(struct inode *ptmx_inode) { } +static inline void devpts_del_ref(struct inode *ptmx_inode) { } static inline struct inode *devpts_pty_new(struct inode *ptmx_inode, dev_t device, int index, void *priv) { From 485037307b65916364a5d9ac77e44d5cb64073f0 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 7 Feb 2016 09:38:26 +0100 Subject: [PATCH 1911/2091] ALSA: hda - Fix speaker output from VAIO AiO machines [ Upstream commit c44d9b1181cf34e0860c72cc8a00e0c47417aac0 ] Some Sony VAIO AiO models (VGC-JS4EF and VGC-JS25G, both with PCI SSID 104d:9044) need the 
same quirk to make the speaker working properly. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=112031 Cc: Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index e589d5ff1d0bb..92d85c3c76576 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -2228,6 +2228,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK(0x104d, 0x9047, "Sony Vaio TT", ALC889_FIXUP_VAIO_TT), SND_PCI_QUIRK(0x104d, 0x905a, "Sony Vaio Z", ALC882_FIXUP_NO_PRIMARY_HP), SND_PCI_QUIRK(0x104d, 0x9043, "Sony Vaio VGC-LN51JGB", ALC882_FIXUP_NO_PRIMARY_HP), + SND_PCI_QUIRK(0x104d, 0x9044, "Sony VAIO AiO", ALC882_FIXUP_NO_PRIMARY_HP), /* All Apple entries are in codec SSIDs */ SND_PCI_QUIRK(0x106b, 0x00a0, "MacBookPro 3,1", ALC889_FIXUP_MBP_VREF), From 84ec02eecb3b257f14cb8b10ffcd73d5420c96c0 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Wed, 13 Jan 2016 08:10:31 -0800 Subject: [PATCH 1912/2091] klist: fix starting point removed bug in klist iterators [ Upstream commit 00cd29b799e3449f0c68b1cc77cd4a5f95b42d17 ] The starting node for a klist iteration is often passed in from somewhere way above the klist infrastructure, meaning there's no guarantee the node is still on the list. We've seen this in SCSI where we use bus_find_device() to iterate through a list of devices. In the face of heavy hotplug activity, the last device returned by bus_find_device() can be removed before the next call. This leads to Dec 3 13:22:02 localhost kernel: WARNING: CPU: 2 PID: 28073 at include/linux/kref.h:47 klist_iter_init_node+0x3d/0x50() Dec 3 13:22:02 localhost kernel: Modules linked in: scsi_debug x86_pkg_temp_thermal kvm_intel kvm irqbypass crc32c_intel joydev iTCO_wdt dcdbas ipmi_devintf acpi_power_meter iTCO_vendor_support ipmi_si imsghandler pcspkr wmi acpi_cpufreq tpm_tis tpm shpchp lpc_ich mfd_core nfsd nfs_acl lockd grace sunrpc tg3 ptp pps_core Dec 3 13:22:02 localhost kernel: CPU: 2 PID: 28073 Comm: cat Not tainted 4.4.0-rc1+ #2 Dec 3 13:22:02 localhost kernel: Hardware name: Dell Inc. PowerEdge R320/08VT7V, BIOS 2.0.22 11/19/2013 Dec 3 13:22:02 localhost kernel: ffffffff81a20e77 ffff880613acfd18 ffffffff81321eef 0000000000000000 Dec 3 13:22:02 localhost kernel: ffff880613acfd50 ffffffff8107ca52 ffff88061176b198 0000000000000000 Dec 3 13:22:02 localhost kernel: ffffffff814542b0 ffff880610cfb100 ffff88061176b198 ffff880613acfd60 Dec 3 13:22:02 localhost kernel: Call Trace: Dec 3 13:22:02 localhost kernel: [] dump_stack+0x44/0x55 Dec 3 13:22:02 localhost kernel: [] warn_slowpath_common+0x82/0xc0 Dec 3 13:22:02 localhost kernel: [] ? proc_scsi_show+0x20/0x20 Dec 3 13:22:02 localhost kernel: [] warn_slowpath_null+0x1a/0x20 Dec 3 13:22:02 localhost kernel: [] klist_iter_init_node+0x3d/0x50 Dec 3 13:22:02 localhost kernel: [] bus_find_device+0x51/0xb0 Dec 3 13:22:02 localhost kernel: [] scsi_seq_next+0x2d/0x40 [...] And an eventual crash. It can actually occur in any hotplug system which has a device finder and a starting device. We can fix this globally by making sure the starting node for klist_iter_init_node() is actually a member of the list before using it (and by starting from the beginning if it isn't). Reported-by: Ewan D. Milne Tested-by: Ewan D. 
Milne Cc: stable@vger.kernel.org Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- lib/klist.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/klist.c b/lib/klist.c index 89b485a2a58d1..2a072bfaeacef 100644 --- a/lib/klist.c +++ b/lib/klist.c @@ -282,9 +282,9 @@ void klist_iter_init_node(struct klist *k, struct klist_iter *i, struct klist_node *n) { i->i_klist = k; - i->i_cur = n; - if (n) - kref_get(&n->n_ref); + i->i_cur = NULL; + if (n && kref_get_unless_zero(&n->n_ref)) + i->i_cur = n; } EXPORT_SYMBOL_GPL(klist_iter_init_node); From c5e67773f634806feecb3779baf2158e182ccd8a Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 2 Feb 2016 15:27:36 +0100 Subject: [PATCH 1913/2091] ALSA: dummy: Implement timer backend switching more safely [ Upstream commit ddce57a6f0a2d8d1bfacfa77f06043bc760403c2 ] Currently the selected timer backend is referred at any moment from the running PCM callbacks. When the backend is switched, it's possible to lead to inconsistency from the running backend. This was pointed by syzkaller fuzzer, and the commit [7ee96216c31a: ALSA: dummy: Disable switching timer backend via sysfs] disabled the dynamic switching for avoiding the crash. This patch improves the handling of timer backend switching. It keeps the reference to the selected backend during the whole operation of an opened stream so that it won't be changed by other streams. Together with this change, the hrtimer parameter is reenabled as writable now. NOTE: this patch also turned out to fix the still remaining race. Namely, ops was still replaced dynamically at dummy_pcm_open: static int dummy_pcm_open(struct snd_pcm_substream *substream) { .... dummy->timer_ops = &dummy_systimer_ops; if (hrtimer) dummy->timer_ops = &dummy_hrtimer_ops; Since dummy->timer_ops is common among all streams, and when the replacement happens during accesses of other streams, it may lead to a crash. This was actually triggered by syzkaller fuzzer and KASAN. This patch rewrites the code not to use the ops shared by all streams any longer, too. 
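The mechanism the patch relies on (shown here in generic form, mirroring the hunks below) is to keep the ops pointer as the very first member of each backend's per-stream state, so that runtime->private_data can be dereferenced as a pointer-to-ops regardless of which backend allocated it:

    struct dummy_hrtimer_pcm {
            /* must stay the first member; see get_dummy_ops() */
            const struct dummy_timer_ops *timer_ops;
            /* backend-specific fields follow */
    };

    /* private_data points at one of the per-stream structs above, and the
     * ops pointer sits at offset 0 in all of them, so this cast is valid */
    #define get_dummy_ops(substream) \
            (*(const struct dummy_timer_ops **)(substream)->runtime->private_data)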
BugLink: http://lkml.kernel.org/r/CACT4Y+aZ+xisrpuM6cOXbL21DuM0yVxPYXf4cD4Md9uw0C3dBQ@mail.gmail.com Reported-by: Dmitry Vyukov Cc: Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/drivers/dummy.c | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/sound/drivers/dummy.c b/sound/drivers/dummy.c index 96592d5ba7bfe..c5d5217a41804 100644 --- a/sound/drivers/dummy.c +++ b/sound/drivers/dummy.c @@ -87,7 +87,7 @@ MODULE_PARM_DESC(pcm_substreams, "PCM substreams # (1-128) for dummy driver."); module_param(fake_buffer, bool, 0444); MODULE_PARM_DESC(fake_buffer, "Fake buffer allocations."); #ifdef CONFIG_HIGH_RES_TIMERS -module_param(hrtimer, bool, 0444); +module_param(hrtimer, bool, 0644); MODULE_PARM_DESC(hrtimer, "Use hrtimer as the timer source."); #endif @@ -109,6 +109,9 @@ struct dummy_timer_ops { snd_pcm_uframes_t (*pointer)(struct snd_pcm_substream *); }; +#define get_dummy_ops(substream) \ + (*(const struct dummy_timer_ops **)(substream)->runtime->private_data) + struct dummy_model { const char *name; int (*playback_constraints)(struct snd_pcm_runtime *runtime); @@ -137,7 +140,6 @@ struct snd_dummy { int iobox; struct snd_kcontrol *cd_volume_ctl; struct snd_kcontrol *cd_switch_ctl; - const struct dummy_timer_ops *timer_ops; }; /* @@ -231,6 +233,8 @@ struct dummy_model *dummy_models[] = { */ struct dummy_systimer_pcm { + /* ops must be the first item */ + const struct dummy_timer_ops *timer_ops; spinlock_t lock; struct timer_list timer; unsigned long base_time; @@ -366,6 +370,8 @@ static struct dummy_timer_ops dummy_systimer_ops = { */ struct dummy_hrtimer_pcm { + /* ops must be the first item */ + const struct dummy_timer_ops *timer_ops; ktime_t base_time; ktime_t period_time; atomic_t running; @@ -492,31 +498,25 @@ static struct dummy_timer_ops dummy_hrtimer_ops = { static int dummy_pcm_trigger(struct snd_pcm_substream *substream, int cmd) { - struct snd_dummy *dummy = snd_pcm_substream_chip(substream); - switch (cmd) { case SNDRV_PCM_TRIGGER_START: case SNDRV_PCM_TRIGGER_RESUME: - return dummy->timer_ops->start(substream); + return get_dummy_ops(substream)->start(substream); case SNDRV_PCM_TRIGGER_STOP: case SNDRV_PCM_TRIGGER_SUSPEND: - return dummy->timer_ops->stop(substream); + return get_dummy_ops(substream)->stop(substream); } return -EINVAL; } static int dummy_pcm_prepare(struct snd_pcm_substream *substream) { - struct snd_dummy *dummy = snd_pcm_substream_chip(substream); - - return dummy->timer_ops->prepare(substream); + return get_dummy_ops(substream)->prepare(substream); } static snd_pcm_uframes_t dummy_pcm_pointer(struct snd_pcm_substream *substream) { - struct snd_dummy *dummy = snd_pcm_substream_chip(substream); - - return dummy->timer_ops->pointer(substream); + return get_dummy_ops(substream)->pointer(substream); } static struct snd_pcm_hardware dummy_pcm_hardware = { @@ -562,17 +562,19 @@ static int dummy_pcm_open(struct snd_pcm_substream *substream) struct snd_dummy *dummy = snd_pcm_substream_chip(substream); struct dummy_model *model = dummy->model; struct snd_pcm_runtime *runtime = substream->runtime; + const struct dummy_timer_ops *ops; int err; - dummy->timer_ops = &dummy_systimer_ops; + ops = &dummy_systimer_ops; #ifdef CONFIG_HIGH_RES_TIMERS if (hrtimer) - dummy->timer_ops = &dummy_hrtimer_ops; + ops = &dummy_hrtimer_ops; #endif - err = dummy->timer_ops->create(substream); + err = ops->create(substream); if (err < 0) return err; + get_dummy_ops(substream) = ops; runtime->hw = dummy->pcm_hw; if 
(substream->pcm->device & 1) { @@ -594,7 +596,7 @@ static int dummy_pcm_open(struct snd_pcm_substream *substream) err = model->capture_constraints(substream->runtime); } if (err < 0) { - dummy->timer_ops->free(substream); + get_dummy_ops(substream)->free(substream); return err; } return 0; @@ -602,8 +604,7 @@ static int dummy_pcm_open(struct snd_pcm_substream *substream) static int dummy_pcm_close(struct snd_pcm_substream *substream) { - struct snd_dummy *dummy = snd_pcm_substream_chip(substream); - dummy->timer_ops->free(substream); + get_dummy_ops(substream)->free(substream); return 0; } From 3c9dc5750d0c3052ba7a3bf0173ba429bc96f323 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 4 Feb 2016 12:50:49 +0200 Subject: [PATCH 1914/2091] drm/i915/dsi: defend gpio table against out of bounds access MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 4db3a2448ec8902310acb78de39b6227a9a56ac8 ] Do not blindly trust the VBT data used for indexing. Cc: stable@vger.kernel.org Reviewed-by: Ville Syrjälä Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/cc32d40c2b47f2d2151811855ac2c3dabab1d57d.1454582914.git.jani.nikula@intel.com (cherry picked from commit 5d2d0a12d3d08bf50434f0b5947bb73bac04b941) Signed-off-by: Jani Nikula Signed-off-by: Sasha Levin --- drivers/gpu/drm/i915/intel_dsi_panel_vbt.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c b/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c index d2cd8d5b27a16..a5e8be0baf298 100644 --- a/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c +++ b/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c @@ -209,6 +209,11 @@ static const u8 *mipi_exec_gpio(struct intel_dsi *intel_dsi, const u8 *data) /* pull up/down */ action = *data++; + if (gpio >= ARRAY_SIZE(gtable)) { + DRM_DEBUG_KMS("unknown gpio %u\n", gpio); + goto out; + } + function = gtable[gpio].function_reg; pad = gtable[gpio].pad_reg; @@ -226,6 +231,7 @@ static const u8 *mipi_exec_gpio(struct intel_dsi *intel_dsi, const u8 *data) vlv_gpio_nc_write(dev_priv, pad, val); mutex_unlock(&dev_priv->dpio_lock); +out: return data; } From 6b20ea56ddee5353a29616692723c80793fb05c5 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 4 Feb 2016 12:50:50 +0200 Subject: [PATCH 1915/2091] drm/i915/dsi: don't pass arbitrary data to sideband MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 26f6f2d301c1fb46acb1138ee155125815239b0d ] Since sequence block v2 the second byte contains flags other than just pull up/down. Don't pass arbitrary data to the sideband interface. The rest may or may not work for sequence block v2, but there should be no harm done. 
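The masking idea is easier to see in isolation. A hedged sketch follows (demo_* names are made up; only the meaning of bit 0 is assumed here): when a newer table format adds extra flag bits to a byte, keep just the bit this code actually understands before acting on it.

#include <linux/types.h>

#define DEMO_GPIO_PULL_UP	0x01	/* assumed meaning of bit 0 */

static u8 demo_parse_gpio_action(const u8 **data)
{
	u8 flags = *(*data)++;

	/* bits 1..7 may carry unrelated v2+ flags; never forward them */
	return flags & DEMO_GPIO_PULL_UP;
}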
Cc: stable@vger.kernel.org Reviewed-by: Ville Syrjälä Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/ebe3c2eee623afc4b3a134533b01f8d591d13f32.1454582914.git.jani.nikula@intel.com (cherry picked from commit 4e1c63e3761b84ec7d87c75b58bbc8bcf18e98ee) Signed-off-by: Jani Nikula Signed-off-by: Sasha Levin --- drivers/gpu/drm/i915/intel_dsi_panel_vbt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c b/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c index a5e8be0baf298..82f8e20cca740 100644 --- a/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c +++ b/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c @@ -207,7 +207,7 @@ static const u8 *mipi_exec_gpio(struct intel_dsi *intel_dsi, const u8 *data) gpio = *data++; /* pull up/down */ - action = *data++; + action = *data++ & 1; if (gpio >= ARRAY_SIZE(gtable)) { DRM_DEBUG_KMS("unknown gpio %u\n", gpio); From 47eef3475c5f3063fc20e938f2fe24cd84d731aa Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 8 Feb 2016 17:36:25 +0100 Subject: [PATCH 1916/2091] ALSA: timer: Fix wrong instance passed to slave callbacks [ Upstream commit 117159f0b9d392fb433a7871426fad50317f06f7 ] In snd_timer_notify1(), the wrong timer instance was passed for slave ccallback function. This leads to the access to the wrong data when an incompatible master is handled (e.g. the master is the sequencer timer and the slave is a user timer), as spotted by syzkaller fuzzer. This patch fixes that wrong assignment. BugLink: http://lkml.kernel.org/r/CACT4Y+Y_Bm+7epAb=8Wi=AaWd+DYS7qawX52qxdCfOfY49vozQ@mail.gmail.com Reported-by: Dmitry Vyukov Cc: Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/core/timer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/core/timer.c b/sound/core/timer.c index 00e8c5f4de177..4493dc317d437 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -422,7 +422,7 @@ static void snd_timer_notify1(struct snd_timer_instance *ti, int event) spin_lock_irqsave(&timer->lock, flags); list_for_each_entry(ts, &ti->slave_active_head, active_list) if (ts->ccallback) - ts->ccallback(ti, event + 100, &tstamp, resolution); + ts->ccallback(ts, event + 100, &tstamp, resolution); spin_unlock_irqrestore(&timer->lock, flags); } From ca54f8d139ae5b261d3dba4c71bb08b46fc8f9ea Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 8 Feb 2016 09:14:37 +0100 Subject: [PATCH 1917/2091] ARM: 8517/1: ICST: avoid arithmetic overflow in icst_hz() [ Upstream commit 5070fb14a0154f075c8b418e5bc58a620ae85a45 ] When trying to set the ICST 307 clock to 25174000 Hz I ran into this arithmetic error: the icst_hz_to_vco() correctly figure out DIVIDE=2, RDW=100 and VDW=99 yielding a frequency of 25174000 Hz out of the VCO. (I replicated the icst_hz() function in a spreadsheet to verify this.) However, when I called icst_hz() on these VCO settings it would instead return 4122709 Hz. This causes an error in the common clock driver for ICST as the common clock framework will call .round_rate() on the clock which will utilize icst_hz_to_vco() followed by icst_hz() suggesting the erroneous frequency, and then the clock gets set to this. The error did not manifest in the old clock framework since this high frequency was only used by the CLCD, which calls clk_set_rate() without first calling clk_round_rate() and since the old clock framework would not call clk_round_rate() before setting the frequency, the correct values propagated into the VCO. 
After some experimenting I figured out that it was due to a simple arithmetic overflow: the divisor for 24Mhz reference frequency as reference becomes 24000000*2*(99+8)=0x132212400 and the "1" in bit 32 overflows and is lost. But introducing an explicit 64-by-32 bit do_div() and casting the divisor into (u64) we get the right frequency back, and the right frequency gets set. Tested on the ARM Versatile. Cc: stable@vger.kernel.org Cc: linux-clk@vger.kernel.org Cc: Pawel Moll Signed-off-by: Linus Walleij Signed-off-by: Russell King Signed-off-by: Sasha Levin --- arch/arm/common/icst.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/arm/common/icst.c b/arch/arm/common/icst.c index 2dc6da70ae598..d3c0e69df2591 100644 --- a/arch/arm/common/icst.c +++ b/arch/arm/common/icst.c @@ -16,7 +16,7 @@ */ #include #include - +#include #include /* @@ -29,7 +29,11 @@ EXPORT_SYMBOL(icst525_s2div); unsigned long icst_hz(const struct icst_params *p, struct icst_vco vco) { - return p->ref * 2 * (vco.v + 8) / ((vco.r + 2) * p->s2div[vco.s]); + u64 dividend = p->ref * 2 * (u64)(vco.v + 8); + u32 divisor = (vco.r + 2) * p->s2div[vco.s]; + + do_div(dividend, divisor); + return (unsigned long)dividend; } EXPORT_SYMBOL(icst_hz); From 9b7283948d5f83ac692ed0243e17e2f1d3843c6c Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 9 Feb 2016 12:02:32 +0100 Subject: [PATCH 1918/2091] ALSA: timer: Fix race between stop and interrupt [ Upstream commit ed8b1d6d2c741ab26d60d499d7fbb7ac801f0f51 ] A slave timer element also unlinks at snd_timer_stop() but it takes only slave_active_lock. When a slave is assigned to a master, however, this may become a race against the master's interrupt handling, eventually resulting in a list corruption. The actual bug could be seen with a syzkaller fuzzer test case in BugLink below. As a fix, we need to take timeri->timer->lock when timer isn't NULL, i.e. assigned to a master, while the assignment to a master itself is protected by slave_active_lock. BugLink: http://lkml.kernel.org/r/CACT4Y+Y_Bm+7epAb=8Wi=AaWd+DYS7qawX52qxdCfOfY49vozQ@mail.gmail.com Cc: Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/core/timer.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/core/timer.c b/sound/core/timer.c index 4493dc317d437..72934243bcdac 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -518,9 +518,13 @@ static int _snd_timer_stop(struct snd_timer_instance *timeri, int event) spin_unlock_irqrestore(&slave_active_lock, flags); return -EBUSY; } + if (timeri->timer) + spin_lock(&timeri->timer->lock); timeri->flags &= ~SNDRV_TIMER_IFLG_RUNNING; list_del_init(&timeri->ack_list); list_del_init(&timeri->active_list); + if (timeri->timer) + spin_unlock(&timeri->timer->lock); spin_unlock_irqrestore(&slave_active_lock, flags); goto __end; } From dbe8ccc27ee453e435597e636b4c7445fdc8c6d5 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 9 Feb 2016 10:23:52 +0100 Subject: [PATCH 1919/2091] ALSA: hda - Fix bad dereference of jack object [ Upstream commit 2ebab40eb74a0225d5dfba72bfae317dd948fa2d ] The hda_jack_tbl entries are managed by snd_array for allowing multiple jacks. It's good per se, but the problem is that struct hda_jack_callback keeps the hda_jack_tbl pointer. Since snd_array doesn't preserve each pointer at resizing the array, we can't keep the original pointer but have to deduce the pointer at each time via snd_array_entry() instead. 
Actually, this resulted in the deference to the wrong pointer on codecs that have many pins such as CS4208. This patch replaces the pointer to the NID value as the search key. As an unexpected good side effect, this even simplifies the code, as only NID is needed in most cases. Cc: Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/hda/hda_generic.c | 4 ++-- sound/pci/hda/hda_jack.c | 2 +- sound/pci/hda/hda_jack.h | 2 +- sound/pci/hda/patch_ca0132.c | 5 ++++- sound/pci/hda/patch_hdmi.c | 2 +- sound/pci/hda/patch_realtek.c | 2 +- sound/pci/hda/patch_sigmatel.c | 6 +++--- 7 files changed, 13 insertions(+), 10 deletions(-) diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c index 5bc7f2e2715cb..194627c6c42b3 100644 --- a/sound/pci/hda/hda_generic.c +++ b/sound/pci/hda/hda_generic.c @@ -3998,9 +3998,9 @@ static void pin_power_callback(struct hda_codec *codec, struct hda_jack_callback *jack, bool on) { - if (jack && jack->tbl->nid) + if (jack && jack->nid) sync_power_state_change(codec, - set_pin_power_jack(codec, jack->tbl->nid, on)); + set_pin_power_jack(codec, jack->nid, on)); } /* callback only doing power up -- called at first */ diff --git a/sound/pci/hda/hda_jack.c b/sound/pci/hda/hda_jack.c index d7cfe7b8c32b1..52cc36758dd42 100644 --- a/sound/pci/hda/hda_jack.c +++ b/sound/pci/hda/hda_jack.c @@ -259,7 +259,7 @@ snd_hda_jack_detect_enable_callback(struct hda_codec *codec, hda_nid_t nid, if (!callback) return ERR_PTR(-ENOMEM); callback->func = func; - callback->tbl = jack; + callback->nid = jack->nid; callback->next = jack->callback; jack->callback = callback; } diff --git a/sound/pci/hda/hda_jack.h b/sound/pci/hda/hda_jack.h index b279e327a23b3..a13c11c3ddbb6 100644 --- a/sound/pci/hda/hda_jack.h +++ b/sound/pci/hda/hda_jack.h @@ -21,7 +21,7 @@ struct hda_jack_callback; typedef void (*hda_jack_callback_fn) (struct hda_codec *, struct hda_jack_callback *); struct hda_jack_callback { - struct hda_jack_tbl *tbl; + hda_nid_t nid; hda_jack_callback_fn func; unsigned int private_data; /* arbitrary data */ struct hda_jack_callback *next; diff --git a/sound/pci/hda/patch_ca0132.c b/sound/pci/hda/patch_ca0132.c index 4a4e7b282e4f8..0374bd5b61c82 100644 --- a/sound/pci/hda/patch_ca0132.c +++ b/sound/pci/hda/patch_ca0132.c @@ -4401,13 +4401,16 @@ static void ca0132_process_dsp_response(struct hda_codec *codec, static void hp_callback(struct hda_codec *codec, struct hda_jack_callback *cb) { struct ca0132_spec *spec = codec->spec; + struct hda_jack_tbl *tbl; /* Delay enabling the HP amp, to let the mic-detection * state machine run. 
*/ cancel_delayed_work_sync(&spec->unsol_hp_work); schedule_delayed_work(&spec->unsol_hp_work, msecs_to_jiffies(500)); - cb->tbl->block_report = 1; + tbl = snd_hda_jack_tbl_get(codec, cb->nid); + if (tbl) + tbl->block_report = 1; } static void amic_callback(struct hda_codec *codec, struct hda_jack_callback *cb) diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 063d82345bb45..51d519554744e 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -1179,7 +1179,7 @@ static void check_presence_and_report(struct hda_codec *codec, hda_nid_t nid) static void jack_callback(struct hda_codec *codec, struct hda_jack_callback *jack) { - check_presence_and_report(codec, jack->tbl->nid); + check_presence_and_report(codec, jack->nid); } static void hdmi_intrinsic_event(struct hda_codec *codec, unsigned int res) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 92d85c3c76576..df34c78a6ced9 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -277,7 +277,7 @@ static void alc_update_knob_master(struct hda_codec *codec, uctl = kzalloc(sizeof(*uctl), GFP_KERNEL); if (!uctl) return; - val = snd_hda_codec_read(codec, jack->tbl->nid, 0, + val = snd_hda_codec_read(codec, jack->nid, 0, AC_VERB_GET_VOLUME_KNOB_CONTROL, 0); val &= HDA_AMP_VOLMASK; uctl->value.integer.value[0] = val; diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index 8e7d4c087a7a8..840178a26a6b4 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -493,9 +493,9 @@ static void jack_update_power(struct hda_codec *codec, if (!spec->num_pwrs) return; - if (jack && jack->tbl->nid) { - stac_toggle_power_map(codec, jack->tbl->nid, - snd_hda_jack_detect(codec, jack->tbl->nid), + if (jack && jack->nid) { + stac_toggle_power_map(codec, jack->nid, + snd_hda_jack_detect(codec, jack->nid), true); return; } From 0163f1a71f10b25eae8d7019124cd7f1141b109a Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 8 Feb 2016 17:26:58 +0100 Subject: [PATCH 1920/2091] ALSA: timer: Fix race at concurrent reads [ Upstream commit 4dff5c7b7093b19c19d3a100f8a3ad87cb7cd9e7 ] snd_timer_user_read() has a potential race among parallel reads, as qhead and qused are updated outside the critical section due to copy_to_user() calls. Move them into the critical section, and also sanitize the relevant code a bit. 
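A compact sketch of the locking pattern the fix moves to (illustrative demo_* types, not the ALSA timer code): reserve the queue slot while the lock is held, drop the lock only for the copy to user space, then re-take it before accounting the slot as consumed.

#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/uaccess.h>

struct demo_event {
	u64 tstamp;
	u32 val;
};

struct demo_queue {
	spinlock_t lock;
	int head, used, size;
	struct demo_event *ring;
};

static ssize_t demo_queue_read(struct demo_queue *q, char __user *buf,
			       size_t count)
{
	ssize_t result = 0;
	int err = 0;

	spin_lock_irq(&q->lock);
	while (q->used > 0 && count >= sizeof(*q->ring)) {
		int head = q->head;

		q->head = (q->head + 1) % q->size;	/* reserve under the lock */
		spin_unlock_irq(&q->lock);

		err = copy_to_user(buf, &q->ring[head], sizeof(*q->ring)) ?
			-EFAULT : 0;

		spin_lock_irq(&q->lock);
		q->used--;				/* account under the lock */
		if (err)
			break;
		buf += sizeof(*q->ring);
		count -= sizeof(*q->ring);
		result += sizeof(*q->ring);
	}
	spin_unlock_irq(&q->lock);

	return result > 0 ? result : err;
}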
Cc: Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/core/timer.c | 34 +++++++++++++++------------------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/sound/core/timer.c b/sound/core/timer.c index 72934243bcdac..bf48e71f73cde 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -1924,6 +1924,7 @@ static ssize_t snd_timer_user_read(struct file *file, char __user *buffer, { struct snd_timer_user *tu; long result = 0, unit; + int qhead; int err = 0; tu = file->private_data; @@ -1935,7 +1936,7 @@ static ssize_t snd_timer_user_read(struct file *file, char __user *buffer, if ((file->f_flags & O_NONBLOCK) != 0 || result > 0) { err = -EAGAIN; - break; + goto _error; } set_current_state(TASK_INTERRUPTIBLE); @@ -1950,42 +1951,37 @@ static ssize_t snd_timer_user_read(struct file *file, char __user *buffer, if (tu->disconnected) { err = -ENODEV; - break; + goto _error; } if (signal_pending(current)) { err = -ERESTARTSYS; - break; + goto _error; } } + qhead = tu->qhead++; + tu->qhead %= tu->queue_size; spin_unlock_irq(&tu->qlock); - if (err < 0) - goto _error; if (tu->tread) { - if (copy_to_user(buffer, &tu->tqueue[tu->qhead++], - sizeof(struct snd_timer_tread))) { + if (copy_to_user(buffer, &tu->tqueue[qhead], + sizeof(struct snd_timer_tread))) err = -EFAULT; - goto _error; - } } else { - if (copy_to_user(buffer, &tu->queue[tu->qhead++], - sizeof(struct snd_timer_read))) { + if (copy_to_user(buffer, &tu->queue[qhead], + sizeof(struct snd_timer_read))) err = -EFAULT; - goto _error; - } } - tu->qhead %= tu->queue_size; - - result += unit; - buffer += unit; - spin_lock_irq(&tu->qlock); tu->qused--; + if (err < 0) + goto _error; + result += unit; + buffer += unit; } - spin_unlock_irq(&tu->qlock); _error: + spin_unlock_irq(&tu->qlock); return result > 0 ? result : err; } From 68fce03ba7901aa338a566292a59e6a753948861 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 9 Feb 2016 16:11:26 -0500 Subject: [PATCH 1921/2091] Revert "workqueue: make sure delayed work run in local cpu" [ Upstream commit 041bd12e272c53a35c54c13875839bcb98c999ce ] This reverts commit 874bbfe600a660cba9c776b3957b1ce393151b76. Workqueue used to implicity guarantee that work items queued without explicit CPU specified are put on the local CPU. Recent changes in timer broke the guarantee and led to vmstat breakage which was fixed by 176bed1de5bf ("vmstat: explicitly schedule per-cpu work on the CPU we need it to run on"). vmstat is the most likely to expose the issue and it's quite possible that there are other similar problems which are a lot more difficult to trigger. As a preventive measure, 874bbfe600a6 ("workqueue: make sure delayed work run in local cpu") was applied to restore the local CPU guarnatee. Unfortunately, the change exposed a bug in timer code which got fixed by 22b886dd1018 ("timers: Use proper base migration in add_timer_on()"). Due to code restructuring, the commit couldn't be backported beyond certain point and stable kernels which only had 874bbfe600a6 started crashing. The local CPU guarantee was accidental more than anything else and we want to get rid of it anyway. As, with the vmstat case fixed, 874bbfe600a6 is causing more problems than it's fixing, it has been decided to take the chance and officially break the guarantee by reverting the commit. A debug feature will be added to force foreign CPU assignment to expose cases relying on the guarantee and fixes for the individual cases will be backported to stable as necessary. 
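One practical consequence, shown as a hedged example (demo_* names are hypothetical): after the revert, delayed work queued without an explicit CPU may run anywhere, so code that genuinely depends on per-CPU execution has to request it explicitly, as vmstat now does.

#include <linux/kernel.h>
#include <linux/smp.h>
#include <linux/workqueue.h>

static void demo_work_fn(struct work_struct *work)
{
	/* per-CPU statistics folding, cache refill, etc. */
}

static DECLARE_DELAYED_WORK(demo_work, demo_work_fn);

static void demo_queue_on_this_cpu(void)
{
	int cpu = get_cpu();	/* disable preemption while reading the CPU id */

	/* pin the work instead of relying on the old, accidental behaviour */
	schedule_delayed_work_on(cpu, &demo_work, HZ);
	put_cpu();
}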
Signed-off-by: Tejun Heo Fixes: 874bbfe600a6 ("workqueue: make sure delayed work run in local cpu") Link: http://lkml.kernel.org/g/20160120211926.GJ10810@quack.suse.cz Cc: stable@vger.kernel.org Cc: Mike Galbraith Cc: Henrique de Moraes Holschuh Cc: Daniel Bilik Cc: Jan Kara Cc: Shaohua Li Cc: Sasha Levin Cc: Ben Hutchings Cc: Thomas Gleixner Cc: Daniel Bilik Cc: Jiri Slaby Cc: Michal Hocko Signed-off-by: Sasha Levin --- kernel/workqueue.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 5c01664c26e25..586ad91300b0f 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1451,13 +1451,13 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq, timer_stats_timer_set_start_info(&dwork->timer); dwork->wq = wq; - /* timer isn't guaranteed to run in this cpu, record earlier */ - if (cpu == WORK_CPU_UNBOUND) - cpu = raw_smp_processor_id(); dwork->cpu = cpu; timer->expires = jiffies + delay; - add_timer_on(timer, cpu); + if (unlikely(cpu != WORK_CPU_UNBOUND)) + add_timer_on(timer, cpu); + else + add_timer(timer); } /** From 7822d8eb3551dce8f0e594807d030cf8be342540 Mon Sep 17 00:00:00 2001 From: Shawn Lin Date: Thu, 28 Jan 2016 16:14:18 +0800 Subject: [PATCH 1922/2091] phy: core: fix wrong err handle for phy_power_on [ Upstream commit b82fcabe212a11698fd4b3e604d2f81d929d22f6 ] If phy_pm_runtime_get_sync failed but we already enable regulator, current code return directly without doing regulator_disable. This patch fix this problem and cleanup err handle of phy_power_on to be more readable. Fixes: 3be88125d85d ("phy: core: Support regulator ...") Cc: # v3.18+ Cc: Roger Quadros Cc: Axel Lin Signed-off-by: Shawn Lin Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Sasha Levin --- drivers/phy/phy-core.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/phy/phy-core.c b/drivers/phy/phy-core.c index 63bc12d7a73e5..153e0a27c7ee9 100644 --- a/drivers/phy/phy-core.c +++ b/drivers/phy/phy-core.c @@ -275,20 +275,21 @@ EXPORT_SYMBOL_GPL(phy_exit); int phy_power_on(struct phy *phy) { - int ret; + int ret = 0; if (!phy) - return 0; + goto out; if (phy->pwr) { ret = regulator_enable(phy->pwr); if (ret) - return ret; + goto out; } ret = phy_pm_runtime_get_sync(phy); if (ret < 0 && ret != -ENOTSUPP) - return ret; + goto err_pm_sync; + ret = 0; /* Override possible ret == -ENOTSUPP */ mutex_lock(&phy->mutex); @@ -296,19 +297,20 @@ int phy_power_on(struct phy *phy) ret = phy->ops->power_on(phy); if (ret < 0) { dev_err(&phy->dev, "phy poweron failed --> %d\n", ret); - goto out; + goto err_pwr_on; } } ++phy->power_count; mutex_unlock(&phy->mutex); return 0; -out: +err_pwr_on: mutex_unlock(&phy->mutex); phy_pm_runtime_put_sync(phy); +err_pm_sync: if (phy->pwr) regulator_disable(phy->pwr); - +out: return ret; } EXPORT_SYMBOL_GPL(phy_power_on); From feebbdd7d01430b5ca620bec2b637909d498fa0f Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 30 Nov 2015 21:39:53 -0800 Subject: [PATCH 1923/2091] phy: twl4030-usb: Relase usb phy on unload [ Upstream commit b241d31ef2f6a289d33dcaa004714b26e06f476f ] Otherwise rmmod omap2430; rmmod phy-twl4030-usb; modprobe omap2430 will try to use a non-existing phy and oops: Unable to handle kernel paging request at virtual address b6f7c1f0 ... 
[] (devm_usb_get_phy_by_node) from [] (omap2430_musb_init+0x44/0x2b4 [omap2430]) [] (omap2430_musb_init [omap2430]) from [] (musb_init_controller+0x194/0x878 [musb_hdrc]) Cc: stable@vger.kernel.org Cc: Bin Liu Cc: Felipe Balbi Cc: Kishon Vijay Abraham I Cc: NeilBrown Signed-off-by: Tony Lindgren Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Sasha Levin --- drivers/phy/phy-twl4030-usb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/phy/phy-twl4030-usb.c b/drivers/phy/phy-twl4030-usb.c index 6285f46f3ddb4..f8c818e96314d 100644 --- a/drivers/phy/phy-twl4030-usb.c +++ b/drivers/phy/phy-twl4030-usb.c @@ -754,6 +754,7 @@ static int twl4030_usb_remove(struct platform_device *pdev) struct twl4030_usb *twl = platform_get_drvdata(pdev); int val; + usb_remove_phy(&twl->phy); pm_runtime_get_sync(twl->dev); cancel_delayed_work(&twl->id_workaround_work); device_remove_file(twl->dev, &dev_attr_vbus); From bb789d042685425c595b63a01ed555d5c1c404eb Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 30 Nov 2015 21:39:54 -0800 Subject: [PATCH 1924/2091] phy: twl4030-usb: Fix unbalanced pm_runtime_enable on module reload [ Upstream commit 58a66dba1beac2121d931cda4682ae4d40816af5 ] If we reload phy-twl4030-usb, we get a warning about unbalanced pm_runtime_enable. Let's fix the issue and also fix idling of the device on unload before we attempt to shut it down. If we don't properly idle the PHY before shutting it down on removal, the twl4030 ends up consuming about 62mW of extra power compared to running idle with the module loaded. Cc: stable@vger.kernel.org Cc: Bin Liu Cc: Felipe Balbi Cc: Kishon Vijay Abraham I Cc: NeilBrown Signed-off-by: Tony Lindgren Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Sasha Levin --- drivers/phy/phy-twl4030-usb.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/phy/phy-twl4030-usb.c b/drivers/phy/phy-twl4030-usb.c index f8c818e96314d..fb9e30ed80184 100644 --- a/drivers/phy/phy-twl4030-usb.c +++ b/drivers/phy/phy-twl4030-usb.c @@ -719,6 +719,7 @@ static int twl4030_usb_probe(struct platform_device *pdev) pm_runtime_use_autosuspend(&pdev->dev); pm_runtime_set_autosuspend_delay(&pdev->dev, 2000); pm_runtime_enable(&pdev->dev); + pm_runtime_get_sync(&pdev->dev); /* Our job is to use irqs and status from the power module * to keep the transceiver disabled when nothing's connected. @@ -762,6 +763,13 @@ static int twl4030_usb_remove(struct platform_device *pdev) /* set transceiver mode to power on defaults */ twl4030_usb_set_mode(twl, -1); + /* idle ulpi before powering off */ + if (cable_present(twl->linkstat)) + pm_runtime_put_noidle(twl->dev); + pm_runtime_mark_last_busy(twl->dev); + pm_runtime_put_sync_suspend(twl->dev); + pm_runtime_disable(twl->dev); + /* autogate 60MHz ULPI clock, * clear dpll clock request for i2c access, * disable 32KHz @@ -776,11 +784,6 @@ static int twl4030_usb_remove(struct platform_device *pdev) /* disable complete OTG block */ twl4030_usb_clear_bits(twl, POWER_CTRL, POWER_CTRL_OTG_ENAB); - if (cable_present(twl->linkstat)) - pm_runtime_put_noidle(twl->dev); - pm_runtime_mark_last_busy(twl->dev); - pm_runtime_put(twl->dev); - return 0; } From de7b555fc95c1e54b6f4b1d0487ad0cc13e30eca Mon Sep 17 00:00:00 2001 From: Alexandra Yates Date: Fri, 5 Feb 2016 15:27:49 -0800 Subject: [PATCH 1925/2091] ahci: Intel DNV device IDs SATA [ Upstream commit 342decff2b846b46fa61eb5ee40986fab79a9a32 ] Adding Intel codename DNV platform device IDs for SATA. 
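For context, each new ID is simply another match entry in the driver's PCI table. The sketch below spells out roughly what a PCI_VDEVICE() entry stands for (illustrative demo_* table; the real entries also carry board_ahci in driver_data):

#include <linux/module.h>
#include <linux/pci.h>

static const struct pci_device_id demo_ahci_ids[] = {
	{ .vendor    = PCI_VENDOR_ID_INTEL,
	  .device    = 0x19b0,			/* DNV AHCI */
	  .subvendor = PCI_ANY_ID,
	  .subdevice = PCI_ANY_ID },
	{ }					/* terminating entry */
};
MODULE_DEVICE_TABLE(pci, demo_ahci_ids);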
Signed-off-by: Alexandra Yates Signed-off-by: Tejun Heo Cc: stable@vger.kernel.org Signed-off-by: Sasha Levin --- drivers/ata/ahci.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index e6ea912aee31a..666fd8a1500ac 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -262,6 +262,26 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(INTEL, 0x3b2b), board_ahci }, /* PCH RAID */ { PCI_VDEVICE(INTEL, 0x3b2c), board_ahci }, /* PCH RAID */ { PCI_VDEVICE(INTEL, 0x3b2f), board_ahci }, /* PCH AHCI */ + { PCI_VDEVICE(INTEL, 0x19b0), board_ahci }, /* DNV AHCI */ + { PCI_VDEVICE(INTEL, 0x19b1), board_ahci }, /* DNV AHCI */ + { PCI_VDEVICE(INTEL, 0x19b2), board_ahci }, /* DNV AHCI */ + { PCI_VDEVICE(INTEL, 0x19b3), board_ahci }, /* DNV AHCI */ + { PCI_VDEVICE(INTEL, 0x19b4), board_ahci }, /* DNV AHCI */ + { PCI_VDEVICE(INTEL, 0x19b5), board_ahci }, /* DNV AHCI */ + { PCI_VDEVICE(INTEL, 0x19b6), board_ahci }, /* DNV AHCI */ + { PCI_VDEVICE(INTEL, 0x19b7), board_ahci }, /* DNV AHCI */ + { PCI_VDEVICE(INTEL, 0x19bE), board_ahci }, /* DNV AHCI */ + { PCI_VDEVICE(INTEL, 0x19bF), board_ahci }, /* DNV AHCI */ + { PCI_VDEVICE(INTEL, 0x19c0), board_ahci }, /* DNV AHCI */ + { PCI_VDEVICE(INTEL, 0x19c1), board_ahci }, /* DNV AHCI */ + { PCI_VDEVICE(INTEL, 0x19c2), board_ahci }, /* DNV AHCI */ + { PCI_VDEVICE(INTEL, 0x19c3), board_ahci }, /* DNV AHCI */ + { PCI_VDEVICE(INTEL, 0x19c4), board_ahci }, /* DNV AHCI */ + { PCI_VDEVICE(INTEL, 0x19c5), board_ahci }, /* DNV AHCI */ + { PCI_VDEVICE(INTEL, 0x19c6), board_ahci }, /* DNV AHCI */ + { PCI_VDEVICE(INTEL, 0x19c7), board_ahci }, /* DNV AHCI */ + { PCI_VDEVICE(INTEL, 0x19cE), board_ahci }, /* DNV AHCI */ + { PCI_VDEVICE(INTEL, 0x19cF), board_ahci }, /* DNV AHCI */ { PCI_VDEVICE(INTEL, 0x1c02), board_ahci }, /* CPT AHCI */ { PCI_VDEVICE(INTEL, 0x1c03), board_ahci }, /* CPT AHCI */ { PCI_VDEVICE(INTEL, 0x1c04), board_ahci }, /* CPT RAID */ From 9e1a3771b412f52694e22a93cf188dbe9f0eab24 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Mon, 27 Apr 2015 17:58:38 +0800 Subject: [PATCH 1926/2091] workqueue: split apply_workqueue_attrs() into 3 stages [ Upstream commit 2d5f0764b5264d2954ba6e3deb04f4f5de8e4476 ] Current apply_workqueue_attrs() includes pwqs-allocation and pwqs-installation, so when we batch multiple apply_workqueue_attrs()s as a transaction, we can't ensure the transaction must succeed or fail as a complete unit. To solve this, we split apply_workqueue_attrs() into three stages. The first stage does the preparation: allocation memory, pwqs. The second stage does the attrs-installaion and pwqs-installation. The third stage frees the allocated memory and (old or unused) pwqs. As the result, batching multiple apply_workqueue_attrs()s can succeed or fail as a complete unit: 1) batch do all the first stage for all the workqueues 2) only commit all when all the above succeed. This patch is a preparation for the next patch ("Allow modifying low level unbound workqueue cpumask") which will do a multiple apply_workqueue_attrs(). The patch doesn't have functionality changed except two minor adjustment: 1) free_unbound_pwq() for the error path is removed, we use the heavier version put_pwq_unlocked() instead since the error path is rare. this adjustment simplifies the code. 2) the memory-allocation is also moved into wq_pool_mutex. this is needed to avoid to do the further splitting. tj: minor updates to comments. 
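To make the intended usage concrete, here is a hedged sketch of how a later caller inside kernel/workqueue.c could batch the three stages (the loop is illustrative; apply_wqattrs_prepare/commit/cleanup are the file-local helpers introduced by this patch, and the locking mirrors what apply_workqueue_attrs() itself does):

static int demo_apply_to_all(struct workqueue_struct **wqs,
			     const struct workqueue_attrs *attrs, int n)
{
	struct apply_wqattrs_ctx **ctxs;
	int i, ret = 0;

	ctxs = kcalloc(n, sizeof(*ctxs), GFP_KERNEL);
	if (!ctxs)
		return -ENOMEM;

	get_online_cpus();
	mutex_lock(&wq_pool_mutex);

	/* stage 1: prepare everything; nothing is visible to workers yet */
	for (i = 0; i < n; i++) {
		ctxs[i] = apply_wqattrs_prepare(wqs[i], attrs);
		if (!ctxs[i]) {
			ret = -ENOMEM;
			goto out;
		}
	}

	/* stage 2: commit only once every preparation has succeeded */
	for (i = 0; i < n; i++)
		apply_wqattrs_commit(ctxs[i]);
out:
	mutex_unlock(&wq_pool_mutex);
	put_online_cpus();

	/* stage 3: free old pwqs and any half-built state (NULL-safe) */
	for (i = 0; i < n; i++)
		apply_wqattrs_cleanup(ctxs[i]);
	kfree(ctxs);
	return ret;
}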
Suggested-by: Tejun Heo Cc: Christoph Lameter Cc: Kevin Hilman Cc: Lai Jiangshan Cc: Mike Galbraith Cc: Paul E. McKenney Cc: Tejun Heo Cc: Viresh Kumar Cc: Frederic Weisbecker Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo Signed-off-by: Sasha Levin --- kernel/workqueue.c | 199 ++++++++++++++++++++++++++------------------- 1 file changed, 115 insertions(+), 84 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 586ad91300b0f..26ff249240160 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3425,17 +3425,6 @@ static struct pool_workqueue *alloc_unbound_pwq(struct workqueue_struct *wq, return pwq; } -/* undo alloc_unbound_pwq(), used only in the error path */ -static void free_unbound_pwq(struct pool_workqueue *pwq) -{ - lockdep_assert_held(&wq_pool_mutex); - - if (pwq) { - put_unbound_pool(pwq->pool); - kmem_cache_free(pwq_cache, pwq); - } -} - /** * wq_calc_node_mask - calculate a wq_attrs' cpumask for the specified node * @attrs: the wq_attrs of interest @@ -3498,42 +3487,48 @@ static struct pool_workqueue *numa_pwq_tbl_install(struct workqueue_struct *wq, return old_pwq; } -/** - * apply_workqueue_attrs - apply new workqueue_attrs to an unbound workqueue - * @wq: the target workqueue - * @attrs: the workqueue_attrs to apply, allocated with alloc_workqueue_attrs() - * - * Apply @attrs to an unbound workqueue @wq. Unless disabled, on NUMA - * machines, this function maps a separate pwq to each NUMA node with - * possibles CPUs in @attrs->cpumask so that work items are affine to the - * NUMA node it was issued on. Older pwqs are released as in-flight work - * items finish. Note that a work item which repeatedly requeues itself - * back-to-back will stay on its current pwq. - * - * Performs GFP_KERNEL allocations. - * - * Return: 0 on success and -errno on failure. 
- */ -int apply_workqueue_attrs(struct workqueue_struct *wq, - const struct workqueue_attrs *attrs) +/* context to store the prepared attrs & pwqs before applying */ +struct apply_wqattrs_ctx { + struct workqueue_struct *wq; /* target workqueue */ + struct workqueue_attrs *attrs; /* attrs to apply */ + struct pool_workqueue *dfl_pwq; + struct pool_workqueue *pwq_tbl[]; +}; + +/* free the resources after success or abort */ +static void apply_wqattrs_cleanup(struct apply_wqattrs_ctx *ctx) +{ + if (ctx) { + int node; + + for_each_node(node) + put_pwq_unlocked(ctx->pwq_tbl[node]); + put_pwq_unlocked(ctx->dfl_pwq); + + free_workqueue_attrs(ctx->attrs); + + kfree(ctx); + } +} + +/* allocate the attrs and pwqs for later installation */ +static struct apply_wqattrs_ctx * +apply_wqattrs_prepare(struct workqueue_struct *wq, + const struct workqueue_attrs *attrs) { + struct apply_wqattrs_ctx *ctx; struct workqueue_attrs *new_attrs, *tmp_attrs; - struct pool_workqueue **pwq_tbl, *dfl_pwq; - int node, ret; + int node; - /* only unbound workqueues can change attributes */ - if (WARN_ON(!(wq->flags & WQ_UNBOUND))) - return -EINVAL; + lockdep_assert_held(&wq_pool_mutex); - /* creating multiple pwqs breaks ordering guarantee */ - if (WARN_ON((wq->flags & __WQ_ORDERED) && !list_empty(&wq->pwqs))) - return -EINVAL; + ctx = kzalloc(sizeof(*ctx) + nr_node_ids * sizeof(ctx->pwq_tbl[0]), + GFP_KERNEL); - pwq_tbl = kzalloc(nr_node_ids * sizeof(pwq_tbl[0]), GFP_KERNEL); new_attrs = alloc_workqueue_attrs(GFP_KERNEL); tmp_attrs = alloc_workqueue_attrs(GFP_KERNEL); - if (!pwq_tbl || !new_attrs || !tmp_attrs) - goto enomem; + if (!ctx || !new_attrs || !tmp_attrs) + goto out_free; /* make a copy of @attrs and sanitize it */ copy_workqueue_attrs(new_attrs, attrs); @@ -3546,76 +3541,112 @@ int apply_workqueue_attrs(struct workqueue_struct *wq, */ copy_workqueue_attrs(tmp_attrs, new_attrs); - /* - * CPUs should stay stable across pwq creations and installations. - * Pin CPUs, determine the target cpumask for each node and create - * pwqs accordingly. - */ - get_online_cpus(); - - mutex_lock(&wq_pool_mutex); - /* * If something goes wrong during CPU up/down, we'll fall back to * the default pwq covering whole @attrs->cpumask. Always create * it even if we don't use it immediately. 
*/ - dfl_pwq = alloc_unbound_pwq(wq, new_attrs); - if (!dfl_pwq) - goto enomem_pwq; + ctx->dfl_pwq = alloc_unbound_pwq(wq, new_attrs); + if (!ctx->dfl_pwq) + goto out_free; for_each_node(node) { if (wq_calc_node_cpumask(attrs, node, -1, tmp_attrs->cpumask)) { - pwq_tbl[node] = alloc_unbound_pwq(wq, tmp_attrs); - if (!pwq_tbl[node]) - goto enomem_pwq; + ctx->pwq_tbl[node] = alloc_unbound_pwq(wq, tmp_attrs); + if (!ctx->pwq_tbl[node]) + goto out_free; } else { - dfl_pwq->refcnt++; - pwq_tbl[node] = dfl_pwq; + ctx->dfl_pwq->refcnt++; + ctx->pwq_tbl[node] = ctx->dfl_pwq; } } - mutex_unlock(&wq_pool_mutex); + ctx->attrs = new_attrs; + ctx->wq = wq; + free_workqueue_attrs(tmp_attrs); + return ctx; + +out_free: + free_workqueue_attrs(tmp_attrs); + free_workqueue_attrs(new_attrs); + apply_wqattrs_cleanup(ctx); + return NULL; +} + +/* set attrs and install prepared pwqs, @ctx points to old pwqs on return */ +static void apply_wqattrs_commit(struct apply_wqattrs_ctx *ctx) +{ + int node; /* all pwqs have been created successfully, let's install'em */ - mutex_lock(&wq->mutex); + mutex_lock(&ctx->wq->mutex); - copy_workqueue_attrs(wq->unbound_attrs, new_attrs); + copy_workqueue_attrs(ctx->wq->unbound_attrs, ctx->attrs); /* save the previous pwq and install the new one */ for_each_node(node) - pwq_tbl[node] = numa_pwq_tbl_install(wq, node, pwq_tbl[node]); + ctx->pwq_tbl[node] = numa_pwq_tbl_install(ctx->wq, node, + ctx->pwq_tbl[node]); /* @dfl_pwq might not have been used, ensure it's linked */ - link_pwq(dfl_pwq); - swap(wq->dfl_pwq, dfl_pwq); + link_pwq(ctx->dfl_pwq); + swap(ctx->wq->dfl_pwq, ctx->dfl_pwq); - mutex_unlock(&wq->mutex); + mutex_unlock(&ctx->wq->mutex); +} - /* put the old pwqs */ - for_each_node(node) - put_pwq_unlocked(pwq_tbl[node]); - put_pwq_unlocked(dfl_pwq); +/** + * apply_workqueue_attrs - apply new workqueue_attrs to an unbound workqueue + * @wq: the target workqueue + * @attrs: the workqueue_attrs to apply, allocated with alloc_workqueue_attrs() + * + * Apply @attrs to an unbound workqueue @wq. Unless disabled, on NUMA + * machines, this function maps a separate pwq to each NUMA node with + * possibles CPUs in @attrs->cpumask so that work items are affine to the + * NUMA node it was issued on. Older pwqs are released as in-flight work + * items finish. Note that a work item which repeatedly requeues itself + * back-to-back will stay on its current pwq. + * + * Performs GFP_KERNEL allocations. + * + * Return: 0 on success and -errno on failure. + */ +int apply_workqueue_attrs(struct workqueue_struct *wq, + const struct workqueue_attrs *attrs) +{ + struct apply_wqattrs_ctx *ctx; + int ret = -ENOMEM; - put_online_cpus(); - ret = 0; - /* fall through */ -out_free: - free_workqueue_attrs(tmp_attrs); - free_workqueue_attrs(new_attrs); - kfree(pwq_tbl); - return ret; + /* only unbound workqueues can change attributes */ + if (WARN_ON(!(wq->flags & WQ_UNBOUND))) + return -EINVAL; -enomem_pwq: - free_unbound_pwq(dfl_pwq); - for_each_node(node) - if (pwq_tbl && pwq_tbl[node] != dfl_pwq) - free_unbound_pwq(pwq_tbl[node]); + /* creating multiple pwqs breaks ordering guarantee */ + if (WARN_ON((wq->flags & __WQ_ORDERED) && !list_empty(&wq->pwqs))) + return -EINVAL; + + /* + * CPUs should stay stable across pwq creations and installations. + * Pin CPUs, determine the target cpumask for each node and create + * pwqs accordingly. 
+ */ + get_online_cpus(); + + mutex_lock(&wq_pool_mutex); + ctx = apply_wqattrs_prepare(wq, attrs); mutex_unlock(&wq_pool_mutex); + + /* the ctx has been prepared successfully, let's commit it */ + if (ctx) { + apply_wqattrs_commit(ctx); + ret = 0; + } + put_online_cpus(); -enomem: - ret = -ENOMEM; - goto out_free; + + apply_wqattrs_cleanup(ctx); + + return ret; } /** From d3c4dd8843bef1885fabaa9f61d5d354ec2a5e3a Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 12 May 2015 20:32:29 +0800 Subject: [PATCH 1927/2091] workqueue: wq_pool_mutex protects the attrs-installation [ Upstream commit 5b95e1af8d17d85a17728f6de7dbff538e6e3c49 ] Current wq_pool_mutex doesn't proctect the attrs-installation, it results that ->unbound_attrs, ->numa_pwq_tbl[] and ->dfl_pwq can only be accessed under wq->mutex and causes some inconveniences. Example, wq_update_unbound_numa() has to acquire wq->mutex before fetching the wq->unbound_attrs->no_numa and the old_pwq. attrs-installation is a short operation, so this change will no cause any latency for other operations which also acquire the wq_pool_mutex. The only unprotected attrs-installation code is in apply_workqueue_attrs(), so this patch touches code less than comments. It is also a preparation patch for next several patches which read wq->unbound_attrs, wq->numa_pwq_tbl[] and wq->dfl_pwq with only wq_pool_mutex held. Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo Signed-off-by: Sasha Levin --- kernel/workqueue.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 26ff249240160..53b80c011341f 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -127,6 +127,11 @@ enum { * * PR: wq_pool_mutex protected for writes. Sched-RCU protected for reads. * + * PW: wq_pool_mutex and wq->mutex protected for writes. Either for reads. + * + * PWR: wq_pool_mutex and wq->mutex protected for writes. Either or + * sched-RCU for reads. + * * WQ: wq->mutex protected. * * WR: wq->mutex protected for writes. Sched-RCU protected for reads. 
@@ -247,8 +252,8 @@ struct workqueue_struct { int nr_drainers; /* WQ: drain in progress */ int saved_max_active; /* WQ: saved pwq max_active */ - struct workqueue_attrs *unbound_attrs; /* WQ: only for unbound wqs */ - struct pool_workqueue *dfl_pwq; /* WQ: only for unbound wqs */ + struct workqueue_attrs *unbound_attrs; /* PW: only for unbound wqs */ + struct pool_workqueue *dfl_pwq; /* PW: only for unbound wqs */ #ifdef CONFIG_SYSFS struct wq_device *wq_dev; /* I: for sysfs interface */ @@ -268,7 +273,7 @@ struct workqueue_struct { /* hot fields used during command issue, aligned to cacheline */ unsigned int flags ____cacheline_aligned; /* WQ: WQ_* flags */ struct pool_workqueue __percpu *cpu_pwqs; /* I: per-cpu pwqs */ - struct pool_workqueue __rcu *numa_pwq_tbl[]; /* FR: unbound pwqs indexed by node */ + struct pool_workqueue __rcu *numa_pwq_tbl[]; /* PWR: unbound pwqs indexed by node */ }; static struct kmem_cache *pwq_cache; @@ -347,6 +352,12 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq); lockdep_is_held(&wq->mutex), \ "sched RCU or wq->mutex should be held") +#define assert_rcu_or_wq_mutex_or_pool_mutex(wq) \ + rcu_lockdep_assert(rcu_read_lock_sched_held() || \ + lockdep_is_held(&wq->mutex) || \ + lockdep_is_held(&wq_pool_mutex), \ + "sched RCU, wq->mutex or wq_pool_mutex should be held") + #define for_each_cpu_worker_pool(pool, cpu) \ for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0]; \ (pool) < &per_cpu(cpu_worker_pools, cpu)[NR_STD_WORKER_POOLS]; \ @@ -551,7 +562,8 @@ static int worker_pool_assign_id(struct worker_pool *pool) * @wq: the target workqueue * @node: the node ID * - * This must be called either with pwq_lock held or sched RCU read locked. + * This must be called with any of wq_pool_mutex, wq->mutex or sched RCU + * read locked. * If the pwq needs to be used beyond the locking in effect, the caller is * responsible for guaranteeing that the pwq stays online. * @@ -560,7 +572,7 @@ static int worker_pool_assign_id(struct worker_pool *pool) static struct pool_workqueue *unbound_pwq_by_node(struct workqueue_struct *wq, int node) { - assert_rcu_or_wq_mutex(wq); + assert_rcu_or_wq_mutex_or_pool_mutex(wq); return rcu_dereference_raw(wq->numa_pwq_tbl[node]); } @@ -3477,6 +3489,7 @@ static struct pool_workqueue *numa_pwq_tbl_install(struct workqueue_struct *wq, { struct pool_workqueue *old_pwq; + lockdep_assert_held(&wq_pool_mutex); lockdep_assert_held(&wq->mutex); /* link_pwq() can handle duplicate calls */ @@ -3631,10 +3644,9 @@ int apply_workqueue_attrs(struct workqueue_struct *wq, * pwqs accordingly. */ get_online_cpus(); - mutex_lock(&wq_pool_mutex); + ctx = apply_wqattrs_prepare(wq, attrs); - mutex_unlock(&wq_pool_mutex); /* the ctx has been prepared successfully, let's commit it */ if (ctx) { @@ -3642,6 +3654,7 @@ int apply_workqueue_attrs(struct workqueue_struct *wq, ret = 0; } + mutex_unlock(&wq_pool_mutex); put_online_cpus(); apply_wqattrs_cleanup(ctx); From f3d69fd89df665c1fa1931f4af846039a4ac5dc4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 3 Feb 2016 13:54:25 -0500 Subject: [PATCH 1928/2091] workqueue: handle NUMA_NO_NODE for unbound pool_workqueue lookup [ Upstream commit d6e022f1d207a161cd88e08ef0371554680ffc46 ] When looking up the pool_workqueue to use for an unbound workqueue, workqueue assumes that the target CPU is always bound to a valid NUMA node. However, currently, when a CPU goes offline, the mapping is destroyed and cpu_to_node() returns NUMA_NO_NODE. 
This has always been broken but hasn't triggered often enough before 874bbfe600a6 ("workqueue: make sure delayed work run in local cpu"). After the commit, workqueue forcifully assigns the local CPU for delayed work items without explicit target CPU to fix a different issue. This widens the window where CPU can go offline while a delayed work item is pending causing delayed work items dispatched with target CPU set to an already offlined CPU. The resulting NUMA_NO_NODE mapping makes workqueue try to queue the work item on a NULL pool_workqueue and thus crash. While 874bbfe600a6 has been reverted for a different reason making the bug less visible again, it can still happen. Fix it by mapping NUMA_NO_NODE to the default pool_workqueue from unbound_pwq_by_node(). This is a temporary workaround. The long term solution is keeping CPU -> NODE mapping stable across CPU off/online cycles which is being worked on. Signed-off-by: Tejun Heo Reported-by: Mike Galbraith Cc: Tang Chen Cc: Rafael J. Wysocki Cc: Len Brown Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/g/1454424264.11183.46.camel@gmail.com Link: http://lkml.kernel.org/g/1453702100-2597-1-git-send-email-tangchen@cn.fujitsu.com Signed-off-by: Sasha Levin --- kernel/workqueue.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 53b80c011341f..6d631161705c5 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -573,6 +573,16 @@ static struct pool_workqueue *unbound_pwq_by_node(struct workqueue_struct *wq, int node) { assert_rcu_or_wq_mutex_or_pool_mutex(wq); + + /* + * XXX: @node can be NUMA_NO_NODE if CPU goes offline while a + * delayed item is pending. The plan is to keep CPU -> NODE + * mapping valid and stable across CPU on/offlines. Once that + * happens, this workaround can be removed. + */ + if (unlikely(node == NUMA_NO_NODE)) + return wq->dfl_pwq; + return rcu_dereference_raw(wq->numa_pwq_tbl[node]); } From 7071cc9b4f9d4fd6dee20bb8dd760a8f9e1ceec7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= Date: Fri, 5 Feb 2016 14:35:53 -0500 Subject: [PATCH 1929/2091] drm/radeon: hold reference to fences in radeon_sa_bo_new MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit f6ff4f67cdf8455d0a4226eeeaf5af17c37d05eb ] An arbitrary amount of time can pass between spin_unlock and radeon_fence_wait_any, so we need to ensure that nobody frees the fences from under us. Based on the analogous fix for amdgpu. 
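The same "pin before unlocking" idea in miniature, with made-up demo_* types rather than the radeon structures: anything that will be touched after the spinlock is released must hold its own reference first, and those references are dropped only after the wait.

#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

#define DEMO_NUM_RINGS 4

struct demo_fence {
	struct kref kref;
};

static void demo_fence_release(struct kref *kref)
{
	kfree(container_of(kref, struct demo_fence, kref));
}

static void demo_fence_get(struct demo_fence *f)
{
	if (f)				/* some slots may be empty */
		kref_get(&f->kref);
}

static void demo_fence_put(struct demo_fence *f)
{
	if (f)
		kref_put(&f->kref, demo_fence_release);
}

/* stand-in for the driver's wait; the real call may block for a long time */
int demo_fence_wait_any(struct demo_fence **fences, int n);

static int demo_wait_for_room(spinlock_t *lock, struct demo_fence **candidates)
{
	struct demo_fence *fences[DEMO_NUM_RINGS];
	int i, r;

	spin_lock(lock);
	for (i = 0; i < DEMO_NUM_RINGS; i++) {
		fences[i] = candidates[i];	/* chosen under the lock */
		demo_fence_get(fences[i]);	/* pin before unlocking */
	}
	spin_unlock(lock);

	r = demo_fence_wait_any(fences, DEMO_NUM_RINGS);

	for (i = 0; i < DEMO_NUM_RINGS; i++)
		demo_fence_put(fences[i]);	/* drop our pins */

	spin_lock(lock);
	/* continue scanning under the lock, as the real allocator loop does */
	spin_unlock(lock);
	return r;
}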
Signed-off-by: Nicolai Hähnle Reviewed-by: Christian König Cc: stable@vger.kernel.org Signed-off-by: Sasha Levin --- drivers/gpu/drm/radeon/radeon_sa.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon_sa.c b/drivers/gpu/drm/radeon/radeon_sa.c index c507896aca45a..197b157b73d09 100644 --- a/drivers/gpu/drm/radeon/radeon_sa.c +++ b/drivers/gpu/drm/radeon/radeon_sa.c @@ -349,8 +349,13 @@ int radeon_sa_bo_new(struct radeon_device *rdev, /* see if we can skip over some allocations */ } while (radeon_sa_bo_next_hole(sa_manager, fences, tries)); + for (i = 0; i < RADEON_NUM_RINGS; ++i) + radeon_fence_ref(fences[i]); + spin_unlock(&sa_manager->wq.lock); r = radeon_fence_wait_any(rdev, fences, false); + for (i = 0; i < RADEON_NUM_RINGS; ++i) + radeon_fence_unref(&fences[i]); spin_lock(&sa_manager->wq.lock); /* if we have nothing to wait for block */ if (r == -ENOENT) { From 4f55047bff0dd28d65e59455fb16d23b8dafcf73 Mon Sep 17 00:00:00 2001 From: Anton Protopopov Date: Wed, 10 Feb 2016 12:50:21 -0500 Subject: [PATCH 1930/2091] cifs: fix erroneous return value [ Upstream commit 4b550af519854421dfec9f7732cdddeb057134b2 ] The setup_ntlmv2_rsp() function may return positive value ENOMEM instead of -ENOMEM in case of kmalloc failure. Signed-off-by: Anton Protopopov CC: Stable Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/cifs/cifsencrypt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index afa09fce81515..e682b36a210f2 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -714,7 +714,7 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp) ses->auth_key.response = kmalloc(baselen + tilen, GFP_KERNEL); if (!ses->auth_key.response) { - rc = ENOMEM; + rc = -ENOMEM; ses->auth_key.len = 0; goto setup_ntlmv2_rsp_ret; } From 2e35e9c4671925a7af5a2d3fe4f5f614e13a0d6d Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Tue, 15 Dec 2015 10:16:43 +0100 Subject: [PATCH 1931/2091] s390/dasd: prevent incorrect length error under z/VM after PAV changes [ Upstream commit 020bf042e5b397479c1174081b935d0ff15d1a64 ] The channel checks the specified length and the provided amount of data for CCWs and provides an incorrect length error if the size does not match. Under z/VM with simulation activated the length may get changed. Having the suppress length indication bit set is stated as good CCW coding practice and avoids errors under z/VM. Cc: stable@vger.kernel.org Signed-off-by: Stefan Haberland Signed-off-by: Martin Schwidefsky Signed-off-by: Sasha Levin --- drivers/s390/block/dasd_alias.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/block/dasd_alias.c b/drivers/s390/block/dasd_alias.c index a2597e683e790..d52d7a23b2e7e 100644 --- a/drivers/s390/block/dasd_alias.c +++ b/drivers/s390/block/dasd_alias.c @@ -722,7 +722,7 @@ static int reset_summary_unit_check(struct alias_lcu *lcu, ASCEBC((char *) &cqr->magic, 4); ccw = cqr->cpaddr; ccw->cmd_code = DASD_ECKD_CCW_RSCK; - ccw->flags = 0 ; + ccw->flags = CCW_FLAG_SLI; ccw->count = 16; ccw->cda = (__u32)(addr_t) cqr->data; ((char *)cqr->data)[0] = reason; From 5ae3f5ad236f18bda2486bc557333427ec8a6797 Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Tue, 15 Dec 2015 10:45:05 +0100 Subject: [PATCH 1932/2091] s390/dasd: fix refcount for PAV reassignment [ Upstream commit 9d862ababb609439c5d6987f6d3ddd09e703aa0b ] Add refcount to the DASD device when a summary unit check worker is scheduled. 
This prevents that the device is set offline with worker in place. Cc: stable@vger.kernel.org Signed-off-by: Stefan Haberland Signed-off-by: Martin Schwidefsky Signed-off-by: Sasha Levin --- drivers/s390/block/dasd_alias.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/s390/block/dasd_alias.c b/drivers/s390/block/dasd_alias.c index d52d7a23b2e7e..6a64e86e8ccd1 100644 --- a/drivers/s390/block/dasd_alias.c +++ b/drivers/s390/block/dasd_alias.c @@ -264,8 +264,10 @@ void dasd_alias_disconnect_device_from_lcu(struct dasd_device *device) spin_unlock_irqrestore(&lcu->lock, flags); cancel_work_sync(&lcu->suc_data.worker); spin_lock_irqsave(&lcu->lock, flags); - if (device == lcu->suc_data.device) + if (device == lcu->suc_data.device) { + dasd_put_device(device); lcu->suc_data.device = NULL; + } } was_pending = 0; if (device == lcu->ruac_data.device) { @@ -273,8 +275,10 @@ void dasd_alias_disconnect_device_from_lcu(struct dasd_device *device) was_pending = 1; cancel_delayed_work_sync(&lcu->ruac_data.dwork); spin_lock_irqsave(&lcu->lock, flags); - if (device == lcu->ruac_data.device) + if (device == lcu->ruac_data.device) { + dasd_put_device(device); lcu->ruac_data.device = NULL; + } } private->lcu = NULL; spin_unlock_irqrestore(&lcu->lock, flags); @@ -549,8 +553,10 @@ static void lcu_update_work(struct work_struct *work) if ((rc && (rc != -EOPNOTSUPP)) || (lcu->flags & NEED_UAC_UPDATE)) { DBF_DEV_EVENT(DBF_WARNING, device, "could not update" " alias data in lcu (rc = %d), retry later", rc); - schedule_delayed_work(&lcu->ruac_data.dwork, 30*HZ); + if (!schedule_delayed_work(&lcu->ruac_data.dwork, 30*HZ)) + dasd_put_device(device); } else { + dasd_put_device(device); lcu->ruac_data.device = NULL; lcu->flags &= ~UPDATE_PENDING; } @@ -593,8 +599,10 @@ static int _schedule_lcu_update(struct alias_lcu *lcu, */ if (!usedev) return -EINVAL; + dasd_get_device(usedev); lcu->ruac_data.device = usedev; - schedule_delayed_work(&lcu->ruac_data.dwork, 0); + if (!schedule_delayed_work(&lcu->ruac_data.dwork, 0)) + dasd_put_device(usedev); return 0; } @@ -926,6 +934,7 @@ static void summary_unit_check_handling_work(struct work_struct *work) /* 3. read new alias configuration */ _schedule_lcu_update(lcu, device); lcu->suc_data.device = NULL; + dasd_put_device(device); spin_unlock_irqrestore(&lcu->lock, flags); } @@ -985,6 +994,8 @@ void dasd_alias_handle_summary_unit_check(struct dasd_device *device, } lcu->suc_data.reason = reason; lcu->suc_data.device = device; + dasd_get_device(device); spin_unlock(&lcu->lock); - schedule_work(&lcu->suc_data.worker); + if (!schedule_work(&lcu->suc_data.worker)) + dasd_put_device(device); }; From 1c35d875d020c798da316ddbac5da7be93b6901c Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 10 Feb 2016 09:25:17 +0100 Subject: [PATCH 1933/2091] ARM: 8519/1: ICST: try other dividends than 1 [ Upstream commit e972c37459c813190461dabfeaac228e00aae259 ] Since the dawn of time the ICST code has only supported divide by one or hang in an eternal loop. Luckily we were always dividing by one because the reference frequency for the systems using the ICSTs is 24MHz and the [min,max] values for the PLL input if [10,320] MHz for ICST307 and [6,200] for ICST525, so the loop will always terminate immediately without assigning any divisor for the reference frequency. 
But for the code to make sense, let's insert the missing i++ Reported-by: David Binderman Cc: stable@vger.kernel.org Signed-off-by: Linus Walleij Signed-off-by: Russell King Signed-off-by: Sasha Levin --- arch/arm/common/icst.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/common/icst.c b/arch/arm/common/icst.c index d3c0e69df2591..d7ed252708c57 100644 --- a/arch/arm/common/icst.c +++ b/arch/arm/common/icst.c @@ -62,6 +62,7 @@ icst_hz_to_vco(const struct icst_params *p, unsigned long freq) if (f > p->vco_min && f <= p->vco_max) break; + i++; } while (i < 8); if (i >= 8) From 3a68deb6673cffe2d66f0f0ace615bc0cb4c1153 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 13 Nov 2015 13:44:28 +0100 Subject: [PATCH 1934/2091] btrfs: properly set the termination value of ctx->pos in readdir MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit bc4ef7592f657ae81b017207a1098817126ad4cb ] The value of ctx->pos in the last readdir call is supposed to be set to INT_MAX due to 32bit compatibility, unless 'pos' is intentially set to a larger value, then it's LLONG_MAX. There's a report from PaX SIZE_OVERFLOW plugin that "ctx->pos++" overflows (https://forums.grsecurity.net/viewtopic.php?f=1&t=4284), on a 64bit arch, where the value is 0x7fffffffffffffff ie. LLONG_MAX before the increment. We can get to that situation like that: * emit all regular readdir entries * still in the same call to readdir, bump the last pos to INT_MAX * next call to readdir will not emit any entries, but will reach the bump code again, finds pos to be INT_MAX and sets it to LLONG_MAX Normally this is not a problem, but if we call readdir again, we'll find 'pos' set to LLONG_MAX and the unconditional increment will overflow. The report from Victor at (http://thread.gmane.org/gmane.comp.file-systems.btrfs/49500) with debugging print shows that pattern: Overflow: e Overflow: 7fffffff Overflow: 7fffffffffffffff PAX: size overflow detected in function btrfs_real_readdir fs/btrfs/inode.c:5760 cicus.935_282 max, count: 9, decl: pos; num: 0; context: dir_context; CPU: 0 PID: 2630 Comm: polkitd Not tainted 4.2.3-grsec #1 Hardware name: Gigabyte Technology Co., Ltd. H81ND2H/H81ND2H, BIOS F3 08/11/2015 ffffffff81901608 0000000000000000 ffffffff819015e6 ffffc90004973d48 ffffffff81742f0f 0000000000000007 ffffffff81901608 ffffc90004973d78 ffffffff811cb706 0000000000000000 ffff8800d47359e0 ffffc90004973ed8 Call Trace: [] dump_stack+0x4c/0x7f [] report_size_overflow+0x36/0x40 [] btrfs_real_readdir+0x69c/0x6d0 [] iterate_dir+0xa8/0x150 [] ? __fget_light+0x2d/0x70 [] SyS_getdents+0xba/0x1c0 Overflow: 1a [] ? iterate_dir+0x150/0x150 [] entry_SYSCALL_64_fastpath+0x12/0x83 The jump from 7fffffff to 7fffffffffffffff happens when new dir entries are not yet synced and are processed from the delayed list. Then the code could go to the bump section again even though it might not emit any new dir entries from the delayed list. The fix avoids entering the "bump" section again once we've finished emitting the entries, both for synced and delayed entries. 
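A hedged sketch of the guard in a generic readdir-style iterator (demo_* names are illustrative, not btrfs code): the position is bumped to its termination value only when the current call actually emitted something, so repeated calls on an already finished directory can never increment past the sentinel.

#include <linux/fs.h>
#include <linux/types.h>

struct demo_entry {
	const char *name;
	int namelen;
	u64 ino;
	unsigned type;
};

/* stand-in for walking the directory index; returns false when exhausted */
bool demo_next_entry(void *dir, loff_t pos, struct demo_entry *e);

static int demo_readdir(struct file *file, struct dir_context *ctx)
{
	struct demo_entry e;
	bool emitted = false;

	while (demo_next_entry(file->private_data, ctx->pos, &e)) {
		if (!dir_emit(ctx, e.name, e.namelen, e.ino, e.type))
			return 0;	/* buffer full, keep pos as-is */
		emitted = true;
		ctx->pos++;
	}

	/*
	 * pos 0 and 1 are "." and "..".  Past that, an empty pass means a
	 * previous call already set the termination value; bumping it
	 * again on every call would eventually overflow.
	 */
	if (ctx->pos > 2 && !emitted)
		return 0;

	ctx->pos++;	/* move past the last entry exactly once */
	return 0;
}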
References: https://forums.grsecurity.net/viewtopic.php?f=1&t=4284 Reported-by: Victor CC: stable@vger.kernel.org Signed-off-by: David Sterba Tested-by: Holger Hoffstätte Signed-off-by: Chris Mason Signed-off-by: Sasha Levin --- fs/btrfs/delayed-inode.c | 3 ++- fs/btrfs/delayed-inode.h | 2 +- fs/btrfs/inode.c | 14 +++++++++++++- 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index a2ae42720a6af..bc2d048a9eb92 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1690,7 +1690,7 @@ int btrfs_should_delete_dir_index(struct list_head *del_list, * */ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx, - struct list_head *ins_list) + struct list_head *ins_list, bool *emitted) { struct btrfs_dir_item *di; struct btrfs_delayed_item *curr, *next; @@ -1734,6 +1734,7 @@ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx, if (over) return 1; + *emitted = true; } return 0; } diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h index f70119f254216..0167853c84aea 100644 --- a/fs/btrfs/delayed-inode.h +++ b/fs/btrfs/delayed-inode.h @@ -144,7 +144,7 @@ void btrfs_put_delayed_items(struct list_head *ins_list, int btrfs_should_delete_dir_index(struct list_head *del_list, u64 index); int btrfs_readdir_delayed_dir_index(struct dir_context *ctx, - struct list_head *ins_list); + struct list_head *ins_list, bool *emitted); /* for init */ int __init btrfs_delayed_inode_init(void); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index df4e0462976e3..b114a0539d3dc 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5666,6 +5666,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) char *name_ptr; int name_len; int is_curr = 0; /* ctx->pos points to the current index? */ + bool emitted; /* FIXME, use a real flag for deciding about the key type */ if (root->fs_info->tree_root == root) @@ -5694,6 +5695,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) if (ret < 0) goto err; + emitted = false; while (1) { leaf = path->nodes[0]; slot = path->slots[0]; @@ -5773,6 +5775,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) if (over) goto nopos; + emitted = true; di_len = btrfs_dir_name_len(leaf, di) + btrfs_dir_data_len(leaf, di) + sizeof(*di); di_cur += di_len; @@ -5785,11 +5788,20 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) if (key_type == BTRFS_DIR_INDEX_KEY) { if (is_curr) ctx->pos++; - ret = btrfs_readdir_delayed_dir_index(ctx, &ins_list); + ret = btrfs_readdir_delayed_dir_index(ctx, &ins_list, &emitted); if (ret) goto nopos; } + /* + * If we haven't emitted any dir entry, we must not touch ctx->pos as + * it was was set to the termination value in previous call. We assume + * that "." and ".." were emitted if we reach this point and set the + * termination value as well for an empty directory. + */ + if (ctx->pos > 2 && !emitted) + goto nopos; + /* Reached end of directory/root. Bump pos past the last item. */ ctx->pos++; From 9d7679794e9cd701890d997a76024b2239f90638 Mon Sep 17 00:00:00 2001 From: Insu Yun Date: Fri, 12 Feb 2016 01:15:59 -0500 Subject: [PATCH 1935/2091] ext4: fix potential integer overflow [ Upstream commit 46901760b46064964b41015d00c140c83aa05bcf ] Since sizeof(ext_new_group_data) > sizeof(ext_new_flex_group_data), integer overflow could be happened. Therefore, need to fix integer overflow sanitization. 
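The underlying rule is that an overflow guard must be computed against the element size that is actually multiplied in the subsequent allocation. A minimal sketch of that pattern, with a hypothetical helper rather than the ext4 code:

    /*
     * Illustrative only: reject a count that would overflow the
     * count * elem_size multiplication performed by the allocation.
     */
    static void *alloc_counted(unsigned long count, size_t elem_size)
    {
            if (elem_size && count >= UINT_MAX / elem_size)
                    return NULL;
            return kmalloc(count * elem_size, GFP_KERNEL);
    }

In practice kmalloc_array() embodies the same check, as the qxl patch further down in this series demonstrates.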
Cc: stable@vger.kernel.org Signed-off-by: Insu Yun Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/ext4/resize.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index cf0c472047e3a..0e783b9f70071 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -198,7 +198,7 @@ static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned long flexbg_size) if (flex_gd == NULL) goto out3; - if (flexbg_size >= UINT_MAX / sizeof(struct ext4_new_flex_group_data)) + if (flexbg_size >= UINT_MAX / sizeof(struct ext4_new_group_data)) goto out2; flex_gd->count = flexbg_size; From a285eee1ce6b0efc4bcc2599cd3fd96dd6103e6d Mon Sep 17 00:00:00 2001 From: Eryu Guan Date: Fri, 12 Feb 2016 01:20:43 -0500 Subject: [PATCH 1936/2091] ext4: don't read blocks from disk after extents being swapped [ Upstream commit bcff24887d00bce102e0857d7b0a8c44a40f53d1 ] I notice ext4/307 fails occasionally on ppc64 host, reporting md5 checksum mismatch after moving data from original file to donor file. The reason is that move_extent_per_page() calls __block_write_begin() and block_commit_write() to write saved data from original inode blocks to donor inode blocks, but __block_write_begin() not only maps buffer heads but also reads block content from disk if the size is not block size aligned. At this time the physical block number in mapped buffer head is pointing to the donor file not the original file, and that results in reading wrong data to page, which get written to disk in following block_commit_write call. This also can be reproduced by the following script on 1k block size ext4 on x86_64 host: mnt=/mnt/ext4 donorfile=$mnt/donor testfile=$mnt/testfile e4compact=~/xfstests/src/e4compact rm -f $donorfile $testfile # reserve space for donor file, written by 0xaa and sync to disk to # avoid EBUSY on EXT4_IOC_MOVE_EXT xfs_io -fc "pwrite -S 0xaa 0 1m" -c "fsync" $donorfile # create test file written by 0xbb xfs_io -fc "pwrite -S 0xbb 0 1023" -c "fsync" $testfile # compute initial md5sum md5sum $testfile | tee md5sum.txt # drop cache, force e4compact to read data from disk echo 3 > /proc/sys/vm/drop_caches # test defrag echo "$testfile" | $e4compact -i -v -f $donorfile # check md5sum md5sum -c md5sum.txt Fix it by creating & mapping buffer heads only but not reading blocks from disk, because all the data in page is guaranteed to be up-to-date in mext_page_mkuptodate(). 
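A simplified sketch of the approach described in the last paragraph, mapping each block of an already up-to-date page without triggering a read (hypothetical helper; the real change to move_extent_per_page() is in the diff below):

    /*
     * Illustrative only: attach buffers to the page if needed and map them
     * block by block with create == 0, never reading from disk.
     */
    static int map_page_buffers(struct inode *inode, struct page *page,
                                sector_t first_blk, int nr_blocks)
    {
            struct buffer_head *bh;
            int i, err = 0;

            if (!page_has_buffers(page))
                    create_empty_buffers(page, 1 << inode->i_blkbits, 0);

            bh = page_buffers(page);
            for (i = 0; i < nr_blocks; i++, bh = bh->b_this_page) {
                    err = ext4_get_block(inode, first_blk + i, bh, 0);
                    if (err < 0)
                            break;
            }
            return err;
    }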
Cc: stable@vger.kernel.org Signed-off-by: Eryu Guan Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/ext4/move_extent.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 370420bfae8d7..7da8ac1047f8f 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -268,11 +268,12 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, ext4_lblk_t orig_blk_offset, donor_blk_offset; unsigned long blocksize = orig_inode->i_sb->s_blocksize; unsigned int tmp_data_size, data_size, replaced_size; - int err2, jblocks, retries = 0; + int i, err2, jblocks, retries = 0; int replaced_count = 0; int from = data_offset_in_page << orig_inode->i_blkbits; int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; struct super_block *sb = orig_inode->i_sb; + struct buffer_head *bh = NULL; /* * It needs twice the amount of ordinary journal buffers because @@ -383,8 +384,16 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, } /* Perform all necessary steps similar write_begin()/write_end() * but keeping in mind that i_size will not change */ - *err = __block_write_begin(pagep[0], from, replaced_size, - ext4_get_block); + if (!page_has_buffers(pagep[0])) + create_empty_buffers(pagep[0], 1 << orig_inode->i_blkbits, 0); + bh = page_buffers(pagep[0]); + for (i = 0; i < data_offset_in_page; i++) + bh = bh->b_this_page; + for (i = 0; i < block_len_in_page; i++) { + *err = ext4_get_block(orig_inode, orig_blk_offset + i, bh, 0); + if (*err < 0) + break; + } if (!*err) *err = block_commit_write(pagep[0], from, from + replaced_size); From d185fa457006e98aa975ed6c0e7d2ddfe3d26695 Mon Sep 17 00:00:00 2001 From: Ryan Ware Date: Thu, 11 Feb 2016 15:58:44 -0800 Subject: [PATCH 1937/2091] EVM: Use crypto_memneq() for digest comparisons [ Upstream commit 613317bd212c585c20796c10afe5daaa95d4b0a1 ] This patch fixes vulnerability CVE-2016-2085. The problem exists because the vm_verify_hmac() function includes a use of memcmp(). Unfortunately, this allows timing side channel attacks; specifically a MAC forgery complexity drop from 2^128 to 2^12. This patch changes the memcmp() to the cryptographically safe crypto_memneq(). Reported-by: Xiaofei Rex Guo Signed-off-by: Ryan Ware Cc: stable@vger.kernel.org Signed-off-by: Mimi Zohar Signed-off-by: James Morris Signed-off-by: Sasha Levin --- security/integrity/evm/evm_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c index 5820914988191..d6bc2b3af9efe 100644 --- a/security/integrity/evm/evm_main.c +++ b/security/integrity/evm/evm_main.c @@ -23,6 +23,7 @@ #include #include #include +#include #include "evm.h" int evm_initialized; @@ -148,7 +149,7 @@ static enum integrity_status evm_verify_hmac(struct dentry *dentry, xattr_value_len, calc.digest); if (rc) break; - rc = memcmp(xattr_data->digest, calc.digest, + rc = crypto_memneq(xattr_data->digest, calc.digest, sizeof(calc.digest)); if (rc) rc = -EINVAL; From f3dd341b9230ac943f317efa4c5b1bfae587434b Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Fri, 12 Feb 2016 09:39:15 +0100 Subject: [PATCH 1938/2091] bio: return EINTR if copying to user space got interrupted [ Upstream commit 2d99b55d378c996b9692a0c93dd25f4ed5d58934 ] Commit 35dc248383bbab0a7203fca4d722875bc81ef091 introduced a check for current->mm to see if we have a user space context and only copies data if we do. 
Now if an IO gets interrupted by a signal data isn't copied into user space any more (as we don't have a user space context) but user space isn't notified about it. This patch modifies the behaviour to return -EINTR from bio_uncopy_user() to notify userland that a signal has interrupted the syscall, otherwise it could lead to a situation where the caller may get a buffer with no data returned. This can be reproduced by issuing SG_IO ioctl()s in one thread while constantly sending signals to it. Fixes: 35dc248 [SCSI] sg: Fix user memory corruption when SG_IO is interrupted by a signal Signed-off-by: Johannes Thumshirn Signed-off-by: Hannes Reinecke Cc: stable@vger.kernel.org # v.3.11+ Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/bio.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/block/bio.c b/block/bio.c index 4441522ca339e..cbce3e2208f4c 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1122,9 +1122,12 @@ int bio_uncopy_user(struct bio *bio) if (!bio_flagged(bio, BIO_NULL_MAPPED)) { /* * if we're in a workqueue, the request is orphaned, so - * don't copy into a random user address space, just free. + * don't copy into a random user address space, just free + * and return -EINTR so user space doesn't expect any data. */ - if (current->mm && bio_data_dir(bio) == READ) + if (!current->mm) + ret = -EINTR; + else if (bio_data_dir(bio) == READ) ret = bio_copy_to_iter(bio, bmd->iter); if (bmd->is_our_pages) bio_free_pages(bio); From 1ea63b629c9c53af6cdde4daf166b3d31b3e9cfe Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Sat, 13 Feb 2016 11:08:06 +0300 Subject: [PATCH 1939/2091] ALSA: usb-audio: avoid freeing umidi object twice [ Upstream commit 07d86ca93db7e5cdf4743564d98292042ec21af7 ] The 'umidi' object will be free'd on the error path by snd_usbmidi_free() when tearing down the rawmidi interface. So we shouldn't try to free it in snd_usbmidi_create() after having registered the rawmidi interface. Found by KASAN. Signed-off-by: Andrey Konovalov Acked-by: Clemens Ladisch Cc: Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/usb/midi.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/usb/midi.c b/sound/usb/midi.c index bec63e0d26056..f059326a4914d 100644 --- a/sound/usb/midi.c +++ b/sound/usb/midi.c @@ -2451,7 +2451,6 @@ int snd_usbmidi_create(struct snd_card *card, else err = snd_usbmidi_create_endpoints(umidi, endpoints); if (err < 0) { - snd_usbmidi_free(umidi); return err; } From 712394523149ed77377fcc9a264ac35245bd16a3 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 9 Feb 2016 15:50:21 +1100 Subject: [PATCH 1940/2091] powerpc/eeh: Fix stale cached primary bus [ Upstream commit 05ba75f848647135f063199dc0e9f40fee769724 ] When PE is created, its primary bus is cached to pe->bus. At later point, the cached primary bus is returned from eeh_pe_bus_get(). However, we could get stale cached primary bus and run into kernel crash in one case: full hotplug as part of fenced PHB error recovery releases all PCI busses under the PHB at unplugging time and recreate them at plugging time. pe->bus is still dereferencing the PCI bus that was released. This adds another PE flag (EEH_PE_PRI_BUS) to represent the validity of pe->bus. pe->bus is updated when its first child EEH device is online and the flag is set. Before unplugging in full hotplug for error recovery, the flag is cleared. 
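The idea reduces to a cached pointer paired with a validity flag that is cleared before the pointed-to object can be released. A minimal sketch with hypothetical names (the real flag is EEH_PE_PRI_BUS in the diff below):

    #define PE_BUS_VALID    (1UL << 0)      /* hypothetical flag */

    struct pe_cache {                       /* hypothetical type */
            unsigned long   state;
            struct pci_bus  *bus;
    };

    static struct pci_bus *pe_cached_bus(struct pe_cache *pe)
    {
            /* Only trust pe->bus while the flag says it is current. */
            return (pe->state & PE_BUS_VALID) ? pe->bus : NULL;
    }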
Fixes: 8cdb2833 ("powerpc/eeh: Trace PCI bus from PE") Cc: stable@vger.kernel.org #v3.11+ Reported-by: Andrew Donnellan Reported-by: Pradipta Ghosh Signed-off-by: Gavin Shan Tested-by: Andrew Donnellan Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin --- arch/powerpc/include/asm/eeh.h | 1 + arch/powerpc/kernel/eeh_driver.c | 3 +++ arch/powerpc/kernel/eeh_pe.c | 2 +- arch/powerpc/platforms/powernv/eeh-powernv.c | 5 ++++- 4 files changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index a52db28ecc1e1..e4de79abfe703 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -79,6 +79,7 @@ struct pci_dn; #define EEH_PE_KEEP (1 << 8) /* Keep PE on hotplug */ #define EEH_PE_CFG_RESTRICTED (1 << 9) /* Block config on error */ #define EEH_PE_REMOVED (1 << 10) /* Removed permanently */ +#define EEH_PE_PRI_BUS (1 << 11) /* Cached primary bus */ struct eeh_pe { int type; /* PE type: PHB/Bus/Device */ diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 24768ff3cb730..90cc67904dc67 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -561,6 +561,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) */ eeh_pe_state_mark(pe, EEH_PE_KEEP); if (bus) { + eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); pci_lock_rescan_remove(); pcibios_remove_pci_devices(bus); pci_unlock_rescan_remove(); @@ -792,6 +793,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) * the their PCI config any more. */ if (frozen_bus) { + eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); pci_lock_rescan_remove(); @@ -875,6 +877,7 @@ static void eeh_handle_special_event(void) continue; /* Notify all devices to be down */ + eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); bus = eeh_pe_bus_get(phb_pe); eeh_pe_dev_traverse(pe, eeh_report_failure, NULL); diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index 22f6d954ef89e..c3e0420b8a424 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -906,7 +906,7 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe) bus = pe->phb->bus; } else if (pe->type & EEH_PE_BUS || pe->type & EEH_PE_DEVICE) { - if (pe->bus) { + if (pe->state & EEH_PE_PRI_BUS) { bus = pe->bus; goto out; } diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index ce738ab3d5a9f..abb396876b9ab 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -455,9 +455,12 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data) * PCI devices of the PE are expected to be removed prior * to PE reset. */ - if (!edev->pe->bus) + if (!(edev->pe->state & EEH_PE_PRI_BUS)) { edev->pe->bus = pci_find_bus(hose->global_number, pdn->busno); + if (edev->pe->bus) + edev->pe->state |= EEH_PE_PRI_BUS; + } /* * Enable EEH explicitly so that we will do EEH check From d26d8b485d21ecc9e466b2a1e02c5962e46624de Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 15 Feb 2016 16:20:24 +0100 Subject: [PATCH 1941/2091] ALSA: seq: Fix leak of pool buffer at concurrent writes [ Upstream commit d99a36f4728fcbcc501b78447f625bdcce15b842 ] When multiple concurrent writes happen on the ALSA sequencer device right after the open, it may try to allocate vmalloc buffer for each write and leak some of them. 
It's because the presence check and the assignment of the buffer is done outside the spinlock for the pool. The fix is to move the check and the assignment into the spinlock. (The current implementation is suboptimal, as there can be multiple unnecessary vmallocs because the allocation is done before the check in the spinlock. But the pool size is already checked beforehand, so this isn't a big problem; that is, the only possible path is the multiple writes before any pool assignment, and practically seen, the current coverage should be "good enough".) The issue was triggered by syzkaller fuzzer. BugLink: http://lkml.kernel.org/r/CACT4Y+bSzazpXNvtAr=WXaL8hptqjHwqEyFA+VN2AWEx=aurkg@mail.gmail.com Reported-by: Dmitry Vyukov Tested-by: Dmitry Vyukov Cc: Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/core/seq/seq_memory.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/sound/core/seq/seq_memory.c b/sound/core/seq/seq_memory.c index 801076687bb16..c850345c43b53 100644 --- a/sound/core/seq/seq_memory.c +++ b/sound/core/seq/seq_memory.c @@ -383,15 +383,20 @@ int snd_seq_pool_init(struct snd_seq_pool *pool) if (snd_BUG_ON(!pool)) return -EINVAL; - if (pool->ptr) /* should be atomic? */ - return 0; - pool->ptr = vmalloc(sizeof(struct snd_seq_event_cell) * pool->size); - if (!pool->ptr) + cellptr = vmalloc(sizeof(struct snd_seq_event_cell) * pool->size); + if (!cellptr) return -ENOMEM; /* add new cells to the free cell list */ spin_lock_irqsave(&pool->lock, flags); + if (pool->ptr) { + spin_unlock_irqrestore(&pool->lock, flags); + vfree(cellptr); + return 0; + } + + pool->ptr = cellptr; pool->free = NULL; for (cell = 0; cell < pool->size; cell++) { From 800b26634a89a88bbbc341c4d1e84f5141c1e38a Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 15 Feb 2016 16:37:24 +0100 Subject: [PATCH 1942/2091] ALSA: hda - Cancel probe work instead of flush at remove [ Upstream commit 0b8c82190c12e530eb6003720dac103bf63e146e ] The commit [991f86d7ae4e: ALSA: hda - Flush the pending probe work at remove] introduced the sync of async probe work at remove for fixing the race. However, this may lead to another hangup when the module removal is performed quickly before starting the probe work, because it issues flush_work() and it's blocked forever. The workaround is to use cancel_work_sync() instead of flush_work() there. 
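A minimal sketch of that removal path with a hypothetical device structure (not the HDA driver code, whose change follows below):

    /*
     * Illustrative only: cancel_work_sync() removes a still-pending work
     * item and waits for it if it is already running, which is exactly what
     * a fast module unload needs here.
     */
    static void my_remove(struct my_chip *chip)     /* hypothetical type */
    {
            cancel_work_sync(&chip->probe_work);
            /* now it is safe to tear the device down */
    }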
Fixes: 991f86d7ae4e ('ALSA: hda - Flush the pending probe work at remove') Cc: # v3.17+ Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/hda/hda_intel.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 09920ba55ba19..69093ce34231f 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -1976,10 +1976,10 @@ static void azx_remove(struct pci_dev *pci) struct hda_intel *hda; if (card) { - /* flush the pending probing work */ + /* cancel the pending probing work */ chip = card->private_data; hda = container_of(chip, struct hda_intel, chip); - flush_work(&hda->probe_work); + cancel_work_sync(&hda->probe_work); snd_card_free(card); } From 6bdec2b3a4bf4ecc5f9ba5ea24457fe57943fc61 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 10 Feb 2016 15:59:42 +0200 Subject: [PATCH 1943/2091] dmaengine: dw: disable BLOCK IRQs for non-cyclic xfer [ Upstream commit ee1cdcdae59563535485a5f56ee72c894ab7d7ad ] The commit 2895b2cad6e7 ("dmaengine: dw: fix cyclic transfer callbacks") re-enabled BLOCK interrupts with regard to make cyclic transfers work. However, this change becomes a regression for non-cyclic transfers as interrupt counters under stress test had been grown enormously (approximately per 4-5 bytes in the UART loop back test). Taking into consideration above enable BLOCK interrupts if and only if channel is programmed to perform cyclic transfer. Fixes: 2895b2cad6e7 ("dmaengine: dw: fix cyclic transfer callbacks") Signed-off-by: Andy Shevchenko Acked-by: Mans Rullgard Tested-by: Mans Rullgard Acked-by: Viresh Kumar Cc: Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/dw/core.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c index 303d937d63c79..ebffc744cb1b1 100644 --- a/drivers/dma/dw/core.c +++ b/drivers/dma/dw/core.c @@ -156,7 +156,6 @@ static void dwc_initialize(struct dw_dma_chan *dwc) /* Enable interrupts */ channel_set_bit(dw, MASK.XFER, dwc->mask); - channel_set_bit(dw, MASK.BLOCK, dwc->mask); channel_set_bit(dw, MASK.ERROR, dwc->mask); dwc->initialized = true; @@ -588,6 +587,9 @@ static void dwc_handle_cyclic(struct dw_dma *dw, struct dw_dma_chan *dwc, spin_unlock_irqrestore(&dwc->lock, flags); } + + /* Re-enable interrupts */ + channel_set_bit(dw, MASK.BLOCK, dwc->mask); } /* ------------------------------------------------------------------------- */ @@ -618,11 +620,8 @@ static void dw_dma_tasklet(unsigned long data) dwc_scan_descriptors(dw, dwc); } - /* - * Re-enable interrupts. 
- */ + /* Re-enable interrupts */ channel_set_bit(dw, MASK.XFER, dw->all_chan_mask); - channel_set_bit(dw, MASK.BLOCK, dw->all_chan_mask); channel_set_bit(dw, MASK.ERROR, dw->all_chan_mask); } @@ -1256,6 +1255,7 @@ static void dwc_free_chan_resources(struct dma_chan *chan) int dw_dma_cyclic_start(struct dma_chan *chan) { struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + struct dw_dma *dw = to_dw_dma(chan->device); unsigned long flags; if (!test_bit(DW_DMA_IS_CYCLIC, &dwc->flags)) { @@ -1264,7 +1264,12 @@ int dw_dma_cyclic_start(struct dma_chan *chan) } spin_lock_irqsave(&dwc->lock, flags); + + /* Enable interrupts to perform cyclic transfer */ + channel_set_bit(dw, MASK.BLOCK, dwc->mask); + dwc_dostart(dwc, dwc->cdesc->desc[0]); + spin_unlock_irqrestore(&dwc->lock, flags); return 0; From 999c2ceac0bcd8cb773e627151e4afaefe653427 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 15 Feb 2016 12:36:14 -0500 Subject: [PATCH 1944/2091] tracepoints: Do not trace when cpu is offline [ Upstream commit f37755490fe9bf76f6ba1d8c6591745d3574a6a6 ] The tracepoint infrastructure uses RCU sched protection to enable and disable tracepoints safely. There are some instances where tracepoints are used in infrastructure code (like kfree()) that get called after a CPU is going offline, and perhaps when it is coming back online but hasn't been registered yet. This can probuce the following warning: [ INFO: suspicious RCU usage. ] 4.4.0-00006-g0fe53e8-dirty #34 Tainted: G S ------------------------------- include/trace/events/kmem.h:141 suspicious rcu_dereference_check() usage! other info that might help us debug this: RCU used illegally from offline CPU! rcu_scheduler_active = 1, debug_locks = 1 no locks held by swapper/8/0. stack backtrace: CPU: 8 PID: 0 Comm: swapper/8 Tainted: G S 4.4.0-00006-g0fe53e8-dirty #34 Call Trace: [c0000005b76c78d0] [c0000000008b9540] .dump_stack+0x98/0xd4 (unreliable) [c0000005b76c7950] [c00000000010c898] .lockdep_rcu_suspicious+0x108/0x170 [c0000005b76c79e0] [c00000000029adc0] .kfree+0x390/0x440 [c0000005b76c7a80] [c000000000055f74] .destroy_context+0x44/0x100 [c0000005b76c7b00] [c0000000000934a0] .__mmdrop+0x60/0x150 [c0000005b76c7b90] [c0000000000e3ff0] .idle_task_exit+0x130/0x140 [c0000005b76c7c20] [c000000000075804] .pseries_mach_cpu_die+0x64/0x310 [c0000005b76c7cd0] [c000000000043e7c] .cpu_die+0x3c/0x60 [c0000005b76c7d40] [c0000000000188d8] .arch_cpu_idle_dead+0x28/0x40 [c0000005b76c7db0] [c000000000101e6c] .cpu_startup_entry+0x50c/0x560 [c0000005b76c7ed0] [c000000000043bd8] .start_secondary+0x328/0x360 [c0000005b76c7f90] [c000000000008a6c] start_secondary_prolog+0x10/0x14 This warning is not a false positive either. RCU is not protecting code that is being executed while the CPU is offline. Instead of playing "whack-a-mole(TM)" and adding conditional statements to the tracepoints we find that are used in this instance, simply add a cpu_online() test to the tracepoint code where the tracepoint will be ignored if the CPU is offline. Use of raw_smp_processor_id() is fine, as there should never be a case where the tracepoint code goes from running on a CPU that is online and suddenly gets migrated to a CPU that is offline. 
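A minimal sketch of the guard, expressed as a hypothetical helper (in the real patch the check is open-coded in the __DO_TRACE macro before the cond test, as the hunk below shows):

    /*
     * Illustrative only: RCU sched protection is only valid on an online
     * CPU, so the trace fast path must bail out otherwise.
     */
    static inline bool trace_cpu_ok(void)
    {
            return cpu_online(raw_smp_processor_id());
    }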
Link: http://lkml.kernel.org/r/1455387773-4245-1-git-send-email-kda@linux-powerpc.org Reported-by: Denis Kirjanov Fixes: 97e1c18e8d17b ("tracing: Kernel Tracepoints") Cc: stable@vger.kernel.org # v2.6.28+ Signed-off-by: Steven Rostedt Signed-off-by: Sasha Levin --- include/linux/tracepoint.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index a5f7f3ecafa3a..a6e1bca88cc63 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -14,8 +14,10 @@ * See the file COPYING for more details. */ +#include #include #include +#include #include #include @@ -129,6 +131,9 @@ extern void syscall_unregfunc(void); void *it_func; \ void *__data; \ \ + if (!cpu_online(raw_smp_processor_id())) \ + return; \ + \ if (!(cond)) \ return; \ prercu; \ From a77b1f3d1928a8c31ee0e088e52404331aed5dd5 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 12 Feb 2016 22:26:42 +0100 Subject: [PATCH 1945/2091] tracing: Fix freak link error caused by branch tracer [ Upstream commit b33c8ff4431a343561e2319f17c14286f2aa52e2 ] In my randconfig tests, I came across a bug that involves several components: * gcc-4.9 through at least 5.3 * CONFIG_GCOV_PROFILE_ALL enabling -fprofile-arcs for all files * CONFIG_PROFILE_ALL_BRANCHES overriding every if() * The optimized implementation of do_div() that tries to replace a library call with an division by multiplication * code in drivers/media/dvb-frontends/zl10353.c doing u32 adc_clock = 450560; /* 45.056 MHz */ if (state->config.adc_clock) adc_clock = state->config.adc_clock; do_div(value, adc_clock); In this case, gcc fails to determine whether the divisor in do_div() is __builtin_constant_p(). In particular, it concludes that __builtin_constant_p(adc_clock) is false, while __builtin_constant_p(!!adc_clock) is true. That in turn throws off the logic in do_div() that also uses __builtin_constant_p(), and instead of picking either the constant- optimized division, and the code in ilog2() that uses __builtin_constant_p() to figure out whether it knows the answer at compile time. The result is a link error from failing to find multiple symbols that should never have been called based on the __builtin_constant_p(): dvb-frontends/zl10353.c:138: undefined reference to `____ilog2_NaN' dvb-frontends/zl10353.c:138: undefined reference to `__aeabi_uldivmod' ERROR: "____ilog2_NaN" [drivers/media/dvb-frontends/zl10353.ko] undefined! ERROR: "__aeabi_uldivmod" [drivers/media/dvb-frontends/zl10353.ko] undefined! This patch avoids the problem by changing __trace_if() to check whether the condition is known at compile-time to be nonzero, rather than checking whether it is actually a constant. I see this one link error in roughly one out of 1600 randconfig builds on ARM, and the patch fixes all known instances. Link: http://lkml.kernel.org/r/1455312410-1058841-1-git-send-email-arnd@arndb.de Acked-by: Nicolas Pitre Fixes: ab3c9c686e22 ("branch tracer, intel-iommu: fix build with CONFIG_BRANCH_TRACER=y") Cc: stable@vger.kernel.org # v2.6.30+ Signed-off-by: Arnd Bergmann Signed-off-by: Steven Rostedt Signed-off-by: Sasha Levin --- include/linux/compiler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 867722591be2c..99728072e536c 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -142,7 +142,7 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); */ #define if(cond, ...) 
__trace_if( (cond , ## __VA_ARGS__) ) #define __trace_if(cond) \ - if (__builtin_constant_p((cond)) ? !!(cond) : \ + if (__builtin_constant_p(!!(cond)) ? !!(cond) : \ ({ \ int ______r; \ static struct ftrace_branch_data \ From ad4ad1ac3b0aef2779097a298ecc20ef9eef17d6 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 16 Feb 2016 14:15:59 +0100 Subject: [PATCH 1946/2091] ALSA: seq: Fix double port list deletion [ Upstream commit 13d5e5d4725c64ec06040d636832e78453f477b7 ] The commit [7f0973e973cd: ALSA: seq: Fix lockdep warnings due to double mutex locks] split the management of two linked lists (source and destination) into two individual calls for avoiding the AB/BA deadlock. However, this may leave the possible double deletion of one of two lists when the counterpart is being deleted concurrently. It ends up with a list corruption, as revealed by syzkaller fuzzer. This patch fixes it by checking the list emptiness and skipping the deletion and the following process. BugLink: http://lkml.kernel.org/r/CACT4Y+bay9qsrz6dQu31EcGaH9XwfW7o3oBzSQUG9fMszoh=Sg@mail.gmail.com Fixes: 7f0973e973cd ('ALSA: seq: Fix lockdep warnings due to 'double mutex locks) Reported-by: Dmitry Vyukov Tested-by: Dmitry Vyukov Cc: Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/core/seq/seq_ports.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/sound/core/seq/seq_ports.c b/sound/core/seq/seq_ports.c index 921fb2bd8fadb..fe686ee41c6da 100644 --- a/sound/core/seq/seq_ports.c +++ b/sound/core/seq/seq_ports.c @@ -535,19 +535,22 @@ static void delete_and_unsubscribe_port(struct snd_seq_client *client, bool is_src, bool ack) { struct snd_seq_port_subs_info *grp; + struct list_head *list; + bool empty; grp = is_src ? &port->c_src : &port->c_dest; + list = is_src ? &subs->src_list : &subs->dest_list; down_write(&grp->list_mutex); write_lock_irq(&grp->list_lock); - if (is_src) - list_del(&subs->src_list); - else - list_del(&subs->dest_list); + empty = list_empty(list); + if (!empty) + list_del_init(list); grp->exclusive = 0; write_unlock_irq(&grp->list_lock); up_write(&grp->list_mutex); - unsubscribe_port(client, port, grp, &subs->info, ack); + if (!empty) + unsubscribe_port(client, port, grp, &subs->info, ack); } /* connect two ports */ From 1a82e899582acee9df55a271fb8d6bb73de49fea Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Mon, 15 Feb 2016 19:41:47 +0100 Subject: [PATCH 1947/2091] drm/radeon: use post-decrement in error handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit bc3f5d8c4ca01555820617eb3b6c0857e4df710d ] We need to use post-decrement to get the pci_map_page undone also for i==0, and to avoid some very unpleasant behaviour if pci_map_page failed already at i==0. 
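The distinction is easiest to see in a stripped-down unwind loop (hypothetical names; the radeon hunk follows below):

    /* Illustrative only: undo the i entries that were mapped successfully. */
    static void unwind_mappings(struct my_ctx *ctx, int i)  /* hypothetical */
    {
            while (i--)             /* post-decrement reaches index 0 too */
                    undo_one(ctx, i);
            /*
             * "while (--i)" would never undo index 0, and if i == 0 on entry
             * it would wrap to -1 and keep looping over bogus indices.
             */
    }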
Reviewed-by: Christian König Signed-off-by: Rasmus Villemoes Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Sasha Levin --- drivers/gpu/drm/radeon/radeon_ttm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index edafd3c2b1702..f5c0590bbf736 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -758,7 +758,7 @@ static int radeon_ttm_tt_populate(struct ttm_tt *ttm) 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); if (pci_dma_mapping_error(rdev->pdev, gtt->ttm.dma_address[i])) { - while (--i) { + while (i--) { pci_unmap_page(rdev->pdev, gtt->ttm.dma_address[i], PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); gtt->ttm.dma_address[i] = 0; From 1d1338cfb7c94ffa41b3839a31c5ae332315131e Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Tue, 16 Feb 2016 14:25:00 +0100 Subject: [PATCH 1948/2091] drm/qxl: use kmalloc_array to alloc reloc_info in qxl_process_single_command [ Upstream commit 34855706c30d52b0a744da44348b5d1cc39fbe51 ] This avoids integer overflows on 32bit machines when calculating reloc_info size, as reported by Alan Cox. Cc: stable@vger.kernel.org Cc: gnomes@lxorguk.ukuu.org.uk Signed-off-by: Gerd Hoffmann Reviewed-by: Daniel Vetter Signed-off-by: Dave Airlie Signed-off-by: Sasha Levin --- drivers/gpu/drm/qxl/qxl_ioctl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/qxl/qxl_ioctl.c b/drivers/gpu/drm/qxl/qxl_ioctl.c index 7354a4cda59d9..3aefaa058f0cc 100644 --- a/drivers/gpu/drm/qxl/qxl_ioctl.c +++ b/drivers/gpu/drm/qxl/qxl_ioctl.c @@ -168,7 +168,8 @@ static int qxl_process_single_command(struct qxl_device *qdev, cmd->command_size)) return -EFAULT; - reloc_info = kmalloc(sizeof(struct qxl_reloc_info) * cmd->relocs_num, GFP_KERNEL); + reloc_info = kmalloc_array(cmd->relocs_num, + sizeof(struct qxl_reloc_info), GFP_KERNEL); if (!reloc_info) return -ENOMEM; From 468eeda0837d3e4f1574f800dc460d85e6b49fb7 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Wed, 17 Feb 2016 18:16:54 -0700 Subject: [PATCH 1949/2091] x86/mm: Fix vmalloc_fault() to handle large pages properly [ Upstream commit f4eafd8bcd5229e998aa252627703b8462c3b90f ] A kernel page fault oops with the callstack below was observed when a read syscall was made to a pmem device after a huge amount (>512GB) of vmalloc ranges was allocated by ioremap() on a x86_64 system: BUG: unable to handle kernel paging request at ffff880840000ff8 IP: vmalloc_fault+0x1be/0x300 PGD c7f03a067 PUD 0 Oops: 0000 [#1] SM Call Trace: __do_page_fault+0x285/0x3e0 do_page_fault+0x2f/0x80 ? put_prev_entity+0x35/0x7a0 page_fault+0x28/0x30 ? memcpy_erms+0x6/0x10 ? schedule+0x35/0x80 ? pmem_rw_bytes+0x6a/0x190 [nd_pmem] ? schedule_timeout+0x183/0x240 btt_log_read+0x63/0x140 [nd_btt] : ? __symbol_put+0x60/0x60 ? kernel_read+0x50/0x80 SyS_finit_module+0xb9/0xf0 entry_SYSCALL_64_fastpath+0x1a/0xa4 Since v4.1, ioremap() supports large page (pud/pmd) mappings in x86_64 and PAE. vmalloc_fault() however assumes that the vmalloc range is limited to pte mappings. vmalloc faults do not normally happen in ioremap'd ranges since ioremap() sets up the kernel page tables, which are shared by user processes. pgd_ctor() sets the kernel's PGD entries to user's during fork(). When allocation of the vmalloc ranges crosses a 512GB boundary, ioremap() allocates a new pud table and updates the kernel PGD entry to point it. 
If user process's PGD entry does not have this update yet, a read/write syscall to the range will cause a vmalloc fault, which hits the Oops above as it does not handle a large page properly. Following changes are made to vmalloc_fault(). 64-bit: - No change for the PGD sync operation as it handles large pages already. - Add pud_huge() and pmd_huge() to the validation code to handle large pages. - Change pud_page_vaddr() to pud_pfn() since an ioremap range is not directly mapped (while the if-statement still works with a bogus addr). - Change pmd_page() to pmd_pfn() since an ioremap range is not backed by struct page (while the if-statement still works with a bogus addr). 32-bit: - No change for the sync operation since the index3 PGD entry covers the entire vmalloc range, which is always valid. (A separate change to sync PGD entry is necessary if this memory layout is changed regardless of the page size.) - Add pmd_huge() to the validation code to handle large pages. This is for completeness since vmalloc_fault() won't happen in ioremap'd ranges as its PGD entry is always valid. Reported-by: Henning Schild Signed-off-by: Toshi Kani Acked-by: Borislav Petkov Cc: # 4.1+ Cc: Andrew Morton Cc: Andy Lutomirski Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Toshi Kani Cc: linux-mm@kvack.org Cc: linux-nvdimm@lists.01.org Link: http://lkml.kernel.org/r/1455758214-24623-1-git-send-email-toshi.kani@hpe.com Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- arch/x86/mm/fault.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 181c53bac3a7e..62855ac37ab7e 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -285,6 +285,9 @@ static noinline int vmalloc_fault(unsigned long address) if (!pmd_k) return -1; + if (pmd_huge(*pmd_k)) + return 0; + pte_k = pte_offset_kernel(pmd_k, address); if (!pte_present(*pte_k)) return -1; @@ -356,8 +359,6 @@ void vmalloc_sync_all(void) * 64-bit: * * Handle a fault on the vmalloc area - * - * This assumes no large pages in there. */ static noinline int vmalloc_fault(unsigned long address) { @@ -399,17 +400,23 @@ static noinline int vmalloc_fault(unsigned long address) if (pud_none(*pud_ref)) return -1; - if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref)) + if (pud_none(*pud) || pud_pfn(*pud) != pud_pfn(*pud_ref)) BUG(); + if (pud_huge(*pud)) + return 0; + pmd = pmd_offset(pud, address); pmd_ref = pmd_offset(pud_ref, address); if (pmd_none(*pmd_ref)) return -1; - if (pmd_none(*pmd) || pmd_page(*pmd) != pmd_page(*pmd_ref)) + if (pmd_none(*pmd) || pmd_pfn(*pmd) != pmd_pfn(*pmd_ref)) BUG(); + if (pmd_huge(*pmd)) + return 0; + pte_ref = pte_offset_kernel(pmd_ref, address); if (!pte_present(*pte_ref)) return -1; From cab48a095169b3f981c600b4365170a076ac9f3a Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 17 Feb 2016 14:30:26 +0100 Subject: [PATCH 1950/2091] ALSA: pcm: Fix rwsem deadlock for non-atomic PCM stream [ Upstream commit 67ec1072b053c15564e6090ab30127895dc77a89 ] A non-atomic PCM stream may take snd_pcm_link_rwsem rw semaphore twice in the same code path, e.g. one in snd_pcm_action_nonatomic() and another in snd_pcm_stream_lock(). Usually this is OK, but when a write lock is issued between these two read locks, the problem happens: the write lock is blocked due to the first reade lock, and the second read lock is also blocked by the write lock. 
This eventually deadlocks. The reason is the way rwsem manages waiters; it's queued like FIFO, so even if the writer itself doesn't take the lock yet, it blocks all the waiters (including reads) queued after it. As a workaround, in this patch, we replace the standard down_write() with an spinning loop. This is far from optimal, but it's good enough, as the spinning time is supposed to be relatively short for normal PCM operations, and the code paths requiring the write lock aren't called so often. Reported-by: Vinod Koul Tested-by: Ramesh Babu Cc: # v3.18+ Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/core/pcm_native.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 75888dd38a7fc..aa999e747c94b 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -74,6 +74,18 @@ static int snd_pcm_open(struct file *file, struct snd_pcm *pcm, int stream); static DEFINE_RWLOCK(snd_pcm_link_rwlock); static DECLARE_RWSEM(snd_pcm_link_rwsem); +/* Writer in rwsem may block readers even during its waiting in queue, + * and this may lead to a deadlock when the code path takes read sem + * twice (e.g. one in snd_pcm_action_nonatomic() and another in + * snd_pcm_stream_lock()). As a (suboptimal) workaround, let writer to + * spin until it gets the lock. + */ +static inline void down_write_nonblock(struct rw_semaphore *lock) +{ + while (!down_write_trylock(lock)) + cond_resched(); +} + /** * snd_pcm_stream_lock - Lock the PCM stream * @substream: PCM substream @@ -1816,7 +1828,7 @@ static int snd_pcm_link(struct snd_pcm_substream *substream, int fd) res = -ENOMEM; goto _nolock; } - down_write(&snd_pcm_link_rwsem); + down_write_nonblock(&snd_pcm_link_rwsem); write_lock_irq(&snd_pcm_link_rwlock); if (substream->runtime->status->state == SNDRV_PCM_STATE_OPEN || substream->runtime->status->state != substream1->runtime->status->state || @@ -1863,7 +1875,7 @@ static int snd_pcm_unlink(struct snd_pcm_substream *substream) struct snd_pcm_substream *s; int res = 0; - down_write(&snd_pcm_link_rwsem); + down_write_nonblock(&snd_pcm_link_rwsem); write_lock_irq(&snd_pcm_link_rwlock); if (!snd_pcm_stream_linked(substream)) { res = -EALREADY; From aff5e5984051fb83578ef2a30db98747a6e39fa6 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 17 Feb 2016 13:11:15 -0800 Subject: [PATCH 1951/2091] mm: fix regression in remap_file_pages() emulation [ Upstream commit 48f7df329474b49d83d0dffec1b6186647f11976 ] Grazvydas Ignotas has reported a regression in remap_file_pages() emulation. Testcase: #define _GNU_SOURCE #include #include #include #include #define SIZE (4096 * 3) int main(int argc, char **argv) { unsigned long *p; long i; p = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); if (p == MAP_FAILED) { perror("mmap"); return -1; } for (i = 0; i < SIZE / 4096; i++) p[i * 4096 / sizeof(*p)] = i; if (remap_file_pages(p, 4096, 0, 1, 0)) { perror("remap_file_pages"); return -1; } if (remap_file_pages(p, 4096 * 2, 0, 1, 0)) { perror("remap_file_pages"); return -1; } assert(p[0] == 1); munmap(p, SIZE); return 0; } The second remap_file_pages() fails with -EINVAL. The reason is that remap_file_pages() emulation assumes that the target vma covers whole area we want to over map. That assumption is broken by first remap_file_pages() call: it split the area into two vma. The solution is to check next adjacent vmas, if they map the same file with the same flags. 
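A simplified sketch of that adjacency check (hypothetical helper; the real logic added to the remap_file_pages() emulation is in the diff below):

    /*
     * Illustrative only: the range [start, start + size) may span several
     * vmas created by an earlier split, as long as they are contiguous and
     * map the same file with identical flags.
     */
    static bool range_is_remappable(struct vm_area_struct *vma,
                                    unsigned long start, unsigned long size)
    {
            struct vm_area_struct *cur = vma;

            while (start + size > cur->vm_end) {
                    struct vm_area_struct *next = cur->vm_next;

                    if (!next ||
                        next->vm_start != cur->vm_end ||    /* hole */
                        next->vm_file != vma->vm_file ||
                        next->vm_flags != vma->vm_flags)
                            return false;
                    cur = next;
            }
            return true;
    }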
Fixes: c8d78c1823f4 ("mm: replace remap_file_pages() syscall with emulation") Signed-off-by: Kirill A. Shutemov Reported-by: Grazvydas Ignotas Tested-by: Grazvydas Ignotas Cc: [4.0+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/mmap.c | 34 +++++++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/mm/mmap.c b/mm/mmap.c index b639fa2721d8d..d30b8f8f02b19 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2654,12 +2654,29 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, if (!vma || !(vma->vm_flags & VM_SHARED)) goto out; - if (start < vma->vm_start || start + size > vma->vm_end) + if (start < vma->vm_start) goto out; - if (pgoff == linear_page_index(vma, start)) { - ret = 0; - goto out; + if (start + size > vma->vm_end) { + struct vm_area_struct *next; + + for (next = vma->vm_next; next; next = next->vm_next) { + /* hole between vmas ? */ + if (next->vm_start != next->vm_prev->vm_end) + goto out; + + if (next->vm_file != vma->vm_file) + goto out; + + if (next->vm_flags != vma->vm_flags) + goto out; + + if (start + size <= next->vm_end) + break; + } + + if (!next) + goto out; } prot |= vma->vm_flags & VM_READ ? PROT_READ : 0; @@ -2669,9 +2686,16 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, flags &= MAP_NONBLOCK; flags |= MAP_SHARED | MAP_FIXED | MAP_POPULATE; if (vma->vm_flags & VM_LOCKED) { + struct vm_area_struct *tmp; flags |= MAP_LOCKED; + /* drop PG_Mlocked flag for over-mapped range */ - munlock_vma_pages_range(vma, start, start + size); + for (tmp = vma; tmp->vm_start >= start + size; + tmp = tmp->vm_next) { + munlock_vma_pages_range(tmp, + max(tmp->vm_start, start), + min(tmp->vm_end, start + size)); + } } file = get_file(vma->vm_file); From 0eba52da056a131ae7d5715e9bae4e147356d3d9 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Tue, 30 Jun 2015 14:58:36 -0700 Subject: [PATCH 1952/2091] ipc,shm: move BUG_ON check into shm_lock [ Upstream commit c5c8975b2eb4eb7604e8ce4f762987f56d2a96a2 ] Upon every shm_lock call, we BUG_ON if an error was returned, indicating racing either in idr or in shm_destroy. Move this logic into the locking. [akpm@linux-foundation.org: simplify code] Signed-off-by: Davidlohr Bueso Cc: Manfred Spraul Cc: Davidlohr Bueso Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- ipc/shm.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ipc/shm.c b/ipc/shm.c index 499a8bd22fad1..818eb7d24f5eb 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -155,8 +155,11 @@ static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id) { struct kern_ipc_perm *ipcp = ipc_lock(&shm_ids(ns), id); - if (IS_ERR(ipcp)) - return (struct shmid_kernel *)ipcp; + /* + * We raced in the idr lookup or with shm_destroy(). Either way, the + * ID is busted. 
+ */ + BUG_ON(IS_ERR(ipcp)); return container_of(ipcp, struct shmid_kernel, shm_perm); } @@ -191,7 +194,6 @@ static void shm_open(struct vm_area_struct *vma) struct shmid_kernel *shp; shp = shm_lock(sfd->ns, sfd->id); - BUG_ON(IS_ERR(shp)); shp->shm_atim = get_seconds(); shp->shm_lprid = task_tgid_vnr(current); shp->shm_nattch++; @@ -258,7 +260,6 @@ static void shm_close(struct vm_area_struct *vma) down_write(&shm_ids(ns).rwsem); /* remove from the list of attaches of the shm segment */ shp = shm_lock(ns, sfd->id); - BUG_ON(IS_ERR(shp)); shp->shm_lprid = task_tgid_vnr(current); shp->shm_dtim = get_seconds(); shp->shm_nattch--; @@ -1192,7 +1193,6 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr, out_nattch: down_write(&shm_ids(ns).rwsem); shp = shm_lock(ns, shmid); - BUG_ON(IS_ERR(shp)); shp->shm_nattch--; if (shm_may_destroy(ns, shp)) shm_destroy(ns, shp); From 6e82212c489fcdc50446bff12bd1ed5e8ef110a2 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Wed, 9 Sep 2015 15:39:20 -0700 Subject: [PATCH 1953/2091] ipc: convert invalid scenarios to use WARN_ON [ Upstream commit d0edd8528362c07216498340e928159510595e7b ] Considering Linus' past rants about the (ab)use of BUG in the kernel, I took a look at how we deal with such calls in ipc. Given that any errors or corruption in ipc code are most likely contained within the set of processes participating in the broken mechanisms, there aren't really many strong fatal system failure scenarios that would require a BUG call. Also, if something is seriously wrong, ipc might not be the place for such a BUG either. 1. For example, recently, a customer hit one of these BUG_ONs in shm after failing shm_lock(). A busted ID imho does not merit a BUG_ON, and WARN would have been better. 2. MSG_COPY functionality of posix msgrcv(2) for checkpoint/restore. I don't see how we can hit this anyway -- at least it should be IS_ERR. The 'copy' arg from do_msgrcv is always set by calling prepare_copy() first and foremost. We could also probably drop this check altogether. Either way, it does not merit a BUG_ON. 3. No ->fault() callback for the fs getting the corresponding page -- seems selfish to make the system unusable. Signed-off-by: Davidlohr Bueso Cc: Manfred Spraul Cc: Linus Torvalds Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- ipc/msgutil.c | 2 +- ipc/shm.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ipc/msgutil.c b/ipc/msgutil.c index 2b491590ebab1..71f448e5e927a 100644 --- a/ipc/msgutil.c +++ b/ipc/msgutil.c @@ -123,7 +123,7 @@ struct msg_msg *copy_msg(struct msg_msg *src, struct msg_msg *dst) size_t len = src->m_ts; size_t alen; - BUG_ON(dst == NULL); + WARN_ON(dst == NULL); if (src->m_ts > dst->m_ts) return ERR_PTR(-EINVAL); diff --git a/ipc/shm.c b/ipc/shm.c index 818eb7d24f5eb..cfb9dd00e3449 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -159,7 +159,7 @@ static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id) * We raced in the idr lookup or with shm_destroy(). Either way, the * ID is busted. */ - BUG_ON(IS_ERR(ipcp)); + WARN_ON(IS_ERR(ipcp)); return container_of(ipcp, struct shmid_kernel, shm_perm); } @@ -393,7 +393,7 @@ static int shm_mmap(struct file *file, struct vm_area_struct *vma) return ret; sfd->vm_ops = vma->vm_ops; #ifdef CONFIG_MMU - BUG_ON(!sfd->vm_ops->fault); + WARN_ON(!sfd->vm_ops->fault); #endif vma->vm_ops = &shm_vm_ops; shm_open(vma); From 5d0e8394db90caa6d06477f68cbdb48fe65f468d Mon Sep 17 00:00:00 2001 From: "Kirill A. 
Shutemov" Date: Wed, 17 Feb 2016 13:11:35 -0800 Subject: [PATCH 1954/2091] ipc/shm: handle removed segments gracefully in shm_mmap() [ Upstream commit 15db15e2f10ae12d021c9a2e9edd8a03b9238551 ] commit 1ac0b6dec656f3f78d1c3dd216fad84cb4d0a01e upstream. remap_file_pages(2) emulation can reach file which represents removed IPC ID as long as a memory segment is mapped. It breaks expectations of IPC subsystem. Test case (rewritten to be more human readable, originally autogenerated by syzkaller[1]): #define _GNU_SOURCE #include #include #include #include #define PAGE_SIZE 4096 int main() { int id; void *p; id = shmget(IPC_PRIVATE, 3 * PAGE_SIZE, 0); p = shmat(id, NULL, 0); shmctl(id, IPC_RMID, NULL); remap_file_pages(p, 3 * PAGE_SIZE, 0, 7, 0); return 0; } The patch changes shm_mmap() and code around shm_lock() to propagate locking error back to caller of shm_mmap(). [1] http://github.com/google/syzkaller Signed-off-by: Kirill A. Shutemov Reported-by: Dmitry Vyukov Cc: Davidlohr Bueso Cc: Manfred Spraul Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- ipc/shm.c | 53 +++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 43 insertions(+), 10 deletions(-) diff --git a/ipc/shm.c b/ipc/shm.c index cfb9dd00e3449..bbe5f62f2b129 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -156,11 +156,12 @@ static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id) struct kern_ipc_perm *ipcp = ipc_lock(&shm_ids(ns), id); /* - * We raced in the idr lookup or with shm_destroy(). Either way, the - * ID is busted. + * Callers of shm_lock() must validate the status of the returned ipc + * object pointer (as returned by ipc_lock()), and error out as + * appropriate. */ - WARN_ON(IS_ERR(ipcp)); - + if (IS_ERR(ipcp)) + return (void *)ipcp; return container_of(ipcp, struct shmid_kernel, shm_perm); } @@ -186,18 +187,33 @@ static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s) } -/* This is called by fork, once for every shm attach. */ -static void shm_open(struct vm_area_struct *vma) +static int __shm_open(struct vm_area_struct *vma) { struct file *file = vma->vm_file; struct shm_file_data *sfd = shm_file_data(file); struct shmid_kernel *shp; shp = shm_lock(sfd->ns, sfd->id); + + if (IS_ERR(shp)) + return PTR_ERR(shp); + shp->shm_atim = get_seconds(); shp->shm_lprid = task_tgid_vnr(current); shp->shm_nattch++; shm_unlock(shp); + return 0; +} + +/* This is called by fork, once for every shm attach. */ +static void shm_open(struct vm_area_struct *vma) +{ + int err = __shm_open(vma); + /* + * We raced in the idr lookup or with shm_destroy(). + * Either way, the ID is busted. + */ + WARN_ON_ONCE(err); } /* @@ -260,6 +276,14 @@ static void shm_close(struct vm_area_struct *vma) down_write(&shm_ids(ns).rwsem); /* remove from the list of attaches of the shm segment */ shp = shm_lock(ns, sfd->id); + + /* + * We raced in the idr lookup or with shm_destroy(). + * Either way, the ID is busted. 
+ */ + if (WARN_ON_ONCE(IS_ERR(shp))) + goto done; /* no-op */ + shp->shm_lprid = task_tgid_vnr(current); shp->shm_dtim = get_seconds(); shp->shm_nattch--; @@ -267,6 +291,7 @@ static void shm_close(struct vm_area_struct *vma) shm_destroy(ns, shp); else shm_unlock(shp); +done: up_write(&shm_ids(ns).rwsem); } @@ -388,17 +413,25 @@ static int shm_mmap(struct file *file, struct vm_area_struct *vma) struct shm_file_data *sfd = shm_file_data(file); int ret; + /* + * In case of remap_file_pages() emulation, the file can represent + * removed IPC ID: propogate shm_lock() error to caller. + */ + ret =__shm_open(vma); + if (ret) + return ret; + ret = sfd->file->f_op->mmap(sfd->file, vma); - if (ret != 0) + if (ret) { + shm_close(vma); return ret; + } sfd->vm_ops = vma->vm_ops; #ifdef CONFIG_MMU WARN_ON(!sfd->vm_ops->fault); #endif vma->vm_ops = &shm_vm_ops; - shm_open(vma); - - return ret; + return 0; } static int shm_release(struct inode *ino, struct file *file) From 62adae8f26b918f4403129e355d81301a629b6a2 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 19 Feb 2016 00:33:21 -0500 Subject: [PATCH 1955/2091] ext4: fix crashes in dioread_nolock mode [ Upstream commit 74dae4278546b897eb81784fdfcce872ddd8b2b8 ] Competing overwrite DIO in dioread_nolock mode will just overwrite pointer to io_end in the inode. This may result in data corruption or extent conversion happening from IO completion interrupt because we don't properly set buffer_defer_completion() when unlocked DIO races with locked DIO to unwritten extent. Since unlocked DIO doesn't need io_end for anything, just avoid allocating it and corrupting pointer from inode for locked DIO. A cleaner fix would be to avoid these games with io_end pointer from the inode but that requires more intrusive changes so we leave that for later. Cc: stable@vger.kernel.org Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/ext4/inode.c | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 966c614822cc1..2b3a53a515822 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3133,29 +3133,29 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter, * case, we allocate an io_end structure to hook to the iocb. */ iocb->private = NULL; - ext4_inode_aio_set(inode, NULL); - if (!is_sync_kiocb(iocb)) { - io_end = ext4_init_io_end(inode, GFP_NOFS); - if (!io_end) { - ret = -ENOMEM; - goto retake_lock; - } - /* - * Grab reference for DIO. Will be dropped in ext4_end_io_dio() - */ - iocb->private = ext4_get_io_end(io_end); - /* - * we save the io structure for current async direct - * IO, so that later ext4_map_blocks() could flag the - * io structure whether there is a unwritten extents - * needs to be converted when IO is completed. - */ - ext4_inode_aio_set(inode, io_end); - } - if (overwrite) { get_block_func = ext4_get_block_write_nolock; } else { + ext4_inode_aio_set(inode, NULL); + if (!is_sync_kiocb(iocb)) { + io_end = ext4_init_io_end(inode, GFP_NOFS); + if (!io_end) { + ret = -ENOMEM; + goto retake_lock; + } + /* + * Grab reference for DIO. Will be dropped in + * ext4_end_io_dio() + */ + iocb->private = ext4_get_io_end(io_end); + /* + * we save the io structure for current async direct + * IO, so that later ext4_map_blocks() could flag the + * io structure whether there is a unwritten extents + * needs to be converted when IO is completed. 
+ */ + ext4_inode_aio_set(inode, io_end); + } get_block_func = ext4_get_block_write; dio_flags = DIO_LOCKING; } From 35cc814663e4f3515fb6bae8a3b5c188aa2b6f89 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 1 Mar 2016 16:02:43 +1100 Subject: [PATCH 1956/2091] powerpc/eeh: Fix build error caused by pci_dn MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit eeh.h could be included when we have following condition. Then we run into build error as below: (CONFIG_PPC64 && !CONFIG_EEH) || (!CONFIG_PPC64 && !CONFIG_EEH) In file included from arch/powerpc/kernel/of_platform.c:30:0: ./arch/powerpc/include/asm/eeh.h:344:48: error: ‘struct pci_dn’ \ declared inside parameter list [-Werror] : In file included from arch/powerpc/mm/hash_utils_64.c:49:0: ./arch/powerpc/include/asm/eeh.h:344:48: error: ‘struct pci_dn’ \ declared inside parameter list [-Werror] This fixes the issue by replacing those empty inline functions with macro so that we don't rely on @pci_dn when CONFIG_EEH is disabled. Cc: stable@vger.kernel.org # v4.1+ Fixes: ff57b45 ("powerpc/eeh: Do probe on pci_dn") Reported-by: Guenter Roeck Signed-off-by: Gavin Shan Signed-off-by: Sasha Levin --- arch/powerpc/include/asm/eeh.h | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index e4de79abfe703..4457cb605356d 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -337,19 +337,13 @@ static inline int eeh_check_failure(const volatile void __iomem *token) #define eeh_dev_check_failure(x) (0) -static inline void eeh_addr_cache_build(void) { } - -static inline void eeh_add_device_early(struct pci_dn *pdn) { } - -static inline void eeh_add_device_tree_early(struct pci_dn *pdn) { } - -static inline void eeh_add_device_late(struct pci_dev *dev) { } - -static inline void eeh_add_device_tree_late(struct pci_bus *bus) { } - -static inline void eeh_add_sysfs_files(struct pci_bus *bus) { } - -static inline void eeh_remove_device(struct pci_dev *dev) { } +#define eeh_addr_cache_build() +#define eeh_add_device_early(pdn) +#define eeh_add_device_tree_early(pdn) +#define eeh_add_device_late(pdev) +#define eeh_add_device_tree_late(pbus) +#define eeh_add_sysfs_files(pbus) +#define eeh_remove_device(pdev) #define EEH_POSSIBLE_ERROR(val, type) (0) #define EEH_IO_ERROR_VALUE(size) (-1UL) From 257ed6d440536351b9ceca7005ee89fbcc352b75 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Tue, 19 Jan 2016 10:41:33 +0100 Subject: [PATCH 1957/2091] af_iucv: Validate socket address length in iucv_sock_bind() [ Upstream commit 52a82e23b9f2a9e1d429c5207f8575784290d008 ] Signed-off-by: Ursula Braun Reported-by: Dmitry Vyukov Reviewed-by: Evgeny Cherkashin Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/iucv/af_iucv.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 6daa52a18d40c..123f6f9f854c6 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -709,6 +709,9 @@ static int iucv_sock_bind(struct socket *sock, struct sockaddr *addr, if (!addr || addr->sa_family != AF_IUCV) return -EINVAL; + if (addr_len < sizeof(struct sockaddr_iucv)) + return -EINVAL; + lock_sock(sk); if (sk->sk_state != IUCV_OPEN) { err = -EBADFD; From 0c0c5254583eb9a77e62a4391094258907e08ea1 Mon Sep 17 00:00:00 2001 From: Manfred Rudigier Date: Wed, 20 Jan 2016 11:22:28 +0100 Subject: [PATCH 1958/2091] net: dp83640: Fix tx timestamp overflow handling. 
[ Upstream commit 81e8f2e930fe76b9814c71b9d87c30760b5eb705 ] PHY status frames are not reliable, the PHY may not be able to send them during heavy receive traffic. This overflow condition is signaled by the PHY in the next status frame, but the driver did not make use of it. Instead it always reported wrong tx timestamps to user space after an overflow happened because it assigned newly received tx timestamps to old packets in the queue. This commit fixes this issue by clearing the tx timestamp queue every time an overflow happens, so that no timestamps are delivered for overflow packets. This way time stamping will continue correctly after an overflow. Signed-off-by: Manfred Rudigier Acked-by: Richard Cochran Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/phy/dp83640.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c index 00cb41e713123..c56cf0b86f2c8 100644 --- a/drivers/net/phy/dp83640.c +++ b/drivers/net/phy/dp83640.c @@ -833,6 +833,11 @@ static void decode_rxts(struct dp83640_private *dp83640, struct skb_shared_hwtstamps *shhwtstamps = NULL; struct sk_buff *skb; unsigned long flags; + u8 overflow; + + overflow = (phy_rxts->ns_hi >> 14) & 0x3; + if (overflow) + pr_debug("rx timestamp queue overflow, count %d\n", overflow); spin_lock_irqsave(&dp83640->rx_lock, flags); @@ -875,6 +880,7 @@ static void decode_txts(struct dp83640_private *dp83640, struct skb_shared_hwtstamps shhwtstamps; struct sk_buff *skb; u64 ns; + u8 overflow; /* We must already have the skb that triggered this. */ @@ -884,6 +890,17 @@ static void decode_txts(struct dp83640_private *dp83640, pr_debug("have timestamp but tx_queue empty\n"); return; } + + overflow = (phy_txts->ns_hi >> 14) & 0x3; + if (overflow) { + pr_debug("tx timestamp queue overflow, count %d\n", overflow); + while (skb) { + skb_complete_tx_timestamp(skb, NULL); + skb = skb_dequeue(&dp83640->tx_queue); + } + return; + } + ns = phy2txts(phy_txts); memset(&shhwtstamps, 0, sizeof(shhwtstamps)); shhwtstamps.hwtstamp = ns_to_ktime(ns); From 93493f5aaff542f0d542c48a3fda2dea1afe5e9e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 21 Jan 2016 08:02:54 -0800 Subject: [PATCH 1959/2091] tcp: fix NULL deref in tcp_v4_send_ack() [ Upstream commit e62a123b8ef7c5dc4db2c16383d506860ad21b47 ] Neal reported crashes with this stack trace : RIP: 0010:[] tcp_v4_send_ack+0x41/0x20f ... CR2: 0000000000000018 CR3: 000000044005c000 CR4: 00000000001427e0 ... [] tcp_v4_reqsk_send_ack+0xa5/0xb4 [] tcp_check_req+0x2ea/0x3e0 [] tcp_rcv_state_process+0x850/0x2500 [] tcp_v4_do_rcv+0x141/0x330 [] sk_backlog_rcv+0x21/0x30 [] tcp_recvmsg+0x75d/0xf90 [] inet_recvmsg+0x80/0xa0 [] sock_aio_read+0xee/0x110 [] do_sync_read+0x6f/0xa0 [] SyS_read+0x1e1/0x290 [] system_call_fastpath+0x16/0x1b The problem here is the skb we provide to tcp_v4_send_ack() had to be parked in the backlog of a new TCP fastopen child because this child was owned by the user at the time an out of window packet arrived. Before queuing a packet, TCP has to set skb->dev to NULL as the device could disappear before packet is removed from the queue. Fix this issue by using the net pointer provided by the socket (being a timewait or a request socket). IPv6 is immune to the bug : tcp_v6_send_response() already gets the net pointer from the socket if provided. 
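For illustration, a minimal standalone C model of the pattern this fix applies: the net namespace is taken from the long-lived socket rather than from a per-packet pointer that may already have been cleared. The types below are hypothetical stand-ins for sk_buff, net_device, net and sock; this is not kernel code.

  #include <stdio.h>

  struct netns { const char *name; };
  struct device { struct netns *net; };
  struct packet { struct device *dev; };     /* dev is cleared once the packet
                                              * is parked on a backlog queue */
  struct socket_ctx { struct netns *net; };  /* valid for the socket's lifetime */

  /* Old pattern: derive the namespace from the packet's device. */
  static struct netns *net_from_packet(const struct packet *pkt)
  {
      return pkt->dev ? pkt->dev->net : NULL;   /* nothing to dereference here */
  }

  /* Fixed pattern: the caller passes the namespace taken from the socket. */
  static void send_ack(const struct netns *net)
  {
      printf("ack built in netns %s\n", net->name);
  }

  int main(void)
  {
      struct netns init_net = { "init_net" };
      struct socket_ctx sk = { &init_net };
      struct packet pkt = { NULL };             /* backlogged: dev already NULL */

      if (!net_from_packet(&pkt))
          printf("old path would have dereferenced NULL\n");
      send_ack(sk.net);                         /* new path stays well defined */
      return 0;
  }

In the patch itself both callers now pass sock_net(sk), whether sk is a timewait or a request socket.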
Fixes: 168a8f58059a ("tcp: TCP Fast Open Server - main code path") Reported-by: Neal Cardwell Signed-off-by: Eric Dumazet Cc: Jerry Chu Cc: Yuchung Cheng Acked-by: Neal Cardwell Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/tcp_ipv4.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index cd18c3d3251e1..13b92d595138f 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -705,7 +705,8 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) outside socket context is ugly, certainly. What can I do? */ -static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, +static void tcp_v4_send_ack(struct net *net, + struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, struct tcp_md5sig_key *key, int reply_flags, u8 tos) @@ -720,7 +721,6 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, ]; } rep; struct ip_reply_arg arg; - struct net *net = dev_net(skb_dst(skb)->dev); memset(&rep.th, 0, sizeof(struct tcphdr)); memset(&arg, 0, sizeof(arg)); @@ -782,7 +782,8 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) struct inet_timewait_sock *tw = inet_twsk(sk); struct tcp_timewait_sock *tcptw = tcp_twsk(sk); - tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, + tcp_v4_send_ack(sock_net(sk), skb, + tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcp_time_stamp + tcptw->tw_ts_offset, tcptw->tw_ts_recent, @@ -801,8 +802,10 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV * sk->sk_state == TCP_SYN_RECV -> for Fast Open. */ - tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ? - tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, + u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 : + tcp_sk(sk)->snd_nxt; + + tcp_v4_send_ack(sock_net(sk), skb, seq, tcp_rsk(req)->rcv_nxt, req->rcv_wnd, tcp_time_stamp, req->ts_recent, From 8d988538da0c17711c0de0a53fc38cef49e3ed1b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 24 Jan 2016 13:53:50 -0800 Subject: [PATCH 1960/2091] af_unix: fix struct pid memory leak [ Upstream commit fa0dc04df259ba2df3ce1920e9690c7842f8fa4b ] Dmitry reported a struct pid leak detected by a syzkaller program. Bug happens in unix_stream_recvmsg() when we break the loop when a signal is pending, without properly releasing scm. Fixes: b3ca9b02b007 ("net: fix multithreaded signal handling in unix recv routines") Reported-by: Dmitry Vyukov Signed-off-by: Eric Dumazet Cc: Rainer Weikusat Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/unix/af_unix.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index cb3a01a9ed384..c741d8327204d 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2131,6 +2131,7 @@ static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, if (signal_pending(current)) { err = sock_intr_errno(timeo); + scm_destroy(&scm); goto out; } From ce28c3ced53aa6385eafeabaeb9c70eca9e3b1ba Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Fri, 22 Jan 2016 01:39:43 +0100 Subject: [PATCH 1961/2091] pptp: fix illegal memory access caused by multiple bind()s [ Upstream commit 9a368aff9cb370298fa02feeffa861f2db497c18 ] Several times already this has been reported as kasan reports caused by syzkaller and trinity and people always looked at RCU races, but it is much more simple. 
:) In case we bind a pptp socket multiple times, we simply add it to the callid_sock list but don't remove the old binding. Thus the old socket stays in the bucket with unused call_id indexes and doesn't get cleaned up. This causes various forms of kasan reports which were hard to pinpoint. Simply don't allow multiple binds and correct error handling in pptp_bind. Also keep sk_state bits in place in pptp_connect. Fixes: 00959ade36acad ("PPTP: PPP over IPv4 (Point-to-Point Tunneling Protocol)") Cc: Dmitry Kozlov Cc: Sasha Levin Cc: Dmitry Vyukov Reported-by: Dmitry Vyukov Cc: Dave Jones Reported-by: Dave Jones Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ppp/pptp.c | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c index 0bacabfa486e5..b35199cc8f34d 100644 --- a/drivers/net/ppp/pptp.c +++ b/drivers/net/ppp/pptp.c @@ -131,24 +131,27 @@ static int lookup_chan_dst(u16 call_id, __be32 d_addr) return i < MAX_CALLID; } -static int add_chan(struct pppox_sock *sock) +static int add_chan(struct pppox_sock *sock, + struct pptp_addr *sa) { static int call_id; spin_lock(&chan_lock); - if (!sock->proto.pptp.src_addr.call_id) { + if (!sa->call_id) { call_id = find_next_zero_bit(callid_bitmap, MAX_CALLID, call_id + 1); if (call_id == MAX_CALLID) { call_id = find_next_zero_bit(callid_bitmap, MAX_CALLID, 1); if (call_id == MAX_CALLID) goto out_err; } - sock->proto.pptp.src_addr.call_id = call_id; - } else if (test_bit(sock->proto.pptp.src_addr.call_id, callid_bitmap)) + sa->call_id = call_id; + } else if (test_bit(sa->call_id, callid_bitmap)) { goto out_err; + } - set_bit(sock->proto.pptp.src_addr.call_id, callid_bitmap); - rcu_assign_pointer(callid_sock[sock->proto.pptp.src_addr.call_id], sock); + sock->proto.pptp.src_addr = *sa; + set_bit(sa->call_id, callid_bitmap); + rcu_assign_pointer(callid_sock[sa->call_id], sock); spin_unlock(&chan_lock); return 0; @@ -417,7 +420,6 @@ static int pptp_bind(struct socket *sock, struct sockaddr *uservaddr, struct sock *sk = sock->sk; struct sockaddr_pppox *sp = (struct sockaddr_pppox *) uservaddr; struct pppox_sock *po = pppox_sk(sk); - struct pptp_opt *opt = &po->proto.pptp; int error = 0; if (sockaddr_len < sizeof(struct sockaddr_pppox)) @@ -425,10 +427,22 @@ static int pptp_bind(struct socket *sock, struct sockaddr *uservaddr, lock_sock(sk); - opt->src_addr = sp->sa_addr.pptp; - if (add_chan(po)) + if (sk->sk_state & PPPOX_DEAD) { + error = -EALREADY; + goto out; + } + + if (sk->sk_state & PPPOX_BOUND) { error = -EBUSY; + goto out; + } + + if (add_chan(po, &sp->sa_addr.pptp)) + error = -EBUSY; + else + sk->sk_state |= PPPOX_BOUND; +out: release_sock(sk); return error; } @@ -499,7 +513,7 @@ static int pptp_connect(struct socket *sock, struct sockaddr *uservaddr, } opt->dst_addr = sp->sa_addr.pptp; - sk->sk_state = PPPOX_CONNECTED; + sk->sk_state |= PPPOX_CONNECTED; end: release_sock(sk); From 6e13e7b0183e0b9945ff350bd5fad573c2ece83c Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Fri, 22 Jan 2016 18:29:49 -0200 Subject: [PATCH 1962/2091] sctp: allow setting SCTP_SACK_IMMEDIATELY by the application [ Upstream commit 27f7ed2b11d42ab6d796e96533c2076ec220affc ] This patch extends commit b93d6471748d ("sctp: implement the sender side for SACK-IMMEDIATELY extension") as it didn't white list SCTP_SACK_IMMEDIATELY on sctp_msghdr_parse(), causing it to be understood as an invalid flag and returning 
-EINVAL to the application. Note that the actual handling of the flag is already there in sctp_datamsg_from_user(). https://tools.ietf.org/html/rfc7053#section-7 Fixes: b93d6471748d ("sctp: implement the sender side for SACK-IMMEDIATELY extension") Signed-off-by: Marcelo Ricardo Leitner Acked-by: Vlad Yasevich Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/sctp/socket.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 76e6ec62cf926..1b80f209c5ec6 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -6653,6 +6653,7 @@ static int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs) if (cmsgs->srinfo->sinfo_flags & ~(SCTP_UNORDERED | SCTP_ADDR_OVER | + SCTP_SACK_IMMEDIATELY | SCTP_ABORT | SCTP_EOF)) return -EINVAL; break; @@ -6676,6 +6677,7 @@ static int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs) if (cmsgs->sinfo->snd_flags & ~(SCTP_UNORDERED | SCTP_ADDR_OVER | + SCTP_SACK_IMMEDIATELY | SCTP_ABORT | SCTP_EOF)) return -EINVAL; break; From 257a04782588b4b97ac005a044f85947f792be44 Mon Sep 17 00:00:00 2001 From: Parthasarathy Bhuvaragan Date: Wed, 27 Jan 2016 11:35:59 +0100 Subject: [PATCH 1963/2091] tipc: fix connection abort during subscription cancel [ Upstream commit 4d5cfcba2f6ec494d8810b9e3c0a7b06255c8067 ] In 'commit 7fe8097cef5f ("tipc: fix nullpointer bug when subscribing to events")', we terminate the connection if the subscription creation fails. In the same commit, the subscription creation result was based on the value of the subscription pointer (set in the function) instead of the return code. Unfortunately, the same function tipc_subscrp_create() handles subscription cancel request. For a subscription cancellation request, the subscription pointer cannot be set. Thus if a subscriber has several subscriptions and cancels any of them, the connection is terminated. In this commit, we terminate the connection based on the return value of tipc_subscrp_create(). Fixes: commit 7fe8097cef5f ("tipc: fix nullpointer bug when subscribing to events") Reviewed-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/tipc/subscr.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 1c147c869c2e6..948f316019d73 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -302,11 +302,10 @@ static void subscr_conn_msg_event(struct net *net, int conid, struct tipc_net *tn = net_generic(net, tipc_net_id); spin_lock_bh(&subscriber->lock); - subscr_subscribe(net, (struct tipc_subscr *)buf, subscriber, &sub); - if (sub) - tipc_nametbl_subscribe(sub); - else + if (subscr_subscribe(net, (struct tipc_subscr *)buf, subscriber, &sub)) tipc_conn_terminate(tn->topsrv, subscriber->conid); + else + tipc_nametbl_subscribe(sub); spin_unlock_bh(&subscriber->lock); } From 4c62ddf7e39943b356157b4c2196a71825c8328c Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 27 Jan 2016 15:16:43 +0100 Subject: [PATCH 1964/2091] switchdev: Require RTNL mutex to be held when sending FDB notifications [ Upstream commit 4f2c6ae5c64c353fb1b0425e4747e5603feadba1 ] When switchdev drivers process FDB notifications from the underlying device they resolve the netdev to which the entry points to and notify the bridge using the switchdev notifier. 
However, since the RTNL mutex is not held there is nothing preventing the netdev from disappearing in the middle, which will cause br_switchdev_event() to dereference a non-existing netdev. Make switchdev drivers hold the lock at the beginning of the notification processing session and release it once it ends, after notifying the bridge. Also, remove switchdev_mutex and fdb_lock, as they are no longer needed when RTNL mutex is held. Fixes: 03bf0c281234 ("switchdev: introduce switchdev notifier") Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/rocker/rocker.c | 2 ++ net/bridge/br.c | 3 +-- net/switchdev/switchdev.c | 15 ++++++++------- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index 73b6fc21ea00d..4fedf7fa72c46 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -3384,12 +3384,14 @@ static void rocker_port_fdb_learn_work(struct work_struct *work) info.addr = lw->addr; info.vid = lw->vid; + rtnl_lock(); if (learned && removing) call_netdev_switch_notifiers(NETDEV_SWITCH_FDB_DEL, lw->dev, &info.info); else if (learned && !removing) call_netdev_switch_notifiers(NETDEV_SWITCH_FDB_ADD, lw->dev, &info.info); + rtnl_unlock(); kfree(work); } diff --git a/net/bridge/br.c b/net/bridge/br.c index 02c24cf63c344..c72e01cf09d09 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -121,6 +121,7 @@ static struct notifier_block br_device_notifier = { .notifier_call = br_device_event }; +/* called with RTNL */ static int br_netdev_switch_event(struct notifier_block *unused, unsigned long event, void *ptr) { @@ -130,7 +131,6 @@ static int br_netdev_switch_event(struct notifier_block *unused, struct netdev_switch_notifier_fdb_info *fdb_info; int err = NOTIFY_DONE; - rtnl_lock(); p = br_port_get_rtnl(dev); if (!p) goto out; @@ -155,7 +155,6 @@ static int br_netdev_switch_event(struct notifier_block *unused, } out: - rtnl_unlock(); return err; } diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 055453d486683..a8dbe8001e464 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -64,7 +65,6 @@ int netdev_switch_port_stp_update(struct net_device *dev, u8 state) } EXPORT_SYMBOL_GPL(netdev_switch_port_stp_update); -static DEFINE_MUTEX(netdev_switch_mutex); static RAW_NOTIFIER_HEAD(netdev_switch_notif_chain); /** @@ -79,9 +79,9 @@ int register_netdev_switch_notifier(struct notifier_block *nb) { int err; - mutex_lock(&netdev_switch_mutex); + rtnl_lock(); err = raw_notifier_chain_register(&netdev_switch_notif_chain, nb); - mutex_unlock(&netdev_switch_mutex); + rtnl_unlock(); return err; } EXPORT_SYMBOL_GPL(register_netdev_switch_notifier); @@ -97,9 +97,9 @@ int unregister_netdev_switch_notifier(struct notifier_block *nb) { int err; - mutex_lock(&netdev_switch_mutex); + rtnl_lock(); err = raw_notifier_chain_unregister(&netdev_switch_notif_chain, nb); - mutex_unlock(&netdev_switch_mutex); + rtnl_unlock(); return err; } EXPORT_SYMBOL_GPL(unregister_netdev_switch_notifier); @@ -113,16 +113,17 @@ EXPORT_SYMBOL_GPL(unregister_netdev_switch_notifier); * Call all network notifier blocks. This should be called by driver * when it needs to propagate hardware event. * Return values are same as for atomic_notifier_call_chain(). + * rtnl_lock must be held. 
*/ int call_netdev_switch_notifiers(unsigned long val, struct net_device *dev, struct netdev_switch_notifier_info *info) { int err; + ASSERT_RTNL(); + info->dev = dev; - mutex_lock(&netdev_switch_mutex); err = raw_notifier_call_chain(&netdev_switch_notif_chain, val, info); - mutex_unlock(&netdev_switch_mutex); return err; } EXPORT_SYMBOL_GPL(call_netdev_switch_notifiers); From 1987c92ad0381beab46a65f79827d650de533584 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 27 Jan 2016 10:52:43 -0800 Subject: [PATCH 1965/2091] tcp: beware of alignments in tcp_get_info() [ Upstream commit ff5d749772018602c47509bdc0093ff72acd82ec ] With some combinations of user provided flags in netlink command, it is possible to call tcp_get_info() with a buffer that is not 8-bytes aligned. It does matter on some arches, so we need to use put_unaligned() to store the u64 fields. Current iproute2 package does not trigger this particular issue. Fixes: 0df48c26d841 ("tcp: add tcpi_bytes_acked to tcp_info") Fixes: 977cb0ecf82e ("tcp: add pacing_rate information into tcp_info") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/tcp.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index bb2ce74f60049..b5f4f5cb452e8 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -279,6 +279,7 @@ #include #include +#include #include int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT; @@ -2603,6 +2604,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) const struct inet_connection_sock *icsk = inet_csk(sk); u32 now = tcp_time_stamp; unsigned int start; + u64 rate64; u32 rate; memset(info, 0, sizeof(*info)); @@ -2665,15 +2667,17 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_total_retrans = tp->total_retrans; rate = READ_ONCE(sk->sk_pacing_rate); - info->tcpi_pacing_rate = rate != ~0U ? rate : ~0ULL; + rate64 = rate != ~0U ? rate : ~0ULL; + put_unaligned(rate64, &info->tcpi_pacing_rate); rate = READ_ONCE(sk->sk_max_pacing_rate); - info->tcpi_max_pacing_rate = rate != ~0U ? rate : ~0ULL; + rate64 = rate != ~0U ? rate : ~0ULL; + put_unaligned(rate64, &info->tcpi_max_pacing_rate); do { start = u64_stats_fetch_begin_irq(&tp->syncp); - info->tcpi_bytes_acked = tp->bytes_acked; - info->tcpi_bytes_received = tp->bytes_received; + put_unaligned(tp->bytes_acked, &info->tcpi_bytes_acked); + put_unaligned(tp->bytes_received, &info->tcpi_bytes_received); } while (u64_stats_fetch_retry_irq(&tp->syncp, start)); } EXPORT_SYMBOL_GPL(tcp_get_info); From 0c6943f31317d02178f55b882e36e03f4e5795f7 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 29 Jan 2016 12:30:19 +0100 Subject: [PATCH 1966/2091] ipv6: enforce flowi6_oif usage in ip6_dst_lookup_tail() [ Upstream commit 6f21c96a78b835259546d8f3fb4edff0f651d478 ] The current implementation of ip6_dst_lookup_tail basically ignore the egress ifindex match: if the saddr is set, ip6_route_output() purposefully ignores flowi6_oif, due to the commit d46a9d678e4c ("net: ipv6: Dont add RT6_LOOKUP_F_IFACE flag if saddr set"), if the saddr is 'any' the first route lookup in ip6_dst_lookup_tail fails, but upon failure a second lookup will be performed with saddr set, thus ignoring the ifindex constraint. This commit adds an output route lookup function variant, which allows the caller to specify lookup flags, and modify ip6_dst_lookup_tail() to enforce the ifindex match on the second lookup via said helper. 
ip6_route_output() becames now a static inline function build on top of ip6_route_output_flags(); as a side effect, out-of-tree modules need now a GPL license to access the output route lookup functionality. Signed-off-by: Paolo Abeni Acked-by: Hannes Frederic Sowa Acked-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/net/ip6_route.h | 12 ++++++++++-- net/ipv6/ip6_output.c | 6 +++++- net/ipv6/route.c | 8 +++----- 3 files changed, 18 insertions(+), 8 deletions(-) diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 5e192068e6cb6..388dea4da0832 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -64,8 +64,16 @@ static inline bool rt6_need_strict(const struct in6_addr *daddr) void ip6_route_input(struct sk_buff *skb); -struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk, - struct flowi6 *fl6); +struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk, + struct flowi6 *fl6, int flags); + +static inline struct dst_entry *ip6_route_output(struct net *net, + const struct sock *sk, + struct flowi6 *fl6) +{ + return ip6_route_output_flags(net, sk, fl6, 0); +} + struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6, int flags); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index f50228b0abe58..36b9ac48b8fb1 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -885,6 +885,7 @@ static int ip6_dst_lookup_tail(struct sock *sk, struct rt6_info *rt; #endif int err; + int flags = 0; /* The correct way to handle this would be to do * ip6_route_get_saddr, and then ip6_route_output; however, @@ -916,10 +917,13 @@ static int ip6_dst_lookup_tail(struct sock *sk, dst_release(*dst); *dst = NULL; } + + if (fl6->flowi6_oif) + flags |= RT6_LOOKUP_F_IFACE; } if (!*dst) - *dst = ip6_route_output(net, sk, fl6); + *dst = ip6_route_output_flags(net, sk, fl6, flags); err = (*dst)->error; if (err) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index f371fefa7fdcc..fe70bd6a75162 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1030,11 +1030,9 @@ static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags); } -struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk, - struct flowi6 *fl6) +struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk, + struct flowi6 *fl6, int flags) { - int flags = 0; - fl6->flowi6_iif = LOOPBACK_IFINDEX; if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr)) @@ -1047,7 +1045,7 @@ struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk, return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output); } -EXPORT_SYMBOL(ip6_route_output); +EXPORT_SYMBOL_GPL(ip6_route_output_flags); struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig) { From d04aa950dbd249222f87c96fac9c547890524f5a Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 29 Jan 2016 12:30:20 +0100 Subject: [PATCH 1967/2091] ipv6/udp: use sticky pktinfo egress ifindex on connect() [ Upstream commit 1cdda91871470f15e79375991bd2eddc6e86ddb1 ] Currently, the egress interface index specified via IPV6_PKTINFO is ignored by __ip6_datagram_connect(), so that RFC 3542 section 6.7 can be subverted when the user space application calls connect() before sendmsg(). Fix it by initializing properly flowi6_oif in connect() before performing the route lookup. 
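For illustration, a minimal userspace sketch of the case this fix covers: an application pins the egress interface with a sticky IPV6_PKTINFO option and then connect()s a UDP socket, as RFC 3542 section 6.7 allows. The interface name, address and port are placeholders, not values taken from the patch.

  #define _GNU_SOURCE
  #include <stdio.h>
  #include <string.h>
  #include <unistd.h>
  #include <net/if.h>
  #include <netinet/in.h>
  #include <arpa/inet.h>
  #include <sys/socket.h>

  int main(void)
  {
      int fd = socket(AF_INET6, SOCK_DGRAM, 0);
      struct in6_pktinfo pi;
      struct sockaddr_in6 dst;

      if (fd < 0) {
          perror("socket");
          return 1;
      }

      memset(&pi, 0, sizeof(pi));
      pi.ipi6_ifindex = if_nametoindex("eth0");     /* placeholder interface */

      /* Sticky option: applies to every send on this socket from now on. */
      if (setsockopt(fd, IPPROTO_IPV6, IPV6_PKTINFO, &pi, sizeof(pi)) < 0)
          perror("setsockopt(IPV6_PKTINFO)");

      memset(&dst, 0, sizeof(dst));
      dst.sin6_family = AF_INET6;
      dst.sin6_port = htons(4242);                  /* placeholder port */
      inet_pton(AF_INET6, "2001:db8::1", &dst.sin6_addr);

      /* The route chosen here should honour pi.ipi6_ifindex (RFC 3542 6.7). */
      if (connect(fd, (struct sockaddr *)&dst, sizeof(dst)) < 0)
          perror("connect");

      close(fd);
      return 0;
  }

With the fix, the route lookup performed by connect() uses the sticky ipi6_ifindex instead of silently ignoring it.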
Signed-off-by: Paolo Abeni Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv6/datagram.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 13ca4cf5616f8..8e6cb3f143268 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -162,6 +162,9 @@ static int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int a fl6.fl6_dport = inet->inet_dport; fl6.fl6_sport = inet->inet_sport; + if (!fl6.flowi6_oif) + fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; + if (!fl6.flowi6_oif && (addr_type&IPV6_ADDR_MULTICAST)) fl6.flowi6_oif = np->mcast_oif; From d07f3017baa86b3e44eff41a41e3b297604ac8fb Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Thu, 30 Jul 2015 14:28:42 +0800 Subject: [PATCH 1968/2091] net/ipv6: add sysctl option accept_ra_min_hop_limit [ Upstream commit 8013d1d7eafb0589ca766db6b74026f76b7f5cb4 ] Commit 6fd99094de2b ("ipv6: Don't reduce hop limit for an interface") disabled accept hop limit from RA if it is smaller than the current hop limit for security stuff. But this behavior kind of break the RFC definition. RFC 4861, 6.3.4. Processing Received Router Advertisements A Router Advertisement field (e.g., Cur Hop Limit, Reachable Time, and Retrans Timer) may contain a value denoting that it is unspecified. In such cases, the parameter should be ignored and the host should continue using whatever value it is already using. If the received Cur Hop Limit value is non-zero, the host SHOULD set its CurHopLimit variable to the received value. So add sysctl option accept_ra_min_hop_limit to let user choose the minimum hop limit value they can accept from RA. And set default to 1 to meet RFC standards. Signed-off-by: Hangbin Liu Acked-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- Documentation/networking/ip-sysctl.txt | 8 ++++++++ include/linux/ipv6.h | 1 + include/uapi/linux/ipv6.h | 2 ++ net/ipv6/addrconf.c | 10 ++++++++++ net/ipv6/ndisc.c | 16 +++++++--------- 5 files changed, 28 insertions(+), 9 deletions(-) diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 071fb18dc57c8..07fad3d2fc568 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1321,6 +1321,14 @@ accept_ra_from_local - BOOLEAN disabled if accept_ra_from_local is disabled on a specific interface. +accept_ra_min_hop_limit - INTEGER + Minimum hop limit Information in Router Advertisement. + + Hop limit Information in Router Advertisement less than this + variable shall be ignored. + + Default: 1 + accept_ra_pinfo - BOOLEAN Learn Prefix Information in Router Advertisement. 
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index e4b4649833227..01c25923675b4 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -29,6 +29,7 @@ struct ipv6_devconf { __s32 max_desync_factor; __s32 max_addresses; __s32 accept_ra_defrtr; + __s32 accept_ra_min_hop_limit; __s32 accept_ra_pinfo; #ifdef CONFIG_IPV6_ROUTER_PREF __s32 accept_ra_rtr_pref; diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 5efa54ae567ca..80f3b74446a1a 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -171,6 +171,8 @@ enum { DEVCONF_USE_OPTIMISTIC, DEVCONF_ACCEPT_RA_MTU, DEVCONF_STABLE_SECRET, + DEVCONF_USE_OIF_ADDRS_ONLY, + DEVCONF_ACCEPT_RA_MIN_HOP_LIMIT, DEVCONF_MAX }; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index f4795b0d6e6ef..28c4bc58f2cc7 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -195,6 +195,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .max_addresses = IPV6_MAX_ADDRESSES, .accept_ra_defrtr = 1, .accept_ra_from_local = 0, + .accept_ra_min_hop_limit= 1, .accept_ra_pinfo = 1, #ifdef CONFIG_IPV6_ROUTER_PREF .accept_ra_rtr_pref = 1, @@ -236,6 +237,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .max_addresses = IPV6_MAX_ADDRESSES, .accept_ra_defrtr = 1, .accept_ra_from_local = 0, + .accept_ra_min_hop_limit= 1, .accept_ra_pinfo = 1, #ifdef CONFIG_IPV6_ROUTER_PREF .accept_ra_rtr_pref = 1, @@ -4565,6 +4567,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_MAX_DESYNC_FACTOR] = cnf->max_desync_factor; array[DEVCONF_MAX_ADDRESSES] = cnf->max_addresses; array[DEVCONF_ACCEPT_RA_DEFRTR] = cnf->accept_ra_defrtr; + array[DEVCONF_ACCEPT_RA_MIN_HOP_LIMIT] = cnf->accept_ra_min_hop_limit; array[DEVCONF_ACCEPT_RA_PINFO] = cnf->accept_ra_pinfo; #ifdef CONFIG_IPV6_ROUTER_PREF array[DEVCONF_ACCEPT_RA_RTR_PREF] = cnf->accept_ra_rtr_pref; @@ -5457,6 +5460,13 @@ static struct addrconf_sysctl_table .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "accept_ra_min_hop_limit", + .data = &ipv6_devconf.accept_ra_min_hop_limit, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { .procname = "accept_ra_pinfo", .data = &ipv6_devconf.accept_ra_pinfo, diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 96f153c0846b7..abb0bdda759a5 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1225,18 +1225,16 @@ static void ndisc_router_discovery(struct sk_buff *skb) if (rt) rt6_set_expires(rt, jiffies + (HZ * lifetime)); - if (ra_msg->icmph.icmp6_hop_limit) { - /* Only set hop_limit on the interface if it is higher than - * the current hop_limit. 
- */ - if (in6_dev->cnf.hop_limit < ra_msg->icmph.icmp6_hop_limit) { + if (in6_dev->cnf.accept_ra_min_hop_limit < 256 && + ra_msg->icmph.icmp6_hop_limit) { + if (in6_dev->cnf.accept_ra_min_hop_limit <= ra_msg->icmph.icmp6_hop_limit) { in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit; + if (rt) + dst_metric_set(&rt->dst, RTAX_HOPLIMIT, + ra_msg->icmph.icmp6_hop_limit); } else { - ND_PRINTK(2, warn, "RA: Got route advertisement with lower hop_limit than current\n"); + ND_PRINTK(2, warn, "RA: Got route advertisement with lower hop_limit than minimum\n"); } - if (rt) - dst_metric_set(&rt->dst, RTAX_HOPLIMIT, - ra_msg->icmph.icmp6_hop_limit); } skip_defrtr: From 9f88ecf6c707ef3a6e26a8bd58e948728d2093d6 Mon Sep 17 00:00:00 2001 From: "subashab@codeaurora.org" Date: Tue, 2 Feb 2016 02:11:10 +0000 Subject: [PATCH 1969/2091] ipv6: addrconf: Fix recursive spin lock call [ Upstream commit 16186a82de1fdd868255448274e64ae2616e2640 ] A rcu stall with the following backtrace was seen on a system with forwarding, optimistic_dad and use_optimistic set. To reproduce, set these flags and allow ipv6 autoconf. This occurs because the device write_lock is acquired while already holding the read_lock. Back trace below - INFO: rcu_preempt self-detected stall on CPU { 1} (t=2100 jiffies g=3992 c=3991 q=4471) <6> Task dump for CPU 1: <2> kworker/1:0 R running task 12168 15 2 0x00000002 <2> Workqueue: ipv6_addrconf addrconf_dad_work <6> Call trace: <2> [] el1_irq+0x68/0xdc <2> [] _raw_write_lock_bh+0x20/0x30 <2> [] __ipv6_dev_ac_inc+0x64/0x1b4 <2> [] addrconf_join_anycast+0x9c/0xc4 <2> [] __ipv6_ifa_notify+0x160/0x29c <2> [] ipv6_ifa_notify+0x50/0x70 <2> [] addrconf_dad_work+0x314/0x334 <2> [] process_one_work+0x244/0x3fc <2> [] worker_thread+0x2f8/0x418 <2> [] kthread+0xe0/0xec v2: do addrconf_dad_kick inside read lock and then acquire write lock for ipv6_ifa_notify as suggested by Eric Fixes: 7fd2561e4ebdd ("net: ipv6: Add a sysctl to make optimistic addresses useful candidates") Cc: Eric Dumazet Cc: Erik Kline Cc: Hannes Frederic Sowa Signed-off-by: Subash Abhinov Kasiviswanathan Acked-by: Hannes Frederic Sowa Acked-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv6/addrconf.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 28c4bc58f2cc7..fcfbd05a8e0b4 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3423,6 +3423,7 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp) { struct inet6_dev *idev = ifp->idev; struct net_device *dev = idev->dev; + bool notify = false; addrconf_join_solict(dev, &ifp->addr); @@ -3468,7 +3469,7 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp) /* Because optimistic nodes can use this address, * notify listeners. If DAD fails, RTM_DELADDR is sent. */ - ipv6_ifa_notify(RTM_NEWADDR, ifp); + notify = true; } } @@ -3476,6 +3477,8 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp) out: spin_unlock(&ifp->lock); read_unlock_bh(&idev->lock); + if (notify) + ipv6_ifa_notify(RTM_NEWADDR, ifp); } static void addrconf_dad_start(struct inet6_ifaddr *ifp) From 6c92a8f0502d3e9a9036473408f7f67b58f22ea9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 2 Feb 2016 17:55:01 -0800 Subject: [PATCH 1970/2091] ipv6: fix a lockdep splat [ Upstream commit 44c3d0c1c0a880354e9de5d94175742e2c7c9683 ] Silence lockdep false positive about rcu_dereference() being used in the wrong context. First one should use rcu_dereference_protected() as we own the spinlock. 
Second one should be a normal assignation, as no barrier is needed. Fixes: 18367681a10bd ("ipv6 flowlabel: Convert np->ipv6_fl_list to RCU.") Reported-by: Dave Jones Signed-off-by: Eric Dumazet Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv6/ip6_flowlabel.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index d491125011c4d..db939e4ac68a8 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -540,12 +540,13 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) } spin_lock_bh(&ip6_sk_fl_lock); for (sflp = &np->ipv6_fl_list; - (sfl = rcu_dereference(*sflp)) != NULL; + (sfl = rcu_dereference_protected(*sflp, + lockdep_is_held(&ip6_sk_fl_lock))) != NULL; sflp = &sfl->next) { if (sfl->fl->label == freq.flr_label) { if (freq.flr_label == (np->flow_label&IPV6_FLOWLABEL_MASK)) np->flow_label &= ~IPV6_FLOWLABEL_MASK; - *sflp = rcu_dereference(sfl->next); + *sflp = sfl->next; spin_unlock_bh(&ip6_sk_fl_lock); fl_release(sfl->fl); kfree_rcu(sfl, rcu); From 797c009c98dbb21127a2549d1106ed19d18661cf Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Wed, 3 Feb 2016 02:11:03 +0100 Subject: [PATCH 1971/2091] unix: correctly track in-flight fds in sending process user_struct [ Upstream commit 415e3d3e90ce9e18727e8843ae343eda5a58fad6 ] The commit referenced in the Fixes tag incorrectly accounted the number of in-flight fds over a unix domain socket to the original opener of the file-descriptor. This allows another process to arbitrary deplete the original file-openers resource limit for the maximum of open files. Instead the sending processes and its struct cred should be credited. To do so, we add a reference counted struct user_struct pointer to the scm_fp_list and use it to account for the number of inflight unix fds. Fixes: 712f4aad406bb1 ("unix: properly account for FDs passed over unix sockets") Reported-by: David Herrmann Cc: David Herrmann Cc: Willy Tarreau Cc: Linus Torvalds Suggested-by: Linus Torvalds Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- include/net/af_unix.h | 4 ++-- include/net/scm.h | 1 + net/core/scm.c | 7 +++++++ net/unix/af_unix.c | 4 ++-- net/unix/garbage.c | 8 ++++---- 5 files changed, 16 insertions(+), 8 deletions(-) diff --git a/include/net/af_unix.h b/include/net/af_unix.h index e830c3dff61a1..7bb69c9c3c43d 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -6,8 +6,8 @@ #include #include -void unix_inflight(struct file *fp); -void unix_notinflight(struct file *fp); +void unix_inflight(struct user_struct *user, struct file *fp); +void unix_notinflight(struct user_struct *user, struct file *fp); void unix_gc(void); void wait_for_unix_gc(void); struct sock *unix_get_socket(struct file *filp); diff --git a/include/net/scm.h b/include/net/scm.h index 262532d111f51..59fa93c01d2a1 100644 --- a/include/net/scm.h +++ b/include/net/scm.h @@ -21,6 +21,7 @@ struct scm_creds { struct scm_fp_list { short count; short max; + struct user_struct *user; struct file *fp[SCM_MAX_FD]; }; diff --git a/net/core/scm.c b/net/core/scm.c index 8a1741b14302b..dce0acb929f16 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -87,6 +87,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp) *fplp = fpl; fpl->count = 0; fpl->max = SCM_MAX_FD; + fpl->user = NULL; } fpp = &fpl->fp[fpl->count]; @@ -107,6 +108,10 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp) *fpp++ = file; fpl->count++; } + + if (!fpl->user) + fpl->user = get_uid(current_user()); + return num; } @@ -119,6 +124,7 @@ void __scm_destroy(struct scm_cookie *scm) scm->fp = NULL; for (i=fpl->count-1; i>=0; i--) fput(fpl->fp[i]); + free_uid(fpl->user); kfree(fpl); } } @@ -336,6 +342,7 @@ struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl) for (i = 0; i < fpl->count; i++) get_file(fpl->fp[i]); new_fpl->max = new_fpl->count; + new_fpl->user = get_uid(fpl->user); } return new_fpl; } diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index c741d8327204d..d644042868056 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1464,7 +1464,7 @@ static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb) UNIXCB(skb).fp = NULL; for (i = scm->fp->count-1; i >= 0; i--) - unix_notinflight(scm->fp->fp[i]); + unix_notinflight(scm->fp->user, scm->fp->fp[i]); } static void unix_destruct_scm(struct sk_buff *skb) @@ -1529,7 +1529,7 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) return -ENOMEM; for (i = scm->fp->count - 1; i >= 0; i--) - unix_inflight(scm->fp->fp[i]); + unix_inflight(scm->fp->user, scm->fp->fp[i]); return max_level; } diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 8fcdc2283af50..6a0d48525fcf9 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -116,7 +116,7 @@ struct sock *unix_get_socket(struct file *filp) * descriptor if it is for an AF_UNIX socket. 
*/ -void unix_inflight(struct file *fp) +void unix_inflight(struct user_struct *user, struct file *fp) { struct sock *s = unix_get_socket(fp); @@ -133,11 +133,11 @@ void unix_inflight(struct file *fp) } unix_tot_inflight++; } - fp->f_cred->user->unix_inflight++; + user->unix_inflight++; spin_unlock(&unix_gc_lock); } -void unix_notinflight(struct file *fp) +void unix_notinflight(struct user_struct *user, struct file *fp) { struct sock *s = unix_get_socket(fp); @@ -152,7 +152,7 @@ void unix_notinflight(struct file *fp) list_del_init(&u->link); unix_tot_inflight--; } - fp->f_cred->user->unix_inflight--; + user->unix_inflight--; spin_unlock(&unix_gc_lock); } From 3faf465445a1d8677b860eee894aecc3d2e32cc6 Mon Sep 17 00:00:00 2001 From: Hans Westgaard Ry Date: Wed, 3 Feb 2016 09:26:57 +0100 Subject: [PATCH 1972/2091] net:Add sysctl_max_skb_frags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 5f74f82ea34c0da80ea0b49192bb5ea06e063593 ] Devices may have limits on the number of fragments in an skb they support. Current codebase uses a constant as maximum for number of fragments one skb can hold and use. When enabling scatter/gather and running traffic with many small messages the codebase uses the maximum number of fragments and may thereby violate the max for certain devices. The patch introduces a global variable as max number of fragments. Signed-off-by: Hans Westgaard Ry Reviewed-by: Håkon Bugge Acked-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/linux/skbuff.h | 1 + net/core/skbuff.c | 2 ++ net/core/sysctl_net_core.c | 10 ++++++++++ net/ipv4/tcp.c | 4 ++-- 4 files changed, 15 insertions(+), 2 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 1f17abe237251..6633b0cd3fb9d 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -203,6 +203,7 @@ struct sk_buff; #else #define MAX_SKB_FRAGS (65536/PAGE_SIZE + 1) #endif +extern int sysctl_max_skb_frags; typedef struct skb_frag_struct skb_frag_t; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 2e5fcda165705..c9793c6c5005f 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -79,6 +79,8 @@ struct kmem_cache *skbuff_head_cache __read_mostly; static struct kmem_cache *skbuff_fclone_cache __read_mostly; +int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS; +EXPORT_SYMBOL(sysctl_max_skb_frags); /** * skb_panic - private function for out-of-line support diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 95b6139d710c4..a6beb7b6ae556 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -26,6 +26,7 @@ static int zero = 0; static int one = 1; static int min_sndbuf = SOCK_MIN_SNDBUF; static int min_rcvbuf = SOCK_MIN_RCVBUF; +static int max_skb_frags = MAX_SKB_FRAGS; static int net_msg_warn; /* Unused, but still a sysctl */ @@ -392,6 +393,15 @@ static struct ctl_table net_core_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "max_skb_frags", + .data = &sysctl_max_skb_frags, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, + .extra2 = &max_skb_frags, + }, { } }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index b5f4f5cb452e8..19d385a0f02de 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -922,7 +922,7 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, i = skb_shinfo(skb)->nr_frags; can_coalesce = skb_can_coalesce(skb, i, page, offset); - if 
(!can_coalesce && i >= MAX_SKB_FRAGS) { + if (!can_coalesce && i >= sysctl_max_skb_frags) { tcp_mark_push(tp, skb); goto new_segment; } @@ -1188,7 +1188,7 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) if (!skb_can_coalesce(skb, i, pfrag->page, pfrag->offset)) { - if (i == MAX_SKB_FRAGS || !sg) { + if (i == sysctl_max_skb_frags || !sg) { tcp_mark_push(tp, skb); goto new_segment; } From beb9881442593e9de581bf7d5396788cfedcdbd3 Mon Sep 17 00:00:00 2001 From: Siva Reddy Kallam Date: Wed, 3 Feb 2016 14:09:38 +0530 Subject: [PATCH 1973/2091] tg3: Fix for tg3 transmit queue 0 timed out when too many gso_segs [ Upstream commit b7d987295c74500b733a0ba07f9a9bcc4074fa83 ] tg3_tso_bug() can hit a condition where the entire tx ring is not big enough to segment the GSO packet. For example, if MSS is very small, gso_segs can exceed the tx ring size. When we hit the condition, it will cause tx timeout. tg3_tso_bug() is called to handle TSO and DMA hardware bugs. For TSO bugs, if tg3_tso_bug() cannot succeed, we have to drop the packet. For DMA bugs, we can still fall back to linearize the SKB and let the hardware transmit the TSO packet. This patch adds a function tg3_tso_bug_gso_check() to check if there are enough tx descriptors for GSO before calling tg3_tso_bug(). The caller will then handle the error appropriately - drop or lineraize the SKB. v2: Corrected patch description to avoid confusion. Signed-off-by: Siva Reddy Kallam Signed-off-by: Michael Chan Acked-by: Prashant Sreedharan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/tg3.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 0d8af5bb59077..d5415205779fb 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -7833,6 +7833,14 @@ static int tigon3_dma_hwbug_workaround(struct tg3_napi *tnapi, return ret; } +static bool tg3_tso_bug_gso_check(struct tg3_napi *tnapi, struct sk_buff *skb) +{ + /* Check if we will never have enough descriptors, + * as gso_segs can be more than current ring size + */ + return skb_shinfo(skb)->gso_segs < tnapi->tx_pending / 3; +} + static netdev_tx_t tg3_start_xmit(struct sk_buff *, struct net_device *); /* Use GSO to workaround all TSO packets that meet HW bug conditions @@ -7936,14 +7944,19 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) * vlan encapsulated. 
*/ if (skb->protocol == htons(ETH_P_8021Q) || - skb->protocol == htons(ETH_P_8021AD)) - return tg3_tso_bug(tp, tnapi, txq, skb); + skb->protocol == htons(ETH_P_8021AD)) { + if (tg3_tso_bug_gso_check(tnapi, skb)) + return tg3_tso_bug(tp, tnapi, txq, skb); + goto drop; + } if (!skb_is_gso_v6(skb)) { if (unlikely((ETH_HLEN + hdr_len) > 80) && - tg3_flag(tp, TSO_BUG)) - return tg3_tso_bug(tp, tnapi, txq, skb); - + tg3_flag(tp, TSO_BUG)) { + if (tg3_tso_bug_gso_check(tnapi, skb)) + return tg3_tso_bug(tp, tnapi, txq, skb); + goto drop; + } ip_csum = iph->check; ip_tot_len = iph->tot_len; iph->check = 0; @@ -8075,7 +8088,7 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) if (would_hit_hwbug) { tg3_tx_skb_unmap(tnapi, tnapi->tx_prod, i); - if (mss) { + if (mss && tg3_tso_bug_gso_check(tnapi, skb)) { /* If it's a TSO packet, do GSO instead of * allocating and copying to a large linear SKB */ From bb6f76ba4c6b29e4650310982dc61639d34fa833 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 3 Feb 2016 23:33:30 +0800 Subject: [PATCH 1974/2091] sctp: translate network order to host order when users get a hmacid [ Upstream commit 7a84bd46647ff181eb2659fdc99590e6f16e501d ] Commit ed5a377d87dc ("sctp: translate host order to network order when setting a hmacid") corrected the hmacid byte-order when setting a hmacid. but the same issue also exists on getting a hmacid. We fix it by changing hmacids to host order when users get them with getsockopt. Fixes: Commit ed5a377d87dc ("sctp: translate host order to network order when setting a hmacid") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/sctp/socket.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 1b80f209c5ec6..3c5833058b036 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -5555,6 +5555,7 @@ static int sctp_getsockopt_hmac_ident(struct sock *sk, int len, struct sctp_hmac_algo_param *hmacs; __u16 data_len = 0; u32 num_idents; + int i; if (!ep->auth_enable) return -EACCES; @@ -5572,8 +5573,12 @@ static int sctp_getsockopt_hmac_ident(struct sock *sk, int len, return -EFAULT; if (put_user(num_idents, &p->shmac_num_idents)) return -EFAULT; - if (copy_to_user(p->shmac_idents, hmacs->hmac_ids, data_len)) - return -EFAULT; + for (i = 0; i < num_idents; i++) { + __u16 hmacid = ntohs(hmacs->hmac_ids[i]); + + if (copy_to_user(&p->shmac_idents[i], &hmacid, sizeof(__u16))) + return -EFAULT; + } return 0; } From 916f99656dc7d69355cb1045530c421cb1976590 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 9 Feb 2016 02:49:54 -0800 Subject: [PATCH 1975/2091] flow_dissector: Fix unaligned access in __skb_flow_dissector when used by eth_get_headlen [ Upstream commit 461547f3158978c180d74484d58e82be9b8e7357, since we lack the flow dissector flags in this release we guard the flow label access using a test on 'skb' being NULL ] This patch fixes an issue with unaligned accesses when using eth_get_headlen on a page that was DMA aligned instead of being IP aligned. The fact is when trying to check the length we don't need to be looking at the flow label so we can reorder the checks to first check if we are supposed to gather the flow label and then make the call to actually get it. v2: Updated path so that either STOP_AT_FLOW_LABEL or KEY_FLOW_LABEL can cause us to check for the flow label. Reported-by: Sowmini Varadhan Signed-off-by: Alexander Duyck Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- net/core/flow_dissector.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 2c35c02a931e2..f96d2caa748c9 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -113,7 +113,6 @@ bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow, case htons(ETH_P_IPV6): { const struct ipv6hdr *iph; struct ipv6hdr _iph; - __be32 flow_label; ipv6: iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph); @@ -130,8 +129,9 @@ bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow, flow->src = (__force __be32)ipv6_addr_hash(&iph->saddr); flow->dst = (__force __be32)ipv6_addr_hash(&iph->daddr); - flow_label = ip6_flowlabel(iph); - if (flow_label) { + if (skb && ip6_flowlabel(iph)) { + __be32 flow_label = ip6_flowlabel(iph); + /* Awesome, IPv6 packet has a flow label so we can * use that to represent the ports without any * further dissection. From f68887de614bcf857651c0d2e24cfc3004dc20e4 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 9 Feb 2016 06:14:43 -0800 Subject: [PATCH 1976/2091] net: Copy inner L3 and L4 headers as unaligned on GRE TEB [ Upstream commit 78565208d73ca9b654fb9a6b142214d52eeedfd1 ] This patch corrects the unaligned accesses seen on GRE TEB tunnels when generating hash keys. Specifically what this patch does is make it so that we force the use of skb_copy_bits when the GRE inner headers will be unaligned due to NET_IP_ALIGNED being a non-zero value. Signed-off-by: Alexander Duyck Acked-by: Tom Herbert Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/core/flow_dissector.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index f96d2caa748c9..3556791fdc6eb 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -233,6 +233,13 @@ bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow, return false; proto = eth->h_proto; nhoff += sizeof(*eth); + + /* Cap headers that we access via pointers at the + * end of the Ethernet header as our maximum alignment + * at that point is only 2 bytes. + */ + if (NET_IP_ALIGN) + hlen = nhoff; } goto again; } From 0f912f6700a3f14481c13cbda2b9cc1b636948ac Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 10 Feb 2016 16:47:11 +0100 Subject: [PATCH 1977/2091] bpf: fix branch offset adjustment on backjumps after patching ctx expansion [ Upstream commit a1b14d27ed0965838350f1377ff97c93ee383492 ] When ctx access is used, the kernel often needs to expand/rewrite instructions, so after that patching, branch offsets have to be adjusted for both forward and backward jumps in the new eBPF program, but for backward jumps it fails to account the delta. Meaning, for example, if the expansion happens exactly on the insn that sits at the jump target, it doesn't fix up the back jump offset. 
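For illustration, before the detailed analysis that follows, a toy standalone model of the corrected adjustment; struct toy_insn, the array contents and the indices are made up for the example, and only the two comparisons mirror the fixed check (the real verifier additionally restricts this to jump opcodes).

  #include <assert.h>
  #include <stdio.h>

  struct toy_insn { int off; };  /* relative jump: target = i + off + 1; 0 = not a jump */

  /* Walk the already-patched program: the insn at pos grew into 1 + delta insns. */
  static void adjust_branches(struct toy_insn *insn, int len, int pos, int delta)
  {
      for (int i = 0; i < len; i++) {
          if (i < pos && i + insn[i].off + 1 > pos)
              insn[i].off += delta;          /* forward jump across the patch */
          else if (i > pos + delta && i + insn[i].off + 1 <= pos + delta)
              insn[i].off -= delta;          /* backward jump across the patch;
                                              * both bounds use pos + delta */
      }
  }

  int main(void)
  {
      /* Original program: insn 4 jumps back to insn 2 (off = -3).  Insn 2 is
       * then expanded in place into three insns (delta = 2), so the jump now
       * sits at index 6 and must land on index 2, i.e. off must become -5.
       * The pre-fix check (i + off + 1 < pos) would have left it at -3. */
      struct toy_insn prog[8] = { {0}, {0}, {0}, {0}, {0}, {0}, {-3}, {0} };

      adjust_branches(prog, 8, 2, 2);
      assert(prog[6].off == -5);
      printf("backward jump fixed up: off = %d\n", prog[6].off);
      return 0;
  }
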
Analysis on what the check in adjust_branches() is currently doing: /* adjust offset of jmps if necessary */ if (i < pos && i + insn->off + 1 > pos) insn->off += delta; else if (i > pos && i + insn->off + 1 < pos) insn->off -= delta; First condition (forward jumps): Before: After: insns[0] insns[0] insns[1] <--- i/insn insns[1] <--- i/insn insns[2] <--- pos insns[P] <--- pos insns[3] insns[P] `------| delta insns[4] <--- target_X insns[P] `-----| insns[5] insns[3] insns[4] <--- target_X insns[5] First case is if we cross pos-boundary and the jump instruction was before pos. This is handeled correctly. I.e. if i == pos, then this would mean our jump that we currently check was the patchlet itself that we just injected. Since such patchlets are self-contained and have no awareness of any insns before or after the patched one, the delta is correctly not adjusted. Also, for the second condition in case of i + insn->off + 1 == pos, means we jump to that newly patched instruction, so no offset adjustment are needed. That part is correct. Second condition (backward jumps): Before: After: insns[0] insns[0] insns[1] <--- target_X insns[1] <--- target_X insns[2] <--- pos <-- target_Y insns[P] <--- pos <-- target_Y insns[3] insns[P] `------| delta insns[4] <--- i/insn insns[P] `-----| insns[5] insns[3] insns[4] <--- i/insn insns[5] Second interesting case is where we cross pos-boundary and the jump instruction was after pos. Backward jump with i == pos would be impossible and pose a bug somewhere in the patchlet, so the first condition checking i > pos is okay only by itself. However, i + insn->off + 1 < pos does not always work as intended to trigger the adjustment. It works when jump targets would be far off where the delta wouldn't matter. But, for example, where the fixed insn->off before pointed to pos (target_Y), it now points to pos + delta, so that additional room needs to be taken into account for the check. This means that i) both tests here need to be adjusted into pos + delta, and ii) for the second condition, the test needs to be <= as pos itself can be a target in the backjump, too. Fixes: 9bac3d6d548e ("bpf: allow extended BPF programs access skb fields") Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- kernel/bpf/verifier.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 141d562064a72..6582410a71c79 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1944,7 +1944,7 @@ static void adjust_branches(struct bpf_prog *prog, int pos, int delta) /* adjust offset of jmps if necessary */ if (i < pos && i + insn->off + 1 > pos) insn->off += delta; - else if (i > pos && i + insn->off + 1 < pos) + else if (i > pos + delta && i + insn->off + 1 <= pos + delta) insn->off -= delta; } } From 54d0901de6ecdfc72514f5c610fe89d65674c126 Mon Sep 17 00:00:00 2001 From: Jay Vosburgh Date: Tue, 2 Feb 2016 13:35:56 -0800 Subject: [PATCH 1978/2091] bonding: Fix ARP monitor validation [ Upstream commit 21a75f0915dde8674708b39abfcda113911c49b1 ] The current logic in bond_arp_rcv will accept an incoming ARP for validation if (a) the receiving slave is either "active" (which includes the currently active slave, or the current ARP slave) or, (b) there is a currently active slave, and it has received an ARP since it became active. For case (b), the receiving slave isn't the currently active slave, and is receiving the original broadcast ARP request, not an ARP reply from the target. 
This logic can fail if there is no currently active slave. In this situation, the ARP probe logic cycles through all slaves, assigning each in turn as the "current_arp_slave" for one arp_interval, then setting that one as "active," and sending an ARP probe from that slave. The current logic expects the ARP reply to arrive on the sending current_arp_slave, however, due to switch FDB updating delays, the reply may be directed to another slave. This can arise if the bonding slaves and switch are working, but the ARP target is not responding. When the ARP target recovers, a condition may result wherein the ARP target host replies faster than the switch can update its forwarding table, causing each ARP reply to be sent to the previous current_arp_slave. This will never pass the logic in bond_arp_rcv, as neither of the above conditions (a) or (b) are met. Some experimentation on a LAN shows ARP reply round trips in the 200 usec range, but my available switches never update their FDB in less than 4000 usec. This patch changes the logic in bond_arp_rcv to additionally accept an ARP reply for validation on any slave if there is a current ARP slave and it sent an ARP probe during the previous arp_interval. Fixes: aeea64ac717a ("bonding: don't trust arp requests unless active slave really works") Cc: Veaceslav Falico Cc: Andy Gospodarek Signed-off-by: Jay Vosburgh Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/bonding/bond_main.c | 39 +++++++++++++++++++++++---------- 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 72ba774df7a77..bd744e31c434f 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -214,6 +214,8 @@ static void bond_uninit(struct net_device *bond_dev); static struct rtnl_link_stats64 *bond_get_stats(struct net_device *bond_dev, struct rtnl_link_stats64 *stats); static void bond_slave_arr_handler(struct work_struct *work); +static bool bond_time_in_interval(struct bonding *bond, unsigned long last_act, + int mod); /*---------------------------- General routines -----------------------------*/ @@ -2397,7 +2399,7 @@ int bond_arp_rcv(const struct sk_buff *skb, struct bonding *bond, struct slave *slave) { struct arphdr *arp = (struct arphdr *)skb->data; - struct slave *curr_active_slave; + struct slave *curr_active_slave, *curr_arp_slave; unsigned char *arp_ptr; __be32 sip, tip; int alen, is_arp = skb->protocol == __cpu_to_be16(ETH_P_ARP); @@ -2444,26 +2446,41 @@ int bond_arp_rcv(const struct sk_buff *skb, struct bonding *bond, &sip, &tip); curr_active_slave = rcu_dereference(bond->curr_active_slave); + curr_arp_slave = rcu_dereference(bond->current_arp_slave); - /* Backup slaves won't see the ARP reply, but do come through - * here for each ARP probe (so we swap the sip/tip to validate - * the probe). In a "redundant switch, common router" type of - * configuration, the ARP probe will (hopefully) travel from - * the active, through one switch, the router, then the other - * switch before reaching the backup. 
+ /* We 'trust' the received ARP enough to validate it if: + * + * (a) the slave receiving the ARP is active (which includes the + * current ARP slave, if any), or + * + * (b) the receiving slave isn't active, but there is a currently + * active slave and it received valid arp reply(s) after it became + * the currently active slave, or + * + * (c) there is an ARP slave that sent an ARP during the prior ARP + * interval, and we receive an ARP reply on any slave. We accept + * these because switch FDB update delays may deliver the ARP + * reply to a slave other than the sender of the ARP request. * - * We 'trust' the arp requests if there is an active slave and - * it received valid arp reply(s) after it became active. This - * is done to avoid endless looping when we can't reach the + * Note: for (b), backup slaves are receiving the broadcast ARP + * request, not a reply. This request passes from the sending + * slave through the L2 switch(es) to the receiving slave. Since + * this is checking the request, sip/tip are swapped for + * validation. + * + * This is done to avoid endless looping when we can't reach the * arp_ip_target and fool ourselves with our own arp requests. */ - if (bond_is_active_slave(slave)) bond_validate_arp(bond, slave, sip, tip); else if (curr_active_slave && time_after(slave_last_rx(bond, curr_active_slave), curr_active_slave->last_link_up)) bond_validate_arp(bond, slave, tip, sip); + else if (curr_arp_slave && (arp->ar_op == htons(ARPOP_REPLY)) && + bond_time_in_interval(bond, + dev_trans_start(curr_arp_slave->dev), 1)) + bond_validate_arp(bond, slave, sip, tip); out_unlock: if (arp != (struct arphdr *)skb->data) From ffe8615cbb6abdbb66b8c8b33a3e7ea06c6662f9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 4 Feb 2016 06:23:28 -0800 Subject: [PATCH 1979/2091] ipv4: fix memory leaks in ip_cmsg_send() callers [ Upstream commit 919483096bfe75dda338e98d56da91a263746a0a ] Dmitry reported memory leaks of IP options allocated in ip_cmsg_send() when/if this function returns an error. Callers are responsible for the freeing. Many thanks to Dmitry for the report and diagnostic. Reported-by: Dmitry Vyukov Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/ip_sockglue.c | 2 ++ net/ipv4/ping.c | 4 +++- net/ipv4/raw.c | 4 +++- net/ipv4/udp.c | 4 +++- 4 files changed, 11 insertions(+), 3 deletions(-) diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 6ddde89996f44..b6c7bdea48533 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -249,6 +249,8 @@ int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc, switch (cmsg->cmsg_type) { case IP_RETOPTS: err = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr)); + + /* Our caller is responsible for freeing ipc->opt */ err = ip_options_get(net, &ipc->opt, CMSG_DATA(cmsg), err < 40 ? 
err : 40); if (err) diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 05ff44b758dfe..f6ee0d561aab3 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -745,8 +745,10 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (msg->msg_controllen) { err = ip_cmsg_send(sock_net(sk), msg, &ipc, false); - if (err) + if (unlikely(err)) { + kfree(ipc.opt); return err; + } if (ipc.opt) free = 1; } diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 561cd4b8fc6e0..c77aac75759d9 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -543,8 +543,10 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (msg->msg_controllen) { err = ip_cmsg_send(sock_net(sk), msg, &ipc, false); - if (err) + if (unlikely(err)) { + kfree(ipc.opt); goto out; + } if (ipc.opt) free = 1; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 1b8c5ba7d5f73..a390174b96de6 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -963,8 +963,10 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (msg->msg_controllen) { err = ip_cmsg_send(sock_net(sk), msg, &ipc, sk->sk_family == AF_INET6); - if (err) + if (unlikely(err)) { + kfree(ipc.opt); return err; + } if (ipc.opt) free = 1; connected = 0; From 73fd505d34328b113fb9743688f245991409a830 Mon Sep 17 00:00:00 2001 From: Rainer Weikusat Date: Thu, 11 Feb 2016 19:37:27 +0000 Subject: [PATCH 1980/2091] af_unix: Guard against other == sk in unix_dgram_sendmsg [ Upstream commit a5527dda344fff0514b7989ef7a755729769daa1 ] The unix_dgram_sendmsg routine use the following test if (unlikely(unix_peer(other) != sk && unix_recvq_full(other))) { to determine if sk and other are in an n:1 association (either established via connect or by using sendto to send messages to an unrelated socket identified by address). This isn't correct as the specified address could have been bound to the sending socket itself or because this socket could have been connected to itself by the time of the unix_peer_get but disconnected before the unix_state_lock(other). In both cases, the if-block would be entered despite other == sk which might either block the sender unintentionally or lead to trying to unlock the same spin lock twice for a non-blocking send. Add a other != sk check to guard against this. Fixes: 7d267278a9ec ("unix: avoid use-after-free in ep_remove_wait_queue") Reported-By: Philipp Hahn Signed-off-by: Rainer Weikusat Tested-by: Philipp Hahn Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- net/unix/af_unix.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index d644042868056..535a642a16889 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1714,7 +1714,12 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, goto out_unlock; } - if (unlikely(unix_peer(other) != sk && unix_recvq_full(other))) { + /* other == sk && unix_peer(other) != sk if + * - unix_peer(sk) == NULL, destination address bound to sk + * - unix_peer(sk) == sk by time of get but disconnected before lock + */ + if (other != sk && + unlikely(unix_peer(other) != sk && unix_recvq_full(other))) { if (timeo) { timeo = unix_wait_for_peer(other, timeo); From ce643bc32f94c94d5d018c116ed0138ec19bfb4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Fri, 12 Feb 2016 16:42:14 +0100 Subject: [PATCH 1981/2091] qmi_wwan: add "4G LTE usb-modem U901" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit aac8d3c282e024c344c5b86dc1eab7af88bb9716 ] Thomas reports: T: Bus=01 Lev=01 Prnt=01 Port=03 Cnt=01 Dev#= 4 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=05c6 ProdID=6001 Rev=00.00 S: Manufacturer=USB Modem S: Product=USB Modem S: SerialNumber=1234567890ABCDEF C: #Ifs= 5 Cfg#= 1 Atr=e0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=option I: If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan I: If#= 4 Alt= 0 #EPs= 2 Cls=08(stor.) Sub=06 Prot=50 Driver=usb-storage Reported-by: Thomas Schäfer Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 71190dc1eacfe..cffb25280a3b7 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -542,6 +542,7 @@ static const struct usb_device_id products[] = { /* 3. Combined interface devices matching on interface number */ {QMI_FIXED_INTF(0x0408, 0xea42, 4)}, /* Yota / Megafon M100-1 */ + {QMI_FIXED_INTF(0x05c6, 0x6001, 3)}, /* 4G LTE usb-modem U901 */ {QMI_FIXED_INTF(0x05c6, 0x7000, 0)}, {QMI_FIXED_INTF(0x05c6, 0x7001, 1)}, {QMI_FIXED_INTF(0x05c6, 0x7002, 1)}, From 7a9a967fa8bd958586896bb89f5d44ebbbbeaec3 Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Wed, 17 Feb 2016 17:24:22 +0200 Subject: [PATCH 1982/2091] net/mlx4_en: Count HW buffer overrun only once [ Upstream commit 281e8b2fdf8e4ef366b899453cae50e09b577ada ] RdropOvflw counts overrun of HW buffer, therefore should be used for rx_fifo_errors only. Currently RdropOvflw counter is mistakenly also set into rx_missed_errors and rx_over_errors too, which makes the device total dropped packets accounting to show wrong results. Fix that. Use it for rx_fifo_errors only. Fixes: c27a02cd94d6 ('mlx4_en: Add driver for Mellanox ConnectX 10GbE NIC') Signed-off-by: Amir Vadai Signed-off-by: Eugenia Emantayev Signed-off-by: Or Gerlitz Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx4/en_port.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_port.c b/drivers/net/ethernet/mellanox/mlx4/en_port.c index 0a56f010c8468..760a8b3879128 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_port.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_port.c @@ -223,11 +223,11 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset) stats->collisions = 0; stats->rx_dropped = be32_to_cpu(mlx4_en_stats->RDROP); stats->rx_length_errors = be32_to_cpu(mlx4_en_stats->RdropLength); - stats->rx_over_errors = be32_to_cpu(mlx4_en_stats->RdropOvflw); + stats->rx_over_errors = 0; stats->rx_crc_errors = be32_to_cpu(mlx4_en_stats->RCRC); stats->rx_frame_errors = 0; stats->rx_fifo_errors = be32_to_cpu(mlx4_en_stats->RdropOvflw); - stats->rx_missed_errors = be32_to_cpu(mlx4_en_stats->RdropOvflw); + stats->rx_missed_errors = 0; stats->tx_aborted_errors = 0; stats->tx_carrier_errors = 0; stats->tx_fifo_errors = 0; From 5c72a2dd6fedf29368fa67b15c7c076dd93cf3dd Mon Sep 17 00:00:00 2001 From: Eugenia Emantayev Date: Wed, 17 Feb 2016 17:24:23 +0200 Subject: [PATCH 1983/2091] net/mlx4_en: Choose time-stamping shift value according to HW frequency [ Upstream commit 31c128b66e5b28f468076e4f3ca3025c35342041 ] Previously, the shift value used for time-stamping was constant and didn't depend on the HW chip frequency. Change that to take the frequency into account and calculate the maximal value in cycles per wraparound of ten seconds. This time slot was chosen since it gives a good accuracy in time synchronization. Algorithm for shift value calculation: * Round up the maximal value in cycles to nearest power of two * Calculate maximal multiplier by division of all 64 bits set to above result * Then, invert the function clocksource_khz2mult() to get the shift from maximal mult value Fixes: ec693d47010e ('net/mlx4_en: Add HW timestamping (TS) support') Signed-off-by: Eugenia Emantayev Reviewed-by: Matan Barak Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx4/en_clock.c | 25 ++++++++++++++----- 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_clock.c b/drivers/net/ethernet/mellanox/mlx4/en_clock.c index 8a083d73efdba..dae2ebb53af72 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_clock.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_clock.c @@ -236,6 +236,24 @@ static const struct ptp_clock_info mlx4_en_ptp_clock_info = { .enable = mlx4_en_phc_enable, }; +#define MLX4_EN_WRAP_AROUND_SEC 10ULL + +/* This function calculates the max shift that enables the user range + * of MLX4_EN_WRAP_AROUND_SEC values in the cycles register. + */ +static u32 freq_to_shift(u16 freq) +{ + u32 freq_khz = freq * 1000; + u64 max_val_cycles = freq_khz * 1000 * MLX4_EN_WRAP_AROUND_SEC; + u64 max_val_cycles_rounded = is_power_of_2(max_val_cycles + 1) ? 
+ max_val_cycles : roundup_pow_of_two(max_val_cycles) - 1; + /* calculate max possible multiplier in order to fit in 64bit */ + u64 max_mul = div_u64(0xffffffffffffffffULL, max_val_cycles_rounded); + + /* This comes from the reverse of clocksource_khz2mult */ + return ilog2(div_u64(max_mul * freq_khz, 1000000)); +} + void mlx4_en_init_timestamp(struct mlx4_en_dev *mdev) { struct mlx4_dev *dev = mdev->dev; @@ -247,12 +265,7 @@ void mlx4_en_init_timestamp(struct mlx4_en_dev *mdev) memset(&mdev->cycles, 0, sizeof(mdev->cycles)); mdev->cycles.read = mlx4_en_read_clock; mdev->cycles.mask = CLOCKSOURCE_MASK(48); - /* Using shift to make calculation more accurate. Since current HW - * clock frequency is 427 MHz, and cycles are given using a 48 bits - * register, the biggest shift when calculating using u64, is 14 - * (max_cycles * multiplier < 2^64) - */ - mdev->cycles.shift = 14; + mdev->cycles.shift = freq_to_shift(dev->caps.hca_core_clock); mdev->cycles.mult = clocksource_khz2mult(1000 * dev->caps.hca_core_clock, mdev->cycles.shift); mdev->nominal_c_mult = mdev->cycles.mult; From 29d526aca29faa3236c9787c2512c5ba4c82092a Mon Sep 17 00:00:00 2001 From: Eugenia Emantayev Date: Wed, 17 Feb 2016 17:24:27 +0200 Subject: [PATCH 1984/2091] net/mlx4_en: Avoid changing dev->features directly in run-time [ Upstream commit 925ab1aa9394bbaeac47ee5b65d3fdf0fb8135cf ] It's forbidden to manually change dev->features in run-time. Currently, this is done in the driver to make sure that GSO_UDP_TUNNEL is advertized only when VXLAN tunnel is set. However, since the stack actually does features intersection with hw_enc_features, we can safely revert to advertizing features early when registering the netdevice. Fixes: f4a1edd56120 ('net/mlx4_en: Advertize encapsulation offloads [...]') Signed-off-by: Eugenia Emantayev Signed-off-by: Or Gerlitz Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index a5a0b8420d26b..e9189597000de 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2330,8 +2330,6 @@ static void mlx4_en_add_vxlan_offloads(struct work_struct *work) /* set offloads */ priv->dev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_RXCSUM | NETIF_F_TSO | NETIF_F_GSO_UDP_TUNNEL; - priv->dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL; - priv->dev->features |= NETIF_F_GSO_UDP_TUNNEL; } static void mlx4_en_del_vxlan_offloads(struct work_struct *work) @@ -2342,8 +2340,6 @@ static void mlx4_en_del_vxlan_offloads(struct work_struct *work) /* unset offloads */ priv->dev->hw_enc_features &= ~(NETIF_F_IP_CSUM | NETIF_F_RXCSUM | NETIF_F_TSO | NETIF_F_GSO_UDP_TUNNEL); - priv->dev->hw_features &= ~NETIF_F_GSO_UDP_TUNNEL; - priv->dev->features &= ~NETIF_F_GSO_UDP_TUNNEL; ret = mlx4_SET_PORT_VXLAN(priv->mdev->dev, priv->port, VXLAN_STEER_BY_OUTER_MAC, 0); @@ -2940,6 +2936,11 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, priv->rss_hash_fn = ETH_RSS_HASH_TOP; } + if (mdev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) { + dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL; + dev->features |= NETIF_F_GSO_UDP_TUNNEL; + } + mdev->pndev[port] = dev; mdev->upper[port] = NULL; From 89afe37040ed95a8959d67efc784a27948f0a76f Mon Sep 17 00:00:00 2001 From: Mark Tomlinson Date: Mon, 15 Feb 2016 16:24:44 +1300 Subject: [PATCH 1985/2091] l2tp: Fix error creating L2TP tunnels [ Upstream commit 853effc55b0f975abd6d318cca486a9c1b67e10f ] A previous commit (33f72e6) added notification via netlink for tunnels when created/modified/deleted. If the notification returned an error, this error was returned from the tunnel function. If there were no listeners, the error code ESRCH was returned, even though having no listeners is not an error. Other calls to this and other similar notification functions either ignore the error code, or filter ESRCH. This patch checks for ESRCH and does not flag this as an error. Reviewed-by: Hamish Martin Signed-off-by: Mark Tomlinson Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- net/l2tp/l2tp_netlink.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index 9e13c2ff87897..fe92a08b3cd55 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -124,8 +124,13 @@ static int l2tp_tunnel_notify(struct genl_family *family, ret = l2tp_nl_tunnel_send(msg, info->snd_portid, info->snd_seq, NLM_F_ACK, tunnel, cmd); - if (ret >= 0) - return genlmsg_multicast_allns(family, msg, 0, 0, GFP_ATOMIC); + if (ret >= 0) { + ret = genlmsg_multicast_allns(family, msg, 0, 0, GFP_ATOMIC); + /* We don't care if no one is listening */ + if (ret == -ESRCH) + ret = 0; + return ret; + } nlmsg_free(msg); @@ -147,8 +152,13 @@ static int l2tp_session_notify(struct genl_family *family, ret = l2tp_nl_session_send(msg, info->snd_portid, info->snd_seq, NLM_F_ACK, session, cmd); - if (ret >= 0) - return genlmsg_multicast_allns(family, msg, 0, 0, GFP_ATOMIC); + if (ret >= 0) { + ret = genlmsg_multicast_allns(family, msg, 0, 0, GFP_ATOMIC); + /* We don't care if no one is listening */ + if (ret == -ESRCH) + ret = 0; + return ret; + } nlmsg_free(msg); From dda12d1bb3534b095c3d3e537e081339e0b7192f Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Mon, 15 Feb 2016 17:01:10 +0100 Subject: [PATCH 1986/2091] pppoe: fix reference counting in PPPoE proxy [ Upstream commit 29e73269aa4d36f92b35610c25f8b01c789b0dc8 ] Drop reference on the relay_po socket when __pppoe_xmit() succeeds. This is already handled correctly in the error path. Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ppp/pppoe.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index 9c8fabed4444f..d1c4bc1c4df09 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -395,6 +395,8 @@ static int pppoe_rcv_core(struct sock *sk, struct sk_buff *skb) if (!__pppoe_xmit(sk_pppox(relay_po), skb)) goto abort_put; + + sock_put(sk_pppox(relay_po)); } else { if (sock_queue_rcv_skb(sk, skb)) goto abort_kfree; From 1610a64d0c495cd84d848e5ff4e92f1f9f0ec6e5 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 18 Feb 2016 21:21:19 +0800 Subject: [PATCH 1987/2091] route: check and remove route cache when we get route [ Upstream commit deed49df7390d5239024199e249190328f1651e7 ] Since the GC of IPv4 routes was removed, a cached route has no chance of being removed and, even after it has timed out, it can still be used, because no code checks its expiry. Fix this issue by checking for and removing expired route cache entries when we look up a route. Signed-off-by: Xin Long Acked-by: Hannes Frederic Sowa Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- include/net/ip_fib.h | 1 + net/ipv4/route.c | 77 ++++++++++++++++++++++++++++++++++++-------- 2 files changed, 64 insertions(+), 14 deletions(-) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 54271ed0ed45b..13f1a97f6b2b8 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -59,6 +59,7 @@ struct fib_nh_exception { struct rtable __rcu *fnhe_rth_input; struct rtable __rcu *fnhe_rth_output; unsigned long fnhe_stamp; + struct rcu_head rcu; }; struct fnhe_hash_bucket { diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f45f2a12f37b2..1d3cdb4d4ebcb 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -125,6 +125,7 @@ static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ; static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20; static int ip_rt_min_advmss __read_mostly = 256; +static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT; /* * Interface to generic destination cache. */ @@ -753,7 +754,7 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow struct fib_nh *nh = &FIB_RES_NH(res); update_or_create_fnhe(nh, fl4->daddr, new_gw, - 0, 0); + 0, jiffies + ip_rt_gc_timeout); } if (kill_route) rt->dst.obsolete = DST_OBSOLETE_KILL; @@ -1538,6 +1539,36 @@ static void ip_handle_martian_source(struct net_device *dev, #endif } +static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr) +{ + struct fnhe_hash_bucket *hash; + struct fib_nh_exception *fnhe, __rcu **fnhe_p; + u32 hval = fnhe_hashfun(daddr); + + spin_lock_bh(&fnhe_lock); + + hash = rcu_dereference_protected(nh->nh_exceptions, + lockdep_is_held(&fnhe_lock)); + hash += hval; + + fnhe_p = &hash->chain; + fnhe = rcu_dereference_protected(*fnhe_p, lockdep_is_held(&fnhe_lock)); + while (fnhe) { + if (fnhe->fnhe_daddr == daddr) { + rcu_assign_pointer(*fnhe_p, rcu_dereference_protected( + fnhe->fnhe_next, lockdep_is_held(&fnhe_lock))); + fnhe_flush_routes(fnhe); + kfree_rcu(fnhe, rcu); + break; + } + fnhe_p = &fnhe->fnhe_next; + fnhe = rcu_dereference_protected(fnhe->fnhe_next, + lockdep_is_held(&fnhe_lock)); + } + + spin_unlock_bh(&fnhe_lock); +} + /* called in rcu_read_lock() section */ static int __mkroute_input(struct sk_buff *skb, const struct fib_result *res, @@ -1592,11 +1623,20 @@ static int __mkroute_input(struct sk_buff *skb, fnhe = find_exception(&FIB_RES_NH(*res), daddr); if (do_cache) { - if (fnhe) + if (fnhe) { rth = rcu_dereference(fnhe->fnhe_rth_input); - else - rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input); + if (rth && rth->dst.expires && + time_after(jiffies, rth->dst.expires)) { + ip_del_fnhe(&FIB_RES_NH(*res), daddr); + fnhe = NULL; + } else { + goto rt_cache; + } + } + + rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input); +rt_cache: if (rt_cache_valid(rth)) { skb_dst_set_noref(skb, &rth->dst); goto out; @@ -1945,19 +1985,29 @@ static struct rtable *__mkroute_output(const struct fib_result *res, struct fib_nh *nh = &FIB_RES_NH(*res); fnhe = find_exception(nh, fl4->daddr); - if (fnhe) + if (fnhe) { prth = &fnhe->fnhe_rth_output; - else { - if (unlikely(fl4->flowi4_flags & - FLOWI_FLAG_KNOWN_NH && - !(nh->nh_gw && - nh->nh_scope == RT_SCOPE_LINK))) { - do_cache = false; - goto add; + rth = rcu_dereference(*prth); + if (rth && rth->dst.expires && + time_after(jiffies, rth->dst.expires)) { + ip_del_fnhe(nh, fl4->daddr); + fnhe = NULL; + } else { + goto rt_cache; } - prth = raw_cpu_ptr(nh->nh_pcpu_rth_output); } + + if (unlikely(fl4->flowi4_flags & + FLOWI_FLAG_KNOWN_NH && + !(nh->nh_gw && + nh->nh_scope == 
RT_SCOPE_LINK))) { + do_cache = false; + goto add; + } + prth = raw_cpu_ptr(nh->nh_pcpu_rth_output); rth = rcu_dereference(*prth); + +rt_cache: if (rt_cache_valid(rth)) { dst_hold(&rth->dst); return rth; @@ -2504,7 +2554,6 @@ void ip_rt_multicast_event(struct in_device *in_dev) } #ifdef CONFIG_SYSCTL -static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT; static int ip_rt_gc_interval __read_mostly = 60 * HZ; static int ip_rt_gc_min_interval __read_mostly = HZ / 2; static int ip_rt_gc_elasticity __read_mostly = 8; From 3aa450dcb11d582aab3a6aaf19b67380ab4322bc Mon Sep 17 00:00:00 2001 From: Anton Protopopov Date: Tue, 16 Feb 2016 21:43:16 -0500 Subject: [PATCH 1988/2091] rtnl: RTM_GETNETCONF: fix wrong return value [ Upstream commit a97eb33ff225f34a8124774b3373fd244f0e83ce ] An error response from a RTM_GETNETCONF request can return the positive error value EINVAL in the struct nlmsgerr that can mislead userspace. Signed-off-by: Anton Protopopov Acked-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/devinet.c | 2 +- net/ipv6/addrconf.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 419d23c53ec75..280d46f947ea8 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1839,7 +1839,7 @@ static int inet_netconf_get_devconf(struct sk_buff *in_skb, if (err < 0) goto errout; - err = EINVAL; + err = -EINVAL; if (!tb[NETCONFA_IFINDEX]) goto errout; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index fcfbd05a8e0b4..f555f4fc1d628 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -569,7 +569,7 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb, if (err < 0) goto errout; - err = EINVAL; + err = -EINVAL; if (!tb[NETCONFA_IFINDEX]) goto errout; From fdbc49cb20c7711e07010acd7bcf5e6839cf2a86 Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Fri, 19 Feb 2016 04:27:48 +0300 Subject: [PATCH 1989/2091] unix_diag: fix incorrect sign extension in unix_lookup_by_ino [ Upstream commit b5f0549231ffb025337be5a625b0ff9f52b016f0 ] The value passed by unix_diag_get_exact to unix_lookup_by_ino has type __u32, but unix_lookup_by_ino's argument ino has type int, which is not a problem yet. However, when ino is compared with sock_i_ino return value of type unsigned long, ino is sign extended to signed long, and this results to incorrect comparison on 64-bit architectures for inode numbers greater than INT_MAX. This bug was found by strace test suite. Fixes: 5d3cae8bc39d ("unix_diag: Dumping exact socket core") Signed-off-by: Dmitry V. Levin Acked-by: Cong Wang Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- net/unix/diag.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/unix/diag.c b/net/unix/diag.c index c512f64d52876..4d9679701a6df 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -220,7 +220,7 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } -static struct sock *unix_lookup_by_ino(int ino) +static struct sock *unix_lookup_by_ino(unsigned int ino) { int i; struct sock *sk; From 00731a62f0f167760ff7a536a30b9eb441794aaf Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Thu, 18 Feb 2016 16:10:57 -0500 Subject: [PATCH 1990/2091] sctp: Fix port hash table size computation [ Upstream commit d9749fb5942f51555dc9ce1ac0dbb1806960a975 ] Dmitry Vyukov noted recently that the sctp_port_hashtable had an error in its size computation, observing that the current method never guaranteed that the hashsize (measured in number of entries) would be a power of two, which the input hash function for that table requires. The root cause of the problem is that two values need to be computed (one, the allocation order of the storage required, as passed to __get_free_pages, and two, the number of entries for the hash table). Both need to be a power of two, but for different reasons, and the existing code is simply computing one order value, and using it as the basis for both, which is wrong (i.e. it assumes that ((1 << order) * PAGE_SIZE) / sizeof(struct sctp_bind_hashbucket) is still a power of two). Reported-by: Dmitry Vyukov CC: Dmitry Vyukov CC: Vladislav Yasevich CC: "David S. Miller" Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/sctp/protocol.c | 47 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 38 insertions(+), 9 deletions(-) diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index e13c3c3ea4ac1..9d134ab3351f5 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -60,6 +60,8 @@ #include #include +#define MAX_SCTP_PORT_HASH_ENTRIES (64 * 1024) + /* Global data structures. */ struct sctp_globals sctp_globals __read_mostly; @@ -1332,6 +1334,8 @@ static __init int sctp_init(void) unsigned long limit; int max_share; int order; + int num_entries; + int max_entry_order; sock_skb_cb_check_size(sizeof(struct sctp_ulpevent)); @@ -1384,14 +1388,24 @@ static __init int sctp_init(void) /* Size and allocate the association hash table. * The methodology is similar to that of the tcp hash tables. + * Though not identical. Start by getting a goal size */ if (totalram_pages >= (128 * 1024)) goal = totalram_pages >> (22 - PAGE_SHIFT); else goal = totalram_pages >> (24 - PAGE_SHIFT); - for (order = 0; (1UL << order) < goal; order++) - ; + /* Then compute the page order for said goal */ + order = get_order(goal); + + /* Now compute the required page order for the maximum sized table we + * want to create + */ + max_entry_order = get_order(MAX_SCTP_PORT_HASH_ENTRIES * + sizeof(struct sctp_bind_hashbucket)); + + /* Limit the page order by that maximum hash table size */ + order = min(order, max_entry_order); do { sctp_assoc_hashsize = (1UL << order) * PAGE_SIZE / @@ -1425,27 +1439,42 @@ static __init int sctp_init(void) INIT_HLIST_HEAD(&sctp_ep_hashtable[i].chain); } - /* Allocate and initialize the SCTP port hash table. */ + /* Allocate and initialize the SCTP port hash table. + * Note that order is initalized to start at the max sized + * table we want to support. 
If we can't get that many pages + * reduce the order and try again + */ do { - sctp_port_hashsize = (1UL << order) * PAGE_SIZE / - sizeof(struct sctp_bind_hashbucket); - if ((sctp_port_hashsize > (64 * 1024)) && order > 0) - continue; sctp_port_hashtable = (struct sctp_bind_hashbucket *) __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, order); } while (!sctp_port_hashtable && --order > 0); + if (!sctp_port_hashtable) { pr_err("Failed bind hash alloc\n"); status = -ENOMEM; goto err_bhash_alloc; } + + /* Now compute the number of entries that will fit in the + * port hash space we allocated + */ + num_entries = (1UL << order) * PAGE_SIZE / + sizeof(struct sctp_bind_hashbucket); + + /* And finish by rounding it down to the nearest power of two + * this wastes some memory of course, but its needed because + * the hash function operates based on the assumption that + * that the number of entries is a power of two + */ + sctp_port_hashsize = rounddown_pow_of_two(num_entries); + for (i = 0; i < sctp_port_hashsize; i++) { spin_lock_init(&sctp_port_hashtable[i].lock); INIT_HLIST_HEAD(&sctp_port_hashtable[i].chain); } - pr_info("Hash tables configured (established %d bind %d)\n", - sctp_assoc_hashsize, sctp_port_hashsize); + pr_info("Hash tables configured (established %d bind %d/%d)\n", + sctp_assoc_hashsize, sctp_port_hashsize, num_entries); sctp_sysctl_register(); From b9a9cfdbf7254f4a231cc8ddf685cc29d3a9c6e5 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Fri, 4 Mar 2016 16:55:54 -0500 Subject: [PATCH 1991/2091] Linux 4.1.19 Signed-off-by: Sasha Levin --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 001375cfd815e..06107f683bbe8 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 1 -SUBLEVEL = 18 +SUBLEVEL = 19 EXTRAVERSION = NAME = Series 4800 From 4b4bc57ac1c2fd740f39e5849d45b9bcf6fbaf8b Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Sun, 6 Mar 2016 01:24:22 +0000 Subject: [PATCH 1992/2091] iscsi-target: Fix rx_login_comp hang after login failure [ Upstream commit ca82c2bded29b38d36140bfa1e76a7bbfcade390 ] This patch addresses a case where iscsi_target_do_tx_login_io() fails sending the last login response PDU, after the RX/TX threads have already been started. The case centers around iscsi_target_rx_thread() not invoking allow_signal(SIGINT) before the send_sig(SIGINT, ...) occurs from the failure path, resulting in RX thread hanging indefinately on iscsi_conn->rx_login_comp. Note this bug is a regression introduced by: commit e54198657b65625085834847ab6271087323ffea Author: Nicholas Bellinger Date: Wed Jul 22 23:14:19 2015 -0700 iscsi-target: Fix iscsit_start_kthreads failure OOPs To address this bug, complete ->rx_login_complete for good measure in the failure path, and immediately return from RX thread context if connection state did not actually reach full feature phase (TARG_CONN_STATE_LOGGED_IN). 
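As a rough illustration of the shape of this fix, here is a user-space analogue that uses a pthread condition variable in place of the kernel completion; the names mirror the changelog, but none of this is the iscsi-target code:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t rx_login_comp = PTHREAD_COND_INITIALIZER;
static bool login_completed;   /* models complete(&conn->rx_login_comp) */
static bool logged_in;         /* models conn_state == TARG_CONN_STATE_LOGGED_IN */

static void *rx_thread(void *arg)
{
    (void)arg;
    pthread_mutex_lock(&lock);
    while (!login_completed)              /* models wait_for_completion(...) */
        pthread_cond_wait(&rx_login_comp, &lock);
    bool ok = logged_in;                  /* models the connection-state check */
    pthread_mutex_unlock(&lock);

    if (!ok) {
        puts("rx thread: login never reached full feature phase, returning");
        return NULL;
    }
    puts("rx thread: servicing the connection");
    return NULL;
}

static void tx_login_failure_path(void)
{
    pthread_mutex_lock(&lock);
    login_completed = true;   /* the added completion on the failure path */
    /* logged_in stays false: the last login response could not be sent */
    pthread_cond_broadcast(&rx_login_comp);
    pthread_mutex_unlock(&lock);
}

int main(void)
{
    pthread_t t;
    pthread_create(&t, NULL, rx_thread, NULL);
    tx_login_failure_path();
    pthread_join(t, NULL);   /* without the completion, this join would hang */
    return 0;
}

Without signalling the completion on the failure path, the join at the end would block forever, which is the analogue of the RX thread hang described above.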
Cc: Sagi Grimberg Cc: # v3.10+ Signed-off-by: Nicholas Bellinger Signed-off-by: Sasha Levin --- drivers/target/iscsi/iscsi_target.c | 13 ++++++++++++- drivers/target/iscsi/iscsi_target_nego.c | 1 + 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 2e58279fab609..6f50e9d958de7 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -4095,6 +4095,17 @@ static int iscsi_target_rx_opcode(struct iscsi_conn *conn, unsigned char *buf) return iscsit_add_reject(conn, ISCSI_REASON_BOOKMARK_NO_RESOURCES, buf); } +static bool iscsi_target_check_conn_state(struct iscsi_conn *conn) +{ + bool ret; + + spin_lock_bh(&conn->state_lock); + ret = (conn->conn_state != TARG_CONN_STATE_LOGGED_IN); + spin_unlock_bh(&conn->state_lock); + + return ret; +} + int iscsi_target_rx_thread(void *arg) { int ret, rc; @@ -4112,7 +4123,7 @@ int iscsi_target_rx_thread(void *arg) * incoming iscsi/tcp socket I/O, and/or failing the connection. */ rc = wait_for_completion_interruptible(&conn->rx_login_comp); - if (rc < 0) + if (rc < 0 || iscsi_target_check_conn_state(conn)) return 0; if (conn->conn_transport->transport_type == ISCSI_INFINIBAND) { diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c index f9cde91418367..9a96f1712b7a3 100644 --- a/drivers/target/iscsi/iscsi_target_nego.c +++ b/drivers/target/iscsi/iscsi_target_nego.c @@ -393,6 +393,7 @@ static int iscsi_target_do_tx_login_io(struct iscsi_conn *conn, struct iscsi_log if (login->login_complete) { if (conn->rx_thread && conn->rx_thread_active) { send_sig(SIGINT, conn->rx_thread, 1); + complete(&conn->rx_login_comp); kthread_stop(conn->rx_thread); } if (conn->tx_thread && conn->tx_thread_active) { From 5dd73bb38031a5d8f81bf7d9b4f10135ab950f64 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Sun, 6 Mar 2016 01:24:23 +0000 Subject: [PATCH 1993/2091] target: Fix race for SCF_COMPARE_AND_WRITE_POST checking [ Upstream commit 057085e522f8bf94c2e691a5b76880f68060f8ba ] This patch addresses a race + use after free where the first stage of COMPARE_AND_WRITE in compare_and_write_callback() is rescheduled after the backend sends the secondary WRITE, resulting in second stage compare_and_write_post() callback completing in target_complete_ok_work() before the first can return. Because current code depends on checking se_cmd->se_cmd_flags after return from se_cmd->transport_complete_callback(), this results in first stage having SCF_COMPARE_AND_WRITE_POST set, which incorrectly falls through into second stage CAW processing code, eventually triggering a NULL pointer dereference due to use after free. To address this bug, pass in a new *post_ret parameter into se_cmd->transport_complete_callback(), and depend upon this value instead of ->se_cmd_flags to determine when to return or fall through into ->queue_status() code for CAW. 
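A reduced sketch of the changed calling convention, using stand-in types and names rather than the target core implementation:

#include <stdio.h>

typedef int sense_reason_t;
struct se_cmd_model { int dummy; };

/* Second-stage callback model: it reports "second CAW stage has run, fall
 * through to normal status queueing" via *post_ret, so the caller no longer
 * infers that from flags the other stage may be changing concurrently. */
static sense_reason_t compare_and_write_post_model(struct se_cmd_model *cmd,
                                                   int success, int *post_ret)
{
    (void)cmd;
    if (success)
        *post_ret = 1;
    return 0;   /* stands in for TCM_NO_SENSE */
}

int main(void)
{
    struct se_cmd_model cmd = { 0 };
    int post_ret = 0;
    sense_reason_t rc = compare_and_write_post_model(&cmd, 1, &post_ret);

    if (!rc && !post_ret)
        puts("caller: first CAW stage, wait for the backend WRITE to complete");
    else
        puts("caller: fall through and queue the status (or handle the error)");
    return 0;
}

Passing the result through *post_ret keeps the decision local to this one callback invocation instead of re-reading se_cmd_flags, which the other CAW stage may modify before the caller gets to look.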
Cc: Sagi Grimberg Signed-off-by: Nicholas Bellinger Signed-off-by: Sasha Levin --- drivers/target/target_core_sbc.c | 13 +++++++++---- drivers/target/target_core_transport.c | 14 ++++++++------ include/target/target_core_base.h | 2 +- 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c index 733824e3825f4..a3bfafca5f418 100644 --- a/drivers/target/target_core_sbc.c +++ b/drivers/target/target_core_sbc.c @@ -321,7 +321,8 @@ sbc_setup_write_same(struct se_cmd *cmd, unsigned char *flags, struct sbc_ops *o return 0; } -static sense_reason_t xdreadwrite_callback(struct se_cmd *cmd, bool success) +static sense_reason_t xdreadwrite_callback(struct se_cmd *cmd, bool success, + int *post_ret) { unsigned char *buf, *addr; struct scatterlist *sg; @@ -385,7 +386,8 @@ sbc_execute_rw(struct se_cmd *cmd) cmd->data_direction); } -static sense_reason_t compare_and_write_post(struct se_cmd *cmd, bool success) +static sense_reason_t compare_and_write_post(struct se_cmd *cmd, bool success, + int *post_ret) { struct se_device *dev = cmd->se_dev; @@ -395,8 +397,10 @@ static sense_reason_t compare_and_write_post(struct se_cmd *cmd, bool success) * sent to the backend driver. */ spin_lock_irq(&cmd->t_state_lock); - if ((cmd->transport_state & CMD_T_SENT) && !cmd->scsi_status) + if ((cmd->transport_state & CMD_T_SENT) && !cmd->scsi_status) { cmd->se_cmd_flags |= SCF_COMPARE_AND_WRITE_POST; + *post_ret = 1; + } spin_unlock_irq(&cmd->t_state_lock); /* @@ -408,7 +412,8 @@ static sense_reason_t compare_and_write_post(struct se_cmd *cmd, bool success) return TCM_NO_SENSE; } -static sense_reason_t compare_and_write_callback(struct se_cmd *cmd, bool success) +static sense_reason_t compare_and_write_callback(struct se_cmd *cmd, bool success, + int *post_ret) { struct se_device *dev = cmd->se_dev; struct scatterlist *write_sg = NULL, *sg; diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 3881504b40d83..316de3f98237b 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -1638,7 +1638,7 @@ bool target_stop_cmd(struct se_cmd *cmd, unsigned long *flags) void transport_generic_request_failure(struct se_cmd *cmd, sense_reason_t sense_reason) { - int ret = 0; + int ret = 0, post_ret = 0; pr_debug("-----[ Storage Engine Exception for cmd: %p ITT: 0x%08x" " CDB: 0x%02x\n", cmd, cmd->se_tfo->get_task_tag(cmd), @@ -1661,7 +1661,7 @@ void transport_generic_request_failure(struct se_cmd *cmd, */ if ((cmd->se_cmd_flags & SCF_COMPARE_AND_WRITE) && cmd->transport_complete_callback) - cmd->transport_complete_callback(cmd, false); + cmd->transport_complete_callback(cmd, false, &post_ret); switch (sense_reason) { case TCM_NON_EXISTENT_LUN: @@ -2056,11 +2056,13 @@ static void target_complete_ok_work(struct work_struct *work) */ if (cmd->transport_complete_callback) { sense_reason_t rc; + bool caw = (cmd->se_cmd_flags & SCF_COMPARE_AND_WRITE); + bool zero_dl = !(cmd->data_length); + int post_ret = 0; - rc = cmd->transport_complete_callback(cmd, true); - if (!rc && !(cmd->se_cmd_flags & SCF_COMPARE_AND_WRITE_POST)) { - if ((cmd->se_cmd_flags & SCF_COMPARE_AND_WRITE) && - !cmd->data_length) + rc = cmd->transport_complete_callback(cmd, true, &post_ret); + if (!rc && !post_ret) { + if (caw && zero_dl) goto queue_rsp; return; diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 480e9f82dfea8..b7a0594efc7e6 100644 --- 
a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -522,7 +522,7 @@ struct se_cmd { sense_reason_t (*execute_cmd)(struct se_cmd *); sense_reason_t (*execute_rw)(struct se_cmd *, struct scatterlist *, u32, enum dma_data_direction); - sense_reason_t (*transport_complete_callback)(struct se_cmd *, bool); + sense_reason_t (*transport_complete_callback)(struct se_cmd *, bool, int *); unsigned char *t_task_cdb; unsigned char __t_task_cdb[TCM_MAX_COMMAND_SIZE]; From 7a2903243093459623817fa1b25a63c5d4c60f85 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Sun, 6 Mar 2016 01:24:24 +0000 Subject: [PATCH 1994/2091] target: fix COMPARE_AND_WRITE non zero SGL offset data corruption [ Upstream commit d94e5a61357a04938ce14d6033b4d33a3c5fd780 ] target_core_sbc's compare_and_write functionality suffers from taking data at the wrong memory location when writing a CAW request to disk when a SGL offset is non-zero. This can happen with loopback and vhost-scsi fabric drivers when SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC is used to map existing user-space SGL memory into COMPARE_AND_WRITE READ/WRITE payload buffers. Given the following sample LIO subtopology, % targetcli ls /loopback/ o- loopback ................................. [1 Target] o- naa.6001405ebb8df14a ....... [naa.60014059143ed2b3] o- luns ................................... [2 LUNs] o- lun0 ................ [iblock/ram0 (/dev/ram0)] o- lun1 ................ [iblock/ram1 (/dev/ram1)] % lsscsi -g [3:0:1:0] disk LIO-ORG IBLOCK 4.0 /dev/sdc /dev/sg3 [3:0:1:1] disk LIO-ORG IBLOCK 4.0 /dev/sdd /dev/sg4 the following bug can be observed in Linux 4.3 and 4.4~rc1: % perl -e 'print chr$_ for 0..255,reverse 0..255' >rand % perl -e 'print "\0" x 512' >zero % cat rand >/dev/sdd % sg_compare_and_write -i rand -D zero --lba 0 /dev/sdd % sg_compare_and_write -i zero -D rand --lba 0 /dev/sdd Miscompare reported % hexdump -Cn 512 /dev/sdd 00000000 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 00000010 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 00000200 Rather than writing all-zeroes as instructed with the -D file, it corrupts the data in the sector by splicing some of the original bytes in. The page of the first entry of cmd->t_data_sg includes the CDB, and sg->offset is set to a position past the CDB. I presume that sg->offset is also the right choice to use for subsequent sglist members. Signed-off-by: Jan Engelhardt Tested-by: Douglas Gilbert Signed-off-by: Nicholas Bellinger Signed-off-by: Sasha Levin --- drivers/target/target_core_sbc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c index a3bfafca5f418..46b966d09af2f 100644 --- a/drivers/target/target_core_sbc.c +++ b/drivers/target/target_core_sbc.c @@ -509,11 +509,11 @@ static sense_reason_t compare_and_write_callback(struct se_cmd *cmd, bool succes if (block_size < PAGE_SIZE) { sg_set_page(&write_sg[i], m.page, block_size, - block_size); + m.piter.sg->offset + block_size); } else { sg_miter_next(&m); sg_set_page(&write_sg[i], m.page, block_size, - 0); + m.piter.sg->offset); } len -= block_size; i++; From 3286a2fea80a6ee53eadb95fdd900e3e81b0fbce Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Sun, 6 Mar 2016 01:24:25 +0000 Subject: [PATCH 1995/2091] target: Fix LUN_RESET active I/O handling for ACK_KREF commit febe562c20dfa8f33bee7d419c6b517986a5aa33 upstream. 
This patch fixes a NULL pointer se_cmd->cmd_kref < 0 refcount bug during TMR LUN_RESET with active se_cmd I/O, that can be triggered during se_cmd descriptor shutdown + release via core_tmr_drain_state_list() code. To address this bug, add common __target_check_io_state() helper for ABORT_TASK + LUN_RESET w/ CMD_T_COMPLETE checking, and set CMD_T_ABORTED + obtain ->cmd_kref for both cases ahead of last target_put_sess_cmd() after TFO->aborted_task() -> transport_cmd_finish_abort() callback has completed. It also introduces SCF_ACK_KREF to determine when transport_cmd_finish_abort() needs to drop the second extra reference, ahead of calling target_put_sess_cmd() for the final kref_put(&se_cmd->cmd_kref). It also updates transport_cmd_check_stop() to avoid holding se_cmd->t_state_lock while dropping se_cmd device state via target_remove_from_state_list(), now that core_tmr_drain_state_list() is holding the se_device lock while checking se_cmd state from within TMR logic. Finally, move transport_put_cmd() release of SGL + TMR + extended CDB memory into target_free_cmd_mem() in order to avoid potential resource leaks in TMR ABORT_TASK + LUN_RESET code-paths. Also update target_release_cmd_kref() accordingly. Reviewed-by: Quinn Tran Cc: Himanshu Madhani Cc: Sagi Grimberg Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Andy Grover Cc: Mike Christie Signed-off-by: Nicholas Bellinger Signed-off-by: Sasha Levin --- drivers/target/target_core_tmr.c | 64 +++++++++++++++++------- drivers/target/target_core_transport.c | 67 +++++++++++--------------- include/target/target_core_base.h | 1 + 3 files changed, 77 insertions(+), 55 deletions(-) diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c index adb8016955c49..103c0298b5269 100644 --- a/drivers/target/target_core_tmr.c +++ b/drivers/target/target_core_tmr.c @@ -110,6 +110,34 @@ static int target_check_cdb_and_preempt(struct list_head *list, return 1; } +static bool __target_check_io_state(struct se_cmd *se_cmd) +{ + struct se_session *sess = se_cmd->se_sess; + + assert_spin_locked(&sess->sess_cmd_lock); + WARN_ON_ONCE(!irqs_disabled()); + /* + * If command already reached CMD_T_COMPLETE state within + * target_complete_cmd(), this se_cmd has been passed to + * fabric driver and will not be aborted. + * + * Otherwise, obtain a local se_cmd->cmd_kref now for TMR + * ABORT_TASK + LUN_RESET for CMD_T_ABORTED processing as + * long as se_cmd->cmd_kref is still active unless zero. 
+ */ + spin_lock(&se_cmd->t_state_lock); + if (se_cmd->transport_state & CMD_T_COMPLETE) { + pr_debug("Attempted to abort io tag: %u already complete," + " skipping\n", se_cmd->se_tfo->get_task_tag(se_cmd)); + spin_unlock(&se_cmd->t_state_lock); + return false; + } + se_cmd->transport_state |= CMD_T_ABORTED; + spin_unlock(&se_cmd->t_state_lock); + + return kref_get_unless_zero(&se_cmd->cmd_kref); +} + void core_tmr_abort_task( struct se_device *dev, struct se_tmr_req *tmr, @@ -136,25 +164,20 @@ void core_tmr_abort_task( printk("ABORT_TASK: Found referenced %s task_tag: %u\n", se_cmd->se_tfo->get_fabric_name(), ref_tag); - spin_lock(&se_cmd->t_state_lock); - if (se_cmd->transport_state & CMD_T_COMPLETE) { - printk("ABORT_TASK: ref_tag: %u already complete, skipping\n", ref_tag); - spin_unlock(&se_cmd->t_state_lock); + if (!__target_check_io_state(se_cmd)) { spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); + target_put_sess_cmd(se_cmd); goto out; } - se_cmd->transport_state |= CMD_T_ABORTED; - spin_unlock(&se_cmd->t_state_lock); list_del_init(&se_cmd->se_cmd_list); - kref_get(&se_cmd->cmd_kref); spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); cancel_work_sync(&se_cmd->work); transport_wait_for_tasks(se_cmd); - target_put_sess_cmd(se_cmd); transport_cmd_finish_abort(se_cmd, true); + target_put_sess_cmd(se_cmd); printk("ABORT_TASK: Sending TMR_FUNCTION_COMPLETE for" " ref_tag: %d\n", ref_tag); @@ -259,8 +282,10 @@ static void core_tmr_drain_state_list( struct list_head *preempt_and_abort_list) { LIST_HEAD(drain_task_list); + struct se_session *sess; struct se_cmd *cmd, *next; unsigned long flags; + int rc; /* * Complete outstanding commands with TASK_ABORTED SAM status. @@ -299,6 +324,16 @@ static void core_tmr_drain_state_list( if (prout_cmd == cmd) continue; + sess = cmd->se_sess; + if (WARN_ON_ONCE(!sess)) + continue; + + spin_lock(&sess->sess_cmd_lock); + rc = __target_check_io_state(cmd); + spin_unlock(&sess->sess_cmd_lock); + if (!rc) + continue; + list_move_tail(&cmd->state_list, &drain_task_list); cmd->state_active = false; } @@ -306,7 +341,7 @@ static void core_tmr_drain_state_list( while (!list_empty(&drain_task_list)) { cmd = list_entry(drain_task_list.next, struct se_cmd, state_list); - list_del(&cmd->state_list); + list_del_init(&cmd->state_list); pr_debug("LUN_RESET: %s cmd: %p" " ITT/CmdSN: 0x%08x/0x%08x, i_state: %d, t_state: %d" @@ -330,16 +365,11 @@ static void core_tmr_drain_state_list( * loop above, but we do it down here given that * cancel_work_sync may block. */ - if (cmd->t_state == TRANSPORT_COMPLETE) - cancel_work_sync(&cmd->work); - - spin_lock_irqsave(&cmd->t_state_lock, flags); - target_stop_cmd(cmd, &flags); - - cmd->transport_state |= CMD_T_ABORTED; - spin_unlock_irqrestore(&cmd->t_state_lock, flags); + cancel_work_sync(&cmd->work); + transport_wait_for_tasks(cmd); core_tmr_handle_tas_abort(tmr_nacl, cmd, tas); + target_put_sess_cmd(cmd); } } diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 316de3f98237b..458ecbdac2d28 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -555,9 +555,6 @@ void transport_deregister_session(struct se_session *se_sess) } EXPORT_SYMBOL(transport_deregister_session); -/* - * Called with cmd->t_state_lock held. 
- */ static void target_remove_from_state_list(struct se_cmd *cmd) { struct se_device *dev = cmd->se_dev; @@ -582,10 +579,6 @@ static int transport_cmd_check_stop(struct se_cmd *cmd, bool remove_from_lists, { unsigned long flags; - spin_lock_irqsave(&cmd->t_state_lock, flags); - if (write_pending) - cmd->t_state = TRANSPORT_WRITE_PENDING; - if (remove_from_lists) { target_remove_from_state_list(cmd); @@ -595,6 +588,10 @@ static int transport_cmd_check_stop(struct se_cmd *cmd, bool remove_from_lists, cmd->se_lun = NULL; } + spin_lock_irqsave(&cmd->t_state_lock, flags); + if (write_pending) + cmd->t_state = TRANSPORT_WRITE_PENDING; + /* * Determine if frontend context caller is requesting the stopping of * this command for frontend exceptions. @@ -649,6 +646,8 @@ static void transport_lun_remove_cmd(struct se_cmd *cmd) void transport_cmd_finish_abort(struct se_cmd *cmd, int remove) { + bool ack_kref = (cmd->se_cmd_flags & SCF_ACK_KREF); + if (cmd->se_cmd_flags & SCF_SE_LUN_CMD) transport_lun_remove_cmd(cmd); /* @@ -660,7 +659,7 @@ void transport_cmd_finish_abort(struct se_cmd *cmd, int remove) if (transport_cmd_check_stop_to_fabric(cmd)) return; - if (remove) + if (remove && ack_kref) transport_put_cmd(cmd); } @@ -728,7 +727,7 @@ void target_complete_cmd(struct se_cmd *cmd, u8 scsi_status) * Check for case where an explicit ABORT_TASK has been received * and transport_wait_for_tasks() will be waiting for completion.. */ - if (cmd->transport_state & CMD_T_ABORTED && + if (cmd->transport_state & CMD_T_ABORTED || cmd->transport_state & CMD_T_STOP) { spin_unlock_irqrestore(&cmd->t_state_lock, flags); complete_all(&cmd->t_transport_stop_comp); @@ -2211,20 +2210,14 @@ static inline void transport_free_pages(struct se_cmd *cmd) } /** - * transport_release_cmd - free a command - * @cmd: command to free + * transport_put_cmd - release a reference to a command + * @cmd: command to release * - * This routine unconditionally frees a command, and reference counting - * or list removal must be done in the caller. + * This routine releases our reference to the command and frees it if possible. */ -static int transport_release_cmd(struct se_cmd *cmd) +static int transport_put_cmd(struct se_cmd *cmd) { BUG_ON(!cmd->se_tfo); - - if (cmd->se_cmd_flags & SCF_SCSI_TMR_CDB) - core_tmr_release_req(cmd->se_tmr_req); - if (cmd->t_task_cdb != cmd->__t_task_cdb) - kfree(cmd->t_task_cdb); /* * If this cmd has been setup with target_get_sess_cmd(), drop * the kref and call ->release_cmd() in kref callback. @@ -2232,18 +2225,6 @@ static int transport_release_cmd(struct se_cmd *cmd) return target_put_sess_cmd(cmd); } -/** - * transport_put_cmd - release a reference to a command - * @cmd: command to release - * - * This routine releases our reference to the command and frees it if possible. 
- */ -static int transport_put_cmd(struct se_cmd *cmd) -{ - transport_free_pages(cmd); - return transport_release_cmd(cmd); -} - void *transport_kmap_data_sg(struct se_cmd *cmd) { struct scatterlist *sg = cmd->t_data_sg; @@ -2441,14 +2422,13 @@ static void transport_write_pending_qf(struct se_cmd *cmd) int transport_generic_free_cmd(struct se_cmd *cmd, int wait_for_tasks) { - unsigned long flags; int ret = 0; if (!(cmd->se_cmd_flags & SCF_SE_LUN_CMD)) { if (wait_for_tasks && (cmd->se_cmd_flags & SCF_SCSI_TMR_CDB)) - transport_wait_for_tasks(cmd); + transport_wait_for_tasks(cmd); - ret = transport_release_cmd(cmd); + ret = transport_put_cmd(cmd); } else { if (wait_for_tasks) transport_wait_for_tasks(cmd); @@ -2457,11 +2437,8 @@ int transport_generic_free_cmd(struct se_cmd *cmd, int wait_for_tasks) * has already added se_cmd to state_list, but fabric has * failed command before I/O submission. */ - if (cmd->state_active) { - spin_lock_irqsave(&cmd->t_state_lock, flags); + if (cmd->state_active) target_remove_from_state_list(cmd); - spin_unlock_irqrestore(&cmd->t_state_lock, flags); - } if (cmd->se_lun) transport_lun_remove_cmd(cmd); @@ -2506,6 +2483,16 @@ int target_get_sess_cmd(struct se_cmd *se_cmd, bool ack_kref) } EXPORT_SYMBOL(target_get_sess_cmd); +static void target_free_cmd_mem(struct se_cmd *cmd) +{ + transport_free_pages(cmd); + + if (cmd->se_cmd_flags & SCF_SCSI_TMR_CDB) + core_tmr_release_req(cmd->se_tmr_req); + if (cmd->t_task_cdb != cmd->__t_task_cdb) + kfree(cmd->t_task_cdb); +} + static void target_release_cmd_kref(struct kref *kref) __releases(&se_cmd->se_sess->sess_cmd_lock) { @@ -2514,17 +2501,20 @@ static void target_release_cmd_kref(struct kref *kref) if (list_empty(&se_cmd->se_cmd_list)) { spin_unlock(&se_sess->sess_cmd_lock); + target_free_cmd_mem(se_cmd); se_cmd->se_tfo->release_cmd(se_cmd); return; } if (se_sess->sess_tearing_down && se_cmd->cmd_wait_set) { spin_unlock(&se_sess->sess_cmd_lock); + target_free_cmd_mem(se_cmd); complete(&se_cmd->cmd_wait_comp); return; } list_del(&se_cmd->se_cmd_list); spin_unlock(&se_sess->sess_cmd_lock); + target_free_cmd_mem(se_cmd); se_cmd->se_tfo->release_cmd(se_cmd); } @@ -2536,6 +2526,7 @@ int target_put_sess_cmd(struct se_cmd *se_cmd) struct se_session *se_sess = se_cmd->se_sess; if (!se_sess) { + target_free_cmd_mem(se_cmd); se_cmd->se_tfo->release_cmd(se_cmd); return 1; } diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index b7a0594efc7e6..f8a1b85fa5197 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -167,6 +167,7 @@ enum se_cmd_flags_table { SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC = 0x00020000, SCF_COMPARE_AND_WRITE = 0x00080000, SCF_COMPARE_AND_WRITE_POST = 0x00100000, + SCF_ACK_KREF = 0x00400000, }; /* struct se_dev_entry->lun_flags and struct se_lun->lun_access */ From 29190c778c5d98b03a9bedf03b5d6771aa140e0c Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Sun, 6 Mar 2016 01:24:26 +0000 Subject: [PATCH 1996/2091] target: Fix TAS handling for multi-session se_node_acls commit ebde1ca5a908b10312db4ecd7553e3ba039319ab upstream. This patch fixes a bug in TMR task aborted status (TAS) handling when multiple sessions are connected to the same target WWPN endpoint and se_node_acl descriptor, resulting in TASK_ABORTED status to not be generated for aborted se_cmds on the remote port. 
This is due to core_tmr_handle_tas_abort() incorrectly comparing se_node_acl instead of se_session, for which the multi-session case is expected to be sharing the same se_node_acl. Instead, go ahead and update core_tmr_handle_tas_abort() to compare tmr_sess + cmd->se_sess in order to determine if the LUN_RESET was received on a different I_T nexus, and TASK_ABORTED status response needs to be generated. Reviewed-by: Christoph Hellwig Cc: Quinn Tran Cc: Himanshu Madhani Cc: Sagi Grimberg Cc: Hannes Reinecke Cc: Andy Grover Cc: Mike Christie Signed-off-by: Nicholas Bellinger Signed-off-by: Sasha Levin --- drivers/target/target_core_tmr.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c index 103c0298b5269..a7e15b3a74045 100644 --- a/drivers/target/target_core_tmr.c +++ b/drivers/target/target_core_tmr.c @@ -79,7 +79,7 @@ void core_tmr_release_req(struct se_tmr_req *tmr) } static void core_tmr_handle_tas_abort( - struct se_node_acl *tmr_nacl, + struct se_session *tmr_sess, struct se_cmd *cmd, int tas) { @@ -87,7 +87,7 @@ static void core_tmr_handle_tas_abort( /* * TASK ABORTED status (TAS) bit support */ - if ((tmr_nacl && (tmr_nacl != cmd->se_sess->se_node_acl)) && tas) { + if (tmr_sess && tmr_sess != cmd->se_sess && tas) { remove = false; transport_send_task_abort(cmd); } @@ -277,7 +277,7 @@ static void core_tmr_drain_tmr_list( static void core_tmr_drain_state_list( struct se_device *dev, struct se_cmd *prout_cmd, - struct se_node_acl *tmr_nacl, + struct se_session *tmr_sess, int tas, struct list_head *preempt_and_abort_list) { @@ -368,7 +368,7 @@ static void core_tmr_drain_state_list( cancel_work_sync(&cmd->work); transport_wait_for_tasks(cmd); - core_tmr_handle_tas_abort(tmr_nacl, cmd, tas); + core_tmr_handle_tas_abort(tmr_sess, cmd, tas); target_put_sess_cmd(cmd); } } @@ -381,6 +381,7 @@ int core_tmr_lun_reset( { struct se_node_acl *tmr_nacl = NULL; struct se_portal_group *tmr_tpg = NULL; + struct se_session *tmr_sess = NULL; int tas; /* * TASK_ABORTED status bit, this is configurable via ConfigFS @@ -399,8 +400,9 @@ int core_tmr_lun_reset( * or struct se_device passthrough.. */ if (tmr && tmr->task_cmd && tmr->task_cmd->se_sess) { - tmr_nacl = tmr->task_cmd->se_sess->se_node_acl; - tmr_tpg = tmr->task_cmd->se_sess->se_tpg; + tmr_sess = tmr->task_cmd->se_sess; + tmr_nacl = tmr_sess->se_node_acl; + tmr_tpg = tmr_sess->se_tpg; if (tmr_nacl && tmr_tpg) { pr_debug("LUN_RESET: TMR caller fabric: %s" " initiator port %s\n", @@ -413,7 +415,7 @@ int core_tmr_lun_reset( dev->transport->name, tas); core_tmr_drain_tmr_list(dev, tmr, preempt_and_abort_list); - core_tmr_drain_state_list(dev, prout_cmd, tmr_nacl, tas, + core_tmr_drain_state_list(dev, prout_cmd, tmr_sess, tas, preempt_and_abort_list); /* From 08367c9e8f0337b0fea23acb459fb4bc40cb7be7 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Sun, 6 Mar 2016 01:24:27 +0000 Subject: [PATCH 1997/2091] target: Fix remote-port TMR ABORT + se_cmd fabric stop commit 0f4a943168f31d29a1701908931acaba518b131a upstream. To address the bug where fabric driver level shutdown of se_cmd occurs at the same time when TMR CMD_T_ABORTED is happening resulting in a -1 ->cmd_kref, this patch adds a CMD_T_FABRIC_STOP bit that is used to determine when TMR + driver I_T nexus shutdown is happening concurrently. 
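Before the details, a rough user-space model of the claim/skip decision as extended by this patch (flag values and the struct are illustrative; the real check in __target_check_io_state() runs under sess_cmd_lock and t_state_lock and also honours session teardown):

#include <stdbool.h>
#include <stdio.h>

#define CMD_T_COMPLETE     (1u << 0)
#define CMD_T_ABORTED      (1u << 1)
#define CMD_T_FABRIC_STOP  (1u << 2)

struct cmd_model {
    unsigned int transport_state;
    unsigned int kref;   /* stands in for se_cmd->cmd_kref */
};

/* The TMR may only abort a command that has neither completed nor entered
 * fabric-driver shutdown, and it must take its own reference while doing so. */
static bool tmr_claims_command(struct cmd_model *cmd)
{
    if (cmd->transport_state & (CMD_T_COMPLETE | CMD_T_FABRIC_STOP))
        return false;    /* the other path owns the teardown */
    if (cmd->kref == 0)
        return false;    /* kref_get_unless_zero() would fail */
    cmd->transport_state |= CMD_T_ABORTED;
    cmd->kref++;
    return true;
}

int main(void)
{
    struct cmd_model in_flight = { 0, 1 };
    struct cmd_model fabric_stop = { CMD_T_FABRIC_STOP, 1 };

    printf("in-flight command claimed by TMR: %s\n",
           tmr_claims_command(&in_flight) ? "yes" : "no");
    printf("fabric-stopped command claimed:   %s\n",
           tmr_claims_command(&fabric_stop) ? "yes" : "no");
    return 0;
}
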
It changes target_sess_cmd_list_set_waiting() to obtain se_cmd->cmd_kref + set CMD_T_FABRIC_STOP, and drop local reference in target_wait_for_sess_cmds() and invoke extra target_put_sess_cmd() during Task Aborted Status (TAS) when necessary. Also, it adds a new target_wait_free_cmd() wrapper around transport_wait_for_tasks() for the special case within transport_generic_free_cmd() to set CMD_T_FABRIC_STOP, and is now aware of CMD_T_ABORTED + CMD_T_TAS status bits to know when an extra transport_put_cmd() during TAS is required. Note transport_generic_free_cmd() is expected to block on cmd->cmd_wait_comp in order to follow what iscsi-target expects during iscsi_conn context se_cmd shutdown. Cc: Quinn Tran Cc: Himanshu Madhani Cc: Sagi Grimberg Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Andy Grover Cc: Mike Christie Signed-off-by: Nicholas Bellinger Signed-off-by: Sasha Levin --- drivers/target/target_core_tmr.c | 55 +++++++--- drivers/target/target_core_transport.c | 137 +++++++++++++++++++------ include/target/target_core_base.h | 2 + 3 files changed, 147 insertions(+), 47 deletions(-) diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c index a7e15b3a74045..ad48837ead424 100644 --- a/drivers/target/target_core_tmr.c +++ b/drivers/target/target_core_tmr.c @@ -78,16 +78,18 @@ void core_tmr_release_req(struct se_tmr_req *tmr) kfree(tmr); } -static void core_tmr_handle_tas_abort( - struct se_session *tmr_sess, - struct se_cmd *cmd, - int tas) +static void core_tmr_handle_tas_abort(struct se_cmd *cmd, int tas) { - bool remove = true; + unsigned long flags; + bool remove = true, send_tas; /* * TASK ABORTED status (TAS) bit support */ - if (tmr_sess && tmr_sess != cmd->se_sess && tas) { + spin_lock_irqsave(&cmd->t_state_lock, flags); + send_tas = (cmd->transport_state & CMD_T_TAS); + spin_unlock_irqrestore(&cmd->t_state_lock, flags); + + if (send_tas) { remove = false; transport_send_task_abort(cmd); } @@ -110,7 +112,8 @@ static int target_check_cdb_and_preempt(struct list_head *list, return 1; } -static bool __target_check_io_state(struct se_cmd *se_cmd) +static bool __target_check_io_state(struct se_cmd *se_cmd, + struct se_session *tmr_sess, int tas) { struct se_session *sess = se_cmd->se_sess; @@ -118,21 +121,33 @@ static bool __target_check_io_state(struct se_cmd *se_cmd) WARN_ON_ONCE(!irqs_disabled()); /* * If command already reached CMD_T_COMPLETE state within - * target_complete_cmd(), this se_cmd has been passed to - * fabric driver and will not be aborted. + * target_complete_cmd() or CMD_T_FABRIC_STOP due to shutdown, + * this se_cmd has been passed to fabric driver and will + * not be aborted. * * Otherwise, obtain a local se_cmd->cmd_kref now for TMR * ABORT_TASK + LUN_RESET for CMD_T_ABORTED processing as * long as se_cmd->cmd_kref is still active unless zero. 
*/ spin_lock(&se_cmd->t_state_lock); - if (se_cmd->transport_state & CMD_T_COMPLETE) { - pr_debug("Attempted to abort io tag: %u already complete," + if (se_cmd->transport_state & (CMD_T_COMPLETE | CMD_T_FABRIC_STOP)) { + pr_debug("Attempted to abort io tag: %u already complete or" + " fabric stop, skipping\n", + se_cmd->se_tfo->get_task_tag(se_cmd)); + spin_unlock(&se_cmd->t_state_lock); + return false; + } + if (sess->sess_tearing_down || se_cmd->cmd_wait_set) { + pr_debug("Attempted to abort io tag: %u already shutdown," " skipping\n", se_cmd->se_tfo->get_task_tag(se_cmd)); spin_unlock(&se_cmd->t_state_lock); return false; } se_cmd->transport_state |= CMD_T_ABORTED; + + if ((tmr_sess != se_cmd->se_sess) && tas) + se_cmd->transport_state |= CMD_T_TAS; + spin_unlock(&se_cmd->t_state_lock); return kref_get_unless_zero(&se_cmd->cmd_kref); @@ -164,7 +179,7 @@ void core_tmr_abort_task( printk("ABORT_TASK: Found referenced %s task_tag: %u\n", se_cmd->se_tfo->get_fabric_name(), ref_tag); - if (!__target_check_io_state(se_cmd)) { + if (!__target_check_io_state(se_cmd, se_sess, 0)) { spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); target_put_sess_cmd(se_cmd); goto out; @@ -234,7 +249,8 @@ static void core_tmr_drain_tmr_list( spin_lock(&sess->sess_cmd_lock); spin_lock(&cmd->t_state_lock); - if (!(cmd->transport_state & CMD_T_ACTIVE)) { + if (!(cmd->transport_state & CMD_T_ACTIVE) || + (cmd->transport_state & CMD_T_FABRIC_STOP)) { spin_unlock(&cmd->t_state_lock); spin_unlock(&sess->sess_cmd_lock); continue; @@ -244,15 +260,22 @@ static void core_tmr_drain_tmr_list( spin_unlock(&sess->sess_cmd_lock); continue; } + if (sess->sess_tearing_down || cmd->cmd_wait_set) { + spin_unlock(&cmd->t_state_lock); + spin_unlock(&sess->sess_cmd_lock); + continue; + } cmd->transport_state |= CMD_T_ABORTED; spin_unlock(&cmd->t_state_lock); rc = kref_get_unless_zero(&cmd->cmd_kref); - spin_unlock(&sess->sess_cmd_lock); if (!rc) { printk("LUN_RESET TMR: non-zero kref_get_unless_zero\n"); + spin_unlock(&sess->sess_cmd_lock); continue; } + spin_unlock(&sess->sess_cmd_lock); + list_move_tail(&tmr_p->tmr_list, &drain_tmr_list); } spin_unlock_irqrestore(&dev->se_tmr_lock, flags); @@ -329,7 +352,7 @@ static void core_tmr_drain_state_list( continue; spin_lock(&sess->sess_cmd_lock); - rc = __target_check_io_state(cmd); + rc = __target_check_io_state(cmd, tmr_sess, tas); spin_unlock(&sess->sess_cmd_lock); if (!rc) continue; @@ -368,7 +391,7 @@ static void core_tmr_drain_state_list( cancel_work_sync(&cmd->work); transport_wait_for_tasks(cmd); - core_tmr_handle_tas_abort(tmr_sess, cmd, tas); + core_tmr_handle_tas_abort(cmd, tas); target_put_sess_cmd(cmd); } } diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 458ecbdac2d28..43b6ee9cac497 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -2420,18 +2420,33 @@ static void transport_write_pending_qf(struct se_cmd *cmd) } } +static bool +__transport_wait_for_tasks(struct se_cmd *, bool, bool *, bool *, + unsigned long *flags); + +static void target_wait_free_cmd(struct se_cmd *cmd, bool *aborted, bool *tas) +{ + unsigned long flags; + + spin_lock_irqsave(&cmd->t_state_lock, flags); + __transport_wait_for_tasks(cmd, true, aborted, tas, &flags); + spin_unlock_irqrestore(&cmd->t_state_lock, flags); +} + int transport_generic_free_cmd(struct se_cmd *cmd, int wait_for_tasks) { int ret = 0; + bool aborted = false, tas = false; if (!(cmd->se_cmd_flags & SCF_SE_LUN_CMD)) { if 
(wait_for_tasks && (cmd->se_cmd_flags & SCF_SCSI_TMR_CDB)) - transport_wait_for_tasks(cmd); + target_wait_free_cmd(cmd, &aborted, &tas); - ret = transport_put_cmd(cmd); + if (!aborted || tas) + ret = transport_put_cmd(cmd); } else { if (wait_for_tasks) - transport_wait_for_tasks(cmd); + target_wait_free_cmd(cmd, &aborted, &tas); /* * Handle WRITE failure case where transport_generic_new_cmd() * has already added se_cmd to state_list, but fabric has @@ -2443,7 +2458,21 @@ int transport_generic_free_cmd(struct se_cmd *cmd, int wait_for_tasks) if (cmd->se_lun) transport_lun_remove_cmd(cmd); - ret = transport_put_cmd(cmd); + if (!aborted || tas) + ret = transport_put_cmd(cmd); + } + /* + * If the task has been internally aborted due to TMR ABORT_TASK + * or LUN_RESET, target_core_tmr.c is responsible for performing + * the remaining calls to target_put_sess_cmd(), and not the + * callers of this function. + */ + if (aborted) { + pr_debug("Detected CMD_T_ABORTED for ITT: %u\n", + cmd->se_tfo->get_task_tag(cmd)); + wait_for_completion(&cmd->cmd_wait_comp); + cmd->se_tfo->release_cmd(cmd); + ret = 1; } return ret; } @@ -2498,6 +2527,7 @@ static void target_release_cmd_kref(struct kref *kref) { struct se_cmd *se_cmd = container_of(kref, struct se_cmd, cmd_kref); struct se_session *se_sess = se_cmd->se_sess; + bool fabric_stop; if (list_empty(&se_cmd->se_cmd_list)) { spin_unlock(&se_sess->sess_cmd_lock); @@ -2505,13 +2535,19 @@ static void target_release_cmd_kref(struct kref *kref) se_cmd->se_tfo->release_cmd(se_cmd); return; } - if (se_sess->sess_tearing_down && se_cmd->cmd_wait_set) { + + spin_lock(&se_cmd->t_state_lock); + fabric_stop = (se_cmd->transport_state & CMD_T_FABRIC_STOP); + spin_unlock(&se_cmd->t_state_lock); + + if (se_cmd->cmd_wait_set || fabric_stop) { + list_del_init(&se_cmd->se_cmd_list); spin_unlock(&se_sess->sess_cmd_lock); target_free_cmd_mem(se_cmd); complete(&se_cmd->cmd_wait_comp); return; } - list_del(&se_cmd->se_cmd_list); + list_del_init(&se_cmd->se_cmd_list); spin_unlock(&se_sess->sess_cmd_lock); target_free_cmd_mem(se_cmd); @@ -2544,6 +2580,7 @@ void target_sess_cmd_list_set_waiting(struct se_session *se_sess) { struct se_cmd *se_cmd; unsigned long flags; + int rc; spin_lock_irqsave(&se_sess->sess_cmd_lock, flags); if (se_sess->sess_tearing_down) { @@ -2553,8 +2590,15 @@ void target_sess_cmd_list_set_waiting(struct se_session *se_sess) se_sess->sess_tearing_down = 1; list_splice_init(&se_sess->sess_cmd_list, &se_sess->sess_wait_list); - list_for_each_entry(se_cmd, &se_sess->sess_wait_list, se_cmd_list) - se_cmd->cmd_wait_set = 1; + list_for_each_entry(se_cmd, &se_sess->sess_wait_list, se_cmd_list) { + rc = kref_get_unless_zero(&se_cmd->cmd_kref); + if (rc) { + se_cmd->cmd_wait_set = 1; + spin_lock(&se_cmd->t_state_lock); + se_cmd->transport_state |= CMD_T_FABRIC_STOP; + spin_unlock(&se_cmd->t_state_lock); + } + } spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); } @@ -2567,15 +2611,25 @@ void target_wait_for_sess_cmds(struct se_session *se_sess) { struct se_cmd *se_cmd, *tmp_cmd; unsigned long flags; + bool tas; list_for_each_entry_safe(se_cmd, tmp_cmd, &se_sess->sess_wait_list, se_cmd_list) { - list_del(&se_cmd->se_cmd_list); + list_del_init(&se_cmd->se_cmd_list); pr_debug("Waiting for se_cmd: %p t_state: %d, fabric state:" " %d\n", se_cmd, se_cmd->t_state, se_cmd->se_tfo->get_cmd_state(se_cmd)); + spin_lock_irqsave(&se_cmd->t_state_lock, flags); + tas = (se_cmd->transport_state & CMD_T_TAS); + spin_unlock_irqrestore(&se_cmd->t_state_lock, flags); + + if 
(!target_put_sess_cmd(se_cmd)) { + if (tas) + target_put_sess_cmd(se_cmd); + } + wait_for_completion(&se_cmd->cmd_wait_comp); pr_debug("After cmd_wait_comp: se_cmd: %p t_state: %d" " fabric state: %d\n", se_cmd, se_cmd->t_state, @@ -2618,34 +2672,38 @@ int transport_clear_lun_ref(struct se_lun *lun) return 0; } -/** - * transport_wait_for_tasks - wait for completion to occur - * @cmd: command to wait - * - * Called from frontend fabric context to wait for storage engine - * to pause and/or release frontend generated struct se_cmd. - */ -bool transport_wait_for_tasks(struct se_cmd *cmd) +static bool +__transport_wait_for_tasks(struct se_cmd *cmd, bool fabric_stop, + bool *aborted, bool *tas, unsigned long *flags) + __releases(&cmd->t_state_lock) + __acquires(&cmd->t_state_lock) { - unsigned long flags; - spin_lock_irqsave(&cmd->t_state_lock, flags); + assert_spin_locked(&cmd->t_state_lock); + WARN_ON_ONCE(!irqs_disabled()); + + if (fabric_stop) + cmd->transport_state |= CMD_T_FABRIC_STOP; + + if (cmd->transport_state & CMD_T_ABORTED) + *aborted = true; + + if (cmd->transport_state & CMD_T_TAS) + *tas = true; + if (!(cmd->se_cmd_flags & SCF_SE_LUN_CMD) && - !(cmd->se_cmd_flags & SCF_SCSI_TMR_CDB)) { - spin_unlock_irqrestore(&cmd->t_state_lock, flags); + !(cmd->se_cmd_flags & SCF_SCSI_TMR_CDB)) return false; - } if (!(cmd->se_cmd_flags & SCF_SUPPORTED_SAM_OPCODE) && - !(cmd->se_cmd_flags & SCF_SCSI_TMR_CDB)) { - spin_unlock_irqrestore(&cmd->t_state_lock, flags); + !(cmd->se_cmd_flags & SCF_SCSI_TMR_CDB)) return false; - } - if (!(cmd->transport_state & CMD_T_ACTIVE)) { - spin_unlock_irqrestore(&cmd->t_state_lock, flags); + if (!(cmd->transport_state & CMD_T_ACTIVE)) + return false; + + if (fabric_stop && *aborted) return false; - } cmd->transport_state |= CMD_T_STOP; @@ -2654,20 +2712,37 @@ bool transport_wait_for_tasks(struct se_cmd *cmd) cmd, cmd->se_tfo->get_task_tag(cmd), cmd->se_tfo->get_cmd_state(cmd), cmd->t_state); - spin_unlock_irqrestore(&cmd->t_state_lock, flags); + spin_unlock_irqrestore(&cmd->t_state_lock, *flags); wait_for_completion(&cmd->t_transport_stop_comp); - spin_lock_irqsave(&cmd->t_state_lock, flags); + spin_lock_irqsave(&cmd->t_state_lock, *flags); cmd->transport_state &= ~(CMD_T_ACTIVE | CMD_T_STOP); pr_debug("wait_for_tasks: Stopped wait_for_completion(" "&cmd->t_transport_stop_comp) for ITT: 0x%08x\n", cmd->se_tfo->get_task_tag(cmd)); + return true; +} + +/** + * transport_wait_for_tasks - wait for completion to occur + * @cmd: command to wait + * + * Called from frontend fabric context to wait for storage engine + * to pause and/or release frontend generated struct se_cmd. 
+ */ +bool transport_wait_for_tasks(struct se_cmd *cmd) +{ + unsigned long flags; + bool ret, aborted = false, tas = false; + + spin_lock_irqsave(&cmd->t_state_lock, flags); + ret = __transport_wait_for_tasks(cmd, false, &aborted, &tas, &flags); spin_unlock_irqrestore(&cmd->t_state_lock, flags); - return true; + return ret; } EXPORT_SYMBOL(transport_wait_for_tasks); diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index f8a1b85fa5197..2b40a1fab2935 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -538,6 +538,8 @@ struct se_cmd { #define CMD_T_DEV_ACTIVE (1 << 7) #define CMD_T_REQUEST_STOP (1 << 8) #define CMD_T_BUSY (1 << 9) +#define CMD_T_TAS (1 << 10) +#define CMD_T_FABRIC_STOP (1 << 11) spinlock_t t_state_lock; struct completion t_transport_stop_comp; From 4aa04a257993e90d573a5dbfa4d3f3259e3f8ba1 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Sun, 6 Mar 2016 01:24:28 +0000 Subject: [PATCH 1998/2091] target: Fix race with SCF_SEND_DELAYED_TAS handling commit 310d3d314be7f0a84011ebdc4bdccbcae9755a87 upstream. This patch fixes a race between setting of SCF_SEND_DELAYED_TAS in transport_send_task_abort(), and check of the same bit in transport_check_aborted_status(). It adds a __transport_check_aborted_status() version that is used by target_execute_cmd() when se_cmd->t_state_lock is held, and a transport_check_aborted_status() wrapper for all other existing callers. Also, it handles the case where the check happens before transport_send_task_abort() gets called. For this, go ahead and set SCF_SEND_DELAYED_TAS early when necessary, and have transport_send_task_abort() send the abort. Cc: Quinn Tran Cc: Himanshu Madhani Cc: Sagi Grimberg Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Andy Grover Cc: Mike Christie Signed-off-by: Nicholas Bellinger Signed-off-by: Sasha Levin --- drivers/target/target_core_transport.c | 53 ++++++++++++++++++++------ 1 file changed, 42 insertions(+), 11 deletions(-) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 43b6ee9cac497..be12b9d840522 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -1835,19 +1835,21 @@ static bool target_handle_task_attr(struct se_cmd *cmd) return true; } +static int __transport_check_aborted_status(struct se_cmd *, int); + void target_execute_cmd(struct se_cmd *cmd) { - /* - * If the received CDB has aleady been aborted stop processing it here. - */ - if (transport_check_aborted_status(cmd, 1)) - return; - /* * Determine if frontend context caller is requesting the stopping of * this command for frontend exceptions. + * + * If the received CDB has aleady been aborted stop processing it here. 
*/ spin_lock_irq(&cmd->t_state_lock); + if (__transport_check_aborted_status(cmd, 1)) { + spin_unlock_irq(&cmd->t_state_lock); + return; + } if (cmd->transport_state & CMD_T_STOP) { pr_debug("%s:%d CMD_T_STOP for ITT: 0x%08x\n", __func__, __LINE__, @@ -3028,8 +3030,13 @@ transport_send_check_condition_and_sense(struct se_cmd *cmd, } EXPORT_SYMBOL(transport_send_check_condition_and_sense); -int transport_check_aborted_status(struct se_cmd *cmd, int send_status) +static int __transport_check_aborted_status(struct se_cmd *cmd, int send_status) + __releases(&cmd->t_state_lock) + __acquires(&cmd->t_state_lock) { + assert_spin_locked(&cmd->t_state_lock); + WARN_ON_ONCE(!irqs_disabled()); + if (!(cmd->transport_state & CMD_T_ABORTED)) return 0; @@ -3037,19 +3044,37 @@ int transport_check_aborted_status(struct se_cmd *cmd, int send_status) * If cmd has been aborted but either no status is to be sent or it has * already been sent, just return */ - if (!send_status || !(cmd->se_cmd_flags & SCF_SEND_DELAYED_TAS)) + if (!send_status || !(cmd->se_cmd_flags & SCF_SEND_DELAYED_TAS)) { + if (send_status) + cmd->se_cmd_flags |= SCF_SEND_DELAYED_TAS; return 1; + } - pr_debug("Sending delayed SAM_STAT_TASK_ABORTED status for CDB: 0x%02x ITT: 0x%08x\n", - cmd->t_task_cdb[0], cmd->se_tfo->get_task_tag(cmd)); + pr_debug("Sending delayed SAM_STAT_TASK_ABORTED status for CDB:" + " 0x%02x ITT: 0x%08x\n", cmd->t_task_cdb[0], + cmd->se_tfo->get_task_tag(cmd)); cmd->se_cmd_flags &= ~SCF_SEND_DELAYED_TAS; cmd->scsi_status = SAM_STAT_TASK_ABORTED; trace_target_cmd_complete(cmd); + + spin_unlock_irq(&cmd->t_state_lock); cmd->se_tfo->queue_status(cmd); + spin_lock_irq(&cmd->t_state_lock); return 1; } + +int transport_check_aborted_status(struct se_cmd *cmd, int send_status) +{ + int ret; + + spin_lock_irq(&cmd->t_state_lock); + ret = __transport_check_aborted_status(cmd, send_status); + spin_unlock_irq(&cmd->t_state_lock); + + return ret; +} EXPORT_SYMBOL(transport_check_aborted_status); void transport_send_task_abort(struct se_cmd *cmd) @@ -3071,11 +3096,17 @@ void transport_send_task_abort(struct se_cmd *cmd) */ if (cmd->data_direction == DMA_TO_DEVICE) { if (cmd->se_tfo->write_pending_status(cmd) != 0) { - cmd->transport_state |= CMD_T_ABORTED; + spin_lock_irqsave(&cmd->t_state_lock, flags); + if (cmd->se_cmd_flags & SCF_SEND_DELAYED_TAS) { + spin_unlock_irqrestore(&cmd->t_state_lock, flags); + goto send_abort; + } cmd->se_cmd_flags |= SCF_SEND_DELAYED_TAS; + spin_unlock_irqrestore(&cmd->t_state_lock, flags); return; } } +send_abort: cmd->scsi_status = SAM_STAT_TASK_ABORTED; transport_lun_remove_cmd(cmd); From e94991ebb5b4d95735731683770cde961cdf9cb4 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Sun, 6 Mar 2016 01:24:29 +0000 Subject: [PATCH 1999/2091] target: Fix linux-4.1.y specific compile warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The linux-4.1.y specific patch to fix a previous v4.1 UNIT_ATTENTION read-copy-update conversion regression: commit 35afa65642a9a88c81913377b93a3a66220f8b9d Author: Nicholas Bellinger Date: Wed Sep 23 07:49:26 2015 +0000 target: Fix v4.1 UNIT_ATTENTION se_node_acl->device_list[] NULL pointer introduced the following compile warning: drivers/target/target_core_pr.c: In function ‘core_scsi3_pr_seq_non_holder’: drivers/target/target_core_pr.c:332:3: warning: ‘return’ with no value, in function returning non-void [-Wreturn-type] Go ahead and fix this up to always returning zero when no ACL device list exists within 
core_scsi3_pr_seq_non_holder(). Signed-off-by: Nicholas Bellinger Signed-off-by: Sasha Levin --- drivers/target/target_core_pr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c index 08aa7cc586941..57fd4e14d4eb2 100644 --- a/drivers/target/target_core_pr.c +++ b/drivers/target/target_core_pr.c @@ -329,7 +329,7 @@ static int core_scsi3_pr_seq_non_holder( * RESERVATION CONFLICT on some CDBs */ if (!se_sess->se_node_acl->device_list) - return; + return 0; se_deve = se_sess->se_node_acl->device_list[cmd->orig_fe_lun]; /* From eef85746c932a90df78492598c501315c7e0c5c1 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 5 Jun 2015 13:42:53 -0400 Subject: [PATCH 2000/2091] perf tools: Update MANIFEST per files removed from kernel [ Upstream commit c8ad7063626406181a7ebab10cb31b4f741b13d4 ] Building perf out of kernel tree is currently broken because the MANIFEST file refers to kernel files that have been removed. With this patch make perf-targz-src-pkg succeeds as does building perf using the generated tarfile. Signed-off-by: David Ahern Link: http://lkml.kernel.org/r/1433526173-172332-1-git-send-email-david.ahern@oracle.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/MANIFEST | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index 11ccbb22ea2b8..13d0458afc716 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -28,24 +28,20 @@ include/asm-generic/bitops/const_hweight.h include/asm-generic/bitops/fls64.h include/asm-generic/bitops/__fls.h include/asm-generic/bitops/fls.h -include/linux/const.h include/linux/perf_event.h include/linux/rbtree.h include/linux/list.h include/linux/hash.h include/linux/stringify.h -lib/find_next_bit.c lib/hweight.c lib/rbtree.c include/linux/swab.h arch/*/include/asm/unistd*.h -arch/*/include/asm/perf_regs.h arch/*/include/uapi/asm/unistd*.h arch/*/include/uapi/asm/perf_regs.h arch/*/lib/memcpy*.S arch/*/lib/memset*.S include/linux/poison.h -include/linux/magic.h include/linux/hw_breakpoint.h include/linux/rbtree_augmented.h include/uapi/linux/perf_event.h From 86708275f1c1ba89c2d1ef7cd4a1c66a10b35b45 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 8 Feb 2016 15:30:18 +0100 Subject: [PATCH 2001/2091] xen/scsiback: correct frontend counting [ Upstream commit f285aa8db7cc4432c1a03f8b55ff34fe96317c11 ] When adding a new frontend to xen-scsiback don't decrement the number of active frontends in case of no error. Doing so results in a failure when trying to remove the xen-pvscsi nexus even if no domain is using it. 
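For context, the corrected behaviour is the usual "undo the count only on failure" idiom. The sketch below is plain user-space C with illustrative names — only fe_count echoes the tv_tpg_fe_count handling in the diff that follows — and shows the frontend count surviving a successful add while being rolled back on error.

    #include <errno.h>
    #include <stdio.h>

    struct tpg {
        int fe_count;               /* frontends currently using this target group */
    };

    /* The caller has already counted itself in fe_count before calling. */
    static int add_translation_entry(struct tpg *tpg, int fail)
    {
        int err = fail ? -EEXIST : 0;   /* stand-in for the real lookup/insert */

        if (err)
            tpg->fe_count--;            /* roll back only when the add failed */
        return err;
    }

    int main(void)
    {
        struct tpg tpg = { .fe_count = 1 };
        int err;

        err = add_translation_entry(&tpg, 0);
        printf("ok:   err=%d fe_count=%d\n", err, tpg.fe_count);   /* stays 1 */

        tpg.fe_count++;                  /* a second frontend arrives */
        err = add_translation_entry(&tpg, 1);
        printf("fail: err=%d fe_count=%d\n", err, tpg.fe_count);   /* back to 1 */
        return 0;
    }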
Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Cc: stable@vger.kernel.org Signed-off-by: David Vrabel Signed-off-by: Sasha Levin --- drivers/xen/xen-scsiback.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c index b7f51504f85ad..c561d530be2e9 100644 --- a/drivers/xen/xen-scsiback.c +++ b/drivers/xen/xen-scsiback.c @@ -941,12 +941,12 @@ static int scsiback_add_translation_entry(struct vscsibk_info *info, spin_unlock_irqrestore(&info->v2p_lock, flags); out_free: - mutex_lock(&tpg->tv_tpg_mutex); - tpg->tv_tpg_fe_count--; - mutex_unlock(&tpg->tv_tpg_mutex); - - if (err) + if (err) { + mutex_lock(&tpg->tv_tpg_mutex); + tpg->tv_tpg_fe_count--; + mutex_unlock(&tpg->tv_tpg_mutex); kfree(new); + } return err; } From 64ecdd296041c4eb9522c7cebed877c47d92a278 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 8 Feb 2016 21:11:50 +0100 Subject: [PATCH 2002/2091] nfs: fix nfs_size_to_loff_t [ Upstream commit 50ab8ec74a153eb30db26529088bc57dd700b24c ] See http: //www.infradead.org/rpr.html X-Evolution-Source: 1451162204.2173.11@leira.trondhjem.org Content-Transfer-Encoding: 8bit Mime-Version: 1.0 We support OFFSET_MAX just fine, so don't round down below it. Also switch to using min_t to make the helper more readable. Signed-off-by: Christoph Hellwig Fixes: 433c92379d9c ("NFS: Clean up nfs_size_to_loff_t()") Cc: stable@vger.kernel.org # 2.6.23+ Signed-off-by: Trond Myklebust Signed-off-by: Sasha Levin --- include/linux/nfs_fs.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index b95f914ce0838..150f43a9149cb 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -540,9 +540,7 @@ extern int nfs_readpage_async(struct nfs_open_context *, struct inode *, static inline loff_t nfs_size_to_loff_t(__u64 size) { - if (size > (__u64) OFFSET_MAX - 1) - return OFFSET_MAX - 1; - return (loff_t) size; + return min_t(u64, size, OFFSET_MAX); } static inline ino_t From 891985fa8e08d0c625ee71096fc3d10350eb051b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 11 Feb 2016 14:16:27 +0100 Subject: [PATCH 2003/2091] libata: fix HDIO_GET_32BIT ioctl [ Upstream commit 287e6611ab1eac76c2c5ebf6e345e04c80ca9c61 ] As reported by Soohoon Lee, the HDIO_GET_32BIT ioctl does not work correctly in compat mode with libata. I have investigated the issue further and found multiple problems that all appeared with the same commit that originally introduced HDIO_GET_32BIT handling in libata back in linux-2.6.8 and presumably also linux-2.4, as the code uses "copy_to_user(arg, &val, 1)" to copy a 'long' variable containing either 0 or 1 to user space. The problems with this are: * On big-endian machines, this will always write a zero because it stores the wrong byte into user space. * In compat mode, the upper three bytes of the variable are updated by the compat_hdio_ioctl() function, but they now contain uninitialized stack data. * The hdparm tool calling this ioctl uses a 'static long' variable to store the result. This means at least the upper bytes are initialized to zero, but calling another ioctl like HDIO_GET_MULTCOUNT would fill them with data that remains stale when the low byte is overwritten. 
Fortunately libata doesn't implement any of the affected ioctl commands, so this would only happen when we query both an IDE and an ATA device in the same command such as "hdparm -N -c /dev/hda /dev/sda" * The libata code for unknown reasons started using ATA_IOC_GET_IO32 and ATA_IOC_SET_IO32 as aliases for HDIO_GET_32BIT and HDIO_SET_32BIT, while the ioctl commands that were added later use the normal HDIO_* names. This is harmless but rather confusing. This addresses all four issues by changing the code to use put_user() on an 'unsigned long' variable in HDIO_GET_32BIT, like the IDE subsystem does, and by clarifying the names of the ioctl commands. Signed-off-by: Arnd Bergmann Reported-by: Soohoon Lee Tested-by: Soohoon Lee Cc: stable@vger.kernel.org Signed-off-by: Tejun Heo Signed-off-by: Sasha Levin --- drivers/ata/libata-scsi.c | 11 +++++------ include/linux/ata.h | 4 ++-- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 0d7f0da3a2692..ae7cfcb562dca 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -675,19 +675,18 @@ static int ata_ioc32(struct ata_port *ap) int ata_sas_scsi_ioctl(struct ata_port *ap, struct scsi_device *scsidev, int cmd, void __user *arg) { - int val = -EINVAL, rc = -EINVAL; + unsigned long val; + int rc = -EINVAL; unsigned long flags; switch (cmd) { - case ATA_IOC_GET_IO32: + case HDIO_GET_32BIT: spin_lock_irqsave(ap->lock, flags); val = ata_ioc32(ap); spin_unlock_irqrestore(ap->lock, flags); - if (copy_to_user(arg, &val, 1)) - return -EFAULT; - return 0; + return put_user(val, (unsigned long __user *)arg); - case ATA_IOC_SET_IO32: + case HDIO_SET_32BIT: val = (unsigned long) arg; rc = 0; spin_lock_irqsave(ap->lock, flags); diff --git a/include/linux/ata.h b/include/linux/ata.h index 5dfbcd8887bb5..2e5fb1c312512 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -487,8 +487,8 @@ enum ata_tf_protocols { }; enum ata_ioctls { - ATA_IOC_GET_IO32 = 0x309, - ATA_IOC_SET_IO32 = 0x324, + ATA_IOC_GET_IO32 = 0x309, /* HDIO_GET_32BIT */ + ATA_IOC_SET_IO32 = 0x324, /* HDIO_SET_32BIT */ }; /* core structures */ From 18ddf12c2de57556909778af1ab31177bf53d5cb Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Mon, 7 Mar 2016 12:23:04 -0500 Subject: [PATCH 2004/2091] iwlwifi: mvm: don't allow sched scans without matches to be started [ Upstream commit 5e56276e7555b34550d51459a801ff75eca8b907 ] The firmware can perform a scheduled scan with not matchsets passed, but it can't send notification that results were found. Since the userspace then cannot know when we got new results and the firmware wouldn't trigger a wake in case we are sleeping, it's better not to allow scans without matchsets. 
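The guard being added here is an early parameter check and nothing more. The sketch below is a user-space stand-in; the struct and function names are illustrative, and only n_match_sets and the -EOPNOTSUPP return come from the patch.

    /* Refuse a scheduled-scan request that carries no match sets, since the
     * firmware could never report a match for it. */
    #include <errno.h>
    #include <stdio.h>

    struct sched_scan_request {
        int n_match_sets;           /* SSID/RSSI match sets supplied by userspace */
    };

    static int sched_scan_start(const struct sched_scan_request *req)
    {
        if (!req->n_match_sets)
            return -EOPNOTSUPP;     /* "match all" is not supported by the firmware */
        return 0;                   /* a real driver would build and send the command */
    }

    int main(void)
    {
        struct sched_scan_request empty = { 0 }, with_sets = { 2 };

        printf("empty: %d, with sets: %d\n",
               sched_scan_start(&empty), sched_scan_start(&with_sets));
        return 0;
    }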
This fixes https://bugzilla.kernel.org/show_bug.cgi?id=110831 Cc: [3.17+] Signed-off-by: Luca Coelho Signed-off-by: Emmanuel Grumbach [SL: Backport to 4.1] Signed-off-by: Sasha Levin --- drivers/net/wireless/iwlwifi/mvm/mac80211.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/wireless/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/iwlwifi/mvm/mac80211.c index 9779c1e5688c3..90e8b662e44d3 100644 --- a/drivers/net/wireless/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/iwlwifi/mvm/mac80211.c @@ -2797,6 +2797,10 @@ static int iwl_mvm_mac_sched_scan_start(struct ieee80211_hw *hw, struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw); int ret; + /* we don't support "match all" in the firmware */ + if (!req->n_match_sets) + return -EOPNOTSUPP; + if (!(mvm->fw->ucode_capa.capa[0] & IWL_UCODE_TLV_CAPA_UMAC_SCAN)) { ret = iwl_mvm_cancel_scan_wait_notif(mvm, IWL_MVM_SCAN_OS); if (ret) From b0a4f565b1dba16cb98842d3129fa0f57445b044 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Mon, 16 Nov 2015 12:40:48 -0500 Subject: [PATCH 2005/2091] xen/pciback: Save xen_pci_op commands before processing it [ Upstream commit 8135cf8b092723dbfcc611fe6fdcb3a36c9951c5 ] Double fetch vulnerabilities that happen when a variable is fetched twice from shared memory but a security check is only performed the first time. The xen_pcibk_do_op function performs a switch statements on the op->cmd value which is stored in shared memory. Interestingly this can result in a double fetch vulnerability depending on the performed compiler optimization. This patch fixes it by saving the xen_pci_op command before processing it. We also use 'barrier' to make sure that the compiler does not perform any optimization. This is part of XSA155. CC: stable@vger.kernel.org Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: Jan Beulich Signed-off-by: David Vrabel Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Sasha Levin --- drivers/xen/xen-pciback/pciback.h | 1 + drivers/xen/xen-pciback/pciback_ops.c | 15 ++++++++++++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/xen/xen-pciback/pciback.h b/drivers/xen/xen-pciback/pciback.h index 58e38d586f524..4d529f3e40df9 100644 --- a/drivers/xen/xen-pciback/pciback.h +++ b/drivers/xen/xen-pciback/pciback.h @@ -37,6 +37,7 @@ struct xen_pcibk_device { struct xen_pci_sharedinfo *sh_info; unsigned long flags; struct work_struct op_work; + struct xen_pci_op op; }; struct xen_pcibk_dev_data { diff --git a/drivers/xen/xen-pciback/pciback_ops.c b/drivers/xen/xen-pciback/pciback_ops.c index c4a0666de6f5e..a0e0e3ed4905c 100644 --- a/drivers/xen/xen-pciback/pciback_ops.c +++ b/drivers/xen/xen-pciback/pciback_ops.c @@ -298,9 +298,11 @@ void xen_pcibk_do_op(struct work_struct *data) container_of(data, struct xen_pcibk_device, op_work); struct pci_dev *dev; struct xen_pcibk_dev_data *dev_data = NULL; - struct xen_pci_op *op = &pdev->sh_info->op; + struct xen_pci_op *op = &pdev->op; int test_intx = 0; + *op = pdev->sh_info->op; + barrier(); dev = xen_pcibk_get_pci_dev(pdev, op->domain, op->bus, op->devfn); if (dev == NULL) @@ -342,6 +344,17 @@ void xen_pcibk_do_op(struct work_struct *data) if ((dev_data->enable_intx != test_intx)) xen_pcibk_control_isr(dev, 0 /* no reset */); } + pdev->sh_info->op.err = op->err; + pdev->sh_info->op.value = op->value; +#ifdef CONFIG_PCI_MSI + if (op->cmd == XEN_PCI_OP_enable_msix && op->err == 0) { + unsigned int i; + + for (i = 0; i < op->value; i++) + pdev->sh_info->op.msix_entries[i].vector = + op->msix_entries[i].vector; + } 
+#endif /* Tell the driver domain that we're done. */ wmb(); clear_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags); From 05584b5815ecb05a6b8f394eef566781eb66b821 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 11 Feb 2016 16:10:24 -0500 Subject: [PATCH 2006/2091] xen/pciback: Save the number of MSI-X entries to be copied later. [ Upstream commit 4cf5aa2ffe17403385d75a5b1d9d97071500ea18 ] commit d159457b84395927b5a52adb72f748dd089ad5e5 upstream. Commit 8135cf8b092723dbfcc611fe6fdcb3a36c9951c5 (xen/pciback: Save xen_pci_op commands before processing it) broke enabling MSI-X because it would never copy the resulting vectors into the response. The number of vectors requested was being overwritten by the return value (typically zero for success). Save the number of vectors before processing the op, so the correct number of vectors are copied afterwards. Signed-off-by: Konrad Rzeszutek Wilk Reviewed-by: Jan Beulich Signed-off-by: David Vrabel Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/xen/xen-pciback/pciback_ops.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/xen/xen-pciback/pciback_ops.c b/drivers/xen/xen-pciback/pciback_ops.c index a0e0e3ed4905c..e8babfd78b351 100644 --- a/drivers/xen/xen-pciback/pciback_ops.c +++ b/drivers/xen/xen-pciback/pciback_ops.c @@ -300,6 +300,9 @@ void xen_pcibk_do_op(struct work_struct *data) struct xen_pcibk_dev_data *dev_data = NULL; struct xen_pci_op *op = &pdev->op; int test_intx = 0; +#ifdef CONFIG_PCI_MSI + unsigned int nr = 0; +#endif *op = pdev->sh_info->op; barrier(); @@ -328,6 +331,7 @@ void xen_pcibk_do_op(struct work_struct *data) op->err = xen_pcibk_disable_msi(pdev, dev, op); break; case XEN_PCI_OP_enable_msix: + nr = op->value; op->err = xen_pcibk_enable_msix(pdev, dev, op); break; case XEN_PCI_OP_disable_msix: @@ -350,7 +354,7 @@ void xen_pcibk_do_op(struct work_struct *data) if (op->cmd == XEN_PCI_OP_enable_msix && op->err == 0) { unsigned int i; - for (i = 0; i < op->value; i++) + for (i = 0; i < nr; i++) pdev->sh_info->op.msix_entries[i].vector = op->msix_entries[i].vector; } From a6c762381cc34ff6df5f75afa46e7f153116cf78 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Mon, 2 Nov 2015 18:07:44 -0500 Subject: [PATCH 2007/2091] xen/pciback: Return error on XEN_PCI_OP_enable_msix when device has MSI or MSI-X enabled [ Upstream commit 5e0ce1455c09dd61d029b8ad45d82e1ac0b6c4c9 ] The guest sequence of: a) XEN_PCI_OP_enable_msix b) XEN_PCI_OP_enable_msix results in hitting an NULL pointer due to using freed pointers. The device passed in the guest MUST have MSI-X capability. The a) constructs and SysFS representation of MSI and MSI groups. The b) adds a second set of them but adding in to SysFS fails (duplicate entry). 'populate_msi_sysfs' frees the newly allocated msi_irq_groups (note that in a) pdev->msi_irq_groups is still set) and also free's ALL of the MSI-X entries of the device (the ones allocated in step a) and b)). The unwind code: 'free_msi_irqs' deletes all the entries and tries to delete the pdev->msi_irq_groups (which hasn't been set to NULL). However the pointers in the SysFS are already freed and we hit an NULL pointer further on when 'strlen' is attempted on a freed pointer. The patch adds a simple check in the XEN_PCI_OP_enable_msix to guard against that. The check for msi_enabled is not stricly neccessary. 
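Stated as a standalone pattern, the fix is a pair of state guards in front of the enable path. In the user-space sketch below only the -EALREADY/-ENXIO results and the msi/msix flags mirror the diff; everything else is illustrative.

    /* A second enable of MSI-X (or an enable while plain MSI is active) is
     * refused up front instead of corrupting already-built state. */
    #include <errno.h>
    #include <stdbool.h>
    #include <stdio.h>

    struct pci_dev_state {
        bool msi_enabled;
        bool msix_enabled;
    };

    static int enable_msix(struct pci_dev_state *dev)
    {
        if (dev->msix_enabled)
            return -EALREADY;       /* guest asked twice; keep the first setup intact */
        if (dev->msi_enabled)
            return -ENXIO;          /* MSI and MSI-X are mutually exclusive */

        dev->msix_enabled = true;   /* real code allocates entries and programs them */
        return 0;
    }

    int main(void)
    {
        struct pci_dev_state dev = { false, false };

        printf("first:  %d\n", enable_msix(&dev));
        printf("second: %d\n", enable_msix(&dev));
        return 0;
    }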
This is part of XSA-157 CC: stable@vger.kernel.org Reviewed-by: David Vrabel Reviewed-by: Jan Beulich Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Sasha Levin --- drivers/xen/xen-pciback/pciback_ops.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/xen/xen-pciback/pciback_ops.c b/drivers/xen/xen-pciback/pciback_ops.c index e8babfd78b351..91b809e98952c 100644 --- a/drivers/xen/xen-pciback/pciback_ops.c +++ b/drivers/xen/xen-pciback/pciback_ops.c @@ -201,9 +201,16 @@ int xen_pcibk_enable_msix(struct xen_pcibk_device *pdev, if (unlikely(verbose_request)) printk(KERN_DEBUG DRV_NAME ": %s: enable MSI-X\n", pci_name(dev)); + if (op->value > SH_INFO_MAX_VEC) return -EINVAL; + if (dev->msix_enabled) + return -EALREADY; + + if (dev->msi_enabled) + return -ENXIO; + entries = kmalloc(op->value * sizeof(*entries), GFP_KERNEL); if (entries == NULL) return -ENOMEM; From fda3e3e7b638f742149fb32aa9b691413bb91f41 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Mon, 2 Nov 2015 18:13:27 -0500 Subject: [PATCH 2008/2091] xen/pciback: Don't allow MSI-X ops if PCI_COMMAND_MEMORY is not set. [ Upstream commit 408fb0e5aa7fda0059db282ff58c3b2a4278baa0 ] commit f598282f51 ("PCI: Fix the NIU MSI-X problem in a better way") teaches us that dealing with MSI-X can be troublesome. Further checks in the MSI-X architecture shows that if the PCI_COMMAND_MEMORY bit is turned of in the PCI_COMMAND we may not be able to access the BAR (since they are memory regions). Since the MSI-X tables are located in there.. that can lead to us causing PCIe errors. Inhibit us performing any operation on the MSI-X unless the MEMORY bit is set. Note that Xen hypervisor with: "x86/MSI-X: access MSI-X table only after having enabled MSI-X" will return: xen_pciback: 0000:0a:00.1: error -6 enabling MSI-X for guest 3! When the generic MSI code tries to setup the PIRQ without MEMORY bit set. Which means with later versions of Xen (4.6) this patch is not neccessary. This is part of XSA-157 CC: stable@vger.kernel.org Reviewed-by: Jan Beulich Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Sasha Levin --- drivers/xen/xen-pciback/pciback_ops.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/xen/xen-pciback/pciback_ops.c b/drivers/xen/xen-pciback/pciback_ops.c index 91b809e98952c..7d858ff958eda 100644 --- a/drivers/xen/xen-pciback/pciback_ops.c +++ b/drivers/xen/xen-pciback/pciback_ops.c @@ -197,6 +197,7 @@ int xen_pcibk_enable_msix(struct xen_pcibk_device *pdev, struct xen_pcibk_dev_data *dev_data; int i, result; struct msix_entry *entries; + u16 cmd; if (unlikely(verbose_request)) printk(KERN_DEBUG DRV_NAME ": %s: enable MSI-X\n", @@ -208,7 +209,12 @@ int xen_pcibk_enable_msix(struct xen_pcibk_device *pdev, if (dev->msix_enabled) return -EALREADY; - if (dev->msi_enabled) + /* + * PCI_COMMAND_MEMORY must be enabled, otherwise we may not be able + * to access the BARs where the MSI-X entries reside. + */ + pci_read_config_word(dev, PCI_COMMAND, &cmd); + if (dev->msi_enabled || !(cmd & PCI_COMMAND_MEMORY)) return -ENXIO; entries = kmalloc(op->value * sizeof(*entries), GFP_KERNEL); From a7b4133de42f7eb7a926e1a5a1aaf0c1e7bfad12 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 11 Feb 2016 16:10:23 -0500 Subject: [PATCH 2009/2091] xen/pciback: Check PF instead of VF for PCI_COMMAND_MEMORY [ Upstream commit d52a24819677bbb45eb1ce93a42daa1ae6c4d61d ] commit 8d47065f7d1980dde52abb874b301054f3013602 upstream. 
Commit 408fb0e5aa7fda0059db282ff58c3b2a4278baa0 (xen/pciback: Don't allow MSI-X ops if PCI_COMMAND_MEMORY is not set) prevented enabling MSI-X on passed-through virtual functions, because it checked the VF for PCI_COMMAND_MEMORY but this is not a valid bit for VFs. Instead, check the physical function for PCI_COMMAND_MEMORY. Signed-off-by: Konrad Rzeszutek Wilk Reviewed-by: Jan Beulich Signed-off-by: David Vrabel Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/xen/xen-pciback/pciback_ops.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/xen/xen-pciback/pciback_ops.c b/drivers/xen/xen-pciback/pciback_ops.c index 7d858ff958eda..9cf4653b6bd7a 100644 --- a/drivers/xen/xen-pciback/pciback_ops.c +++ b/drivers/xen/xen-pciback/pciback_ops.c @@ -212,8 +212,9 @@ int xen_pcibk_enable_msix(struct xen_pcibk_device *pdev, /* * PCI_COMMAND_MEMORY must be enabled, otherwise we may not be able * to access the BARs where the MSI-X entries reside. + * But VF devices are unique in which the PF needs to be checked. */ - pci_read_config_word(dev, PCI_COMMAND, &cmd); + pci_read_config_word(pci_physfn(dev), PCI_COMMAND, &cmd); if (dev->msi_enabled || !(cmd & PCI_COMMAND_MEMORY)) return -ENXIO; From 9a30ae51a5bb55570004afba8016611cff882cb2 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 11 Feb 2016 16:10:26 -0500 Subject: [PATCH 2010/2091] xen/pcifront: Fix mysterious crashes when NUMA locality information was extracted. [ Upstream commit 4d8c8bd6f2062c9988817183a91fe2e623c8aa5e ] Occasionaly PV guests would crash with: pciback 0000:00:00.1: Xen PCI mapped GSI0 to IRQ16 BUG: unable to handle kernel paging request at 0000000d1a8c0be0 .. snip.. ] find_next_bit+0xb/0x10 [] cpumask_next_and+0x22/0x40 [] pci_device_probe+0xb8/0x120 [] ? driver_sysfs_add+0x77/0xa0 [] driver_probe_device+0x1a4/0x2d0 [] ? pci_match_device+0xdd/0x110 [] __device_attach_driver+0xa7/0xb0 [] ? __driver_attach+0xa0/0xa0 [] bus_for_each_drv+0x62/0x90 [] __device_attach+0xbd/0x110 [] device_attach+0xb/0x10 [] pci_bus_add_device+0x3c/0x70 [] pci_bus_add_devices+0x38/0x80 [] pcifront_scan_root+0x13e/0x1a0 [] pcifront_backend_changed+0x262/0x60b [] ? xenbus_gather+0xd6/0x160 [] ? put_object+0x2f/0x50 [] xenbus_otherend_changed+0x9d/0xa0 [] backend_changed+0xe/0x10 [] xenwatch_thread+0xc8/0x190 [] ? woken_wake_function+0x10/0x10 which was the result of two things: When we call pci_scan_root_bus we would pass in 'sd' (sysdata) pointer which was an 'pcifront_sd' structure. However in the pci_device_add it expects that the 'sd' is 'struct sysdata' and sets the dev->node to what is in sd->node (offset 4): set_dev_node(&dev->dev, pcibus_to_node(bus)); __pcibus_to_node(const struct pci_bus *bus) { const struct pci_sysdata *sd = bus->sysdata; return sd->node; } However our structure was pcifront_sd which had nothing at that offset: struct pcifront_sd { int domain; /* 0 4 */ /* XXX 4 bytes hole, try to pack */ struct pcifront_device * pdev; /* 8 8 */ } That is an hole - filled with garbage as we used kmalloc instead of kzalloc (the second problem). This patch fixes the issue by: 1) Use kzalloc to initialize to a well known state. 2) Put 'struct pci_sysdata' at the start of 'pcifront_sd'. That way access to the 'node' will access the right offset. 
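To see why item 2) matters, here is a small user-space sketch (not the driver itself; the structure and function names only echo the patch) of the layout rule: generic code casts the opaque sysdata pointer to struct pci_sysdata, so the wrapper must begin with one — and be zero-initialized — for that cast to land on valid data.

    #include <stdio.h>
    #include <stdlib.h>

    struct pci_sysdata {            /* what the generic PCI code expects */
        int domain;
        int node;
    };

    struct pcifront_sd {            /* driver-private wrapper, fixed layout */
        struct pci_sysdata sd;      /* MUST be first so the cast below is valid */
        void *pdev;
    };

    static int bus_to_node(void *sysdata)
    {
        /* Mirrors __pcibus_to_node(): blindly interprets sysdata as pci_sysdata. */
        return ((struct pci_sysdata *)sysdata)->node;
    }

    int main(void)
    {
        struct pcifront_sd *sd = calloc(1, sizeof(*sd));   /* kzalloc analogue */

        if (!sd)
            return 1;
        sd->sd.domain = 0;
        sd->sd.node = 0;            /* first_online_node stand-in */

        printf("node seen by generic code: %d\n", bus_to_node(sd));
        free(sd);
        return 0;
    }

With the old layout the same cast read the pdev pointer bytes as a NUMA node, which is the garbage value the crash backtrace above was chasing.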
Signed-off-by: Konrad Rzeszutek Wilk Cc: Reviewed-by: Boris Ostrovsky Signed-off-by: David Vrabel Signed-off-by: Sasha Levin --- drivers/pci/xen-pcifront.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c index 7cfd2db02deb3..914655e896770 100644 --- a/drivers/pci/xen-pcifront.c +++ b/drivers/pci/xen-pcifront.c @@ -52,7 +52,7 @@ struct pcifront_device { }; struct pcifront_sd { - int domain; + struct pci_sysdata sd; struct pcifront_device *pdev; }; @@ -66,7 +66,9 @@ static inline void pcifront_init_sd(struct pcifront_sd *sd, unsigned int domain, unsigned int bus, struct pcifront_device *pdev) { - sd->domain = domain; + /* Because we do not expose that information via XenBus. */ + sd->sd.node = first_online_node; + sd->sd.domain = domain; sd->pdev = pdev; } @@ -464,8 +466,8 @@ static int pcifront_scan_root(struct pcifront_device *pdev, dev_info(&pdev->xdev->dev, "Creating PCI Frontend Bus %04x:%02x\n", domain, bus); - bus_entry = kmalloc(sizeof(*bus_entry), GFP_KERNEL); - sd = kmalloc(sizeof(*sd), GFP_KERNEL); + bus_entry = kzalloc(sizeof(*bus_entry), GFP_KERNEL); + sd = kzalloc(sizeof(*sd), GFP_KERNEL); if (!bus_entry || !sd) { err = -ENOMEM; goto err_out; From f0b73a580df467705a7a316daf03a0ecdac1ec8c Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 10 Feb 2016 08:09:10 -0200 Subject: [PATCH 2011/2091] [media] adv7604: fix tx 5v detect regression [ Upstream commit 0ba4581c84cfb39fd527f6b3457f1c97f6356c04 ] The 5 volt detect functionality broke in 3.14: the code reads IO register 0x70 again after it has already been cleared. Instead it should use the cached irq_reg_0x70 value and the io_write to 0x71 to clear 0x70 can be dropped since this has already been done. Signed-off-by: Hans Verkuil Cc: # for v3.14 and up Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/i2c/adv7604.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/media/i2c/adv7604.c b/drivers/media/i2c/adv7604.c index 60ffcf098befa..5f92ec23bb075 100644 --- a/drivers/media/i2c/adv7604.c +++ b/drivers/media/i2c/adv7604.c @@ -1911,10 +1911,9 @@ static int adv76xx_isr(struct v4l2_subdev *sd, u32 status, bool *handled) } /* tx 5v detect */ - tx_5v = io_read(sd, 0x70) & info->cable_det_mask; + tx_5v = irq_reg_0x70 & info->cable_det_mask; if (tx_5v) { v4l2_dbg(1, debug, sd, "%s: tx_5v: 0x%x\n", __func__, tx_5v); - io_write(sd, 0x71, tx_5v); adv76xx_s_detect_tx_5v_ctrl(sd); if (handled) *handled = true; From 40b43e3cc942d3149c4a6ef4692703ee64e8ba62 Mon Sep 17 00:00:00 2001 From: John Youn Date: Tue, 16 Feb 2016 20:10:53 -0800 Subject: [PATCH 2012/2091] usb: dwc3: Fix assignment of EP transfer resources [ Upstream commit c450960187f45d4260db87c7dd4fc0bceb5565d8 ] The assignement of EP transfer resources was not handled properly in the dwc3 driver. Commit aebda6187181 ("usb: dwc3: Reset the transfer resource index on SET_INTERFACE") previously fixed one aspect of this where resources may be exhausted with multiple calls to SET_INTERFACE. However, it introduced an issue where composite devices with multiple interfaces can be assigned the same transfer resources for different endpoints. This patch solves both issues. The assignment of transfer resources cannot perfectly follow the data book due to the fact that the controller driver does not have all knowledge of the configuration in advance. 
It is given this information piecemeal by the composite gadget framework after every SET_CONFIGURATION and SET_INTERFACE. Trying to follow the databook programming model in this scenario can cause errors. For two reasons: 1) The databook says to do DEPSTARTCFG for every SET_CONFIGURATION and SET_INTERFACE (8.1.5). This is incorrect in the scenario of multiple interfaces. 2) The databook does not mention doing more DEPXFERCFG for new endpoint on alt setting (8.1.6). The following simplified method is used instead: All hardware endpoints can be assigned a transfer resource and this setting will stay persistent until either a core reset or hibernation. So whenever we do a DEPSTARTCFG(0) we can go ahead and do DEPXFERCFG for every hardware endpoint as well. We are guaranteed that there are as many transfer resources as endpoints. This patch triggers off of the calling dwc3_gadget_start_config() for EP0-out, which always happens first, and which should only happen in one of the above conditions. Fixes: aebda6187181 ("usb: dwc3: Reset the transfer resource index on SET_INTERFACE") Cc: # v3.2+ Reported-by: Ravi Babu Signed-off-by: John Youn Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin --- drivers/usb/dwc3/core.h | 1 - drivers/usb/dwc3/ep0.c | 5 --- drivers/usb/dwc3/gadget.c | 70 +++++++++++++++++++++++++++++---------- 3 files changed, 52 insertions(+), 24 deletions(-) diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index 173edd4ca20eb..be245d073f15e 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -829,7 +829,6 @@ struct dwc3 { unsigned pullups_connected:1; unsigned resize_fifos:1; unsigned setup_packet_pending:1; - unsigned start_config_issued:1; unsigned three_stage_setup:1; unsigned usb3_lpm_capable:1; diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c index 06ecd1e6871ce..00f2c456f94bb 100644 --- a/drivers/usb/dwc3/ep0.c +++ b/drivers/usb/dwc3/ep0.c @@ -545,7 +545,6 @@ static int dwc3_ep0_set_config(struct dwc3 *dwc, struct usb_ctrlrequest *ctrl) int ret; u32 reg; - dwc->start_config_issued = false; cfg = le16_to_cpu(ctrl->wValue); switch (state) { @@ -727,10 +726,6 @@ static int dwc3_ep0_std_request(struct dwc3 *dwc, struct usb_ctrlrequest *ctrl) dwc3_trace(trace_dwc3_ep0, "USB_REQ_SET_ISOCH_DELAY"); ret = dwc3_ep0_set_isoch_delay(dwc, ctrl); break; - case USB_REQ_SET_INTERFACE: - dwc3_trace(trace_dwc3_ep0, "USB_REQ_SET_INTERFACE"); - dwc->start_config_issued = false; - /* Fall through */ default: dwc3_trace(trace_dwc3_ep0, "Forwarding to gadget driver"); ret = dwc3_ep0_delegate_req(dwc, ctrl); diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 6fbf461d523c6..b886226be2418 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -388,24 +388,66 @@ static void dwc3_free_trb_pool(struct dwc3_ep *dep) dep->trb_pool_dma = 0; } +static int dwc3_gadget_set_xfer_resource(struct dwc3 *dwc, struct dwc3_ep *dep); + +/** + * dwc3_gadget_start_config - Configure EP resources + * @dwc: pointer to our controller context structure + * @dep: endpoint that is being enabled + * + * The assignment of transfer resources cannot perfectly follow the + * data book due to the fact that the controller driver does not have + * all knowledge of the configuration in advance. It is given this + * information piecemeal by the composite gadget framework after every + * SET_CONFIGURATION and SET_INTERFACE. Trying to follow the databook + * programming model in this scenario can cause errors. 
For two + * reasons: + * + * 1) The databook says to do DEPSTARTCFG for every SET_CONFIGURATION + * and SET_INTERFACE (8.1.5). This is incorrect in the scenario of + * multiple interfaces. + * + * 2) The databook does not mention doing more DEPXFERCFG for new + * endpoint on alt setting (8.1.6). + * + * The following simplified method is used instead: + * + * All hardware endpoints can be assigned a transfer resource and this + * setting will stay persistent until either a core reset or + * hibernation. So whenever we do a DEPSTARTCFG(0) we can go ahead and + * do DEPXFERCFG for every hardware endpoint as well. We are + * guaranteed that there are as many transfer resources as endpoints. + * + * This function is called for each endpoint when it is being enabled + * but is triggered only when called for EP0-out, which always happens + * first, and which should only happen in one of the above conditions. + */ static int dwc3_gadget_start_config(struct dwc3 *dwc, struct dwc3_ep *dep) { struct dwc3_gadget_ep_cmd_params params; u32 cmd; + int i; + int ret; + + if (dep->number) + return 0; memset(¶ms, 0x00, sizeof(params)); + cmd = DWC3_DEPCMD_DEPSTARTCFG; - if (dep->number != 1) { - cmd = DWC3_DEPCMD_DEPSTARTCFG; - /* XferRscIdx == 0 for ep0 and 2 for the remaining */ - if (dep->number > 1) { - if (dwc->start_config_issued) - return 0; - dwc->start_config_issued = true; - cmd |= DWC3_DEPCMD_PARAM(2); - } + ret = dwc3_send_gadget_ep_cmd(dwc, 0, cmd, ¶ms); + if (ret) + return ret; - return dwc3_send_gadget_ep_cmd(dwc, 0, cmd, ¶ms); + for (i = 0; i < DWC3_ENDPOINTS_NUM; i++) { + struct dwc3_ep *dep = dwc->eps[i]; + + if (!dep) + continue; + + ret = dwc3_gadget_set_xfer_resource(dwc, dep); + if (ret) + return ret; } return 0; @@ -519,10 +561,6 @@ static int __dwc3_gadget_ep_enable(struct dwc3_ep *dep, struct dwc3_trb *trb_st_hw; struct dwc3_trb *trb_link; - ret = dwc3_gadget_set_xfer_resource(dwc, dep); - if (ret) - return ret; - dep->endpoint.desc = desc; dep->comp_desc = comp_desc; dep->type = usb_endpoint_type(desc); @@ -1589,8 +1627,6 @@ static int dwc3_gadget_start(struct usb_gadget *g, } dwc3_writel(dwc->regs, DWC3_DCFG, reg); - dwc->start_config_issued = false; - /* Start with SuperSpeed Default */ dwc3_gadget_ep0_desc.wMaxPacketSize = cpu_to_le16(512); @@ -2167,7 +2203,6 @@ static void dwc3_gadget_disconnect_interrupt(struct dwc3 *dwc) dwc3_writel(dwc->regs, DWC3_DCTL, reg); dwc3_disconnect_gadget(dwc); - dwc->start_config_issued = false; dwc->gadget.speed = USB_SPEED_UNKNOWN; dwc->setup_packet_pending = false; @@ -2218,7 +2253,6 @@ static void dwc3_gadget_reset_interrupt(struct dwc3 *dwc) dwc3_stop_active_transfers(dwc); dwc3_clear_stall_all_ep(dwc); - dwc->start_config_issued = false; /* Reset device address to zero */ reg = dwc3_readl(dwc->regs, DWC3_DCFG); From 038d8248fc62423b16810878f14d59070a0076cb Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Wed, 17 Feb 2016 10:41:41 -0500 Subject: [PATCH 2013/2091] NFSv4: Fix a dentry leak on alias use [ Upstream commit d9dfd8d741683347ee159d25f5b50c346a0df557 ] In the case where d_add_unique() finds an appropriate alias to use it will have already incremented the reference count. An additional dget() to swap the open context's dentry is unnecessary and will leak a reference. 
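The underlying rule is ordinary reference-count discipline: a helper that returns an already-pinned object transfers that reference to the caller, so the caller stores it without taking another. A stripped-down user-space sketch follows; the counting model is simplified and only the d_add_unique()/dput() roles are taken from the patch.

    #include <stdio.h>

    struct dentry {
        int count;                  /* references held by this code path */
    };

    /* Analogue of d_add_unique(): the alias it hands back is already pinned. */
    static struct dentry *add_unique(struct dentry *alias)
    {
        alias->count++;
        return alias;
    }

    static void dput(struct dentry *d)
    {
        d->count--;
    }

    int main(void)
    {
        struct dentry old = { .count = 1 }, alias = { .count = 0 };
        struct dentry *ctx_dentry = &old;
        struct dentry *d;

        d = add_unique(&alias);     /* comes back with one reference taken */
        dput(ctx_dentry);           /* drop the reference being replaced */
        ctx_dentry = d;             /* plain assignment -- no extra dget() */

        printf("alias refs: %d (1 = balanced, 2 = leaked by an extra dget)\n",
               ctx_dentry->count);
        return 0;
    }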
Signed-off-by: Benjamin Coddington Fixes: 275bb307865a3 ("NFSv4: Move dentry instantiation into the NFSv4-...") Cc: stable@vger.kernel.org # 3.10+ Signed-off-by: Trond Myklebust Signed-off-by: Sasha Levin --- fs/nfs/nfs4proc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 2c4f41c343668..84706204cc336 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2331,9 +2331,9 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, dentry = d_add_unique(dentry, igrab(state->inode)); if (dentry == NULL) { dentry = opendata->dentry; - } else if (dentry != ctx->dentry) { + } else { dput(ctx->dentry); - ctx->dentry = dget(dentry); + ctx->dentry = dentry; } nfs_set_verifier(dentry, nfs_save_change_attribute(d_inode(opendata->dir))); From 619400280f304285af5ae99fe674f11ebf9e40cc Mon Sep 17 00:00:00 2001 From: Andrey Skvortsov Date: Fri, 29 Jan 2016 00:07:30 +0300 Subject: [PATCH 2014/2091] USB: option: add support for SIM7100E [ Upstream commit 3158a8d416f4e1b79dcc867d67cb50013140772c ] $ lsusb: Bus 001 Device 101: ID 1e0e:9001 Qualcomm / Option $ usb-devices: T: Bus=01 Lev=02 Prnt=02 Port=00 Cnt=01 Dev#=101 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 2 P: Vendor=1e0e ProdID=9001 Rev= 2.32 S: Manufacturer=SimTech, Incorporated S: Product=SimTech, Incorporated S: SerialNumber=0123456789ABCDEF C:* #Ifs= 7 Cfg#= 1 Atr=80 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option I:* If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I:* If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I:* If#= 5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan I:* If#= 6 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=(none) The last interface (6) is used for Android Composite ADB interface. 
Serial port layout: 0: QCDM/DIAG 1: NMEA 2: AT 3: AT/PPP 4: audio Signed-off-by: Andrey Skvortsov Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Sasha Levin --- drivers/usb/serial/option.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 88540596973f1..a581361c0de62 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -318,6 +318,7 @@ static void option_instat_callback(struct urb *urb); #define TOSHIBA_PRODUCT_G450 0x0d45 #define ALINK_VENDOR_ID 0x1e0e +#define SIMCOM_PRODUCT_SIM7100E 0x9001 /* Yes, ALINK_VENDOR_ID */ #define ALINK_PRODUCT_PH300 0x9100 #define ALINK_PRODUCT_3GU 0x9200 @@ -610,6 +611,10 @@ static const struct option_blacklist_info zte_1255_blacklist = { .reserved = BIT(3) | BIT(4), }; +static const struct option_blacklist_info simcom_sim7100e_blacklist = { + .reserved = BIT(5) | BIT(6), +}; + static const struct option_blacklist_info telit_le910_blacklist = { .sendsetup = BIT(0), .reserved = BIT(1) | BIT(2), @@ -1657,6 +1662,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE(ALINK_VENDOR_ID, 0x9000) }, { USB_DEVICE(ALINK_VENDOR_ID, ALINK_PRODUCT_PH300) }, { USB_DEVICE_AND_INTERFACE_INFO(ALINK_VENDOR_ID, ALINK_PRODUCT_3GU, 0xff, 0xff, 0xff) }, + { USB_DEVICE(ALINK_VENDOR_ID, SIMCOM_PRODUCT_SIM7100E), + .driver_info = (kernel_ulong_t)&simcom_sim7100e_blacklist }, { USB_DEVICE(ALCATEL_VENDOR_ID, ALCATEL_PRODUCT_X060S_X200), .driver_info = (kernel_ulong_t)&alcatel_x200_blacklist }, From d15706c9c9d206bb2ad9c8a589da380daf1356aa Mon Sep 17 00:00:00 2001 From: Ken Lin Date: Mon, 1 Feb 2016 14:57:25 -0500 Subject: [PATCH 2015/2091] USB: cp210x: add IDs for GE B650V3 and B850V3 boards [ Upstream commit 6627ae19385283b89356a199d7f03c75ba35fb29 ] Add USB ID for cp2104/5 devices on GE B650v3 and B850v3 boards. Signed-off-by: Ken Lin Signed-off-by: Akshay Bhat Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Sasha Levin --- drivers/usb/serial/cp210x.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index 1dd9919081f80..a7caf53d8b5e6 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -162,6 +162,8 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x1843, 0x0200) }, /* Vaisala USB Instrument Cable */ { USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */ { USB_DEVICE(0x18EF, 0xE025) }, /* ELV Marble Sound Board 1 */ + { USB_DEVICE(0x1901, 0x0190) }, /* GE B850 CP2105 Recorder interface */ + { USB_DEVICE(0x1901, 0x0193) }, /* GE B650 CP2104 PMC interface */ { USB_DEVICE(0x1ADB, 0x0001) }, /* Schweitzer Engineering C662 Cable */ { USB_DEVICE(0x1B1C, 0x1C00) }, /* Corsair USB Dongle */ { USB_DEVICE(0x1BA4, 0x0002) }, /* Silicon Labs 358x factory default */ From 664a608ef37cf8f8bf155d865876314618686756 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Fri, 12 Feb 2016 16:40:00 +0100 Subject: [PATCH 2016/2091] USB: option: add "4G LTE usb-modem U901" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit d061c1caa31d4d9792cfe48a2c6b309a0e01ef46 ] Thomas reports: T: Bus=01 Lev=01 Prnt=01 Port=03 Cnt=01 Dev#= 4 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=05c6 ProdID=6001 Rev=00.00 S: Manufacturer=USB Modem S: Product=USB Modem S: SerialNumber=1234567890ABCDEF C: #Ifs= 5 Cfg#= 1 Atr=e0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) 
Sub=ff Prot=ff Driver=option I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=option I: If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan I: If#= 4 Alt= 0 #EPs= 2 Cls=08(stor.) Sub=06 Prot=50 Driver=usb-storage Cc: Reported-by: Thomas Schäfer Signed-off-by: Bjørn Mork Signed-off-by: Johan Hovold Signed-off-by: Sasha Levin --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index a581361c0de62..2590f1e467f28 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1135,6 +1135,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE(KYOCERA_VENDOR_ID, KYOCERA_PRODUCT_KPC650) }, { USB_DEVICE(KYOCERA_VENDOR_ID, KYOCERA_PRODUCT_KPC680) }, { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x6000)}, /* ZTE AC8700 */ + { USB_DEVICE_AND_INTERFACE_INFO(QUALCOMM_VENDOR_ID, 0x6001, 0xff, 0xff, 0xff), /* 4G LTE usb-modem U901 */ + .driver_info = (kernel_ulong_t)&net_intf3_blacklist }, { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x6613)}, /* Onda H600/ZTE MF330 */ { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x0023)}, /* ONYX 3G device */ { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x9000)}, /* SIMCom SIM5218 */ From 79151933d58376c5d7429047b397c6d59fad8968 Mon Sep 17 00:00:00 2001 From: Alexandra Yates Date: Mon, 16 Nov 2015 11:22:16 -0500 Subject: [PATCH 2017/2091] ahci: Order SATA device IDs for codename Lewisburg [ Upstream commit 4d92f0099a06ef0e36c7673f7c090f1a448b2d1b ] This change was to preserve the ascending order of device IDs. There was an exception with the first two Lewisburg device IDs to keep all device IDs of the same kind grouped by code name. Signed-off-by: Alexandra Yates signed-off-by: Tejun Heo Signed-off-by: Sasha Levin --- drivers/ata/ahci.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 666fd8a1500ac..1e284c1598411 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -362,6 +362,16 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(INTEL, 0xa105), board_ahci }, /* Sunrise Point-H RAID */ { PCI_VDEVICE(INTEL, 0xa107), board_ahci }, /* Sunrise Point-H RAID */ { PCI_VDEVICE(INTEL, 0xa10f), board_ahci }, /* Sunrise Point-H RAID */ + { PCI_VDEVICE(INTEL, 0x2822), board_ahci }, /* Lewisburg RAID*/ + { PCI_VDEVICE(INTEL, 0x2826), board_ahci }, /* Lewisburg RAID*/ + { PCI_VDEVICE(INTEL, 0xa182), board_ahci }, /* Lewisburg AHCI*/ + { PCI_VDEVICE(INTEL, 0xa184), board_ahci }, /* Lewisburg RAID*/ + { PCI_VDEVICE(INTEL, 0xa186), board_ahci }, /* Lewisburg RAID*/ + { PCI_VDEVICE(INTEL, 0xa18e), board_ahci }, /* Lewisburg RAID*/ + { PCI_VDEVICE(INTEL, 0xa202), board_ahci }, /* Lewisburg AHCI*/ + { PCI_VDEVICE(INTEL, 0xa204), board_ahci }, /* Lewisburg RAID*/ + { PCI_VDEVICE(INTEL, 0xa206), board_ahci }, /* Lewisburg RAID*/ + { PCI_VDEVICE(INTEL, 0xa20e), board_ahci }, /* Lewisburg RAID*/ /* JMicron 360/1/3/5/6, match class to avoid IDE function */ { PCI_VENDOR_ID_JMICRON, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, From 2340493041d9a65fd16a3f356f0a0fda4e48934b Mon Sep 17 00:00:00 2001 From: Alexandra Yates Date: Tue, 3 Nov 2015 14:14:18 -0800 Subject: [PATCH 2018/2091] ahci: add new Intel device IDs [ Upstream commit 56e74338a535cbcc2f2da08b1ea1a92920194364 ] Adding Intel codename Lewisburg platform device IDs for SATA. 
Signed-off-by: Alexandra Yates Signed-off-by: Tejun Heo Signed-off-by: Sasha Levin --- drivers/ata/ahci.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 1e284c1598411..fd9787a3d18b5 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -332,6 +332,16 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(INTEL, 0x1f37), board_ahci_avn }, /* Avoton RAID */ { PCI_VDEVICE(INTEL, 0x1f3e), board_ahci_avn }, /* Avoton RAID */ { PCI_VDEVICE(INTEL, 0x1f3f), board_ahci_avn }, /* Avoton RAID */ + { PCI_VDEVICE(INTEL, 0xa182), board_ahci }, /* Lewisburg AHCI*/ + { PCI_VDEVICE(INTEL, 0xa202), board_ahci }, /* Lewisburg AHCI*/ + { PCI_VDEVICE(INTEL, 0xa184), board_ahci }, /* Lewisburg RAID*/ + { PCI_VDEVICE(INTEL, 0xa204), board_ahci }, /* Lewisburg RAID*/ + { PCI_VDEVICE(INTEL, 0xa186), board_ahci }, /* Lewisburg RAID*/ + { PCI_VDEVICE(INTEL, 0xa206), board_ahci }, /* Lewisburg RAID*/ + { PCI_VDEVICE(INTEL, 0x2822), board_ahci }, /* Lewisburg RAID*/ + { PCI_VDEVICE(INTEL, 0x2826), board_ahci }, /* Lewisburg RAID*/ + { PCI_VDEVICE(INTEL, 0xa18e), board_ahci }, /* Lewisburg RAID*/ + { PCI_VDEVICE(INTEL, 0xa20e), board_ahci }, /* Lewisburg RAID*/ { PCI_VDEVICE(INTEL, 0x2823), board_ahci }, /* Wellsburg RAID */ { PCI_VDEVICE(INTEL, 0x2827), board_ahci }, /* Wellsburg RAID */ { PCI_VDEVICE(INTEL, 0x8d02), board_ahci }, /* Wellsburg AHCI */ From 5294fcf306de471172dd4a1ed7b35592fea858cd Mon Sep 17 00:00:00 2001 From: Alexandra Yates Date: Wed, 17 Feb 2016 19:36:20 -0800 Subject: [PATCH 2019/2091] Adding Intel Lewisburg device IDs for SATA [ Upstream commit f5bdd66c705484b4bc77eb914be15c1b7881fae7 ] This patch complements the list of device IDs previously added for lewisburg sata. Signed-off-by: Alexandra Yates Signed-off-by: Tejun Heo Cc: stable@vger.kernel.org Signed-off-by: Sasha Levin --- drivers/ata/ahci.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index fd9787a3d18b5..34825d63d4835 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -373,15 +373,21 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(INTEL, 0xa107), board_ahci }, /* Sunrise Point-H RAID */ { PCI_VDEVICE(INTEL, 0xa10f), board_ahci }, /* Sunrise Point-H RAID */ { PCI_VDEVICE(INTEL, 0x2822), board_ahci }, /* Lewisburg RAID*/ + { PCI_VDEVICE(INTEL, 0x2823), board_ahci }, /* Lewisburg AHCI*/ { PCI_VDEVICE(INTEL, 0x2826), board_ahci }, /* Lewisburg RAID*/ + { PCI_VDEVICE(INTEL, 0x2827), board_ahci }, /* Lewisburg RAID*/ { PCI_VDEVICE(INTEL, 0xa182), board_ahci }, /* Lewisburg AHCI*/ { PCI_VDEVICE(INTEL, 0xa184), board_ahci }, /* Lewisburg RAID*/ { PCI_VDEVICE(INTEL, 0xa186), board_ahci }, /* Lewisburg RAID*/ { PCI_VDEVICE(INTEL, 0xa18e), board_ahci }, /* Lewisburg RAID*/ + { PCI_VDEVICE(INTEL, 0xa1d2), board_ahci }, /* Lewisburg RAID*/ + { PCI_VDEVICE(INTEL, 0xa1d6), board_ahci }, /* Lewisburg RAID*/ { PCI_VDEVICE(INTEL, 0xa202), board_ahci }, /* Lewisburg AHCI*/ { PCI_VDEVICE(INTEL, 0xa204), board_ahci }, /* Lewisburg RAID*/ { PCI_VDEVICE(INTEL, 0xa206), board_ahci }, /* Lewisburg RAID*/ { PCI_VDEVICE(INTEL, 0xa20e), board_ahci }, /* Lewisburg RAID*/ + { PCI_VDEVICE(INTEL, 0xa252), board_ahci }, /* Lewisburg RAID*/ + { PCI_VDEVICE(INTEL, 0xa256), board_ahci }, /* Lewisburg RAID*/ /* JMicron 360/1/3/5/6, match class to avoid IDE function */ { PCI_VENDOR_ID_JMICRON, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, From 9077dc77a92526b028111db2943a26ff8e475d03 Mon Sep 17 00:00:00 2001 From: Peter 
Rosin Date: Thu, 18 Feb 2016 14:07:52 +0100 Subject: [PATCH 2020/2091] hwmon: (ads1015) Handle negative conversion values correctly [ Upstream commit acc146943957d7418a6846f06e029b2c5e87e0d5 ] Make the divisor signed as DIV_ROUND_CLOSEST is undefined for negative dividends when the divisor is unsigned. Signed-off-by: Peter Rosin Cc: stable@vger.kernel.org Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin --- drivers/hwmon/ads1015.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/ads1015.c b/drivers/hwmon/ads1015.c index f155b83804819..2b3105c8aed39 100644 --- a/drivers/hwmon/ads1015.c +++ b/drivers/hwmon/ads1015.c @@ -126,7 +126,7 @@ static int ads1015_reg_to_mv(struct i2c_client *client, unsigned int channel, struct ads1015_data *data = i2c_get_clientdata(client); unsigned int pga = data->channel_data[channel].pga; int fullscale = fullscale_table[pga]; - const unsigned mask = data->id == ads1115 ? 0x7fff : 0x7ff0; + const int mask = data->id == ads1115 ? 0x7fff : 0x7ff0; return DIV_ROUND_CLOSEST(reg * fullscale, mask); } From e0a8bcb7cd5e71dfb3d8d5ed3e2db6d9a7c2b0df Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Fri, 19 Feb 2016 18:09:51 -0600 Subject: [PATCH 2021/2091] hwmon: (gpio-fan) Remove un-necessary speed_index lookup for thermal hook [ Upstream commit 000e0949148382c4962489593a2f05504c2a6771 ] Thermal hook gpio_fan_get_cur_state is only interested in knowing the current speed index that was setup in the system, this is already available as part of fan_data->speed_index which is always set by set_fan_speed. Using get_fan_speed_index is useful when we have no idea about the fan speed configuration (for example during fan_ctrl_init). When thermal framework invokes gpio_fan_get_cur_state=>get_fan_speed_index via gpio_fan_get_cur_state especially in a polled configuration for thermal governor, we basically hog the i2c interface to the extent that other functions fail to get any traffic out :(. Instead, just provide the last state set in the driver - since the gpio fan driver is responsible for the fan state immaterial of override, the fan_data->speed_index should accurately reflect the state. Fixes: b5cf88e46bad ("(gpio-fan): Add thermal control hooks") Reported-by: Tony Lindgren Cc: Guenter Roeck Cc: Eduardo Valentin Signed-off-by: Nishanth Menon Cc: stable@vger.kernel.org Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin --- drivers/hwmon/gpio-fan.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/hwmon/gpio-fan.c b/drivers/hwmon/gpio-fan.c index a3dae6d0082a0..83ea8c8039faf 100644 --- a/drivers/hwmon/gpio-fan.c +++ b/drivers/hwmon/gpio-fan.c @@ -406,16 +406,11 @@ static int gpio_fan_get_cur_state(struct thermal_cooling_device *cdev, unsigned long *state) { struct gpio_fan_data *fan_data = cdev->devdata; - int r; if (!fan_data) return -EINVAL; - r = get_fan_speed_index(fan_data); - if (r < 0) - return r; - - *state = r; + *state = fan_data->speed_index; return 0; } From cbd382759e953e56701202591e765e7e17957eef Mon Sep 17 00:00:00 2001 From: Lisa Du Date: Wed, 17 Feb 2016 09:32:52 +0800 Subject: [PATCH 2022/2091] drivers: android: correct the size of struct binder_uintptr_t for BC_DEAD_BINDER_DONE [ Upstream commit 7a64cd887fdb97f074c3fda03bee0bfb9faceac3 ] There's one point was missed in the patch commit da49889deb34 ("staging: binder: Support concurrent 32 bit and 64 bit processes."). When configure BINDER_IPC_32BIT, the size of binder_uintptr_t was 32bits, but size of void * is 64bit on 64bit system. 
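[Note: not part of the original patch — a small illustration of the size mismatch described above, assuming an LP64 kernel built with BINDER_IPC_32BIT; the typedef mirrors the uapi binder header and the "demo" helper is hypothetical, not driver code.]

#include <linux/types.h>
#include <linux/printk.h>

#ifdef BINDER_IPC_32BIT
typedef __u32 binder_uintptr_t;	/* 4 bytes */
#else
typedef __u64 binder_uintptr_t;	/* 8 bytes */
#endif

static void demo_show_sizes(void)
{
	/* With BINDER_IPC_32BIT on a 64-bit kernel this prints 4 vs 8, so
	 * stepping the command stream by sizeof(void *) instead of
	 * sizeof(binder_uintptr_t) overshoots by 4 bytes.
	 */
	pr_info("sizeof(binder_uintptr_t)=%zu sizeof(void *)=%zu\n",
		sizeof(binder_uintptr_t), sizeof(void *));
}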
Correct it here. Signed-off-by: Lisa Du Signed-off-by: Nicolas Boichat Fixes: da49889deb34 ("staging: binder: Support concurrent 32 bit and 64 bit processes.") Cc: Acked-by: Olof Johansson Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/android/binder.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 6607f3c6ace10..f1a26d937d984 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -2074,7 +2074,7 @@ static int binder_thread_write(struct binder_proc *proc, if (get_user(cookie, (binder_uintptr_t __user *)ptr)) return -EFAULT; - ptr += sizeof(void *); + ptr += sizeof(cookie); list_for_each_entry(w, &proc->delivered_death, entry) { struct binder_ref_death *tmp_death = container_of(w, struct binder_ref_death, work); From 0861e96600db2ecbd9659b3eafc292641e3ec69c Mon Sep 17 00:00:00 2001 From: Gerhard Uttenthaler Date: Tue, 22 Dec 2015 17:29:16 +0100 Subject: [PATCH 2023/2091] can: ems_usb: Fix possible tx overflow [ Upstream commit 90cfde46586d2286488d8ed636929e936c0c9ab2 ] This patch fixes the problem that more CAN messages could be sent to the interface as could be send on the CAN bus. This was more likely for slow baud rates. The sleeping _start_xmit was woken up in the _write_bulk_callback. Under heavy TX load this produced another bulk transfer without checking the free_slots variable and hence caused the overflow in the interface. Signed-off-by: Gerhard Uttenthaler Cc: linux-stable Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- drivers/net/can/usb/ems_usb.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c index 866bac0ae7e96..339b0c5ce60c3 100644 --- a/drivers/net/can/usb/ems_usb.c +++ b/drivers/net/can/usb/ems_usb.c @@ -117,6 +117,9 @@ MODULE_LICENSE("GPL v2"); */ #define EMS_USB_ARM7_CLOCK 8000000 +#define CPC_TX_QUEUE_TRIGGER_LOW 25 +#define CPC_TX_QUEUE_TRIGGER_HIGH 35 + /* * CAN-Message representation in a CPC_MSG. 
Message object type is * CPC_MSG_TYPE_CAN_FRAME or CPC_MSG_TYPE_RTR_FRAME or @@ -278,6 +281,11 @@ static void ems_usb_read_interrupt_callback(struct urb *urb) switch (urb->status) { case 0: dev->free_slots = dev->intr_in_buffer[1]; + if(dev->free_slots > CPC_TX_QUEUE_TRIGGER_HIGH){ + if (netif_queue_stopped(netdev)){ + netif_wake_queue(netdev); + } + } break; case -ECONNRESET: /* unlink */ @@ -529,8 +537,6 @@ static void ems_usb_write_bulk_callback(struct urb *urb) /* Release context */ context->echo_index = MAX_TX_URBS; - if (netif_queue_stopped(netdev)) - netif_wake_queue(netdev); } /* @@ -590,7 +596,7 @@ static int ems_usb_start(struct ems_usb *dev) int err, i; dev->intr_in_buffer[0] = 0; - dev->free_slots = 15; /* initial size */ + dev->free_slots = 50; /* initial size */ for (i = 0; i < MAX_RX_URBS; i++) { struct urb *urb = NULL; @@ -838,7 +844,7 @@ static netdev_tx_t ems_usb_start_xmit(struct sk_buff *skb, struct net_device *ne /* Slow down tx path */ if (atomic_read(&dev->active_tx_urbs) >= MAX_TX_URBS || - dev->free_slots < 5) { + dev->free_slots < CPC_TX_QUEUE_TRIGGER_LOW) { netif_stop_queue(netdev); } } From 82f8c49d31dc618d1508a2870595eab2f7d5cbbd Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Sun, 21 Feb 2016 19:09:22 -0500 Subject: [PATCH 2024/2091] dm: fix dm_rq_target_io leak on faults with .request_fn DM w/ blk-mq paths [ Upstream commit 4328daa2e79ed904a42ce00a9f38b9c36b44b21a ] Using request-based DM mpath configured with the following stacking (.request_fn DM mpath ontop of scsi-mq paths): echo Y > /sys/module/scsi_mod/parameters/use_blk_mq echo N > /sys/module/dm_mod/parameters/use_blk_mq 'struct dm_rq_target_io' would leak if a request is requeued before a blk-mq clone is allocated (or fails to allocate). free_rq_tio() wasn't being called. kmemleak reported: unreferenced object 0xffff8800b90b98c0 (size 112): comm "kworker/7:1H", pid 5692, jiffies 4295056109 (age 78.589s) hex dump (first 32 bytes): 00 d0 5c 2c 03 88 ff ff 40 00 bf 01 00 c9 ff ff ..\,....@....... e0 d9 b1 34 00 88 ff ff 00 00 00 00 00 00 00 00 ...4............ backtrace: [] kmemleak_alloc+0x4e/0xb0 [] kmem_cache_alloc+0xc3/0x1e0 [] mempool_alloc_slab+0x15/0x20 [] mempool_alloc+0x6e/0x170 [] dm_old_prep_fn+0x3c/0x180 [dm_mod] [] blk_peek_request+0x168/0x290 [] dm_request_fn+0xb2/0x1b0 [dm_mod] [] __blk_run_queue+0x33/0x40 [] blk_delay_work+0x25/0x40 [] process_one_work+0x14f/0x3d0 [] worker_thread+0x125/0x4b0 [] kthread+0xd8/0xf0 [] ret_from_fork+0x3f/0x70 [] 0xffffffffffffffff crash> struct -o dm_rq_target_io struct dm_rq_target_io { ... } SIZE: 112 Fixes: e5863d9ad7 ("dm: allocate requests in target when stacking on blk-mq devices") Cc: stable@vger.kernel.org # 4.0+ Signed-off-by: Mike Snitzer Signed-off-by: Sasha Levin --- drivers/md/dm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 8b72ceee0f61e..62610aafaac77 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1146,6 +1146,8 @@ static void dm_unprep_request(struct request *rq) if (clone) free_rq_clone(clone); + else if (!tio->md->queue->mq_ops) + free_rq_tio(tio); } /* From 907c39407e1407d50768943367852c5abd5cde14 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 19 Feb 2016 14:44:14 +0100 Subject: [PATCH 2025/2091] s390/compat: correct restore of high gprs on signal return [ Upstream commit 342300cc9cd3428bc6bfe5809bfcc1b9a0f06702 ] git commit 8070361799ae1e3f4ef347bd10f0a508ac10acfb "s390: add support for vector extension" broke 31-bit compat processes in regard to signal handling. 
The restore_sigregs_ext32() function is used to restore the additional elements from the user space signal frame. Among the additional elements are the upper registers halves for 64-bit register support for 31-bit processes. The copy_from_user that is used to retrieve the high-gprs array from the user stack uses an incorrect length, 8 bytes instead of 64 bytes. This causes incorrect upper register halves to get loaded. Cc: stable@vger.kernel.org # 3.8+ Signed-off-by: Martin Schwidefsky Signed-off-by: Sasha Levin --- arch/s390/kernel/compat_signal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index c78ba51ae285b..24b7e554db277 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -293,7 +293,7 @@ static int restore_sigregs_ext32(struct pt_regs *regs, /* Restore high gprs from signal stack */ if (__copy_from_user(&gprs_high, &sregs_ext->gprs_high, - sizeof(&sregs_ext->gprs_high))) + sizeof(sregs_ext->gprs_high))) return -EFAULT; for (i = 0; i < NUM_GPRS; i++) *(__u32 *)&regs->gprs[i] = gprs_high[i]; From a7927a04ac273abc6e15ffe31120f95d1f49b023 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 19 Feb 2016 18:05:10 -0500 Subject: [PATCH 2026/2091] drm/radeon/pm: adjust display configuration after powerstate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 39d4275058baf53e89203407bf3841ff2c74fa32 ] set_power_state defaults to no displays, so we need to update the display configuration after setting up the powerstate on the first call. In most cases this is not an issue since ends up getting called multiple times at any given modeset and the proper order is achieved in the display changed handling at the top of the function. Reviewed-by: Christian König Acked-by: Jordan Lazare Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Sasha Levin --- drivers/gpu/drm/radeon/radeon_pm.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c index 91764320c56f6..763b523e671fc 100644 --- a/drivers/gpu/drm/radeon/radeon_pm.c +++ b/drivers/gpu/drm/radeon/radeon_pm.c @@ -1079,8 +1079,6 @@ static void radeon_dpm_change_power_state_locked(struct radeon_device *rdev) /* update display watermarks based on new power state */ radeon_bandwidth_update(rdev); - /* update displays */ - radeon_dpm_display_configuration_changed(rdev); rdev->pm.dpm.current_active_crtcs = rdev->pm.dpm.new_active_crtcs; rdev->pm.dpm.current_active_crtc_count = rdev->pm.dpm.new_active_crtc_count; @@ -1101,6 +1099,9 @@ static void radeon_dpm_change_power_state_locked(struct radeon_device *rdev) radeon_dpm_post_set_power_state(rdev); + /* update displays */ + radeon_dpm_display_configuration_changed(rdev); + if (rdev->asic->dpm.force_performance_level) { if (rdev->pm.dpm.thermal_active) { enum radeon_dpm_forced_level level = rdev->pm.dpm.forced_level; From 4ba9f8051f7fc263cc5915fd6c2ac6d9195418b4 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Thu, 18 Feb 2016 18:55:54 +0000 Subject: [PATCH 2027/2091] sunrpc/cache: fix off-by-one in qword_get() [ Upstream commit b7052cd7bcf3c1478796e93e3dff2b44c9e82943 ] The qword_get() function NUL-terminates its output buffer. If the input string is in hex format \xXXXX... and the same length as the output buffer, there is an off-by-one: int qword_get(char **bpp, char *dest, int bufsize) { ...
while (len < bufsize) { ... *dest++ = (h << 4) | l; len++; } ... *dest = '\0'; return len; } This patch ensures the NUL terminator doesn't fall outside the output buffer. Signed-off-by: Stefan Hajnoczi Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields Signed-off-by: Sasha Levin --- net/sunrpc/cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 2928afffbb81f..8d79e70bd9786 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1218,7 +1218,7 @@ int qword_get(char **bpp, char *dest, int bufsize) if (bp[0] == '\\' && bp[1] == 'x') { /* HEX STRING */ bp += 2; - while (len < bufsize) { + while (len < bufsize - 1) { int h, l; h = hex_to_bin(bp[0]); From b29de09dab6c486a7cecdf4741632fa48d903428 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 16 Feb 2016 14:47:31 +0000 Subject: [PATCH 2028/2091] KVM: arm/arm64: vgic: Ensure bitmaps are long enough [ Upstream commit 236cf17c2502007a9d2dda3c39fb0d9a6bd03cc2 ] When we allocate bitmaps in vgic_vcpu_init_maps, we divide the number of bits we need by 8 to figure out how many bytes to allocate. However, bitmap elements are always accessed as unsigned longs, and if we didn't happen to allocate a size such that size % sizeof(unsigned long) == 0, bitmap accesses may go past the end of the allocation. When using KASAN (which does byte-granular access checks), this results in a continuous stream of BUGs whenever these bitmaps are accessed: ============================================================================= BUG kmalloc-128 (Tainted: G B ): kasan: bad access detected ----------------------------------------------------------------------------- INFO: Allocated in vgic_init.part.25+0x55c/0x990 age=7493 cpu=3 pid=1730 INFO: Slab 0xffffffbde6d5da40 objects=16 used=15 fp=0xffffffc935769700 flags=0x4000000000000080 INFO: Object 0xffffffc935769500 @offset=1280 fp=0x (null) Bytes b4 ffffffc9357694f0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object ffffffc935769500: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object ffffffc935769510: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object ffffffc935769520: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object ffffffc935769530: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object ffffffc935769540: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object ffffffc935769550: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object ffffffc935769560: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object ffffffc935769570: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Padding ffffffc9357695b0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Padding ffffffc9357695c0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Padding ffffffc9357695d0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Padding ffffffc9357695e0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Padding ffffffc9357695f0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 
CPU: 3 PID: 1740 Comm: kvm-vcpu-0 Tainted: G B 4.4.0+ #17 Hardware name: ARM Juno development board (r1) (DT) Call trace: [] dump_backtrace+0x0/0x280 [] show_stack+0x14/0x20 [] dump_stack+0x100/0x188 [] print_trailer+0xfc/0x168 [] object_err+0x3c/0x50 [] kasan_report_error+0x244/0x558 [] __asan_report_load8_noabort+0x48/0x50 [] __bitmap_or+0xc0/0xc8 [] kvm_vgic_flush_hwstate+0x1bc/0x650 [] kvm_arch_vcpu_ioctl_run+0x2ec/0xa60 [] kvm_vcpu_ioctl+0x474/0xa68 [] do_vfs_ioctl+0x5b8/0xcb0 [] SyS_ioctl+0x8c/0xa0 [] el0_svc_naked+0x24/0x28 Memory state around the buggy address: ffffffc935769400: 00 00 fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffffffc935769480: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffffffc935769500: 04 fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ^ ffffffc935769580: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffffffc935769600: 00 00 00 00 fc fc fc fc fc fc fc fc fc fc fc fc ================================================================== Fix the issue by always allocating a multiple of sizeof(unsigned long), as we do elsewhere in the vgic code. Fixes: c1bfb577a ("arm/arm64: KVM: vgic: switch to dynamic allocation") Cc: stable@vger.kernel.org Acked-by: Marc Zyngier Acked-by: Christoffer Dall Signed-off-by: Mark Rutland Signed-off-by: Marc Zyngier Signed-off-by: Sasha Levin --- virt/kvm/arm/vgic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 950064a0942dd..934d56f6803c3 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1602,8 +1602,8 @@ void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - - int sz = (nr_irqs - VGIC_NR_PRIVATE_IRQS) / 8; + int nr_longs = BITS_TO_LONGS(nr_irqs - VGIC_NR_PRIVATE_IRQS); + int sz = nr_longs * sizeof(unsigned long); vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL); vgic_cpu->active_shared = kzalloc(sz, GFP_KERNEL); vgic_cpu->pend_act_shared = kzalloc(sz, GFP_KERNEL); From 0ccb848f62b5c9077cdeb903e324ca635806f804 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 10 Feb 2016 17:50:23 +0100 Subject: [PATCH 2029/2091] KVM: x86: fix missed hardware breakpoints [ Upstream commit 172b2386ed16a9143d9a456aae5ec87275c61489 ] Sometimes when setting a breakpoint a process doesn't stop on it. This is because the debug registers are not loaded correctly on VCPU load. The following simple reproducer from Oleg Nesterov tries using debug registers in two threads. To see the bug, run a 2-VCPU guest with "taskset -c 0" and run "./bp 0 1" inside the guest. 
#include #include #include #include #include #include #include #include #include #define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER) unsigned long encode_dr7(int drnum, int enable, unsigned int type, unsigned int len) { unsigned long dr7; dr7 = ((len | type) & 0xf) << (DR_CONTROL_SHIFT + drnum * DR_CONTROL_SIZE); if (enable) dr7 |= (DR_GLOBAL_ENABLE << (drnum * DR_ENABLE_SIZE)); return dr7; } int write_dr(int pid, int dr, unsigned long val) { return ptrace(PTRACE_POKEUSER, pid, offsetof (struct user, u_debugreg[dr]), val); } void set_bp(pid_t pid, void *addr) { unsigned long dr7; assert(write_dr(pid, 0, (long)addr) == 0); dr7 = encode_dr7(0, 1, DR_RW_EXECUTE, DR_LEN_1); assert(write_dr(pid, 7, dr7) == 0); } void *get_rip(int pid) { return (void*)ptrace(PTRACE_PEEKUSER, pid, offsetof(struct user, regs.rip), 0); } void test(int nr) { void *bp_addr = &&label + nr, *bp_hit; int pid; printf("test bp %d\n", nr); assert(nr < 16); // see 16 asm nops below pid = fork(); if (!pid) { assert(ptrace(PTRACE_TRACEME, 0,0,0) == 0); kill(getpid(), SIGSTOP); for (;;) { label: asm ( "nop; nop; nop; nop;" "nop; nop; nop; nop;" "nop; nop; nop; nop;" "nop; nop; nop; nop;" ); } } assert(pid == wait(NULL)); set_bp(pid, bp_addr); for (;;) { assert(ptrace(PTRACE_CONT, pid, 0, 0) == 0); assert(pid == wait(NULL)); bp_hit = get_rip(pid); if (bp_hit != bp_addr) fprintf(stderr, "ERR!! hit wrong bp %ld != %d\n", bp_hit - &&label, nr); } } int main(int argc, const char *argv[]) { while (--argc) { int nr = atoi(*++argv); if (!fork()) test(nr); } while (wait(NULL) > 0) ; return 0; } Cc: stable@vger.kernel.org Suggested-by: Nadav Amit Reported-by: Andrey Wagin Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin --- arch/x86/kvm/x86.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index fed4c84eac443..0dbf28a6d83d1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2968,6 +2968,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) accumulate_steal_time(vcpu); kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); + vcpu->arch.switch_db_regs |= KVM_DEBUGREG_RELOAD; } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) From 56029ce94469d7e183e8241e214837ac92c8520b Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 19 Feb 2016 13:11:46 +0100 Subject: [PATCH 2030/2091] KVM: async_pf: do not warn on page allocation failures [ Upstream commit d7444794a02ff655eda87e3cc54e86b940e7736f ] In async_pf we try to allocate with NOWAIT to get an element quickly or fail. This code also handle failures gracefully. Lets silence potential page allocation failures under load. qemu-system-s39: page allocation failure: order:0,mode:0x2200000 [...] 
Call Trace: ([<00000000001146b8>] show_trace+0xf8/0x148) [<000000000011476a>] show_stack+0x62/0xe8 [<00000000004a36b8>] dump_stack+0x70/0x98 [<0000000000272c3a>] warn_alloc_failed+0xd2/0x148 [<000000000027709e>] __alloc_pages_nodemask+0x94e/0xb38 [<00000000002cd36a>] new_slab+0x382/0x400 [<00000000002cf7ac>] ___slab_alloc.constprop.30+0x2dc/0x378 [<00000000002d03d0>] kmem_cache_alloc+0x160/0x1d0 [<0000000000133db4>] kvm_setup_async_pf+0x6c/0x198 [<000000000013dee8>] kvm_arch_vcpu_ioctl_run+0xd48/0xd58 [<000000000012fcaa>] kvm_vcpu_ioctl+0x372/0x690 [<00000000002f66f6>] do_vfs_ioctl+0x3be/0x510 [<00000000002f68ec>] SyS_ioctl+0xa4/0xb8 [<0000000000781c5e>] system_call+0xd6/0x264 [<000003ffa24fa06a>] 0x3ffa24fa06a Cc: stable@vger.kernel.org Signed-off-by: Christian Borntraeger Reviewed-by: Dominik Dingel Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin --- virt/kvm/async_pf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index 44660aee335f9..f84f5856520a4 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -169,7 +169,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva, * do alloc nowait since if we are going to sleep anyway we * may as well sleep faulting in page */ - work = kmem_cache_zalloc(async_pf_cache, GFP_NOWAIT); + work = kmem_cache_zalloc(async_pf_cache, GFP_NOWAIT | __GFP_NOWARN); if (!work) return 0; From 621a963c422618d1793d9245302766e87cdabb83 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 24 Feb 2016 09:04:24 -0500 Subject: [PATCH 2031/2091] tracing: Fix showing function event in available_events [ Upstream commit d045437a169f899dfb0f6f7ede24cc042543ced9 ] The ftrace:function event is only displayed for parsing the function tracer data. It is not used to enable function tracing, and does not include an "enable" file in its event directory. Originally, this event was kept separate from other events because it did not have a ->reg parameter. But perf added a "reg" parameter for its use which caused issues, because it made the event available to functions where it was not compatible for. Commit 9b63776fa3ca9 "tracing: Do not enable function event with enable" added a TRACE_EVENT_FL_IGNORE_ENABLE flag that prevented the function event from being enabled by normal trace events. But this commit missed keeping the function event from being displayed by the "available_events" directory, which is used to show what events can be enabled by set_event. One documented way to enable all events is to: cat available_events > set_event But because the function event is displayed in the available_events, this now causes an INVALID error: cat: write error: Invalid argument Reported-by: Chunyu Hu Fixes: 9b63776fa3ca9 "tracing: Do not enable function event with enable" Cc: stable@vger.kernel.org # 3.4+ Signed-off-by: Steven Rostedt Signed-off-by: Sasha Levin --- kernel/trace/trace_events.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index c4de47fc5cca0..f69ec1295b0b9 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -683,7 +683,8 @@ t_next(struct seq_file *m, void *v, loff_t *pos) * The ftrace subsystem is for showing formats only. * They can not be enabled or disabled via the event files. 
*/ - if (call->class && call->class->reg) + if (call->class && call->class->reg && + !(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)) return file; } From 265570c9977908479db74fd07b710ec5d5c96e12 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 17 Feb 2016 20:04:08 +0100 Subject: [PATCH 2032/2091] libceph: don't bail early from try_read() when skipping a message [ Upstream commit e7a88e82fe380459b864e05b372638aeacb0f52d ] The contract between try_read() and try_write() is that when called each processes as much data as possible. When instructed by osd_client to skip a message, try_read() is violating this contract by returning after receiving and discarding a single message instead of checking for more. try_write() then gets a chance to write out more requests, generating more replies/skips for try_read() to handle, forcing the messenger into a starvation loop. Cc: stable@vger.kernel.org # 3.10+ Reported-by: Varada Kari Signed-off-by: Ilya Dryomov Tested-by: Varada Kari Reviewed-by: Alex Elder Signed-off-by: Sasha Levin --- net/ceph/messenger.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index e51af69c61bfe..ff92dee168ba3 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -2328,7 +2328,7 @@ static int read_partial_message(struct ceph_connection *con) con->in_base_pos = -front_len - middle_len - data_len - sizeof(m->footer); con->in_tag = CEPH_MSGR_TAG_READY; - return 0; + return 1; } else if ((s64)seq - (s64)con->in_seq > 1) { pr_err("read_partial_message bad seq %lld expected %lld\n", seq, con->in_seq + 1); @@ -2361,7 +2361,7 @@ static int read_partial_message(struct ceph_connection *con) sizeof(m->footer); con->in_tag = CEPH_MSGR_TAG_READY; con->in_seq++; - return 0; + return 1; } BUG_ON(!con->in_msg); From 66333f910d64623fa9cc886c259d57e6d24863cd Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 19 Feb 2016 11:38:57 +0100 Subject: [PATCH 2033/2091] libceph: use the right footer size when skipping a message [ Upstream commit dbc0d3caff5b7591e0cf8e34ca686ca6f4479ee1 ] ceph_msg_footer is 21 bytes long, while ceph_msg_footer_old is only 13. Don't skip too much when CEPH_FEATURE_MSG_AUTH isn't negotiated. Cc: stable@vger.kernel.org # 3.19+ Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder Signed-off-by: Sasha Levin --- net/ceph/messenger.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index ff92dee168ba3..84201c21705ef 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -1203,6 +1203,13 @@ static bool ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor, return new_piece; } +static size_t sizeof_footer(struct ceph_connection *con) +{ + return (con->peer_features & CEPH_FEATURE_MSG_AUTH) ? 
+ sizeof(struct ceph_msg_footer) : + sizeof(struct ceph_msg_footer_old); +} + static void prepare_message_data(struct ceph_msg *msg, u32 data_len) { BUG_ON(!msg); @@ -2326,7 +2333,7 @@ static int read_partial_message(struct ceph_connection *con) ceph_pr_addr(&con->peer_addr.in_addr), seq, con->in_seq + 1); con->in_base_pos = -front_len - middle_len - data_len - - sizeof(m->footer); + sizeof_footer(con); con->in_tag = CEPH_MSGR_TAG_READY; return 1; } else if ((s64)seq - (s64)con->in_seq > 1) { @@ -2358,7 +2365,7 @@ static int read_partial_message(struct ceph_connection *con) /* skip this message */ dout("alloc_msg said skip message\n"); con->in_base_pos = -front_len - middle_len - data_len - - sizeof(m->footer); + sizeof_footer(con); con->in_tag = CEPH_MSGR_TAG_READY; con->in_seq++; return 1; From 49670184289e48451171179470a7dbbdb63f9549 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Wed, 24 Feb 2016 11:05:25 +0800 Subject: [PATCH 2034/2091] usb: chipidea: otg: change workqueue ci_otg as freezable [ Upstream commit d144dfea8af7108f613139623e63952ed7e69c0c ] If we use USB ID pin as wakeup source, and there is a USB block device on this USB OTG (ID) cable, the system will be deadlock after system resume. The root cause for this problem is: the workqueue ci_otg may try to remove hcd before the driver resume has finished, and hcd will disconnect the device on it, then, it will call device_release_driver, and holds the device lock "dev->mutex", but it is never unlocked since it waits workqueue writeback to run to flush the block information, but the workqueue writeback is freezable, it is not thawed before driver resume has finished. When the driver (device: sd 0:0:0:0:) resume goes to dpm_complete, it tries to get its device lock "dev->mutex", but it can't get it forever, then the deadlock occurs. Below call stacks show the situation. So, in order to fix this problem, we need to change workqueue ci_otg as freezable, then the work item in this workqueue will be run after driver's resume, this workqueue will not be blocked forever like above case since the workqueue writeback has been thawed too. Tested at: i.mx6qdl-sabresd and i.mx6sx-sdb. 
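[Note: not part of the original patch — a minimal sketch of the property the fix relies on, using placeholder "demo" names: work items on a freezable workqueue are frozen during suspend and only run again once the system has been thawed, i.e. after driver resume has completed. The original call traces referenced above follow after this note.]

#include <linux/workqueue.h>
#include <linux/errno.h>

static struct workqueue_struct *demo_wq;
static struct work_struct demo_work;

static void demo_fn(struct work_struct *w)
{
	/* runs only after thaw, so it cannot race with the resume path */
}

static int demo_init(void)
{
	demo_wq = create_freezable_workqueue("demo");	/* vs. create_singlethread_workqueue() */
	if (!demo_wq)
		return -ENOMEM;
	INIT_WORK(&demo_work, demo_fn);
	queue_work(demo_wq, &demo_work);
	return 0;
}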
[ 555.178869] kworker/u2:13 D c07de74c 0 826 2 0x00000000 [ 555.185310] Workqueue: ci_otg ci_otg_work [ 555.189353] Backtrace: [ 555.191849] [] (__schedule) from [] (schedule+0x48/0xa0) [ 555.198912] r10:ee471ba0 r9:00000000 r8:00000000 r7:00000002 r6:ee470000 r5:ee471ba4 [ 555.206867] r4:ee470000 [ 555.209453] [] (schedule) from [] (schedule_timeout+0x15c/0x1e0) [ 555.217212] r4:7fffffff r3:edc2b000 [ 555.220862] [] (schedule_timeout) from [] (wait_for_common+0x94/0x144) [ 555.229140] r8:00000000 r7:00000002 r6:ee470000 r5:ee471ba4 r4:7fffffff [ 555.235980] [] (wait_for_common) from [] (wait_for_completion+0x18/0x1c) [ 555.244430] r10:00000001 r9:c0b5563c r8:c0042e48 r7:ef086000 r6:eea4372c r5:ef131b00 [ 555.252383] r4:00000000 [ 555.254970] [] (wait_for_completion) from [] (flush_work+0x19c/0x234) [ 555.263177] [] (flush_work) from [] (flush_delayed_work+0x48/0x4c) [ 555.271106] r8:ed5b5000 r7:c0b38a3c r6:eea439cc r5:eea4372c r4:eea4372c [ 555.277958] [] (flush_delayed_work) from [] (bdi_unregister+0x84/0xec) [ 555.286236] r4:eea43520 r3:20000153 [ 555.289885] [] (bdi_unregister) from [] (blk_cleanup_queue+0x180/0x29c) [ 555.298250] r5:eea43808 r4:eea43400 [ 555.301909] [] (blk_cleanup_queue) from [] (__scsi_remove_device+0x48/0xb8) [ 555.310623] r7:00000000 r6:20000153 r5:ededa950 r4:ededa800 [ 555.316403] [] (__scsi_remove_device) from [] (scsi_forget_host+0x64/0x68) [ 555.325028] r5:ededa800 r4:ed5b5000 [ 555.328689] [] (scsi_forget_host) from [] (scsi_remove_host+0x78/0x104) [ 555.337054] r5:ed5b5068 r4:ed5b5000 [ 555.340709] [] (scsi_remove_host) from [] (usb_stor_disconnect+0x50/0xb4) [ 555.349247] r6:ed5b56e4 r5:ed5b5818 r4:ed5b5690 r3:00000008 [ 555.355025] [] (usb_stor_disconnect) from [] (usb_unbind_interface+0x78/0x25c) [ 555.363997] r8:c13919b4 r7:edd3c000 r6:edd3c020 r5:ee551c68 r4:ee551c00 r3:c04cdf7c [ 555.371892] [] (usb_unbind_interface) from [] (__device_release_driver+0x8c/0x118) [ 555.381213] r10:00000001 r9:edd90c00 r8:c13919b4 r7:ee551c68 r6:c0b546e0 r5:c0b5563c [ 555.389167] r4:edd3c020 [ 555.391752] [] (__device_release_driver) from [] (device_release_driver+0x28/0x34) [ 555.401071] r5:edd3c020 r4:edd3c054 [ 555.404721] [] (device_release_driver) from [] (bus_remove_device+0xe0/0x110) [ 555.413607] r5:edd3c020 r4:ef17f04c [ 555.417253] [] (bus_remove_device) from [] (device_del+0x114/0x21c) [ 555.425270] r6:edd3c028 r5:edd3c020 r4:ee551c00 r3:00000000 [ 555.431045] [] (device_del) from [] (usb_disable_device+0xa4/0x1e8) [ 555.439061] r8:edd3c000 r7:eded8000 r6:00000000 r5:00000001 r4:ee551c00 [ 555.445906] [] (usb_disable_device) from [] (usb_disconnect+0x74/0x224) [ 555.454271] r9:edd90c00 r8:ee551000 r7:ee551c68 r6:ee551c9c r5:ee551c00 r4:00000001 [ 555.462156] [] (usb_disconnect) from [] (usb_disconnect+0x1d8/0x224) [ 555.470259] r10:00000001 r9:edd90000 r8:ee471e2c r7:ee551468 r6:ee55149c r5:ee551400 [ 555.478213] r4:00000001 [ 555.480797] [] (usb_disconnect) from [] (usb_remove_hcd+0xa0/0x1ac) [ 555.488813] r10:00000001 r9:ee471eb0 r8:00000000 r7:ef3d9500 r6:eded810c r5:eded80b0 [ 555.496765] r4:eded8000 [ 555.499351] [] (usb_remove_hcd) from [] (host_stop+0x28/0x64) [ 555.506847] r6:eeb50010 r5:eded8000 r4:eeb51010 [ 555.511563] [] (host_stop) from [] (ci_otg_work+0xc4/0x124) [ 555.518885] r6:00000001 r5:eeb50010 r4:eeb502a0 r3:c04d4130 [ 555.524665] [] (ci_otg_work) from [] (process_one_work+0x194/0x420) [ 555.532682] r6:ef086000 r5:eeb502a0 r4:edc44480 [ 555.537393] [] (process_one_work) from [] (worker_thread+0x34/0x514) [ 555.545496] r10:edc44480 
r9:ef086000 r8:c0b1a100 r7:ef086034 r6:00000088 r5:edc44498 [ 555.553450] r4:ef086000 [ 555.556032] [] (worker_thread) from [] (kthread+0xdc/0xf8) [ 555.563268] r10:00000000 r9:00000000 r8:00000000 r7:c004577c r6:edc44480 r5:eddc15c0 [ 555.571221] r4:00000000 [ 555.573804] [] (kthread) from [] (ret_from_fork+0x14/0x24) [ 555.581040] r7:00000000 r6:00000000 r5:c004b9d8 r4:eddc15c0 [ 553.429383] sh D c07de74c 0 694 691 0x00000000 [ 553.435801] Backtrace: [ 553.438295] [] (__schedule) from [] (schedule+0x48/0xa0) [ 553.445358] r10:edd3c054 r9:edd3c078 r8:edddbd50 r7:edcbbc00 r6:c1377c34 r5:60000153 [ 553.453313] r4:eddda000 [ 553.455896] [] (schedule) from [] (schedule_preempt_disabled+0x10/0x14) [ 553.464261] r4:edd3c058 r3:0000000a [ 553.467910] [] (schedule_preempt_disabled) from [] (mutex_lock_nested+0x1a0/0x3e8) [ 553.477254] [] (mutex_lock_nested) from [] (dpm_complete+0xc0/0x1b0) [ 553.485358] r10:00561408 r9:edd3c054 r8:c0b4863c r7:edddbd90 r6:c0b485d8 r5:edd3c020 [ 553.493313] r4:edd3c0d0 [ 553.495896] [] (dpm_complete) from [] (dpm_resume_end+0x1c/0x20) [ 553.503652] r9:00000000 r8:c0b1a9d0 r7:c1334ec0 r6:c1334edc r5:00000003 r4:00000010 [ 553.511544] [] (dpm_resume_end) from [] (suspend_devices_and_enter+0x158/0x504) [ 553.520604] r4:00000000 r3:c1334efc [ 553.524250] [] (suspend_devices_and_enter) from [] (pm_suspend+0x234/0x2cc) [ 553.532961] r10:00561408 r9:ed6b7300 r8:00000004 r7:c1334eec r6:00000000 r5:c1334ee8 [ 553.540914] r4:00000003 [ 553.543493] [] (pm_suspend) from [] (state_store+0x6c/0xc0) [ 555.703684] 7 locks held by kworker/u2:13/826: [ 555.708140] #0: ("%s""ci_otg"){++++.+}, at: [] process_one_work+0x128/0x420 [ 555.716277] #1: ((&ci->work)){+.+.+.}, at: [] process_one_work+0x128/0x420 [ 555.724317] #2: (usb_bus_list_lock){+.+.+.}, at: [] usb_remove_hcd+0x98/0x1ac [ 555.732626] #3: (&dev->mutex){......}, at: [] usb_disconnect+0x48/0x224 [ 555.740403] #4: (&dev->mutex){......}, at: [] usb_disconnect+0x48/0x224 [ 555.748179] #5: (&dev->mutex){......}, at: [] device_release_driver+0x20/0x34 [ 555.756487] #6: (&shost->scan_mutex){+.+.+.}, at: [] scsi_remove_host+0x20/0x104 Cc: #v3.14+ Cc: Jun Li Signed-off-by: Peter Chen Signed-off-by: Sasha Levin --- drivers/usb/chipidea/otg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/chipidea/otg.c b/drivers/usb/chipidea/otg.c index ad6c87a4653c2..fbc6285905a6d 100644 --- a/drivers/usb/chipidea/otg.c +++ b/drivers/usb/chipidea/otg.c @@ -118,7 +118,7 @@ static void ci_otg_work(struct work_struct *work) int ci_hdrc_otg_init(struct ci_hdrc *ci) { INIT_WORK(&ci->work, ci_otg_work); - ci->wq = create_singlethread_workqueue("ci_otg"); + ci->wq = create_freezable_workqueue("ci_otg"); if (!ci->wq) { dev_err(ci->dev, "can't create workqueue\n"); return -ENODEV; From 0f1b871592766fbadf7074a414d602fd7be5570a Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Mon, 18 May 2015 15:31:20 +0800 Subject: [PATCH 2035/2091] ALSA: hda/realtek - Support Dell headset mode for ALC298 [ Upstream commit 977e627684df0f60bdf2a768ec4772f42fe843fc ] Dell create new platform with ALC298 codec. This patch will enable headset mode for ALC298/ALC3266 platform. 
Signed-off-by: Kailang Yang Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/hda/patch_realtek.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index df34c78a6ced9..15f7225c8b5c4 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4570,6 +4570,7 @@ enum { ALC288_FIXUP_DISABLE_AAMIX, ALC292_FIXUP_DELL_E7X, ALC292_FIXUP_DISABLE_AAMIX, + ALC298_FIXUP_DELL1_MIC_NO_PRESENCE, }; static const struct hda_fixup alc269_fixups[] = { @@ -5131,6 +5132,16 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC292_FIXUP_DISABLE_AAMIX }, + [ALC298_FIXUP_DELL1_MIC_NO_PRESENCE] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x18, 0x01a1913c }, /* use as headset mic, without its own jack detect */ + { 0x1a, 0x01a1913d }, /* use as headphone mic, without its own jack detect */ + { } + }, + .chained = true, + .chain_id = ALC269_FIXUP_HEADSET_MODE + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -5414,6 +5425,13 @@ static const struct hda_model_fixup alc269_fixup_models[] = { {0x1d, 0x40700001}, \ {0x1e, 0x411111f0} +#define ALC298_STANDARD_PINS \ + {0x18, 0x411111f0}, \ + {0x19, 0x411111f0}, \ + {0x1a, 0x411111f0}, \ + {0x1e, 0x411111f0}, \ + {0x1f, 0x411111f0} + static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell", ALC255_FIXUP_DELL2_MIC_NO_PRESENCE, ALC255_STANDARD_PINS, @@ -5708,6 +5726,14 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x16, 0x411111f0}, {0x18, 0x411111f0}, {0x19, 0x411111f0}), + SND_HDA_PIN_QUIRK(0x10ec0298, 0x1028, "Dell", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE, + ALC298_STANDARD_PINS, + {0x12, 0x90a60130}, + {0x13, 0x40000000}, + {0x14, 0x411111f0}, + {0x17, 0x90170140}, + {0x1d, 0x4068a36d}, + {0x21, 0x03211020}), {} }; From 5352c4b683d846849860a8235ee0a2eaf34147d6 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Mon, 26 Oct 2015 15:37:39 +0800 Subject: [PATCH 2036/2091] ALSA: hda/realtek - Dell XPS one ALC3260 speaker no sound after resume back [ Upstream commit 6ed1131fe196ad7ffc13acc1a1eadc08a1db0303 ] This machine had I2S codec for speaker output. It need to refill the I2S codec initial verb after resume back. 
Signed-off-by: Kailang Yang Reported-and-tested-by: George Gugulea Cc: Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/hda/patch_realtek.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 15f7225c8b5c4..121aa66cb9705 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4571,6 +4571,7 @@ enum { ALC292_FIXUP_DELL_E7X, ALC292_FIXUP_DISABLE_AAMIX, ALC298_FIXUP_DELL1_MIC_NO_PRESENCE, + ALC275_FIXUP_DELL_XPS, }; static const struct hda_fixup alc269_fixups[] = { @@ -5142,6 +5143,17 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC269_FIXUP_HEADSET_MODE }, + [ALC275_FIXUP_DELL_XPS] = { + .type = HDA_FIXUP_VERBS, + .v.verbs = (const struct hda_verb[]) { + /* Enables internal speaker */ + {0x20, AC_VERB_SET_COEF_INDEX, 0x1f}, + {0x20, AC_VERB_SET_PROC_COEF, 0x00c0}, + {0x20, AC_VERB_SET_COEF_INDEX, 0x30}, + {0x20, AC_VERB_SET_PROC_COEF, 0x00b1}, + {} + } + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -5157,6 +5169,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x079b, "Acer Aspire V5-573G", ALC282_FIXUP_ASPIRE_V5_PINS), SND_PCI_QUIRK(0x1025, 0x106d, "Acer Cloudbook 14", ALC283_FIXUP_CHROME_BOOK), SND_PCI_QUIRK(0x1028, 0x0470, "Dell M101z", ALC269_FIXUP_DELL_M101Z), + SND_PCI_QUIRK(0x1028, 0x054b, "Dell XPS one 2710", ALC275_FIXUP_DELL_XPS), SND_PCI_QUIRK(0x1028, 0x05ca, "Dell Latitude E7240", ALC292_FIXUP_DELL_E7X), SND_PCI_QUIRK(0x1028, 0x05cb, "Dell Latitude E7440", ALC292_FIXUP_DELL_E7X), SND_PCI_QUIRK(0x1028, 0x05da, "Dell Vostro 5460", ALC290_FIXUP_SUBWOOFER), From 4c440d72709998411c30c9bf6e503980bd655dc3 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Tue, 24 Nov 2015 11:08:18 +0800 Subject: [PATCH 2037/2091] ALSA: hda - Fix headphone noise after Dell XPS 13 resume back from S3 [ Upstream commit 8c69729b4439bbda88c3073df7243f755cc418ed ] We have a machine Dell XPS 13 with the codec alc256, after resume back from S3, the headphone has noise when play sound. Through comparing with the coeff vaule before and after S3, we found restoring a coeff register will help remove noise. 
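[Note: not part of the original patch — background sketch for the COEF-based fixups in this and the following patch: a Realtek coefficient register is written by selecting its index and then writing the value on the vendor widget (NID 0x20), which is what the HDA_FIXUP_VERBS tables below encode as data. The "demo" helper name is illustrative; snd_hda_codec_write() and the verbs are the real interfaces.]

#include "hda_codec.h"	/* local header under sound/pci/hda/ in this tree */

static void demo_write_coef(struct hda_codec *codec,
			    unsigned int idx, unsigned int val)
{
	snd_hda_codec_write(codec, 0x20, 0, AC_VERB_SET_COEF_INDEX, idx);
	snd_hda_codec_write(codec, 0x20, 0, AC_VERB_SET_PROC_COEF, val);
}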
BugLink: https://bugs.launchpad.net/bugs/1519168 Cc: Kailang Yang Cc: Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/hda/patch_realtek.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 121aa66cb9705..996edc8860fd8 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4572,6 +4572,7 @@ enum { ALC292_FIXUP_DISABLE_AAMIX, ALC298_FIXUP_DELL1_MIC_NO_PRESENCE, ALC275_FIXUP_DELL_XPS, + ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE, }; static const struct hda_fixup alc269_fixups[] = { @@ -5154,6 +5155,17 @@ static const struct hda_fixup alc269_fixups[] = { {} } }, + [ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE] = { + .type = HDA_FIXUP_VERBS, + .v.verbs = (const struct hda_verb[]) { + /* Disable pass-through path for FRONT 14h */ + {0x20, AC_VERB_SET_COEF_INDEX, 0x36}, + {0x20, AC_VERB_SET_PROC_COEF, 0x1737}, + {} + }, + .chained = true, + .chain_id = ALC255_FIXUP_DELL1_MIC_NO_PRESENCE + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -5193,6 +5205,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x06de, "Dell", ALC292_FIXUP_DISABLE_AAMIX), SND_PCI_QUIRK(0x1028, 0x06df, "Dell", ALC292_FIXUP_DISABLE_AAMIX), SND_PCI_QUIRK(0x1028, 0x06e0, "Dell", ALC292_FIXUP_DISABLE_AAMIX), + SND_PCI_QUIRK(0x1028, 0x0704, "Dell XPS 13", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE), SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2), From 559e8bf80c9676b40f1ddcb7a90e1eaa2f3c6a2a Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Tue, 8 Dec 2015 12:27:18 +0800 Subject: [PATCH 2038/2091] ALSA: hda - Fixing speaker noise on the two latest thinkpad models [ Upstream commit 23adc192b862b69ad80a40bd5206e337f41264ac ] We have two latest thinkpad laptop models which are all based on the Intel skylake platforms, and all of them have the codec alc293 on them. When the machines boot to the desktop, an greeting dialogue shows up with the notification sound. But on these two models, there is noise with the notification sound. We have 3 SKUs for each of the models, all of them have this problem. So far, this problem is only specific to these two thinkpad models, we did not find this problem on the old thinkpad models with the codec alc293 or alc292. A workaround for this problem is disabling the aamix. 
Cc: stable@vger.kernel.org BugLink: https://bugs.launchpad.net/bugs/1523517 Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/hda/patch_realtek.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 996edc8860fd8..60c3416121c95 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4573,6 +4573,7 @@ enum { ALC298_FIXUP_DELL1_MIC_NO_PRESENCE, ALC275_FIXUP_DELL_XPS, ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE, + ALC293_FIXUP_LENOVO_SPK_NOISE, }; static const struct hda_fixup alc269_fixups[] = { @@ -5166,6 +5167,12 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC255_FIXUP_DELL1_MIC_NO_PRESENCE }, + [ALC293_FIXUP_LENOVO_SPK_NOISE] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc_fixup_disable_aamix, + .chained = true, + .chain_id = ALC269_FIXUP_THINKPAD_ACPI + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -5312,6 +5319,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x2218, "Thinkpad X1 Carbon 2nd", ALC292_FIXUP_TPT440_DOCK), SND_PCI_QUIRK(0x17aa, 0x2223, "ThinkPad T550", ALC292_FIXUP_TPT440_DOCK), SND_PCI_QUIRK(0x17aa, 0x2226, "ThinkPad X250", ALC292_FIXUP_TPT440_DOCK), + SND_PCI_QUIRK(0x17aa, 0x2233, "Thinkpad", ALC293_FIXUP_LENOVO_SPK_NOISE), SND_PCI_QUIRK(0x17aa, 0x3977, "IdeaPad S210", ALC283_FIXUP_INT_MIC), SND_PCI_QUIRK(0x17aa, 0x3978, "IdeaPad Y410P", ALC269_FIXUP_NO_SHUTUP), SND_PCI_QUIRK(0x17aa, 0x5013, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), @@ -5321,6 +5329,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x5034, "Thinkpad T450", ALC292_FIXUP_TPT440_DOCK), SND_PCI_QUIRK(0x17aa, 0x5036, "Thinkpad T450s", ALC292_FIXUP_TPT440_DOCK), SND_PCI_QUIRK(0x17aa, 0x503c, "Thinkpad L450", ALC292_FIXUP_TPT440_DOCK), + SND_PCI_QUIRK(0x17aa, 0x504b, "Thinkpad", ALC293_FIXUP_LENOVO_SPK_NOISE), SND_PCI_QUIRK(0x17aa, 0x5109, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x17aa, 0x3bf8, "Quanta FL1", ALC269_FIXUP_PCM_44K), SND_PCI_QUIRK(0x17aa, 0x9e54, "LENOVO NB", ALC269_FIXUP_LENOVO_EAPD), From 42929fe6d85ae5a3916f79d5c70792fb35d48603 Mon Sep 17 00:00:00 2001 From: Kailang Date: Mon, 28 Dec 2015 11:35:24 +0800 Subject: [PATCH 2039/2091] ALSA: hda - Add mic mute hotkey quirk for Lenovo ThinkCentre AIO [ Upstream commit 3694cb2947db50753caf432db067487eafae7b9b ] The Lenovo ThinkCenter AIO uses Line2 (NID 0x1b) to implement the micmute hotkey, here we register an input device and use Line2 unsol event to collect the hotkey pressing or releasing. In the meanwhile, the micmute led is controlled by GPIO2, so we use an existing function alc_fixup_gpio_mic_mute_hook() to control the led. [Hui: And there are two places to register the input device, to make the code simple and clean, move the two same code sections into a function.] 
Cc: Signed-off-by: Kailang Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/hda/patch_realtek.c | 84 +++++++++++++++++++++++++++++------ 1 file changed, 71 insertions(+), 13 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 60c3416121c95..5787bc5b18114 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -3477,6 +3477,29 @@ static void gpio2_mic_hotkey_event(struct hda_codec *codec, input_sync(spec->kb_dev); } +static int alc_register_micmute_input_device(struct hda_codec *codec) +{ + struct alc_spec *spec = codec->spec; + + spec->kb_dev = input_allocate_device(); + if (!spec->kb_dev) { + codec_err(codec, "Out of memory (input_allocate_device)\n"); + return -ENOMEM; + } + spec->kb_dev->name = "Microphone Mute Button"; + spec->kb_dev->evbit[0] = BIT_MASK(EV_KEY); + spec->kb_dev->keybit[BIT_WORD(KEY_MICMUTE)] = BIT_MASK(KEY_MICMUTE); + + if (input_register_device(spec->kb_dev)) { + codec_err(codec, "input_register_device failed\n"); + input_free_device(spec->kb_dev); + spec->kb_dev = NULL; + return -ENOMEM; + } + + return 0; +} + static void alc280_fixup_hp_gpio2_mic_hotkey(struct hda_codec *codec, const struct hda_fixup *fix, int action) { @@ -3494,20 +3517,8 @@ static void alc280_fixup_hp_gpio2_mic_hotkey(struct hda_codec *codec, struct alc_spec *spec = codec->spec; if (action == HDA_FIXUP_ACT_PRE_PROBE) { - spec->kb_dev = input_allocate_device(); - if (!spec->kb_dev) { - codec_err(codec, "Out of memory (input_allocate_device)\n"); + if (alc_register_micmute_input_device(codec) != 0) return; - } - spec->kb_dev->name = "Microphone Mute Button"; - spec->kb_dev->evbit[0] = BIT_MASK(EV_KEY); - spec->kb_dev->keybit[BIT_WORD(KEY_MICMUTE)] = BIT_MASK(KEY_MICMUTE); - if (input_register_device(spec->kb_dev)) { - codec_err(codec, "input_register_device failed\n"); - input_free_device(spec->kb_dev); - spec->kb_dev = NULL; - return; - } snd_hda_add_verbs(codec, gpio_init); snd_hda_codec_write_cache(codec, codec->core.afg, 0, @@ -3537,6 +3548,47 @@ static void alc280_fixup_hp_gpio2_mic_hotkey(struct hda_codec *codec, } } +static void alc233_fixup_lenovo_line2_mic_hotkey(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + /* Line2 = mic mute hotkey + GPIO2 = mic mute LED */ + static const struct hda_verb gpio_init[] = { + { 0x01, AC_VERB_SET_GPIO_MASK, 0x04 }, + { 0x01, AC_VERB_SET_GPIO_DIRECTION, 0x04 }, + {} + }; + + struct alc_spec *spec = codec->spec; + + if (action == HDA_FIXUP_ACT_PRE_PROBE) { + if (alc_register_micmute_input_device(codec) != 0) + return; + + snd_hda_add_verbs(codec, gpio_init); + snd_hda_jack_detect_enable_callback(codec, 0x1b, + gpio2_mic_hotkey_event); + + spec->gen.cap_sync_hook = alc_fixup_gpio_mic_mute_hook; + spec->gpio_led = 0; + spec->mute_led_polarity = 0; + spec->gpio_mic_led_mask = 0x04; + return; + } + + if (!spec->kb_dev) + return; + + switch (action) { + case HDA_FIXUP_ACT_PROBE: + spec->init_amp = ALC_INIT_DEFAULT; + break; + case HDA_FIXUP_ACT_FREE: + input_unregister_device(spec->kb_dev); + spec->kb_dev = NULL; + } +} + static void alc269_fixup_hp_line1_mic1_led(struct hda_codec *codec, const struct hda_fixup *fix, int action) { @@ -4574,6 +4626,7 @@ enum { ALC275_FIXUP_DELL_XPS, ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE, ALC293_FIXUP_LENOVO_SPK_NOISE, + ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY, }; static const struct hda_fixup alc269_fixups[] = { @@ -5173,6 +5226,10 @@ static const struct hda_fixup alc269_fixups[] = { .chained = 
true, .chain_id = ALC269_FIXUP_THINKPAD_ACPI }, + [ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc233_fixup_lenovo_line2_mic_hotkey, + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -5320,6 +5377,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x2223, "ThinkPad T550", ALC292_FIXUP_TPT440_DOCK), SND_PCI_QUIRK(0x17aa, 0x2226, "ThinkPad X250", ALC292_FIXUP_TPT440_DOCK), SND_PCI_QUIRK(0x17aa, 0x2233, "Thinkpad", ALC293_FIXUP_LENOVO_SPK_NOISE), + SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x3977, "IdeaPad S210", ALC283_FIXUP_INT_MIC), SND_PCI_QUIRK(0x17aa, 0x3978, "IdeaPad Y410P", ALC269_FIXUP_NO_SHUTUP), SND_PCI_QUIRK(0x17aa, 0x5013, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), From 4c3192278b461a23930dd39eaaa683898c1d0009 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Thu, 25 Feb 2016 15:19:38 +0800 Subject: [PATCH 2040/2091] ALSA: hda - Fixing background noise on Dell Inspiron 3162 [ Upstream commit 7cb32ae09a6490c27bc3c110ee42d808a5670142 ] commit 3b43b71f05d3ecd01c4116254666d9492301697d upstream. After login to the desktop on Dell Inspiron 3162, there's a very loud background noise comes from the builtin speaker. The noise does not go away even if the speaker is muted. The noise disappears after using the aamix fixup. Codec: Realtek ALC3234 Address: 0 AFG Function Id: 0x1 (unsol 1) Vendor Id: 0x10ec0255 Subsystem Id: 0x10280725 Revision Id: 0x100002 No Modem Function Group found BugLink: http://bugs.launchpad.net/bugs/1549620 Signed-off-by: Kai-Heng Feng Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- sound/pci/hda/patch_realtek.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 5787bc5b18114..1cc2e6be0c009 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4627,6 +4627,7 @@ enum { ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE, ALC293_FIXUP_LENOVO_SPK_NOISE, ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY, + ALC255_FIXUP_DELL_SPK_NOISE, }; static const struct hda_fixup alc269_fixups[] = { @@ -5230,6 +5231,12 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc233_fixup_lenovo_line2_mic_hotkey, }, + [ALC255_FIXUP_DELL_SPK_NOISE] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc_fixup_disable_aamix, + .chained = true, + .chain_id = ALC255_FIXUP_DELL1_MIC_NO_PRESENCE + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -5270,6 +5277,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x06df, "Dell", ALC292_FIXUP_DISABLE_AAMIX), SND_PCI_QUIRK(0x1028, 0x06e0, "Dell", ALC292_FIXUP_DISABLE_AAMIX), SND_PCI_QUIRK(0x1028, 0x0704, "Dell XPS 13", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE), + SND_PCI_QUIRK(0x1028, 0x0725, "Dell Inspiron 3162", ALC255_FIXUP_DELL_SPK_NOISE), SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2), From 79e6eddd93bc3dfa020a57886d666dea9b9f452e Mon Sep 17 00:00:00 2001 From: Mike Krinkin Date: Wed, 24 Feb 2016 21:02:31 +0300 Subject: [PATCH 2041/2091] KVM: x86: MMU: fix ubsan index-out-of-range warning [ Upstream commit 17e4bce0ae63c7e03f3c7fa8d80890e7af3d4971 ] Ubsan reports the following warning due to a typo in update_accessed_dirty_bits 
template, the patch fixes the typo: [ 168.791851] ================================================================================ [ 168.791862] UBSAN: Undefined behaviour in arch/x86/kvm/paging_tmpl.h:252:15 [ 168.791866] index 4 is out of range for type 'u64 [4]' [ 168.791871] CPU: 0 PID: 2950 Comm: qemu-system-x86 Tainted: G O L 4.5.0-rc5-next-20160222 #7 [ 168.791873] Hardware name: LENOVO 23205NG/23205NG, BIOS G2ET95WW (2.55 ) 07/09/2013 [ 168.791876] 0000000000000000 ffff8801cfcaf208 ffffffff81c9f780 0000000041b58ab3 [ 168.791882] ffffffff82eb2cc1 ffffffff81c9f6b4 ffff8801cfcaf230 ffff8801cfcaf1e0 [ 168.791886] 0000000000000004 0000000000000001 0000000000000000 ffffffffa1981600 [ 168.791891] Call Trace: [ 168.791899] [] dump_stack+0xcc/0x12c [ 168.791904] [] ? _atomic_dec_and_lock+0xc4/0xc4 [ 168.791910] [] ubsan_epilogue+0xd/0x8a [ 168.791914] [] __ubsan_handle_out_of_bounds+0x15c/0x1a3 [ 168.791918] [] ? __ubsan_handle_shift_out_of_bounds+0x2bd/0x2bd [ 168.791922] [] ? get_user_pages_fast+0x2bf/0x360 [ 168.791954] [] ? kvm_largepages_enabled+0x30/0x30 [kvm] [ 168.791958] [] ? __get_user_pages_fast+0x360/0x360 [ 168.791987] [] paging64_walk_addr_generic+0x1b28/0x2600 [kvm] [ 168.792014] [] ? init_kvm_mmu+0x1100/0x1100 [kvm] [ 168.792019] [] ? debug_check_no_locks_freed+0x350/0x350 [ 168.792044] [] ? init_kvm_mmu+0x1100/0x1100 [kvm] [ 168.792076] [] paging64_gva_to_gpa+0x7d/0x110 [kvm] [ 168.792121] [] ? paging64_walk_addr_generic+0x2600/0x2600 [kvm] [ 168.792130] [] ? debug_lockdep_rcu_enabled+0x7b/0x90 [ 168.792178] [] emulator_read_write_onepage+0x27a/0x1150 [kvm] [ 168.792208] [] ? __kvm_read_guest_page+0x54/0x70 [kvm] [ 168.792234] [] ? kvm_task_switch+0x160/0x160 [kvm] [ 168.792238] [] ? debug_lockdep_rcu_enabled+0x7b/0x90 [ 168.792263] [] emulator_read_write+0xe7/0x6d0 [kvm] [ 168.792290] [] ? em_cr_write+0x230/0x230 [kvm] [ 168.792314] [] emulator_write_emulated+0x15/0x20 [kvm] [ 168.792340] [] segmented_write+0xf8/0x130 [kvm] [ 168.792367] [] ? em_lgdt+0x20/0x20 [kvm] [ 168.792374] [] ? vmx_read_guest_seg_ar+0x42/0x1e0 [kvm_intel] [ 168.792400] [] writeback+0x3f2/0x700 [kvm] [ 168.792424] [] ? em_sidt+0xa0/0xa0 [kvm] [ 168.792449] [] ? x86_decode_insn+0x1b3d/0x4f70 [kvm] [ 168.792474] [] x86_emulate_insn+0x572/0x3010 [kvm] [ 168.792499] [] x86_emulate_instruction+0x3bd/0x2110 [kvm] [ 168.792524] [] ? reexecute_instruction.part.110+0x2e0/0x2e0 [kvm] [ 168.792532] [] handle_ept_misconfig+0x61/0x460 [kvm_intel] [ 168.792539] [] ? handle_pause+0x450/0x450 [kvm_intel] [ 168.792546] [] vmx_handle_exit+0xd6a/0x1ad0 [kvm_intel] [ 168.792572] [] ? kvm_arch_vcpu_ioctl_run+0xbdc/0x6090 [kvm] [ 168.792597] [] kvm_arch_vcpu_ioctl_run+0xd3d/0x6090 [kvm] [ 168.792621] [] ? kvm_arch_vcpu_ioctl_run+0xbdc/0x6090 [kvm] [ 168.792627] [] ? __ww_mutex_lock_interruptible+0x1630/0x1630 [ 168.792651] [] ? kvm_arch_vcpu_runnable+0x4f0/0x4f0 [kvm] [ 168.792656] [] ? preempt_notifier_unregister+0x190/0x190 [ 168.792681] [] ? kvm_arch_vcpu_load+0x127/0x650 [kvm] [ 168.792704] [] kvm_vcpu_ioctl+0x553/0xda0 [kvm] [ 168.792727] [] ? vcpu_put+0x40/0x40 [kvm] [ 168.792732] [] ? debug_check_no_locks_freed+0x350/0x350 [ 168.792735] [] ? _raw_spin_unlock+0x27/0x40 [ 168.792740] [] ? handle_mm_fault+0x1673/0x2e40 [ 168.792744] [] ? trace_hardirqs_on_caller+0x478/0x6c0 [ 168.792747] [] ? trace_hardirqs_on+0xd/0x10 [ 168.792751] [] ? debug_lockdep_rcu_enabled+0x7b/0x90 [ 168.792756] [] do_vfs_ioctl+0x1b0/0x12b0 [ 168.792759] [] ? ioctl_preallocate+0x210/0x210 [ 168.792763] [] ? 
__fget+0x273/0x4a0 [ 168.792766] [] ? __fget+0x50/0x4a0 [ 168.792770] [] ? __fget_light+0x96/0x2b0 [ 168.792773] [] SyS_ioctl+0x79/0x90 [ 168.792777] [] entry_SYSCALL_64_fastpath+0x23/0xc1 [ 168.792780] ================================================================================ Signed-off-by: Mike Krinkin Reviewed-by: Xiao Guangrong Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin --- arch/x86/kvm/paging_tmpl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 6e6d115fe9b54..d537c9badeb66 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -257,7 +257,7 @@ static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu, return ret; mark_page_dirty(vcpu->kvm, table_gfn); - walker->ptes[level] = pte; + walker->ptes[level - 1] = pte; } return 0; } From 74d11976ff45dfe15b8a965d72237ac98533f788 Mon Sep 17 00:00:00 2001 From: Thomas Betker Date: Tue, 10 Nov 2015 22:18:15 +0100 Subject: [PATCH 2042/2091] Revert "jffs2: Fix lock acquisition order bug in jffs2_write_begin" [ Upstream commit 157078f64b8a9cd7011b6b900b2f2498df850748 ] This reverts commit 5ffd3412ae55 ("jffs2: Fix lock acquisition order bug in jffs2_write_begin"). The commit modified jffs2_write_begin() to remove a deadlock with jffs2_garbage_collect_live(), but this introduced new deadlocks found by multiple users. page_lock() actually has to be called before mutex_lock(&c->alloc_sem) or mutex_lock(&f->sem) because jffs2_write_end() and jffs2_readpage() are called with the page locked, and they acquire c->alloc_sem and f->sem, resp. In other words, the lock order in jffs2_write_begin() was correct, and it is the jffs2_garbage_collect_live() path that has to be changed. Revert the commit to get rid of the new deadlocks, and to clear the way for a better fix of the original deadlock. 
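To make the inversion concrete, the two orders can be laid out in the usual lockdep style. This is a minimal editorial sketch built only from the call paths named above (grab_cache_page_write_begin() is what takes the page lock on the write side); it is an illustration, not a quotation of the code:

    write(2) -> jffs2_write_begin()         read -> jffs2_readpage()
    (with the reverted commit applied)      (VFS calls it with the page locked)
    ----------------------------------      -----------------------------------
    mutex_lock(&f->sem);                    lock_page(pg);          /* by VFS */
    grab_cache_page_write_begin(...);       mutex_lock(&f->sem);    /* blocks */
        /* blocks: other side holds pg */

    *** DEADLOCK ***

Each side holds the lock the other one wants, which is why the page lock has to be taken first on every path.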
Reported-by: Deng Chao Reported-by: Ming Liu Reported-by: wangzaiwei Signed-off-by: Thomas Betker Signed-off-by: David Woodhouse Cc: stable@vger.kernel.org Signed-off-by: Sasha Levin --- fs/jffs2/file.c | 39 ++++++++++++++++++--------------------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c index f509f62e12f6e..3361979d728c0 100644 --- a/fs/jffs2/file.c +++ b/fs/jffs2/file.c @@ -137,39 +137,33 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, struct page *pg; struct inode *inode = mapping->host; struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); - struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb); - struct jffs2_raw_inode ri; - uint32_t alloc_len = 0; pgoff_t index = pos >> PAGE_CACHE_SHIFT; uint32_t pageofs = index << PAGE_CACHE_SHIFT; int ret = 0; - jffs2_dbg(1, "%s()\n", __func__); - - if (pageofs > inode->i_size) { - ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len, - ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE); - if (ret) - return ret; - } - - mutex_lock(&f->sem); pg = grab_cache_page_write_begin(mapping, index, flags); - if (!pg) { - if (alloc_len) - jffs2_complete_reservation(c); - mutex_unlock(&f->sem); + if (!pg) return -ENOMEM; - } *pagep = pg; - if (alloc_len) { + jffs2_dbg(1, "%s()\n", __func__); + + if (pageofs > inode->i_size) { /* Make new hole frag from old EOF to new page */ + struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb); + struct jffs2_raw_inode ri; struct jffs2_full_dnode *fn; + uint32_t alloc_len; jffs2_dbg(1, "Writing new hole frag 0x%x-0x%x between current EOF and new page\n", (unsigned int)inode->i_size, pageofs); + ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len, + ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE); + if (ret) + goto out_page; + + mutex_lock(&f->sem); memset(&ri, 0, sizeof(ri)); ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK); @@ -196,6 +190,7 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, if (IS_ERR(fn)) { ret = PTR_ERR(fn); jffs2_complete_reservation(c); + mutex_unlock(&f->sem); goto out_page; } ret = jffs2_add_full_dnode_to_inode(c, f, fn); @@ -210,10 +205,12 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, jffs2_mark_node_obsolete(c, fn->raw); jffs2_free_full_dnode(fn); jffs2_complete_reservation(c); + mutex_unlock(&f->sem); goto out_page; } jffs2_complete_reservation(c); inode->i_size = pageofs; + mutex_unlock(&f->sem); } /* @@ -222,18 +219,18 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, * case of a short-copy. */ if (!PageUptodate(pg)) { + mutex_lock(&f->sem); ret = jffs2_do_readpage_nolock(inode, pg); + mutex_unlock(&f->sem); if (ret) goto out_page; } - mutex_unlock(&f->sem); jffs2_dbg(1, "end write_begin(). pg->flags %lx\n", pg->flags); return ret; out_page: unlock_page(pg); page_cache_release(pg); - mutex_unlock(&f->sem); return ret; } From e0dae728bf0878ad831440ff5d2e90ec10b794a4 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 1 Feb 2016 12:37:20 +0000 Subject: [PATCH 2043/2091] jffs2: Fix page lock / f->sem deadlock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 49e91e7079febe59a20ca885a87dd1c54240d0f1 ] With this fix, all code paths should now be obtaining the page lock before f->sem. 
Reported-by: Szabó Tamás Tested-by: Thomas Betker Signed-off-by: David Woodhouse Cc: stable@vger.kernel.org Signed-off-by: Sasha Levin --- fs/jffs2/README.Locking | 5 +---- fs/jffs2/gc.c | 17 ++++++++++------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/fs/jffs2/README.Locking b/fs/jffs2/README.Locking index 3ea36554107fc..8918ac905a3b1 100644 --- a/fs/jffs2/README.Locking +++ b/fs/jffs2/README.Locking @@ -2,10 +2,6 @@ JFFS2 LOCKING DOCUMENTATION --------------------------- -At least theoretically, JFFS2 does not require the Big Kernel Lock -(BKL), which was always helpfully obtained for it by Linux 2.4 VFS -code. It has its own locking, as described below. - This document attempts to describe the existing locking rules for JFFS2. It is not expected to remain perfectly up to date, but ought to be fairly close. @@ -69,6 +65,7 @@ Ordering constraints: any f->sem held. 2. Never attempt to lock two file mutexes in one thread. No ordering rules have been made for doing so. + 3. Never lock a page cache page with f->sem held. erase_completion_lock spinlock diff --git a/fs/jffs2/gc.c b/fs/jffs2/gc.c index 5a2dec2b064c9..95d5880a63ee1 100644 --- a/fs/jffs2/gc.c +++ b/fs/jffs2/gc.c @@ -1296,14 +1296,17 @@ static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_era BUG_ON(start > orig_start); } - /* First, use readpage() to read the appropriate page into the page cache */ - /* Q: What happens if we actually try to GC the _same_ page for which commit_write() - * triggered garbage collection in the first place? - * A: I _think_ it's OK. read_cache_page shouldn't deadlock, we'll write out the - * page OK. We'll actually write it out again in commit_write, which is a little - * suboptimal, but at least we're correct. - */ + /* The rules state that we must obtain the page lock *before* f->sem, so + * drop f->sem temporarily. Since we also hold c->alloc_sem, nothing's + * actually going to *change* so we're safe; we only allow reading. + * + * It is important to note that jffs2_write_begin() will ensure that its + * page is marked Uptodate before allocating space. That means that if we + * end up here trying to GC the *same* page that jffs2_write_begin() is + * trying to write out, read_cache_page() will not deadlock. */ + mutex_unlock(&f->sem); pg_ptr = jffs2_gc_fetch_page(c, f, start, &pg); + mutex_lock(&f->sem); if (IS_ERR(pg_ptr)) { pr_warn("read_cache_page() returned error: %ld\n", From 49f76896f87b49592781f09d6e6c3f868051a6d7 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 1 Feb 2016 14:04:46 +0000 Subject: [PATCH 2044/2091] Fix directory hardlinks from deleted directories MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit be629c62a603e5935f8177fd8a19e014100a259e ] When a directory is deleted, we don't take too much care about killing off all the dirents that belong to it — on the basis that on remount, the scan will conclude that the directory is dead anyway. This doesn't work though, when the deleted directory contained a child directory which was moved *out*. In the early stages of the fs build we can then end up with an apparent hard link, with the child directory appearing both in its true location, and as a child of the original directory which are this stage of the mount process we don't *yet* know is defunct. 
To resolve this, take out the early special-casing of the "directories shall not have hard links" rule in jffs2_build_inode_pass1(), and let the normal nlink processing happen for directories as well as other inodes. Then later in the build process we can set ic->pino_nlink to the parent inode#, as is required for directories during normal operaton, instead of the nlink. And complain only *then* about hard links which are still in evidence even after killing off all the unreachable paths. Reported-by: Liu Song Signed-off-by: David Woodhouse Cc: stable@vger.kernel.org Signed-off-by: Sasha Levin --- fs/jffs2/build.c | 75 ++++++++++++++++++++++++++++++++++----------- fs/jffs2/nodelist.h | 6 +++- 2 files changed, 62 insertions(+), 19 deletions(-) diff --git a/fs/jffs2/build.c b/fs/jffs2/build.c index a3750f902adcb..c1f04947d7dcf 100644 --- a/fs/jffs2/build.c +++ b/fs/jffs2/build.c @@ -49,7 +49,8 @@ next_inode(int *i, struct jffs2_inode_cache *ic, struct jffs2_sb_info *c) static void jffs2_build_inode_pass1(struct jffs2_sb_info *c, - struct jffs2_inode_cache *ic) + struct jffs2_inode_cache *ic, + int *dir_hardlinks) { struct jffs2_full_dirent *fd; @@ -68,19 +69,21 @@ static void jffs2_build_inode_pass1(struct jffs2_sb_info *c, dbg_fsbuild("child \"%s\" (ino #%u) of dir ino #%u doesn't exist!\n", fd->name, fd->ino, ic->ino); jffs2_mark_node_obsolete(c, fd->raw); + /* Clear the ic/raw union so it doesn't cause problems later. */ + fd->ic = NULL; continue; } + /* From this point, fd->raw is no longer used so we can set fd->ic */ + fd->ic = child_ic; + child_ic->pino_nlink++; + /* If we appear (at this stage) to have hard-linked directories, + * set a flag to trigger a scan later */ if (fd->type == DT_DIR) { - if (child_ic->pino_nlink) { - JFFS2_ERROR("child dir \"%s\" (ino #%u) of dir ino #%u appears to be a hard link\n", - fd->name, fd->ino, ic->ino); - /* TODO: What do we do about it? */ - } else { - child_ic->pino_nlink = ic->ino; - } - } else - child_ic->pino_nlink++; + child_ic->flags |= INO_FLAGS_IS_DIR; + if (child_ic->pino_nlink > 1) + *dir_hardlinks = 1; + } dbg_fsbuild("increased nlink for child \"%s\" (ino #%u)\n", fd->name, fd->ino); /* Can't free scan_dents so far. We might need them in pass 2 */ @@ -94,8 +97,7 @@ static void jffs2_build_inode_pass1(struct jffs2_sb_info *c, */ static int jffs2_build_filesystem(struct jffs2_sb_info *c) { - int ret; - int i; + int ret, i, dir_hardlinks = 0; struct jffs2_inode_cache *ic; struct jffs2_full_dirent *fd; struct jffs2_full_dirent *dead_fds = NULL; @@ -119,7 +121,7 @@ static int jffs2_build_filesystem(struct jffs2_sb_info *c) /* Now scan the directory tree, increasing nlink according to every dirent found. */ for_each_inode(i, c, ic) { if (ic->scan_dents) { - jffs2_build_inode_pass1(c, ic); + jffs2_build_inode_pass1(c, ic, &dir_hardlinks); cond_resched(); } } @@ -155,6 +157,20 @@ static int jffs2_build_filesystem(struct jffs2_sb_info *c) } dbg_fsbuild("pass 2a complete\n"); + + if (dir_hardlinks) { + /* If we detected directory hardlinks earlier, *hopefully* + * they are gone now because some of the links were from + * dead directories which still had some old dirents lying + * around and not yet garbage-collected, but which have + * been discarded above. So clear the pino_nlink field + * in each directory, so that the final scan below can + * print appropriate warnings. 
*/ + for_each_inode(i, c, ic) { + if (ic->flags & INO_FLAGS_IS_DIR) + ic->pino_nlink = 0; + } + } dbg_fsbuild("freeing temporary data structures\n"); /* Finally, we can scan again and free the dirent structs */ @@ -162,6 +178,33 @@ static int jffs2_build_filesystem(struct jffs2_sb_info *c) while(ic->scan_dents) { fd = ic->scan_dents; ic->scan_dents = fd->next; + /* We do use the pino_nlink field to count nlink of + * directories during fs build, so set it to the + * parent ino# now. Now that there's hopefully only + * one. */ + if (fd->type == DT_DIR) { + if (!fd->ic) { + /* We'll have complained about it and marked the coresponding + raw node obsolete already. Just skip it. */ + continue; + } + + /* We *have* to have set this in jffs2_build_inode_pass1() */ + BUG_ON(!(fd->ic->flags & INO_FLAGS_IS_DIR)); + + /* We clear ic->pino_nlink ∀ directories' ic *only* if dir_hardlinks + * is set. Otherwise, we know this should never trigger anyway, so + * we don't do the check. And ic->pino_nlink still contains the nlink + * value (which is 1). */ + if (dir_hardlinks && fd->ic->pino_nlink) { + JFFS2_ERROR("child dir \"%s\" (ino #%u) of dir ino #%u is also hard linked from dir ino #%u\n", + fd->name, fd->ino, ic->ino, fd->ic->pino_nlink); + /* Should we unlink it from its previous parent? */ + } + + /* For directories, ic->pino_nlink holds that parent inode # */ + fd->ic->pino_nlink = ic->ino; + } jffs2_free_full_dirent(fd); } ic->scan_dents = NULL; @@ -240,11 +283,7 @@ static void jffs2_build_remove_unlinked_inode(struct jffs2_sb_info *c, /* Reduce nlink of the child. If it's now zero, stick it on the dead_fds list to be cleaned up later. Else just free the fd */ - - if (fd->type == DT_DIR) - child_ic->pino_nlink = 0; - else - child_ic->pino_nlink--; + child_ic->pino_nlink--; if (!child_ic->pino_nlink) { dbg_fsbuild("inode #%u (\"%s\") now has no links; adding to dead_fds list.\n", diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h index fa35ff79ab358..0637271f37701 100644 --- a/fs/jffs2/nodelist.h +++ b/fs/jffs2/nodelist.h @@ -194,6 +194,7 @@ struct jffs2_inode_cache { #define INO_STATE_CLEARING 6 /* In clear_inode() */ #define INO_FLAGS_XATTR_CHECKED 0x01 /* has no duplicate xattr_ref */ +#define INO_FLAGS_IS_DIR 0x02 /* is a directory */ #define RAWNODE_CLASS_INODE_CACHE 0 #define RAWNODE_CLASS_XATTR_DATUM 1 @@ -249,7 +250,10 @@ struct jffs2_readinode_info struct jffs2_full_dirent { - struct jffs2_raw_node_ref *raw; + union { + struct jffs2_raw_node_ref *raw; + struct jffs2_inode_cache *ic; /* Just during part of build */ + }; struct jffs2_full_dirent *next; uint32_t version; uint32_t ino; /* == zero for unlink */ From b3c3bdf0959ee2c27305d70bc737da8d5153cdfc Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Wed, 3 Feb 2016 15:20:39 +0800 Subject: [PATCH 2045/2091] ALSA: hda/realtek - Support Dell headset mode for ALC225 [ Upstream commit cfc5a845e62853edd36e564c23c64588f4adcae6 ] Dell create new platform with ALC298 codec. This patch will enable headset mode for ALC225/ALC3253 platform. 
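For readers who have not met the pin-quirk table before, here is one of the entries this patch adds, with editorial annotations (the values are copied from the hunk below; the pin roles are our decoding of the default-config values, with 0x12/0x21 coming from ALC225_STANDARD_PINS and 0x14 most likely being the internal speaker):

    SND_HDA_PIN_QUIRK(0x10ec0225,        /* codec: Realtek ALC225           */
                      0x1028, "Dell",    /* PCI subsystem vendor ID         */
                      ALC269_FIXUP_DELL1_MIC_NO_PRESENCE, /* fixup to apply */
                      ALC225_STANDARD_PINS,      /* NIDs 0x12 (built-in mic)
                                                    and 0x21 (headphone)    */
                      {0x14, 0x901701a0});       /* NID 0x14 pin default    */

The fixup is applied only on machines where the codec ID, the subsystem vendor and the listed pin default configurations all match.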
Signed-off-by: Kailang Yang Cc: # v4.4+ Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/hda/patch_realtek.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 1cc2e6be0c009..2c1f2c8438a2d 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5477,6 +5477,9 @@ static const struct hda_model_fixup alc269_fixup_models[] = { {.id = ALC292_FIXUP_TPT440, .name = "tpt440"}, {} }; +#define ALC225_STANDARD_PINS \ + {0x12, 0xb7a60130}, \ + {0x21, 0x04211020} #define ALC255_STANDARD_PINS \ {0x18, 0x411111f0}, \ @@ -5534,6 +5537,12 @@ static const struct hda_model_fixup alc269_fixup_models[] = { {0x1f, 0x411111f0} static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { + SND_HDA_PIN_QUIRK(0x10ec0225, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE, + ALC225_STANDARD_PINS, + {0x14, 0x901701a0}), + SND_HDA_PIN_QUIRK(0x10ec0225, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE, + ALC225_STANDARD_PINS, + {0x14, 0x901701b0}), SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell", ALC255_FIXUP_DELL2_MIC_NO_PRESENCE, ALC255_STANDARD_PINS, {0x12, 0x40300000}, From fdd575639f6beb61e90be54fcdd14dd9f9b9627f Mon Sep 17 00:00:00 2001 From: David Henningsson Date: Thu, 25 Feb 2016 09:37:05 +0100 Subject: [PATCH 2046/2091] ALSA: hda - Fixup speaker pass-through control for nid 0x14 on ALC225 [ Upstream commit 2ae955774f29bbd7d16149cb0ae8d0319bf2ecc4 ] On one of the machines we enable, we found that the actual speaker volume did not always correspond to the volume set in alsamixer. This patch fixes that problem. This patch was orginally written by Kailang @ Realtek, I've rebased it to fit sound git master. Cc: stable@vger.kernel.org BugLink: https://bugs.launchpad.net/bugs/1549660 Co-Authored-By: Kailang Signed-off-by: David Henningsson Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/hda/patch_realtek.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 2c1f2c8438a2d..d11e7d491c0ca 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -3772,6 +3772,10 @@ static void alc_headset_mode_mic_in(struct hda_codec *codec, hda_nid_t hp_pin, static void alc_headset_mode_default(struct hda_codec *codec) { + static struct coef_fw coef0225[] = { + UPDATE_COEF(0x45, 0x3f<<10, 0x34<<10), + {} + }; static struct coef_fw coef0255[] = { WRITE_COEF(0x45, 0xc089), WRITE_COEF(0x45, 0xc489), @@ -3813,6 +3817,9 @@ static void alc_headset_mode_default(struct hda_codec *codec) }; switch (codec->core.vendor_id) { + case 0x10ec0225: + alc_process_coef_fw(codec, coef0225); + break; case 0x10ec0255: case 0x10ec0256: alc_process_coef_fw(codec, coef0255); @@ -4628,6 +4635,7 @@ enum { ALC293_FIXUP_LENOVO_SPK_NOISE, ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY, ALC255_FIXUP_DELL_SPK_NOISE, + ALC225_FIXUP_DELL1_MIC_NO_PRESENCE, }; static const struct hda_fixup alc269_fixups[] = { @@ -5237,6 +5245,17 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC255_FIXUP_DELL1_MIC_NO_PRESENCE }, + [ALC225_FIXUP_DELL1_MIC_NO_PRESENCE] = { + .type = HDA_FIXUP_VERBS, + .v.verbs = (const struct hda_verb[]) { + /* Disable pass-through path for FRONT 14h */ + { 0x20, AC_VERB_SET_COEF_INDEX, 0x36 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x57d7 }, + {} + }, + .chained = true, + .chain_id = ALC269_FIXUP_DELL1_MIC_NO_PRESENCE + }, }; static const struct snd_pci_quirk 
alc269_fixup_tbl[] = { @@ -5537,10 +5556,10 @@ static const struct hda_model_fixup alc269_fixup_models[] = { {0x1f, 0x411111f0} static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { - SND_HDA_PIN_QUIRK(0x10ec0225, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE, + SND_HDA_PIN_QUIRK(0x10ec0225, 0x1028, "Dell", ALC225_FIXUP_DELL1_MIC_NO_PRESENCE, ALC225_STANDARD_PINS, {0x14, 0x901701a0}), - SND_HDA_PIN_QUIRK(0x10ec0225, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE, + SND_HDA_PIN_QUIRK(0x10ec0225, 0x1028, "Dell", ALC225_FIXUP_DELL1_MIC_NO_PRESENCE, ALC225_STANDARD_PINS, {0x14, 0x901701b0}), SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell", ALC255_FIXUP_DELL2_MIC_NO_PRESENCE, From 715b09c999aeb1c6e48a1d10b93443d4cdbab01b Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 25 Feb 2016 14:31:59 +0100 Subject: [PATCH 2047/2091] ALSA: hda - Fix headset support and noise on HP EliteBook 755 G2 [ Upstream commit f883982dc1b117f04579f0896821cd9f2e397f94 ] HP EliteBook 755 G2 with ALC3228 (ALC280) codec [103c:221c] requires the known fixup (ALC269_FIXUP_HEADSET_MIC) for making the headset mic working. Also, it suffers from the loopback noise problem, so we should disable aamix path as well. Reported-by: Derick Eddington Cc: Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/hda/patch_realtek.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index d11e7d491c0ca..404be2d67086d 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4636,6 +4636,7 @@ enum { ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY, ALC255_FIXUP_DELL_SPK_NOISE, ALC225_FIXUP_DELL1_MIC_NO_PRESENCE, + ALC280_FIXUP_HP_HEADSET_MIC, }; static const struct hda_fixup alc269_fixups[] = { @@ -5256,6 +5257,12 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC269_FIXUP_DELL1_MIC_NO_PRESENCE }, + [ALC280_FIXUP_HP_HEADSET_MIC] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc_fixup_disable_aamix, + .chained = true, + .chain_id = ALC269_FIXUP_HEADSET_MIC, + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -5356,6 +5363,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x2335, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), SND_PCI_QUIRK(0x103c, 0x2336, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), SND_PCI_QUIRK(0x103c, 0x2337, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + SND_PCI_QUIRK(0x103c, 0x221c, "HP EliteBook 755 G2", ALC280_FIXUP_HP_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300), SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x1043, 0x115d, "Asus 1015E", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), From 91d212c02743084892b687fb5cf166fffc01d0f9 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Tue, 23 Feb 2016 13:03:30 +0100 Subject: [PATCH 2048/2091] iommu/amd: Fix boot warning when device 00:00.0 is not iommu covered [ Upstream commit 38e45d02ea9f194b89d6bf41e52ccafc8e2c2b47 ] The setup code for the performance counters in the AMD IOMMU driver tests whether the counters can be written. It tests to setup a counter for device 00:00.0, which fails on systems where this particular device is not covered by the IOMMU. Fix this by not relying on device 00:00.0 but only on the IOMMU being present. 
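Reduced to the probe call, the change looks like this (an editorial sketch distilled from the hunks below, not a full quotation):

    /* before: goes through the public, devid-based helper; if device
     * 00:00.0 is not behind an IOMMU the rlookup fails and the helper's
     * WARN_ON() fires -- presumably the boot warning in the subject.  */
    amd_iommu_pc_get_set_reg_val(0 /* devid 00:00.0 */, 0, 0, 0, &val, true);

    /* after: talk to the IOMMU instance being initialised directly    */
    iommu_pc_get_set_reg_val(iommu, 0, 0, 0, &val, true);

The devid-based helper is kept for external callers; it now simply looks up the IOMMU for the device and forwards to the new internal function.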
Cc: stable@vger.kernel.org Signed-off-by: Suravee Suthikulpanit Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/amd_iommu_init.c | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 450ef5001a65a..44c1c5701454a 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -227,6 +227,10 @@ static enum iommu_init_state init_state = IOMMU_START_STATE; static int amd_iommu_enable_interrupts(void); static int __init iommu_go_to_state(enum iommu_init_state state); +static int iommu_pc_get_set_reg_val(struct amd_iommu *iommu, + u8 bank, u8 cntr, u8 fxn, + u64 *value, bool is_write); + static inline void update_last_devid(u16 devid) { if (devid > amd_iommu_last_bdf) @@ -1192,8 +1196,8 @@ static void init_iommu_perf_ctr(struct amd_iommu *iommu) amd_iommu_pc_present = true; /* Check if the performance counters can be written to */ - if ((0 != amd_iommu_pc_get_set_reg_val(0, 0, 0, 0, &val, true)) || - (0 != amd_iommu_pc_get_set_reg_val(0, 0, 0, 0, &val2, false)) || + if ((0 != iommu_pc_get_set_reg_val(iommu, 0, 0, 0, &val, true)) || + (0 != iommu_pc_get_set_reg_val(iommu, 0, 0, 0, &val2, false)) || (val != val2)) { pr_err("AMD-Vi: Unable to write to IOMMU perf counter.\n"); amd_iommu_pc_present = false; @@ -2362,22 +2366,15 @@ u8 amd_iommu_pc_get_max_counters(u16 devid) } EXPORT_SYMBOL(amd_iommu_pc_get_max_counters); -int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr, u8 fxn, +static int iommu_pc_get_set_reg_val(struct amd_iommu *iommu, + u8 bank, u8 cntr, u8 fxn, u64 *value, bool is_write) { - struct amd_iommu *iommu; u32 offset; u32 max_offset_lim; - /* Make sure the IOMMU PC resource is available */ - if (!amd_iommu_pc_present) - return -ENODEV; - - /* Locate the iommu associated with the device ID */ - iommu = amd_iommu_rlookup_table[devid]; - /* Check for valid iommu and pc register indexing */ - if (WARN_ON((iommu == NULL) || (fxn > 0x28) || (fxn & 7))) + if (WARN_ON((fxn > 0x28) || (fxn & 7))) return -ENODEV; offset = (u32)(((0x40|bank) << 12) | (cntr << 8) | fxn); @@ -2401,3 +2398,16 @@ int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr, u8 fxn, return 0; } EXPORT_SYMBOL(amd_iommu_pc_get_set_reg_val); + +int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr, u8 fxn, + u64 *value, bool is_write) +{ + struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; + + /* Make sure the IOMMU PC resource is available */ + if (!amd_iommu_pc_present || iommu == NULL) + return -ENODEV; + + return iommu_pc_get_set_reg_val(iommu, bank, cntr, fxn, + value, is_write); +} From 15115bf3b764c4f2b36ea202f45181fd18d4a574 Mon Sep 17 00:00:00 2001 From: Jay Cornwall Date: Wed, 10 Feb 2016 15:48:01 -0600 Subject: [PATCH 2049/2091] iommu/amd: Apply workaround for ATS write permission check [ Upstream commit 358875fd52ab8f00f66328cbf1a1d2486f265829 ] The AMD Family 15h Models 30h-3Fh (Kaveri) BIOS and Kernel Developer's Guide omitted part of the BIOS IOMMU L2 register setup specification. Without this setup the IOMMU L2 does not fully respect write permissions when handling an ATS translation request. The IOMMU L2 will set PTE dirty bit when handling an ATS translation with write permission request, even when PTE RW bit is clear. This may occur by direct translation (which would cause a PPR) or by prefetch request from the ATC. This is observed in practice when the IOMMU L2 modifies a PTE which maps a pagecache page. 
The ext4 filesystem driver BUGs when asked to writeback these (non-modified) pages. Enable ATS write permission check in the Kaveri IOMMU L2 if BIOS has not. Signed-off-by: Jay Cornwall Cc: # v3.19+ Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/amd_iommu_init.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 44c1c5701454a..1750db0ef61cb 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -1069,6 +1069,34 @@ static void amd_iommu_erratum_746_workaround(struct amd_iommu *iommu) pci_write_config_dword(iommu->dev, 0xf0, 0x90); } +/* + * Family15h Model 30h-3fh (IOMMU Mishandles ATS Write Permission) + * Workaround: + * BIOS should enable ATS write permission check by setting + * L2_DEBUG_3[AtsIgnoreIWDis](D0F2xF4_x47[0]) = 1b + */ +static void amd_iommu_ats_write_check_workaround(struct amd_iommu *iommu) +{ + u32 value; + + if ((boot_cpu_data.x86 != 0x15) || + (boot_cpu_data.x86_model < 0x30) || + (boot_cpu_data.x86_model > 0x3f)) + return; + + /* Test L2_DEBUG_3[AtsIgnoreIWDis] == 1 */ + value = iommu_read_l2(iommu, 0x47); + + if (value & BIT(0)) + return; + + /* Set L2_DEBUG_3[AtsIgnoreIWDis] = 1 */ + iommu_write_l2(iommu, 0x47, value | BIT(0)); + + pr_info("AMD-Vi: Applying ATS write check workaround for IOMMU at %s\n", + dev_name(&iommu->dev->dev)); +} + /* * This function clues the initialization function for one IOMMU * together and also allocates the command buffer and programs the @@ -1343,6 +1371,7 @@ static int iommu_init_pci(struct amd_iommu *iommu) } amd_iommu_erratum_746_workaround(iommu); + amd_iommu_ats_write_check_workaround(iommu); iommu->iommu_dev = iommu_device_create(&iommu->dev->dev, iommu, amd_iommu_groups, "ivhd%d", From d017f850a3b6f84e6d847c6cbb01eaf6ce61f4f6 Mon Sep 17 00:00:00 2001 From: Harvey Hunt Date: Wed, 24 Feb 2016 15:16:43 +0000 Subject: [PATCH 2050/2091] libata: Align ata_device's id on a cacheline [ Upstream commit 4ee34ea3a12396f35b26d90a094c75db95080baa ] The id buffer in ata_device is a DMA target, but it isn't explicitly cacheline aligned. Due to this, adjacent fields can be overwritten with stale data from memory on non coherent architectures. As a result, the kernel is sometimes unable to communicate with an ATA device. Fix this by ensuring that the id buffer is cacheline aligned. This issue is similar to that fixed by Commit 84bda12af31f ("libata: align ap->sector_buf"). 
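The idiom is easiest to see on a made-up structure (illustrative only; the real change below simply adds ____cacheline_aligned to the existing id/gscr union):

    struct my_dev {                /* hypothetical example, not libata code */
            u32 flags;
            /* DMA target: starts on a cacheline boundary, and since its
             * size is a whole number of cachelines it ends on one too,
             * so no other field shares its cachelines.                 */
            u8  ident[512] ____cacheline_aligned;
            u32 state;
    };

Without the annotation, 'flags' or 'state' could share a cacheline with the buffer, and on a non-coherent CPU the cache invalidation done around the DMA would discard a recent store to that neighbouring field, leaving the stale value from memory -- the corruption described above.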
Signed-off-by: Harvey Hunt Cc: linux-kernel@vger.kernel.org Cc: # 2.6.18 Signed-off-by: Tejun Heo Signed-off-by: Sasha Levin --- include/linux/libata.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/libata.h b/include/linux/libata.h index e0e33787c485e..11c2dd114732a 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -717,7 +717,7 @@ struct ata_device { union { u16 id[ATA_ID_WORDS]; /* IDENTIFY xxx DEVICE data */ u32 gscr[SATA_PMP_GSCR_DWORDS]; /* PMP GSCR block */ - }; + } ____cacheline_aligned; /* DEVSLP Timing Variables from Identify Device Data Log */ u8 devslp_timing[ATA_LOG_DEVSLP_SIZE]; From 18d609bb0b8c6823a750b32106be5685ca3daff7 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 26 Feb 2016 12:28:40 +0100 Subject: [PATCH 2051/2091] KVM: x86: fix root cause for missed hardware breakpoints [ Upstream commit 70e4da7a8ff62f2775337b705f45c804bb450454 ] Commit 172b2386ed16 ("KVM: x86: fix missed hardware breakpoints", 2016-02-10) worked around a case where the debug registers are not loaded correctly on preemption and on the first entry to KVM_RUN. However, Xiao Guangrong pointed out that the root cause must be that KVM_DEBUGREG_BP_ENABLED is not being set correctly. This can indeed happen due to the lazy debug exit mechanism, which does not call kvm_update_dr7. Fix it by replacing the existing loop (more or less equivalent to kvm_update_dr0123) with calls to all the kvm_update_dr* functions. Cc: stable@vger.kernel.org # 4.1+ Fixes: 172b2386ed16a9143d9a456aae5ec87275c61489 Reviewed-by: Xiao Guangrong Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin --- arch/x86/kvm/x86.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0dbf28a6d83d1..42f1575ab58d9 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2968,7 +2968,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) accumulate_steal_time(vcpu); kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); - vcpu->arch.switch_db_regs |= KVM_DEBUGREG_RELOAD; } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) @@ -6372,12 +6371,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) * KVM_DEBUGREG_WONT_EXIT again. */ if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)) { - int i; - WARN_ON(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP); kvm_x86_ops->sync_dirty_debug_regs(vcpu); - for (i = 0; i < KVM_NR_DB_REGS; i++) - vcpu->arch.eff_db[i] = vcpu->arch.db[i]; + kvm_update_dr0123(vcpu); + kvm_update_dr6(vcpu); + kvm_update_dr7(vcpu); + vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD; } /* From bb2b7d4ee6fc2c2dea54c12df9c0aea15e1a019c Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 26 Feb 2016 18:55:31 +0000 Subject: [PATCH 2052/2091] x86/mpx: Fix off-by-one comparison with nr_registers [ Upstream commit 9bf148cb0812595bfdf5100bd2c07e9bec9c6ef5 ] In the unlikely event that regno == nr_registers then we get an array overrun on regoff because the invalid register check is currently off-by-one. Fix this with a check that regno is >= nr_registers instead. Detected with static analysis using CoverityScan. Fixes: fcc7ffd67991 "x86, mpx: Decode MPX instruction to get bound violation information" Signed-off-by: Colin Ian King Acked-by: Dave Hansen Cc: Borislav Petkov Cc: "Kirill A . 
Shutemov" Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/1456512931-3388-1-git-send-email-colin.king@canonical.com Signed-off-by: Thomas Gleixner Signed-off-by: Sasha Levin --- arch/x86/mm/mpx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index f738c61bc891e..6a3c774eaff69 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -142,7 +142,7 @@ static int get_reg_offset(struct insn *insn, struct pt_regs *regs, break; } - if (regno > nr_registers) { + if (regno >= nr_registers) { WARN_ONCE(1, "decoded an instruction with an invalid register"); return -EINVAL; } From d347d0e9ae617bd44ca7679786ebf11a06d50372 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Fri, 26 Feb 2016 15:19:28 -0800 Subject: [PATCH 2053/2091] mm: thp: fix SMP race condition between THP page fault and MADV_DONTNEED [ Upstream commit ad33bb04b2a6cee6c1f99fabb15cddbf93ff0433 ] pmd_trans_unstable()/pmd_none_or_trans_huge_or_clear_bad() were introduced to locklessy (but atomically) detect when a pmd is a regular (stable) pmd or when the pmd is unstable and can infinitely transition from pmd_none() and pmd_trans_huge() from under us, while only holding the mmap_sem for reading (for writing not). While holding the mmap_sem only for reading, MADV_DONTNEED can run from under us and so before we can assume the pmd to be a regular stable pmd we need to compare it against pmd_none() and pmd_trans_huge() in an atomic way, with pmd_trans_unstable(). The old pmd_trans_huge() left a tiny window for a race. Useful applications are unlikely to notice the difference as doing MADV_DONTNEED concurrently with a page fault would lead to undefined behavior. [akpm@linux-foundation.org: tidy up comment grammar/layout] Signed-off-by: Andrea Arcangeli Reported-by: Kirill A. Shutemov Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/memory.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 2a9e09870c201..701d9ad45c46f 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3363,8 +3363,18 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, if (unlikely(pmd_none(*pmd)) && unlikely(__pte_alloc(mm, vma, pmd, address))) return VM_FAULT_OOM; - /* if an huge pmd materialized from under us just retry later */ - if (unlikely(pmd_trans_huge(*pmd))) + /* + * If a huge pmd materialized under us just retry later. Use + * pmd_trans_unstable() instead of pmd_trans_huge() to ensure the pmd + * didn't become pmd_trans_huge under us and then back to pmd_none, as + * a result of MADV_DONTNEED running immediately after a huge pmd fault + * in a different thread of this mm, in turn leading to a misleading + * pmd_trans_huge() retval. All we have to ensure is that it is a + * regular pmd that we can walk with pte_offset_map() and we can do that + * through an atomic read in C, which is what pmd_trans_unstable() + * provides. + */ + if (unlikely(pmd_trans_unstable(pmd))) return 0; /* * A regular pmd is established and it can't morph into a huge pmd From 419ddc3099727a291a67d41498c3d1caddb75392 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 26 Feb 2016 15:19:31 -0800 Subject: [PATCH 2054/2091] mm: numa: quickly fail allocations for NUMA balancing on full nodes [ Upstream commit 8479eba7781fa9ffb28268840de6facfc12c35a7 ] Commit 4167e9b2cf10 ("mm: remove GFP_THISNODE") removed the GFP_THISNODE flag combination due to confusing semantics. 
It noted that alloc_misplaced_dst_page() was one such user after changes made by commit e97ca8e5b864 ("mm: fix GFP_THISNODE callers and clarify"). Unfortunately when GFP_THISNODE was removed, users of alloc_misplaced_dst_page() started waking kswapd and entering direct reclaim because the wrong GFP flags are cleared. The consequence is that workloads that used to fit into memory now get reclaimed which is addressed by this patch. The problem can be demonstrated with "mutilate" that exercises memcached which is software dedicated to memory object caching. The configuration uses 80% of memory and is run 3 times for varying numbers of clients. The results on a 4-socket NUMA box are mutilate 4.4.0 4.4.0 vanilla numaswap-v1 Hmean 1 8394.71 ( 0.00%) 8395.32 ( 0.01%) Hmean 4 30024.62 ( 0.00%) 34513.54 ( 14.95%) Hmean 7 32821.08 ( 0.00%) 70542.96 (114.93%) Hmean 12 55229.67 ( 0.00%) 93866.34 ( 69.96%) Hmean 21 39438.96 ( 0.00%) 85749.21 (117.42%) Hmean 30 37796.10 ( 0.00%) 50231.49 ( 32.90%) Hmean 47 18070.91 ( 0.00%) 38530.13 (113.22%) The metric is queries/second with the more the better. The results are way outside of the noise and the reason for the improvement is obvious from some of the vmstats 4.4.0 4.4.0 vanillanumaswap-v1r1 Minor Faults 1929399272 2146148218 Major Faults 19746529 3567 Swap Ins 57307366 9913 Swap Outs 50623229 17094 Allocation stalls 35909 443 DMA allocs 0 0 DMA32 allocs 72976349 170567396 Normal allocs 5306640898 5310651252 Movable allocs 0 0 Direct pages scanned 404130893 799577 Kswapd pages scanned 160230174 0 Kswapd pages reclaimed 55928786 0 Direct pages reclaimed 1843936 41921 Page writes file 2391 0 Page writes anon 50623229 17094 The vanilla kernel is swapping like crazy with large amounts of direct reclaim and kswapd activity. The figures are aggregate but it's known that the bad activity is throughout the entire test. Note that simple streaming anon/file memory consumers also see this problem but it's not as obvious. In those cases, kswapd is awake when it should not be. As there are at least two reclaim-related bugs out there, it's worth spelling out the user-visible impact. This patch only addresses bugs related to excessive reclaim on NUMA hardware when the working set is larger than a NUMA node. There is a bug related to high kswapd CPU usage but the reports are against laptops and other UMA hardware and is not addressed by this patch. Signed-off-by: Mel Gorman Cc: Vlastimil Babka Cc: Johannes Weiner Cc: David Rientjes Cc: [4.1+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/migrate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/migrate.c b/mm/migrate.c index 2c37b1a44a8c8..8c4841a6dc4ca 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1557,7 +1557,7 @@ static struct page *alloc_misplaced_dst_page(struct page *page, (GFP_HIGHUSER_MOVABLE | __GFP_THISNODE | __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN) & - ~GFP_IOFS, 0); + ~__GFP_WAIT, 0); return newpage; } From 6cb69cb2840d3c18a469d6c42dbdb060bdf2bb04 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 25 Feb 2016 18:17:38 +0100 Subject: [PATCH 2055/2091] hpfs: don't truncate the file when delete fails [ Upstream commit b6853f78e763d42c7a158d8de3549c9827c604ab ] The delete opration can allocate additional space on the HPFS filesystem due to btree split. The HPFS driver checks in advance if there is available space, so that it won't corrupt the btree if we run out of space during splitting. 
If there is not enough available space, the HPFS driver attempted to truncate the file, but this results in a deadlock since the commit 7dd29d8d865efdb00c0542a5d2c87af8c52ea6c7 ("HPFS: Introduce a global mutex and lock it on every callback from VFS"). This patch removes the code that tries to truncate the file and -ENOSPC is returned instead. If the user hits -ENOSPC on delete, he should try to delete other files (that are stored in a leaf btree node), so that the delete operation will make some space for deleting the file stored in non-leaf btree node. Reported-by: Al Viro Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org # 2.6.39+ Signed-off-by: Al Viro Signed-off-by: Sasha Levin --- fs/hpfs/namei.c | 31 +++---------------------------- 1 file changed, 3 insertions(+), 28 deletions(-) diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c index 9e92c9c2d3195..b5f3cc7274f62 100644 --- a/fs/hpfs/namei.c +++ b/fs/hpfs/namei.c @@ -377,12 +377,11 @@ static int hpfs_unlink(struct inode *dir, struct dentry *dentry) struct inode *inode = d_inode(dentry); dnode_secno dno; int r; - int rep = 0; int err; hpfs_lock(dir->i_sb); hpfs_adjust_length(name, &len); -again: + err = -ENOENT; de = map_dirent(dir, hpfs_i(dir)->i_dno, name, len, &dno, &qbh); if (!de) @@ -402,33 +401,9 @@ static int hpfs_unlink(struct inode *dir, struct dentry *dentry) hpfs_error(dir->i_sb, "there was error when removing dirent"); err = -EFSERROR; break; - case 2: /* no space for deleting, try to truncate file */ - + case 2: /* no space for deleting */ err = -ENOSPC; - if (rep++) - break; - - dentry_unhash(dentry); - if (!d_unhashed(dentry)) { - hpfs_unlock(dir->i_sb); - return -ENOSPC; - } - if (generic_permission(inode, MAY_WRITE) || - !S_ISREG(inode->i_mode) || - get_write_access(inode)) { - d_rehash(dentry); - } else { - struct iattr newattrs; - /*pr_info("truncating file before delete.\n");*/ - newattrs.ia_size = 0; - newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME; - err = notify_change(dentry, &newattrs, NULL); - put_write_access(inode); - if (!err) - goto again; - } - hpfs_unlock(dir->i_sb); - return -ENOSPC; + break; default: drop_nlink(inode); err = 0; From 3960cde3e356057bd60adce1b625a7d178b9581c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 27 Feb 2016 19:17:33 -0500 Subject: [PATCH 2056/2091] do_last(): don't let a bogus return value from ->open() et.al. to confuse us [ Upstream commit c80567c82ae4814a41287618e315a60ecf513be6 ] ... into returning a positive to path_openat(), which would interpret that as "symlink had been encountered" and proceed to corrupt memory, etc. It can only happen due to a bug in some ->open() instance or in some LSM hook, etc., so we report any such event *and* make sure it doesn't trick us into further unpleasantness. 
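For context, the reason a stray positive value is dangerous is the calling convention in path_openat(), which in this kernel looks roughly like the following (heavily simplified editorial sketch of this era's fs/namei.c; several checks and the lookup-flag juggling are elided):

    error = do_last(nd, &path, file, op, &opened, pathname);
    while (unlikely(error > 0)) {           /* "> 0" means: trailing symlink */
            struct path link = path;
            void *cookie;

            error = follow_link(&link, nd, &cookie);
            if (unlikely(error))
                    break;
            error = do_last(nd, &path, file, op, &opened, pathname);
            put_link(nd, &link, cookie);
    }

A positive number leaked from ->open() or an LSM hook therefore sends this loop off to "follow" a symlink that was never looked up, which is where the memory corruption comes from.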
Cc: stable@vger.kernel.org # v3.6+, at least Signed-off-by: Al Viro Signed-off-by: Sasha Levin --- fs/namei.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/namei.c b/fs/namei.c index ccd7f98d85b9f..438572278a9e3 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3152,6 +3152,10 @@ static int do_last(struct nameidata *nd, struct path *path, goto exit_fput; } out: + if (unlikely(error > 0)) { + WARN_ON(1); + error = -EINVAL; + } if (got_write) mnt_drop_write(nd->path.mnt); path_put(&save_parent); From 9b77cd137fd841d7a14e1c9428cfc49f4df0306e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 27 Feb 2016 19:23:16 -0500 Subject: [PATCH 2057/2091] namei: ->d_inode of a pinned dentry is stable only for positives [ Upstream commit d4565649b6d6923369112758212b851adc407f0c ] both do_last() and walk_component() risk picking a NULL inode out of dentry about to become positive, *then* checking its flags and seeing that it's not negative anymore and using (already stale by then) value they'd fetched earlier. Usually ends up oopsing soon after that... Cc: stable@vger.kernel.org # v3.13+ Signed-off-by: Al Viro Signed-off-by: Sasha Levin --- fs/namei.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/namei.c b/fs/namei.c index 438572278a9e3..f3cc848da8bc4 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1619,10 +1619,10 @@ static inline int walk_component(struct nameidata *nd, struct path *path, if (err < 0) goto out_err; - inode = path->dentry->d_inode; err = -ENOENT; if (d_is_negative(path->dentry)) goto out_path_put; + inode = path->dentry->d_inode; } if (should_follow_link(path->dentry, follow)) { @@ -3078,6 +3078,7 @@ static int do_last(struct nameidata *nd, struct path *path, path_to_nameidata(path, nd); goto out; } + inode = path->dentry->d_inode; finish_lookup: /* we _can_ be in RCU mode here */ if (should_follow_link(path->dentry, !symlink_ok)) { From 1590808b43559d8330599158453f28f1b16ffd54 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Sun, 28 Feb 2016 16:31:39 +0200 Subject: [PATCH 2058/2091] vfio: fix ioctl error handling [ Upstream commit 8160c4e455820d5008a1116d2dca35f0363bb062 ] Calling return copy_to_user(...) in an ioctl will not do the right thing if there's a pagefault: copy_to_user returns the number of bytes not copied in this case. Fix up vfio to do return copy_to_user(...)) ? -EFAULT : 0; everywhere. Cc: stable@vger.kernel.org Signed-off-by: Michael S. Tsirkin Signed-off-by: Alex Williamson Signed-off-by: Sasha Levin --- drivers/vfio/pci/vfio_pci.c | 9 ++++++--- drivers/vfio/platform/vfio_platform_common.c | 9 ++++++--- drivers/vfio/vfio_iommu_type1.c | 6 ++++-- 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index e9851add6f4ef..c0f4ab83aaa83 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -446,7 +446,8 @@ static long vfio_pci_ioctl(void *device_data, info.num_regions = VFIO_PCI_NUM_REGIONS; info.num_irqs = VFIO_PCI_NUM_IRQS; - return copy_to_user((void __user *)arg, &info, minsz); + return copy_to_user((void __user *)arg, &info, minsz) ? + -EFAULT : 0; } else if (cmd == VFIO_DEVICE_GET_REGION_INFO) { struct pci_dev *pdev = vdev->pdev; @@ -520,7 +521,8 @@ static long vfio_pci_ioctl(void *device_data, return -EINVAL; } - return copy_to_user((void __user *)arg, &info, minsz); + return copy_to_user((void __user *)arg, &info, minsz) ? 
+ -EFAULT : 0; } else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) { struct vfio_irq_info info; @@ -555,7 +557,8 @@ static long vfio_pci_ioctl(void *device_data, else info.flags |= VFIO_IRQ_INFO_NORESIZE; - return copy_to_user((void __user *)arg, &info, minsz); + return copy_to_user((void __user *)arg, &info, minsz) ? + -EFAULT : 0; } else if (cmd == VFIO_DEVICE_SET_IRQS) { struct vfio_irq_set hdr; diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c index abcff7a1aa667..973b24ffe3323 100644 --- a/drivers/vfio/platform/vfio_platform_common.c +++ b/drivers/vfio/platform/vfio_platform_common.c @@ -163,7 +163,8 @@ static long vfio_platform_ioctl(void *device_data, info.num_regions = vdev->num_regions; info.num_irqs = vdev->num_irqs; - return copy_to_user((void __user *)arg, &info, minsz); + return copy_to_user((void __user *)arg, &info, minsz) ? + -EFAULT : 0; } else if (cmd == VFIO_DEVICE_GET_REGION_INFO) { struct vfio_region_info info; @@ -184,7 +185,8 @@ static long vfio_platform_ioctl(void *device_data, info.size = vdev->regions[info.index].size; info.flags = vdev->regions[info.index].flags; - return copy_to_user((void __user *)arg, &info, minsz); + return copy_to_user((void __user *)arg, &info, minsz) ? + -EFAULT : 0; } else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) { struct vfio_irq_info info; @@ -203,7 +205,8 @@ static long vfio_platform_ioctl(void *device_data, info.flags = vdev->irqs[info.index].flags; info.count = vdev->irqs[info.index].count; - return copy_to_user((void __user *)arg, &info, minsz); + return copy_to_user((void __user *)arg, &info, minsz) ? + -EFAULT : 0; } else if (cmd == VFIO_DEVICE_SET_IRQS) { struct vfio_irq_set hdr; diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 57d8c37a002b0..0922165407567 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -986,7 +986,8 @@ static long vfio_iommu_type1_ioctl(void *iommu_data, info.iova_pgsizes = vfio_pgsize_bitmap(iommu); - return copy_to_user((void __user *)arg, &info, minsz); + return copy_to_user((void __user *)arg, &info, minsz) ? + -EFAULT : 0; } else if (cmd == VFIO_IOMMU_MAP_DMA) { struct vfio_iommu_type1_dma_map map; @@ -1019,7 +1020,8 @@ static long vfio_iommu_type1_ioctl(void *iommu_data, if (ret) return ret; - return copy_to_user((void __user *)arg, &unmap, minsz); + return copy_to_user((void __user *)arg, &unmap, minsz) ? + -EFAULT : 0; } return -ENOTTY; From 3aa7c24c31ee77017718dea1e4e43a2cbc969b53 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sat, 27 Feb 2016 17:52:42 +0100 Subject: [PATCH 2059/2091] ALSA: ctl: Fix ioctls for X32 ABI [ Upstream commit 6236d8bb2afcfe71b88ecea554e0dc638090a45f ] The X32 ABI takes the same alignment like x86-64, and this may result in the incompatible struct size from ia32. Unfortunately, we hit this in some control ABI: struct snd_ctl_elem_value differs between them due to the position of 64bit variable array. This ends up with the unknown ioctl (ENOTTY) error. The fix is to add the compat entries for the new aligned struct. 
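To put numbers on the mismatch (editorial arithmetic, assuming the usual 64-byte struct snd_ctl_elem_id with no 64-bit members, so that part is identical on every ABI):

    /*                                        ia32      x32 / x86-64
     *  alignment of s64                         4             8
     *  offsetof(snd_ctl_elem_value, value)     68            72
     *  sizeof(struct snd_ctl_elem_value)      708           712
     *
     * _IOWR('U', 0x12, struct snd_ctl_elem_value) folds that sizeof()
     * into the command number, so an x32 process issues ioctl values
     * the old compat switch never listed -- hence the ENOTTY noted
     * above.
     */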
Reported-and-tested-by: Steven Newbury Cc: # v3.4+ Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/core/control_compat.c | 90 ++++++++++++++++++++++++++++++------- 1 file changed, 74 insertions(+), 16 deletions(-) diff --git a/sound/core/control_compat.c b/sound/core/control_compat.c index b9c0910fb8c4e..0608f216f3592 100644 --- a/sound/core/control_compat.c +++ b/sound/core/control_compat.c @@ -170,6 +170,19 @@ struct snd_ctl_elem_value32 { unsigned char reserved[128]; }; +#ifdef CONFIG_X86_X32 +/* x32 has a different alignment for 64bit values from ia32 */ +struct snd_ctl_elem_value_x32 { + struct snd_ctl_elem_id id; + unsigned int indirect; /* bit-field causes misalignment */ + union { + s32 integer[128]; + unsigned char data[512]; + s64 integer64[64]; + } value; + unsigned char reserved[128]; +}; +#endif /* CONFIG_X86_X32 */ /* get the value type and count of the control */ static int get_ctl_type(struct snd_card *card, struct snd_ctl_elem_id *id, @@ -219,9 +232,11 @@ static int get_elem_size(int type, int count) static int copy_ctl_value_from_user(struct snd_card *card, struct snd_ctl_elem_value *data, - struct snd_ctl_elem_value32 __user *data32, + void __user *userdata, + void __user *valuep, int *typep, int *countp) { + struct snd_ctl_elem_value32 __user *data32 = userdata; int i, type, size; int uninitialized_var(count); unsigned int indirect; @@ -239,8 +254,9 @@ static int copy_ctl_value_from_user(struct snd_card *card, if (type == SNDRV_CTL_ELEM_TYPE_BOOLEAN || type == SNDRV_CTL_ELEM_TYPE_INTEGER) { for (i = 0; i < count; i++) { + s32 __user *intp = valuep; int val; - if (get_user(val, &data32->value.integer[i])) + if (get_user(val, &intp[i])) return -EFAULT; data->value.integer.value[i] = val; } @@ -250,8 +266,7 @@ static int copy_ctl_value_from_user(struct snd_card *card, dev_err(card->dev, "snd_ioctl32_ctl_elem_value: unknown type %d\n", type); return -EINVAL; } - if (copy_from_user(data->value.bytes.data, - data32->value.data, size)) + if (copy_from_user(data->value.bytes.data, valuep, size)) return -EFAULT; } @@ -261,7 +276,8 @@ static int copy_ctl_value_from_user(struct snd_card *card, } /* restore the value to 32bit */ -static int copy_ctl_value_to_user(struct snd_ctl_elem_value32 __user *data32, +static int copy_ctl_value_to_user(void __user *userdata, + void __user *valuep, struct snd_ctl_elem_value *data, int type, int count) { @@ -270,22 +286,22 @@ static int copy_ctl_value_to_user(struct snd_ctl_elem_value32 __user *data32, if (type == SNDRV_CTL_ELEM_TYPE_BOOLEAN || type == SNDRV_CTL_ELEM_TYPE_INTEGER) { for (i = 0; i < count; i++) { + s32 __user *intp = valuep; int val; val = data->value.integer.value[i]; - if (put_user(val, &data32->value.integer[i])) + if (put_user(val, &intp[i])) return -EFAULT; } } else { size = get_elem_size(type, count); - if (copy_to_user(data32->value.data, - data->value.bytes.data, size)) + if (copy_to_user(valuep, data->value.bytes.data, size)) return -EFAULT; } return 0; } -static int snd_ctl_elem_read_user_compat(struct snd_card *card, - struct snd_ctl_elem_value32 __user *data32) +static int ctl_elem_read_user(struct snd_card *card, + void __user *userdata, void __user *valuep) { struct snd_ctl_elem_value *data; int err, type, count; @@ -294,7 +310,9 @@ static int snd_ctl_elem_read_user_compat(struct snd_card *card, if (data == NULL) return -ENOMEM; - if ((err = copy_ctl_value_from_user(card, data, data32, &type, &count)) < 0) + err = copy_ctl_value_from_user(card, data, userdata, valuep, + &type, &count); + if (err < 
0) goto error; snd_power_lock(card); @@ -303,14 +321,15 @@ static int snd_ctl_elem_read_user_compat(struct snd_card *card, err = snd_ctl_elem_read(card, data); snd_power_unlock(card); if (err >= 0) - err = copy_ctl_value_to_user(data32, data, type, count); + err = copy_ctl_value_to_user(userdata, valuep, data, + type, count); error: kfree(data); return err; } -static int snd_ctl_elem_write_user_compat(struct snd_ctl_file *file, - struct snd_ctl_elem_value32 __user *data32) +static int ctl_elem_write_user(struct snd_ctl_file *file, + void __user *userdata, void __user *valuep) { struct snd_ctl_elem_value *data; struct snd_card *card = file->card; @@ -320,7 +339,9 @@ static int snd_ctl_elem_write_user_compat(struct snd_ctl_file *file, if (data == NULL) return -ENOMEM; - if ((err = copy_ctl_value_from_user(card, data, data32, &type, &count)) < 0) + err = copy_ctl_value_from_user(card, data, userdata, valuep, + &type, &count); + if (err < 0) goto error; snd_power_lock(card); @@ -329,12 +350,39 @@ static int snd_ctl_elem_write_user_compat(struct snd_ctl_file *file, err = snd_ctl_elem_write(card, file, data); snd_power_unlock(card); if (err >= 0) - err = copy_ctl_value_to_user(data32, data, type, count); + err = copy_ctl_value_to_user(userdata, valuep, data, + type, count); error: kfree(data); return err; } +static int snd_ctl_elem_read_user_compat(struct snd_card *card, + struct snd_ctl_elem_value32 __user *data32) +{ + return ctl_elem_read_user(card, data32, &data32->value); +} + +static int snd_ctl_elem_write_user_compat(struct snd_ctl_file *file, + struct snd_ctl_elem_value32 __user *data32) +{ + return ctl_elem_write_user(file, data32, &data32->value); +} + +#ifdef CONFIG_X86_X32 +static int snd_ctl_elem_read_user_x32(struct snd_card *card, + struct snd_ctl_elem_value_x32 __user *data32) +{ + return ctl_elem_read_user(card, data32, &data32->value); +} + +static int snd_ctl_elem_write_user_x32(struct snd_ctl_file *file, + struct snd_ctl_elem_value_x32 __user *data32) +{ + return ctl_elem_write_user(file, data32, &data32->value); +} +#endif /* CONFIG_X86_X32 */ + /* add or replace a user control */ static int snd_ctl_elem_add_compat(struct snd_ctl_file *file, struct snd_ctl_elem_info32 __user *data32, @@ -393,6 +441,10 @@ enum { SNDRV_CTL_IOCTL_ELEM_WRITE32 = _IOWR('U', 0x13, struct snd_ctl_elem_value32), SNDRV_CTL_IOCTL_ELEM_ADD32 = _IOWR('U', 0x17, struct snd_ctl_elem_info32), SNDRV_CTL_IOCTL_ELEM_REPLACE32 = _IOWR('U', 0x18, struct snd_ctl_elem_info32), +#ifdef CONFIG_X86_X32 + SNDRV_CTL_IOCTL_ELEM_READ_X32 = _IOWR('U', 0x12, struct snd_ctl_elem_value_x32), + SNDRV_CTL_IOCTL_ELEM_WRITE_X32 = _IOWR('U', 0x13, struct snd_ctl_elem_value_x32), +#endif /* CONFIG_X86_X32 */ }; static inline long snd_ctl_ioctl_compat(struct file *file, unsigned int cmd, unsigned long arg) @@ -431,6 +483,12 @@ static inline long snd_ctl_ioctl_compat(struct file *file, unsigned int cmd, uns return snd_ctl_elem_add_compat(ctl, argp, 0); case SNDRV_CTL_IOCTL_ELEM_REPLACE32: return snd_ctl_elem_add_compat(ctl, argp, 1); +#ifdef CONFIG_X86_X32 + case SNDRV_CTL_IOCTL_ELEM_READ_X32: + return snd_ctl_elem_read_user_x32(ctl->card, argp); + case SNDRV_CTL_IOCTL_ELEM_WRITE_X32: + return snd_ctl_elem_write_user_x32(ctl, argp); +#endif /* CONFIG_X86_X32 */ } down_read(&snd_ioctl_rwsem); From bffe692e21f78b45735efd8aa0dd17f7141888db Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 28 Feb 2016 11:23:09 +0100 Subject: [PATCH 2060/2091] ALSA: pcm: Fix ioctls for X32 ABI [ Upstream commit 
513ace79b657e2022a592e77f24074e088681ecc ] X32 ABI uses the 64bit timespec in addition to 64bit alignment of 64bit values. This leads to incompatibilities in some PCM ioctls involved with snd_pcm_channel_info, snd_pcm_status and snd_pcm_sync_ptr structs. Fix the PCM compat ABI for these ioctls like the previous commit for ctl API. Reported-by: Steven Newbury Cc: # v3.4+ Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/core/pcm_compat.c | 177 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 176 insertions(+), 1 deletion(-) diff --git a/sound/core/pcm_compat.c b/sound/core/pcm_compat.c index 9630e9f72b7ba..1f64ab0c2a95a 100644 --- a/sound/core/pcm_compat.c +++ b/sound/core/pcm_compat.c @@ -183,6 +183,14 @@ static int snd_pcm_ioctl_channel_info_compat(struct snd_pcm_substream *substream return err; } +#ifdef CONFIG_X86_X32 +/* X32 ABI has the same struct as x86-64 for snd_pcm_channel_info */ +static int snd_pcm_channel_info_user(struct snd_pcm_substream *substream, + struct snd_pcm_channel_info __user *src); +#define snd_pcm_ioctl_channel_info_x32(s, p) \ + snd_pcm_channel_info_user(s, p) +#endif /* CONFIG_X86_X32 */ + struct snd_pcm_status32 { s32 state; struct compat_timespec trigger_tstamp; @@ -243,6 +251,71 @@ static int snd_pcm_status_user_compat(struct snd_pcm_substream *substream, return err; } +#ifdef CONFIG_X86_X32 +/* X32 ABI has 64bit timespec and 64bit alignment */ +struct snd_pcm_status_x32 { + s32 state; + u32 rsvd; /* alignment */ + struct timespec trigger_tstamp; + struct timespec tstamp; + u32 appl_ptr; + u32 hw_ptr; + s32 delay; + u32 avail; + u32 avail_max; + u32 overrange; + s32 suspended_state; + u32 audio_tstamp_data; + struct timespec audio_tstamp; + struct timespec driver_tstamp; + u32 audio_tstamp_accuracy; + unsigned char reserved[52-2*sizeof(struct timespec)]; +} __packed; + +#define put_timespec(src, dst) copy_to_user(dst, src, sizeof(*dst)) + +static int snd_pcm_status_user_x32(struct snd_pcm_substream *substream, + struct snd_pcm_status_x32 __user *src, + bool ext) +{ + struct snd_pcm_status status; + int err; + + memset(&status, 0, sizeof(status)); + /* + * with extension, parameters are read/write, + * get audio_tstamp_data from user, + * ignore rest of status structure + */ + if (ext && get_user(status.audio_tstamp_data, + (u32 __user *)(&src->audio_tstamp_data))) + return -EFAULT; + err = snd_pcm_status(substream, &status); + if (err < 0) + return err; + + if (clear_user(src, sizeof(*src))) + return -EFAULT; + if (put_user(status.state, &src->state) || + put_timespec(&status.trigger_tstamp, &src->trigger_tstamp) || + put_timespec(&status.tstamp, &src->tstamp) || + put_user(status.appl_ptr, &src->appl_ptr) || + put_user(status.hw_ptr, &src->hw_ptr) || + put_user(status.delay, &src->delay) || + put_user(status.avail, &src->avail) || + put_user(status.avail_max, &src->avail_max) || + put_user(status.overrange, &src->overrange) || + put_user(status.suspended_state, &src->suspended_state) || + put_user(status.audio_tstamp_data, &src->audio_tstamp_data) || + put_timespec(&status.audio_tstamp, &src->audio_tstamp) || + put_timespec(&status.driver_tstamp, &src->driver_tstamp) || + put_user(status.audio_tstamp_accuracy, &src->audio_tstamp_accuracy)) + return -EFAULT; + + return err; +} +#endif /* CONFIG_X86_X32 */ + /* both for HW_PARAMS and HW_REFINE */ static int snd_pcm_ioctl_hw_params_compat(struct snd_pcm_substream *substream, int refine, @@ -469,6 +542,93 @@ static int snd_pcm_ioctl_sync_ptr_compat(struct snd_pcm_substream *substream, 
return 0; } +#ifdef CONFIG_X86_X32 +/* X32 ABI has 64bit timespec and 64bit alignment */ +struct snd_pcm_mmap_status_x32 { + s32 state; + s32 pad1; + u32 hw_ptr; + u32 pad2; /* alignment */ + struct timespec tstamp; + s32 suspended_state; + struct timespec audio_tstamp; +} __packed; + +struct snd_pcm_mmap_control_x32 { + u32 appl_ptr; + u32 avail_min; +}; + +struct snd_pcm_sync_ptr_x32 { + u32 flags; + u32 rsvd; /* alignment */ + union { + struct snd_pcm_mmap_status_x32 status; + unsigned char reserved[64]; + } s; + union { + struct snd_pcm_mmap_control_x32 control; + unsigned char reserved[64]; + } c; +} __packed; + +static int snd_pcm_ioctl_sync_ptr_x32(struct snd_pcm_substream *substream, + struct snd_pcm_sync_ptr_x32 __user *src) +{ + struct snd_pcm_runtime *runtime = substream->runtime; + volatile struct snd_pcm_mmap_status *status; + volatile struct snd_pcm_mmap_control *control; + u32 sflags; + struct snd_pcm_mmap_control scontrol; + struct snd_pcm_mmap_status sstatus; + snd_pcm_uframes_t boundary; + int err; + + if (snd_BUG_ON(!runtime)) + return -EINVAL; + + if (get_user(sflags, &src->flags) || + get_user(scontrol.appl_ptr, &src->c.control.appl_ptr) || + get_user(scontrol.avail_min, &src->c.control.avail_min)) + return -EFAULT; + if (sflags & SNDRV_PCM_SYNC_PTR_HWSYNC) { + err = snd_pcm_hwsync(substream); + if (err < 0) + return err; + } + status = runtime->status; + control = runtime->control; + boundary = recalculate_boundary(runtime); + if (!boundary) + boundary = 0x7fffffff; + snd_pcm_stream_lock_irq(substream); + /* FIXME: we should consider the boundary for the sync from app */ + if (!(sflags & SNDRV_PCM_SYNC_PTR_APPL)) + control->appl_ptr = scontrol.appl_ptr; + else + scontrol.appl_ptr = control->appl_ptr % boundary; + if (!(sflags & SNDRV_PCM_SYNC_PTR_AVAIL_MIN)) + control->avail_min = scontrol.avail_min; + else + scontrol.avail_min = control->avail_min; + sstatus.state = status->state; + sstatus.hw_ptr = status->hw_ptr % boundary; + sstatus.tstamp = status->tstamp; + sstatus.suspended_state = status->suspended_state; + sstatus.audio_tstamp = status->audio_tstamp; + snd_pcm_stream_unlock_irq(substream); + if (put_user(sstatus.state, &src->s.status.state) || + put_user(sstatus.hw_ptr, &src->s.status.hw_ptr) || + put_timespec(&sstatus.tstamp, &src->s.status.tstamp) || + put_user(sstatus.suspended_state, &src->s.status.suspended_state) || + put_timespec(&sstatus.audio_tstamp, &src->s.status.audio_tstamp) || + put_user(scontrol.appl_ptr, &src->c.control.appl_ptr) || + put_user(scontrol.avail_min, &src->c.control.avail_min)) + return -EFAULT; + + return 0; +} +#endif /* CONFIG_X86_X32 */ /* */ @@ -487,7 +647,12 @@ enum { SNDRV_PCM_IOCTL_WRITEN_FRAMES32 = _IOW('A', 0x52, struct snd_xfern32), SNDRV_PCM_IOCTL_READN_FRAMES32 = _IOR('A', 0x53, struct snd_xfern32), SNDRV_PCM_IOCTL_SYNC_PTR32 = _IOWR('A', 0x23, struct snd_pcm_sync_ptr32), - +#ifdef CONFIG_X86_X32 + SNDRV_PCM_IOCTL_CHANNEL_INFO_X32 = _IOR('A', 0x32, struct snd_pcm_channel_info), + SNDRV_PCM_IOCTL_STATUS_X32 = _IOR('A', 0x20, struct snd_pcm_status_x32), + SNDRV_PCM_IOCTL_STATUS_EXT_X32 = _IOWR('A', 0x24, struct snd_pcm_status_x32), + SNDRV_PCM_IOCTL_SYNC_PTR_X32 = _IOWR('A', 0x23, struct snd_pcm_sync_ptr_x32), +#endif /* CONFIG_X86_X32 */ }; static long snd_pcm_ioctl_compat(struct file *file, unsigned int cmd, unsigned long arg) @@ -559,6 +724,16 @@ static long snd_pcm_ioctl_compat(struct file *file, unsigned int cmd, unsigned l return snd_pcm_ioctl_rewind_compat(substream, argp); case SNDRV_PCM_IOCTL_FORWARD32: return 
snd_pcm_ioctl_forward_compat(substream, argp); +#ifdef CONFIG_X86_X32 + case SNDRV_PCM_IOCTL_STATUS_X32: + return snd_pcm_status_user_x32(substream, argp, false); + case SNDRV_PCM_IOCTL_STATUS_EXT_X32: + return snd_pcm_status_user_x32(substream, argp, true); + case SNDRV_PCM_IOCTL_SYNC_PTR_X32: + return snd_pcm_ioctl_sync_ptr_x32(substream, argp); + case SNDRV_PCM_IOCTL_CHANNEL_INFO_X32: + return snd_pcm_ioctl_channel_info_x32(substream, argp); +#endif /* CONFIG_X86_X32 */ } return -ENOIOCTLCMD; From e59edea51ad3491f1c110acc1a3f2b0b6ee31f62 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 28 Feb 2016 11:28:08 +0100 Subject: [PATCH 2061/2091] ALSA: rawmidi: Fix ioctls X32 ABI [ Upstream commit 2251fbbc1539f05b0b206b37a602d5776be37252 ] Like the previous fixes for ctl and PCM, we need a fix for incompatible X32 ABI regarding the rawmidi: namely, struct snd_rawmidi_status has the timespec, and the size and the alignment on X32 differ from IA32. This patch fixes the incompatible ioctl for X32. Cc: # v3.4+ Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/core/rawmidi_compat.c | 53 +++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/sound/core/rawmidi_compat.c b/sound/core/rawmidi_compat.c index 5268c1f58c25b..09a89094dcf72 100644 --- a/sound/core/rawmidi_compat.c +++ b/sound/core/rawmidi_compat.c @@ -94,9 +94,58 @@ static int snd_rawmidi_ioctl_status_compat(struct snd_rawmidi_file *rfile, return 0; } +#ifdef CONFIG_X86_X32 +/* X32 ABI has 64bit timespec and 64bit alignment */ +struct snd_rawmidi_status_x32 { + s32 stream; + u32 rsvd; /* alignment */ + struct timespec tstamp; + u32 avail; + u32 xruns; + unsigned char reserved[16]; +} __attribute__((packed)); + +#define put_timespec(src, dst) copy_to_user(dst, src, sizeof(*dst)) + +static int snd_rawmidi_ioctl_status_x32(struct snd_rawmidi_file *rfile, + struct snd_rawmidi_status_x32 __user *src) +{ + int err; + struct snd_rawmidi_status status; + + if (rfile->output == NULL) + return -EINVAL; + if (get_user(status.stream, &src->stream)) + return -EFAULT; + + switch (status.stream) { + case SNDRV_RAWMIDI_STREAM_OUTPUT: + err = snd_rawmidi_output_status(rfile->output, &status); + break; + case SNDRV_RAWMIDI_STREAM_INPUT: + err = snd_rawmidi_input_status(rfile->input, &status); + break; + default: + return -EINVAL; + } + if (err < 0) + return err; + + if (put_timespec(&status.tstamp, &src->tstamp) || + put_user(status.avail, &src->avail) || + put_user(status.xruns, &src->xruns)) + return -EFAULT; + + return 0; +} +#endif /* CONFIG_X86_X32 */ + enum { SNDRV_RAWMIDI_IOCTL_PARAMS32 = _IOWR('W', 0x10, struct snd_rawmidi_params32), SNDRV_RAWMIDI_IOCTL_STATUS32 = _IOWR('W', 0x20, struct snd_rawmidi_status32), +#ifdef CONFIG_X86_X32 + SNDRV_RAWMIDI_IOCTL_STATUS_X32 = _IOWR('W', 0x20, struct snd_rawmidi_status_x32), +#endif /* CONFIG_X86_X32 */ }; static long snd_rawmidi_ioctl_compat(struct file *file, unsigned int cmd, unsigned long arg) @@ -115,6 +164,10 @@ static long snd_rawmidi_ioctl_compat(struct file *file, unsigned int cmd, unsign return snd_rawmidi_ioctl_params_compat(rfile, argp); case SNDRV_RAWMIDI_IOCTL_STATUS32: return snd_rawmidi_ioctl_status_compat(rfile, argp); +#ifdef CONFIG_X86_X32 + case SNDRV_RAWMIDI_IOCTL_STATUS_X32: + return snd_rawmidi_ioctl_status_x32(rfile, argp); +#endif /* CONFIG_X86_X32 */ } return -ENOIOCTLCMD; } From 6dcecec27d0ec5869503ff31b0316c3956a3aa89 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 28 Feb 2016 11:36:14 +0100 Subject: [PATCH 2062/2091] ALSA: 
timer: Fix broken compat timer user status ioctl [ Upstream commit 3a72494ac2a3bd229db941d51e7efe2f6ccd947b ] The timer user status compat ioctl returned the bogus struct used for 64bit architectures instead of the 32bit one. This patch addresses it to return the proper struct. Cc: Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/core/timer_compat.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sound/core/timer_compat.c b/sound/core/timer_compat.c index e05802ae6e1b1..8e7eddf35c6a4 100644 --- a/sound/core/timer_compat.c +++ b/sound/core/timer_compat.c @@ -70,13 +70,14 @@ static int snd_timer_user_status_compat(struct file *file, struct snd_timer_status32 __user *_status) { struct snd_timer_user *tu; - struct snd_timer_status status; + struct snd_timer_status32 status; tu = file->private_data; if (snd_BUG_ON(!tu->timeri)) return -ENXIO; memset(&status, 0, sizeof(status)); - status.tstamp = tu->tstamp; + status.tstamp.tv_sec = tu->tstamp.tv_sec; + status.tstamp.tv_nsec = tu->tstamp.tv_nsec; status.resolution = snd_timer_resolution(tu->timeri); status.lost = tu->timeri->lost; status.overrun = tu->overrun; From fb5b8ac0043cc2c0e8816341f8a875ab320563fe Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 28 Feb 2016 11:41:47 +0100 Subject: [PATCH 2063/2091] ALSA: timer: Fix ioctls for X32 ABI [ Upstream commit b24e7ad1fdc22177eb3e51584e1cfcb45d818488 ] X32 ABI takes the 64bit timespec, thus the timer user status ioctl becomes incompatible with IA32. This results in NOTTY error when the ioctl is issued. Meanwhile, this struct in X32 is essentially identical with the one in X86-64, so we can just bypassing to the existing code for this specific compat ioctl. Cc: # v3.4+ Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/core/timer_compat.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/sound/core/timer_compat.c b/sound/core/timer_compat.c index 8e7eddf35c6a4..2e908225d754c 100644 --- a/sound/core/timer_compat.c +++ b/sound/core/timer_compat.c @@ -89,12 +89,21 @@ static int snd_timer_user_status_compat(struct file *file, return 0; } +#ifdef CONFIG_X86_X32 +/* X32 ABI has the same struct as x86-64 */ +#define snd_timer_user_status_x32(file, s) \ + snd_timer_user_status(file, s) +#endif /* CONFIG_X86_X32 */ + /* */ enum { SNDRV_TIMER_IOCTL_INFO32 = _IOR('T', 0x11, struct snd_timer_info32), SNDRV_TIMER_IOCTL_STATUS32 = _IOW('T', 0x14, struct snd_timer_status32), +#ifdef CONFIG_X86_X32 + SNDRV_TIMER_IOCTL_STATUS_X32 = _IOW('T', 0x14, struct snd_timer_status), +#endif /* CONFIG_X86_X32 */ }; static long snd_timer_user_ioctl_compat(struct file *file, unsigned int cmd, unsigned long arg) @@ -123,6 +132,10 @@ static long snd_timer_user_ioctl_compat(struct file *file, unsigned int cmd, uns return snd_timer_user_info_compat(file, argp); case SNDRV_TIMER_IOCTL_STATUS32: return snd_timer_user_status_compat(file, argp); +#ifdef CONFIG_X86_X32 + case SNDRV_TIMER_IOCTL_STATUS_X32: + return snd_timer_user_status_x32(file, argp); +#endif /* CONFIG_X86_X32 */ } return -ENOIOCTLCMD; } From 94a7d752e43717119822fee8b04be903e694017e Mon Sep 17 00:00:00 2001 From: Justin Maggard Date: Tue, 9 Feb 2016 15:52:08 -0800 Subject: [PATCH 2064/2091] cifs: fix out-of-bounds access in lease parsing [ Upstream commit deb7deff2f00bdbbcb3d560dad2a89ef37df837d ] When opening a file, SMB2_open() attempts to parse the lease state from the SMB2 CREATE Response. 
However, the parsing code was not careful to ensure that the create contexts are not empty or invalid, which can lead to out- of-bounds memory access. This can be seen easily by trying to read a file from a OSX 10.11 SMB3 server. Here is sample crash output: BUG: unable to handle kernel paging request at ffff8800a1a77cc6 IP: [] SMB2_open+0x804/0x960 PGD 8f77067 PUD 0 Oops: 0000 [#1] SMP Modules linked in: CPU: 3 PID: 2876 Comm: cp Not tainted 4.5.0-rc3.x86_64.1+ #14 Hardware name: NETGEAR ReadyNAS 314 /ReadyNAS 314 , BIOS 4.6.5 10/11/2012 task: ffff880073cdc080 ti: ffff88005b31c000 task.ti: ffff88005b31c000 RIP: 0010:[] [] SMB2_open+0x804/0x960 RSP: 0018:ffff88005b31fa08 EFLAGS: 00010282 RAX: 0000000000000015 RBX: 0000000000000000 RCX: 0000000000000006 RDX: 0000000000000000 RSI: 0000000000000246 RDI: ffff88007eb8c8b0 RBP: ffff88005b31fad8 R08: 666666203d206363 R09: 6131613030383866 R10: 3030383866666666 R11: 00000000000002b0 R12: ffff8800660fd800 R13: ffff8800a1a77cc2 R14: 00000000424d53fe R15: ffff88005f5a28c0 FS: 00007f7c8a2897c0(0000) GS:ffff88007eb80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: ffff8800a1a77cc6 CR3: 000000005b281000 CR4: 00000000000006e0 Stack: ffff88005b31fa70 ffffffff88278789 00000000000001d3 ffff88005f5a2a80 ffffffff00000003 ffff88005d029d00 ffff88006fde05a0 0000000000000000 ffff88005b31fc78 ffff88006fde0780 ffff88005b31fb2f 0000000100000fe0 Call Trace: [] ? cifsConvertToUTF16+0x159/0x2d0 [] smb2_open_file+0x98/0x210 [] ? __kmalloc+0x1c/0xe0 [] cifs_open+0x2a4/0x720 [] do_dentry_open+0x1ff/0x310 [] ? cifsFileInfo_get+0x30/0x30 [] vfs_open+0x52/0x60 [] path_openat+0x170/0xf70 [] ? remove_wait_queue+0x48/0x50 [] do_filp_open+0x79/0xd0 [] ? __alloc_fd+0x3a/0x170 [] do_sys_open+0x114/0x1e0 [] SyS_open+0x19/0x20 [] entry_SYSCALL_64_fastpath+0x12/0x6a Code: 4d 8d 6c 07 04 31 c0 4c 89 ee e8 47 6f e5 ff 31 c9 41 89 ce 44 89 f1 48 c7 c7 28 b1 bd 88 31 c0 49 01 cd 4c 89 ee e8 2b 6f e5 ff <45> 0f b7 75 04 48 c7 c7 31 b1 bd 88 31 c0 4d 01 ee 4c 89 f6 e8 RIP [] SMB2_open+0x804/0x960 RSP CR2: ffff8800a1a77cc6 ---[ end trace d9f69ba64feee469 ]--- Signed-off-by: Justin Maggard Signed-off-by: Steve French CC: Stable Signed-off-by: Sasha Levin --- fs/cifs/smb2pdu.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 894f259d3989b..657a9c5c4fff2 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1042,21 +1042,25 @@ parse_lease_state(struct TCP_Server_Info *server, struct smb2_create_rsp *rsp, { char *data_offset; struct create_context *cc; - unsigned int next = 0; + unsigned int next; + unsigned int remaining; char *name; data_offset = (char *)rsp + 4 + le32_to_cpu(rsp->CreateContextsOffset); + remaining = le32_to_cpu(rsp->CreateContextsLength); cc = (struct create_context *)data_offset; - do { - cc = (struct create_context *)((char *)cc + next); + while (remaining >= sizeof(struct create_context)) { name = le16_to_cpu(cc->NameOffset) + (char *)cc; - if (le16_to_cpu(cc->NameLength) != 4 || - strncmp(name, "RqLs", 4)) { - next = le32_to_cpu(cc->Next); - continue; - } - return server->ops->parse_lease_buf(cc, epoch); - } while (next != 0); + if (le16_to_cpu(cc->NameLength) == 4 && + strncmp(name, "RqLs", 4) == 0) + return server->ops->parse_lease_buf(cc, epoch); + + next = le32_to_cpu(cc->Next); + if (!next) + break; + remaining -= next; + cc = (struct create_context *)((char *)cc + next); + } return 0; } From 78b821d76e779822877604052f06219294e9e038 Mon 
Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Sat, 27 Feb 2016 11:58:18 +0300 Subject: [PATCH 2065/2091] CIFS: Fix SMB2+ interim response processing for read requests [ Upstream commit 6cc3b24235929b54acd5ecc987ef11a425bd209e ] For interim responses we only need to parse a header and update a number credits. Now it is done for all SMB2+ command except SMB2_READ which is wrong. Fix this by adding such processing. Signed-off-by: Pavel Shilovsky Tested-by: Shirish Pargaonkar CC: Stable Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/cifs/cifssmb.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index f26ffbfc64d8b..f1a5067d54947 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -1395,11 +1395,10 @@ CIFS_open(const unsigned int xid, struct cifs_open_parms *oparms, int *oplock, * current bigbuf. */ static int -cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid) +discard_remaining_data(struct TCP_Server_Info *server) { unsigned int rfclen = get_rfc1002_length(server->smallbuf); int remaining = rfclen + 4 - server->total_read; - struct cifs_readdata *rdata = mid->callback_data; while (remaining > 0) { int length; @@ -1413,10 +1412,20 @@ cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid) remaining -= length; } - dequeue_mid(mid, rdata->result); return 0; } +static int +cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid) +{ + int length; + struct cifs_readdata *rdata = mid->callback_data; + + length = discard_remaining_data(server); + dequeue_mid(mid, rdata->result); + return length; +} + int cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) { @@ -1445,6 +1454,12 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) return length; server->total_read += length; + if (server->ops->is_status_pending && + server->ops->is_status_pending(buf, server, 0)) { + discard_remaining_data(server); + return -1; + } + /* Was the SMB read successful? */ rdata->result = server->ops->map_error(buf, false); if (rdata->result != 0) { From 092bb6bd432857cde601ab6f2716ef313f6f7023 Mon Sep 17 00:00:00 2001 From: Yadan Fan Date: Mon, 29 Feb 2016 14:44:57 +0800 Subject: [PATCH 2066/2091] Fix cifs_uniqueid_to_ino_t() function for s390x [ Upstream commit 1ee9f4bd1a97026a7b2d7ae9f1f74b45680d0003 ] This issue is caused by commit 02323db17e3a7 ("cifs: fix cifs_uniqueid_to_ino_t not to ever return 0"), when BITS_PER_LONG is 64 on s390x, the corresponding cifs_uniqueid_to_ino_t() function will cast 64-bit fileid to 32-bit by using (ino_t)fileid, because ino_t (typdefed __kernel_ino_t) is int type. It's defined in arch/s390/include/uapi/asm/posix_types.h #ifndef __s390x__ typedef unsigned long __kernel_ino_t; ... #else /* __s390x__ */ typedef unsigned int __kernel_ino_t; So the #ifdef condition is wrong for s390x, we can just still use one cifs_uniqueid_to_ino_t() function with comparing sizeof(ino_t) and sizeof(u64) to choose the correct execution accordingly. Signed-off-by: Yadan Fan CC: stable Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/cifs/cifsfs.h | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 252f5c15806bc..78a7b1d733543 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -31,19 +31,15 @@ * so that it will fit. 
We use hash_64 to convert the value to 31 bits, and * then add 1, to ensure that we don't end up with a 0 as the value. */ -#if BITS_PER_LONG == 64 static inline ino_t cifs_uniqueid_to_ino_t(u64 fileid) { + if ((sizeof(ino_t)) < (sizeof(u64))) + return (ino_t)hash_64(fileid, (sizeof(ino_t) * 8) - 1) + 1; + return (ino_t)fileid; + } -#else -static inline ino_t -cifs_uniqueid_to_ino_t(u64 fileid) -{ - return (ino_t)hash_64(fileid, (sizeof(ino_t) * 8) - 1) + 1; -} -#endif extern struct file_system_type cifs_fs_type; extern const struct address_space_operations cifs_addr_ops; From 222b341c1063cb7aa497d7ed051ccb60349f54bb Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Sun, 28 Feb 2016 17:32:07 +0200 Subject: [PATCH 2067/2091] arm/arm64: KVM: Fix ioctl error handling [ Upstream commit 4cad67fca3fc952d6f2ed9e799621f07666a560f ] Calling return copy_to_user(...) in an ioctl will not do the right thing if there's a pagefault: copy_to_user returns the number of bytes not copied in this case. Fix up kvm to do return copy_to_user(...)) ? -EFAULT : 0; everywhere. Cc: stable@vger.kernel.org Acked-by: Christoffer Dall Signed-off-by: Michael S. Tsirkin Signed-off-by: Marc Zyngier Signed-off-by: Sasha Levin --- arch/arm/kvm/guest.c | 2 +- arch/arm64/kvm/guest.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index d503fbb787d36..88993cc95e8e6 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -155,7 +155,7 @@ static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) u64 val; val = kvm_arm_timer_get_reg(vcpu, reg->id); - return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)); + return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)) ? -EFAULT : 0; } static unsigned long num_core_regs(void) diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 9535bd555d1d4..d4e04d2237c41 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -184,7 +184,7 @@ static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) u64 val; val = kvm_arm_timer_get_reg(vcpu, reg->id); - return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)); + return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)) ? -EFAULT : 0; } /** From d850c054f3aeedf5e18290d14b097b5ed67fa9fb Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Sun, 28 Feb 2016 17:35:59 +0200 Subject: [PATCH 2068/2091] MIPS: kvm: Fix ioctl error handling. [ Upstream commit 887349f69f37e71e2a8bfbd743831625a0b2ff51 ] Calling return copy_to_user(...) or return copy_from_user in an ioctl will not do the right thing if there's a pagefault: copy_to_user/copy_from_user return the number of bytes not copied in this case. Fix up kvm on mips to do return copy_to_user(...)) ? -EFAULT : 0; and return copy_from_user(...)) ? -EFAULT : 0; everywhere. Signed-off-by: Michael S. 
Tsirkin Cc: Paolo Bonzini Cc: James Hogan Cc: linux-kernel@vger.kernel.org Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org Cc: kvm@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/12709/ Signed-off-by: Ralf Baechle Signed-off-by: Sasha Levin --- arch/mips/kvm/mips.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 22ee0afc7d5dc..ace4ed7d41c6d 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -700,7 +700,7 @@ static int kvm_mips_get_reg(struct kvm_vcpu *vcpu, } else if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U128) { void __user *uaddr = (void __user *)(long)reg->addr; - return copy_to_user(uaddr, vs, 16); + return copy_to_user(uaddr, vs, 16) ? -EFAULT : 0; } else { return -EINVAL; } @@ -730,7 +730,7 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu, } else if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U128) { void __user *uaddr = (void __user *)(long)reg->addr; - return copy_from_user(vs, uaddr, 16); + return copy_from_user(vs, uaddr, 16) ? -EFAULT : 0; } else { return -EINVAL; } From b9800dd1d9eeb4b5f81a485b270702a02b142b9b Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 29 Feb 2016 14:25:16 +0100 Subject: [PATCH 2069/2091] ALSA: hdspm: Fix wrong boolean ctl value accesses [ Upstream commit 537e48136295c5860a92138c5ea3959b9542868b ] snd-hdspm driver accesses enum item values (int) instead of boolean values (long) wrongly for some ctl elements. This patch fixes them. Cc: Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/rme9652/hdspm.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/pci/rme9652/hdspm.c b/sound/pci/rme9652/hdspm.c index cb666c73712d1..67c90f0599257 100644 --- a/sound/pci/rme9652/hdspm.c +++ b/sound/pci/rme9652/hdspm.c @@ -2261,7 +2261,7 @@ static int snd_hdspm_put_system_sample_rate(struct snd_kcontrol *kcontrol, { struct hdspm *hdspm = snd_kcontrol_chip(kcontrol); - hdspm_set_dds_value(hdspm, ucontrol->value.enumerated.item[0]); + hdspm_set_dds_value(hdspm, ucontrol->value.integer.value[0]); return 0; } @@ -4449,7 +4449,7 @@ static int snd_hdspm_get_tco_word_term(struct snd_kcontrol *kcontrol, { struct hdspm *hdspm = snd_kcontrol_chip(kcontrol); - ucontrol->value.enumerated.item[0] = hdspm->tco->term; + ucontrol->value.integer.value[0] = hdspm->tco->term; return 0; } @@ -4460,8 +4460,8 @@ static int snd_hdspm_put_tco_word_term(struct snd_kcontrol *kcontrol, { struct hdspm *hdspm = snd_kcontrol_chip(kcontrol); - if (hdspm->tco->term != ucontrol->value.enumerated.item[0]) { - hdspm->tco->term = ucontrol->value.enumerated.item[0]; + if (hdspm->tco->term != ucontrol->value.integer.value[0]) { + hdspm->tco->term = ucontrol->value.integer.value[0]; hdspm_tco_write(hdspm); From 6177e82a6b6586a057e2f00940e2e220b993547e Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 29 Feb 2016 14:32:42 +0100 Subject: [PATCH 2070/2091] ALSA: hdspm: Fix zero-division [ Upstream commit c1099c3294c2344110085a38c50e478a5992b368 ] HDSPM driver contains a code issuing zero-division potentially in system sample rate ctl code. This patch fixes it by not processing a zero or invalid rate value as a divisor, as well as excluding the invalid value to be passed via the given ctl element. 
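The fix boils down to range-checking a user-supplied control value before it can ever reach a division. A minimal sketch of that guard pattern, using a hypothetical device struct and the 27 kHz..207 kHz bounds the patch enforces (the field and function names here are illustrative, not the driver's actual symbols):

#include <linux/math64.h>	/* div_u64() */

struct my_dev { u64 period_ns; };	/* hypothetical device state */

static int set_sample_rate_checked(struct my_dev *dev, long rate)
{
	/* reject 0 and out-of-range values before they become a divisor */
	if (rate < 27000 || rate > 207000)
		return -EINVAL;
	dev->period_ns = div_u64(1000000000ULL, (u32)rate);	/* safe: rate != 0 */
	return 0;
}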
Cc: Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/rme9652/hdspm.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/pci/rme9652/hdspm.c b/sound/pci/rme9652/hdspm.c index 67c90f0599257..7f6190606f5e7 100644 --- a/sound/pci/rme9652/hdspm.c +++ b/sound/pci/rme9652/hdspm.c @@ -1601,6 +1601,9 @@ static void hdspm_set_dds_value(struct hdspm *hdspm, int rate) { u64 n; + if (snd_BUG_ON(rate <= 0)) + return; + if (rate >= 112000) rate /= 4; else if (rate >= 56000) @@ -2215,6 +2218,8 @@ static int hdspm_get_system_sample_rate(struct hdspm *hdspm) } else { /* slave mode, return external sample rate */ rate = hdspm_external_sample_rate(hdspm); + if (!rate) + rate = hdspm->system_sample_rate; } } @@ -2260,7 +2265,10 @@ static int snd_hdspm_put_system_sample_rate(struct snd_kcontrol *kcontrol, ucontrol) { struct hdspm *hdspm = snd_kcontrol_chip(kcontrol); + int rate = ucontrol->value.integer.value[0]; + if (rate < 27000 || rate > 207000) + return -EINVAL; hdspm_set_dds_value(hdspm, ucontrol->value.integer.value[0]); return 0; } From 67d352da6e0e14bd5987661a2be95bbaaf4c3d53 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 29 Feb 2016 14:26:43 +0100 Subject: [PATCH 2071/2091] ALSA: hdsp: Fix wrong boolean ctl value accesses [ Upstream commit eab3c4db193f5fcccf70e884de9a922ca2c63d80 ] snd-hdsp driver accesses enum item values (int) instead of boolean values (long) wrongly for some ctl elements. This patch fixes them. Cc: Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/rme9652/hdsp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/pci/rme9652/hdsp.c b/sound/pci/rme9652/hdsp.c index c19e021ccf668..11246280945dd 100644 --- a/sound/pci/rme9652/hdsp.c +++ b/sound/pci/rme9652/hdsp.c @@ -2878,7 +2878,7 @@ static int snd_hdsp_get_dds_offset(struct snd_kcontrol *kcontrol, struct snd_ctl { struct hdsp *hdsp = snd_kcontrol_chip(kcontrol); - ucontrol->value.enumerated.item[0] = hdsp_dds_offset(hdsp); + ucontrol->value.integer.value[0] = hdsp_dds_offset(hdsp); return 0; } @@ -2890,7 +2890,7 @@ static int snd_hdsp_put_dds_offset(struct snd_kcontrol *kcontrol, struct snd_ctl if (!snd_hdsp_use_is_exclusive(hdsp)) return -EBUSY; - val = ucontrol->value.enumerated.item[0]; + val = ucontrol->value.integer.value[0]; spin_lock_irq(&hdsp->lock); if (val != hdsp_dds_offset(hdsp)) change = (hdsp_set_dds_offset(hdsp, val) == 0) ? 1 : 0; From 4950beba6231d72c54aa5f52d96487fdda8292ed Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 11 Jun 2015 14:46:46 +0200 Subject: [PATCH 2072/2091] seqcount: Rename write_seqcount_barrier() [ Upstream commit a7c6f571ff51cc77d90dd54968f7c5c938c43998 ] I'll shortly be introducing another seqcount primitive that's useful to provide ordering semantics and would like to use the write_seqcount_barrier() name for that. Seeing how there's only one user of the current primitive, lets rename it to invalidate, as that appears what its doing. While there, employ lockdep_assert_held() instead of assert_spin_locked() to not generate debug code for regular kernels. 
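The motivation for switching to lockdep_assert_held() is what gets compiled into non-debug kernels: assert_spin_locked() performs a runtime check in every build, while the lockdep variant compiles away entirely when lockdep is disabled. A rough sketch of the difference, simplified from the real definitions in <linux/spinlock.h> and <linux/lockdep.h> (the my_* names are placeholders):

#ifdef CONFIG_LOCKDEP
# define my_assert_held(l)	WARN_ON(debug_locks && !lockdep_is_held(l))
#else
# define my_assert_held(l)	do { (void)(l); } while (0)	/* no code emitted */
#endif

/* assert_spin_locked(), by contrast, is roughly: */
#define my_assert_spin_locked(l)	BUG_ON(!spin_is_locked(l))	/* checked in every build */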
Signed-off-by: Peter Zijlstra (Intel) Cc: ktkhai@parallels.com Cc: rostedt@goodmis.org Cc: juri.lelli@gmail.com Cc: pang.xunlei@linaro.org Cc: Oleg Nesterov Cc: wanpeng.li@linux.intel.com Cc: Paul McKenney Cc: Al Viro Cc: Linus Torvalds Cc: umgwanakikbuti@gmail.com Link: http://lkml.kernel.org/r/20150611124743.279926217@infradead.org Signed-off-by: Thomas Gleixner Signed-off-by: Sasha Levin --- fs/dcache.c | 16 ++++++++-------- include/linux/seqlock.h | 6 +++--- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 0046ab7d4f3de..d7fe995dd32df 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -322,17 +322,17 @@ static void dentry_free(struct dentry *dentry) } /** - * dentry_rcuwalk_barrier - invalidate in-progress rcu-walk lookups + * dentry_rcuwalk_invalidate - invalidate in-progress rcu-walk lookups * @dentry: the target dentry * After this call, in-progress rcu-walk path lookup will fail. This * should be called after unhashing, and after changing d_inode (if * the dentry has not already been unhashed). */ -static inline void dentry_rcuwalk_barrier(struct dentry *dentry) +static inline void dentry_rcuwalk_invalidate(struct dentry *dentry) { - assert_spin_locked(&dentry->d_lock); - /* Go through a barrier */ - write_seqcount_barrier(&dentry->d_seq); + lockdep_assert_held(&dentry->d_lock); + /* Go through am invalidation barrier */ + write_seqcount_invalidate(&dentry->d_seq); } /* @@ -372,7 +372,7 @@ static void dentry_unlink_inode(struct dentry * dentry) struct inode *inode = dentry->d_inode; __d_clear_type_and_inode(dentry); hlist_del_init(&dentry->d_u.d_alias); - dentry_rcuwalk_barrier(dentry); + dentry_rcuwalk_invalidate(dentry); spin_unlock(&dentry->d_lock); spin_unlock(&inode->i_lock); if (!inode->i_nlink) @@ -494,7 +494,7 @@ void __d_drop(struct dentry *dentry) __hlist_bl_del(&dentry->d_hash); dentry->d_hash.pprev = NULL; hlist_bl_unlock(b); - dentry_rcuwalk_barrier(dentry); + dentry_rcuwalk_invalidate(dentry); } } EXPORT_SYMBOL(__d_drop); @@ -1758,7 +1758,7 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode) if (inode) hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry); __d_set_inode_and_type(dentry, inode, add_flags); - dentry_rcuwalk_barrier(dentry); + dentry_rcuwalk_invalidate(dentry); spin_unlock(&dentry->d_lock); fsnotify_d_instantiate(dentry, inode); } diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 5f68d0a391cee..c07e3a5360990 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -266,13 +266,13 @@ static inline void write_seqcount_end(seqcount_t *s) } /** - * write_seqcount_barrier - invalidate in-progress read-side seq operations + * write_seqcount_invalidate - invalidate in-progress read-side seq operations * @s: pointer to seqcount_t * - * After write_seqcount_barrier, no read-side seq operations will complete + * After write_seqcount_invalidate, no read-side seq operations will complete * successfully and see data older than this. */ -static inline void write_seqcount_barrier(seqcount_t *s) +static inline void write_seqcount_invalidate(seqcount_t *s) { smp_wmb(); s->sequence+=2; From c8ce76e3c6cd937e8b3fd8ae3573f23767b70eca Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 29 Feb 2016 12:12:46 -0500 Subject: [PATCH 2073/2091] use ->d_seq to get coherency between ->d_inode and ->d_flags [ Upstream commit a528aca7f359f4b0b1d72ae406097e491a5ba9ea ] Games with ordering and barriers are way too brittle. 
Just bump ->d_seq before and after updating ->d_inode and ->d_flags type bits, so that verifying ->d_seq would guarantee they are coherent. Cc: stable@vger.kernel.org # v3.13+ Signed-off-by: Al Viro Signed-off-by: Sasha Levin --- fs/dcache.c | 20 +++++--------------- include/linux/dcache.h | 4 +--- 2 files changed, 6 insertions(+), 18 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index d7fe995dd32df..10bce74c427f0 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -269,9 +269,6 @@ static inline int dname_external(const struct dentry *dentry) return dentry->d_name.name != dentry->d_iname; } -/* - * Make sure other CPUs see the inode attached before the type is set. - */ static inline void __d_set_inode_and_type(struct dentry *dentry, struct inode *inode, unsigned type_flags) @@ -279,28 +276,18 @@ static inline void __d_set_inode_and_type(struct dentry *dentry, unsigned flags; dentry->d_inode = inode; - smp_wmb(); flags = READ_ONCE(dentry->d_flags); flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU); flags |= type_flags; WRITE_ONCE(dentry->d_flags, flags); } -/* - * Ideally, we want to make sure that other CPUs see the flags cleared before - * the inode is detached, but this is really a violation of RCU principles - * since the ordering suggests we should always set inode before flags. - * - * We should instead replace or discard the entire dentry - but that sucks - * performancewise on mass deletion/rename. - */ static inline void __d_clear_type_and_inode(struct dentry *dentry) { unsigned flags = READ_ONCE(dentry->d_flags); flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU); WRITE_ONCE(dentry->d_flags, flags); - smp_wmb(); dentry->d_inode = NULL; } @@ -370,9 +357,11 @@ static void dentry_unlink_inode(struct dentry * dentry) __releases(dentry->d_inode->i_lock) { struct inode *inode = dentry->d_inode; + + raw_write_seqcount_begin(&dentry->d_seq); __d_clear_type_and_inode(dentry); hlist_del_init(&dentry->d_u.d_alias); - dentry_rcuwalk_invalidate(dentry); + raw_write_seqcount_end(&dentry->d_seq); spin_unlock(&dentry->d_lock); spin_unlock(&inode->i_lock); if (!inode->i_nlink) @@ -1757,8 +1746,9 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode) spin_lock(&dentry->d_lock); if (inode) hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry); + raw_write_seqcount_begin(&dentry->d_seq); __d_set_inode_and_type(dentry, inode, add_flags); - dentry_rcuwalk_invalidate(dentry); + raw_write_seqcount_end(&dentry->d_seq); spin_unlock(&dentry->d_lock); fsnotify_d_instantiate(dentry, inode); } diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 167ec0934049d..ca9df45217344 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -408,9 +408,7 @@ static inline bool d_mountpoint(const struct dentry *dentry) */ static inline unsigned __d_entry_type(const struct dentry *dentry) { - unsigned type = READ_ONCE(dentry->d_flags); - smp_rmb(); - return type & DCACHE_ENTRY_TYPE; + return dentry->d_flags & DCACHE_ENTRY_TYPE; } static inline bool d_is_miss(const struct dentry *dentry) From 369ac9cea85f7eea546ddea7e712672ffeabd5ce Mon Sep 17 00:00:00 2001 From: Patrik Halfar Date: Sat, 20 Feb 2016 18:49:56 +0100 Subject: [PATCH 2074/2091] USB: qcserial: add Dell Wireless 5809e Gobi 4G HSPA+ (rev3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 013dd239d6220a4e0dfdf0d45a82c34f1fd73deb ] New revision of Dell Wireless 5809e Gobi 4G HSPA+ Mobile Broadband Card has new idProduct. 
Bus 002 Device 006: ID 413c:81b3 Dell Computer Corp. Device Descriptor: bLength 18 bDescriptorType 1 bcdUSB 2.00 bDeviceClass 0 bDeviceSubClass 0 bDeviceProtocol 0 bMaxPacketSize0 64 idVendor 0x413c Dell Computer Corp. idProduct 0x81b3 bcdDevice 0.06 iManufacturer 1 Sierra Wireless, Incorporated iProduct 2 Dell Wireless 5809e Gobi™ 4G HSPA+ Mobile Broadband Card iSerial 3 bNumConfigurations 2 Signed-off-by: Patrik Halfar Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Sasha Levin --- drivers/usb/serial/qcserial.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c index 514fa91cf74e4..f1eb03eed5cf6 100644 --- a/drivers/usb/serial/qcserial.c +++ b/drivers/usb/serial/qcserial.c @@ -163,6 +163,7 @@ static const struct usb_device_id id_table[] = { {DEVICE_SWI(0x413c, 0x81a8)}, /* Dell Wireless 5808 Gobi(TM) 4G LTE Mobile Broadband Card */ {DEVICE_SWI(0x413c, 0x81a9)}, /* Dell Wireless 5808e Gobi(TM) 4G LTE Mobile Broadband Card */ {DEVICE_SWI(0x413c, 0x81b1)}, /* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card */ + {DEVICE_SWI(0x413c, 0x81b3)}, /* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card (rev3) */ /* Huawei devices */ {DEVICE_HWI(0x03f0, 0x581d)}, /* HP lt4112 LTE/HSPA+ Gobi 4G Modem (Huawei me906e) */ From fcc00f16ba5e947136d81d2c8efbccce1c564548 Mon Sep 17 00:00:00 2001 From: Vittorio Alfieri Date: Sun, 28 Feb 2016 14:40:24 +0100 Subject: [PATCH 2075/2091] USB: cp210x: Add ID for Parrot NMEA GPS Flight Recorder [ Upstream commit 3c4c615d70c8cbdc8ba8c79ed702640930652a79 ] The Parrot NMEA GPS Flight Recorder is a USB composite device consisting of hub, flash storage, and cp210x usb to serial chip. It is an accessory to the mass-produced Parrot AR Drone 2. The device emits standard NMEA messages which make the it compatible with NMEA compatible software. It was tested using gpsd version 3.11-3 as an NMEA interpreter and using the official Parrot Flight Recorder. Signed-off-by: Vittorio Alfieri Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Sasha Levin --- drivers/usb/serial/cp210x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index a7caf53d8b5e6..7a76fe4c2f9ea 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -164,6 +164,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x18EF, 0xE025) }, /* ELV Marble Sound Board 1 */ { USB_DEVICE(0x1901, 0x0190) }, /* GE B850 CP2105 Recorder interface */ { USB_DEVICE(0x1901, 0x0193) }, /* GE B650 CP2104 PMC interface */ + { USB_DEVICE(0x19CF, 0x3000) }, /* Parrot NMEA GPS Flight Recorder */ { USB_DEVICE(0x1ADB, 0x0001) }, /* Schweitzer Engineering C662 Cable */ { USB_DEVICE(0x1B1C, 0x1C00) }, /* Corsair USB Dongle */ { USB_DEVICE(0x1BA4, 0x0002) }, /* Silicon Labs 358x factory default */ From 09757912c869346ec88920263aa37a3a854aa038 Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Mon, 29 Feb 2016 15:36:11 +0100 Subject: [PATCH 2076/2091] USB: serial: option: add support for Telit LE922 PID 0x1045 [ Upstream commit 5deef5551c77e488922cc4bf4bc76df63be650d0 ] This patch adds support for 0x1045 PID of Telit LE922. 
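For context, the new entry below matches on interface class as well as VID/PID, so the driver binds only to the modem's vendor-specific (0xff) interfaces rather than the whole composite device. A hedged sketch of what such a match-table entry conceptually fills in (the actual driver entry additionally carries the blacklist via driver_info):

static const struct usb_device_id example_entry = {
	.match_flags     = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_CLASS,
	.idVendor        = 0x1bc7,	/* Telit vendor ID */
	.idProduct       = 0x1045,	/* LE922 USBCFG5 */
	.bInterfaceClass = 0xff,	/* vendor specific: skip e.g. the net interfaces */
};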
Signed-off-by: Daniele Palmas Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Sasha Levin --- drivers/usb/serial/option.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 2590f1e467f28..577e8a4f3fe0f 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -273,6 +273,7 @@ static void option_instat_callback(struct urb *urb); #define TELIT_PRODUCT_UE910_V2 0x1012 #define TELIT_PRODUCT_LE922_USBCFG0 0x1042 #define TELIT_PRODUCT_LE922_USBCFG3 0x1043 +#define TELIT_PRODUCT_LE922_USBCFG5 0x1045 #define TELIT_PRODUCT_LE920 0x1200 #define TELIT_PRODUCT_LE910 0x1201 @@ -1195,6 +1196,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg0 }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE922_USBCFG3), .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg3 }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, TELIT_PRODUCT_LE922_USBCFG5, 0xff), + .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg0 }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910), .driver_info = (kernel_ulong_t)&telit_le910_blacklist }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920), From 63e289a204c69536706706e31a6805035029852d Mon Sep 17 00:00:00 2001 From: Yegor Yefremov Date: Mon, 29 Feb 2016 16:39:57 +0100 Subject: [PATCH 2077/2091] USB: serial: option: add support for Quectel UC20 [ Upstream commit c0992d0f54847d0d1d85c60fcaa054f175ab1ccd ] Add support for Quectel UC20 and blacklist the QMI interface. Signed-off-by: Yegor Yefremov Cc: stable [johan: amend commit message ] Signed-off-by: Johan Hovold Signed-off-by: Sasha Levin --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 577e8a4f3fe0f..ce3d6af977b74 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1145,6 +1145,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = (kernel_ulong_t)&sierra_mc73xx_blacklist }, /* MC73xx */ { USB_DEVICE_INTERFACE_CLASS(SIERRA_VENDOR_ID, 0x9041, 0xff), .driver_info = (kernel_ulong_t)&sierra_mc73xx_blacklist }, /* MC7305/MC7355 */ + { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x9003), /* Quectel UC20 */ + .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6001) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CMU_300) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6003), From 74a8e4d036f86999d15a4f1c24fc6f33423eb338 Mon Sep 17 00:00:00 2001 From: Dennis Kadioglu Date: Tue, 1 Mar 2016 14:23:29 +0100 Subject: [PATCH 2078/2091] ALSA: usb-audio: Add a quirk for Plantronics DA45 [ Upstream commit 17e2df4613be57d0fab68df749f6b8114e453152 ] Plantronics DA45 does not support reading the sample rate which leads to many lines of "cannot get freq at ep 0x4" and "cannot get freq at ep 0x84". This patch adds the USB ID of the DA45 to quirks.c and avoids those error messages. 
Signed-off-by: Dennis Kadioglu Cc: Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/usb/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 37d8ababfc043..a4d03e5da3e08 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1121,6 +1121,7 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip) case USB_ID(0x045E, 0x076F): /* MS Lifecam HD-6000 */ case USB_ID(0x045E, 0x0772): /* MS Lifecam Studio */ case USB_ID(0x045E, 0x0779): /* MS Lifecam HD-3000 */ + case USB_ID(0x047F, 0xAA05): /* Plantronics DA45 */ case USB_ID(0x04D8, 0xFEEA): /* Benchmark DAC1 Pre */ case USB_ID(0x074D, 0x3553): /* Outlaw RR2150 (Micronas UAC3553B) */ case USB_ID(0x21B4, 0x0081): /* AudioQuest DragonFly */ From ebe15c00057a4efad2bae1a90b04a20766cefc8c Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 1 Mar 2016 18:30:18 +0100 Subject: [PATCH 2079/2091] ALSA: seq: oss: Don't drain at closing a client [ Upstream commit 197b958c1e76a575d77038cc98b4bebc2134279f ] The OSS sequencer client tries to drain the pending events at releasing. Unfortunately, as spotted by syzkaller fuzzer, this may lead to an unkillable process state when the event has been queued at the far future. Since the process being released can't be signaled any longer, it remains and waits for the echo-back event in that far future. Back to history, the draining feature was implemented at the time we misinterpreted POSIX definition for blocking file operation. Actually, such a behavior is superfluous at release, and we should just release the device as is instead of keeping it up forever. This patch just removes the draining call that may block the release for too long time unexpectedly. BugLink: http://lkml.kernel.org/r/CACT4Y+Y4kD-aBGj37rf-xBw9bH3GMU6P+MYg4W1e-s-paVD2pg@mail.gmail.com Reported-by: Dmitry Vyukov Cc: Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/core/seq/oss/seq_oss.c | 2 -- sound/core/seq/oss/seq_oss_device.h | 1 - sound/core/seq/oss/seq_oss_init.c | 16 ---------------- 3 files changed, 19 deletions(-) diff --git a/sound/core/seq/oss/seq_oss.c b/sound/core/seq/oss/seq_oss.c index 72873a46afeb3..4b53b8f2330f8 100644 --- a/sound/core/seq/oss/seq_oss.c +++ b/sound/core/seq/oss/seq_oss.c @@ -148,8 +148,6 @@ odev_release(struct inode *inode, struct file *file) if ((dp = file->private_data) == NULL) return 0; - snd_seq_oss_drain_write(dp); - mutex_lock(®ister_mutex); snd_seq_oss_release(dp); mutex_unlock(®ister_mutex); diff --git a/sound/core/seq/oss/seq_oss_device.h b/sound/core/seq/oss/seq_oss_device.h index b43924325249c..d7b4d016b5475 100644 --- a/sound/core/seq/oss/seq_oss_device.h +++ b/sound/core/seq/oss/seq_oss_device.h @@ -127,7 +127,6 @@ int snd_seq_oss_write(struct seq_oss_devinfo *dp, const char __user *buf, int co unsigned int snd_seq_oss_poll(struct seq_oss_devinfo *dp, struct file *file, poll_table * wait); void snd_seq_oss_reset(struct seq_oss_devinfo *dp); -void snd_seq_oss_drain_write(struct seq_oss_devinfo *dp); /* */ void snd_seq_oss_process_queue(struct seq_oss_devinfo *dp, abstime_t time); diff --git a/sound/core/seq/oss/seq_oss_init.c b/sound/core/seq/oss/seq_oss_init.c index dad5b1123e46e..0b9c18b2e45f5 100644 --- a/sound/core/seq/oss/seq_oss_init.c +++ b/sound/core/seq/oss/seq_oss_init.c @@ -435,22 +435,6 @@ snd_seq_oss_release(struct seq_oss_devinfo *dp) } -/* - * Wait until the queue is empty (if we don't have nonblock) - */ -void -snd_seq_oss_drain_write(struct seq_oss_devinfo *dp) -{ - if (! 
dp->timer->running) - return; - if (is_write_mode(dp->file_mode) && !is_nonblock_mode(dp->file_mode) && - dp->writeq) { - while (snd_seq_oss_writeq_sync(dp->writeq)) - ; - } -} - - /* * reset sequencer devices */ From b8ad68546922dd5acb6cd32628bc9ae69a4795f9 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Tue, 19 Jan 2016 16:08:49 +0100 Subject: [PATCH 2080/2091] parisc: Fix ptrace syscall number and return value modification [ Upstream commit 98e8b6c9ac9d1b1e9d1122dfa6783d5d566bb8f7 ] Mike Frysinger reported that his ptrace testcase showed strange behaviour on parisc: It was not possible to avoid a syscall and the return value of a syscall couldn't be changed. To modify a syscall number, we were missing to save the new syscall number to gr20 which is then picked up later in assembly again. The effect that the return value couldn't be changed is a side-effect of another bug in the assembly code. When a process is ptraced, userspace expects each syscall to report entrance and exit of a syscall. If a syscall number was given which doesn't exist, we jumped to the normal syscall exit code instead of informing userspace that the (non-existant) syscall exits. This unexpected behaviour confuses userspace and thus the bug was misinterpreted as if we can't change the return value. This patch fixes both problems and was tested on 64bit kernel with 32bit userspace. Signed-off-by: Helge Deller Cc: Mike Frysinger Cc: stable@vger.kernel.org # v4.0+ Tested-by: Mike Frysinger Signed-off-by: Sasha Levin --- arch/parisc/kernel/ptrace.c | 16 +++++++++++----- arch/parisc/kernel/syscall.S | 5 ++++- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/arch/parisc/kernel/ptrace.c b/arch/parisc/kernel/ptrace.c index 9585c81f755fc..ce0b2b4075c70 100644 --- a/arch/parisc/kernel/ptrace.c +++ b/arch/parisc/kernel/ptrace.c @@ -269,14 +269,19 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, long do_syscall_trace_enter(struct pt_regs *regs) { - long ret = 0; - /* Do the secure computing check first. */ secure_computing_strict(regs->gr[20]); if (test_thread_flag(TIF_SYSCALL_TRACE) && - tracehook_report_syscall_entry(regs)) - ret = -1L; + tracehook_report_syscall_entry(regs)) { + /* + * Tracing decided this syscall should not happen or the + * debugger stored an invalid system call number. Skip + * the system call and the system call restart handling. + */ + regs->gr[20] = -1UL; + goto out; + } #ifdef CONFIG_64BIT if (!is_compat_task()) @@ -290,7 +295,8 @@ long do_syscall_trace_enter(struct pt_regs *regs) regs->gr[24] & 0xffffffff, regs->gr[23] & 0xffffffff); - return ret ? : regs->gr[20]; +out: + return regs->gr[20]; } void do_syscall_trace_exit(struct pt_regs *regs) diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S index 0b8d26d3ba43b..02cf40c96fe34 100644 --- a/arch/parisc/kernel/syscall.S +++ b/arch/parisc/kernel/syscall.S @@ -343,7 +343,7 @@ tracesys_next: #endif comiclr,>>= __NR_Linux_syscalls, %r20, %r0 - b,n .Lsyscall_nosys + b,n .Ltracesys_nosys LDREGX %r20(%r19), %r19 @@ -359,6 +359,9 @@ tracesys_next: be 0(%sr7,%r19) ldo R%tracesys_exit(%r2),%r2 +.Ltracesys_nosys: + ldo -ENOSYS(%r0),%r28 /* set errno */ + /* Do *not* call this function on the gateway page, because it makes a direct call to syscall_trace. 
*/ From f0adda6a9f5b1689cbf47dd56cf73da201c014d8 Mon Sep 17 00:00:00 2001 From: Timothy Pearson Date: Fri, 26 Feb 2016 15:29:32 -0600 Subject: [PATCH 2081/2091] drm/ast: Fix incorrect register check for DRAM width [ Upstream commit 2d02b8bdba322b527c5f5168ce1ca10c2d982a78 ] During DRAM initialization on certain ASpeed devices, an incorrect bit (bit 10) was checked in the "SDRAM Bus Width Status" register to determine DRAM width. Query bit 6 instead in accordance with the Aspeed AST2050 datasheet v1.05. Signed-off-by: Timothy Pearson Cc: stable@vger.kernel.org Signed-off-by: Dave Airlie Signed-off-by: Sasha Levin --- drivers/gpu/drm/ast/ast_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/ast/ast_main.c b/drivers/gpu/drm/ast/ast_main.c index 035dacc93382f..fd5c5f3370f6b 100644 --- a/drivers/gpu/drm/ast/ast_main.c +++ b/drivers/gpu/drm/ast/ast_main.c @@ -227,7 +227,7 @@ static int ast_get_dram_info(struct drm_device *dev) } while (ast_read32(ast, 0x10000) != 0x01); data = ast_read32(ast, 0x10004); - if (data & 0x400) + if (data & 0x40) ast->dram_bus_width = 16; else ast->dram_bus_width = 32; From 4765409e36975036eeb5a79ca58d0b3da6131101 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Tue, 1 Mar 2016 14:36:32 +0100 Subject: [PATCH 2082/2091] USB: qcserial: add Sierra Wireless EM74xx device ID MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 04fdbc825ffc02fb098964b92de802fff44e73fd ] The MC74xx and EM74xx modules use different IDs by default, according to the Lenovo EM7455 driver for Windows. Cc: Signed-off-by: Bjørn Mork Signed-off-by: Johan Hovold Signed-off-by: Sasha Levin --- drivers/usb/serial/qcserial.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c index f1eb03eed5cf6..f0a2ad15a9926 100644 --- a/drivers/usb/serial/qcserial.c +++ b/drivers/usb/serial/qcserial.c @@ -155,8 +155,10 @@ static const struct usb_device_id id_table[] = { {DEVICE_SWI(0x1199, 0x9056)}, /* Sierra Wireless Modem */ {DEVICE_SWI(0x1199, 0x9060)}, /* Sierra Wireless Modem */ {DEVICE_SWI(0x1199, 0x9061)}, /* Sierra Wireless Modem */ - {DEVICE_SWI(0x1199, 0x9070)}, /* Sierra Wireless MC74xx/EM74xx */ - {DEVICE_SWI(0x1199, 0x9071)}, /* Sierra Wireless MC74xx/EM74xx */ + {DEVICE_SWI(0x1199, 0x9070)}, /* Sierra Wireless MC74xx */ + {DEVICE_SWI(0x1199, 0x9071)}, /* Sierra Wireless MC74xx */ + {DEVICE_SWI(0x1199, 0x9078)}, /* Sierra Wireless EM74xx */ + {DEVICE_SWI(0x1199, 0x9079)}, /* Sierra Wireless EM74xx */ {DEVICE_SWI(0x413c, 0x81a2)}, /* Dell Wireless 5806 Gobi(TM) 4G LTE Mobile Broadband Card */ {DEVICE_SWI(0x413c, 0x81a3)}, /* Dell Wireless 5570 HSPA+ (42Mbps) Mobile Broadband Card */ {DEVICE_SWI(0x413c, 0x81a4)}, /* Dell Wireless 5570e HSPA+ (42Mbps) Mobile Broadband Card */ From b13b243e312d56d7ff491a553056454c2723b021 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 24 Feb 2016 17:38:38 -0500 Subject: [PATCH 2083/2091] drm/radeon/pm: update current crtc info after setting the powerstate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 5e031d9fe8b0741f11d49667dfc3ebf5454121fd ] On CI, we need to see if the number of crtcs changes to determine whether or not we need to upload the mclk table again. In practice we don't currently upload the mclk table again after the initial load. 
The only reason you would would be to add new states, e.g., for arbitrary mclk setting which is not currently supported. Acked-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Sasha Levin --- drivers/gpu/drm/radeon/radeon_pm.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c index 763b523e671fc..a56eab7f0ab1d 100644 --- a/drivers/gpu/drm/radeon/radeon_pm.c +++ b/drivers/gpu/drm/radeon/radeon_pm.c @@ -1080,10 +1080,6 @@ static void radeon_dpm_change_power_state_locked(struct radeon_device *rdev) /* update display watermarks based on new power state */ radeon_bandwidth_update(rdev); - rdev->pm.dpm.current_active_crtcs = rdev->pm.dpm.new_active_crtcs; - rdev->pm.dpm.current_active_crtc_count = rdev->pm.dpm.new_active_crtc_count; - rdev->pm.dpm.single_display = single_display; - /* wait for the rings to drain */ for (i = 0; i < RADEON_NUM_RINGS; i++) { struct radeon_ring *ring = &rdev->ring[i]; @@ -1102,6 +1098,10 @@ static void radeon_dpm_change_power_state_locked(struct radeon_device *rdev) /* update displays */ radeon_dpm_display_configuration_changed(rdev); + rdev->pm.dpm.current_active_crtcs = rdev->pm.dpm.new_active_crtcs; + rdev->pm.dpm.current_active_crtc_count = rdev->pm.dpm.new_active_crtc_count; + rdev->pm.dpm.single_display = single_display; + if (rdev->asic->dpm.force_performance_level) { if (rdev->pm.dpm.thermal_active) { enum radeon_dpm_forced_level level = rdev->pm.dpm.forced_level; From 8ef267aabd98f9df0279b9bb4245a3b985ead692 Mon Sep 17 00:00:00 2001 From: Todd E Brandt Date: Wed, 2 Mar 2016 16:05:29 -0800 Subject: [PATCH 2084/2091] PM / sleep / x86: Fix crash on graph trace through x86 suspend [ Upstream commit 92f9e179a702a6adbc11e2fedc76ecd6ffc9e3f7 ] Pause/unpause graph tracing around do_suspend_lowlevel as it has inconsistent call/return info after it jumps to the wakeup vector. The graph trace buffer will otherwise become misaligned and may eventually crash and hang on suspend. To reproduce the issue and test the fix: Run a function_graph trace over suspend/resume and set the graph function to suspend_devices_and_enter. This consistently hangs the system without this fix. Signed-off-by: Todd Brandt Cc: All applicable Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- arch/x86/kernel/acpi/sleep.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index d1daead5fcddd..adb3eaf8fe2a5 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -16,6 +16,7 @@ #include #include +#include #include "../../realmode/rm/wakeup.h" #include "sleep.h" @@ -107,7 +108,13 @@ int x86_acpi_suspend_lowlevel(void) saved_magic = 0x123456789abcdef0L; #endif /* CONFIG_64BIT */ + /* + * Pause/unpause graph tracing around do_suspend_lowlevel as it has + * inconsistent call/return info after it jumps to the wakeup vector. + */ + pause_graph_tracing(); do_suspend_lowlevel(); + unpause_graph_tracing(); return 0; } From f00b04c358d1583826a7340dc661eff6f8399898 Mon Sep 17 00:00:00 2001 From: Simon South Date: Wed, 2 Mar 2016 23:10:44 -0500 Subject: [PATCH 2085/2091] ALSA: hda - Fix mic issues on Acer Aspire E1-472 [ Upstream commit 02322ac9dee9aff8d8862e8d6660ebe102f492ea ] This patch applies the microphone-related fix created for the Acer Aspire E1-572 to the E1-472 as well, as it uses the same Realtek ALC282 CODEC and demonstrates the same issues. 
This patch allows an external, headset microphone to be used and limits the gain on the (quite noisy) internal microphone. Signed-off-by: Simon South Cc: Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 404be2d67086d..91cc6897d595e 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5274,6 +5274,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x080d, "Acer Aspire V5-122P", ALC269_FIXUP_ASPIRE_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x0740, "Acer AO725", ALC271_FIXUP_HP_GATE_MIC_JACK), SND_PCI_QUIRK(0x1025, 0x0742, "Acer AO756", ALC271_FIXUP_HP_GATE_MIC_JACK), + SND_PCI_QUIRK(0x1025, 0x0762, "Acer Aspire E1-472", ALC271_FIXUP_HP_GATE_MIC_JACK_E1_572), SND_PCI_QUIRK(0x1025, 0x0775, "Acer Aspire E1-572", ALC271_FIXUP_HP_GATE_MIC_JACK_E1_572), SND_PCI_QUIRK(0x1025, 0x079b, "Acer Aspire V5-573G", ALC282_FIXUP_ASPIRE_V5_PINS), SND_PCI_QUIRK(0x1025, 0x106d, "Acer Cloudbook 14", ALC283_FIXUP_CHROME_BOOK), From 5a4557b0eb8a2a0b3bccdcbc7a66b8b04262f878 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Fri, 4 Mar 2016 01:42:49 +0000 Subject: [PATCH 2086/2091] MIPS: traps: Fix SIGFPE information leak from `do_ov' and `do_trap_or_bp' [ Upstream commit e723e3f7f9591b79e8c56b3d7c5a204a9c571b55 ] Avoid sending a partially initialised `siginfo_t' structure along SIGFPE signals issued from `do_ov' and `do_trap_or_bp', leading to information leaking from the kernel stack. Signed-off-by: Maciej W. Rozycki Cc: stable@vger.kernel.org Signed-off-by: Ralf Baechle Signed-off-by: Sasha Levin --- arch/mips/kernel/traps.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 5f5f44edc77d4..54923d6b7e165 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -693,15 +693,15 @@ static int simulate_sync(struct pt_regs *regs, unsigned int opcode) asmlinkage void do_ov(struct pt_regs *regs) { enum ctx_state prev_state; - siginfo_t info; + siginfo_t info = { + .si_signo = SIGFPE, + .si_code = FPE_INTOVF, + .si_addr = (void __user *)regs->cp0_epc, + }; prev_state = exception_enter(); die_if_kernel("Integer overflow", regs); - info.si_code = FPE_INTOVF; - info.si_signo = SIGFPE; - info.si_errno = 0; - info.si_addr = (void __user *) regs->cp0_epc; force_sig_info(SIGFPE, &info, current); exception_exit(prev_state); } @@ -877,7 +877,7 @@ asmlinkage void do_fpe(struct pt_regs *regs, unsigned long fcr31) void do_trap_or_bp(struct pt_regs *regs, unsigned int code, const char *str) { - siginfo_t info; + siginfo_t info = { 0 }; char b[40]; #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP @@ -905,7 +905,6 @@ void do_trap_or_bp(struct pt_regs *regs, unsigned int code, else info.si_code = FPE_INTOVF; info.si_signo = SIGFPE; - info.si_errno = 0; info.si_addr = (void __user *) regs->cp0_epc; force_sig_info(SIGFPE, &info, current); break; From 325940deb74b23351f507d5f1e1e01592c1efa1c Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Sun, 21 Feb 2016 10:53:03 +0100 Subject: [PATCH 2087/2091] ubi: Fix out of bounds write in volume update code [ Upstream commit e4f6daac20332448529b11f09388f1d55ef2084c ] ubi_start_leb_change() allocates too few bytes. ubi_more_leb_change_data() will write up to req->upd_bytes + ubi->min_io_size bytes. 
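To make the sizing concrete, here is a minimal stand-alone sketch (not part of the patch) of the rounding the fix below relies on. The kernel's ALIGN() macro rounds its first argument up to the next multiple of the second, which is assumed here to be a power of two, as ubi->min_io_size normally is; the rounded size is therefore large enough for the padded final write described above, whereas the old allocation of exactly req->bytes is not. The concrete numbers below are illustrative only.

#include <stdio.h>

/* Same rounding as the kernel's ALIGN() macro; 'a' must be a power of two. */
#define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	unsigned int min_io_size = 2048;	/* assumed NAND page size */
	unsigned int req_bytes = 5000;		/* hypothetical update size */

	/* Old allocation: exactly req->bytes; the padded final write can
	 * run past the end of the buffer. */
	printf("old upd_buf size: %u\n", req_bytes);

	/* New allocation: rounded up to a multiple of min_io_size
	 * (5000 -> 6144 here), covering the padding. */
	printf("new upd_buf size: %u\n", ALIGN_UP(req_bytes, min_io_size));
	return 0;
}
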
Cc: stable@vger.kernel.org Signed-off-by: Richard Weinberger Reviewed-by: Boris Brezillon Signed-off-by: Sasha Levin --- drivers/mtd/ubi/upd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/ubi/upd.c b/drivers/mtd/ubi/upd.c index 2a1b6e037e1a1..0134ba32a0578 100644 --- a/drivers/mtd/ubi/upd.c +++ b/drivers/mtd/ubi/upd.c @@ -193,7 +193,7 @@ int ubi_start_leb_change(struct ubi_device *ubi, struct ubi_volume *vol, vol->changing_leb = 1; vol->ch_lnum = req->lnum; - vol->upd_buf = vmalloc(req->bytes); + vol->upd_buf = vmalloc(ALIGN((int)req->bytes, ubi->min_io_size)); if (!vol->upd_buf) return -ENOMEM; From b5c41530ef442dba667c4d964c722e8157f7da72 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Fri, 11 Mar 2016 16:53:11 +0800 Subject: [PATCH 2088/2091] KVM: x86: move steal time initialization to vcpu entry time [ Upstream commit 7cae2bedcbd4680b155999655e49c27b9cf020fa ] As reported at https://bugs.launchpad.net/qemu/+bug/1494350, it is possible to have vcpu->arch.st.last_steal initialized from a thread other than vcpu thread, say the iothread, via KVM_SET_MSRS. Which can cause an overflow later (when subtracting from vcpu threads sched_info.run_delay). To avoid that, move steal time accumulation to vcpu entry time, before copying steal time data to guest. Signed-off-by: Marcelo Tosatti Reviewed-by: David Matlack Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin --- arch/x86/kvm/x86.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 42f1575ab58d9..41a3fb4ed346a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2117,6 +2117,8 @@ static void accumulate_steal_time(struct kvm_vcpu *vcpu) static void record_steal_time(struct kvm_vcpu *vcpu) { + accumulate_steal_time(vcpu); + if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED)) return; @@ -2262,12 +2264,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (!(data & KVM_MSR_ENABLED)) break; - vcpu->arch.st.last_steal = current->sched_info.run_delay; - - preempt_disable(); - accumulate_steal_time(vcpu); - preempt_enable(); - kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); break; @@ -2966,7 +2962,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) vcpu->cpu = cpu; } - accumulate_steal_time(vcpu); kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); } From b014bae072a1dad1767c5c6e7e7b480165685c3e Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Thu, 8 Oct 2015 18:19:53 +0200 Subject: [PATCH 2089/2091] ipv6: drop frames with attached skb->sk in forwarding [ Upstream commit 9ef2e965e55481a52d6d91ce61977a27836268d3 ] This is a clone of commit 2ab957492d13b ("ip_forward: Drop frames with attached skb->sk") for ipv6. This commit has exactly the same reasons as the above mentioned commit, namely to prevent panics during netfilter reload or a misconfigured stack. Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- net/ipv6/ip6_output.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 36b9ac48b8fb1..06bf4010d3ed7 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -376,6 +376,9 @@ int ip6_forward(struct sk_buff *skb) if (skb->pkt_type != PACKET_HOST) goto drop; + if (unlikely(skb->sk)) + goto drop; + if (skb_warn_if_lro(skb)) goto drop; From 7f30737678023b5becaf0e2e012665f71b886a7d Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Thu, 17 Mar 2016 14:11:03 -0400 Subject: [PATCH 2090/2091] Linux 4.1.20 Signed-off-by: Sasha Levin --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 06107f683bbe8..39be1bbd373a3 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 1 -SUBLEVEL = 19 +SUBLEVEL = 20 EXTRAVERSION = NAME = Series 4800 From 0266e687b8d4cef7a90895f4e1d5ec67ea351ff7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20G=C3=BCldenring?= Date: Thu, 5 Jul 2018 16:52:30 +0200 Subject: [PATCH 2091/2091] added progmp files --- include/net/inet_connection_sock.h | 3 +- include/net/mptcp.h | 42 +- include/net/netns/mptcp.h | 1 + include/net/tcp.h | 28 +- include/uapi/linux/bpf.h | 148 +- include/uapi/linux/tcp.h | 15 +- net/ipv4/tcp.c | 617 +- net/ipv4/tcp_input.c | 22 +- net/ipv4/tcp_output.c | 86 +- net/mptcp/Kconfig | 26 +- net/mptcp/Makefile | 2 +- net/mptcp/mptcp_ctrl.c | 135 +- net/mptcp/mptcp_ctrl.c.save | 2814 +++++++++ net/mptcp/mptcp_input.c | 324 +- net/mptcp/mptcp_ipv6.c | 4 +- net/mptcp/mptcp_output.c | 313 +- net/mptcp/mptcp_rbs_action.c | 44 + net/mptcp/mptcp_rbs_action.h | 60 + net/mptcp/mptcp_rbs_cfg.c | 230 + net/mptcp/mptcp_rbs_cfg.h | 109 + net/mptcp/mptcp_rbs_ctx.c | 13 + net/mptcp/mptcp_rbs_ctx.h | 86 + net/mptcp/mptcp_rbs_dynarray.h | 94 + net/mptcp/mptcp_rbs_exec.c | 50 + net/mptcp/mptcp_rbs_exec.h | 11 + net/mptcp/mptcp_rbs_lexer.c | 712 +++ net/mptcp/mptcp_rbs_lexer.h | 109 + net/mptcp/mptcp_rbs_multiplatform.h | 18 + net/mptcp/mptcp_rbs_optimizer.c | 84 + net/mptcp/mptcp_rbs_optimizer.h | 90 + net/mptcp/mptcp_rbs_optimizer_bm.c | 103 + net/mptcp/mptcp_rbs_optimizer_bm.h | 13 + net/mptcp/mptcp_rbs_optimizer_cf.c | 977 +++ net/mptcp/mptcp_rbs_optimizer_cf.h | 13 + net/mptcp/mptcp_rbs_optimizer_cve.c | 1160 ++++ net/mptcp/mptcp_rbs_optimizer_cve.h | 14 + net/mptcp/mptcp_rbs_optimizer_dce.c | 183 + net/mptcp/mptcp_rbs_optimizer_dce.h | 13 + net/mptcp/mptcp_rbs_optimizer_ebpf.c | 4172 +++++++++++++ net/mptcp/mptcp_rbs_optimizer_ebpf.h | 13 + net/mptcp/mptcp_rbs_optimizer_ebpf_disasm.c | 309 + net/mptcp/mptcp_rbs_optimizer_ebpf_disasm.h | 14 + net/mptcp/mptcp_rbs_optimizer_ebpf_lse.c | 413 ++ net/mptcp/mptcp_rbs_optimizer_ebpf_lse.h | 12 + net/mptcp/mptcp_rbs_optimizer_ebpf_regalloc.c | 1169 ++++ net/mptcp/mptcp_rbs_optimizer_ebpf_regalloc.h | 219 + net/mptcp/mptcp_rbs_optimizer_lu.c | 199 + net/mptcp/mptcp_rbs_optimizer_lu.h | 13 + net/mptcp/mptcp_rbs_optimizer_vi.c | 462 ++ net/mptcp/mptcp_rbs_optimizer_vi.h | 13 + net/mptcp/mptcp_rbs_parser.c | 1933 ++++++ net/mptcp/mptcp_rbs_parser.h | 23 + net/mptcp/mptcp_rbs_queue.c | 72 + net/mptcp/mptcp_rbs_queue.h | 12 + net/mptcp/mptcp_rbs_sched.c | 914 +++ net/mptcp/mptcp_rbs_sched.h | 44 + net/mptcp/mptcp_rbs_scheduler.c | 41 + net/mptcp/mptcp_rbs_scheduler.h | 76 + net/mptcp/mptcp_rbs_smt.c | 887 +++ net/mptcp/mptcp_rbs_smt.h | 221 + net/mptcp/mptcp_rbs_type.c | 18 + net/mptcp/mptcp_rbs_type.h | 21 + net/mptcp/mptcp_rbs_user.c | 932 +++ 
net/mptcp/mptcp_rbs_user.h | 19 + net/mptcp/mptcp_rbs_value.c | 5518 +++++++++++++++++ net/mptcp/mptcp_rbs_value.h | 2098 +++++++ net/mptcp/mptcp_rbs_value_parser.h | 928 +++ net/mptcp/mptcp_rbs_var.c | 29 + net/mptcp/mptcp_rbs_var.h | 31 + net/mptcp/mptcp_rr.c | 33 +- net/mptcp/mptcp_sched.c | 128 +- net/sched/sch_htb.c | 22 +- 72 files changed, 29402 insertions(+), 372 deletions(-) create mode 100644 net/mptcp/mptcp_ctrl.c.save create mode 100644 net/mptcp/mptcp_rbs_action.c create mode 100644 net/mptcp/mptcp_rbs_action.h create mode 100644 net/mptcp/mptcp_rbs_cfg.c create mode 100644 net/mptcp/mptcp_rbs_cfg.h create mode 100644 net/mptcp/mptcp_rbs_ctx.c create mode 100644 net/mptcp/mptcp_rbs_ctx.h create mode 100644 net/mptcp/mptcp_rbs_dynarray.h create mode 100644 net/mptcp/mptcp_rbs_exec.c create mode 100644 net/mptcp/mptcp_rbs_exec.h create mode 100644 net/mptcp/mptcp_rbs_lexer.c create mode 100644 net/mptcp/mptcp_rbs_lexer.h create mode 100644 net/mptcp/mptcp_rbs_multiplatform.h create mode 100644 net/mptcp/mptcp_rbs_optimizer.c create mode 100644 net/mptcp/mptcp_rbs_optimizer.h create mode 100644 net/mptcp/mptcp_rbs_optimizer_bm.c create mode 100644 net/mptcp/mptcp_rbs_optimizer_bm.h create mode 100644 net/mptcp/mptcp_rbs_optimizer_cf.c create mode 100644 net/mptcp/mptcp_rbs_optimizer_cf.h create mode 100644 net/mptcp/mptcp_rbs_optimizer_cve.c create mode 100644 net/mptcp/mptcp_rbs_optimizer_cve.h create mode 100644 net/mptcp/mptcp_rbs_optimizer_dce.c create mode 100644 net/mptcp/mptcp_rbs_optimizer_dce.h create mode 100644 net/mptcp/mptcp_rbs_optimizer_ebpf.c create mode 100644 net/mptcp/mptcp_rbs_optimizer_ebpf.h create mode 100644 net/mptcp/mptcp_rbs_optimizer_ebpf_disasm.c create mode 100644 net/mptcp/mptcp_rbs_optimizer_ebpf_disasm.h create mode 100644 net/mptcp/mptcp_rbs_optimizer_ebpf_lse.c create mode 100644 net/mptcp/mptcp_rbs_optimizer_ebpf_lse.h create mode 100644 net/mptcp/mptcp_rbs_optimizer_ebpf_regalloc.c create mode 100644 net/mptcp/mptcp_rbs_optimizer_ebpf_regalloc.h create mode 100644 net/mptcp/mptcp_rbs_optimizer_lu.c create mode 100644 net/mptcp/mptcp_rbs_optimizer_lu.h create mode 100644 net/mptcp/mptcp_rbs_optimizer_vi.c create mode 100644 net/mptcp/mptcp_rbs_optimizer_vi.h create mode 100644 net/mptcp/mptcp_rbs_parser.c create mode 100644 net/mptcp/mptcp_rbs_parser.h create mode 100644 net/mptcp/mptcp_rbs_queue.c create mode 100644 net/mptcp/mptcp_rbs_queue.h create mode 100644 net/mptcp/mptcp_rbs_sched.c create mode 100644 net/mptcp/mptcp_rbs_sched.h create mode 100644 net/mptcp/mptcp_rbs_scheduler.c create mode 100644 net/mptcp/mptcp_rbs_scheduler.h create mode 100644 net/mptcp/mptcp_rbs_smt.c create mode 100644 net/mptcp/mptcp_rbs_smt.h create mode 100644 net/mptcp/mptcp_rbs_type.c create mode 100644 net/mptcp/mptcp_rbs_type.h create mode 100644 net/mptcp/mptcp_rbs_user.c create mode 100644 net/mptcp/mptcp_rbs_user.h create mode 100644 net/mptcp/mptcp_rbs_value.c create mode 100644 net/mptcp/mptcp_rbs_value.h create mode 100644 net/mptcp/mptcp_rbs_value_parser.h create mode 100644 net/mptcp/mptcp_rbs_var.c create mode 100644 net/mptcp/mptcp_rbs_var.h diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index de838125a4daa..6f3d97d53cd85 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -222,7 +222,8 @@ static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what, const unsigned long max_when) { struct inet_connection_sock *icsk = inet_csk(sk); - + //printk("AFR AFR 
reset_xmit_timer: sk=%p %d when=0x%lx, caller=%p\n", + // sk, what, when, current_text_addr()); if (when > max_when) { #ifdef INET_CSK_DEBUG pr_debug("reset_xmit_timer: sk=%p %d when=0x%lx, caller=%p\n", diff --git a/include/net/mptcp.h b/include/net/mptcp.h index c156948f038f9..cd7b0b7acbded 100644 --- a/include/net/mptcp.h +++ b/include/net/mptcp.h @@ -191,7 +191,7 @@ struct mptcp_tcp_sock { u8 loc_id; u8 rem_id; -#define MPTCP_SCHED_SIZE 16 +#define MPTCP_SCHED_SIZE 64 u8 mptcp_sched[MPTCP_SCHED_SIZE] __aligned(8); struct sk_buff *shortcut_ofoqueue; /* Shortcut to the current modified @@ -210,6 +210,17 @@ struct mptcp_tcp_sock { /* HMAC of the third ack */ char sender_mac[20]; + + /* statistics for amiusing */ + u64 bytes_snd; + u64 bytes_rcv; + + /* Delay measurement values */ + u32 delay_in; + u32 delay_out; + + /* counter for unique subflow ids */ + u8 sbf_id; }; struct mptcp_tw { @@ -260,6 +271,23 @@ struct mptcp_sched_ops { char name[MPTCP_SCHED_NAME_MAX]; struct module *owner; + void (*recover_skb) (struct sock *meta_sk, + struct sock *subsk, + struct sk_buff* skb, + bool reinject); + void (*update_stats) (struct sock * subsk, + const struct sk_buff *skb, + unsigned int len, + unsigned int type); +}; + +/* scheduler selection for */ +struct mptcp_sched_select { + struct list_head list; + __be32 dstip; + __be16 sport; + unsigned long till_time_s; + struct mptcp_sched_ops* sched_ops; }; struct mptcp_cb { @@ -289,7 +317,7 @@ struct mptcp_cb { u8 cnt_subflows; u8 cnt_established; -#define MPTCP_SCHED_DATA_SIZE 8 +#define MPTCP_SCHED_DATA_SIZE 128 u8 mptcp_sched[MPTCP_SCHED_DATA_SIZE] __aligned(8); struct mptcp_sched_ops *sched_ops; @@ -348,6 +376,9 @@ struct mptcp_cb { int orig_sk_rcvbuf; int orig_sk_sndbuf; u32 orig_window_clamp; + + /* counter for unique subflow ids */ + u8 last_sbf_id; }; #define MPTCP_VERSION_0 0 @@ -909,9 +940,12 @@ extern struct mptcp_pm_ops mptcp_pm_default; int mptcp_register_scheduler(struct mptcp_sched_ops *sched); void mptcp_unregister_scheduler(struct mptcp_sched_ops *sched); void mptcp_init_scheduler(struct mptcp_cb *mpcb); +struct mptcp_sched_ops *mptcp_sched_find(const char *name); void mptcp_cleanup_scheduler(struct mptcp_cb *mpcb); void mptcp_get_default_scheduler(char *name); int mptcp_set_default_scheduler(const char *name); +int mptcp_set_default_scheduler_for_tuple(const char *name, __be32 dstip, + __be16 sport, unsigned long till_jiffies); bool mptcp_is_available(struct sock *sk, const struct sk_buff *skb, bool zero_wnd_test); bool mptcp_is_def_unavailable(struct sock *sk); @@ -1013,7 +1047,7 @@ static inline bool mptcp_is_data_fin2(const struct sk_buff *skb, { return mptcp_is_data_fin(skb) || (tp->mpcb->infinite_mapping_rcv && - (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)); + (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)); } static inline u8 mptcp_get_64_bit(u64 data_seq, struct mptcp_cb *mpcb) @@ -1349,6 +1383,8 @@ void mptcp_tcp_set_rto(struct sock *sk); /* TCP and MPTCP flag-depending functions */ bool mptcp_prune_ofo_queue(struct sock *sk); +#define MPTCP_SCHED_MAX_DATA_LEN 1024 + #else /* CONFIG_MPTCP */ #define mptcp_debug(fmt, args...) 
\ do { \ diff --git a/include/net/netns/mptcp.h b/include/net/netns/mptcp.h index 6680f3bbcfc8d..12ad0c99d5802 100644 --- a/include/net/netns/mptcp.h +++ b/include/net/netns/mptcp.h @@ -44,6 +44,7 @@ struct netns_mptcp { #ifdef CONFIG_PROC_FS struct proc_dir_entry *proc_net_mptcp; + struct proc_dir_entry *proc_net_mptcp_rbs; #endif void *path_managers[MPTCP_PM_MAX]; diff --git a/include/net/tcp.h b/include/net/tcp.h index 65950cd33466e..38db0d81ddd17 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -117,7 +117,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); #define TCP_TIMEWAIT_LEN (60*HZ) /* how long to wait to destroy TIME-WAIT * state, about 60 seconds */ #define TCP_FIN_TIMEOUT TCP_TIMEWAIT_LEN - /* BSD style FIN_WAIT2 deadlock breaker. + /* BSD style FIN_WAIT2 deadlock breaker. * It used to be 3min, new value is 60sec, * to combine FIN-WAIT-2 timeout with * TIME-WAIT timer. @@ -142,8 +142,8 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); */ #define TCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ/2U)) /* Maximal interval between probes - * for local resources. - */ + * for local resources. + */ #define TCP_KEEPALIVE_TIME (120*60*HZ) /* two hours */ #define TCP_KEEPALIVE_PROBES 9 /* Max of 9 keepalive probes */ @@ -315,7 +315,7 @@ extern int tcp_memory_pressure; static inline bool before(__u32 seq1, __u32 seq2) { - return (__s32)(seq1-seq2) < 0; + return (__s32)(seq1-seq2) < 0; } #define after(seq2, seq1) before(seq1, seq2) @@ -438,7 +438,7 @@ void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb); void tcp_v6_hash(struct sock *sk); struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb); struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, - struct request_sock *req, + struct request_sock *req, struct dst_entry *dst); void tcp_v6_reqsk_destructor(struct request_sock *req); @@ -821,6 +821,16 @@ static inline u32 tcp_skb_timestamp(const struct sk_buff *skb) #define TCPHDR_ECE 0x40 #define TCPHDR_CWR 0x80 +union tcp_skb_cb_rbs { + struct { + __u8 flags_to_unlink:1, + flags_to_free:1, + flags_not_in_queue:1, + user:5; + }; + __u8 b; +}; + /* This is what the send packet queuing engine uses to pass * TCP per-packet control information to the transmission code. * We also store the host-order sequence numbers in here too. @@ -845,6 +855,7 @@ struct tcp_skb_cb { __u8 mptcp_flags; /* flags for the MPTCP layer */ __u8 dss_off; /* Number of 4-byte words until * seq-number */ + union tcp_skb_cb_rbs mptcp_rbs; #endif __u8 tcp_flags; /* TCP header flags. 
(tcp[13]) */ @@ -1579,6 +1590,13 @@ static inline bool tcp_skb_is_last(const struct sock *sk, return skb_queue_is_last(&sk->sk_write_queue, skb); } +// added for rbs +static inline bool tcp_skb_is_first(const struct sock *sk, + const struct sk_buff *skb) +{ + return skb_queue_is_first(&sk->sk_write_queue, skb); +} + static inline void tcp_advance_send_head(struct sock *sk, const struct sk_buff *skb) { if (tcp_skb_is_last(sk, skb)) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index a9ebdf5701e8d..cd1c2e3bc56f5 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -7,60 +7,59 @@ #ifndef _UAPI__LINUX_BPF_H__ #define _UAPI__LINUX_BPF_H__ -#include #include +#include /* Extended instruction set based on top of classic BPF */ /* instruction classes */ -#define BPF_ALU64 0x07 /* alu mode in double word width */ +#define BPF_ALU64 0x07 /* alu mode in double word width */ /* ld/ldx fields */ -#define BPF_DW 0x18 /* double word */ -#define BPF_XADD 0xc0 /* exclusive add */ +#define BPF_DW 0x18 /* double word */ +#define BPF_XADD 0xc0 /* exclusive add */ /* alu/jmp fields */ -#define BPF_MOV 0xb0 /* mov reg to reg */ -#define BPF_ARSH 0xc0 /* sign extending arithmetic shift right */ +#define BPF_MOV 0xb0 /* mov reg to reg */ +#define BPF_ARSH 0xc0 /* sign extending arithmetic shift right */ /* change endianness of a register */ -#define BPF_END 0xd0 /* flags for endianness conversion: */ -#define BPF_TO_LE 0x00 /* convert to little-endian */ -#define BPF_TO_BE 0x08 /* convert to big-endian */ -#define BPF_FROM_LE BPF_TO_LE -#define BPF_FROM_BE BPF_TO_BE - -#define BPF_JNE 0x50 /* jump != */ -#define BPF_JSGT 0x60 /* SGT is signed '>', GT in x86 */ -#define BPF_JSGE 0x70 /* SGE is signed '>=', GE in x86 */ -#define BPF_CALL 0x80 /* function call */ -#define BPF_EXIT 0x90 /* function return */ +#define BPF_END 0xd0 /* flags for endianness conversion: */ +#define BPF_TO_LE 0x00 /* convert to little-endian */ +#define BPF_TO_BE 0x08 /* convert to big-endian */ +#define BPF_FROM_LE BPF_TO_LE +#define BPF_FROM_BE BPF_TO_BE + +#define BPF_JNE 0x50 /* jump != */ +#define BPF_JSGT 0x60 /* SGT is signed '>', GT in x86 */ +#define BPF_JSGE 0x70 /* SGE is signed '>=', GE in x86 */ +#define BPF_CALL 0x80 /* function call */ +#define BPF_EXIT 0x90 /* function return */ /* Register numbers */ -enum { - BPF_REG_0 = 0, - BPF_REG_1, - BPF_REG_2, - BPF_REG_3, - BPF_REG_4, - BPF_REG_5, - BPF_REG_6, - BPF_REG_7, - BPF_REG_8, - BPF_REG_9, - BPF_REG_10, - __MAX_BPF_REG, +enum { BPF_REG_0 = 0, + BPF_REG_1, + BPF_REG_2, + BPF_REG_3, + BPF_REG_4, + BPF_REG_5, + BPF_REG_6, + BPF_REG_7, + BPF_REG_8, + BPF_REG_9, + BPF_REG_10, + __MAX_BPF_REG, }; /* BPF has 10 general purpose 64-bit registers and stack frame. 
*/ -#define MAX_BPF_REG __MAX_BPF_REG +#define MAX_BPF_REG __MAX_BPF_REG struct bpf_insn { - __u8 code; /* opcode */ - __u8 dst_reg:4; /* dest register */ - __u8 src_reg:4; /* source register */ - __s16 off; /* signed offset */ - __s32 imm; /* signed immediate constant */ + __u8 code; /* opcode */ + __u8 dst_reg : 4; /* dest register */ + __u8 src_reg : 4; /* source register */ + __s16 off; /* signed offset */ + __s32 imm; /* signed immediate constant */ }; /* BPF syscall commands */ @@ -121,42 +120,43 @@ enum bpf_prog_type { BPF_PROG_TYPE_KPROBE, BPF_PROG_TYPE_SCHED_CLS, BPF_PROG_TYPE_SCHED_ACT, + BPF_PROG_TYPE_RBS, }; -#define BPF_PSEUDO_MAP_FD 1 +#define BPF_PSEUDO_MAP_FD 1 /* flags for BPF_MAP_UPDATE_ELEM command */ -#define BPF_ANY 0 /* create new element or update existing */ -#define BPF_NOEXIST 1 /* create new element if it didn't exist */ -#define BPF_EXIST 2 /* update existing element */ +#define BPF_ANY 0 /* create new element or update existing */ +#define BPF_NOEXIST 1 /* create new element if it didn't exist */ +#define BPF_EXIST 2 /* update existing element */ union bpf_attr { struct { /* anonymous struct used by BPF_MAP_CREATE command */ - __u32 map_type; /* one of enum bpf_map_type */ - __u32 key_size; /* size of key in bytes */ - __u32 value_size; /* size of value in bytes */ - __u32 max_entries; /* max number of entries in a map */ + __u32 map_type; /* one of enum bpf_map_type */ + __u32 key_size; /* size of key in bytes */ + __u32 value_size; /* size of value in bytes */ + __u32 max_entries; /* max number of entries in a map */ }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ - __u32 map_fd; - __aligned_u64 key; + __u32 map_fd; + __aligned_u64 key; union { __aligned_u64 value; __aligned_u64 next_key; }; - __u64 flags; + __u64 flags; }; struct { /* anonymous struct used by BPF_PROG_LOAD command */ - __u32 prog_type; /* one of enum bpf_prog_type */ - __u32 insn_cnt; - __aligned_u64 insns; - __aligned_u64 license; - __u32 log_level; /* verbosity level of verifier */ - __u32 log_size; /* size of user buffer */ - __aligned_u64 log_buf; /* user supplied buffer */ - __u32 kern_version; /* checked when prog_type=kprobe */ + __u32 prog_type; /* one of enum bpf_prog_type */ + __u32 insn_cnt; + __aligned_u64 insns; + __aligned_u64 license; + __u32 log_level; /* verbosity level of verifier */ + __u32 log_size; /* size of user buffer */ + __aligned_u64 log_buf; /* user supplied buffer */ + __u32 kern_version; /* checked when prog_type=kprobe */ }; } __attribute__((aligned(8))); @@ -166,16 +166,20 @@ union bpf_attr { enum bpf_func_id { BPF_FUNC_unspec, BPF_FUNC_map_lookup_elem, /* void *map_lookup_elem(&map, &key) */ - BPF_FUNC_map_update_elem, /* int map_update_elem(&map, &key, &value, flags) */ + BPF_FUNC_map_update_elem, /* int map_update_elem(&map, &key, &value, + flags) */ BPF_FUNC_map_delete_elem, /* int map_delete_elem(&map, &key) */ - BPF_FUNC_probe_read, /* int bpf_probe_read(void *dst, int size, void *src) */ - BPF_FUNC_ktime_get_ns, /* u64 bpf_ktime_get_ns(void) */ - BPF_FUNC_trace_printk, /* int bpf_trace_printk(const char *fmt, int fmt_size, ...) */ - BPF_FUNC_get_prandom_u32, /* u32 prandom_u32(void) */ + BPF_FUNC_probe_read, /* int bpf_probe_read(void *dst, int size, void + *src) */ + BPF_FUNC_ktime_get_ns, /* u64 bpf_ktime_get_ns(void) */ + BPF_FUNC_trace_printk, /* int bpf_trace_printk(const char *fmt, int + fmt_size, ...) 
*/ + BPF_FUNC_get_prandom_u32, /* u32 prandom_u32(void) */ BPF_FUNC_get_smp_processor_id, /* u32 raw_smp_processor_id(void) */ /** - * skb_store_bytes(skb, offset, from, len, flags) - store bytes into packet + * skb_store_bytes(skb, offset, from, len, flags) - store bytes into + * packet * @skb: pointer to skb * @offset: offset within packet from skb->mac_header * @from: pointer where to copy bytes from @@ -199,7 +203,8 @@ enum bpf_func_id { BPF_FUNC_l3_csum_replace, /** - * l4_csum_replace(skb, offset, from, to, flags) - recompute TCP/UDP checksum + * l4_csum_replace(skb, offset, from, to, flags) - recompute TCP/UDP + * checksum * @skb: pointer to skb * @offset: offset within packet where TCP/UDP checksum is located * @from: old value of header field @@ -210,6 +215,25 @@ enum bpf_func_id { * Return: 0 on success */ BPF_FUNC_l4_csum_replace, + + /* eBPF functions for RBS */ + BPF_FUNC_mptcp_rbs_printk, + BPF_FUNC_mptcp_rbs_add_drop, + BPF_FUNC_mptcp_rbs_add_push, + BPF_FUNC_mptcp_rbs_ktime_get_raw_ms, + BPF_FUNC_mptcp_rbs_random, + BPF_FUNC_mptcp_rbs_has_window_for, + BPF_FUNC_mptcp_rbs_bw_out_send, + BPF_FUNC_mptcp_rbs_bw_out_ack, + BPF_FUNC_mptcp_rbs_lossy, + BPF_FUNC_mptcp_rbs_sent_on_all, + BPF_FUNC_mptcp_rbs_q_next, + BPF_FUNC_mptcp_rbs_qu_next, + BPF_FUNC_mptcp_rbs_rq_next, + BPF_FUNC_mptcp_rbs_subflows_next, + BPF_FUNC_mptcp_rbs_varlist_expand, + BPF_FUNC_mptcp_rbs_skb_list_pop, + __BPF_FUNC_MAX_ID, }; diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 287e1c3ac7e9a..8f523acd7dbec 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -50,7 +50,7 @@ struct tcphdr { fin:1; #else #error "Adjust your defines" -#endif +#endif __be16 window; __sum16 check; __be16 urg_ptr; @@ -61,14 +61,14 @@ struct tcphdr { * (union is compatible to any of its members) * This means this part of the code is -fstrict-aliasing safe now. */ -union tcp_word_hdr { +union tcp_word_hdr { struct tcphdr hdr; __be32 words[5]; -}; +}; -#define tcp_flag_word(tp) ( ((union tcp_word_hdr *)(tp))->words [3]) +#define tcp_flag_word(tp) ( ((union tcp_word_hdr *)(tp))->words [3]) -enum { +enum { TCP_FLAG_CWR = __constant_cpu_to_be32(0x00800000), TCP_FLAG_ECE = __constant_cpu_to_be32(0x00400000), TCP_FLAG_URG = __constant_cpu_to_be32(0x00200000), @@ -79,7 +79,7 @@ enum { TCP_FLAG_FIN = __constant_cpu_to_be32(0x00010000), TCP_RESERVED_BITS = __constant_cpu_to_be32(0x0F000000), TCP_DATA_OFFSET = __constant_cpu_to_be32(0xF0000000) -}; +}; /* * TCP general constants @@ -114,6 +114,9 @@ enum { #define TCP_NOTSENT_LOWAT 25 /* limit number of unsent bytes in write queue */ #define TCP_CC_INFO 26 /* Get Congestion Control (optional) info */ #define MPTCP_ENABLED 42 +#define MPTCP_SCHEDULER 44 +#define MPTCP_SCHEDULER_REG 45 +#define MPTCP_RBS_SKB_PROP 46 struct tcp_repair_opt { __u32 opt_code; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 63c107c052200..baba5ee6cec76 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -160,7 +160,8 @@ * generates them. * Alan Cox : Cache last socket. * Alan Cox : Per route irtt. - * Matt Day : poll()->select() match BSD precisely on error + * Matt Day : poll()->select() match BSD precisely on + *error * Alan Cox : New buffers * Marc Tamsky : Various sk->prot->retransmits and * sk->retransmits misupdating fixed. @@ -168,8 +169,10 @@ * and TCP syn retries gets used now. * Mark Yarvis : In tcp_read_wakeup(), don't send an * ack if state is TCP_CLOSED. - * Alan Cox : Look up device on a retransmit - routes may - * change. 
Doesn't yet cope with MSS shrink right + * Alan Cox : Look up device on a retransmit - routes + *may + * change. Doesn't yet cope with MSS shrink + *right * but it's a start! * Marc Tamsky : Closing in closing fixes. * Mike Shaver : RFC1122 verifications. @@ -247,42 +250,45 @@ #define pr_fmt(fmt) "TCP: " fmt -#include -#include -#include +#include +#include +#include +#include #include -#include +#include +#include #include #include -#include -#include -#include -#include +#include +#include #include -#include +#include #include -#include -#include +#include +#include +#include +#include +#include #include -#include -#include -#include #include -#include +#include #include #include +#include #include +#include #include #include -#include -#include -#include #include +#include #include #include +#include "../mptcp/mptcp_rbs_sched.h" +#include "../mptcp/mptcp_rbs_scheduler.h" + int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT; int sysctl_tcp_min_tso_segs __read_mostly = 2; @@ -300,7 +306,7 @@ EXPORT_SYMBOL(sysctl_tcp_mem); EXPORT_SYMBOL(sysctl_tcp_rmem); EXPORT_SYMBOL(sysctl_tcp_wmem); -atomic_long_t tcp_memory_allocated; /* Current allocated memory. */ +atomic_long_t tcp_memory_allocated; /* Current allocated memory. */ EXPORT_SYMBOL(tcp_memory_allocated); /* @@ -374,22 +380,22 @@ static int retrans_to_secs(u8 retrans, int timeout, int rto_max) } const struct tcp_sock_ops tcp_specific = { - .__select_window = __tcp_select_window, - .select_window = tcp_select_window, - .select_initial_window = tcp_select_initial_window, - .select_size = select_size, - .init_buffer_space = tcp_init_buffer_space, - .set_rto = tcp_set_rto, - .should_expand_sndbuf = tcp_should_expand_sndbuf, - .send_fin = tcp_send_fin, - .write_xmit = tcp_write_xmit, - .send_active_reset = tcp_send_active_reset, - .write_wakeup = tcp_write_wakeup, - .prune_ofo_queue = tcp_prune_ofo_queue, - .retransmit_timer = tcp_retransmit_timer, - .time_wait = tcp_time_wait, - .cleanup_rbuf = tcp_cleanup_rbuf, - .cwnd_validate = tcp_cwnd_validate, + .__select_window = __tcp_select_window, + .select_window = tcp_select_window, + .select_initial_window = tcp_select_initial_window, + .select_size = select_size, + .init_buffer_space = tcp_init_buffer_space, + .set_rto = tcp_set_rto, + .should_expand_sndbuf = tcp_should_expand_sndbuf, + .send_fin = tcp_send_fin, + .write_xmit = tcp_write_xmit, + .send_active_reset = tcp_send_active_reset, + .write_wakeup = tcp_write_wakeup, + .prune_ofo_queue = tcp_prune_ofo_queue, + .retransmit_timer = tcp_retransmit_timer, + .time_wait = tcp_time_wait, + .cleanup_rbuf = tcp_cleanup_rbuf, + .cwnd_validate = tcp_cwnd_validate, }; /* Address-family independent initialization for a tcp_sock. @@ -528,8 +534,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) int target = sock_rcvlowat(sk, 0, INT_MAX); if (tp->urg_seq == tp->copied_seq && - !sock_flag(sk, SOCK_URGINLINE) && - tp->urg_data) + !sock_flag(sk, SOCK_URGINLINE) && tp->urg_data) target++; /* Potential race condition. 
If read of tp below will @@ -541,7 +546,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { if (sk_stream_is_writeable(sk)) { mask |= POLLOUT | POLLWRNORM; - } else { /* send SIGIO later */ + } else { /* send SIGIO later */ set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); @@ -584,8 +589,7 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) slow = lock_sock_fast(sk); if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) answ = 0; - else if (sock_flag(sk, SOCK_URGINLINE) || - !tp->urg_data || + else if (sock_flag(sk, SOCK_URGINLINE) || !tp->urg_data || before(tp->urg_seq, tp->copied_seq) || !before(tp->urg_seq, tp->rcv_nxt)) { @@ -623,7 +627,7 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) return -ENOIOCTLCMD; } - return put_user(answ, (int __user *)arg); + return put_user(answ, (int __user *) arg); } EXPORT_SYMBOL(tcp_ioctl); @@ -643,16 +647,47 @@ static void skb_entail(struct sock *sk, struct sk_buff *skb) struct tcp_sock *tp = tcp_sk(sk); struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); - skb->csum = 0; - tcb->seq = tcb->end_seq = tp->write_seq; + skb->csum = 0; + tcb->seq = tcb->end_seq = tp->write_seq; tcb->tcp_flags = TCPHDR_ACK; - tcb->sacked = 0; + tcb->sacked = 0; __skb_header_release(skb); tcp_add_write_queue_tail(sk, skb); sk->sk_wmem_queued += skb->truesize; sk_mem_charge(sk, skb->truesize); if (tp->nonagle & TCP_NAGLE_PUSH) tp->nonagle &= ~TCP_NAGLE_PUSH; + + /* RBS specific stuff */ + if (mptcp(tp)) { + /* note: only the meta_sk should be used for this*/ + if (tp->mpcb->meta_sk == (struct sock *) tp) { + struct tcp_sock *meta_tp = tcp_sk(sk); + + /* are we using rbs? */ + if (mptcp_rbs_is_sched_used(meta_tp)) { + struct mptcp_rbs_cb *rbs_cb = + mptcp_rbs_get_cb(meta_tp); + + if (rbs_cb->queue_position == NULL) { + rbs_cb->queue_position = skb; + mptcp_debug("rbs corrects queue " + "position, before NULL, " + "now %p\n", + skb); + } else { + mptcp_debug( + "rbs no need to correct queue " + "position, remains with %p, not " + "switched to %p\n", + rbs_cb->queue_position, skb); + } + } + } else { + printk("avoided bug at the airport 2\n"); + } + } + /* RBS specific stuff END */ } static inline void tcp_mark_urg(struct tcp_sock *tp, int flags) @@ -674,14 +709,13 @@ static inline void tcp_mark_urg(struct tcp_sock *tp, int flags) static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb, int size_goal) { - return skb->len < size_goal && - sysctl_tcp_autocorking && + return skb->len < size_goal && sysctl_tcp_autocorking && skb != tcp_write_queue_head(sk) && atomic_read(&sk->sk_wmem_alloc) > skb->truesize; } -static void tcp_push(struct sock *sk, int flags, int mss_now, - int nonagle, int size_goal) +static void tcp_push(struct sock *sk, int flags, int mss_now, int nonagle, + int size_goal) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; @@ -732,8 +766,7 @@ static int __tcp_splice_read(struct sock *sk, struct tcp_splice_state *tss) { /* Store TCP splice context information in read_descriptor_t. 
*/ read_descriptor_t rd_desc = { - .arg.data = tss, - .count = tss->len, + .arg.data = tss, .count = tss->len, }; return tcp_read_sock(sk, &rd_desc, tcp_splice_data_recv); @@ -757,9 +790,7 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, { struct sock *sk = sock->sk; struct tcp_splice_state tss = { - .pipe = pipe, - .len = len, - .flags = flags, + .pipe = pipe, .len = len, .flags = flags, }; long timeo; ssize_t spliced; @@ -771,7 +802,7 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, if (mptcp(tcp_sk(sk))) { struct sock *sk_it; mptcp_for_each_sk(tcp_sk(sk)->mpcb, sk_it) - sock_rps_record_flow(sk_it); + sock_rps_record_flow(sk_it); } #endif /* @@ -829,8 +860,7 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, lock_sock(sk); if (sk->sk_err || sk->sk_state == TCP_CLOSE || - (sk->sk_shutdown & RCV_SHUTDOWN) || - signal_pending(current)) + (sk->sk_shutdown & RCV_SHUTDOWN) || signal_pending(current)) break; } @@ -885,8 +915,8 @@ unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now, int large_allowed) size_goal = tp->gso_segs * mss_now; if (unlikely(new_size_goal < size_goal || new_size_goal >= size_goal + mss_now)) { - tp->gso_segs = min_t(u16, new_size_goal / mss_now, - sk->sk_gso_max_segs); + tp->gso_segs = + min_t(u16, new_size_goal / mss_now, sk->sk_gso_max_segs); size_goal = tp->gso_segs * mss_now; } @@ -899,10 +929,12 @@ static int tcp_send_mss(struct sock *sk, int *size_goal, int flags) if (mptcp(tcp_sk(sk))) { mss_now = mptcp_current_mss(sk); - *size_goal = mptcp_xmit_size_goal(sk, mss_now, !(flags & MSG_OOB)); + *size_goal = + mptcp_xmit_size_goal(sk, mss_now, !(flags & MSG_OOB)); } else { mss_now = tcp_current_mss(sk); - *size_goal = tcp_xmit_size_goal(sk, mss_now, !(flags & MSG_OOB)); + *size_goal = + tcp_xmit_size_goal(sk, mss_now, !(flags & MSG_OOB)); } return mss_now; @@ -922,8 +954,8 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, * is fully established. */ if (((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) && - !tcp_passive_fastopen(mptcp(tp) && tp->mpcb->master_sk ? - tp->mpcb->master_sk : sk)) { + !tcp_passive_fastopen( + mptcp(tp) && tp->mpcb->master_sk ? 
tp->mpcb->master_sk : sk)) { if ((err = sk_stream_wait_connect(sk, &timeo)) != 0) goto out_err; } @@ -944,8 +976,7 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, return ret; } - mptcp_for_each_sk(tp->mpcb, sk_it) - sock_rps_record_flow(sk_it); + mptcp_for_each_sk(tp->mpcb, sk_it) sock_rps_record_flow(sk_it); } clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); @@ -963,7 +994,7 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, bool can_coalesce; if (!tcp_send_head(sk) || (copy = size_goal - skb->len) <= 0) { -new_segment: + new_segment: if (!sk_stream_memory_free(sk)) goto wait_for_sndbuf; @@ -1025,11 +1056,11 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, tcp_push_one(sk, mss_now); continue; -wait_for_sndbuf: + wait_for_sndbuf: set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); -wait_for_memory: - tcp_push(sk, flags & ~MSG_MORE, mss_now, - TCP_NAGLE_PUSH, size_goal); + wait_for_memory: + tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH, + size_goal); if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) goto do_error; @@ -1049,14 +1080,14 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, return sk_stream_error(sk, flags, err); } -int tcp_sendpage(struct sock *sk, struct page *page, int offset, - size_t size, int flags) +int tcp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, + int flags) { ssize_t res; /* If MPTCP is enabled, we check it later after establishment */ if (!mptcp(tcp_sk(sk)) && (!(sk->sk_route_caps & NETIF_F_SG) || - !(sk->sk_route_caps & NETIF_F_ALL_CSUM))) + !(sk->sk_route_caps & NETIF_F_ALL_CSUM))) return sock_no_sendpage(sk->sk_socket, page, offset, size, flags); @@ -1109,8 +1140,8 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, if (tp->fastopen_req) return -EALREADY; /* Another Fast Open is in progress */ - tp->fastopen_req = kzalloc(sizeof(struct tcp_fastopen_request), - sk->sk_allocation); + tp->fastopen_req = + kzalloc(sizeof(struct tcp_fastopen_request), sk->sk_allocation); if (unlikely(!tp->fastopen_req)) return -ENOBUFS; tp->fastopen_req->data = msg; @@ -1151,16 +1182,15 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) * is fully established. */ if (((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) && - !tcp_passive_fastopen(mptcp(tp) && tp->mpcb->master_sk ? - tp->mpcb->master_sk : sk)) { + !tcp_passive_fastopen( + mptcp(tp) && tp->mpcb->master_sk ? tp->mpcb->master_sk : sk)) { if ((err = sk_stream_wait_connect(sk, &timeo)) != 0) goto do_error; } if (mptcp(tp)) { struct sock *sk_it = sk; - mptcp_for_each_sk(tp->mpcb, sk_it) - sock_rps_record_flow(sk_it); + mptcp_for_each_sk(tp->mpcb, sk_it) sock_rps_record_flow(sk_it); } if (unlikely(tp->repair)) { @@ -1205,7 +1235,7 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) } if (copy <= 0) { -new_segment: + new_segment: /* Allocate new segment. If the interface is SG, * allocate skb fitting to single page. */ @@ -1227,11 +1257,39 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) * In case of mptcp, hw-csum's will be handled * later in mptcp_write_xmit. 
*/ - if (((mptcp(tp) && !tp->mpcb->dss_csum) || !mptcp(tp)) && + if (((mptcp(tp) && !tp->mpcb->dss_csum) || + !mptcp(tp)) && (mptcp(tp) || sk->sk_route_caps & NETIF_F_ALL_CSUM)) skb->ip_summed = CHECKSUM_PARTIAL; skb_entail(sk, skb); + + /* RBS specific stuff to set skb props*/ + if (mptcp(tp)) { + /* note: only the meta_sk should be used for + * this*/ + if (tp->mpcb->meta_sk == (struct sock *) tp) { + struct tcp_sock *meta_tp = tcp_sk(sk); + + /* are we using rbs? */ + if (mptcp_rbs_is_sched_used(meta_tp)) { + struct mptcp_rbs_cb *rbs_cb = + mptcp_rbs_get_cb(meta_tp); + + mptcp_debug( + "setting skb->mptcp_rbs " + "with %i for %p with size " + "of tcp_skb_cb %lu\n", + TCP_SKB_CB(skb)->mptcp_rbs.user, + skb, + sizeof(struct tcp_skb_cb)); + TCP_SKB_CB(skb)->mptcp_rbs.user = + rbs_cb->skb_prop; + } + } + } + /* RBS specific stuff END */ + copy = size_goal; max = size_goal; @@ -1251,7 +1309,8 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) if (skb_availroom(skb) > 0) { /* We have some space in skb head. Superb! */ copy = min_t(int, copy, skb_availroom(skb)); - err = skb_add_data_nocache(sk, skb, &msg->msg_iter, copy); + err = + skb_add_data_nocache(sk, skb, &msg->msg_iter, copy); if (err) goto do_fault; } else { @@ -1278,14 +1337,14 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) err = skb_copy_to_page_nocache(sk, &msg->msg_iter, skb, pfrag->page, - pfrag->offset, - copy); + pfrag->offset, copy); if (err) goto do_error; /* Update the skb. */ if (merge) { - skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); + skb_frag_size_add( + &skb_shinfo(skb)->frags[i - 1], copy); } else { skb_fill_page_desc(skb, i, pfrag->page, pfrag->offset, copy); @@ -1317,12 +1376,12 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) tcp_push_one(sk, mss_now); continue; -wait_for_sndbuf: + wait_for_sndbuf: set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); -wait_for_memory: + wait_for_memory: if (copied) - tcp_push(sk, flags & ~MSG_MORE, mss_now, - TCP_NAGLE_PUSH, size_goal); + tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH, + size_goal); if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) goto do_error; @@ -1369,7 +1428,7 @@ static int tcp_recv_urg(struct sock *sk, struct msghdr *msg, int len, int flags) /* No URG data to read. */ if (sock_flag(sk, SOCK_URGINLINE) || !tp->urg_data || tp->urg_data == TCP_URG_READ) - return -EINVAL; /* Yes this is right ! */ + return -EINVAL; /* Yes this is right ! */ if (sk->sk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DONE)) return -ENOTCONN; @@ -1413,7 +1472,8 @@ static int tcp_peek_sndq(struct sock *sk, struct msghdr *msg, int len) /* XXX -- need to support SO_PEEK_OFF */ - skb_queue_walk(&sk->sk_write_queue, skb) { + skb_queue_walk(&sk->sk_write_queue, skb) + { err = skb_copy_datagram_msg(skb, 0, msg, skb->len); if (err) break; @@ -1438,13 +1498,13 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied) struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); WARN(skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq), - "cleanup rbuf bug: copied %X seq %X rcvnxt %X\n", - tp->copied_seq, TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt); + "cleanup rbuf bug: copied %X seq %X rcvnxt %X\n", tp->copied_seq, + TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt); if (inet_csk_ack_scheduled(sk)) { const struct inet_connection_sock *icsk = inet_csk(sk); - /* Delayed ACKs frequently hit locked sockets during bulk - * receive. */ + /* Delayed ACKs frequently hit locked sockets during bulk + * receive. 
*/ if (icsk->icsk_ack.blocked || /* Once-per-two-segments ACK was not sent by tcp_input.c */ tp->rcv_nxt - tp->rcv_wup > icsk->icsk_ack.rcv_mss || @@ -1458,7 +1518,7 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied) ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED2) || ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED) && !icsk->icsk_ack.pingpong)) && - !atomic_read(&sk->sk_rmem_alloc))) + !atomic_read(&sk->sk_rmem_alloc))) time_to_ack = true; } @@ -1472,12 +1532,13 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied) __u32 rcv_window_now = tcp_receive_window(tp); /* Optimize, __tcp_select_window() is not cheap. */ - if (2*rcv_window_now <= tp->window_clamp) { + if (2 * rcv_window_now <= tp->window_clamp) { __u32 new_window = tp->ops->__select_window(sk); /* Send ACK now, if this read freed lots of space * in our buffer. Certainly, new_window is new window. - * We can advertise it now, if it is not less than current one. + * We can advertise it now, if it is not less than + * current one. * "Lots" means "at least twice" here. */ if (new_window && new_window >= 2 * rcv_window_now) @@ -1515,7 +1576,8 @@ static struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off) offset = seq - TCP_SKB_CB(skb)->seq; if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN) offset--; - if (offset < skb->len || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)) { + if (offset < skb->len || + (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)) { *off = offset; return skb; } @@ -1628,7 +1690,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, u32 *seq; unsigned long used; int err; - int target; /* Read at least this many bytes */ + int target; /* Read at least this many bytes */ long timeo; struct task_struct *user_recv = NULL; struct sk_buff *skb; @@ -1646,8 +1708,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, #ifdef CONFIG_MPTCP if (mptcp(tp)) { struct sock *sk_it; - mptcp_for_each_sk(tp->mpcb, sk_it) - sock_rps_record_flow(sk_it); + mptcp_for_each_sk(tp->mpcb, sk_it) sock_rps_record_flow(sk_it); } #endif @@ -1687,24 +1748,28 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, do { u32 offset; - /* Are we at urgent data? Stop if we have read anything or have SIGURG pending. */ + /* Are we at urgent data? Stop if we have read anything or have + * SIGURG pending. */ if (tp->urg_data && tp->urg_seq == *seq) { if (copied) break; if (signal_pending(current)) { - copied = timeo ? sock_intr_errno(timeo) : -EAGAIN; + copied = + timeo ? sock_intr_errno(timeo) : -EAGAIN; break; } } /* Next get a buffer. */ - skb_queue_walk(&sk->sk_receive_queue, skb) { + skb_queue_walk(&sk->sk_receive_queue, skb) + { /* Now that we have two receive queues this * shouldn't happen. */ if (WARN(before(*seq, TCP_SKB_CB(skb)->seq), - "recvmsg bug: copied %X seq %X rcvnxt %X fl %X\n", + "recvmsg bug: copied %X seq %X rcvnxt %X fl " + "%X\n", *seq, TCP_SKB_CB(skb)->seq, tp->rcv_nxt, flags)) break; @@ -1716,9 +1781,10 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, goto found_ok_skb; if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) goto found_fin_ok; - WARN(!(flags & MSG_PEEK), - "recvmsg bug 2: copied %X seq %X rcvnxt %X fl %X\n", - *seq, TCP_SKB_CB(skb)->seq, tp->rcv_nxt, flags); + WARN( + !(flags & MSG_PEEK), + "recvmsg bug 2: copied %X seq %X rcvnxt %X fl %X\n", + *seq, TCP_SKB_CB(skb)->seq, tp->rcv_nxt, flags); } /* Well, if we have backlog, try to process it now yet. 
*/ @@ -1727,10 +1793,8 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, break; if (copied) { - if (sk->sk_err || - sk->sk_state == TCP_CLOSE || - (sk->sk_shutdown & RCV_SHUTDOWN) || - !timeo || + if (sk->sk_err || sk->sk_state == TCP_CLOSE || + (sk->sk_shutdown & RCV_SHUTDOWN) || !timeo || signal_pending(current)) break; } else { @@ -1827,18 +1891,23 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, /* __ Restore normal policy in scheduler __ */ if ((chunk = len - tp->ucopy.len) != 0) { - NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG, chunk); + NET_ADD_STATS_USER( + sock_net(sk), + LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG, chunk); len -= chunk; copied += chunk; } if (tp->rcv_nxt == tp->copied_seq && !skb_queue_empty(&tp->ucopy.prequeue)) { -do_prequeue: + do_prequeue: tcp_prequeue_process(sk); if ((chunk = len - tp->ucopy.len) != 0) { - NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk); + NET_ADD_STATS_USER( + sock_net(sk), + LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, + chunk); len -= chunk; copied += chunk; } @@ -1846,9 +1915,9 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, } if ((flags & MSG_PEEK) && (peek_seq - copied - urg_hole != tp->copied_seq)) { - net_dbg_ratelimited("TCP(%s:%d): Application bug, race in MSG_PEEK\n", - current->comm, - task_pid_nr(current)); + net_dbg_ratelimited( + "TCP(%s:%d): Application bug, race in MSG_PEEK\n", + current->comm, task_pid_nr(current)); peek_seq = tp->copied_seq; } continue; @@ -1893,7 +1962,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, tcp_rcv_space_adjust(sk); -skip_copy: + skip_copy: if (tp->urg_data && after(tp->copied_seq, tp->urg_seq)) { tp->urg_data = 0; tcp_fast_path_check(sk); @@ -1924,7 +1993,9 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, tcp_prequeue_process(sk); if (copied > 0 && (chunk = len - tp->ucopy.len) != 0) { - NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk); + NET_ADD_STATS_USER( + sock_net(sk), + LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk); len -= chunk; copied += chunk; } @@ -1976,7 +2047,7 @@ void tcp_set_state(struct sock *sk, int state) if (inet_csk(sk)->icsk_bind_hash && !(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) inet_put_port(sk); - /* fall through */ + /* fall through */ default: if (oldstate == TCP_ESTABLISHED) TCP_DEC_STATS(sock_net(sk), TCP_MIB_CURRESTAB); @@ -1988,7 +2059,8 @@ void tcp_set_state(struct sock *sk, int state) sk->sk_state = state; #ifdef STATE_TRACE - SOCK_DEBUG(sk, "TCP sk=%p, State %s -> %s\n", sk, statename[oldstate], statename[state]); + SOCK_DEBUG(sk, "TCP sk=%p, State %s -> %s\n", sk, statename[oldstate], + statename[state]); #endif } EXPORT_SYMBOL_GPL(tcp_set_state); @@ -2001,25 +2073,25 @@ EXPORT_SYMBOL_GPL(tcp_set_state); */ static const unsigned char new_state[16] = { - /* current state: new state: action: */ - [0 /* (Invalid) */] = TCP_CLOSE, - [TCP_ESTABLISHED] = TCP_FIN_WAIT1 | TCP_ACTION_FIN, - [TCP_SYN_SENT] = TCP_CLOSE, - [TCP_SYN_RECV] = TCP_FIN_WAIT1 | TCP_ACTION_FIN, - [TCP_FIN_WAIT1] = TCP_FIN_WAIT1, - [TCP_FIN_WAIT2] = TCP_FIN_WAIT2, - [TCP_TIME_WAIT] = TCP_CLOSE, - [TCP_CLOSE] = TCP_CLOSE, - [TCP_CLOSE_WAIT] = TCP_LAST_ACK | TCP_ACTION_FIN, - [TCP_LAST_ACK] = TCP_LAST_ACK, - [TCP_LISTEN] = TCP_CLOSE, - [TCP_CLOSING] = TCP_CLOSING, - [TCP_NEW_SYN_RECV] = TCP_CLOSE, /* should not happen ! 
*/ + /* current state: new state: action: */ + [0 /* (Invalid) */] = TCP_CLOSE, + [TCP_ESTABLISHED] = TCP_FIN_WAIT1 | TCP_ACTION_FIN, + [TCP_SYN_SENT] = TCP_CLOSE, + [TCP_SYN_RECV] = TCP_FIN_WAIT1 | TCP_ACTION_FIN, + [TCP_FIN_WAIT1] = TCP_FIN_WAIT1, + [TCP_FIN_WAIT2] = TCP_FIN_WAIT2, + [TCP_TIME_WAIT] = TCP_CLOSE, + [TCP_CLOSE] = TCP_CLOSE, + [TCP_CLOSE_WAIT] = TCP_LAST_ACK | TCP_ACTION_FIN, + [TCP_LAST_ACK] = TCP_LAST_ACK, + [TCP_LISTEN] = TCP_CLOSE, + [TCP_CLOSING] = TCP_CLOSING, + [TCP_NEW_SYN_RECV] = TCP_CLOSE, /* should not happen ! */ }; int tcp_close_state(struct sock *sk) { - int next = (int)new_state[sk->sk_state]; + int next = (int) new_state[sk->sk_state]; int ns = next & TCP_STATE_MASK; tcp_set_state(sk, ns); @@ -2042,9 +2114,8 @@ void tcp_shutdown(struct sock *sk, int how) return; /* If we've already sent a FIN, or it's a closed state, skip this. */ - if ((1 << sk->sk_state) & - (TCPF_ESTABLISHED | TCPF_SYN_SENT | - TCPF_SYN_RECV | TCPF_CLOSE_WAIT)) { + if ((1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_SYN_SENT | + TCPF_SYN_RECV | TCPF_CLOSE_WAIT)) { /* Clear out any half completed packets. FIN if needed. */ if (tcp_close_state(sk)) tcp_sk(sk)->ops->send_fin(sk); @@ -2062,7 +2133,8 @@ bool tcp_check_oom(struct sock *sk, int shift) if (too_many_orphans) net_info_ratelimited("too many orphaned sockets\n"); if (out_of_socket_memory) - net_info_ratelimited("out of memory -- consider tuning tcp_mem\n"); + net_info_ratelimited( + "out of memory -- consider tuning tcp_mem\n"); return too_many_orphans || out_of_socket_memory; } @@ -2169,7 +2241,6 @@ void tcp_close(struct sock *sk, long timeout) /* It is the last release_sock in its life. It will remove backlog. */ release_sock(sk); - /* Now socket is owned by kernel and we acquire BH lock to finish close. No need to check for user refs. */ @@ -2203,13 +2274,13 @@ void tcp_close(struct sock *sk, long timeout) tcp_set_state(sk, TCP_CLOSE); tp->ops->send_active_reset(sk, GFP_ATOMIC); NET_INC_STATS_BH(sock_net(sk), - LINUX_MIB_TCPABORTONLINGER); + LINUX_MIB_TCPABORTONLINGER); } else { const int tmo = tcp_fin_time(sk); if (tmo > TCP_TIMEWAIT_LEN) { - inet_csk_reset_keepalive_timer(sk, - tmo - TCP_TIMEWAIT_LEN); + inet_csk_reset_keepalive_timer( + sk, tmo - TCP_TIMEWAIT_LEN); } else { tcp_sk(sk)->ops->time_wait(sk, TCP_FIN_WAIT2, tmo); @@ -2223,7 +2294,7 @@ void tcp_close(struct sock *sk, long timeout) tcp_set_state(sk, TCP_CLOSE); tcp_sk(sk)->ops->send_active_reset(sk, GFP_ATOMIC); NET_INC_STATS_BH(sock_net(sk), - LINUX_MIB_TCPABORTONMEMORY); + LINUX_MIB_TCPABORTONMEMORY); } } @@ -2237,7 +2308,7 @@ void tcp_close(struct sock *sk, long timeout) reqsk_fastopen_remove(sk, req, false); inet_csk_destroy_sock(sk); } - /* Otherwise, socket is reprieved until protocol close. */ +/* Otherwise, socket is reprieved until protocol close. 
*/ out: bh_unlock_sock(sk); @@ -2326,11 +2397,12 @@ void tcp_sock_destruct(struct sock *sk) static inline bool tcp_can_repair_sock(const struct sock *sk) { return ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN) && - ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_ESTABLISHED)); + ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_ESTABLISHED)); } static int tcp_repair_options_est(struct tcp_sock *tp, - struct tcp_repair_opt __user *optbuf, unsigned int len) + struct tcp_repair_opt __user *optbuf, + unsigned int len) { struct tcp_repair_opt opt; @@ -2345,19 +2417,17 @@ static int tcp_repair_options_est(struct tcp_sock *tp, case TCPOPT_MSS: tp->rx_opt.mss_clamp = opt.opt_val; break; - case TCPOPT_WINDOW: - { - u16 snd_wscale = opt.opt_val & 0xFFFF; - u16 rcv_wscale = opt.opt_val >> 16; + case TCPOPT_WINDOW: { + u16 snd_wscale = opt.opt_val & 0xFFFF; + u16 rcv_wscale = opt.opt_val >> 16; - if (snd_wscale > 14 || rcv_wscale > 14) - return -EFBIG; + if (snd_wscale > 14 || rcv_wscale > 14) + return -EFBIG; - tp->rx_opt.snd_wscale = snd_wscale; - tp->rx_opt.rcv_wscale = rcv_wscale; - tp->rx_opt.wscale_ok = 1; - } - break; + tp->rx_opt.snd_wscale = snd_wscale; + tp->rx_opt.rcv_wscale = rcv_wscale; + tp->rx_opt.wscale_ok = 1; + } break; case TCPOPT_SACK_PERM: if (opt.opt_val != 0) return -EINVAL; @@ -2381,8 +2451,8 @@ static int tcp_repair_options_est(struct tcp_sock *tp, /* * Socket option code for TCP. */ -static int do_tcp_setsockopt(struct sock *sk, int level, - int optname, char __user *optval, unsigned int optlen) +static int do_tcp_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, unsigned int optlen) { struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); @@ -2397,8 +2467,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level, if (optlen < 1) return -EINVAL; - val = strncpy_from_user(name, optval, - min_t(long, TCP_CA_NAME_MAX-1, optlen)); + val = strncpy_from_user( + name, optval, min_t(long, TCP_CA_NAME_MAX - 1, optlen)); if (val < 0) return -EFAULT; name[val] = 0; @@ -2408,6 +2478,56 @@ static int do_tcp_setsockopt(struct sock *sk, int level, release_sock(sk); return err; } +#ifdef CONFIG_MPTCP + case MPTCP_SCHEDULER: { + lock_sock(sk); + + if (mptcp_init_failed || !sysctl_mptcp_enabled || + !mptcp_rbs_is_sched_used(tp)) { + err = -EPERM; + } else if (optlen == 0) { + if (!mptcp_rbs_scheduler_set(sk, NULL)) + err = -EINVAL; + } else if (optlen >= 256) { + err = -EINVAL; + } else { + char data[256]; + memset(data, 0, sizeof(data)); + + val = strncpy_from_user(data, optval, optlen); + if (val < 0) + err = -EFAULT; + else if (!mptcp_rbs_scheduler_set(sk, data)) + err = -EINVAL; + } + + release_sock(sk); + return err; + } + case MPTCP_SCHEDULER_REG: { + lock_sock(sk); + if (mptcp_init_failed || !sysctl_mptcp_enabled || + !mptcp_rbs_is_sched_used(tp)) { + err = -EPERM; + } else if (optlen != sizeof(struct mptcp_rbs_reg_value)) { + err = -EINVAL; + } else { + struct mptcp_rbs_reg_value reg_value; + + val = + copy_from_user(®_value, optval, + sizeof(struct mptcp_rbs_reg_value)); + if (val < 0) + err = -EFAULT; + else if (!mptcp_rbs_reg_value_set(tp, + ®_value)) + err = -EINVAL; + } + + release_sock(sk); + return err; + } +#endif default: /* fallthru */ break; @@ -2416,7 +2536,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, if (optlen < sizeof(int)) return -EINVAL; - if (get_user(val, (int __user *)optval)) + if (get_user(val, (int __user *) optval)) return -EFAULT; lock_sock(sk); @@ -2443,13 +2563,26 @@ static int 
do_tcp_setsockopt(struct sock *sk, int level, * an explicit push, which overrides even TCP_CORK * for currently queued segments. */ - tp->nonagle |= TCP_NAGLE_OFF|TCP_NAGLE_PUSH; + tp->nonagle |= TCP_NAGLE_OFF | TCP_NAGLE_PUSH; tcp_push_pending_frames(sk); } else { tp->nonagle &= ~TCP_NAGLE_OFF; } break; - +#ifdef CONFIG_MPTCP + case MPTCP_RBS_SKB_PROP: { + if (mptcp_init_failed || !sysctl_mptcp_enabled || + !mptcp_rbs_is_sched_used(tp)) { + err = -EPERM; + } else if (val < 0) { + err = -EFAULT; + } else if (val >= 0 && val <= 0x20) { // we only have 5 bit + mptcp_rbs_get_cb(tp)->skb_prop = val; + } else + err = -EINVAL; + break; + } +#endif case TCP_THIN_LINEAR_TIMEOUTS: if (val < 0 || val > 1) err = -EINVAL; @@ -2507,9 +2640,9 @@ static int do_tcp_setsockopt(struct sock *sk, int level, if (!tp->repair) err = -EINVAL; else if (sk->sk_state == TCP_ESTABLISHED) - err = tcp_repair_options_est(tp, - (struct tcp_repair_opt __user *)optval, - optlen); + err = tcp_repair_options_est( + tp, (struct tcp_repair_opt __user *) optval, + optlen); else err = -EPERM; break; @@ -2530,7 +2663,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, tp->nonagle |= TCP_NAGLE_CORK; } else { tp->nonagle &= ~TCP_NAGLE_CORK; - if (tp->nonagle&TCP_NAGLE_OFF) + if (tp->nonagle & TCP_NAGLE_OFF) tp->nonagle |= TCP_NAGLE_PUSH; tcp_push_pending_frames(sk); } @@ -2589,9 +2722,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level, if (mptcp(tp)) break; /* Translate value in seconds to number of retransmits */ - icsk->icsk_accept_queue.rskq_defer_accept = - secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ, - TCP_RTO_MAX / HZ); + icsk->icsk_accept_queue.rskq_defer_accept = secs_to_retrans( + val, TCP_TIMEOUT_INIT / HZ, TCP_RTO_MAX / HZ); break; case TCP_WINDOW_CLAMP: @@ -2602,8 +2734,9 @@ static int do_tcp_setsockopt(struct sock *sk, int level, } tp->window_clamp = 0; } else - tp->window_clamp = val < SOCK_MIN_RCVBUF / 2 ? - SOCK_MIN_RCVBUF / 2 : val; + tp->window_clamp = val < SOCK_MIN_RCVBUF / 2 + ? 
SOCK_MIN_RCVBUF / 2 + : val; break; case TCP_QUICKACK: @@ -2612,7 +2745,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, } else { icsk->icsk_ack.pingpong = 0; if ((1 << sk->sk_state) & - (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) && + (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) && inet_csk_ack_scheduled(sk)) { icsk->icsk_ack.pending |= ICSK_ACK_PUSHED; tp->ops->cleanup_rbuf(sk, 1); @@ -2639,8 +2772,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level, break; case TCP_FASTOPEN: - if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE | - TCPF_LISTEN))) { + if (val >= 0 && + ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) { tcp_fastopen_init_key_once(true); err = fastopen_init_queue(sk, val); @@ -2687,8 +2820,8 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, const struct inet_connection_sock *icsk = inet_csk(sk); if (level != SOL_TCP) - return icsk->icsk_af_ops->setsockopt(sk, level, optname, - optval, optlen); + return icsk->icsk_af_ops->setsockopt(sk, level, optname, optval, + optlen); return do_tcp_setsockopt(sk, level, optname, optval, optlen); } EXPORT_SYMBOL(tcp_setsockopt); @@ -2698,8 +2831,8 @@ int compat_tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen) { if (level != SOL_TCP) - return inet_csk_compat_setsockopt(sk, level, optname, - optval, optlen); + return inet_csk_compat_setsockopt(sk, level, optname, optval, + optlen); return do_tcp_setsockopt(sk, level, optname, optval, optlen); } EXPORT_SYMBOL(compat_tcp_setsockopt); @@ -2757,7 +2890,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_fackets = tp->fackets_out; info->tcpi_last_data_sent = jiffies_to_msecs(now - tp->lsndtime); - info->tcpi_last_data_recv = jiffies_to_msecs(now - icsk->icsk_ack.lrcvtime); + info->tcpi_last_data_recv = + jiffies_to_msecs(now - icsk->icsk_ack.lrcvtime); info->tcpi_last_ack_recv = jiffies_to_msecs(now - tp->rcv_tstamp); info->tcpi_pmtu = icsk->icsk_pmtu_cookie; @@ -2769,7 +2903,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_advmss = tp->advmss; info->tcpi_reordering = tp->reordering; - info->tcpi_rcv_rtt = jiffies_to_usecs(tp->rcv_rtt_est.rtt)>>3; + info->tcpi_rcv_rtt = jiffies_to_usecs(tp->rcv_rtt_est.rtt) >> 3; info->tcpi_rcv_space = tp->rcvq_space.space; info->tcpi_total_retrans = tp->total_retrans; @@ -2790,8 +2924,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) } EXPORT_SYMBOL_GPL(tcp_get_info); -static int do_tcp_getsockopt(struct sock *sk, int level, - int optname, char __user *optval, int __user *optlen) +static int do_tcp_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); @@ -2814,10 +2948,10 @@ static int do_tcp_getsockopt(struct sock *sk, int level, val = tp->rx_opt.mss_clamp; break; case TCP_NODELAY: - val = !!(tp->nonagle&TCP_NAGLE_OFF); + val = !!(tp->nonagle & TCP_NAGLE_OFF); break; case TCP_CORK: - val = !!(tp->nonagle&TCP_NAGLE_CORK); + val = !!(tp->nonagle & TCP_NAGLE_CORK); break; case TCP_KEEPIDLE: val = keepalive_time_when(tp) / HZ; @@ -2829,12 +2963,12 @@ static int do_tcp_getsockopt(struct sock *sk, int level, val = keepalive_probes(tp); break; case TCP_SYNCNT: - val = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; + val = icsk->icsk_syn_retries ?: sysctl_tcp_syn_retries; break; case TCP_LINGER2: val = tp->linger2; if (val >= 0) - val = (val ? 
: sysctl_tcp_fin_timeout) / HZ; + val = (val ?: sysctl_tcp_fin_timeout) / HZ; break; case TCP_DEFER_ACCEPT: val = retrans_to_secs(icsk->icsk_accept_queue.rskq_defer_accept, @@ -2940,6 +3074,25 @@ static int do_tcp_getsockopt(struct sock *sk, int level, case MPTCP_ENABLED: val = sock_flag(sk, SOCK_MPTCP) ? 1 : 0; break; + case MPTCP_SCHEDULER: { + const char *str = ""; + struct tcp_sock *tp = tcp_sk(sk); + + if (get_user(len, optlen)) + return -EFAULT; + + if (!mptcp_init_failed && sysctl_mptcp_enabled && + mptcp_rbs_is_sched_used(tp)) + str = mptcp_rbs_scheduler_get(sk)->name; + + len = min_t(unsigned int, len, strlen(str)); + + if (put_user(len, optlen)) + return -EFAULT; + if (copy_to_user(optval, str, len)) + return -EFAULT; + return 0; + } #endif default: return -ENOPROTOOPT; @@ -2958,8 +3111,8 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, struct inet_connection_sock *icsk = inet_csk(sk); if (level != SOL_TCP) - return icsk->icsk_af_ops->getsockopt(sk, level, optname, - optval, optlen); + return icsk->icsk_af_ops->getsockopt(sk, level, optname, optval, + optlen); return do_tcp_getsockopt(sk, level, optname, optval, optlen); } EXPORT_SYMBOL(tcp_getsockopt); @@ -2969,8 +3122,8 @@ int compat_tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { if (level != SOL_TCP) - return inet_csk_compat_getsockopt(sk, level, optname, - optval, optlen); + return inet_csk_compat_getsockopt(sk, level, optname, optval, + optlen); return do_tcp_getsockopt(sk, level, optname, optval, optlen); } EXPORT_SYMBOL(compat_tcp_getsockopt); @@ -2985,7 +3138,8 @@ static void __tcp_alloc_md5sig_pool(void) { int cpu; - for_each_possible_cpu(cpu) { + for_each_possible_cpu(cpu) + { if (!per_cpu(tcp_md5sig_pool, cpu).md5_desc.tfm) { struct crypto_hash *hash; @@ -3016,7 +3170,6 @@ bool tcp_alloc_md5sig_pool(void) } EXPORT_SYMBOL(tcp_alloc_md5sig_pool); - /** * tcp_get_md5sig_pool - get md5sig_pool for this user * @@ -3038,8 +3191,7 @@ struct tcp_md5sig_pool *tcp_get_md5sig_pool(void) } EXPORT_SYMBOL(tcp_get_md5sig_pool); -int tcp_md5_hash_header(struct tcp_md5sig_pool *hp, - const struct tcphdr *th) +int tcp_md5_hash_header(struct tcp_md5sig_pool *hp, const struct tcphdr *th) { struct scatterlist sg; struct tcphdr hdr; @@ -3056,15 +3208,15 @@ int tcp_md5_hash_header(struct tcp_md5sig_pool *hp, } EXPORT_SYMBOL(tcp_md5_hash_header); -int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, - const struct sk_buff *skb, unsigned int header_len) +int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, const struct sk_buff *skb, + unsigned int header_len) { struct scatterlist sg; const struct tcphdr *tp = tcp_hdr(skb); struct hash_desc *desc = &hp->md5_desc; unsigned int i; - const unsigned int head_data_len = skb_headlen(skb) > header_len ? - skb_headlen(skb) - header_len : 0; + const unsigned int head_data_len = + skb_headlen(skb) > header_len ? 
skb_headlen(skb) - header_len : 0; const struct skb_shared_info *shi = skb_shinfo(skb); struct sk_buff *frag_iter; @@ -3085,15 +3237,15 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, return 1; } - skb_walk_frags(skb, frag_iter) - if (tcp_md5_hash_skb_data(hp, frag_iter, 0)) - return 1; + skb_walk_frags(skb, frag_iter) if (tcp_md5_hash_skb_data(hp, frag_iter, + 0)) return 1; return 0; } EXPORT_SYMBOL(tcp_md5_hash_skb_data); -int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, const struct tcp_md5sig_key *key) +int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, + const struct tcp_md5sig_key *key) { struct scatterlist sg; @@ -3164,48 +3316,35 @@ void __init tcp_init(void) percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL); percpu_counter_init(&tcp_orphan_count, 0, GFP_KERNEL); - tcp_hashinfo.bind_bucket_cachep = - kmem_cache_create("tcp_bind_bucket", - sizeof(struct inet_bind_bucket), 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); + tcp_hashinfo.bind_bucket_cachep = kmem_cache_create( + "tcp_bind_bucket", sizeof(struct inet_bind_bucket), 0, + SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); /* Size and allocate the main established and bind bucket * hash tables. * * The methodology is similar to that of the buffer cache. */ - tcp_hashinfo.ehash = - alloc_large_system_hash("TCP established", - sizeof(struct inet_ehash_bucket), - thash_entries, - 17, /* one slot per 128 KB of memory */ - 0, - NULL, - &tcp_hashinfo.ehash_mask, - 0, - thash_entries ? 0 : 512 * 1024); + tcp_hashinfo.ehash = alloc_large_system_hash( + "TCP established", sizeof(struct inet_ehash_bucket), thash_entries, + 17, /* one slot per 128 KB of memory */ + 0, NULL, &tcp_hashinfo.ehash_mask, 0, + thash_entries ? 0 : 512 * 1024); for (i = 0; i <= tcp_hashinfo.ehash_mask; i++) INIT_HLIST_NULLS_HEAD(&tcp_hashinfo.ehash[i].chain, i); if (inet_ehash_locks_alloc(&tcp_hashinfo)) panic("TCP: failed to alloc ehash_locks"); - tcp_hashinfo.bhash = - alloc_large_system_hash("TCP bind", - sizeof(struct inet_bind_hashbucket), - tcp_hashinfo.ehash_mask + 1, - 17, /* one slot per 128 KB of memory */ - 0, - &tcp_hashinfo.bhash_size, - NULL, - 0, - 64 * 1024); + tcp_hashinfo.bhash = alloc_large_system_hash( + "TCP bind", sizeof(struct inet_bind_hashbucket), + tcp_hashinfo.ehash_mask + 1, 17, /* one slot per 128 KB of memory */ + 0, &tcp_hashinfo.bhash_size, NULL, 0, 64 * 1024); tcp_hashinfo.bhash_size = 1U << tcp_hashinfo.bhash_size; for (i = 0; i < tcp_hashinfo.bhash_size; i++) { spin_lock_init(&tcp_hashinfo.bhash[i].lock); INIT_HLIST_HEAD(&tcp_hashinfo.bhash[i].chain); } - cnt = tcp_hashinfo.ehash_mask + 1; tcp_death_row.sysctl_max_tw_buckets = cnt / 2; @@ -3215,12 +3354,12 @@ void __init tcp_init(void) tcp_init_mem(); /* Set per-socket limits to no more than 1/128 the pressure threshold */ limit = nr_free_buffer_pages() << (PAGE_SHIFT - 7); - max_wshare = min(4UL*1024*1024, limit); - max_rshare = min(6UL*1024*1024, limit); + max_wshare = min(4UL * 1024 * 1024, limit); + max_rshare = min(6UL * 1024 * 1024, limit); sysctl_tcp_wmem[0] = SK_MEM_QUANTUM; - sysctl_tcp_wmem[1] = 16*1024; - sysctl_tcp_wmem[2] = max(64*1024, max_wshare); + sysctl_tcp_wmem[1] = 16 * 1024; + sysctl_tcp_wmem[2] = max(64 * 1024, max_wshare); sysctl_tcp_rmem[0] = SK_MEM_QUANTUM; sysctl_tcp_rmem[1] = 87380; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index bac9a7fe1afe8..57de1d3f57186 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3296,6 +3296,16 @@ static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack) tp->bytes_acked += delta; 
u64_stats_update_end(&tp->syncp); tp->snd_una = ack; + + /* + * rbs statistics: the number of bytes between snd_una and data_ack + * has been acknowledged + */ + if(mptcp(tp) && mptcp_meta_tp(tp)->mpcb->sched_ops->update_stats) { + /* note that this is unsigned arithmetic, so even a wrap-around is correct */ + mptcp_meta_tp(tp)->mpcb->sched_ops->update_stats((struct sock*) tp, NULL, delta, 1); + } + /* rbs statistics end */ } /* If we update tp->rcv_nxt, also update tp->bytes_received */ @@ -4538,6 +4548,9 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) int eaten = -1; bool fragstolen = false; + //if(mptcp(tp)) + // printk("%s afr_ofo for sk %p with skb %p called by %pS\n", __func__, sk, skb, __builtin_return_address(0)); + /* If no data is present, but a data_fin is in the options, we still * have to call mptcp_queue_skb later on. */ if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq && @@ -4551,6 +4564,9 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) tp->rx_opt.dsack = 0; + //if(mptcp(tp)) + // printk("%s afr_ofo skb->seq %u compared with tp->rcv_nxt %u\n", __func__, TCP_SKB_CB(skb)->seq, tp->rcv_nxt); + /* Queue data for delivery to the user. * Packets in sequence go to the receive queue. * Out of sequence packets to the out_of_order_queue. @@ -4639,7 +4655,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) if (before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { /* Partial packet, seq < rcv_next < end_seq */ - SOCK_DEBUG(sk, "partial packet: rcv_next %X seq %X - %X\n", + printk("partial packet: rcv_next %X seq %X - %X\n", tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); @@ -4654,6 +4670,10 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) } tcp_data_queue_ofo(sk, skb); + + if(mptcp(tp)) { + sk->sk_data_ready(sk); + } } static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb, diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 76dc6a7548b65..95344b55aa815 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -48,6 +48,9 @@ #include #include + +#include "../mptcp/mptcp_rbs_sched.h" + /* People can turn this off for buggy TCP's found in printers etc. */ int sysctl_tcp_retrans_collapse __read_mostly = 1; @@ -936,7 +939,7 @@ void tcp_wfree(struct sk_buff *skb) * SKB, or a fresh unique copy made by the retransmit engine. */ int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, - gfp_t gfp_mask) + gfp_t gfp_mask) { const struct inet_connection_sock *icsk = inet_csk(sk); struct inet_sock *inet; @@ -1078,13 +1081,34 @@ int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); - + /* Advance write_seq and place onto the write_queue. */ tp->write_seq = TCP_SKB_CB(skb)->end_seq; __skb_header_release(skb); tcp_add_write_queue_tail(sk, skb); sk->sk_wmem_queued += skb->truesize; sk_mem_charge(sk, skb->truesize); + + /* RBS specific stuff */ + if(mptcp(tp)) { + /* note: only the meta_sk should be used for this */ + if(tp->mpcb->meta_sk == (struct sock *) tp) { + struct tcp_sock *meta_tp = tcp_sk(sk); + + /* are we using rbs?
*/ + if (mptcp_rbs_is_sched_used(meta_tp)) { + struct mptcp_rbs_cb* rbs_cb = mptcp_rbs_get_cb(meta_tp); + + if(rbs_cb->queue_position == NULL) { + rbs_cb->queue_position = skb; + mptcp_debug("rbs corrects queue position, before NULL, now %p\n", skb); + } else { + mptcp_debug("rbs no need to correct queue position, remains with %p, not switched to %p\n", rbs_cb->queue_position, skb); + } + } + } + } + /* RBS specific stuff END */ } /* Initialize TSO segments for a packet. */ @@ -1255,8 +1279,48 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, /* Link BUFF into the send queue. */ __skb_header_release(buff); + // rbs debug stuff + //printk("calling tcp_insert_write_queue_after with skb %p, skb->next %p, skb->prev %p, buff %p, buf->next %p, buf->prev %p for sk %p\n", skb, skb->next, skb->prev, buff, buff->next, buff->prev, sk); + if(!skb) { + printk("fragment found skb as null\n"); + } + + if(!skb->next) { + printk("fragment found skb->next as null for skb %p\n", skb); + } + tcp_insert_write_queue_after(skb, buff, sk); + /* RBS specific stuff */ + /* note: only the meta_sk should be used for this */ + if(mptcp(tp) && tp->mpcb->meta_sk == (struct sock *) tp) { + struct tcp_sock *meta_tp = tcp_sk(sk); + + /* are we using rbs? */ + if (mptcp_rbs_is_sched_used(meta_tp)) { + struct mptcp_rbs_cb* rbs_cb = mptcp_rbs_get_cb(meta_tp); + + if (!rbs_cb->queue_position) { + rbs_cb->queue_position = buff; + mptcp_debug("rbs corrects queue position, before NULL, now %p\n", buff); + } else { + // TODO: semantically this is critical... + // if the queue pointer points BEFORE this packet, we do nothing, since we will point to this packet later anyway + // if the queue pointer points BEHIND this packet, we pull it back to the front... + // possibly this would also be a case for an open action... + mptcp_debug("comparing seq numbers for correcting queue new packet %u and current queue %u\n", TCP_SKB_CB(buff)->seq, TCP_SKB_CB(rbs_cb->queue_position)->seq); + + if (before(TCP_SKB_CB(buff)->seq, TCP_SKB_CB(rbs_cb->queue_position)->seq)) { + mptcp_debug("rbs corrects queue position, before %p, now %p\n", rbs_cb->queue_position, buff); + rbs_cb->queue_position = buff; + } else { + mptcp_debug("rbs no need to correct queue position, remains with %p, not switched to %p\n", rbs_cb->queue_position, buff); + } + } + } + } + /* RBS specific stuff END */ + return 0; } @@ -2110,7 +2174,18 @@ bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, if (push_one == 2) /* Force out a loss probe pkt.
*/ cwnd_quota = 1; - else + else if(mptcp(tp) && + /* is a subflow */ + tp->mpcb->meta_sk != (struct sock *) tp && + /* uses RBS */ + mptcp_rbs_is_sched_used((struct tcp_sock*) tp->mpcb->meta_sk) && + ignoreSbfCwndConfig) { + /* + * RBS does not need a cwnd check for subflows + */ + mptcp_debug("%s overruled congestion control for meta_sk %p, setting cwnd_quota = 1\n", __func__, tp->mpcb->meta_sk); + cwnd_quota = 1; + } else break; } @@ -2154,6 +2229,11 @@ bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, limit = max(2 * skb->truesize, sk->sk_pacing_rate >> 10); limit = min_t(u32, limit, sysctl_tcp_limit_output_bytes); +/* if(mptcp(tp)) { + printk("AFR_TSQ checks with limit %d and sysvalue %d and truesize %d against value sk_wmem atomic %d\n", + limit, sysctl_tcp_limit_output_bytes, 2* skb->truesize, atomic_read(&sk->sk_wmem_alloc)); + }*/ + if (atomic_read(&sk->sk_wmem_alloc) > limit) { set_bit(TSQ_THROTTLED, &tp->tsq_flags); /* It is possible TX completion already happened diff --git a/net/mptcp/Kconfig b/net/mptcp/Kconfig index 13cf4d51ce1f9..9ba2526d33ef5 100644 --- a/net/mptcp/Kconfig +++ b/net/mptcp/Kconfig @@ -64,7 +64,7 @@ endif config DEFAULT_MPTCP_PM string default "default" if DEFAULT_DUMMY - default "fullmesh" if DEFAULT_FULLMESH + default "fullmesh" if DEFAULT_FULLMESH default "ndiffports" if DEFAULT_NDIFFPORTS default "binder" if DEFAULT_BINDER default "default" @@ -92,6 +92,30 @@ config MPTCP_REDUNDANT This scheduler sends all packets redundantly over all subflows to decreases latency and jitter on the cost of lower throughput. +config MPTCP_RBS + tristate "MPTCP Rule-based Scheduler" + depends on (MPTCP=y) + ---help--- + Some description + +config MPTCP_RBSMEASURE + tristate "Measurements" + depends on (MPTCP_RBS=y) + ---help--- + Some description + +config MPTCP_RBSOPT + tristate "Optimizations" + depends on (MPTCP_RBS=y) + ---help--- + Some description + +config MPTCP_RBSEBPF + tristate "eBPF Code Generator" + depends on (MPTCP_RBSOPT=y) && (BPF_JIT=y) + ---help--- + Some description + choice prompt "Default MPTCP Scheduler" default DEFAULT diff --git a/net/mptcp/Makefile b/net/mptcp/Makefile index 10a98ba7d8c31..c89470377ac3c 100644 --- a/net/mptcp/Makefile +++ b/net/mptcp/Makefile @@ -17,6 +17,6 @@ obj-$(CONFIG_MPTCP_NDIFFPORTS) += mptcp_ndiffports.o obj-$(CONFIG_MPTCP_BINDER) += mptcp_binder.o obj-$(CONFIG_MPTCP_ROUNDROBIN) += mptcp_rr.o obj-$(CONFIG_MPTCP_REDUNDANT) += mptcp_redundant.o +obj-$(CONFIG_MPTCP_RBS) += mptcp_rbs_sched.o mptcp_rbs_parser.o mptcp_rbs_queue.o mptcp_rbs_lexer.o mptcp_rbs_user.o mptcp_rbs_cfg.o mptcp_rbs_smt.o mptcp_rbs_value.o mptcp_rbs_var.o mptcp_rbs_exec.o mptcp_rbs_ctx.o mptcp_rbs_type.o mptcp_rbs_scheduler.o mptcp_rbs_optimizer.o mptcp_rbs_optimizer_bm.o mptcp_rbs_optimizer_cf.o mptcp_rbs_optimizer_cve.o mptcp_rbs_optimizer_dce.o mptcp_rbs_optimizer_ebpf.o mptcp_rbs_optimizer_ebpf_disasm.o mptcp_rbs_optimizer_ebpf_regalloc.o mptcp_rbs_optimizer_ebpf_lse.o mptcp_rbs_optimizer_lu.o mptcp_rbs_optimizer_vi.o mptcp_rbs_action.o mptcp-$(subst m,y,$(CONFIG_IPV6)) += mptcp_ipv6.o - diff --git a/net/mptcp/mptcp_ctrl.c b/net/mptcp/mptcp_ctrl.c index 3a41b7b3fd070..4fac6dcf748df 100644 --- a/net/mptcp/mptcp_ctrl.c +++ b/net/mptcp/mptcp_ctrl.c @@ -57,6 +57,8 @@ #include #include #include +#include +#include static struct kmem_cache *mptcp_sock_cache __read_mostly; static struct kmem_cache *mptcp_cb_cache __read_mostly; @@ -114,6 +116,90 @@ static int proc_mptcp_scheduler(struct ctl_table *ctl, int write, return ret; } + +int 
proc_mptcp_scheduler_select_parse(char* str, __be32* ip, __be16* port, char** name) { + unsigned long l; + unsigned int val; + int i; + //inspired by in_aton("10.0.0.2"); + l = 0; + for (i = 0; i < 4; i++) { + l <<= 8; + if (*str != '\0') { + val = 0; + while (*str != '\0' && *str != '.' && *str != '\n' && *str != ':') { + val *= 10; + val += *str - '0'; + str++; + } + l |= val; + if (*str != '\0') + str++; + } + } + *ip = htonl(l); + + val = 0; + while (*str != '\0' && *str != ' ') { + val *= 10; + val += *str - '0'; + str++; + } + *port = val; + + if (*str != ' ') + return 1; // error + str++; + *name = str; + + return 0; +} + +static int proc_mptcp_scheduler_select(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + const int MAX_SIZE = MPTCP_SCHED_NAME_MAX+25; // ip and port + char val[MAX_SIZE]; + struct ctl_table tbl = { + .data = val, + .maxlen = MAX_SIZE, + }; + int ret; + + if(write) { + __be32 ip; + __be16 port; + char* name; + long till_time_s; + struct timespec ts; + getnstimeofday(&ts); + + till_time_s = ts.tv_sec + 60 * 5; // 5 minutes + + ret = proc_dostring(&tbl, write, buffer, lenp, ppos); + + if (ret == 0) { + ret = proc_mptcp_scheduler_select_parse(val, &ip, &port, &name); + + if(ret == 0) { + mptcp_debug("afr: add for ip %i.%i.%i.%i and port %i scheduler %s at jiffy %llu\n", ip & 0x000000FF, + (ip & 0x0000FF00)>>8, + (ip & 0x00FF0000)>>16, + (ip & 0xFF000000)>>24, + port, name, jiffies); + ret = mptcp_set_default_scheduler_for_tuple(name, ip, port, till_time_s); + } else { + mptcp_debug("afr error parse\n"); + } + } else { + mptcp_debug("afr error dostring\n"); + } + } + + return ret; +} + static struct ctl_table mptcp_table[] = { { .procname = "mptcp_enabled", @@ -164,6 +250,12 @@ static struct ctl_table mptcp_table[] = { .maxlen = MPTCP_SCHED_NAME_MAX, .proc_handler = proc_mptcp_scheduler, }, + { + .procname = "mptcp_schedselect", + .mode = 0644, + .maxlen = MPTCP_SCHED_NAME_MAX+25, + .proc_handler = proc_mptcp_scheduler_select, + }, { } }; @@ -685,6 +777,9 @@ void mptcp_destroy_sock(struct sock *sk) if (is_meta_sk(sk)) { struct sock *sk_it, *tmpsk; + if (tcp_sk(sk)->mpcb->sched_ops->release) + tcp_sk(sk)->mpcb->sched_ops->release(sk); + __skb_queue_purge(&tcp_sk(sk)->mpcb->reinject_queue); mptcp_purge_ofo_queue(tcp_sk(sk)); @@ -1239,6 +1334,8 @@ int mptcp_add_sock(struct sock *meta_sk, struct sock *sk, u8 loc_id, u8 rem_id, return -EPERM; } + tp->mptcp->sbf_id = ++mpcb->last_sbf_id; + INIT_HLIST_NODE(&tp->mptcp->cb_list); tp->mptcp->tp = tp; @@ -2448,10 +2545,11 @@ EXPORT_SYMBOL(mptcp_wq); static int mptcp_pm_seq_show(struct seq_file *seq, void *v) { struct tcp_sock *meta_tp; + struct sock *sk; const struct net *net = seq->private; int i, n = 0; - seq_printf(seq, " sl loc_tok rem_tok v6 local_address remote_address st ns tx_queue rx_queue inode"); + seq_printf(seq, " sl loc_tok rem_tok v6 local_address remote_address st ns tx_queue rx_queue inode scheduler meta p"); seq_putc(seq, '\n'); for (i = 0; i < MPTCP_HASH_SIZE; i++) { @@ -2493,13 +2591,44 @@ static int mptcp_pm_seq_show(struct seq_file *seq, void *v) ntohs(isk->inet_dport)); #endif } - seq_printf(seq, " %02X %02X %08X:%08X %lu", + seq_printf(seq, " %02X %02X %08X:%08X %lu %s %p", meta_sk->sk_state, mpcb->cnt_subflows, meta_tp->write_seq - meta_tp->snd_una, max_t(int, meta_tp->rcv_nxt - meta_tp->copied_seq, 0), - sock_i_ino(meta_sk)); + sock_i_ino(meta_sk), mpcb->sched_ops->name, meta_sk); seq_putc(seq, '\n'); + +#if 1 + // added more stats per subflow... 
maybe we should move this to a new file + seq_printf(seq, " snd rcv srtt mdev packets_out retrans_out snd_cwnd\n"); + + for ((sk) = (struct sock *)(mpcb)->connection_list; sk; sk = (struct sock *)tcp_sk(sk)->mptcp->next) { + //mptcp_for_each_sk(mpcb, sk) { + struct tcp_sock *tp = tcp_sk(sk); + struct inet_sock *isk_tmp = inet_sk(sk); + + seq_printf(seq, "%15llu%15llu%15u%15u%15u%15u%15u %#x %i.%i.%i.%i:%i %i.%i.%i.%i:%i\n", + tp->mptcp->bytes_snd, + tp->mptcp->bytes_rcv, + tp->srtt_us, tp->mdev_us, + tp->packets_out, + tp->retrans_out, + tp->snd_cwnd, + (unsigned int) tp, + isk_tmp->inet_rcv_saddr & 0x000000FF, + (isk_tmp->inet_rcv_saddr & 0x0000FF00)>>8, + (isk_tmp->inet_rcv_saddr & 0x00FF0000)>>16, + (isk_tmp->inet_rcv_saddr & 0xFF000000)>>24, + ntohs(isk_tmp->inet_sport), + isk_tmp->inet_daddr & 0x000000FF, + (isk_tmp->inet_daddr & 0x0000FF00)>>8, + (isk_tmp->inet_daddr & 0x00FF0000)>>16, + (isk_tmp->inet_daddr & 0xFF000000)>>24, + ntohs(isk_tmp->inet_dport) + ); + } +#endif } rcu_read_unlock_bh(); diff --git a/net/mptcp/mptcp_ctrl.c.save b/net/mptcp/mptcp_ctrl.c.save new file mode 100644 index 0000000000000..30c522206c7e4 --- /dev/null +++ b/net/mptcp/mptcp_ctrl.c.save @@ -0,0 +1,2814 @@ +/* + * MPTCP implementation - MPTCP-control + * + * Initial Design & Implementation: + * Sébastien Barré + * + * Current Maintainer & Author: + * Christoph Paasch + * + * Additional authors: + * Jaakko Korkeaniemi + * Gregory Detal + * Fabien Duchêne + * Andreas Seelinger + * Lavkesh Lahngir + * Andreas Ripke + * Vlad Dogaru + * Octavian Purdila + * John Ronan + * Catalin Nicutar + * Brandon Heller + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include +#include +#include +#include +#include +#include +#if IS_ENABLED(CONFIG_IPV6) +#include +#include +#endif +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static struct kmem_cache *mptcp_sock_cache __read_mostly; +static struct kmem_cache *mptcp_cb_cache __read_mostly; +static struct kmem_cache *mptcp_tw_cache __read_mostly; + +int sysctl_mptcp_enabled __read_mostly = 1; +int sysctl_mptcp_version __read_mostly = 0; +static int min_mptcp_version; +static int max_mptcp_version = 1; +int sysctl_mptcp_checksum __read_mostly = 1; +int sysctl_mptcp_debug __read_mostly; +EXPORT_SYMBOL(sysctl_mptcp_debug); +int sysctl_mptcp_syn_retries __read_mostly = 3; + +bool mptcp_init_failed __read_mostly; + +struct static_key mptcp_static_key = STATIC_KEY_INIT_FALSE; +EXPORT_SYMBOL(mptcp_static_key); + +static int proc_mptcp_path_manager(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + char val[MPTCP_PM_NAME_MAX]; + struct ctl_table tbl = { + .data = val, + .maxlen = MPTCP_PM_NAME_MAX, + }; + int ret; + + mptcp_get_default_path_manager(val); + + ret = proc_dostring(&tbl, write, buffer, lenp, ppos); + if (write && ret == 0) + ret = mptcp_set_default_path_manager(val); + return ret; +} + +static int proc_mptcp_scheduler(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + char val[MPTCP_SCHED_NAME_MAX]; + struct ctl_table tbl = { + .data = val, + .maxlen = MPTCP_SCHED_NAME_MAX, + }; + int ret; + + mptcp_get_default_scheduler(val); + + ret = proc_dostring(&tbl, write, buffer, lenp, ppos); + if (write && ret == 0) + ret = mptcp_set_default_scheduler(val); + return ret; +} + + +int proc_mptcp_scheduler_select_parse(char* str, __be32* ip, __be16* port, char** name) { + unsigned long l; + unsigned int val; + int i; + //inspired by in_aton("10.0.0.2"); + l = 0; + for (i = 0; i < 4; i++) { + l <<= 8; + if (*str != '\0') { + val = 0; + while (*str != '\0' && *str != '.' 
&& *str != '\n' && *str != ':') { + val *= 10; + val += *str - '0'; + str++; + } + l |= val; + if (*str != '\0') + str++; + } + } + *ip = htonl(l); + + val = 0; + while (*str != '\0' && *str != ' ') { + val *= 10; + val += *str - '0'; + str++; + } + *port = val; + + if (*str != ' ') + return 1; // error + str++; + *name = str; + + return 0; +} + +static int proc_mptcp_scheduler_select(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + const int MAX_SIZE = MPTCP_SCHED_NAME_MAX+25; // ip and port + char val[MAX_SIZE]; + struct ctl_table tbl = { + .data = val, + .maxlen = MAX_SIZE, + }; + int ret; + + if(write) { + __be32 ip; + __be16 port; + char* name; + long till_time_s; + struct timespec ts; + getnstimeofday(&ts); + + till_time_s = ts.tv_sec + 60 * 5; // 5 minutes + + ret = proc_dostring(&tbl, write, buffer, lenp, ppos); + + if (ret == 0) { + ret = proc_mptcp_scheduler_select_parse(val, &ip, &port, &name); + + if(ret == 0) { + mptcp_debug("afr: add for ip %i.%i.%i.%i and port %i scheduler %s at jiffy %llu\n", ip & 0x000000FF, + (ip & 0x0000FF00)>>8, + (ip & 0x00FF0000)>>16, + (ip & 0xFF000000)>>24, + port, name, jiffies); + ret = mptcp_set_default_scheduler_for_tuple(name, ip, port, till_time_s); + } else { + mptcp_debug("afr error parse\n"); + } + } else { + mptcp_debug("afr error dostring\n"); + } + } + + return ret; +} + +static struct ctl_table mptcp_table[] = { + { + .procname = "mptcp_enabled", + .data = &sysctl_mptcp_enabled, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + .procname = "mptcp_version", + .data = &sysctl_mptcp_version, + .mode = 0644, + .maxlen = sizeof(int), + .proc_handler = &proc_dointvec_minmax, + .extra1 = &min_mptcp_version, + .extra2 = &max_mptcp_version, + }, + { + .procname = "mptcp_checksum", + .data = &sysctl_mptcp_checksum, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + .procname = "mptcp_debug", + .data = &sysctl_mptcp_debug, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + .procname = "mptcp_syn_retries", + .data = &sysctl_mptcp_syn_retries, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + .procname = "mptcp_path_manager", + .mode = 0644, + .maxlen = MPTCP_PM_NAME_MAX, + .proc_handler = proc_mptcp_path_manager, + }, + { + .procname = "mptcp_scheduler", + .mode = 0644, + .maxlen = MPTCP_SCHED_NAME_MAX, + .proc_handler = proc_mptcp_scheduler, + }, + { + .procname = "mptcp_schedselect", + .mode = 0644, + .maxlen = MPTCP_SCHED_NAME_MAX+25, + .proc_handler = proc_mptcp_scheduler_select, + }, + { } +}; + +static inline u32 mptcp_hash_tk(u32 token) +{ + return token % MPTCP_HASH_SIZE; +} + +struct hlist_nulls_head tk_hashtable[MPTCP_HASH_SIZE]; +EXPORT_SYMBOL(tk_hashtable); + +/* This second hashtable is needed to retrieve request socks + * created as a result of a join request. While the SYN contains + * the token, the final ack does not, so we need a separate hashtable + * to retrieve the mpcb. 
+ */ +struct hlist_nulls_head mptcp_reqsk_htb[MPTCP_HASH_SIZE]; +spinlock_t mptcp_reqsk_hlock; /* hashtable protection */ + +/* The following hash table is used to avoid collision of token */ +static struct hlist_nulls_head mptcp_reqsk_tk_htb[MPTCP_HASH_SIZE]; +spinlock_t mptcp_tk_hashlock; /* hashtable protection */ + +static bool mptcp_reqsk_find_tk(const u32 token) +{ + const u32 hash = mptcp_hash_tk(token); + const struct mptcp_request_sock *mtreqsk; + const struct hlist_nulls_node *node; + +begin: + hlist_nulls_for_each_entry_rcu(mtreqsk, node, + &mptcp_reqsk_tk_htb[hash], hash_entry) { + if (token == mtreqsk->mptcp_loc_token) + return true; + } + /* A request-socket is destroyed by RCU. So, it might have been recycled + * and put into another hash-table list. So, after the lookup we may + * end up in a different list. So, we may need to restart. + * + * See also the comment in __inet_lookup_established. + */ + if (get_nulls_value(node) != hash) + goto begin; + return false; +} + +static void mptcp_reqsk_insert_tk(struct request_sock *reqsk, const u32 token) +{ + u32 hash = mptcp_hash_tk(token); + + hlist_nulls_add_head_rcu(&mptcp_rsk(reqsk)->hash_entry, + &mptcp_reqsk_tk_htb[hash]); +} + +static void mptcp_reqsk_remove_tk(const struct request_sock *reqsk) +{ + rcu_read_lock(); + spin_lock(&mptcp_tk_hashlock); + hlist_nulls_del_init_rcu(&mptcp_rsk(reqsk)->hash_entry); + spin_unlock(&mptcp_tk_hashlock); + rcu_read_unlock(); +} + +void mptcp_reqsk_destructor(struct request_sock *req) +{ + if (!mptcp_rsk(req)->is_sub) { + if (in_softirq()) { + mptcp_reqsk_remove_tk(req); + } else { + rcu_read_lock_bh(); + spin_lock(&mptcp_tk_hashlock); + hlist_nulls_del_init_rcu(&mptcp_rsk(req)->hash_entry); + spin_unlock(&mptcp_tk_hashlock); + rcu_read_unlock_bh(); + } + } else { + mptcp_hash_request_remove(req); + } +} + +static void __mptcp_hash_insert(struct tcp_sock *meta_tp, const u32 token) +{ + u32 hash = mptcp_hash_tk(token); + hlist_nulls_add_head_rcu(&meta_tp->tk_table, &tk_hashtable[hash]); + meta_tp->inside_tk_table = 1; +} + +static bool mptcp_find_token(u32 token) +{ + const u32 hash = mptcp_hash_tk(token); + const struct tcp_sock *meta_tp; + const struct hlist_nulls_node *node; + +begin: + hlist_nulls_for_each_entry_rcu(meta_tp, node, &tk_hashtable[hash], tk_table) { + if (token == meta_tp->mptcp_loc_token) + return true; + } + /* A TCP-socket is destroyed by RCU. So, it might have been recycled + * and put into another hash-table list. So, after the lookup we may + * end up in a different list. So, we may need to restart. + * + * See also the comment in __inet_lookup_established. + */ + if (get_nulls_value(node) != hash) + goto begin; + return false; +} + +static void mptcp_set_key_reqsk(struct request_sock *req, + const struct sk_buff *skb, + u32 seed) +{ + const struct inet_request_sock *ireq = inet_rsk(req); + struct mptcp_request_sock *mtreq = mptcp_rsk(req); + + if (skb->protocol == htons(ETH_P_IP)) { + mtreq->mptcp_loc_key = mptcp_v4_get_key(ip_hdr(skb)->saddr, + ip_hdr(skb)->daddr, + htons(ireq->ir_num), + ireq->ir_rmt_port, + seed); +#if IS_ENABLED(CONFIG_IPV6) + } else { + mtreq->mptcp_loc_key = mptcp_v6_get_key(ipv6_hdr(skb)->saddr.s6_addr32, + ipv6_hdr(skb)->daddr.s6_addr32, + htons(ireq->ir_num), + ireq->ir_rmt_port, + seed); +#endif + } + + mptcp_key_sha1(mtreq->mptcp_loc_key, &mtreq->mptcp_loc_token, NULL); +} + +/* New MPTCP-connection request, prepare a new token for the meta-socket that + * will be created in mptcp_check_req_master(), and store the received token. 
+ */ +static void mptcp_reqsk_new_mptcp(struct request_sock *req, + struct sock *sk, + const struct mptcp_options_received *mopt, + const struct sk_buff *skb) +{ + struct mptcp_request_sock *mtreq = mptcp_rsk(req); + struct tcp_sock *tp = tcp_sk(sk); + + inet_rsk(req)->saw_mpc = 1; + /* MPTCP version agreement */ + if (mopt->mptcp_ver >= tp->mptcp_ver) + mtreq->mptcp_ver = tp->mptcp_ver; + else + mtreq->mptcp_ver = mopt->mptcp_ver; + + rcu_read_lock(); + spin_lock(&mptcp_tk_hashlock); + do { + mptcp_set_key_reqsk(req, skb, mptcp_seed++); + } while (mptcp_reqsk_find_tk(mtreq->mptcp_loc_token) || + mptcp_find_token(mtreq->mptcp_loc_token)); + mptcp_reqsk_insert_tk(req, mtreq->mptcp_loc_token); + spin_unlock(&mptcp_tk_hashlock); + rcu_read_unlock(); + mtreq->mptcp_rem_key = mopt->mptcp_sender_key; +} + +static int mptcp_reqsk_new_cookie(struct request_sock *req, + const struct mptcp_options_received *mopt, + const struct sk_buff *skb) +{ + struct mptcp_request_sock *mtreq = mptcp_rsk(req); + + rcu_read_lock(); + spin_lock(&mptcp_tk_hashlock); + + mptcp_set_key_reqsk(req, skb, tcp_rsk(req)->snt_isn); + + if (mptcp_reqsk_find_tk(mtreq->mptcp_loc_token) || + mptcp_find_token(mtreq->mptcp_loc_token)) { + spin_unlock(&mptcp_tk_hashlock); + rcu_read_unlock(); + return false; + } + + inet_rsk(req)->saw_mpc = 1; + + spin_unlock(&mptcp_tk_hashlock); + rcu_read_unlock(); + + mtreq->mptcp_rem_key = mopt->mptcp_sender_key; + + return true; +} + +static void mptcp_set_key_sk(const struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + const struct inet_sock *isk = inet_sk(sk); + + if (sk->sk_family == AF_INET) + tp->mptcp_loc_key = mptcp_v4_get_key(isk->inet_saddr, + isk->inet_daddr, + isk->inet_sport, + isk->inet_dport, + mptcp_seed++); +#if IS_ENABLED(CONFIG_IPV6) + else + tp->mptcp_loc_key = mptcp_v6_get_key(inet6_sk(sk)->saddr.s6_addr32, + sk->sk_v6_daddr.s6_addr32, + isk->inet_sport, + isk->inet_dport, + mptcp_seed++); +#endif + + mptcp_key_sha1(tp->mptcp_loc_key, + &tp->mptcp_loc_token, NULL); +} + +#ifdef HAVE_JUMP_LABEL +/* We are not allowed to call static_key_slow_dec() from irq context + * If mptcp_enable/disable_static_key() is called from irq context, + * defer the static_key_slow_dec() calls. 
+ */ +static atomic_t mptcp_enable_deferred; +#endif + +void mptcp_enable_static_key(void) +{ +#ifdef HAVE_JUMP_LABEL + int deferred; + + if (in_interrupt()) { + atomic_inc(&mptcp_enable_deferred); + return; + } + + deferred = atomic_xchg(&mptcp_enable_deferred, 0); + + if (deferred > 0) { + while (deferred--) + static_key_slow_inc(&mptcp_static_key); + } else if (deferred < 0) { + /* Do exactly one dec less than necessary */ + while (++deferred) + static_key_slow_dec(&mptcp_static_key); + return; + } +#endif + static_key_slow_inc(&mptcp_static_key); + WARN_ON(atomic_read(&mptcp_static_key.enabled) == 0); +} + +void mptcp_disable_static_key(void) +{ +#ifdef HAVE_JUMP_LABEL + int deferred; + + if (in_interrupt()) { + atomic_dec(&mptcp_enable_deferred); + return; + } + + deferred = atomic_xchg(&mptcp_enable_deferred, 0); + + if (deferred > 0) { + /* Do exactly one inc less than necessary */ + while (--deferred) + static_key_slow_inc(&mptcp_static_key); + return; + } else if (deferred < 0) { + while (deferred++) + static_key_slow_dec(&mptcp_static_key); + } +#endif + static_key_slow_dec(&mptcp_static_key); +} + +void mptcp_enable_sock(struct sock *sk) +{ + if (!sock_flag(sk, SOCK_MPTCP)) { + sock_set_flag(sk, SOCK_MPTCP); + tcp_sk(sk)->mptcp_ver = sysctl_mptcp_version; + + /* Necessary here, because MPTCP can be enabled/disabled through + * a setsockopt. + */ + if (sk->sk_family == AF_INET) + inet_csk(sk)->icsk_af_ops = &mptcp_v4_specific; +#if IS_ENABLED(CONFIG_IPV6) + else if (mptcp_v6_is_v4_mapped(sk)) + inet_csk(sk)->icsk_af_ops = &mptcp_v6_mapped; + else + inet_csk(sk)->icsk_af_ops = &mptcp_v6_specific; +#endif + + mptcp_enable_static_key(); + } +} + +void mptcp_disable_sock(struct sock *sk) +{ + if (sock_flag(sk, SOCK_MPTCP)) { + sock_reset_flag(sk, SOCK_MPTCP); + + /* Necessary here, because MPTCP can be enabled/disabled through + * a setsockopt. + */ + if (sk->sk_family == AF_INET) + inet_csk(sk)->icsk_af_ops = &ipv4_specific; +#if IS_ENABLED(CONFIG_IPV6) + else if (mptcp_v6_is_v4_mapped(sk)) + inet_csk(sk)->icsk_af_ops = &ipv6_mapped; + else + inet_csk(sk)->icsk_af_ops = &ipv6_specific; +#endif + + mptcp_disable_static_key(); + } +} + +void mptcp_connect_init(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + + rcu_read_lock_bh(); + spin_lock(&mptcp_tk_hashlock); + do { + mptcp_set_key_sk(sk); + } while (mptcp_reqsk_find_tk(tp->mptcp_loc_token) || + mptcp_find_token(tp->mptcp_loc_token)); + + __mptcp_hash_insert(tp, tp->mptcp_loc_token); + spin_unlock(&mptcp_tk_hashlock); + rcu_read_unlock_bh(); + + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVE); +} + +/** + * This function increments the refcount of the mpcb struct. + * It is the responsibility of the caller to decrement when releasing + * the structure. + */ +struct sock *mptcp_hash_find(const struct net *net, const u32 token) +{ + const u32 hash = mptcp_hash_tk(token); + const struct tcp_sock *meta_tp; + struct sock *meta_sk = NULL; + const struct hlist_nulls_node *node; + + rcu_read_lock(); +begin: + hlist_nulls_for_each_entry_rcu(meta_tp, node, &tk_hashtable[hash], + tk_table) { + meta_sk = (struct sock *)meta_tp; + if (token == meta_tp->mptcp_loc_token && + net_eq(net, sock_net(meta_sk))) { + if (unlikely(!atomic_inc_not_zero(&meta_sk->sk_refcnt))) + goto out; + if (unlikely(token != meta_tp->mptcp_loc_token || + !net_eq(net, sock_net(meta_sk)))) { + sock_gen_put(meta_sk); + goto begin; + } + goto found; + } + } + /* A TCP-socket is destroyed by RCU. 
So, it might have been recycled + * and put into another hash-table list. So, after the lookup we may + * end up in a different list. So, we may need to restart. + * + * See also the comment in __inet_lookup_established. + */ + if (get_nulls_value(node) != hash) + goto begin; +out: + meta_sk = NULL; +found: + rcu_read_unlock(); + return meta_sk; +} + +void mptcp_hash_remove_bh(struct tcp_sock *meta_tp) +{ + /* remove from the token hashtable */ + rcu_read_lock_bh(); + spin_lock(&mptcp_tk_hashlock); + hlist_nulls_del_init_rcu(&meta_tp->tk_table); + meta_tp->inside_tk_table = 0; + spin_unlock(&mptcp_tk_hashlock); + rcu_read_unlock_bh(); +} + +void mptcp_hash_remove(struct tcp_sock *meta_tp) +{ + rcu_read_lock(); + spin_lock(&mptcp_tk_hashlock); + hlist_nulls_del_init_rcu(&meta_tp->tk_table); + meta_tp->inside_tk_table = 0; + spin_unlock(&mptcp_tk_hashlock); + rcu_read_unlock(); +} + +struct sock *mptcp_select_ack_sock(const struct sock *meta_sk) +{ + const struct tcp_sock *meta_tp = tcp_sk(meta_sk); + struct sock *sk, *rttsk = NULL, *lastsk = NULL; + u32 min_time = 0, last_active = 0; + + mptcp_for_each_sk(meta_tp->mpcb, sk) { + struct tcp_sock *tp = tcp_sk(sk); + u32 elapsed; + + if (!mptcp_sk_can_send_ack(sk) || tp->pf) + continue; + + elapsed = keepalive_time_elapsed(tp); + + /* We take the one with the lowest RTT within a reasonable + * (meta-RTO)-timeframe + */ + if (elapsed < inet_csk(meta_sk)->icsk_rto) { + if (!min_time || tp->srtt_us < min_time) { + min_time = tp->srtt_us; + rttsk = sk; + } + continue; + } + + /* Otherwise, we just take the most recent active */ + if (!rttsk && (!last_active || elapsed < last_active)) { + last_active = elapsed; + lastsk = sk; + } + } + + if (rttsk) + return rttsk; + + return lastsk; +} +EXPORT_SYMBOL(mptcp_select_ack_sock); + +static void mptcp_sock_def_error_report(struct sock *sk) +{ + const struct mptcp_cb *mpcb = tcp_sk(sk)->mpcb; + + if (!sock_flag(sk, SOCK_DEAD)) + mptcp_sub_close(sk, 0); + + if (mpcb->infinite_mapping_rcv || mpcb->infinite_mapping_snd || + mpcb->send_infinite_mapping) { + struct sock *meta_sk = mptcp_meta_sk(sk); + + meta_sk->sk_err = sk->sk_err; + meta_sk->sk_err_soft = sk->sk_err_soft; + + if (!sock_flag(meta_sk, SOCK_DEAD)) + meta_sk->sk_error_report(meta_sk); + + tcp_done(meta_sk); + } + + sk->sk_err = 0; + return; +} + +static void mptcp_mpcb_put(struct mptcp_cb *mpcb) +{ + if (atomic_dec_and_test(&mpcb->mpcb_refcnt)) { + mptcp_cleanup_path_manager(mpcb); + mptcp_cleanup_scheduler(mpcb); + kmem_cache_free(mptcp_cb_cache, mpcb); + } +} + +void mptcp_sock_destruct(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (!is_meta_sk(sk) && !tp->was_meta_sk) { + BUG_ON(!hlist_unhashed(&tp->mptcp->cb_list)); + + kmem_cache_free(mptcp_sock_cache, tp->mptcp); + tp->mptcp = NULL; + + /* Taken when mpcb pointer was set */ + sock_put(mptcp_meta_sk(sk)); + mptcp_mpcb_put(tp->mpcb); + } else { + struct mptcp_cb *mpcb = tp->mpcb; + struct mptcp_tw *mptw; + + /* The mpcb is disappearing - we can make the final + * update to the rcv_nxt of the time-wait-sock and remove + * its reference to the mpcb. 
+ */ + spin_lock_bh(&mpcb->tw_lock); + list_for_each_entry_rcu(mptw, &mpcb->tw_list, list) { + list_del_rcu(&mptw->list); + mptw->in_list = 0; + mptcp_mpcb_put(mpcb); + rcu_assign_pointer(mptw->mpcb, NULL); + } + spin_unlock_bh(&mpcb->tw_lock); + + mptcp_mpcb_put(mpcb); + + mptcp_debug("%s destroying meta-sk\n", __func__); + } + + WARN_ON(!static_key_false(&mptcp_static_key)); + + /* Must be called here, because this will decrement the jump-label. */ + inet_sock_destruct(sk); +} + +void mptcp_destroy_sock(struct sock *sk) +{ + if (is_meta_sk(sk)) { + struct sock *sk_it, *tmpsk; + + if (tcp_sk(sk)->mpcb->sched_ops->release) + tcp_sk(sk)->mpcb->sched_ops->release(sk); + + __skb_queue_purge(&tcp_sk(sk)->mpcb->reinject_queue); + mptcp_purge_ofo_queue(tcp_sk(sk)); + + /* We have to close all remaining subflows. Normally, they + * should all be about to get closed. But, if the kernel is + * forcing a closure (e.g., tcp_write_err), the subflows might + * not have been closed properly (as we are waiting for the + * DATA_ACK of the DATA_FIN). + */ + mptcp_for_each_sk_safe(tcp_sk(sk)->mpcb, sk_it, tmpsk) { + /* Already did call tcp_close - waiting for graceful + * closure, or if we are retransmitting fast-close on + * the subflow. The reset (or timeout) will kill the + * subflow.. + */ + if (tcp_sk(sk_it)->closing || + tcp_sk(sk_it)->send_mp_fclose) + continue; + + /* Allow the delayed work first to prevent time-wait state */ + if (delayed_work_pending(&tcp_sk(sk_it)->mptcp->work)) + continue; + + mptcp_sub_close(sk_it, 0); + } + } else { + mptcp_del_sock(sk); + } +} + +static void mptcp_set_state(struct sock *sk) +{ + struct sock *meta_sk = mptcp_meta_sk(sk); + + /* Meta is not yet established - wake up the application */ + if ((1 << meta_sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV) && + sk->sk_state == TCP_ESTABLISHED) { + tcp_set_state(meta_sk, TCP_ESTABLISHED); + + if (!sock_flag(meta_sk, SOCK_DEAD)) { + meta_sk->sk_state_change(meta_sk); + sk_wake_async(meta_sk, SOCK_WAKE_IO, POLL_OUT); + } + } + + if (sk->sk_state == TCP_ESTABLISHED) { + tcp_sk(sk)->mptcp->establish_increased = 1; + tcp_sk(sk)->mpcb->cnt_established++; + } +} + +static void mptcp_assign_congestion_control(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + struct inet_connection_sock *meta_icsk = inet_csk(mptcp_meta_sk(sk)); + const struct tcp_congestion_ops *ca = meta_icsk->icsk_ca_ops; + + /* Congestion control is the same as meta. Thus, it has been + * try_module_get'd by tcp_assign_congestion_control. + */ + if (icsk->icsk_ca_ops == ca) + return; + + /* Use the same congestion control as set on the meta-sk */ + if (!try_module_get(ca->owner)) { + /* This should never happen. The congestion control is linked + * to the meta-socket (through tcp_assign_congestion_control) + * who "holds" the refcnt on the module. + */ + WARN(1, "Could not get the congestion control!"); + return; + } + icsk->icsk_ca_ops = ca; + + /* Clear out private data before diag gets it and + * the ca has not been initialized. 
+ */ + if (ca->get_info) + memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv)); + + return; +} + +u32 mptcp_secret[MD5_MESSAGE_BYTES / 4] ____cacheline_aligned; +u32 mptcp_seed = 0; + +void mptcp_key_sha1(u64 key, u32 *token, u64 *idsn) +{ + u32 workspace[SHA_WORKSPACE_WORDS]; + u32 mptcp_hashed_key[SHA_DIGEST_WORDS]; + u8 input[64]; + int i; + + memset(workspace, 0, sizeof(workspace)); + + /* Initialize input with appropriate padding */ + memset(&input[9], 0, sizeof(input) - 10); /* -10, because the last byte + * is explicitly set too + */ + memcpy(input, &key, sizeof(key)); /* Copy key to the msg beginning */ + input[8] = 0x80; /* Padding: First bit after message = 1 */ + input[63] = 0x40; /* Padding: Length of the message = 64 bits */ + + sha_init(mptcp_hashed_key); + sha_transform(mptcp_hashed_key, input, workspace); + + for (i = 0; i < 5; i++) + mptcp_hashed_key[i] = cpu_to_be32(mptcp_hashed_key[i]); + + if (token) + *token = mptcp_hashed_key[0]; + if (idsn) + *idsn = *((u64 *)&mptcp_hashed_key[3]); +} + +void mptcp_hmac_sha1(u8 *key_1, u8 *key_2, u32 *hash_out, int arg_num, ...) +{ + u32 workspace[SHA_WORKSPACE_WORDS]; + u8 input[128]; /* 2 512-bit blocks */ + int i; + int index; + int length; + u8 *msg; + va_list list; + + memset(workspace, 0, sizeof(workspace)); + + /* Generate key xored with ipad */ + memset(input, 0x36, 64); + for (i = 0; i < 8; i++) + input[i] ^= key_1[i]; + for (i = 0; i < 8; i++) + input[i + 8] ^= key_2[i]; + + va_start(list, arg_num); + index = 64; + for (i = 0; i < arg_num; i++) { + length = va_arg(list, int); + msg = va_arg(list, u8 *); + BUG_ON(index + length > 125); /* Message is too long */ + memcpy(&input[index], msg, length); + index += length; + } + va_end(list); + + input[index] = 0x80; /* Padding: First bit after message = 1 */ + memset(&input[index + 1], 0, (126 - index)); + + /* Padding: Length of the message = 512 + message length (bits) */ + input[126] = 0x02; + input[127] = ((index - 64) * 8); /* Message length (bits) */ + + sha_init(hash_out); + sha_transform(hash_out, input, workspace); + memset(workspace, 0, sizeof(workspace)); + + sha_transform(hash_out, &input[64], workspace); + memset(workspace, 0, sizeof(workspace)); + + for (i = 0; i < 5; i++) + hash_out[i] = cpu_to_be32(hash_out[i]); + + /* Prepare second part of hmac */ + memset(input, 0x5C, 64); + for (i = 0; i < 8; i++) + input[i] ^= key_1[i]; + for (i = 0; i < 8; i++) + input[i + 8] ^= key_2[i]; + + memcpy(&input[64], hash_out, 20); + input[84] = 0x80; + memset(&input[85], 0, 41); + + /* Padding: Length of the message = 512 + 160 bits */ + input[126] = 0x02; + input[127] = 0xA0; + + sha_init(hash_out); + sha_transform(hash_out, input, workspace); + memset(workspace, 0, sizeof(workspace)); + + sha_transform(hash_out, &input[64], workspace); + + for (i = 0; i < 5; i++) + hash_out[i] = cpu_to_be32(hash_out[i]); +} +EXPORT_SYMBOL(mptcp_hmac_sha1); + +static void mptcp_mpcb_inherit_sockopts(struct sock *meta_sk, struct sock *master_sk) +{ + /* Socket-options handled by sk_clone_lock while creating the meta-sk. + * ====== + * SO_SNDBUF, SO_SNDBUFFORCE, SO_RCVBUF, SO_RCVBUFFORCE, SO_RCVLOWAT, + * SO_RCVTIMEO, SO_SNDTIMEO, SO_ATTACH_FILTER, SO_DETACH_FILTER, + * TCP_NODELAY, TCP_CORK + * + * Socket-options handled in this function here + * ====== + * TCP_DEFER_ACCEPT + * SO_KEEPALIVE + * + * Socket-options on the todo-list + * ====== + * SO_BINDTODEVICE - should probably prevent creation of new subsocks + * across other devices. - what about the api-draft? 
+ * SO_DEBUG + * SO_REUSEADDR - probably we don't care about this + * SO_DONTROUTE, SO_BROADCAST + * SO_OOBINLINE + * SO_LINGER + * SO_TIMESTAMP* - I don't think this is of concern for a SOCK_STREAM + * SO_PASSSEC - I don't think this is of concern for a SOCK_STREAM + * SO_RXQ_OVFL + * TCP_COOKIE_TRANSACTIONS + * TCP_MAXSEG + * TCP_THIN_* - Handled by sk_clone_lock, but we need to support this + * in mptcp_meta_retransmit_timer. AND we need to check + * what is about the subsockets. + * TCP_LINGER2 + * TCP_WINDOW_CLAMP + * TCP_USER_TIMEOUT + * TCP_MD5SIG + * + * Socket-options of no concern for the meta-socket (but for the subsocket) + * ====== + * SO_PRIORITY + * SO_MARK + * TCP_CONGESTION + * TCP_SYNCNT + * TCP_QUICKACK + */ + + /* DEFER_ACCEPT should not be set on the meta, as we want to accept new subflows directly */ + inet_csk(meta_sk)->icsk_accept_queue.rskq_defer_accept = 0; + + /* Keepalives are handled entirely at the MPTCP-layer */ + if (sock_flag(meta_sk, SOCK_KEEPOPEN)) { + inet_csk_reset_keepalive_timer(meta_sk, + keepalive_time_when(tcp_sk(meta_sk))); + sock_reset_flag(master_sk, SOCK_KEEPOPEN); + inet_csk_delete_keepalive_timer(master_sk); + } + + /* Do not propagate subflow-errors up to the MPTCP-layer */ + inet_sk(master_sk)->recverr = 0; +} + +static void mptcp_sub_inherit_sockopts(const struct sock *meta_sk, struct sock *sub_sk) +{ + /* IP_TOS also goes to the subflow. */ + if (inet_sk(sub_sk)->tos != inet_sk(meta_sk)->tos) { + inet_sk(sub_sk)->tos = inet_sk(meta_sk)->tos; + sub_sk->sk_priority = meta_sk->sk_priority; + sk_dst_reset(sub_sk); + } + + /* Inherit SO_REUSEADDR */ + sub_sk->sk_reuse = meta_sk->sk_reuse; + + /* Inherit snd/rcv-buffer locks */ + sub_sk->sk_userlocks = meta_sk->sk_userlocks & ~SOCK_BINDPORT_LOCK; + + /* Nagle/Cork is forced off on the subflows. It is handled at the meta-layer */ + tcp_sk(sub_sk)->nonagle = TCP_NAGLE_OFF|TCP_NAGLE_PUSH; + + /* Keepalives are handled entirely at the MPTCP-layer */ + if (sock_flag(sub_sk, SOCK_KEEPOPEN)) { + sock_reset_flag(sub_sk, SOCK_KEEPOPEN); + inet_csk_delete_keepalive_timer(sub_sk); + } + + /* Do not propagate subflow-errors up to the MPTCP-layer */ + inet_sk(sub_sk)->recverr = 0; +} + +int mptcp_backlog_rcv(struct sock *meta_sk, struct sk_buff *skb) +{ + /* skb-sk may be NULL if we receive a packet immediatly after the + * SYN/ACK + MP_CAPABLE. + */ + struct sock *sk = skb->sk ? 
skb->sk : meta_sk; + int ret = 0; + + skb->sk = NULL; + + if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) { + kfree_skb(skb); + return 0; + } + + if (sk->sk_family == AF_INET) + ret = tcp_v4_do_rcv(sk, skb); +#if IS_ENABLED(CONFIG_IPV6) + else + ret = tcp_v6_do_rcv(sk, skb); +#endif + + sock_put(sk); + return ret; +} + +struct lock_class_key meta_key; +struct lock_class_key meta_slock_key; + +static const struct tcp_sock_ops mptcp_meta_specific = { + .__select_window = __mptcp_select_window, + .select_window = mptcp_select_window, + .select_initial_window = mptcp_select_initial_window, + .select_size = mptcp_select_size, + .init_buffer_space = mptcp_init_buffer_space, + .set_rto = mptcp_tcp_set_rto, + .should_expand_sndbuf = mptcp_should_expand_sndbuf, + .send_fin = mptcp_send_fin, + .write_xmit = mptcp_write_xmit, + .send_active_reset = mptcp_send_active_reset, + .write_wakeup = mptcp_write_wakeup, + .prune_ofo_queue = mptcp_prune_ofo_queue, + .retransmit_timer = mptcp_meta_retransmit_timer, + .time_wait = mptcp_time_wait, + .cleanup_rbuf = mptcp_cleanup_rbuf, +}; + +static const struct tcp_sock_ops mptcp_sub_specific = { + .__select_window = __mptcp_select_window, + .select_window = mptcp_select_window, + .select_initial_window = mptcp_select_initial_window, + .select_size = mptcp_select_size, + .init_buffer_space = mptcp_init_buffer_space, + .set_rto = mptcp_tcp_set_rto, + .should_expand_sndbuf = mptcp_should_expand_sndbuf, + .send_fin = tcp_send_fin, + .write_xmit = tcp_write_xmit, + .send_active_reset = tcp_send_active_reset, + .write_wakeup = tcp_write_wakeup, + .prune_ofo_queue = tcp_prune_ofo_queue, + .retransmit_timer = mptcp_sub_retransmit_timer, + .time_wait = tcp_time_wait, + .cleanup_rbuf = tcp_cleanup_rbuf, +}; + +static int mptcp_alloc_mpcb(struct sock *meta_sk, __u64 remote_key, + __u8 mptcp_ver, u32 window) +{ + struct mptcp_cb *mpcb; + struct sock *master_sk; + struct inet_connection_sock *meta_icsk = inet_csk(meta_sk); + struct tcp_sock *master_tp, *meta_tp = tcp_sk(meta_sk); + u64 idsn; + + dst_release(meta_sk->sk_rx_dst); + meta_sk->sk_rx_dst = NULL; + /* This flag is set to announce sock_lock_init to + * reclassify the lock-class of the master socket. + */ + meta_tp->is_master_sk = 1; + master_sk = sk_clone_lock(meta_sk, GFP_ATOMIC | __GFP_ZERO); + meta_tp->is_master_sk = 0; + if (!master_sk) + return -ENOBUFS; + + master_tp = tcp_sk(master_sk); + + mpcb = kmem_cache_zalloc(mptcp_cb_cache, GFP_ATOMIC); + if (!mpcb) { + /* sk_free (and __sk_free) requirese wmem_alloc to be 1. + * All the rest is set to 0 thanks to __GFP_ZERO above. 
+ */ + atomic_set(&master_sk->sk_wmem_alloc, 1); + sk_free(master_sk); + return -ENOBUFS; + } + +#if IS_ENABLED(CONFIG_IPV6) + if (meta_icsk->icsk_af_ops == &mptcp_v6_mapped) { + struct ipv6_pinfo *newnp, *np = inet6_sk(meta_sk); + + inet_sk(master_sk)->pinet6 = &((struct tcp6_sock *)master_sk)->inet6; + + newnp = inet6_sk(master_sk); + memcpy(newnp, np, sizeof(struct ipv6_pinfo)); + + newnp->ipv6_mc_list = NULL; + newnp->ipv6_ac_list = NULL; + newnp->ipv6_fl_list = NULL; + newnp->opt = NULL; + newnp->pktoptions = NULL; + (void)xchg(&newnp->rxpmtu, NULL); + } else if (meta_sk->sk_family == AF_INET6) { + struct ipv6_pinfo *newnp, *np = inet6_sk(meta_sk); + + inet_sk(master_sk)->pinet6 = &((struct tcp6_sock *)master_sk)->inet6; + + newnp = inet6_sk(master_sk); + memcpy(newnp, np, sizeof(struct ipv6_pinfo)); + + newnp->hop_limit = -1; + newnp->mcast_hops = IPV6_DEFAULT_MCASTHOPS; + newnp->mc_loop = 1; + newnp->pmtudisc = IPV6_PMTUDISC_WANT; + master_sk->sk_ipv6only = sock_net(master_sk)->ipv6.sysctl.bindv6only; + } +#endif + + meta_tp->mptcp = NULL; + + /* Store the mptcp version agreed on initial handshake */ + mpcb->mptcp_ver = mptcp_ver; + + /* Store the keys and generate the peer's token */ + mpcb->mptcp_loc_key = meta_tp->mptcp_loc_key; + mpcb->mptcp_loc_token = meta_tp->mptcp_loc_token; + + /* Generate Initial data-sequence-numbers */ + mptcp_key_sha1(mpcb->mptcp_loc_key, NULL, &idsn); + idsn = ntohll(idsn) + 1; + mpcb->snd_high_order[0] = idsn >> 32; + mpcb->snd_high_order[1] = mpcb->snd_high_order[0] - 1; + + meta_tp->write_seq = (u32)idsn; + meta_tp->snd_sml = meta_tp->write_seq; + meta_tp->snd_una = meta_tp->write_seq; + meta_tp->snd_nxt = meta_tp->write_seq; + meta_tp->pushed_seq = meta_tp->write_seq; + meta_tp->snd_up = meta_tp->write_seq; + + mpcb->mptcp_rem_key = remote_key; + mptcp_key_sha1(mpcb->mptcp_rem_key, &mpcb->mptcp_rem_token, &idsn); + idsn = ntohll(idsn) + 1; + mpcb->rcv_high_order[0] = idsn >> 32; + mpcb->rcv_high_order[1] = mpcb->rcv_high_order[0] + 1; + meta_tp->copied_seq = (u32) idsn; + meta_tp->rcv_nxt = (u32) idsn; + meta_tp->rcv_wup = (u32) idsn; + + meta_tp->snd_wl1 = meta_tp->rcv_nxt - 1; + meta_tp->snd_wnd = window; + meta_tp->retrans_stamp = 0; /* Set in tcp_connect() */ + + meta_tp->packets_out = 0; + meta_icsk->icsk_probes_out = 0; + + /* Set mptcp-pointers */ + master_tp->mpcb = mpcb; + master_tp->meta_sk = meta_sk; + meta_tp->mpcb = mpcb; + meta_tp->meta_sk = meta_sk; + mpcb->meta_sk = meta_sk; + mpcb->master_sk = master_sk; + + meta_tp->was_meta_sk = 0; + + /* Initialize the queues */ + skb_queue_head_init(&mpcb->reinject_queue); + skb_queue_head_init(&master_tp->out_of_order_queue); + tcp_prequeue_init(master_tp); + INIT_LIST_HEAD(&master_tp->tsq_node); + + master_tp->tsq_flags = 0; + + mutex_init(&mpcb->mpcb_mutex); + + /* Init the accept_queue structure, we support a queue of 32 pending + * connections, it does not need to be huge, since we only store here + * pending subflow creations. 
+ */ + if (reqsk_queue_alloc(&meta_icsk->icsk_accept_queue, 32, GFP_ATOMIC)) { + inet_put_port(master_sk); + kmem_cache_free(mptcp_cb_cache, mpcb); + sk_free(master_sk); + return -ENOMEM; + } + + if (!sock_flag(meta_sk, SOCK_MPTCP)) { + mptcp_enable_static_key(); + sock_set_flag(meta_sk, SOCK_MPTCP); + } + + /* Redefine function-pointers as the meta-sk is now fully ready */ + meta_tp->mpc = 1; + meta_tp->ops = &mptcp_meta_specific; + + meta_sk->sk_backlog_rcv = mptcp_backlog_rcv; + meta_sk->sk_destruct = mptcp_sock_destruct; + + /* Meta-level retransmit timer */ + meta_icsk->icsk_rto *= 2; /* Double of initial - rto */ + + tcp_init_xmit_timers(master_sk); + /* Has been set for sending out the SYN */ + inet_csk_clear_xmit_timer(meta_sk, ICSK_TIME_RETRANS); + + if (!meta_tp->inside_tk_table) { + /* Adding the meta_tp in the token hashtable - coming from server-side */ + rcu_read_lock(); + spin_lock(&mptcp_tk_hashlock); + + __mptcp_hash_insert(meta_tp, mpcb->mptcp_loc_token); + + spin_unlock(&mptcp_tk_hashlock); + rcu_read_unlock(); + } + master_tp->inside_tk_table = 0; + + /* Init time-wait stuff */ + INIT_LIST_HEAD(&mpcb->tw_list); + spin_lock_init(&mpcb->tw_lock); + + INIT_HLIST_HEAD(&mpcb->callback_list); + + mptcp_mpcb_inherit_sockopts(meta_sk, master_sk); + + mpcb->orig_sk_rcvbuf = meta_sk->sk_rcvbuf; + mpcb->orig_sk_sndbuf = meta_sk->sk_sndbuf; + mpcb->orig_window_clamp = meta_tp->window_clamp; + + /* The meta is directly linked - set refcnt to 1 */ + atomic_set(&mpcb->mpcb_refcnt, 1); + + mptcp_init_path_manager(mpcb); + mptcp_init_scheduler(mpcb); + + if (!try_module_get(inet_csk(master_sk)->icsk_ca_ops->owner)) + tcp_assign_congestion_control(master_sk); + + + mptcp_debug("%s: created mpcb with token %#x\n", + __func__, mpcb->mptcp_loc_token); + + return 0; +} + +void mptcp_fallback_meta_sk(struct sock *meta_sk) +{ + kfree(inet_csk(meta_sk)->icsk_accept_queue.listen_opt); + kmem_cache_free(mptcp_cb_cache, tcp_sk(meta_sk)->mpcb); +} + +int mptcp_add_sock(struct sock *meta_sk, struct sock *sk, u8 loc_id, u8 rem_id, + gfp_t flags) +{ + struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb; + struct tcp_sock *tp = tcp_sk(sk); + + tp->mptcp = kmem_cache_zalloc(mptcp_sock_cache, flags); + if (!tp->mptcp) + return -ENOMEM; + + tp->mptcp->path_index = mptcp_set_new_pathindex(mpcb); + /* No more space for more subflows? */ + if (!tp->mptcp->path_index) { + kmem_cache_free(mptcp_sock_cache, tp->mptcp); + return -EPERM; + } + + tp->mptcp->sbf_id = ++mpcb->last_sbf_id; + + INIT_HLIST_NODE(&tp->mptcp->cb_list); + + tp->mptcp->tp = tp; + tp->mpcb = mpcb; + tp->meta_sk = meta_sk; + + if (!sock_flag(sk, SOCK_MPTCP)) { + mptcp_enable_static_key(); + sock_set_flag(sk, SOCK_MPTCP); + } + + tp->mpc = 1; + tp->ops = &mptcp_sub_specific; + + tp->mptcp->loc_id = loc_id; + tp->mptcp->rem_id = rem_id; + if (mpcb->sched_ops->init) + mpcb->sched_ops->init(sk); + + /* The corresponding sock_put is in mptcp_sock_destruct(). It cannot be + * included in mptcp_del_sock(), because the mpcb must remain alive + * until the last subsocket is completely destroyed. 
+ */ + sock_hold(meta_sk); + atomic_inc(&mpcb->mpcb_refcnt); + + tp->mptcp->next = mpcb->connection_list; + mpcb->connection_list = tp; + tp->mptcp->attached = 1; + + mpcb->cnt_subflows++; + atomic_add(atomic_read(&((struct sock *)tp)->sk_rmem_alloc), + &meta_sk->sk_rmem_alloc); + + mptcp_sub_inherit_sockopts(meta_sk, sk); + INIT_DELAYED_WORK(&tp->mptcp->work, mptcp_sub_close_wq); + + /* Properly inherit CC from the meta-socket */ + mptcp_assign_congestion_control(sk); + + /* As we successfully allocated the mptcp_tcp_sock, we have to + * change the function-pointers here (for sk_destruct to work correctly) + */ + sk->sk_error_report = mptcp_sock_def_error_report; + sk->sk_data_ready = mptcp_data_ready; + sk->sk_write_space = mptcp_write_space; + sk->sk_state_change = mptcp_set_state; + sk->sk_destruct = mptcp_sock_destruct; + + if (sk->sk_family == AF_INET) + mptcp_debug("%s: token %#x pi %d, src_addr:%pI4:%d dst_addr:%pI4:%d, cnt_subflows now %d\n", + __func__ , mpcb->mptcp_loc_token, + tp->mptcp->path_index, + &((struct inet_sock *)tp)->inet_saddr, + ntohs(((struct inet_sock *)tp)->inet_sport), + &((struct inet_sock *)tp)->inet_daddr, + ntohs(((struct inet_sock *)tp)->inet_dport), + mpcb->cnt_subflows); +#if IS_ENABLED(CONFIG_IPV6) + else + mptcp_debug("%s: token %#x pi %d, src_addr:%pI6:%d dst_addr:%pI6:%d, cnt_subflows now %d\n", + __func__ , mpcb->mptcp_loc_token, + tp->mptcp->path_index, &inet6_sk(sk)->saddr, + ntohs(((struct inet_sock *)tp)->inet_sport), + &sk->sk_v6_daddr, + ntohs(((struct inet_sock *)tp)->inet_dport), + mpcb->cnt_subflows); +#endif + + return 0; +} + +void mptcp_del_sock(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk), *tp_prev; + struct mptcp_cb *mpcb; + + if (!tp->mptcp || !tp->mptcp->attached) + return; + + mpcb = tp->mpcb; + tp_prev = mpcb->connection_list; + + if (mpcb->sched_ops->release) + mpcb->sched_ops->release(sk); + + mptcp_debug("%s: Removing subsock tok %#x pi:%d state %d is_meta? %d\n", + __func__, mpcb->mptcp_loc_token, tp->mptcp->path_index, + sk->sk_state, is_meta_sk(sk)); + + if (tp_prev == tp) { + mpcb->connection_list = tp->mptcp->next; + } else { + for (; tp_prev && tp_prev->mptcp->next; tp_prev = tp_prev->mptcp->next) { + if (tp_prev->mptcp->next == tp) { + tp_prev->mptcp->next = tp->mptcp->next; + break; + } + } + } + mpcb->cnt_subflows--; + if (tp->mptcp->establish_increased) + mpcb->cnt_established--; + + tp->mptcp->next = NULL; + tp->mptcp->attached = 0; + mpcb->path_index_bits &= ~(1 << tp->mptcp->path_index); + + if (!skb_queue_empty(&sk->sk_write_queue)) + mptcp_reinject_data(sk, 0); + + if (is_master_tp(tp)) + mpcb->master_sk = NULL; + else if (tp->mptcp->pre_established) + sk_stop_timer(sk, &tp->mptcp->mptcp_ack_timer); + + rcu_assign_pointer(inet_sk(sk)->inet_opt, NULL); +} + +/* Updates the MPTCP-session based on path-manager information (e.g., addresses, + * low-prio flows,...). + */ +void mptcp_update_metasocket(struct sock *sk, const struct sock *meta_sk) +{ + if (tcp_sk(sk)->mpcb->pm_ops->new_session) + tcp_sk(sk)->mpcb->pm_ops->new_session(meta_sk); +} + +/* Clean up the receive buffer for full frames taken by the user, + * then send an ACK if necessary. COPIED is the number of bytes + * tcp_recvmsg has given to the user so far, it speeds up the + * calculation of whether or not we must ACK for the sake of + * a window update. 
+ */ +void mptcp_cleanup_rbuf(struct sock *meta_sk, int copied) +{ + struct tcp_sock *meta_tp = tcp_sk(meta_sk); + struct sock *sk; + __u32 rcv_window_now = 0; + + if (copied > 0 && !(meta_sk->sk_shutdown & RCV_SHUTDOWN)) { + rcv_window_now = tcp_receive_window(meta_tp); + + if (2 * rcv_window_now > meta_tp->window_clamp) + rcv_window_now = 0; + } + + mptcp_for_each_sk(meta_tp->mpcb, sk) { + struct tcp_sock *tp = tcp_sk(sk); + const struct inet_connection_sock *icsk = inet_csk(sk); + + if (!mptcp_sk_can_send_ack(sk)) + continue; + + if (!inet_csk_ack_scheduled(sk)) + goto second_part; + /* Delayed ACKs frequently hit locked sockets during bulk + * receive. + */ + if (icsk->icsk_ack.blocked || + /* Once-per-two-segments ACK was not sent by tcp_input.c */ + tp->rcv_nxt - tp->rcv_wup > icsk->icsk_ack.rcv_mss || + /* If this read emptied read buffer, we send ACK, if + * connection is not bidirectional, user drained + * receive buffer and there was a small segment + * in queue. + */ + (copied > 0 && + ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED2) || + ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED) && + !icsk->icsk_ack.pingpong)) && + !atomic_read(&meta_sk->sk_rmem_alloc))) { + tcp_send_ack(sk); + continue; + } + +second_part: + /* This here is the second part of tcp_cleanup_rbuf */ + if (rcv_window_now) { + __u32 new_window = tp->ops->__select_window(sk); + + /* Send ACK now, if this read freed lots of space + * in our buffer. Certainly, new_window is new window. + * We can advertise it now, if it is not less than + * current one. + * "Lots" means "at least twice" here. + */ + if (new_window && new_window >= 2 * rcv_window_now) + tcp_send_ack(sk); + } + } +} + +static int mptcp_sub_send_fin(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct sk_buff *skb = tcp_write_queue_tail(sk); + int mss_now; + + /* Optimization, tack on the FIN if we have a queue of + * unsent frames. But be careful about outgoing SACKS + * and IP options. + */ + mss_now = tcp_current_mss(sk); + + if (tcp_send_head(sk) != NULL) { + TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_FIN; + TCP_SKB_CB(skb)->end_seq++; + tp->write_seq++; + } else { + skb = alloc_skb_fclone(MAX_TCP_HEADER, GFP_ATOMIC); + if (!skb) + return 1; + + /* Reserve space for headers and prepare control bits. */ + skb_reserve(skb, MAX_TCP_HEADER); + /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */ + tcp_init_nondata_skb(skb, tp->write_seq, + TCPHDR_ACK | TCPHDR_FIN); + tcp_queue_skb(sk, skb); + } + __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF); + + return 0; +} + +void mptcp_sub_close_wq(struct work_struct *work) +{ + struct tcp_sock *tp = container_of(work, struct mptcp_tcp_sock, work.work)->tp; + struct sock *sk = (struct sock *)tp; + struct sock *meta_sk = mptcp_meta_sk(sk); + + mutex_lock(&tp->mpcb->mpcb_mutex); + lock_sock_nested(meta_sk, SINGLE_DEPTH_NESTING); + + if (sock_flag(sk, SOCK_DEAD)) + goto exit; + + /* We come from tcp_disconnect. 
We are sure that meta_sk is set */
+	if (!mptcp(tp)) {
+		tp->closing = 1;
+		tcp_close(sk, 0);
+		goto exit;
+	}
+
+	if (meta_sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == TCP_CLOSE) {
+		tp->closing = 1;
+		tcp_close(sk, 0);
+	} else if (tcp_close_state(sk)) {
+		sk->sk_shutdown |= SEND_SHUTDOWN;
+		tcp_send_fin(sk);
+	}
+
+exit:
+	release_sock(meta_sk);
+	mutex_unlock(&tp->mpcb->mpcb_mutex);
+	sock_put(sk);
+}
+
+void mptcp_sub_close(struct sock *sk, unsigned long delay)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct delayed_work *work = &tcp_sk(sk)->mptcp->work;
+
+	/* We are already closing - e.g., call from sock_def_error_report upon
+	 * tcp_disconnect in tcp_close.
+	 */
+	if (tp->closing)
+		return;
+
+	/* Work already scheduled ? */
+	if (work_pending(&work->work)) {
+		/* Work present - who will be first ? */
+		if (jiffies + delay > work->timer.expires)
+			return;
+
+		/* Try canceling - if it fails, work will be executed soon */
+		if (!cancel_delayed_work(work))
+			return;
+		sock_put(sk);
+	}
+
+	if (!delay) {
+		unsigned char old_state = sk->sk_state;
+
+		/* If we are in user-context we can directly do the closing
+		 * procedure. No need to schedule a work-queue.
+		 */
+		if (!in_softirq()) {
+			if (sock_flag(sk, SOCK_DEAD))
+				return;
+
+			if (!mptcp(tp)) {
+				tp->closing = 1;
+				tcp_close(sk, 0);
+				return;
+			}
+
+			if (mptcp_meta_sk(sk)->sk_shutdown == SHUTDOWN_MASK ||
+			    sk->sk_state == TCP_CLOSE) {
+				tp->closing = 1;
+				tcp_close(sk, 0);
+			} else if (tcp_close_state(sk)) {
+				sk->sk_shutdown |= SEND_SHUTDOWN;
+				tcp_send_fin(sk);
+			}
+
+			return;
+		}
+
+		/* We directly send the FIN, because it may take a long time
+		 * until the work-queue gets scheduled...
+		 *
+		 * If mptcp_sub_send_fin returns 1, it failed and thus we reset
+		 * the old state so that tcp_close will finally send the fin
+		 * in user-context.
+		 */
+		if (!sk->sk_err && old_state != TCP_CLOSE &&
+		    tcp_close_state(sk) && mptcp_sub_send_fin(sk)) {
+			if (old_state == TCP_ESTABLISHED)
+				TCP_INC_STATS(sock_net(sk), TCP_MIB_CURRESTAB);
+			sk->sk_state = old_state;
+		}
+	}
+
+	sock_hold(sk);
+	queue_delayed_work(mptcp_wq, work, delay);
+}
+
+void mptcp_sub_force_close(struct sock *sk)
+{
+	/* The below tcp_done may have freed the socket, if it is already dead.
+	 * Thus, we are not allowed to access it afterwards. That's why
+	 * we have to store the dead-state in this local variable.
+	 */
+	int sock_is_dead = sock_flag(sk, SOCK_DEAD);
+
+	tcp_sk(sk)->mp_killed = 1;
+
+	if (sk->sk_state != TCP_CLOSE)
+		tcp_done(sk);
+
+	if (!sock_is_dead)
+		mptcp_sub_close(sk, 0);
+}
+EXPORT_SYMBOL(mptcp_sub_force_close);
+
+/* Update the mpcb send window, based on the contributions
+ * of each subflow
+ */
+void mptcp_update_sndbuf(const struct tcp_sock *tp)
+{
+	struct sock *meta_sk = tp->meta_sk, *sk;
+	int new_sndbuf = 0, old_sndbuf = meta_sk->sk_sndbuf;
+
+	mptcp_for_each_sk(tp->mpcb, sk) {
+		if (!mptcp_sk_can_send(sk))
+			continue;
+
+		new_sndbuf += sk->sk_sndbuf;
+
+		if (new_sndbuf > sysctl_tcp_wmem[2] || new_sndbuf < 0) {
+			new_sndbuf = sysctl_tcp_wmem[2];
+			break;
+		}
+	}
+	meta_sk->sk_sndbuf = max(min(new_sndbuf, sysctl_tcp_wmem[2]), meta_sk->sk_sndbuf);
+
+	/* The subflow's call to sk_write_space in tcp_new_space ends up in
+	 * mptcp_write_space.
+	 * It has nothing to do with waking up the application.
+	 * So, we do it here.
+ */ + if (old_sndbuf != meta_sk->sk_sndbuf) + meta_sk->sk_write_space(meta_sk); +} + +void mptcp_close(struct sock *meta_sk, long timeout) +{ + struct tcp_sock *meta_tp = tcp_sk(meta_sk); + struct sock *sk_it, *tmpsk; + struct mptcp_cb *mpcb = meta_tp->mpcb; + struct sk_buff *skb; + int data_was_unread = 0; + int state; + + mptcp_debug("%s: Close of meta_sk with tok %#x\n", + __func__, mpcb->mptcp_loc_token); + + mutex_lock(&mpcb->mpcb_mutex); + lock_sock(meta_sk); + + if (meta_tp->inside_tk_table) { + /* Detach the mpcb from the token hashtable */ + mptcp_hash_remove_bh(meta_tp); + reqsk_queue_destroy(&inet_csk(meta_sk)->icsk_accept_queue); + } + + meta_sk->sk_shutdown = SHUTDOWN_MASK; + /* We need to flush the recv. buffs. We do this only on the + * descriptor close, not protocol-sourced closes, because the + * reader process may not have drained the data yet! + */ + while ((skb = __skb_dequeue(&meta_sk->sk_receive_queue)) != NULL) { + u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq; + + if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) + len--; + data_was_unread += len; + __kfree_skb(skb); + } + + sk_mem_reclaim(meta_sk); + + /* If socket has been already reset (e.g. in tcp_reset()) - kill it. */ + if (meta_sk->sk_state == TCP_CLOSE) { + mptcp_for_each_sk_safe(mpcb, sk_it, tmpsk) { + if (tcp_sk(sk_it)->send_mp_fclose) + continue; + mptcp_sub_close(sk_it, 0); + } + goto adjudge_to_death; + } + + if (data_was_unread) { + /* Unread data was tossed, zap the connection. */ + NET_INC_STATS_USER(sock_net(meta_sk), LINUX_MIB_TCPABORTONCLOSE); + tcp_set_state(meta_sk, TCP_CLOSE); + tcp_sk(meta_sk)->ops->send_active_reset(meta_sk, + meta_sk->sk_allocation); + } else if (sock_flag(meta_sk, SOCK_LINGER) && !meta_sk->sk_lingertime) { + /* Check zero linger _after_ checking for unread data. */ + meta_sk->sk_prot->disconnect(meta_sk, 0); + NET_INC_STATS_USER(sock_net(meta_sk), LINUX_MIB_TCPABORTONDATA); + } else if (tcp_close_state(meta_sk)) { + mptcp_send_fin(meta_sk); + } else if (meta_tp->snd_una == meta_tp->write_seq) { + /* The DATA_FIN has been sent and acknowledged + * (e.g., by sk_shutdown). Close all the other subflows + */ + mptcp_for_each_sk_safe(mpcb, sk_it, tmpsk) { + unsigned long delay = 0; + /* If we are the passive closer, don't trigger + * subflow-fin until the subflow has been finned + * by the peer. - thus we add a delay + */ + if (mpcb->passive_close && + sk_it->sk_state == TCP_ESTABLISHED) + delay = inet_csk(sk_it)->icsk_rto << 3; + + mptcp_sub_close(sk_it, delay); + } + } + + sk_stream_wait_close(meta_sk, timeout); + +adjudge_to_death: + state = meta_sk->sk_state; + sock_hold(meta_sk); + sock_orphan(meta_sk); + + /* socket will be freed after mptcp_close - we have to prevent + * access from the subflows. + */ + mptcp_for_each_sk(mpcb, sk_it) { + /* Similar to sock_orphan, but we don't set it DEAD, because + * the callbacks are still set and must be called. + */ + write_lock_bh(&sk_it->sk_callback_lock); + sk_set_socket(sk_it, NULL); + sk_it->sk_wq = NULL; + write_unlock_bh(&sk_it->sk_callback_lock); + } + + /* It is the last release_sock in its life. It will remove backlog. */ + release_sock(meta_sk); + + /* Now socket is owned by kernel and we acquire BH lock + * to finish close. No need to check for user refs. + */ + local_bh_disable(); + bh_lock_sock(meta_sk); + WARN_ON(sock_owned_by_user(meta_sk)); + + percpu_counter_inc(meta_sk->sk_prot->orphan_count); + + /* Have we already been destroyed by a softirq or backlog? 
*/ + if (state != TCP_CLOSE && meta_sk->sk_state == TCP_CLOSE) + goto out; + + /* This is a (useful) BSD violating of the RFC. There is a + * problem with TCP as specified in that the other end could + * keep a socket open forever with no application left this end. + * We use a 3 minute timeout (about the same as BSD) then kill + * our end. If they send after that then tough - BUT: long enough + * that we won't make the old 4*rto = almost no time - whoops + * reset mistake. + * + * Nope, it was not mistake. It is really desired behaviour + * f.e. on http servers, when such sockets are useless, but + * consume significant resources. Let's do it with special + * linger2 option. --ANK + */ + + if (meta_sk->sk_state == TCP_FIN_WAIT2) { + if (meta_tp->linger2 < 0) { + tcp_set_state(meta_sk, TCP_CLOSE); + meta_tp->ops->send_active_reset(meta_sk, GFP_ATOMIC); + NET_INC_STATS_BH(sock_net(meta_sk), + LINUX_MIB_TCPABORTONLINGER); + } else { + const int tmo = tcp_fin_time(meta_sk); + + if (tmo > TCP_TIMEWAIT_LEN) { + inet_csk_reset_keepalive_timer(meta_sk, + tmo - TCP_TIMEWAIT_LEN); + } else { + meta_tp->ops->time_wait(meta_sk, TCP_FIN_WAIT2, + tmo); + goto out; + } + } + } + if (meta_sk->sk_state != TCP_CLOSE) { + sk_mem_reclaim(meta_sk); + if (tcp_too_many_orphans(meta_sk, 0)) { + if (net_ratelimit()) + pr_info("MPTCP: too many of orphaned sockets\n"); + tcp_set_state(meta_sk, TCP_CLOSE); + meta_tp->ops->send_active_reset(meta_sk, GFP_ATOMIC); + NET_INC_STATS_BH(sock_net(meta_sk), + LINUX_MIB_TCPABORTONMEMORY); + } + } + + + if (meta_sk->sk_state == TCP_CLOSE) + inet_csk_destroy_sock(meta_sk); + /* Otherwise, socket is reprieved until protocol close. */ + +out: + bh_unlock_sock(meta_sk); + local_bh_enable(); + mutex_unlock(&mpcb->mpcb_mutex); + sock_put(meta_sk); /* Taken by sock_hold */ +} + +void mptcp_disconnect(struct sock *sk) +{ + struct sock *subsk, *tmpsk; + struct tcp_sock *tp = tcp_sk(sk); + + __skb_queue_purge(&tp->mpcb->reinject_queue); + + if (tp->inside_tk_table) { + mptcp_hash_remove_bh(tp); + reqsk_queue_destroy(&inet_csk(tp->meta_sk)->icsk_accept_queue); + } + + local_bh_disable(); + mptcp_for_each_sk_safe(tp->mpcb, subsk, tmpsk) { + /* The socket will get removed from the subsocket-list + * and made non-mptcp by setting mpc to 0. + * + * This is necessary, because tcp_disconnect assumes + * that the connection is completly dead afterwards. + * Thus we need to do a mptcp_del_sock. Due to this call + * we have to make it non-mptcp. + * + * We have to lock the socket, because we set mpc to 0. + * An incoming packet would take the subsocket's lock + * and go on into the receive-path. + * This would be a race. + */ + + bh_lock_sock(subsk); + mptcp_del_sock(subsk); + tcp_sk(subsk)->mpc = 0; + tcp_sk(subsk)->ops = &tcp_specific; + mptcp_sub_force_close(subsk); + bh_unlock_sock(subsk); + } + local_bh_enable(); + + tp->was_meta_sk = 1; + tp->mpc = 0; + tp->ops = &tcp_specific; +} + + +/* Returns 1 if we should enable MPTCP for that socket. 
*/ +int mptcp_doit(struct sock *sk) +{ + /* Don't do mptcp over loopback */ + if (sk->sk_family == AF_INET && + (ipv4_is_loopback(inet_sk(sk)->inet_daddr) || + ipv4_is_loopback(inet_sk(sk)->inet_saddr))) + return 0; +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == AF_INET6 && + (ipv6_addr_loopback(&sk->sk_v6_daddr) || + ipv6_addr_loopback(&inet6_sk(sk)->saddr))) + return 0; +#endif + if (mptcp_v6_is_v4_mapped(sk) && + ipv4_is_loopback(inet_sk(sk)->inet_saddr)) + return 0; + +#ifdef CONFIG_TCP_MD5SIG + /* If TCP_MD5SIG is enabled, do not do MPTCP - there is no Option-Space */ + if (tcp_sk(sk)->af_specific->md5_lookup(sk, sk)) + return 0; +#endif + + return 1; +} + +int mptcp_create_master_sk(struct sock *meta_sk, __u64 remote_key, + __u8 mptcp_ver, u32 window) +{ + struct tcp_sock *master_tp; + struct sock *master_sk; + + if (mptcp_alloc_mpcb(meta_sk, remote_key, mptcp_ver, window)) + goto err_alloc_mpcb; + + master_sk = tcp_sk(meta_sk)->mpcb->master_sk; + master_tp = tcp_sk(master_sk); + + if (mptcp_add_sock(meta_sk, master_sk, 0, 0, GFP_ATOMIC)) + goto err_add_sock; + + if (__inet_inherit_port(meta_sk, master_sk) < 0) + goto err_add_sock; + + meta_sk->sk_prot->unhash(meta_sk); + + if (master_sk->sk_family == AF_INET || mptcp_v6_is_v4_mapped(master_sk)) + __inet_hash_nolisten(master_sk, NULL); +#if IS_ENABLED(CONFIG_IPV6) + else + __inet_hash(master_sk, NULL); +#endif + + master_tp->mptcp->init_rcv_wnd = master_tp->rcv_wnd; + + return 0; + +err_add_sock: + mptcp_fallback_meta_sk(meta_sk); + + inet_csk_prepare_forced_close(master_sk); + tcp_done(master_sk); + inet_csk_prepare_forced_close(meta_sk); + tcp_done(meta_sk); + +err_alloc_mpcb: + return -ENOBUFS; +} + +static int __mptcp_check_req_master(struct sock *child, + struct request_sock *req) +{ + struct tcp_sock *child_tp = tcp_sk(child); + struct sock *meta_sk = child; + struct mptcp_cb *mpcb; + struct mptcp_request_sock *mtreq; + + /* Never contained an MP_CAPABLE */ + if (!inet_rsk(req)->mptcp_rqsk) + return 1; + + if (!inet_rsk(req)->saw_mpc) { + /* Fallback to regular TCP, because we saw one SYN without + * MP_CAPABLE. In tcp_check_req we continue the regular path. + * But, the socket has been added to the reqsk_tk_htb, so we + * must still remove it. + */ + MPTCP_INC_STATS_BH(sock_net(meta_sk), MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK); + mptcp_reqsk_remove_tk(req); + return 1; + } + + MPTCP_INC_STATS_BH(sock_net(meta_sk), MPTCP_MIB_MPCAPABLEPASSIVEACK); + + /* Just set this values to pass them to mptcp_alloc_mpcb */ + mtreq = mptcp_rsk(req); + child_tp->mptcp_loc_key = mtreq->mptcp_loc_key; + child_tp->mptcp_loc_token = mtreq->mptcp_loc_token; + + if (mptcp_create_master_sk(meta_sk, mtreq->mptcp_rem_key, + mtreq->mptcp_ver, child_tp->snd_wnd)) + return -ENOBUFS; + + child = tcp_sk(child)->mpcb->master_sk; + child_tp = tcp_sk(child); + mpcb = child_tp->mpcb; + + child_tp->mptcp->snt_isn = tcp_rsk(req)->snt_isn; + child_tp->mptcp->rcv_isn = tcp_rsk(req)->rcv_isn; + + mpcb->dss_csum = mtreq->dss_csum; + mpcb->server_side = 1; + + /* Will be moved to ESTABLISHED by tcp_rcv_state_process() */ + mptcp_update_metasocket(child, meta_sk); + + /* Needs to be done here additionally, because when accepting a + * new connection we pass by __reqsk_free and not reqsk_free. + */ + mptcp_reqsk_remove_tk(req); + + /* Hold when creating the meta-sk in tcp_vX_syn_recv_sock. 
*/ + sock_put(meta_sk); + + return 0; +} + +int mptcp_check_req_fastopen(struct sock *child, struct request_sock *req) +{ + struct sock *meta_sk = child, *master_sk; + struct sk_buff *skb; + u32 new_mapping; + int ret; + + ret = __mptcp_check_req_master(child, req); + if (ret) + return ret; + + master_sk = tcp_sk(meta_sk)->mpcb->master_sk; + + /* We need to rewind copied_seq as it is set to IDSN + 1 and as we have + * pre-MPTCP data in the receive queue. + */ + tcp_sk(meta_sk)->copied_seq -= tcp_sk(master_sk)->rcv_nxt - + tcp_rsk(req)->rcv_isn - 1; + + /* Map subflow sequence number to data sequence numbers. We need to map + * these data to [IDSN - len - 1, IDSN[. + */ + new_mapping = tcp_sk(meta_sk)->copied_seq - tcp_rsk(req)->rcv_isn - 1; + + /* There should be only one skb: the SYN + data. */ + skb_queue_walk(&meta_sk->sk_receive_queue, skb) { + TCP_SKB_CB(skb)->seq += new_mapping; + TCP_SKB_CB(skb)->end_seq += new_mapping; + } + + /* With fastopen we change the semantics of the relative subflow + * sequence numbers to deal with middleboxes that could add/remove + * multiple bytes in the SYN. We chose to start counting at rcv_nxt - 1 + * instead of the regular TCP ISN. + */ + tcp_sk(master_sk)->mptcp->rcv_isn = tcp_sk(master_sk)->rcv_nxt - 1; + + /* We need to update copied_seq of the master_sk to account for the + * already moved data to the meta receive queue. + */ + tcp_sk(master_sk)->copied_seq = tcp_sk(master_sk)->rcv_nxt; + + /* Handled by the master_sk */ + tcp_sk(meta_sk)->fastopen_rsk = NULL; + + return 0; +} + +int mptcp_check_req_master(struct sock *sk, struct sock *child, + struct request_sock *req, + int drop) +{ + struct sock *meta_sk = child; + int ret; + + ret = __mptcp_check_req_master(child, req); + if (ret) + return ret; + + /* drop indicates that we come from tcp_check_req and thus need to + * handle the request-socket fully. + */ + if (drop) { + inet_csk_reqsk_queue_drop(sk, req); + } else { + /* Thus, we come from syn-cookies */ + atomic_set(&req->rsk_refcnt, 1); + } + inet_csk_reqsk_queue_add(sk, req, meta_sk); + + return 0; +} + +struct sock *mptcp_check_req_child(struct sock *meta_sk, struct sock *child, + struct request_sock *req, + const struct mptcp_options_received *mopt) +{ + struct tcp_sock *child_tp = tcp_sk(child); + struct mptcp_request_sock *mtreq = mptcp_rsk(req); + struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb; + u8 hash_mac_check[20]; + + child_tp->inside_tk_table = 0; + + if (!mopt->join_ack) { + MPTCP_INC_STATS_BH(sock_net(meta_sk), MPTCP_MIB_JOINACKFAIL); + goto teardown; + } + + mptcp_hmac_sha1((u8 *)&mpcb->mptcp_rem_key, + (u8 *)&mpcb->mptcp_loc_key, + (u32 *)hash_mac_check, 2, + 4, (u8 *)&mtreq->mptcp_rem_nonce, + 4, (u8 *)&mtreq->mptcp_loc_nonce); + + if (memcmp(hash_mac_check, (char *)&mopt->mptcp_recv_mac, 20)) { + MPTCP_INC_STATS_BH(sock_net(meta_sk), MPTCP_MIB_JOINACKMAC); + goto teardown; + } + + /* Point it to the same struct socket and wq as the meta_sk */ + sk_set_socket(child, meta_sk->sk_socket); + child->sk_wq = meta_sk->sk_wq; + + if (mptcp_add_sock(meta_sk, child, mtreq->loc_id, mtreq->rem_id, GFP_ATOMIC)) { + /* Has been inherited, but now child_tp->mptcp is NULL */ + child_tp->mpc = 0; + child_tp->ops = &tcp_specific; + + /* TODO when we support acking the third ack for new subflows, + * we should silently discard this third ack, by returning NULL. + * + * Maybe, at the retransmission we will have enough memory to + * fully add the socket to the meta-sk. 
+ */ + goto teardown; + } + + /* The child is a clone of the meta socket, we must now reset + * some of the fields + */ + child_tp->mptcp->rcv_low_prio = mtreq->rcv_low_prio; + + /* We should allow proper increase of the snd/rcv-buffers. Thus, we + * use the original values instead of the bloated up ones from the + * clone. + */ + child->sk_sndbuf = mpcb->orig_sk_sndbuf; + child->sk_rcvbuf = mpcb->orig_sk_rcvbuf; + + child_tp->mptcp->slave_sk = 1; + child_tp->mptcp->snt_isn = tcp_rsk(req)->snt_isn; + child_tp->mptcp->rcv_isn = tcp_rsk(req)->rcv_isn; + child_tp->mptcp->init_rcv_wnd = req->rcv_wnd; + + child_tp->tsq_flags = 0; + + /* Subflows do not use the accept queue, as they + * are attached immediately to the mpcb. + */ + inet_csk_reqsk_queue_drop(meta_sk, req); + + /* The refcnt is initialized to 2, because regular TCP will put him + * in the socket's listener queue. However, we do not have a listener-queue. + * So, we need to make sure that this request-sock indeed gets destroyed. + */ + reqsk_put(req); + + MPTCP_INC_STATS_BH(sock_net(meta_sk), MPTCP_MIB_JOINACKRX); + return child; + +teardown: + /* Drop this request - sock creation failed. */ + inet_csk_reqsk_queue_drop(meta_sk, req); + reqsk_put(req); + inet_csk_prepare_forced_close(child); + tcp_done(child); + return meta_sk; +} + +int mptcp_init_tw_sock(struct sock *sk, struct tcp_timewait_sock *tw) +{ + struct mptcp_tw *mptw; + struct tcp_sock *tp = tcp_sk(sk); + struct mptcp_cb *mpcb = tp->mpcb; + + /* A subsocket in tw can only receive data. So, if we are in + * infinite-receive, then we should not reply with a data-ack or act + * upon general MPTCP-signaling. We prevent this by simply not creating + * the mptcp_tw_sock. + */ + if (mpcb->infinite_mapping_rcv) { + tw->mptcp_tw = NULL; + return 0; + } + + /* Alloc MPTCP-tw-sock */ + mptw = kmem_cache_alloc(mptcp_tw_cache, GFP_ATOMIC); + if (!mptw) { + tw->mptcp_tw = NULL; + return -ENOBUFS; + } + + atomic_inc(&mpcb->mpcb_refcnt); + + tw->mptcp_tw = mptw; + mptw->loc_key = mpcb->mptcp_loc_key; + mptw->meta_tw = mpcb->in_time_wait; + if (mptw->meta_tw) { + mptw->rcv_nxt = mptcp_get_rcv_nxt_64(mptcp_meta_tp(tp)); + if (mpcb->mptw_state != TCP_TIME_WAIT) + mptw->rcv_nxt++; + } + rcu_assign_pointer(mptw->mpcb, mpcb); + + spin_lock(&mpcb->tw_lock); + list_add_rcu(&mptw->list, &tp->mpcb->tw_list); + mptw->in_list = 1; + spin_unlock(&mpcb->tw_lock); + + return 0; +} + +void mptcp_twsk_destructor(struct tcp_timewait_sock *tw) +{ + struct mptcp_cb *mpcb; + + rcu_read_lock(); + mpcb = rcu_dereference(tw->mptcp_tw->mpcb); + + /* If we are still holding a ref to the mpcb, we have to remove ourself + * from the list and drop the ref properly. + */ + if (mpcb && atomic_inc_not_zero(&mpcb->mpcb_refcnt)) { + spin_lock(&mpcb->tw_lock); + if (tw->mptcp_tw->in_list) { + list_del_rcu(&tw->mptcp_tw->list); + tw->mptcp_tw->in_list = 0; + } + spin_unlock(&mpcb->tw_lock); + + /* Twice, because we increased it above */ + mptcp_mpcb_put(mpcb); + mptcp_mpcb_put(mpcb); + } + + rcu_read_unlock(); + + kmem_cache_free(mptcp_tw_cache, tw->mptcp_tw); +} + +/* Updates the rcv_nxt of the time-wait-socks and allows them to ack a + * data-fin. 
+ */ +void mptcp_time_wait(struct sock *sk, int state, int timeo) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct mptcp_tw *mptw; + + /* Used for sockets that go into tw after the meta + * (see mptcp_init_tw_sock()) + */ + tp->mpcb->in_time_wait = 1; + tp->mpcb->mptw_state = state; + + /* Update the time-wait-sock's information */ + rcu_read_lock_bh(); + list_for_each_entry_rcu(mptw, &tp->mpcb->tw_list, list) { + mptw->meta_tw = 1; + mptw->rcv_nxt = mptcp_get_rcv_nxt_64(tp); + + /* We want to ack a DATA_FIN, but are yet in FIN_WAIT_2 - + * pretend as if the DATA_FIN has already reached us, that way + * the checks in tcp_timewait_state_process will be good as the + * DATA_FIN comes in. + */ + if (state != TCP_TIME_WAIT) + mptw->rcv_nxt++; + } + rcu_read_unlock_bh(); + + tcp_done(sk); +} + +void mptcp_tsq_flags(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct sock *meta_sk = mptcp_meta_sk(sk); + + /* It will be handled as a regular deferred-call */ + if (is_meta_sk(sk)) + return; + + if (hlist_unhashed(&tp->mptcp->cb_list)) { + hlist_add_head(&tp->mptcp->cb_list, &tp->mpcb->callback_list); + /* We need to hold it here, as the sock_hold is not assured + * by the release_sock as it is done in regular TCP. + * + * The subsocket may get inet_csk_destroy'd while it is inside + * the callback_list. + */ + sock_hold(sk); + } + + if (!test_and_set_bit(MPTCP_SUB_DEFERRED, &tcp_sk(meta_sk)->tsq_flags)) + sock_hold(meta_sk); +} + +void mptcp_tsq_sub_deferred(struct sock *meta_sk) +{ + struct tcp_sock *meta_tp = tcp_sk(meta_sk); + struct mptcp_tcp_sock *mptcp; + struct hlist_node *tmp; + + BUG_ON(!is_meta_sk(meta_sk) && !meta_tp->was_meta_sk); + + __sock_put(meta_sk); + hlist_for_each_entry_safe(mptcp, tmp, &meta_tp->mpcb->callback_list, cb_list) { + struct tcp_sock *tp = mptcp->tp; + struct sock *sk = (struct sock *)tp; + + hlist_del_init(&mptcp->cb_list); + sk->sk_prot->release_cb(sk); + /* Final sock_put (cfr. 
mptcp_tsq_flags */ + sock_put(sk); + } +} + +void mptcp_join_reqsk_init(struct mptcp_cb *mpcb, const struct request_sock *req, + struct sk_buff *skb) +{ + struct mptcp_request_sock *mtreq = mptcp_rsk(req); + struct mptcp_options_received mopt; + u8 mptcp_hash_mac[20]; + + mptcp_init_mp_opt(&mopt); + tcp_parse_mptcp_options(skb, &mopt); + + mtreq->mptcp_mpcb = mpcb; + mtreq->is_sub = 1; + inet_rsk(req)->mptcp_rqsk = 1; + + mtreq->mptcp_rem_nonce = mopt.mptcp_recv_nonce; + + mptcp_hmac_sha1((u8 *)&mpcb->mptcp_loc_key, + (u8 *)&mpcb->mptcp_rem_key, + (u32 *)mptcp_hash_mac, 2, + 4, (u8 *)&mtreq->mptcp_loc_nonce, + 4, (u8 *)&mtreq->mptcp_rem_nonce); + mtreq->mptcp_hash_tmac = *(u64 *)mptcp_hash_mac; + + mtreq->rem_id = mopt.rem_id; + mtreq->rcv_low_prio = mopt.low_prio; + inet_rsk(req)->saw_mpc = 1; + + MPTCP_INC_STATS_BH(sock_net(mpcb->meta_sk), MPTCP_MIB_JOINSYNRX); +} + +void mptcp_reqsk_init(struct request_sock *req, struct sock *sk, + const struct sk_buff *skb, bool want_cookie) +{ + struct mptcp_options_received mopt; + struct mptcp_request_sock *mtreq = mptcp_rsk(req); + + mptcp_init_mp_opt(&mopt); + tcp_parse_mptcp_options(skb, &mopt); + + mtreq->dss_csum = mopt.dss_csum; + + if (want_cookie) { + if (!mptcp_reqsk_new_cookie(req, &mopt, skb)) + /* No key available - back to regular TCP */ + inet_rsk(req)->mptcp_rqsk = 0; + return; + } + + mptcp_reqsk_new_mptcp(req, sk, &mopt, skb); +} + +void mptcp_cookies_reqsk_init(struct request_sock *req, + struct mptcp_options_received *mopt, + struct sk_buff *skb) +{ + struct mptcp_request_sock *mtreq = mptcp_rsk(req); + + /* Absolutely need to always initialize this. */ + mtreq->hash_entry.pprev = NULL; + + mtreq->mptcp_rem_key = mopt->mptcp_sender_key; + mtreq->mptcp_loc_key = mopt->mptcp_receiver_key; + + /* Generate the token */ + mptcp_key_sha1(mtreq->mptcp_loc_key, &mtreq->mptcp_loc_token, NULL); + + rcu_read_lock(); + spin_lock(&mptcp_tk_hashlock); + + /* Check, if the key is still free */ + if (mptcp_reqsk_find_tk(mtreq->mptcp_loc_token) || + mptcp_find_token(mtreq->mptcp_loc_token)) + goto out; + + inet_rsk(req)->saw_mpc = 1; + mtreq->is_sub = 0; + inet_rsk(req)->mptcp_rqsk = 1; + mtreq->dss_csum = mopt->dss_csum; + +out: + spin_unlock(&mptcp_tk_hashlock); + rcu_read_unlock(); +} + +int mptcp_conn_request(struct sock *sk, struct sk_buff *skb) +{ + struct mptcp_options_received mopt; + + mptcp_init_mp_opt(&mopt); + tcp_parse_mptcp_options(skb, &mopt); + + if (mopt.is_mp_join) + return mptcp_do_join_short(skb, &mopt, sock_net(sk)); + if (mopt.drop_me) + goto drop; + + if (!sock_flag(sk, SOCK_MPTCP)) + mopt.saw_mpc = 0; + + if (skb->protocol == htons(ETH_P_IP)) { + if (mopt.saw_mpc) { + if (skb_rtable(skb)->rt_flags & + (RTCF_BROADCAST | RTCF_MULTICAST)) + goto drop; + + MPTCP_INC_STATS_BH(sock_net(sk), MPTCP_MIB_MPCAPABLEPASSIVE); + return tcp_conn_request(&mptcp_request_sock_ops, + &mptcp_request_sock_ipv4_ops, + sk, skb); + } + + return tcp_v4_conn_request(sk, skb); +#if IS_ENABLED(CONFIG_IPV6) + } else { + if (mopt.saw_mpc) { + if (!ipv6_unicast_destination(skb)) + goto drop; + + MPTCP_INC_STATS_BH(sock_net(sk), MPTCP_MIB_MPCAPABLEPASSIVE); + return tcp_conn_request(&mptcp6_request_sock_ops, + &mptcp_request_sock_ipv6_ops, + sk, skb); + } + + return tcp_v6_conn_request(sk, skb); +#endif + } +drop: + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); + return 0; +} + +static const struct snmp_mib mptcp_snmp_list[] = { + SNMP_MIB_ITEM("MPCapableSYNRX", MPTCP_MIB_MPCAPABLEPASSIVE), + SNMP_MIB_ITEM("MPCapableSYNTX", 
MPTCP_MIB_MPCAPABLEACTIVE), + SNMP_MIB_ITEM("MPCapableSYNACKRX", MPTCP_MIB_MPCAPABLEACTIVEACK), + SNMP_MIB_ITEM("MPCapableACKRX", MPTCP_MIB_MPCAPABLEPASSIVEACK), + SNMP_MIB_ITEM("MPCapableFallbackACK", MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK), + SNMP_MIB_ITEM("MPCapableFallbackSYNACK", MPTCP_MIB_MPCAPABLEACTIVEFALLBACK), + SNMP_MIB_ITEM("MPCapableRetransFallback", MPTCP_MIB_MPCAPABLERETRANSFALLBACK), + SNMP_MIB_ITEM("MPTCPCsumEnabled", MPTCP_MIB_CSUMENABLED), + SNMP_MIB_ITEM("MPTCPRetrans", MPTCP_MIB_RETRANSSEGS), + SNMP_MIB_ITEM("MPFailRX", MPTCP_MIB_MPFAILRX), + SNMP_MIB_ITEM("MPCsumFail", MPTCP_MIB_CSUMFAIL), + SNMP_MIB_ITEM("MPFastcloseRX", MPTCP_MIB_FASTCLOSERX), + SNMP_MIB_ITEM("MPFastcloseTX", MPTCP_MIB_FASTCLOSETX), + SNMP_MIB_ITEM("MPFallbackAckSub", MPTCP_MIB_FBACKSUB), + SNMP_MIB_ITEM("MPFallbackAckInit", MPTCP_MIB_FBACKINIT), + SNMP_MIB_ITEM("MPFallbackDataSub", MPTCP_MIB_FBDATASUB), + SNMP_MIB_ITEM("MPFallbackDataInit", MPTCP_MIB_FBDATAINIT), + SNMP_MIB_ITEM("MPRemoveAddrSubDelete", MPTCP_MIB_REMADDRSUB), + SNMP_MIB_ITEM("MPJoinNoTokenFound", MPTCP_MIB_JOINNOTOKEN), + SNMP_MIB_ITEM("MPJoinAlreadyFallenback", MPTCP_MIB_JOINFALLBACK), + SNMP_MIB_ITEM("MPJoinSynTx", MPTCP_MIB_JOINSYNTX), + SNMP_MIB_ITEM("MPJoinSynRx", MPTCP_MIB_JOINSYNRX), + SNMP_MIB_ITEM("MPJoinSynAckRx", MPTCP_MIB_JOINSYNACKRX), + SNMP_MIB_ITEM("MPJoinSynAckHMacFailure", MPTCP_MIB_JOINSYNACKMAC), + SNMP_MIB_ITEM("MPJoinAckRx", MPTCP_MIB_JOINACKRX), + SNMP_MIB_ITEM("MPJoinAckHMacFailure", MPTCP_MIB_JOINACKMAC), + SNMP_MIB_ITEM("MPJoinAckMissing", MPTCP_MIB_JOINACKFAIL), + SNMP_MIB_ITEM("MPJoinAckRTO", MPTCP_MIB_JOINACKRTO), + SNMP_MIB_ITEM("MPJoinAckRexmit", MPTCP_MIB_JOINACKRXMIT), + SNMP_MIB_ITEM("NoDSSInWindow", MPTCP_MIB_NODSSWINDOW), + SNMP_MIB_ITEM("DSSNotMatching", MPTCP_MIB_DSSNOMATCH), + SNMP_MIB_ITEM("InfiniteMapRx", MPTCP_MIB_INFINITEMAPRX), + SNMP_MIB_ITEM("DSSNoMatchTCP", MPTCP_MIB_DSSTCPMISMATCH), + SNMP_MIB_ITEM("DSSTrimHead", MPTCP_MIB_DSSTRIMHEAD), + SNMP_MIB_ITEM("DSSSplitTail", MPTCP_MIB_DSSSPLITTAIL), + SNMP_MIB_ITEM("DSSPurgeOldSubSegs", MPTCP_MIB_PURGEOLD), + SNMP_MIB_ITEM("AddAddrRx", MPTCP_MIB_ADDADDRRX), + SNMP_MIB_ITEM("AddAddrTx", MPTCP_MIB_ADDADDRTX), + SNMP_MIB_ITEM("RemAddrRx", MPTCP_MIB_REMADDRRX), + SNMP_MIB_ITEM("RemAddrTx", MPTCP_MIB_REMADDRTX), + SNMP_MIB_SENTINEL +}; + +struct workqueue_struct *mptcp_wq; +EXPORT_SYMBOL(mptcp_wq); + +/* Output /proc/net/mptcp */ +static int mptcp_pm_seq_show(struct seq_file *seq, void *v) +{ + struct tcp_sock *meta_tp; + struct sock *sk; + const struct net *net = seq->private; + int i, n = 0; + + seq_printf(seq, " sl loc_tok rem_tok v6 local_address remote_address st ns tx_queue rx_queue inode scheduler"); + seq_putc(seq, '\n'); + + for (i = 0; i < MPTCP_HASH_SIZE; i++) { + struct hlist_nulls_node *node; + rcu_read_lock_bh(); + hlist_nulls_for_each_entry_rcu(meta_tp, node, + &tk_hashtable[i], tk_table) { + struct mptcp_cb *mpcb = meta_tp->mpcb; + struct sock *meta_sk = (struct sock *)meta_tp; + struct inet_sock *isk = inet_sk(meta_sk); + + if (!mptcp(meta_tp) || !net_eq(net, sock_net(meta_sk))) + continue; + + if (capable(CAP_NET_ADMIN)) { + seq_printf(seq, "%4d: %04X %04X ", n++, + mpcb->mptcp_loc_token, + mpcb->mptcp_rem_token); + } else { + seq_printf(seq, "%4d: %04X %04X ", n++, -1, -1); + } + if (meta_sk->sk_family == AF_INET || + mptcp_v6_is_v4_mapped(meta_sk)) { + seq_printf(seq, " 0 %08X:%04X %08X:%04X ", + isk->inet_rcv_saddr, + ntohs(isk->inet_sport), + isk->inet_daddr, + ntohs(isk->inet_dport)); +#if IS_ENABLED(CONFIG_IPV6) + } else if 
(meta_sk->sk_family == AF_INET6) { + struct in6_addr *src = &meta_sk->sk_v6_rcv_saddr; + struct in6_addr *dst = &meta_sk->sk_v6_daddr; + seq_printf(seq, " 1 %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X", + src->s6_addr32[0], src->s6_addr32[1], + src->s6_addr32[2], src->s6_addr32[3], + ntohs(isk->inet_sport), + dst->s6_addr32[0], dst->s6_addr32[1], + dst->s6_addr32[2], dst->s6_addr32[3], + ntohs(isk->inet_dport)); +#endif + } + seq_printf(seq, " %02X %02X %08X:%08X %lu %s", + meta_sk->sk_state, mpcb->cnt_subflows, + meta_tp->write_seq - meta_tp->snd_una, + max_t(int, meta_tp->rcv_nxt - + meta_tp->copied_seq, 0), + sock_i_ino(meta_sk), mpcb->sched_ops->name); + seq_putc(seq, '\n'); + +#if 0 + // added more stats per subflow... maybe we should move this to a new file + seq_printf(seq, " snd rcv srtt mdev packets_out retrans_out snd_cwnd\n"); + + for ((sk) = (struct sock *)(mpcb)->connection_list; sk; sk = (struct sock *)tcp_sk(sk)->mptcp->next) { + //mptcp_for_each_sk(mpcb, sk) { + struct tcp_sock *tp = tcp_sk(sk); + struct inet_sock *isk_tmp = inet_sk(sk); + + seq_printf(seq, "%15llu%15llu%15u%15u%15u%15u%15u %#x %i.%i.%i.%i:%i %i.%i.%i.%i:%i\n", + tp->mptcp->bytes_snd, + tp->mptcp->bytes_rcv, + tp->srtt_us, tp->mdev_us, + tp->packets_out, + tp->retrans_out, + tp->snd_cwnd, + (unsigned int) tp, + isk_tmp->inet_rcv_saddr & 0x000000FF, + (isk_tmp->inet_rcv_saddr & 0x0000FF00)>>8, + (isk_tmp->inet_rcv_saddr & 0x00FF0000)>>16, + (isk_tmp->inet_rcv_saddr & 0xFF000000)>>24, + ntohs(isk_tmp->inet_sport), + isk_tmp->inet_daddr & 0x000000FF, + (isk_tmp->inet_daddr & 0x0000FF00)>>8, + (isk_tmp->inet_daddr & 0x00FF0000)>>16, + (isk_tmp->inet_daddr & 0xFF000000)>>24, + ntohs(isk_tmp->inet_dport) + ); + } +#endif + } + + rcu_read_unlock_bh(); + } + + return 0; +} + +static int mptcp_pm_seq_open(struct inode *inode, struct file *file) +{ + return single_open_net(inode, file, mptcp_pm_seq_show); +} + +static const struct file_operations mptcp_pm_seq_fops = { + .owner = THIS_MODULE, + .open = mptcp_pm_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release_net, +}; + +static int mptcp_snmp_seq_show(struct seq_file *seq, void *v) +{ + struct net *net = seq->private; + int i; + + for (i = 0; mptcp_snmp_list[i].name != NULL; i++) + seq_printf(seq, "%-32s\t%ld\n", mptcp_snmp_list[i].name, + snmp_fold_field(net->mptcp.mptcp_statistics, + mptcp_snmp_list[i].entry)); + + return 0; +} + +static int mptcp_snmp_seq_open(struct inode *inode, struct file *file) +{ + return single_open_net(inode, file, mptcp_snmp_seq_show); +} + +static const struct file_operations mptcp_snmp_seq_fops = { + .owner = THIS_MODULE, + .open = mptcp_snmp_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release_net, +}; + +static int mptcp_pm_init_net(struct net *net) +{ + net->mptcp.mptcp_statistics = alloc_percpu(struct mptcp_mib); + if (!net->mptcp.mptcp_statistics) + goto out_mptcp_mibs; + +#ifdef CONFIG_PROC_FS + net->mptcp.proc_net_mptcp = proc_net_mkdir(net, "mptcp_net", net->proc_net); + if (!net->mptcp.proc_net_mptcp) + goto out_proc_net_mptcp; + if (!proc_create("mptcp", S_IRUGO, net->mptcp.proc_net_mptcp, + &mptcp_pm_seq_fops)) + goto out_mptcp_net_mptcp; + if (!proc_create("snmp", S_IRUGO, net->mptcp.proc_net_mptcp, + &mptcp_snmp_seq_fops)) + goto out_mptcp_net_snmp; +#endif + + return 0; + +#ifdef CONFIG_PROC_FS +out_mptcp_net_snmp: + remove_proc_entry("mptcp", net->mptcp.proc_net_mptcp); +out_mptcp_net_mptcp: + remove_proc_subtree("mptcp_net", net->proc_net); + 
net->mptcp.proc_net_mptcp = NULL; +out_proc_net_mptcp: + free_percpu(net->mptcp.mptcp_statistics); +#endif +out_mptcp_mibs: + return -ENOMEM; +} + +static void mptcp_pm_exit_net(struct net *net) +{ + remove_proc_entry("snmp", net->mptcp.proc_net_mptcp); + remove_proc_entry("mptcp", net->mptcp.proc_net_mptcp); + remove_proc_subtree("mptcp_net", net->proc_net); + free_percpu(net->mptcp.mptcp_statistics); +} + +static struct pernet_operations mptcp_pm_proc_ops = { + .init = mptcp_pm_init_net, + .exit = mptcp_pm_exit_net, +}; + +/* General initialization of mptcp */ +void __init mptcp_init(void) +{ + int i; + struct ctl_table_header *mptcp_sysctl; + + mptcp_sock_cache = kmem_cache_create("mptcp_sock", + sizeof(struct mptcp_tcp_sock), + 0, SLAB_HWCACHE_ALIGN, + NULL); + if (!mptcp_sock_cache) + goto mptcp_sock_cache_failed; + + mptcp_cb_cache = kmem_cache_create("mptcp_cb", sizeof(struct mptcp_cb), + 0, SLAB_DESTROY_BY_RCU|SLAB_HWCACHE_ALIGN, + NULL); + if (!mptcp_cb_cache) + goto mptcp_cb_cache_failed; + + mptcp_tw_cache = kmem_cache_create("mptcp_tw", sizeof(struct mptcp_tw), + 0, SLAB_DESTROY_BY_RCU|SLAB_HWCACHE_ALIGN, + NULL); + if (!mptcp_tw_cache) + goto mptcp_tw_cache_failed; + + get_random_bytes(mptcp_secret, sizeof(mptcp_secret)); + + mptcp_wq = alloc_workqueue("mptcp_wq", WQ_UNBOUND | WQ_MEM_RECLAIM, 8); + if (!mptcp_wq) + goto alloc_workqueue_failed; + + for (i = 0; i < MPTCP_HASH_SIZE; i++) { + INIT_HLIST_NULLS_HEAD(&tk_hashtable[i], i); + INIT_HLIST_NULLS_HEAD(&mptcp_reqsk_htb[i], + i + MPTCP_REQSK_NULLS_BASE); + INIT_HLIST_NULLS_HEAD(&mptcp_reqsk_tk_htb[i], i); + } + + spin_lock_init(&mptcp_reqsk_hlock); + spin_lock_init(&mptcp_tk_hashlock); + + if (register_pernet_subsys(&mptcp_pm_proc_ops)) + goto pernet_failed; + +#if IS_ENABLED(CONFIG_IPV6) + if (mptcp_pm_v6_init()) + goto mptcp_pm_v6_failed; +#endif + if (mptcp_pm_v4_init()) + goto mptcp_pm_v4_failed; + + mptcp_sysctl = register_net_sysctl(&init_net, "net/mptcp", mptcp_table); + if (!mptcp_sysctl) + goto register_sysctl_failed; + + if (mptcp_register_path_manager(&mptcp_pm_default)) + goto register_pm_failed; + + if (mptcp_register_scheduler(&mptcp_sched_default)) + goto register_sched_failed; + + pr_info("MPTCP: Stable release v0.89.0-rc"); + + mptcp_init_failed = false; + + return; + +register_sched_failed: + mptcp_unregister_path_manager(&mptcp_pm_default); +register_pm_failed: + unregister_net_sysctl_table(mptcp_sysctl); +register_sysctl_failed: + mptcp_pm_v4_undo(); +mptcp_pm_v4_failed: +#if IS_ENABLED(CONFIG_IPV6) + mptcp_pm_v6_undo(); +mptcp_pm_v6_failed: +#endif + unregister_pernet_subsys(&mptcp_pm_proc_ops); +pernet_failed: + destroy_workqueue(mptcp_wq); +alloc_workqueue_failed: + kmem_cache_destroy(mptcp_tw_cache); +mptcp_tw_cache_failed: + kmem_cache_destroy(mptcp_cb_cache); +mptcp_cb_cache_failed: + kmem_cache_destroy(mptcp_sock_cache); +mptcp_sock_cache_failed: + mptcp_init_failed = true; +} diff --git a/net/mptcp/mptcp_input.c b/net/mptcp/mptcp_input.c index 95ff08d06562b..523c0dc1eb6ea 100644 --- a/net/mptcp/mptcp_input.c +++ b/net/mptcp/mptcp_input.c @@ -35,6 +35,10 @@ #include +// for rbs specific stuff :-( +#include "mptcp_rbs_queue.h" +#include "mptcp_rbs_sched.h" + /* is seq1 < seq2 ? 
*/ static inline bool before64(const u64 seq1, const u64 seq2) { @@ -124,6 +128,20 @@ static void mptcp_clean_rtx_queue(struct sock *meta_sk, u32 prior_snd_una) if (!fully_acked) break; + /* RBS specific stuff */ + if (mptcp_rbs_is_sched_used(meta_tp)) { + struct mptcp_rbs_cb* rbs_cb = mptcp_rbs_get_cb(meta_tp); + mptcp_debug("checking in mptcp_input for queue position\n"); + + if(skb == rbs_cb->queue_position) { + mptcp_debug("rbs has to correct queue position old %p\n", rbs_cb->queue_position); + mptcp_rbs_advance_send_head(meta_sk, &rbs_cb->queue_position); + mptcp_debug("rbs had to correct queue position %p\n", rbs_cb->queue_position); + } + } + mptcp_debug("unlinking skb %p from sk_send_queue\n", skb); + /* RBS specific stuff END */ + tcp_unlink_write_queue(skb, meta_sk); if (mptcp_is_data_fin(skb)) { @@ -159,6 +177,13 @@ static void mptcp_clean_rtx_queue(struct sock *meta_sk, u32 prior_snd_una) break; } + mptcp_debug("unlinking skb %p from reinject_queue %p with reinject->next %p\n", skb, &mpcb->reinject_queue, mpcb->reinject_queue.next); + if(skb) { + mptcp_debug("skb->next = %p and skb->prev = %p\n", skb->next, skb->prev); + } else { + printk("##### skb is null\n"); + } + __skb_unlink(skb, &mpcb->reinject_queue); __kfree_skb(skb); } @@ -872,6 +897,12 @@ static int mptcp_validate_mapping(struct sock *sk, struct sk_buff *skb) return 0; } +//#define AFR_OOO_RECEIVE_VERBOSE + +/* checks all subflows ofo queues except sk if something fits */ +void mptcp_afr_all_ofo_queue(struct sock *meta_sk, struct sock *sk); + + /* @return: 0 everything is fine. Just continue processing * 1 subflow is broken stop everything * -1 this mapping has been put in the meta-receive-queue @@ -887,6 +918,10 @@ static int mptcp_queue_skb(struct sock *sk) u32 old_copied_seq = tp->copied_seq; bool data_queued = false; +#ifdef AFR_OOO_RECEIVE_VERBOSE + printk("%s afr_ofo for sk %p\n", __func__, sk); +#endif + /* Have we not yet received the full mapping? */ if (!tp->mptcp->mapping_present || before(tp->rcv_nxt, tp->mptcp->map_subseq + tp->mptcp->map_data_len)) @@ -967,7 +1002,9 @@ static int mptcp_queue_skb(struct sock *sk) * Then, kfree_skb_partial will not account the memory. */ skb_orphan(tmp1); - +#ifdef AFR_OOO_RECEIVE_VERBOSE + printk("%s afr_ofo test if segment has already been received with skb->end_seq %u after meta_tp->rcv_nxt %u\n", __func__, TCP_SKB_CB(tmp1)->end_seq, meta_tp->rcv_nxt); +#endif /* This segment has already been received */ if (!after(TCP_SKB_CB(tmp1)->end_seq, meta_tp->rcv_nxt)) { __kfree_skb(tmp1); @@ -997,6 +1034,8 @@ static int mptcp_queue_skb(struct sock *sk) /* Check if this fills a gap in the ofo queue */ if (!skb_queue_empty(&meta_tp->out_of_order_queue)) mptcp_ofo_queue(meta_sk); + /* Whenever we check the ofo_queue, we check all sbf ofos */ + mptcp_afr_all_ofo_queue(meta_sk, sk); if (eaten) kfree_skb_partial(tmp1, fragstolen); @@ -1016,6 +1055,190 @@ static int mptcp_queue_skb(struct sock *sk) return data_queued ? 
-1 : -2; } +void afr_ofo_push(struct sock *sk) { + struct tcp_sock *tp = tcp_sk(sk), *meta_tp = mptcp_meta_tp(tp); + struct sock *meta_sk = mptcp_meta_sk(sk); + struct sk_buff *tmp, *skb; + + if(!mptcp_ooo_opt) { + return; + } + + if(tp->out_of_order_queue.qlen > 0) { +#ifdef AFR_OOO_RECEIVE_VERBOSE + printk("%s afr_ooo checking sbf %p out_of_order_queue.qlen %u\n", __func__, sk, tp->out_of_order_queue.qlen); +#endif + /* + * traverse the whole queue + * + * there might be lower data_seq with higher subflow_seq: + * repeat loop whenever we match something and we saw out of order data_seq + */ + while(true) { + bool out_of_order_data_seq = false; + u32 last_data_seq = 0; + bool matched = false; + + skb_queue_walk_safe(&tp->out_of_order_queue, skb, tmp) + { + u32 *ptr; + u32 data_seq, sub_seq, data_len; + +#ifdef AFR_OOO_RECEIVE_VERBOSE + printk("%s afr_ooo packet %p with seq %u and end_seq %u and len %u\n", __func__, skb, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq, TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq);; +#endif + + /* No mapping here? Exit - for a first version we only accept packets with mapping */ + /* TODO: check if old mapping still maps this sequence number */ + if (!mptcp_is_data_seq(skb)) { + // no ifdef, at this is really important! + printk("%s afr__ooo packet %p in ofo has no mapping... go to next skb...\n", __func__, skb); + continue; + } + + ptr = mptcp_skb_set_data_seq(skb, &data_seq, NULL); + ptr++; + sub_seq = get_unaligned_be32(ptr) + tp->mptcp->rcv_isn; + ptr++; + data_len = get_unaligned_be16(ptr); +#ifdef AFR_OOO_RECEIVE_VERBOSE + printk("packet %p in ofo queue with data_seq %u sub_seq %u and data_len %u\n", skb, data_seq, sub_seq, data_len); + printk("comparing meta_tp->rcv_nxt %u and data_seq %u\n", meta_tp->rcv_nxt, data_seq); +#endif + if(!before(last_data_seq, data_seq)) { +#ifdef AFR_OOO_RECEIVE_VERBOSE + printk("found out of order data_seq in the sbf ooo queue\n"); +#endif + out_of_order_data_seq = true; + } + + if (before(meta_tp->rcv_nxt, data_seq)) { + /* + * Seg's have to go to the meta-ofo-queue + * + * We avoid this, as most packets in heterogeneous networks would + * go into meta-ofo-queue. Instead, we check again whenever we get new packets. + */ + + // TODO really most packets? actually only those which are ooo in both layers. + } else { + /* Ready for the meta-rcv-queue */ + bool eaten = false; + bool fragstolen = false; + struct sk_buff* skb_cpy = NULL; + + /* Design decision: we keep all packets in the + * subflow receive queue (no unlinking) to keep + * changes minimal. + */ + u32 data_end_seq = data_seq + data_len; +#ifdef AFR_OOO_RECEIVE_VERBOSE + printk("comparing data_end_seq %u and meta_tp->rcv_nxt %u\n", data_end_seq, meta_tp->rcv_nxt); +#endif + + if (!after(data_end_seq , meta_tp->rcv_nxt)) { +#ifdef AFR_OOO_RECEIVE_VERBOSE + printk("packet %p with meta_end_seq %u is already fully in meta_receive_queue\n", skb, data_end_seq); +#endif + continue; + } + + // I did not check the implementation, but packetdrill 3 checks that partially + // overlapping numbers work + + // design decision: copy the packet, so the old subflow logic still has its own copy + + skb_cpy = pskb_copy_for_clone(skb, GFP_ATOMIC); +#ifdef AFR_OOO_RECEIVE_VERBOSE + printk("ok, it will fit in the meta, so we COPY it to be sure... old %p to new skb_cpy %p\n", skb, skb_cpy); +#endif + + if(!skb_cpy) { + // no space available... actually, it is not so important + // but no ifdef here, as this is interesting! 
+ printk("%s failed copying packet\n", __func__); + return; + } + + matched = true; + + // we have to recalculate the seq numbers... stolen from prepare_skb + /* Adapt data-seq's to the packet itself. We kinda transform the + * dss-mapping to a per-packet granularity. This is necessary to + * correctly handle overlapping mappings coming from different + * subflows. Otherwise it would be a complete mess. + */ + TCP_SKB_CB(skb_cpy)->seq = data_seq; + TCP_SKB_CB(skb_cpy)->end_seq = data_end_seq; + +// printk("%s matched ooo packet for meta_sk %p on sbf %p with sbf seq %u end_seq %u and meta seq %u end_seq %u with sbf.rq.count %u and tp->rcv_nxt %u\n", +// __func__, meta_sk, sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq, data_seq, data_end_seq, tp->out_of_order_queue.qlen, tp->rcv_nxt); + mptcp_ooo_number_matches++; + + /* Is direct copy possible ? */ + if (TCP_SKB_CB(skb_cpy)->seq == meta_tp->rcv_nxt && + meta_tp->ucopy.task == current && + meta_tp->copied_seq == meta_tp->rcv_nxt && + meta_tp->ucopy.len && sock_owned_by_user(meta_sk)) { + +// printk("%s uses direct copy for skb_cpy %p\n", __func__, skb_cpy); + eaten = mptcp_direct_copy(skb_cpy, meta_sk); +// printk("%s direct copy returned %u\n", __func__, eaten); + } + + if (meta_tp->mpcb->in_time_wait) { /* In time-wait, do not receive data */ + printk("%s eats skb as we are in time wait\n", __func__); + eaten = 1; + } + + if (!eaten) { +#ifdef AFR_OOO_RECEIVE_VERBOSE + printk("we put skb_cpy %p into the meta receive queue\n", skb_cpy); +#endif + eaten = tcp_queue_rcv(meta_sk, skb_cpy, 0, &fragstolen); + } + + meta_tp->rcv_nxt = TCP_SKB_CB(skb_cpy)->end_seq; + // TODO do we need this? + //mptcp_check_rcvseq_wrap(meta_tp, old_rcv_nxt); + + /* Check if this fills a gap in the ofo queue */ + if (!skb_queue_empty(&meta_tp->out_of_order_queue)) + mptcp_ofo_queue(meta_sk); + /* Whenever we check the ofo_queue, we check all sbf ofos */ + mptcp_afr_all_ofo_queue(meta_sk, sk); + } + } + + if(!matched) + break; + if(!out_of_order_data_seq) + break; +#ifdef AFR_OOO_RECEIVE_VERBOSE + printk("%s matched packet and saw out of order data seq... iterate one more time\n", __func__); +#endif + } + } +} + +/* checks all subflows ofo queues except sk if something fits */ +void mptcp_afr_all_ofo_queue(struct sock *meta_sk, struct sock *not_sk) { + struct sock *sk; + struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb; + + if(!mptcp_ooo_opt) { + return; + } + + mptcp_for_each_sk(mpcb, sk) { + if(sk == not_sk) + continue; + if(tcp_sk(sk)->out_of_order_queue.qlen > 0) { + afr_ofo_push(sk); + } + } +} + void mptcp_data_ready(struct sock *sk) { struct sock *meta_sk = mptcp_meta_sk(sk); @@ -1035,30 +1258,50 @@ void mptcp_data_ready(struct sock *sk) tcp_sk(sk)->copied_seq = tcp_sk(sk)->rcv_nxt; goto exit; } - +#ifdef AFR_OOO_RECEIVE_VERBOSE + printk("%s afr_ofo for sk %p with sk_receive_queue.qlen %u called by %pS\n", __func__, sk, sk->sk_receive_queue.qlen, __builtin_return_address(0)); +#endif /* Iterate over all segments, detect their mapping (if we don't have * one yet), validate them and push everything one level higher. 
*/ skb_queue_walk_safe(&sk->sk_receive_queue, skb, tmp) { int ret; +#ifdef AFR_OOO_RECEIVE_VERBOSE + printk("%s afr_ofo for sk %p walking skb %p with seq %u and end_seq %u and diff_seq %u\n", __func__, sk, skb, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq, TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq); +#endif /* Pre-validation - e.g., early fallback */ ret = mptcp_prevalidate_skb(sk, skb); if (ret < 0) goto restart; else if (ret > 0) break; - +#ifdef AFR_OOO_RECEIVE_VERBOSE + printk("%s afr_ofo prevalidation of skb %p successfull\n", __func__, skb); +#endif /* Set the current mapping */ ret = mptcp_detect_mapping(sk, skb); if (ret < 0) goto restart; else if (ret > 0) break; - +#ifdef AFR_OOO_RECEIVE_VERBOSE + printk("%s afr_ofo set mapping of skb %p successfull\n", __func__, skb); +#endif /* Validation */ if (mptcp_validate_mapping(sk, skb) < 0) goto restart; +#ifdef AFR_OOO_RECEIVE_VERBOSE + printk("%s afr_ofo validate mapping of skb %p successfull\n", __func__, skb); +#endif + // some statistics + { + struct tcp_sock *tp = tcp_sk(sk); + tp->mptcp->bytes_rcv += skb->len; + } +#ifdef AFR_OOO_RECEIVE_VERBOSE + printk("%s afr_ofo pushing a level higher\n", __func__); +#endif /* Push a level higher */ ret = mptcp_queue_skb(sk); if (ret < 0) { @@ -1072,14 +1315,30 @@ void mptcp_data_ready(struct sock *sk) } } + /* in the current implementation, + * out of order packets of the subflow are not pushed + * to the meta socket. + * + * There are 2 implementation alternatives: + * 1. push all subflow ofos to the meta socket. + * This implies a lot of ofos which would be solved by... + * 2. only push a subflow ofo if it fits into the + * meta socket receive queue. + */ + afr_ofo_push(sk); + exit: if (tcp_sk(sk)->close_it) { tcp_send_ack(sk); tcp_sk(sk)->ops->time_wait(sk, TCP_TIME_WAIT, 0); } - if (queued == -1 && !sock_flag(meta_sk, SOCK_DEAD)) + if (queued == -1 && !sock_flag(meta_sk, SOCK_DEAD)) { +#ifdef AFR_OOO_RECEIVE_VERBOSE + printk("%s afr_ofo calls meta_sk->data_ready for meta_sk %p\n", __func__, meta_sk); +#endif meta_sk->sk_data_ready(meta_sk); + } } @@ -1414,6 +1673,9 @@ void mptcp_fin(struct sock *meta_sk) return; } +void __mptcp_reinject_data(struct sk_buff *orig_skb, struct sock *meta_sk, + struct sock *sk, int clone_it); + static void mptcp_xmit_retransmit_queue(struct sock *meta_sk) { struct tcp_sock *meta_tp = tcp_sk(meta_sk); @@ -1423,11 +1685,30 @@ static void mptcp_xmit_retransmit_queue(struct sock *meta_sk) return; tcp_for_write_queue(skb, meta_sk) { - if (skb == tcp_send_head(meta_sk)) + if (skb == tcp_send_head(meta_sk)) { + printk("%s breaks at skb %p = sk_send_head\n", __func__, skb); break; + } - if (mptcp_retransmit_skb(meta_sk, skb)) - return; + printk("mptcp_xmit_retransmit_queue retransmits skb %p\n", skb); + + // rbs: check if we already sent this skb (not the data, the skb) + if (!tcp_skb_pcount(skb)) { + /* this is some kind of heritage... we put it in the reinject queue some days ago, but + actually this situation should not occur... + the current solution does not crash, but should not happen... therefore, print it */ + printk("%s wants to retransmit skb %p with seq %u but no pcount... skip it\n", __func__, skb, TCP_SKB_CB(skb)->seq); + + // put it in reinjection queue would be "correct", however, than we would be out of order... + // maybe we will directly xmit it here... + // maybe we will remove it from sending queue, and continue without cloning... 
+ // mptcp_debug("rbs detected sk_send_head before unsent packets for retransmission of skb %p, put copy into reinjection queue\n", skb); + // __mptcp_reinject_data(skb, meta_sk, NULL, 1); + } else { + // was already sent, so we can use retransmit... + if (mptcp_retransmit_skb(meta_sk, skb)) + return; + } if (skb == tcp_write_queue_head(meta_sk)) inet_csk_reset_xmit_timer(meta_sk, ICSK_TIME_RETRANS, @@ -1439,14 +1720,28 @@ static void mptcp_xmit_retransmit_queue(struct sock *meta_sk) /* Handle the DATA_ACK */ static void mptcp_data_ack(struct sock *sk, const struct sk_buff *skb) { - struct sock *meta_sk = mptcp_meta_sk(sk); + /*struct sock *meta_sk = mptcp_meta_sk(sk); struct tcp_sock *meta_tp = tcp_sk(meta_sk), *tp = tcp_sk(sk); struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); - u32 prior_snd_una = meta_tp->snd_una; + u32 prior_snd_una = meta_tp->snd_una;*/ + struct sock *meta_sk; + struct tcp_sock *meta_tp; + struct tcp_sock *tp; + struct tcp_skb_cb *tcb; + u32 prior_snd_una; + int prior_packets; u32 nwin, data_ack, data_seq; u16 data_len = 0; + meta_sk = mptcp_meta_sk(sk); + meta_tp = tcp_sk(meta_sk); + tp = tcp_sk(sk); + tcb = TCP_SKB_CB(skb); + prior_snd_una = meta_tp->snd_una; + + + /* A valid packet came in - subflow is operational again */ tp->pf = 0; @@ -1524,12 +1819,18 @@ static void mptcp_data_ack(struct sock *sk, const struct sk_buff *skb) inet_csk(meta_sk)->icsk_probes_out = 0; meta_tp->rcv_tstamp = tcp_time_stamp; prior_packets = meta_tp->packets_out; + + if (meta_tp->mpcb->sched_ops->update_stats) + meta_tp->mpcb->sched_ops->update_stats(sk, skb, 0, 2); + if (!prior_packets) goto no_queue; meta_tp->snd_una = data_ack; + mptcp_debug("now going to mptcp_clean_rtx_queue for meta_sk %p with prior_snd_una %u\n", meta_sk, prior_snd_una); mptcp_clean_rtx_queue(meta_sk, prior_snd_una); + mptcp_debug("we are back from cleaning\n"); /* We are in loss-state, and something got acked, retransmit the whole * queue now! 
@@ -2146,6 +2447,9 @@ int mptcp_handle_options(struct sock *sk, const struct tcphdr *th, { struct tcp_sock *tp = tcp_sk(sk); struct mptcp_options_received *mopt = &tp->mptcp->rx_opt; + + if (tp->mpcb->sched_ops->update_stats) + tp->mpcb->sched_ops->update_stats(sk, skb, 0, 2); if (tp->mpcb->infinite_mapping_rcv || tp->mpcb->infinite_mapping_snd) return 0; diff --git a/net/mptcp/mptcp_ipv6.c b/net/mptcp/mptcp_ipv6.c index bd57015399498..9daa69964f921 100644 --- a/net/mptcp/mptcp_ipv6.c +++ b/net/mptcp/mptcp_ipv6.c @@ -208,7 +208,7 @@ int mptcp_v6_do_rcv(struct sock *meta_sk, struct sk_buff *skb) &tcp_hashinfo, &ip6h->saddr, th->source, &ip6h->daddr, ntohs(th->dest), - inet6_iif(skb)); + tcp_v6_iif(skb)); if (!sk) { kfree_skb(skb); @@ -284,7 +284,7 @@ int mptcp_v6_do_rcv(struct sock *meta_sk, struct sk_buff *skb) */ req = inet6_csk_search_req(meta_sk, th->source, &ipv6_hdr(skb)->saddr, - &ipv6_hdr(skb)->daddr, inet6_iif(skb)); + &ipv6_hdr(skb)->daddr, tcp_v6_iif(skb)); if (req) { inet_csk_reqsk_queue_drop(meta_sk, req); diff --git a/net/mptcp/mptcp_output.c b/net/mptcp/mptcp_output.c index 3a6b675e8983c..f72f3b962af28 100644 --- a/net/mptcp/mptcp_output.c +++ b/net/mptcp/mptcp_output.c @@ -35,6 +35,7 @@ #include #include #include +#include "mptcp_rbs_queue.h" static const int mptcp_dss_len = MPTCP_SUB_LEN_DSS_ALIGN + MPTCP_SUB_LEN_ACK_ALIGN + @@ -97,10 +98,23 @@ static bool mptcp_is_reinjected(const struct sk_buff *skb) static void mptcp_find_and_set_pathmask(const struct sock *meta_sk, struct sk_buff *skb) { struct sk_buff *skb_it; - + u32 counter = 0; skb_it = tcp_write_queue_head(meta_sk); tcp_for_write_queue_from(skb_it, meta_sk) { + counter++; + + if(counter == 1000) { + /*printk("%s called more than %u times with qlen %u for seq %u... we break at 100 000\n", __func__, counter, meta_sk->sk_write_queue.qlen, TCP_SKB_CB(skb)->seq); + printk("Caller is %pS\n", __builtin_return_address(0)); + printk("Callerer is %pS\n", __builtin_return_address(1));*/ + } + + if(counter > 1000 * 100) { + printk("ProgMP: %s breaking with %u and qlen %u\n", __func__, counter, meta_sk->sk_write_queue.qlen); + break; + } + if (skb_it == tcp_send_head(meta_sk)) break; @@ -114,7 +128,7 @@ static void mptcp_find_and_set_pathmask(const struct sock *meta_sk, struct sk_bu /* Reinject data from one TCP subflow to the meta_sk. If sk == NULL, we are * coming from the meta-retransmit-timer */ -static void __mptcp_reinject_data(struct sk_buff *orig_skb, struct sock *meta_sk, +void __mptcp_reinject_data(struct sk_buff *orig_skb, struct sock *meta_sk, struct sock *sk, int clone_it) { struct sk_buff *skb, *skb1; @@ -123,11 +137,27 @@ static void __mptcp_reinject_data(struct sk_buff *orig_skb, struct sock *meta_sk u32 seq, end_seq; if (clone_it) { + mptcp_debug("%s going to clone %p for rq %p\n", __func__, orig_skb, &mpcb->reinject_queue); /* pskb_copy is necessary here, because the TCP/IP-headers * will be changed when it's going to be reinjected on another * subflow. 
*/ + skb = pskb_copy_for_clone(orig_skb, GFP_ATOMIC); + + /* afr added this as the cloned skb might be null and comparing rbs leads to null pointer otherwise */ + if (unlikely(!skb)) { + printk("%s failed cloning sb %p\n", __func__, orig_skb); + return; + } + + if (TCP_SKB_CB(orig_skb)->mptcp_rbs.user != TCP_SKB_CB(skb)->mptcp_rbs.user) { + printk("cloning skb %p with rbs %i leads to rbs %i\n", orig_skb, TCP_SKB_CB(orig_skb)->mptcp_rbs.user, TCP_SKB_CB(skb)->mptcp_rbs.user); + BUG(); + } + +// mptcp_debug("%s cloned %p and got %p with not_in_queue %u and not_in_queue %u\n", __func__, orig_skb, skb, +// TCP_SKB_CB(orig_skb)->mptcp_rbs_flags_not_in_queue, TCP_SKB_CB(skb)->mptcp_rbs_flags_not_in_queue); } else { __skb_unlink(orig_skb, &sk->sk_write_queue); sock_set_flag(sk, SOCK_QUEUE_SHRUNK); @@ -138,6 +168,9 @@ static void __mptcp_reinject_data(struct sk_buff *orig_skb, struct sock *meta_sk if (unlikely(!skb)) return; + /* now it is in the queue again */ + TCP_SKB_CB(skb)->mptcp_rbs.flags_not_in_queue = 0; + if (sk && !mptcp_reconstruct_mapping(skb)) { __kfree_skb(skb); return; @@ -145,9 +178,14 @@ static void __mptcp_reinject_data(struct sk_buff *orig_skb, struct sock *meta_sk skb->sk = meta_sk; + /* Reset subflow-specific TCP control-data */ + TCP_SKB_CB(skb)->sacked = 0; + TCP_SKB_CB(skb)->tcp_flags &= (TCPHDR_ACK | TCPHDR_PSH); + /* If it reached already the destination, we don't have to reinject it */ if (!after(TCP_SKB_CB(skb)->end_seq, meta_tp->snd_una)) { __kfree_skb(skb); + mptcp_debug("do not have to retransmit skb %p, reached already\n", skb); return; } @@ -183,11 +221,12 @@ static void __mptcp_reinject_data(struct sk_buff *orig_skb, struct sock *meta_sk /* We need to find out the path-mask from the meta-write-queue * to properly select a subflow. */ - mptcp_find_and_set_pathmask(meta_sk, skb); + mptcp_find_and_set_pathmask(meta_sk, skb); // why is this needed? /* If it's empty, just add */ if (skb_queue_empty(&mpcb->reinject_queue)) { skb_queue_head(&mpcb->reinject_queue, skb); + mptcp_debug("added skb %p as first packet to reinjection_queue %p with new queue size %u intern %u\n", skb, &mpcb->reinject_queue, skb_queue_len(&mpcb->reinject_queue), mpcb->reinject_queue.qlen ); return; } @@ -214,6 +253,7 @@ static void __mptcp_reinject_data(struct sk_buff *orig_skb, struct sock *meta_sk if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) { if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) { /* All the bits are present. Don't reinject */ + mptcp_debug("do not reinject skb %p, overlapping and fully present\n", skb); __kfree_skb(skb); return; } @@ -224,10 +264,13 @@ static void __mptcp_reinject_data(struct sk_buff *orig_skb, struct sock *meta_sk skb1 = skb_queue_prev(&mpcb->reinject_queue, skb1); } } - if (!skb1) + if (!skb1) { __skb_queue_head(&mpcb->reinject_queue, skb); - else + mptcp_debug("added skb %p as queue head for rq %p\n", skb, &mpcb->reinject_queue); + } else { __skb_queue_after(&mpcb->reinject_queue, skb1, skb); + mptcp_debug("added skb %p after other skb %p in queue with queue size %i for rq %p\n", skb, skb1, skb_queue_len(&mpcb->reinject_queue), &mpcb->reinject_queue); + } /* And clean segments covered by new one as whole. 
*/ while (!skb_queue_is_last(&mpcb->reinject_queue, skb)) { @@ -265,8 +308,11 @@ void mptcp_reinject_data(struct sock *sk, int clone_it) (tcb->tcp_flags & TCPHDR_FIN && mptcp_is_data_fin(skb_it) && !skb_it->len)) continue; - if (mptcp_is_reinjected(skb_it)) + mptcp_debug("check skb %p for reinjection\n", skb_it); + if (mptcp_is_reinjected(skb_it)) { + mptcp_debug("skb %p was already in reinjection queue, no need for reinjection\n", skb_it); continue; + } tcb->mptcp_flags |= MPTCP_REINJECT; __mptcp_reinject_data(skb_it, meta_sk, sk, clone_it); @@ -276,6 +322,7 @@ void mptcp_reinject_data(struct sock *sk, int clone_it) /* If sk has sent the empty data-fin, we have to reinject it too. */ if (skb_it && mptcp_is_data_fin(skb_it) && skb_it->len == 0 && TCP_SKB_CB(skb_it)->path_mask & mptcp_pi_to_flag(tp->mptcp->path_index)) { + //printk("%s reinjects the empty data-fin\n", __func__); __mptcp_reinject_data(skb_it, meta_sk, NULL, 1); } @@ -434,6 +481,8 @@ static bool mptcp_skb_entail(struct sock *sk, struct sk_buff *skb, int reinject) MPTCPHDR_SEQ64_INDEX : 0); subskb = pskb_copy_for_clone(skb, GFP_ATOMIC); + mptcp_debug("mptcp_skb_entail copied skb %p to subskb %p, skb->next %p\n", skb, subskb, skb->next); + if (!subskb) return false; @@ -524,6 +573,16 @@ static int mptcp_fragment(struct sock *meta_sk, struct sk_buff *skb, u32 len, diff = skb->data_len; old_factor = tcp_skb_pcount(skb); + //mptcp_debug("mptcp_fragment for meta_sk %p with skb %p of length %u with reinject %u\n", meta_sk, skb, len, reinject); + + if(!skb) { + printk("mptcp_fragement found skb as null with reinject %i\n", reinject); + } + + if(!skb->next) { + printk("mptcp_fragement found skb->next for skb %p as null with reinject %i\n", skb, reinject); + } + /* The mss_now in tcp_fragment is used to set the tso_segs of the skb. * At the MPTCP-level we do not care about the absolute value. 
All we * care about is that it is set to 1 for accurate packets_out @@ -576,6 +635,7 @@ int mptcp_write_wakeup(struct sock *meta_sk) return -1; skb = tcp_send_head(meta_sk); + mptcp_debug("%s called by %pS for skb %p\n", __func__, __builtin_return_address(0), skb); if (skb && before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(meta_tp))) { unsigned int mss; @@ -646,6 +706,19 @@ int mptcp_write_wakeup(struct sock *meta_sk) } } +bool is_tp_in_connection_list2(struct mptcp_cb *mpcb, struct tcp_sock* tp) { + struct tcp_sock* tmp = mpcb->connection_list; + + while(tmp) { + if(tmp == tp) { + return true; + } + + tmp = tmp->mptcp->next; + } + return false; +} + bool mptcp_write_xmit(struct sock *meta_sk, unsigned int mss_now, int nonagle, int push_one, gfp_t gfp) { @@ -657,31 +730,130 @@ bool mptcp_write_xmit(struct sock *meta_sk, unsigned int mss_now, int nonagle, unsigned int sublimit; __u32 path_mask = 0; +#define RBS_DEBUG + +#ifdef RBS_DEBUG + struct sk_buff* skb_last_iteration = NULL; + unsigned int iteration_count = 0; +#endif + + mptcp_debug("rbs before loop meta_sk->send_head = %p and queue_len(sk_write_queue) = %5i\n", tcp_send_head(meta_sk), skb_queue_len(&meta_sk->sk_write_queue)); + while ((skb = mpcb->sched_ops->next_segment(meta_sk, &reinject, &subsk, &sublimit))) { unsigned int limit; +#ifdef RBS_DEBUG + if(iteration_count > 1000 * 1000) { // once found a correct execution with more than 100 + printk("ProgMP aborts iterations in %s after %u rounds\n", __func__, iteration_count); + BUG(); + } + + if(skb_last_iteration == skb) { + iteration_count++; + } else { + skb_last_iteration = skb; + iteration_count = 0; + } +#endif + /* this fix is important, but should happen somewhere else... we do not want to overcount packets_out in + * tcp_event_new_data_sent, which is only called if reinject == 0... 
+ */ + if(reinject==0 && TCP_SKB_CB(skb)->path_mask) { + reinject = -1; + } + +// mptcp_debug("rbs in loop meta_sk->send_head = %p and queue_len(sk_write_queue) = %5i with skb %p and path_mask %u\n", +// tcp_send_head(meta_sk), skb_queue_len(&meta_sk->sk_write_queue), skb, TCP_SKB_CB(skb)->path_mask); + subtp = tcp_sk(subsk); + + /* + * rbs might return invalid subsk from its open actions + * + * before we use the subsk, ensure that it is still valid + * this test leads to an undefined symbol if executed in the scheduler, + * but would make more sense at this place + */ + if(mpcb->sched_ops->name[0] == 'r' && mpcb->sched_ops->name[1] == 'b' && mpcb->sched_ops->name[2] == 's') { + if(!is_tp_in_connection_list2(mpcb, (struct tcp_sock*) subsk)) { +// mptcp_debug("rbs recovers skb %p from outdated subflow %p\n", skb, subsk); + /* + * Actually, this might be a semantic problematic, as packets in the open action might overtake this packet + * + * We have to clone the skb for the reinjection queue, as it will remain in the sending_queue + */ + __mptcp_reinject_data(skb, meta_sk, NULL, 1); +// mptcp_debug("added packet %p as first packet to reinject queue with new queue size %i, skb->prev = %p, skb->next = %p\n", skb, skb_queue_len(&mpcb->reinject_queue), skb->prev, skb->next ); + + continue; + } + } + mss_now = tcp_current_mss(subsk); if (reinject == 1) { - if (!after(TCP_SKB_CB(skb)->end_seq, meta_tp->snd_una)) { - /* Segment already reached the peer, take the next one */ - __skb_unlink(skb, &mpcb->reinject_queue); - __kfree_skb(skb); - continue; + if(mpcb->sched_ops->name[0] == 'r' && mpcb->sched_ops->name[1] == 'b' && mpcb->sched_ops->name[2] == 's') { + /* + * rbs manages the queue by itself, but for debugging purposes, we might want to check again + */ +#ifdef RBS_DEBUG + if (!after(TCP_SKB_CB(skb)->end_seq, meta_tp->snd_una)) { + printk("ProgMP: found a reinjected skb %p with end_seq %u < snd_una %u in %s ... it was already acknowledged\n", skb, TCP_SKB_CB(skb)->end_seq, meta_tp->snd_una, __func__); + BUG(); + } +#endif + } else { + if (!after(TCP_SKB_CB(skb)->end_seq, meta_tp->snd_una)) { + /* Segment already reached the peer, take the next one */ + __skb_unlink(skb, &mpcb->reinject_queue); + __kfree_skb(skb); + continue; + } + } + } + + /* + * RBS copied from sched, without this coding, the fragmentation call later sometimes crashes + */ + if(mpcb->sched_ops->name[0] == 'r' && mpcb->sched_ops->name[1] == 'b' && mpcb->sched_ops->name[2] == 's') { + u32 max_segs; + u16 gso_max_segs = subsk->sk_gso_max_segs; + if (!gso_max_segs) /* No gso supported on the subflow's NIC */ + gso_max_segs = 1; + max_segs = min_t(unsigned int, tcp_cwnd_test(subtp, skb), gso_max_segs); + if (!max_segs) { +// mptcp_debug("RBS0 recover ... could not send skb %p\n", skb); + //if(mpcb->sched_ops->recover_skb) { + // mpcb->sched_ops->recover_skb(meta_sk, subsk, skb, reinject); + //} + //break; +// mptcp_debug("%s temporarly removed this to check what happens when we send more than cwnd\n", __func__); } } + /* + * RBS copied from sched end + */ /* If the segment was cloned (e.g. a meta retransmission), * the header must be expanded/copied so that there is no * corruption of TSO information. */ - if (skb_unclone(skb, GFP_ATOMIC)) + if (skb_unclone(skb, GFP_ATOMIC)) { +// mptcp_debug("RBS1 recover ... 
could not send skb %p\n", skb); + if(mpcb->sched_ops->recover_skb) { + mpcb->sched_ops->recover_skb(meta_sk, subsk, skb, reinject); + } break; + } - if (unlikely(!tcp_snd_wnd_test(meta_tp, skb, mss_now))) + if (unlikely(!tcp_snd_wnd_test(meta_tp, skb, mss_now))) { +// mptcp_debug("RBS2 could not send skb %p\n", skb); + if(mpcb->sched_ops->recover_skb) { + mpcb->sched_ops->recover_skb(meta_sk, subsk, skb, reinject); + } break; + } /* Force tso_segs to 1 by using UINT_MAX. * We actually don't care about the exact number of segments @@ -701,8 +873,13 @@ bool mptcp_write_xmit(struct sock *meta_sk, unsigned int mss_now, int nonagle, */ if (unlikely(!tcp_nagle_test(meta_tp, skb, mss_now, (tcp_skb_is_last(meta_sk, skb) ? - nonagle : TCP_NAGLE_PUSH)))) + nonagle : TCP_NAGLE_PUSH)))) { +// mptcp_debug("RBS3 could not send skb %p\n", skb); + if(mpcb->sched_ops->recover_skb) { + mpcb->sched_ops->recover_skb(meta_sk, subsk, skb, reinject); + } break; + } limit = mss_now; /* skb->len > mss_now is the equivalent of tso_segs > 1 in @@ -725,13 +902,40 @@ bool mptcp_write_xmit(struct sock *meta_sk, unsigned int mss_now, int nonagle, if (sublimit) limit = min(limit, sublimit); +#ifdef FRAGMENT_BUG_DBG + if(!skb) { +// printk("mptcp_next_segment found before fragement skb as null with reinject %i\n", reinject); + } + + if(!skb->next) { +// printk("mptcp_next_segment found before fragment skb->next for skb %p as null with reinject %i\n", skb, reinject); + } +#endif if (skb->len > limit && - unlikely(mptcp_fragment(meta_sk, skb, limit, gfp, reinject))) + unlikely(mptcp_fragment(meta_sk, skb, limit, gfp, reinject))) { +// mptcp_debug("RBS4 could not send skb %p\n", skb); + if(mpcb->sched_ops->recover_skb) { + mpcb->sched_ops->recover_skb(meta_sk, subsk, skb, reinject); + } break; + } - if (!mptcp_skb_entail(subsk, skb, reinject)) + if (!mptcp_skb_entail(subsk, skb, reinject)) { +// mptcp_debug("RBS5 could not send skb %p\n", skb); + if(mpcb->sched_ops->recover_skb) { + mpcb->sched_ops->recover_skb(meta_sk, subsk, skb, reinject); + } break; + } + + if(mpcb->sched_ops->update_stats) { + mpcb->sched_ops->update_stats(subsk, skb, skb->len, 0); + } + + /* Statisitics for the web */ + subtp->mptcp->bytes_snd += skb->len; + /* Nagle is handled at the MPTCP-layer, so * always push on the subflow */ @@ -749,14 +953,29 @@ bool mptcp_write_xmit(struct sock *meta_sk, unsigned int mss_now, int nonagle, tcp_minshall_update(meta_tp, mss_now, skb); if (reinject > 0) { - __skb_unlink(skb, &mpcb->reinject_queue); - kfree_skb(skb); + if(mpcb->sched_ops->name[0] == 'r' && mpcb->sched_ops->name[1] == 'b' && mpcb->sched_ops->name[2] == 's') { + /* + * rbs decides about unlinking, and always returns a copy of the skb if called with top, + * thus, calling free is safe at this position + */ + + // TODO a unlinked packet might cause problems in fragement, as fragement assumes the packet to be in a queue! 
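/*
 * Editor's note -- illustrative sketch, not part of the original patch:
 * the scheduler check above is open-coded as name[0] == 'r' &&
 * name[1] == 'b' && name[2] == 's' in several places inside
 * mptcp_write_xmit(). mptcp_rbs_ctx.h (added later in this patch) performs
 * the same test with strncmp() in mptcp_rbs_is_sched_used(); a small
 * helper on the mpcb, assuming the surrounding MPTCP types, could replace
 * the repeated character comparisons:
 *
 *	static inline bool mptcp_sched_is_rbs(const struct mptcp_cb *mpcb)
 *	{
 *		return !strncmp(mpcb->sched_ops->name, "rbs", 3);
 *	}
 *
 * Call sites would then read: if (mptcp_sched_is_rbs(mpcb)) { ... }
 * The helper name is hypothetical; only the strncmp() check itself is
 * taken from the patch.
 */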
+ //kfree_skb(skb);
+// mptcp_debug("%s pushed reinjected %p with next %p and qlen %u\n",
+// __func__, skb, skb->next, mpcb->reinject_queue.qlen);
+ } else {
+ __skb_unlink(skb, &mpcb->reinject_queue);
+ kfree_skb(skb);
+ }
 }
 if (push_one)
 break;
 }
+// mptcp_debug("rbs after loop meta_sk->send_head = %p and queue_len(sk_write_queue) = %5i and packets_out %u\n",
+// tcp_send_head(meta_sk), skb_queue_len(&meta_sk->sk_write_queue), meta_tp->packets_out);
+
 mptcp_for_each_sk(mpcb, subsk) {
 subtp = tcp_sk(subsk);
@@ -1216,7 +1435,13 @@ void mptcp_send_fin(struct sock *meta_sk)
 */
 mss_now = mptcp_current_mss(meta_sk);
- if (tcp_send_head(meta_sk) != NULL) {
+ if(tcp_send_head(meta_sk) != NULL && skb == NULL) {
+ printk("ProgMP would have crashed in %s as send_head is not in write queue. how can this happen?\n", __func__);
+ }
+
+ if (tcp_send_head(meta_sk) != NULL &&
+ skb != NULL /* afr: is this really a fix? Without it there is sometimes a crash at the end, because send_head points to something although the queue is empty. */) {
+
 TCP_SKB_CB(skb)->mptcp_flags |= MPTCPHDR_FIN;
 TCP_SKB_CB(skb)->end_seq++;
 meta_tp->write_seq++;
@@ -1385,6 +1610,8 @@ int mptcp_retransmit_skb(struct sock *meta_sk, struct sk_buff *skb)
 unsigned int limit, mss_now;
 int err = -1;
+ mptcp_debug("retransmit skb %p for meta_sk %p with skb->next = %p and skb->prev = %p\n", skb, meta_sk, skb->next, skb->prev);
+
 /* Do not sent more than we queued. 1/4 is reserved for possible
 * copying overhead: fragmentation, tunneling, mangling etc.
 *
@@ -1466,7 +1693,19 @@ void mptcp_meta_retransmit_timer(struct sock *meta_sk)
 struct tcp_sock *meta_tp = tcp_sk(meta_sk);
 struct mptcp_cb *mpcb = meta_tp->mpcb;
 struct inet_connection_sock *meta_icsk = inet_csk(meta_sk);
- int err;
+ int err = 0;
+
+/* mptcp_debug("######### afr meta_retransmit timer meta_sk->send_head %p , meta_sk->sk_write_queue-next %p, reinject_queue len %i, first packet in reinject queue %p called by %pS\n",
+ meta_sk->sk_send_head,
+ meta_sk->sk_write_queue.next,
+ skb_queue_len(&meta_tp->mpcb->reinject_queue),
+ skb_peek(&meta_tp->mpcb->reinject_queue),
+ __builtin_return_address(0));*/
+
+ if(tcp_write_queue_empty(meta_sk)) {
+ printk("ProgMP: abort meta retransmit as write queue is empty... why is this triggered at all?");
+ return;
+ }
 /* In fallback, retransmission is handled at the subflow-level */
 if (!meta_tp->packets_out || mpcb->infinite_mapping_snd)
@@ -1503,7 +1742,17 @@ void mptcp_meta_retransmit_timer(struct sock *meta_sk)
 return;
 }
- mptcp_retransmit_skb(meta_sk, tcp_write_queue_head(meta_sk));
+ mptcp_debug("meta_retransmit_timer wants to retransmit skb %p", tcp_write_queue_head(meta_sk));
+ /* rbs: check if we already sent this skb (not the data, the skb) */
+ if(!tcp_skb_pcount(tcp_write_queue_head(meta_sk))) {
+ // put it in reinjection queue would be "correct", however, then we would be out of order...
+ // maybe we will directly xmit it here...
+ // maybe we will remove it from sending queue, and continue without cloning...
+ mptcp_debug("rbs detected sk_send_head before unsent packets for retransmission of skb %p, put copy into reinjection queue\n", tcp_write_queue_head(meta_sk)); + __mptcp_reinject_data(tcp_write_queue_head(meta_sk), meta_sk, NULL, 1); + } else { + mptcp_retransmit_skb(meta_sk, tcp_write_queue_head(meta_sk)); + } goto out_reset_timer; } @@ -1515,7 +1764,21 @@ void mptcp_meta_retransmit_timer(struct sock *meta_sk) meta_icsk->icsk_ca_state = TCP_CA_Loss; - err = mptcp_retransmit_skb(meta_sk, tcp_write_queue_head(meta_sk)); + mptcp_debug("meta_retransmit_timer at second position wants to retransmit skb %p", tcp_write_queue_head(meta_sk)); + // TODO this code is just a copy of the stuff some lines above + /* rbs: check if we already sent this skb (not the data, the skb) */ + if(!tcp_skb_pcount(tcp_write_queue_head(meta_sk))) { + // put it in reinjection queue would be "correct", however, than we would be out of order... + // maybe we will directly xmit it here... + // maybe we will remove it from sending queue, and continue without cloning... + mptcp_debug("rbs detected sk_send_head before unsent packets for retransmission of skb %p at second position, put copy into reinjection queue\n", tcp_write_queue_head(meta_sk)); + // TODO jetzt beim zweiten draufschauen ist mir nicht mehr klar, warum wir das drin haben... wie kann write queue head ... naja + if(tcp_write_queue_head(meta_sk)) + __mptcp_reinject_data(tcp_write_queue_head(meta_sk), meta_sk, NULL, 1); + } else { + err = mptcp_retransmit_skb(meta_sk, tcp_write_queue_head(meta_sk)); + } + if (err > 0) { /* Retransmission failed because of local congestion, * do not backoff. @@ -1578,6 +1841,12 @@ void mptcp_sub_retransmit_timer(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); + /*printk("Sub retransmit Caller is %pS\n", __builtin_return_address(0)); + printk("Callerer is %pS\n", __builtin_return_address(1)); + mptcp_debug("######### sub_restransmit_timer meta_sk->send_head %p , reinject_queue len %i and queue %p and first entry %p called by %pS\n", + sk->sk_send_head, skb_queue_len(&tp->mpcb->reinject_queue), &tp->mpcb->reinject_queue, + tp->mpcb->reinject_queue.next, __builtin_return_address(0));*/ + tcp_retransmit_timer(sk); if (!tp->fastopen_rsk) { diff --git a/net/mptcp/mptcp_rbs_action.c b/net/mptcp/mptcp_rbs_action.c new file mode 100644 index 0000000000000..05c6b711fd87c --- /dev/null +++ b/net/mptcp/mptcp_rbs_action.c @@ -0,0 +1,44 @@ +#include "mptcp_rbs_action.h" +#include +#include +#include + +void mptcp_rbs_action_new(struct mptcp_rbs_actions *actions, bool high_priority, + enum mptcp_rbs_action_kind kind, struct tcp_sock *sbf, + struct sk_buff *skb, bool reinject) +{ + int i; + struct mptcp_rbs_action *action = NULL; + + /* Check if there is place in the static array */ + for (i = 0; i < STATIC_ACTIONS_NUM; ++i) { + if (!actions->static_actions[i].skb) { + action = &actions->static_actions[i]; + break; + } + } + + if (!action) + action = kmalloc(sizeof(struct mptcp_rbs_action), GFP_ATOMIC); + + action->next = NULL; + action->kind = kind; + action->sbf = sbf; + action->skb = skb; + action->end_seq = TCP_SKB_CB(skb)->end_seq; + action->reinject = reinject; + + if (high_priority) { + if (actions->first) + action->next = actions->first; + else + actions->last = action; + actions->first = action; + } else { + if (actions->last) + actions->last->next = action; + else + actions->first = action; + actions->last = action; + } +} diff --git a/net/mptcp/mptcp_rbs_action.h b/net/mptcp/mptcp_rbs_action.h new file mode 100644 index 
0000000000000..ef8edffb8a78b --- /dev/null +++ b/net/mptcp/mptcp_rbs_action.h @@ -0,0 +1,60 @@ +#ifndef _MPTCP_RBS_ACTION_H +#define _MPTCP_RBS_ACTION_H + +#include + +struct mptcp_rbs_cb; +struct sk_buff; +struct tcp_sock; + +enum mptcp_rbs_action_kind { ACTION_KIND_PUSH, ACTION_KIND_DROP }; + +/* Action during evaluation */ +struct mptcp_rbs_action { + struct mptcp_rbs_action *next; + enum mptcp_rbs_action_kind kind; + struct tcp_sock *sbf; + struct sk_buff *skb; + u32 end_seq; + bool reinject; +}; + +#define STATIC_ACTIONS_NUM 10 + +/* Multiple actions */ +struct mptcp_rbs_actions { + struct mptcp_rbs_action static_actions[STATIC_ACTIONS_NUM]; + struct mptcp_rbs_action *first; + struct mptcp_rbs_action *last; +}; + +#define FOREACH_ACTION(actions, kind_, sbf_, skb_, end_seq_, reinject_, cmds) \ + do { \ + while ((actions)->first) { \ + struct mptcp_rbs_action *__cur = (actions)->first; \ + kind_ = __cur->kind; \ + sbf_ = __cur->sbf; \ + skb_ = __cur->skb; \ + end_seq_ = __cur->end_seq; \ + reinject_ = __cur->reinject; \ + \ + (actions)->first = (actions)->first->next; \ + if (!(actions)->first) \ + (actions)->last = NULL; \ + if (__cur < &(actions)->static_actions[0] || \ + __cur > \ + &(actions) \ + ->static_actions[STATIC_ACTIONS_NUM - 1]) \ + kfree(__cur); \ + else \ + __cur->skb = NULL; \ + \ + cmds; \ + } \ + } while (0) + +void mptcp_rbs_action_new(struct mptcp_rbs_actions *actions, bool high_priority, + enum mptcp_rbs_action_kind kind, struct tcp_sock *sbf, + struct sk_buff *skb, bool reinject); + +#endif diff --git a/net/mptcp/mptcp_rbs_cfg.c b/net/mptcp/mptcp_rbs_cfg.c new file mode 100644 index 0000000000000..708e967ddba39 --- /dev/null +++ b/net/mptcp/mptcp_rbs_cfg.c @@ -0,0 +1,230 @@ +#include "mptcp_rbs_cfg.h" +#include "mptcp_rbs_parser.h" +#include "mptcp_rbs_smt.h" +#include "mptcp_rbs_value.h" +#include + +void mptcp_rbs_cfg_block_traverse(struct mptcp_rbs_cfg_block *block, + struct mptcp_rbs_cfg_block_list *list) +{ + struct mptcp_rbs_cfg_block *block2; + + /* Check if the block was already visited */ + FOREACH_BLOCK(list, block2, if (block == block2) return ); + ADD_BLOCK(list, block); + + if (block->next) + mptcp_rbs_cfg_block_traverse(block->next, list); + if (block->next_else) + mptcp_rbs_cfg_block_traverse(block->next_else, list); +} + +void mptcp_rbs_cfg_block_append(struct mptcp_rbs_cfg_block *block, + struct mptcp_rbs_smt *first_smt) +{ + struct mptcp_rbs_smt *smt; + + if (!block->first_smt) + block->first_smt = first_smt; + else { + smt = block->first_smt; + + while (smt->next) { + smt = smt->next; + } + + smt->next = first_smt; + } +} + +void mptcp_rbs_cfg_block_free(struct mptcp_rbs_cfg_block *block) +{ + if (block->first_smt) + mptcp_rbs_smts_free(block->first_smt); + MPTCP_RBS_VALUE_FREE(block->condition); + kfree(block); +} + +static void mptcp_rbs_cfg_free_helper(struct mptcp_rbs_cfg_block *block, + struct mptcp_rbs_cfg_block_list *list) +{ + struct mptcp_rbs_cfg_block *block2; + + /* Check if the block is already in the list */ + FOREACH_BLOCK(list, block2, if (block == block2) return ); + ADD_BLOCK(list, block); + + if (block->next) + mptcp_rbs_cfg_free_helper(block->next, list); + if (block->next_else) + mptcp_rbs_cfg_free_helper(block->next_else, list); +} + +void mptcp_rbs_cfg_blocks_free(struct mptcp_rbs_cfg_block *first_block) +{ + struct mptcp_rbs_cfg_block_list list; + struct mptcp_rbs_cfg_block *block; + + INIT_BLOCK_LIST(&list); + mptcp_rbs_cfg_free_helper(first_block, &list); + + FOREACH_BLOCK(&list, block, mptcp_rbs_cfg_block_free(block)); + 
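/*
 * Editor's note -- illustrative sketch, not part of the original patch:
 * the visited-list pattern used here (INIT_BLOCK_LIST, collect the
 * reachable blocks, FOREACH_BLOCK over the result, FREE_BLOCK_LIST) is
 * how a CFG with joins is walked without touching a block twice.
 * Assuming only the declarations from mptcp_rbs_cfg.h, counting all
 * reachable blocks could look like this (the function name is
 * hypothetical):
 *
 *	static int mptcp_rbs_cfg_count_blocks(struct mptcp_rbs_cfg_block *first)
 *	{
 *		struct mptcp_rbs_cfg_block_list list;
 *		struct mptcp_rbs_cfg_block *block;
 *		int count = 0;
 *
 *		INIT_BLOCK_LIST(&list);
 *		mptcp_rbs_cfg_block_traverse(first, &list);
 *		FOREACH_BLOCK(&list, block, ++count);
 *		FREE_BLOCK_LIST(&list);
 *		return count;
 *	}
 */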
FREE_BLOCK_LIST(&list); +} + +struct mptcp_rbs_cfg_block *mptcp_rbs_cfg_block_clone( + const struct mptcp_rbs_cfg_block *block, void *user_ctx, + mptcp_rbs_value_clone_user_func user_func) +{ + struct mptcp_rbs_cfg_block *clone; + struct mptcp_rbs_smt *smt = block->first_smt; + struct mptcp_rbs_smt *last_clone_smt = NULL; + + clone = kmalloc(sizeof(struct mptcp_rbs_cfg_block), GFP_ATOMIC); + *clone = *block; + + clone->first_smt = NULL; + while (smt) { + struct mptcp_rbs_smt *clone_smt = + mptcp_rbs_smt_clone(smt, user_ctx, user_func); + smt = smt->next; + + if (last_clone_smt) + last_clone_smt->next = clone_smt; + else + clone->first_smt = clone_smt; + last_clone_smt = clone_smt; + } + + clone->next = NULL; + clone->next_else = NULL; + if (clone->condition) + clone->condition = + (struct mptcp_rbs_value_bool *) mptcp_rbs_value_clone( + (struct mptcp_rbs_value *) clone->condition, user_ctx, + user_func); + + return clone; +} + +static struct mptcp_rbs_cfg_block *mptcp_rbs_cfg_blocks_clone_helper( + const struct mptcp_rbs_cfg_block *block, void *user_ctx, + mptcp_rbs_value_clone_user_func user_func, + struct mptcp_rbs_cfg_block_list *list) +{ + struct mptcp_rbs_cfg_block *clone; + + /* Check if the block was already cloned */ + FOREACH_BLOCK(list, clone, if (block == clone->tag) return clone); + + clone = mptcp_rbs_cfg_block_clone(block, user_ctx, user_func); + clone->tag = (struct mptcp_rbs_cfg_block *) block; + ADD_BLOCK(list, clone); + + if (block->next) + clone->next = mptcp_rbs_cfg_blocks_clone_helper( + block->next, user_ctx, user_func, list); + if (block->next_else) + clone->next_else = mptcp_rbs_cfg_blocks_clone_helper( + block->next_else, user_ctx, user_func, list); + + return clone; +} + +struct mptcp_rbs_cfg_block *mptcp_rbs_cfg_blocks_clone( + const struct mptcp_rbs_cfg_block *first_block, void *user_ctx, + mptcp_rbs_value_clone_user_func user_func) +{ + /* The list contains the new created blocks with their tags set to the + * old ones. 
This is important to allow concurrent copying + */ + struct mptcp_rbs_cfg_block *clone; + struct mptcp_rbs_cfg_block *block; + struct mptcp_rbs_cfg_block_list list; + + INIT_BLOCK_LIST(&list); + clone = mptcp_rbs_cfg_blocks_clone_helper(first_block, user_ctx, + user_func, &list); + FOREACH_BLOCK(&list, block, block->tag = NULL); + FREE_BLOCK_LIST(&list); + return clone; +} + +static int print_null_block(char **buffer, + struct mptcp_rbs_cfg_block_list *list) +{ + struct mptcp_rbs_cfg_block *block; + + /* Check if the block was already printed */ + FOREACH_BLOCK(list, block, if (block == NULL) return 0); + ADD_BLOCK(list, NULL); + + return sprintf_null(buffer, "%p:\n RETURN;\n\n", NULL); +} + +static int mptcp_rbs_cfg_block_print(const struct mptcp_rbs_cfg_block *block, + char **buffer, + struct mptcp_rbs_cfg_block_list *list) +{ + struct mptcp_rbs_cfg_block *block2; + int len; + int tmp_len; + const struct mptcp_rbs_smt *smt = block->first_smt; + + /* Check if the block was already printed */ + FOREACH_BLOCK(list, block2, if (block == block2) return 0); + ADD_BLOCK(list, (struct mptcp_rbs_cfg_block *) block); + + len = sprintf_null(buffer, "%p:\n", block); + + while (smt) { + len += sprintf_null(buffer, " "); + tmp_len = mptcp_rbs_smt_print(smt, *buffer); + len += tmp_len; + if (buffer && *buffer) + *buffer += tmp_len; + len += sprintf_null(buffer, "\n"); + smt = smt->next; + } + + if (block->condition) { + len += sprintf_null(buffer, " IF "); + tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) block->condition, *buffer); + len += tmp_len; + if (buffer && *buffer) + *buffer += tmp_len; + len += sprintf_null(buffer, " GOTO %p ELSE GOTO %p;\n\n", + block->next, block->next_else); + + if (block->next) + len += mptcp_rbs_cfg_block_print(block->next, buffer, + list); + else + len += print_null_block(buffer, list); + + if (block->next_else) + len += mptcp_rbs_cfg_block_print(block->next_else, + buffer, list); + else + len += print_null_block(buffer, list); + } else if (block->next) { + len += sprintf_null(buffer, " GOTO %p;\n\n", block->next); + len += mptcp_rbs_cfg_block_print(block->next, buffer, list); + } else + len += sprintf_null(buffer, " RETURN;\n\n"); + + return len; +} + +int mptcp_rbs_cfg_blocks_print(const struct mptcp_rbs_cfg_block *first_block, + char *buffer) +{ + struct mptcp_rbs_cfg_block_list list; + int len; + + INIT_BLOCK_LIST(&list); + len = mptcp_rbs_cfg_block_print(first_block, &buffer, &list); + FREE_BLOCK_LIST(&list); + return len; +} diff --git a/net/mptcp/mptcp_rbs_cfg.h b/net/mptcp/mptcp_rbs_cfg.h new file mode 100644 index 0000000000000..6548686f2f146 --- /dev/null +++ b/net/mptcp/mptcp_rbs_cfg.h @@ -0,0 +1,109 @@ +#ifndef _MPTCP_RBS_CFG_H +#define _MPTCP_RBS_CFG_H + +#include "mptcp_rbs_dynarray.h" + +/* + * Struct for a single block inside the control flow graph. + * Blocks are singly linked to save memory and remove the necessity to handle + * lists of multiple previous blocks. Back pointers would only be useful for + * optimizations at creation time but useless for the rest of the time. + */ +struct mptcp_rbs_cfg_block { + /* The next block or NULL if the execution ends after this block */ + struct mptcp_rbs_cfg_block *next; + /* + * The alternative next block if the block ends with an if instruction + * or NULL + */ + struct mptcp_rbs_cfg_block *next_else; + /* Condition if the block ends with an if instruction or NULL */ + struct mptcp_rbs_value_bool *condition; + /* + * This field can be used for any purpose i.e. 
to store information + * during optimization + */ + void *tag; + /* First statement in the block */ + struct mptcp_rbs_smt *first_smt; +}; + +/* + * Block lists + */ + +DECL_DA(mptcp_rbs_cfg_block_list, struct mptcp_rbs_cfg_block *); + +#define INIT_BLOCK_LIST(list) INIT_DA(list) + +#define FREE_BLOCK_LIST(list) FREE_DA(list) + +#define ADD_BLOCK(list, block) ADD_DA_ITEM(list, block) + +#define FOREACH_BLOCK(list, var, cmds) FOREACH_DA_ITEM(list, var, cmds) + +/* + * Traverses over all blocks and adds them to the list + */ +void mptcp_rbs_cfg_block_traverse(struct mptcp_rbs_cfg_block *block, + struct mptcp_rbs_cfg_block_list *list); + +/* + * Appends statements at the end of the block + */ +void mptcp_rbs_cfg_block_append(struct mptcp_rbs_cfg_block *block, + struct mptcp_rbs_smt *first_smt); + +/* + * Releases the passed control flow graph block and all its statements + */ +void mptcp_rbs_cfg_block_free(struct mptcp_rbs_cfg_block *block); + +/* + * Releases the passed control flow graph block and all its successors + */ +void mptcp_rbs_cfg_blocks_free(struct mptcp_rbs_cfg_block *first_block); + +#ifndef MPTCP_RBS_CLONE_USER_FUNC_DEFINED +#define MPTCP_RBS_CLONE_USER_FUNC_DEFINED +typedef struct mptcp_rbs_value *(*mptcp_rbs_value_clone_user_func)( + void *user_ctx, const struct mptcp_rbs_value *value); +#endif + +/* + * Creates a copy of a block and all its statements + * @block: The block to copy + * @user_ctx: User context for the user function or NULL + * @user_func: Function that is executed for each value or NULL. If this + * function returns a value other than NULL the current value is replaced with + * it instead of cloned + * Return: The new instance + */ +struct mptcp_rbs_cfg_block *mptcp_rbs_cfg_block_clone( + const struct mptcp_rbs_cfg_block *block, void *user_ctx, + mptcp_rbs_value_clone_user_func user_func); + +/* + * Creates a copy of a block, all its statements and all following blocks + * @first_block: The first block to copy + * @user_ctx: User context for the user function or NULL + * @user_func: Function that is executed for each value or NULL. 
If this + * function returns a value other than NULL the current value is replaced with + * it instead of cloned + * Return: The new instance + */ +struct mptcp_rbs_cfg_block *mptcp_rbs_cfg_blocks_clone( + const struct mptcp_rbs_cfg_block *first_block, void *user_ctx, + mptcp_rbs_value_clone_user_func user_func); + +/* + * Writes a string representation of a control flow graph block and all its + * successors to the given buffer + * @first_block: Pointer to the CFG block + * @buffer: Pointer to the buffer where the string should be stored or NULL + * Return: Number of written characters + */ +int mptcp_rbs_cfg_blocks_print(const struct mptcp_rbs_cfg_block *first_block, + char *buffer); + +#endif diff --git a/net/mptcp/mptcp_rbs_ctx.c b/net/mptcp/mptcp_rbs_ctx.c new file mode 100644 index 0000000000000..df00bf4d7cd2b --- /dev/null +++ b/net/mptcp/mptcp_rbs_ctx.c @@ -0,0 +1,13 @@ +#include "mptcp_rbs_ctx.h" + +bool mptcp_rbs_reg_value_set(struct tcp_sock *meta_tp, + struct mptcp_rbs_reg_value *value) +{ + struct mptcp_rbs_cb *rbs_cb = mptcp_rbs_get_cb(meta_tp); + + if (value->reg_num >= MPTCP_RBS_REG_COUNT) + return false; + + rbs_cb->regs[value->reg_num] = value->value; + return true; +} diff --git a/net/mptcp/mptcp_rbs_ctx.h b/net/mptcp/mptcp_rbs_ctx.h new file mode 100644 index 0000000000000..f4d5d249114d0 --- /dev/null +++ b/net/mptcp/mptcp_rbs_ctx.h @@ -0,0 +1,86 @@ +#ifndef _MPTCP_RBS_CTX_H +#define _MPTCP_RBS_CTX_H + +#include "mptcp_rbs_action.h" +#include "mptcp_rbs_var.h" +#include + +#define MPTCP_RBS_REG_COUNT 6 + +/* Central control block information per meta sock */ +struct mptcp_rbs_cb { + struct mptcp_rbs_scheduler *scheduler; + struct mptcp_rbs_scheduler_variation *variation; + struct mptcp_rbs_actions *open_actions; + unsigned int regs[MPTCP_RBS_REG_COUNT]; + struct sk_buff *queue_position; + u8 skb_prop; + u32 last_number_of_subflows; + u32 calls_since_sbf_change; + /* Execution counter for FOREACH loops. This count is used to detect if + * a loop is entered inside *_NEXT values + */ + u32 exec_count; + u32 highest_seq; + u32 execution_bucket; /* foreach pop and drop, it is increased by 5, foreach execution it is decreased by 1, if no bucket is left, switch to default scheduler! 
*/ +}; + +/* Central control block information per subflow */ +struct mptcp_rbs_sbf_cb { + /* average bw sent */ + u64 bw_out_last_update_ns; + u64 bw_out_bytes; + + /* average bw acknowledged */ + u64 bw_ack_last_update_ns; + u64 bw_ack_bytes; + + /* Delay measurement values */ + u32 delay_in; + u32 delay_out; + + s64 user; + + /* total size = 8 * 6 bytes = 48 bytes */ +}; + +struct mptcp_rbs_eval_ctx { + struct sock *meta_sk; + struct mptcp_cb *mpcb; + struct mptcp_rbs_cb *rbs_cb; + struct mptcp_rbs_var vars[MPTCP_RBS_MAX_VAR_COUNT]; + /* maybe we will have an int with flags in the future */ + bool side_effects; +}; + +/* Struct that is used to set register values */ +struct mptcp_rbs_reg_value { + unsigned int reg_num; + unsigned int value; +}; + +static inline bool mptcp_rbs_is_sched_used(struct tcp_sock *meta_tp) +{ + struct mptcp_cb *mpcb = meta_tp->mpcb; + + return mpcb && mpcb->sched_ops && + !strncmp(mpcb->sched_ops->name, "rbs", 3); +} + +static inline struct mptcp_rbs_cb *mptcp_rbs_get_cb(struct tcp_sock *meta_tp) +{ + struct mptcp_cb *mpcb = meta_tp->mpcb; + + return (struct mptcp_rbs_cb *) &mpcb->mptcp_sched[0]; +} + +static inline struct mptcp_rbs_sbf_cb *mptcp_rbs_get_sbf_cb( + struct tcp_sock *sbf) +{ + return (struct mptcp_rbs_sbf_cb *) &sbf->mptcp->mptcp_sched[0]; +} + +bool mptcp_rbs_reg_value_set(struct tcp_sock *meta_tp, + struct mptcp_rbs_reg_value *value); + +#endif diff --git a/net/mptcp/mptcp_rbs_dynarray.h b/net/mptcp/mptcp_rbs_dynarray.h new file mode 100644 index 0000000000000..ec37aee409d8d --- /dev/null +++ b/net/mptcp/mptcp_rbs_dynarray.h @@ -0,0 +1,94 @@ +#ifndef _MPTCP_RBS_DYNARRAY_H +#define _MPTCP_RBS_DYNARRAY_H + +#include +#include + +#define DECL_DA(name, item_type) \ + struct name { \ + item_type *items; \ + int len; \ + int capacity; \ + } + +#define INIT_DA(array) \ + do { \ + (array)->items = NULL; \ + (array)->len = 0; \ + (array)->capacity = 0; \ + } while (0) + +#define FREE_DA(array) kfree((array)->items) + +#define ADD_DA_ITEM(array, item) INSERT_DA_ITEM(array, (array)->len, item) + +#define ADD_DA_ITEM_EX(array, item, grow_func) \ + INSERT_DA_ITEM_EX(array, (array)->len, item, grow_func) + +#define INSERT_DA_ITEM(array, index, item) \ + INSERT_DA_ITEM_EX(array, index, item, \ + !(array)->capacity ? 
8 : (array)->capacity << 1) + +#define INSERT_DA_ITEM_EX(array, index, item, grow_func) \ + do { \ + if ((array)->len == (array)->capacity) { \ + (array)->capacity = grow_func; \ + (array)->items = krealloc((array)->items, \ + sizeof((array)->items[0]) * \ + (array)->capacity, \ + GFP_KERNEL); \ + } \ + \ + if ((index) != (array)->len) \ + memmove(&(array)[index + 1], &(array)[index], \ + ((array)->len - (index)) * \ + sizeof((array)->items[0])); \ + (array)->items[index] = item; \ + ++(array)->len; \ + } while (0) + +#define DELETE_DA_ITEM(array, index) \ + do { \ + BUG_ON((index) < 0); \ + BUG_ON((index) >= (array)->len); \ + \ + if ((index) < (array)->len - 1) \ + memmove((array)->items[index], \ + (array)->items[(index) + 1], \ + ((array)->len - 1 - (index)) * \ + sizeof((array)->items[0])); \ + \ + --(array)->len; \ + } while (0) + +#define GET_DA_LEN(array) (array)->len + +#define GET_DA_ITEM(array, index) \ + ({ \ + BUG_ON(index < 0 || index >= (array)->len); \ + (array)->items[index]; \ + }) + +#define FOREACH_DA_ITEM(array, var, cmds) \ + do { \ + typeof(var) *__item = (array)->items; \ + typeof(var) *__end = (array)->items + (array)->len; \ + while (__item != __end) { \ + var = *__item; \ + ++__item; \ + cmds; \ + } \ + } while (0) + +#define FOREACH_DA_ITEM_REV(array, var, cmds) \ + do { \ + typeof(var) *__item = (array)->items + (array)->len - 1; \ + typeof(var) *__end = (array)->items - 1; \ + while (__item != __end) { \ + var = *__item; \ + --__item; \ + cmds; \ + } \ + } while (0) + +#endif diff --git a/net/mptcp/mptcp_rbs_exec.c b/net/mptcp/mptcp_rbs_exec.c new file mode 100644 index 0000000000000..01c57b003b79c --- /dev/null +++ b/net/mptcp/mptcp_rbs_exec.c @@ -0,0 +1,50 @@ +#include "mptcp_rbs_exec.h" +#include "mptcp_rbs_cfg.h" +#include "mptcp_rbs_scheduler.h" +#include "mptcp_rbs_smt.h" + +void mptcp_rbs_exec(struct mptcp_rbs_eval_ctx *ctx) +{ + struct mptcp_rbs_scheduler_variation *variation = + ctx->rbs_cb->variation; + struct mptcp_rbs_cfg_block *block = variation->first_block; + int i; + +#ifdef CONFIG_MPTCP_RBSMEASURE + u64 time = __native_read_tsc(); +#endif + + while (block) { + struct mptcp_rbs_smt *smt = block->first_smt; + struct mptcp_rbs_value_bool *cond = block->condition; + + while (smt) { + smt->execute(smt, ctx); + smt = smt->next; + } + + if (cond) { + s32 b = cond->execute(cond, ctx); + if (b <= 0) { + /* + * Else should be executed if the condition + * evaluates to false or to null + */ + block = block->next_else; + continue; + } + } + + block = block->next; + } + +#ifdef CONFIG_MPTCP_RBSMEASURE + variation->exec_count += 1; + variation->total_time += __native_read_tsc() - time; +#endif + + /* Release allocated variables */ + for (i = 0; i < variation->used_vars; ++i) { + mptcp_rbs_var_free(&ctx->vars[i]); + } +} diff --git a/net/mptcp/mptcp_rbs_exec.h b/net/mptcp/mptcp_rbs_exec.h new file mode 100644 index 0000000000000..e13749bac270d --- /dev/null +++ b/net/mptcp/mptcp_rbs_exec.h @@ -0,0 +1,11 @@ +#ifndef _MPTCP_RBS_EXEC_H +#define _MPTCP_RBS_EXEC_H + +#include "mptcp_rbs_ctx.h" + +/* + * Executes the CFG in the given context + */ +void mptcp_rbs_exec(struct mptcp_rbs_eval_ctx *ctx); + +#endif diff --git a/net/mptcp/mptcp_rbs_lexer.c b/net/mptcp/mptcp_rbs_lexer.c new file mode 100644 index 0000000000000..8fd8991c65674 --- /dev/null +++ b/net/mptcp/mptcp_rbs_lexer.c @@ -0,0 +1,712 @@ +#include "mptcp_rbs_lexer.h" +#include +#include + +static bool inline is_whitespace(char c) +{ + return c == ' ' || c == '\t' || c == '\n' || c == '\r'; +} + +static 
bool inline is_linebreak(char c) +{ + return c == '\n'; +} + +static bool inline is_number(char c) +{ + return c >= '0' && c <= '9'; +} + +static bool inline is_char(char c) +{ + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_'; +} + +static char last_error[64]; + +bool mptcp_rbs_get_next_token(char const **str, int *position, + int *line, int *line_position, struct mptcp_rbs_token *token) +{ + char c; + + /* Jump over whitespaces */ + while (true) { + c = **str; + + if (c == 0) { + /* End of data found */ + token->kind = TOKEN_KIND_EOD; + token->position = *position; + return true; + } else if (!is_whitespace(c)) + break; + + ++*str; + ++*position; + ++*line_position; + + if (is_linebreak(c)) { + ++*line; + *line_position = 0; + } + } + + /* Store the position of this token and increase it for future calls */ + token->position = *position; + token->line = *line; + token->line_position = *line_position; + ++*str; + ++*position; + ++*line_position; + + switch (c) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': { + /* Must be a number literal */ + int value = c - '0'; + + while (true) { + c = **str; + + if (!is_number(c)) + break; + + value = value * 10 + c - '0'; + ++*str; + ++*position; + ++*line_position; + } + + token->kind = TOKEN_KIND_NUMBER; + token->number = value; + return true; + } + case '"': { + /* Must be a string literal */ + const char *start = *str; + int len; + + while (true) { + c = **str; + + if (c == 0 || c == '\n' || c == '\r') { + /* String was not terminated */ + memset(last_error, 0, sizeof(last_error)); + sprintf(last_error, "%d: string exceeds line", + *position); + return false; + } + + ++*str; + ++*position; + ++*line_position; + + if (c == '"') + break; + } + + len = *str - start - 1; + + /* Check if string is too long */ + if (len >= TOKEN_BUFFER_LEN) { + memset(last_error, 0, sizeof(last_error)); + sprintf(last_error, "%d: string is too long", + *position); + return false; + } + + token->kind = TOKEN_KIND_STRING; + memset(token->string, 0, TOKEN_BUFFER_LEN); + memcpy(token->string, start, len); + replace_escape_chars(token->string); + return true; + } + case '.': { + /* Must be . */ + token->kind = TOKEN_KIND_DOT; + return true; + } + case ',': { + /* Must be , */ + token->kind = TOKEN_KIND_COMMA; + return true; + } + case ';': { + /* Must be ; */ + token->kind = TOKEN_KIND_SEMICOLON; + return true; + } + case '!': { + /* Must be ! 
or != */ + if (**str == '=') { + ++*str; + ++*position; + ++*line_position; + + token->kind = TOKEN_KIND_UNEQUAL; + return true; + } + + token->kind = TOKEN_KIND_NOT; + return true; + } + case '(': { + /* Must be ( */ + token->kind = TOKEN_KIND_OPEN_BRACKET; + return true; + } + case ')': { + /* Must be ) */ + token->kind = TOKEN_KIND_CLOSE_BRACKET; + return true; + } + case '{': { + /* Must be { */ + token->kind = TOKEN_KIND_OPEN_CURLY; + return true; + } + case '}': { + /* Must be } */ + token->kind = TOKEN_KIND_CLOSE_CURLY; + return true; + } + case '=': { + /* Must be = or == */ + if (**str == '=') { + ++*str; + ++*position; + ++*line_position; + + token->kind = TOKEN_KIND_EQUAL; + return true; + } + + token->kind = TOKEN_KIND_ASSIGN; + return true; + } + case '<': { + /* Must be < or <= */ + if (**str == '=') { + ++*str; + ++*position; + ++*line_position; + + token->kind = TOKEN_KIND_LESS_EQUAL; + return true; + } + + token->kind = TOKEN_KIND_LESS; + return true; + } + case '>': { + /* Must be > or >= */ + if (**str == '=') { + ++*str; + ++*position; + ++*line_position; + + token->kind = TOKEN_KIND_GREATER_EQUAL; + return true; + } + + token->kind = TOKEN_KIND_GREATER; + return true; + } + case '+': { + /* Must be + */ + token->kind = TOKEN_KIND_ADD; + return true; + } + case '-': { + /* Must be - */ + token->kind = TOKEN_KIND_SUB; + return true; + } + case '*': { + /* Must be * */ + token->kind = TOKEN_KIND_MUL; + return true; + } + case '/': { + /* Might be a comment or / */ + if (**str == '*') { + int start_position = *position; + ++*str; + ++*position; + ++*line_position; + + while (true) { + c = **str; + ++*str; + ++*position; + ++*line_position; + + if (c == 0) { + /* End of comment is missing */ + memset(last_error, 0, + sizeof(last_error)); + sprintf(last_error, + "%d: Comment is not closed", + start_position); + return false; + } else if (is_linebreak(c)) { + ++*line; + *line_position = 0; + } else if (c == '*' && **str == '/') + break; + } + + ++*str; + ++*position; + ++*line_position; + return mptcp_rbs_get_next_token(str, position, line, line_position, token); + } + + token->kind = TOKEN_KIND_DIV; + return true; + } + case '%': { + /* Must be % */ + token->kind = TOKEN_KIND_REM; + return true; + } + default: { + const char *start = *str - 1; + int len; + + if (!is_char(c)) { + /* Illegal character found */ + memset(last_error, 0, sizeof(last_error)); + sprintf(last_error, "%d: illegal character %c", + *position, c); + return false; + } + + /* Must be keyword or identifier */ + while (true) { + c = **str; + + if (!is_char(c) && !is_number(c)) + break; + + ++*str; + ++*position; + } + + len = *str - start; + if (len == 2) { + if (!strncmp(start, "IF", len)) { + token->kind = TOKEN_KIND_IF; + return true; + } + if (!strncmp(start, "IN", len)) { + token->kind = TOKEN_KIND_IN; + return true; + } + if (!strncmp(start, "OR", len)) { + token->kind = TOKEN_KIND_OR; + return true; + } + } else if (len == 3) { + if (!strncmp(start, "AND", len)) { + token->kind = TOKEN_KIND_AND; + return true; + } + if (!strncmp(start, "SET", len)) { + token->kind = TOKEN_KIND_SET; + return true; + } + if (!strncmp(start, "VAR", len)) { + token->kind = TOKEN_KIND_VAR; + return true; + } + } else if (len == 4) { + if (!strncmp(start, "DROP", len)) { + token->kind = TOKEN_KIND_DROP; + return true; + } + if (!strncmp(start, "ELSE", len)) { + token->kind = TOKEN_KIND_ELSE; + return true; + } + if (!strncmp(start, "NULL", len)) { + token->kind = TOKEN_KIND_NULL; + return true; + } + if (!strncmp(start, 
"PUSH", len)) { + token->kind = TOKEN_KIND_PUSH; + return true; + } + if (!strncmp(start, "VOID", len)) { + token->kind = TOKEN_KIND_VOID; + return true; + } + } else if (len == 5) { + if (!strncmp(start, "PRINT", len)) { + token->kind = TOKEN_KIND_PRINT; + return true; + } + } else if (len == 6) { + if (!strncmp(start, "RETURN", len)) { + token->kind = TOKEN_KIND_RETURN; + return true; + } + } else if (len == 7) { + if (!strncmp(start, "FOREACH", len)) { + token->kind = TOKEN_KIND_FOREACH; + return true; + } + } else if (len == 8) { + if (!strncmp(start, "SET_USER", len)) { + token->kind = TOKEN_KIND_SET_USER; + return true; + } + } else if (len == 9) { + if (!strncmp(start, "SCHEDULER", len)) { + token->kind = TOKEN_KIND_SCHEDULER; + return true; + } + } else if (len >= TOKEN_BUFFER_LEN) { + /* Identifier is too long */ + memset(last_error, 0, sizeof(last_error)); + sprintf(last_error, "%d: identifier is too long", + *position); + return false; + } + + /* Must be identifier */ + token->kind = TOKEN_KIND_IDENT; + memset(token->string, 0, TOKEN_BUFFER_LEN); + memcpy(token->string, start, len); + return true; + } + } +} + +bool mptcp_rbs_get_next_token_lookahead(const char *str, int position, + int line, int line_position, struct mptcp_rbs_token *token) +{ + return mptcp_rbs_get_next_token(&str, &position, &line, &line_position, token); +} + +const char *mptcp_rbs_get_last_error(void) +{ + return last_error; +} + +void mptcp_rbs_token_kind_to_string(enum mptcp_rbs_token_kind kind, + char *buffer) +{ + switch (kind) { + case TOKEN_KIND_EOD: { + strcpy(buffer, "end of data"); + break; + } + case TOKEN_KIND_NUMBER: { + strcpy(buffer, "number"); + break; + } + case TOKEN_KIND_STRING: { + strcpy(buffer, "string"); + break; + } + case TOKEN_KIND_IDENT: { + strcpy(buffer, "identifier"); + break; + } + case TOKEN_KIND_NOT: { + strcpy(buffer, "!"); + break; + } + case TOKEN_KIND_ASSIGN: { + strcpy(buffer, "="); + break; + } + case TOKEN_KIND_EQUAL: { + strcpy(buffer, "=="); + break; + } + case TOKEN_KIND_UNEQUAL: { + strcpy(buffer, "!="); + break; + } + case TOKEN_KIND_LESS: { + strcpy(buffer, "<"); + break; + } + case TOKEN_KIND_LESS_EQUAL: { + strcpy(buffer, "<="); + break; + } + case TOKEN_KIND_GREATER: { + strcpy(buffer, ">"); + break; + } + case TOKEN_KIND_GREATER_EQUAL: { + strcpy(buffer, ">="); + break; + } + case TOKEN_KIND_ADD: { + strcpy(buffer, "+"); + break; + } + case TOKEN_KIND_SUB: { + strcpy(buffer, "-"); + break; + } + case TOKEN_KIND_MUL: { + strcpy(buffer, "*"); + break; + } + case TOKEN_KIND_DIV: { + strcpy(buffer, "/"); + break; + } + case TOKEN_KIND_REM: { + strcpy(buffer, "%"); + break; + } + case TOKEN_KIND_DOT: { + strcpy(buffer, "."); + break; + } + case TOKEN_KIND_COMMA: { + strcpy(buffer, ","); + break; + } + case TOKEN_KIND_SEMICOLON: { + strcpy(buffer, ";"); + break; + } + case TOKEN_KIND_OPEN_BRACKET: { + strcpy(buffer, "("); + break; + } + case TOKEN_KIND_CLOSE_BRACKET: { + strcpy(buffer, ")"); + break; + } + case TOKEN_KIND_OPEN_CURLY: { + strcpy(buffer, "{"); + break; + } + case TOKEN_KIND_CLOSE_CURLY: { + strcpy(buffer, "}"); + break; + } + case TOKEN_KIND_AND: { + strcpy(buffer, "AND"); + break; + } + case TOKEN_KIND_DROP: { + strcpy(buffer, "DROP"); + break; + } + case TOKEN_KIND_ELSE: { + strcpy(buffer, "ELSE"); + break; + } + case TOKEN_KIND_FOREACH: { + strcpy(buffer, "FOREACH"); + break; + } + case TOKEN_KIND_IF: { + strcpy(buffer, "IF"); + break; + } + case TOKEN_KIND_IN: { + strcpy(buffer, "IN"); + break; + } + case TOKEN_KIND_NULL: { + strcpy(buffer, 
"NULL"); + break; + } + case TOKEN_KIND_OR: { + strcpy(buffer, "OR"); + break; + } + case TOKEN_KIND_PRINT: { + strcpy(buffer, "PRINT"); + break; + } + case TOKEN_KIND_PUSH: { + strcpy(buffer, "PUSH"); + break; + } + case TOKEN_KIND_SET_USER: { + strcpy(buffer, "SET_USER"); + break; + } + case TOKEN_KIND_RETURN: { + strcpy(buffer, "RETURN"); + break; + } + case TOKEN_KIND_SCHEDULER: { + strcpy(buffer, "SCHEDULER"); + break; + } + case TOKEN_KIND_SET: { + strcpy(buffer, "SET"); + break; + } + case TOKEN_KIND_VAR: { + strcpy(buffer, "VAR"); + break; + } + case TOKEN_KIND_VOID: { + strcpy(buffer, "VOID"); + break; + } + } +} + +void mptcp_rbs_token_to_string(const struct mptcp_rbs_token *token, + char *buffer) +{ + switch (token->kind) { + case TOKEN_KIND_NUMBER: { + sprintf(buffer, "%d", token->number); + break; + } + case TOKEN_KIND_STRING: { + sprintf(buffer, "\"%s\"", token->string); + break; + } + case TOKEN_KIND_IDENT: { + sprintf(buffer, "%s", token->string); + break; + } + default: { + mptcp_rbs_token_kind_to_string(token->kind, buffer); + break; + } + } +} + +void replace_escape_chars(char *buffer) +{ + char *pos = buffer; + int remaining = strlen(buffer); + + while (remaining) { + char c = *pos; + ++pos; + --remaining; + + if (c == '\\' && remaining) { + bool is_escape = false; + + switch (*pos) { + case '\\': { + is_escape = true; + break; + } + case '\"': { + *(pos - 1) = '\"'; + is_escape = true; + break; + } + case 'n': { + *(pos - 1) = '\n'; + is_escape = true; + break; + } + case 'r': { + *(pos - 1) = '\r'; + is_escape = true; + break; + } + case 't': { + *(pos - 1) = '\t'; + is_escape = true; + break; + } + } + + if (is_escape) { + --remaining; + + if (remaining) + memcpy(pos, pos + 1, remaining); + *(pos + remaining) = 0; + } + } + } +} + +int replace_with_escape_chars(char *buffer, bool write) +{ + char *pos = buffer; + int len = strlen(buffer); + int remaining = len; + + while (remaining) { + char c = *pos; + ++pos; + --remaining; + + switch (c) { + case '\\': { + if (write) { + memmove(pos + 1, pos, remaining + 1); + *(pos - 1) = '\\'; + *pos = '\\'; + ++pos; + } + ++len; + break; + } + case '\"': { + if (write) { + memmove(pos + 1, pos, remaining + 1); + *(pos - 1) = '\\'; + *pos = '\"'; + ++pos; + } + ++len; + break; + } + case '\n': { + if (write) { + memmove(pos + 1, pos, remaining + 1); + *(pos - 1) = '\\'; + *pos = 'n'; + ++pos; + } + ++len; + break; + } + case '\r': { + if (write) { + memmove(pos + 1, pos, remaining + 1); + *(pos - 1) = '\\'; + *pos = 'r'; + ++pos; + } + ++len; + break; + } + case '\t': { + if (write) { + memmove(pos + 1, pos, remaining + 1); + *(pos - 1) = '\\'; + *pos = 't'; + ++pos; + } + ++len; + break; + } + } + } + + return len; +} diff --git a/net/mptcp/mptcp_rbs_lexer.h b/net/mptcp/mptcp_rbs_lexer.h new file mode 100644 index 0000000000000..a9d23e2192a46 --- /dev/null +++ b/net/mptcp/mptcp_rbs_lexer.h @@ -0,0 +1,109 @@ +#ifndef _MPTCP_RBS_LEXER_H +#define _MPTCP_RBS_LEXER_H + +#include + +#define TOKEN_BUFFER_LEN 64 + +/* Enumeration of possible token kinds */ +enum mptcp_rbs_token_kind { + /* End of data */ + TOKEN_KIND_EOD, + /* Number literal */ + TOKEN_KIND_NUMBER, + /* String literal */ + TOKEN_KIND_STRING, + /* Identifier */ + TOKEN_KIND_IDENT, + + TOKEN_KIND_NOT, + TOKEN_KIND_ASSIGN, + TOKEN_KIND_EQUAL, + TOKEN_KIND_UNEQUAL, + TOKEN_KIND_LESS, + TOKEN_KIND_LESS_EQUAL, + TOKEN_KIND_GREATER, + TOKEN_KIND_GREATER_EQUAL, + TOKEN_KIND_ADD, + TOKEN_KIND_SUB, + TOKEN_KIND_MUL, + TOKEN_KIND_DIV, + TOKEN_KIND_REM, + TOKEN_KIND_DOT, + 
TOKEN_KIND_COMMA, + TOKEN_KIND_SEMICOLON, + TOKEN_KIND_OPEN_BRACKET, + TOKEN_KIND_CLOSE_BRACKET, + TOKEN_KIND_OPEN_CURLY, + TOKEN_KIND_CLOSE_CURLY, + TOKEN_KIND_AND, + TOKEN_KIND_DROP, + TOKEN_KIND_ELSE, + TOKEN_KIND_FOREACH, + TOKEN_KIND_IF, + TOKEN_KIND_IN, + TOKEN_KIND_NULL, + TOKEN_KIND_OR, + TOKEN_KIND_PRINT, + TOKEN_KIND_PUSH, + TOKEN_KIND_SET_USER, + TOKEN_KIND_RETURN, + TOKEN_KIND_SCHEDULER, + TOKEN_KIND_SET, + TOKEN_KIND_VAR, + TOKEN_KIND_VOID +}; + +/* Struct for a single token */ +struct mptcp_rbs_token { + enum mptcp_rbs_token_kind kind; + int position; + int line; + int line_position; + union { + unsigned int number; + char string[TOKEN_BUFFER_LEN]; + }; +}; + +/* + * Returns the next token in a string + * @str: Pointer to the string + * @position: Pointer to the actual position in the string + * @token: Pointer to the token which should be filled + * @return: false if an error occurred. In this case call + * mptcp_rbs_get_last_error to get the error message + */ +bool mptcp_rbs_get_next_token(char const **str, int *position, + int *line, int *line_position, struct mptcp_rbs_token *token); + +/* + * Like mptcp_rbs_get_next_token but does not move to the next token + */ +bool mptcp_rbs_get_next_token_lookahead(const char *str, int position, + int line, int line_position, struct mptcp_rbs_token *token); + +/* + * Returns the last error message of the lexer + */ +const char *mptcp_rbs_get_last_error(void); + +void mptcp_rbs_token_kind_to_string(enum mptcp_rbs_token_kind kind, + char *buffer); + +void mptcp_rbs_token_to_string(const struct mptcp_rbs_token *token, + char *buffer); + +/* + * Replaces escape characters like \n with the actual characters inplace + */ +void replace_escape_chars(char *buffer); + +/* + * Replaces invisible characters like newline with escape characters inplace. 
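+ * The returned length does not include the terminating NUL. If write is true + * the caller must make sure the buffer is large enough to hold the escaped + * string.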
+ * If write is false the function only computes the length the buffer must have + * and returns it + */ +int replace_with_escape_chars(char *buffer, bool write); + +#endif diff --git a/net/mptcp/mptcp_rbs_multiplatform.h b/net/mptcp/mptcp_rbs_multiplatform.h new file mode 100644 index 0000000000000..f724005ce968d --- /dev/null +++ b/net/mptcp/mptcp_rbs_multiplatform.h @@ -0,0 +1,18 @@ +#ifndef MULTIPLATFORM_H +#define MULTIPLATFORM_H + +#define RBS_STATS + +#ifdef RBS_STATS + #define RBS_DO_STAT(stat, value) rbs_stats_update(stat, value); +#else +#define RBS_DO_STAT(stat, value) // nothing to do +#endif + +#define ALLOC(struct_name) kzalloc(sizeof(struct struct_name), GFP_ATOMIC); +#define FREE(instance) kfree(instance); + +#include +#include + +#endif diff --git a/net/mptcp/mptcp_rbs_optimizer.c b/net/mptcp/mptcp_rbs_optimizer.c new file mode 100644 index 0000000000000..10c14ca3ce26e --- /dev/null +++ b/net/mptcp/mptcp_rbs_optimizer.c @@ -0,0 +1,84 @@ +#include "mptcp_rbs_optimizer.h" +#include "mptcp_rbs_cfg.h" +#include "mptcp_rbs_optimizer_bm.h" +#include "mptcp_rbs_optimizer_cf.h" +#include "mptcp_rbs_optimizer_cve.h" +#include "mptcp_rbs_optimizer_dce.h" +#include "mptcp_rbs_optimizer_ebpf.h" +#include "mptcp_rbs_optimizer_lu.h" +#include "mptcp_rbs_optimizer_vi.h" +#include "mptcp_rbs_scheduler.h" +#include "mptcp_rbs_value.h" +#include +#include +#include +#include + +struct mptcp_rbs_opt_value_info *mptcp_rbs_opt_find_value_info( + struct mptcp_rbs_opt_ctx *ctx, struct mptcp_rbs_value *value) +{ + struct mptcp_rbs_opt_value_info *info = ctx->value_infos; + + while (info) { + if (info->value == value) + return info; + + info = info->next; + } + + return NULL; +} + +struct mptcp_rbs_opt_value_info *mptcp_rbs_opt_get_value_info( + struct mptcp_rbs_opt_ctx *ctx, struct mptcp_rbs_value *value) +{ + struct mptcp_rbs_opt_value_info *info = + mptcp_rbs_opt_find_value_info(ctx, value); + + if (!info) { + info = kzalloc(sizeof(struct mptcp_rbs_opt_value_info), + GFP_KERNEL); + info->next = ctx->value_infos; + info->value = value; + ctx->value_infos = info; + } + + return info; +} + +typedef void (*optimization_func)(struct mptcp_rbs_opt_ctx *); + +/** Array with optimizations that are executed in order */ +static const optimization_func pipeline[] = { + /* mptcp_rbs_opt_lu, */ mptcp_rbs_opt_cve, mptcp_rbs_opt_vi, + mptcp_rbs_opt_dce, mptcp_rbs_opt_cf, mptcp_rbs_opt_bm +}; + +void mptcp_rbs_optimize(struct mptcp_rbs_scheduler_variation *variation, + bool *terminate, int sbf_num, bool ebpf) +{ + int i; + struct mptcp_rbs_opt_ctx ctx; + + /* Fill the context */ + memset(&ctx, 0, sizeof(struct mptcp_rbs_opt_ctx)); + ctx.variation = variation; + ctx.variation->sbf_num = sbf_num; + + /* Apply optimizations in pipeline */ + for (i = 0; i < ARRAY_SIZE(pipeline) && !*terminate; ++i) { + pipeline[i](&ctx); + } + + /* Release the context */ + while (ctx.value_infos) { + struct mptcp_rbs_opt_value_info *info = ctx.value_infos; + ctx.value_infos = ctx.value_infos->next; + kfree(info); + } + + if (!*terminate && sbf_num && ebpf) { + /* Generate eBPF code */ + mptcp_rbs_opt_ebpf(&ctx); + } +} diff --git a/net/mptcp/mptcp_rbs_optimizer.h b/net/mptcp/mptcp_rbs_optimizer.h new file mode 100644 index 0000000000000..8a8eb63260a18 --- /dev/null +++ b/net/mptcp/mptcp_rbs_optimizer.h @@ -0,0 +1,90 @@ +#ifndef _MPTCP_RBS_OPTIMIZER_H +#define _MPTCP_RBS_OPTIMIZER_H + +#include "mptcp_rbs_var.h" + +struct mptcp_rbs_scheduler_variation; +struct mptcp_rbs_value; + +/** + * Information struct for values that contents 
are preserved over + * optimizations + */ +struct mptcp_rbs_opt_value_info { + struct mptcp_rbs_opt_value_info *next; + struct mptcp_rbs_value *value; + + /** Determines if the value is constant */ + bool is_const; + /** + * The evaluated value if the value is constant. + * -1 for NULL + * Booleans are encoded as 0 (false) and 1 (true) + * Integers are encoded normally + * Strings, subflows and sockbuffers cannot be stored + * The number of items can be stored for lists + */ + s64 const_value; +}; + +/** + * Information struct for variables that contents are preserved over + * optimizations + */ +struct mptcp_rbs_opt_var_info { + /** Declaration statement of the variable */ + struct mptcp_rbs_smt_var *smt; + /** Number of usages of the variable */ + int usage; +}; + +/** + * Context for optimization passes that contents are preserved over + * optimizations + */ +struct mptcp_rbs_opt_ctx { + struct mptcp_rbs_scheduler_variation *variation; + /** + * Singly linked list to "map" values to their information because they + * lack a tag field. For simplicity this is a list instead of a hash map + */ + struct mptcp_rbs_opt_value_info *value_infos; + /** + * Array that is used to connect variable indexes with their var + * statements and the number of usages of the variable + */ + struct mptcp_rbs_opt_var_info var_infos[MPTCP_RBS_MAX_VAR_COUNT]; +}; + +/** + * Searches for the corresponding value information + * @ctx: The optimization context + * @value: The value + * Return: The information of the value or NULL + */ +struct mptcp_rbs_opt_value_info *mptcp_rbs_opt_find_value_info( + struct mptcp_rbs_opt_ctx *ctx, struct mptcp_rbs_value *value); + +/** + * Searches for the corresponding value information. If none was found a new + * information struct is created for this value + * @ctx: The optimization context + * @value: The value + * Return: The information of the value + */ +struct mptcp_rbs_opt_value_info *mptcp_rbs_opt_get_value_info( + struct mptcp_rbs_opt_ctx *ctx, struct mptcp_rbs_value *value); + +/** + * Optimizes the given scheduler variation inplace. 
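The passes of the optimization pipeline are applied in order and may be followed by eBPF code generation.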
If terminate is set during + * optimization this function terminates + * @variation: The scheduler variation that should be optimized + * @terminate: Pointer to a value that aborts the optimization process if set to + * true + * @sbf_num: Fixed number of subflows the optimizer should optimize for or 0 + * @ebpf: Determines if eBPF code should be generated if possible + */ +void mptcp_rbs_optimize(struct mptcp_rbs_scheduler_variation *variation, + bool *terminate, int sbf_num, bool ebpf); + +#endif diff --git a/net/mptcp/mptcp_rbs_optimizer_bm.c b/net/mptcp/mptcp_rbs_optimizer_bm.c new file mode 100644 index 0000000000000..18daa05cc9b89 --- /dev/null +++ b/net/mptcp/mptcp_rbs_optimizer_bm.c @@ -0,0 +1,103 @@ +#include "mptcp_rbs_optimizer_bm.h" +#include "mptcp_rbs_cfg.h" +#include "mptcp_rbs_optimizer.h" +#include "mptcp_rbs_scheduler.h" +#include "mptcp_rbs_smt.h" +#include "mptcp_rbs_value.h" + +#define SET_INCOMING(block, val) (block)->tag = (void *) (val) +#define GET_INCOMING(block) ((size_t)(block)->tag) +#define INC_INCOMING(block) ++*((size_t *) &(block)->tag) +#define DEC_INCOMING(block) --*((size_t *) &(block)->tag) + +void mptcp_rbs_opt_bm(struct mptcp_rbs_opt_ctx *ctx) +{ + struct mptcp_rbs_cfg_block_list list; + struct mptcp_rbs_cfg_block *block; + struct mptcp_rbs_cfg_block *next_block; + bool modified; + + /* Fill tags of all blocks with number of incoming edges */ + INIT_BLOCK_LIST(&list); + mptcp_rbs_cfg_block_traverse(ctx->variation->first_block, &list); + FOREACH_BLOCK(&list, block, SET_INCOMING(block, 0)); + INC_INCOMING(ctx->variation->first_block); + FOREACH_BLOCK(&list, block, { + if (block->next) + INC_INCOMING(block->next); + if (block->next_else) + INC_INCOMING(block->next_else); + }); + + /* This one is a fix point algorithm */ + do { + modified = false; + + FOREACH_BLOCK(&list, block, { + if (GET_INCOMING(block) == 0) + continue; + + /* Remove condition if both paths point to the same + * block + */ + if (block->condition && + block->next == block->next_else) { + block->condition->free(block->condition); + block->condition = NULL; + block->next_else = NULL; + if (block->next) + DEC_INCOMING(block->next); + modified = true; + continue; + } + + if (block->next && GET_INCOMING(block->next) == 1) { + next_block = block->next; + + if (!block->condition) { + /* Merge blocks */ + mptcp_rbs_cfg_block_append( + block, next_block->first_smt); + block->condition = + next_block->condition; + block->next = next_block->next; + block->next_else = + next_block->next_else; + + next_block->first_smt = NULL; + next_block->condition = NULL; + next_block->next = NULL; + next_block->next_else = NULL; + DEC_INCOMING(next_block); + modified = true; + continue; + } else if (!next_block->first_smt && + !next_block->condition) { + /* Remove next block */ + block->next = next_block->next; + DEC_INCOMING(next_block); + modified = true; + continue; + } + } + + if (block->next_else && + GET_INCOMING(block->next_else) == 1) { + next_block = block->next_else; + + if (!next_block->first_smt && + !next_block->condition) { + /* Remove next else block */ + block->next_else = next_block->next; + DEC_INCOMING(next_block); + modified = true; + continue; + } + } + }); + } while (modified); + + FOREACH_BLOCK(&list, block, if (GET_INCOMING(block) == 0) + mptcp_rbs_cfg_block_free(block)); + FREE_BLOCK_LIST(&list); +} diff --git a/net/mptcp/mptcp_rbs_optimizer_bm.h b/net/mptcp/mptcp_rbs_optimizer_bm.h new file mode 100644 index 0000000000000..26722d342f311 --- /dev/null +++ 
b/net/mptcp/mptcp_rbs_optimizer_bm.h @@ -0,0 +1,13 @@ +#ifndef _MPTCP_RBS_OPTIMIZER_BM_H +#define _MPTCP_RBS_OPTIMIZER_BM_H + +struct mptcp_rbs_opt_ctx; + +/** + * Block Merging: + * Merges and removes empty blocks + * @ctx: The optimization context + */ +void mptcp_rbs_opt_bm(struct mptcp_rbs_opt_ctx *ctx); + +#endif diff --git a/net/mptcp/mptcp_rbs_optimizer_cf.c b/net/mptcp/mptcp_rbs_optimizer_cf.c new file mode 100644 index 0000000000000..d5f462078a7a5 --- /dev/null +++ b/net/mptcp/mptcp_rbs_optimizer_cf.c @@ -0,0 +1,977 @@ +#include "mptcp_rbs_optimizer_cf.h" +#include "mptcp_rbs_cfg.h" +#include "mptcp_rbs_optimizer.h" +#include "mptcp_rbs_scheduler.h" +#include "mptcp_rbs_smt.h" +#include "mptcp_rbs_value.h" + +#define IS_NULL(info) ((info) && (info)->is_const && (info)->const_value == -1) + +/* Since we cannot directly represent true or false in + * the language we have to use a comparison + */ +#define TRUE \ + ((struct mptcp_rbs_value *) mptcp_rbs_value_equal_new( \ + (struct mptcp_rbs_value_int *) mptcp_rbs_value_constint_new(0), \ + (struct mptcp_rbs_value_int *) mptcp_rbs_value_constint_new(0))) + +#define FALSE \ + ((struct mptcp_rbs_value *) mptcp_rbs_value_unequal_new( \ + (struct mptcp_rbs_value_int *) mptcp_rbs_value_constint_new(0), \ + (struct mptcp_rbs_value_int *) mptcp_rbs_value_constint_new(0))) + +static void opt_value(struct mptcp_rbs_opt_ctx *ctx, + struct mptcp_rbs_value **value_ptr) +{ + struct mptcp_rbs_value *value = *value_ptr; + struct mptcp_rbs_opt_value_info *info; + + /* Note: We can ignore constant NULL values here because they are + * propagated up to the root and as result this function is not called + */ + + switch (value->kind) { + case VALUE_KIND_CONSTINT: + case VALUE_KIND_CONSTSTRING: + case VALUE_KIND_NULL: { + /* Cannot be optimized any further */ + return; + } + case VALUE_KIND_BOOL_VAR: + case VALUE_KIND_INT_VAR: + case VALUE_KIND_STRING_VAR: + case VALUE_KIND_SBF_VAR: + case VALUE_KIND_SBFLIST_VAR: + case VALUE_KIND_SKB_VAR: + case VALUE_KIND_SKBLIST_VAR: { + /* Variables are inlined in an extra pass */ + return; + } + case VALUE_KIND_NOT: { + struct mptcp_rbs_value_not *not_value = + (struct mptcp_rbs_value_not *) value; + + info = mptcp_rbs_opt_find_value_info(ctx, value); + + if (info && info->is_const) { + if (info->const_value == 1) + *value_ptr = TRUE; + else + *value_ptr = FALSE; + + not_value->free(not_value); + } else + opt_value( + ctx, + (struct mptcp_rbs_value **) ¬_value->operand); + + return; + } + case VALUE_KIND_EQUAL: { + struct mptcp_rbs_value_equal *equal_value = + (struct mptcp_rbs_value_equal *) value; + + info = mptcp_rbs_opt_find_value_info(ctx, value); + + if (info && info->is_const) { + if (info->const_value == 1) + *value_ptr = TRUE; + else + *value_ptr = FALSE; + + equal_value->free(equal_value); + } else { + opt_value(ctx, (struct mptcp_rbs_value **) &equal_value + ->left_operand); + opt_value(ctx, (struct mptcp_rbs_value **) &equal_value + ->right_operand); + } + + return; + } + case VALUE_KIND_UNEQUAL: { + struct mptcp_rbs_value_unequal *unequal_value = + (struct mptcp_rbs_value_unequal *) value; + + info = mptcp_rbs_opt_find_value_info(ctx, value); + + if (info && info->is_const) { + if (info->const_value == 1) + *value_ptr = TRUE; + else + *value_ptr = FALSE; + + unequal_value->free(unequal_value); + } else { + opt_value(ctx, + (struct mptcp_rbs_value **) &unequal_value + ->left_operand); + opt_value(ctx, + (struct mptcp_rbs_value **) &unequal_value + ->right_operand); + } + + return; + } + case 
VALUE_KIND_LESS: { + struct mptcp_rbs_value_less *less_value = + (struct mptcp_rbs_value_less *) value; + + info = mptcp_rbs_opt_find_value_info(ctx, value); + + if (info && info->is_const) { + if (info->const_value == 1) + *value_ptr = TRUE; + else + *value_ptr = FALSE; + + less_value->free(less_value); + } else { + opt_value(ctx, (struct mptcp_rbs_value **) &less_value + ->left_operand); + opt_value(ctx, (struct mptcp_rbs_value **) &less_value + ->right_operand); + } + + return; + } + case VALUE_KIND_LESS_EQUAL: { + struct mptcp_rbs_value_less_equal *less_equal_value = + (struct mptcp_rbs_value_less_equal *) value; + + info = mptcp_rbs_opt_find_value_info(ctx, value); + + if (info && info->is_const) { + if (info->const_value == 1) + *value_ptr = TRUE; + else + *value_ptr = FALSE; + + less_equal_value->free(less_equal_value); + } else { + opt_value(ctx, + (struct mptcp_rbs_value **) &less_equal_value + ->left_operand); + opt_value(ctx, + (struct mptcp_rbs_value **) &less_equal_value + ->right_operand); + } + + return; + } + case VALUE_KIND_GREATER: { + struct mptcp_rbs_value_greater *greater_value = + (struct mptcp_rbs_value_greater *) value; + + info = mptcp_rbs_opt_find_value_info(ctx, value); + + if (info && info->is_const) { + if (info->const_value == 1) + *value_ptr = TRUE; + else + *value_ptr = FALSE; + + greater_value->free(greater_value); + } else { + opt_value(ctx, + (struct mptcp_rbs_value **) &greater_value + ->left_operand); + opt_value(ctx, + (struct mptcp_rbs_value **) &greater_value + ->right_operand); + } + + return; + } + case VALUE_KIND_GREATER_EQUAL: { + struct mptcp_rbs_value_greater_equal *greater_equal_value = + (struct mptcp_rbs_value_greater_equal *) value; + + info = mptcp_rbs_opt_find_value_info(ctx, value); + + if (info && info->is_const) { + if (info->const_value == 1) + *value_ptr = TRUE; + else + *value_ptr = FALSE; + + greater_equal_value->free(greater_equal_value); + } else { + opt_value(ctx, + (struct mptcp_rbs_value * + *) &greater_equal_value->left_operand); + opt_value(ctx, + (struct mptcp_rbs_value * + *) &greater_equal_value->right_operand); + } + + return; + } + case VALUE_KIND_AND: { + struct mptcp_rbs_value_and *and_value = + (struct mptcp_rbs_value_and *) value; + struct mptcp_rbs_opt_value_info *left_info; + struct mptcp_rbs_opt_value_info *right_info; + + info = mptcp_rbs_opt_find_value_info(ctx, value); + if (info && info->is_const) { + if (info->const_value == 1) + *value_ptr = TRUE; + else + *value_ptr = FALSE; + + and_value->free(and_value); + return; + } + + left_info = mptcp_rbs_opt_find_value_info( + ctx, (struct mptcp_rbs_value *) and_value->left_operand); + right_info = mptcp_rbs_opt_find_value_info( + ctx, (struct mptcp_rbs_value *) and_value->right_operand); + + if (left_info && left_info->is_const && + left_info->const_value == 1) { + struct mptcp_rbs_value_bool *right_operand = + and_value->right_operand; + + and_value->right_operand = NULL; + and_value->free(and_value); + + *value_ptr = (struct mptcp_rbs_value *) right_operand; + } else if (right_info && right_info->is_const && + right_info->const_value == 1) { + struct mptcp_rbs_value_bool *left_operand = + and_value->left_operand; + + and_value->left_operand = NULL; + and_value->free(and_value); + + *value_ptr = (struct mptcp_rbs_value *) left_operand; + } + + return; + } + case VALUE_KIND_OR: { + struct mptcp_rbs_value_or *or_value = + (struct mptcp_rbs_value_or *) value; + struct mptcp_rbs_opt_value_info *left_info; + struct mptcp_rbs_opt_value_info *right_info; + + info = 
mptcp_rbs_opt_find_value_info(ctx, value); + if (info && info->is_const) { + if (info->const_value == 1) + *value_ptr = TRUE; + else + *value_ptr = FALSE; + + or_value->free(or_value); + return; + } + + left_info = mptcp_rbs_opt_find_value_info( + ctx, (struct mptcp_rbs_value *) or_value->left_operand); + right_info = mptcp_rbs_opt_find_value_info( + ctx, (struct mptcp_rbs_value *) or_value->right_operand); + + if (left_info && left_info->is_const && + left_info->const_value == 0) { + struct mptcp_rbs_value_bool *right_operand = + or_value->right_operand; + + or_value->right_operand = NULL; + or_value->free(or_value); + + *value_ptr = (struct mptcp_rbs_value *) right_operand; + } else if (right_info && right_info->is_const && + right_info->const_value == 0) { + struct mptcp_rbs_value_bool *left_operand = + or_value->left_operand; + + or_value->left_operand = NULL; + or_value->free(or_value); + + *value_ptr = (struct mptcp_rbs_value *) left_operand; + } + + return; + } + case VALUE_KIND_ADD: { + struct mptcp_rbs_value_add *add_value = + (struct mptcp_rbs_value_add *) value; + + info = mptcp_rbs_opt_find_value_info(ctx, value); + + if (info && info->is_const) { + *value_ptr = (struct mptcp_rbs_value *) + mptcp_rbs_value_constint_new(info->const_value); + + add_value->free(add_value); + } else { + opt_value(ctx, (struct mptcp_rbs_value **) &add_value + ->left_operand); + opt_value(ctx, (struct mptcp_rbs_value **) &add_value + ->right_operand); + } + + return; + } + case VALUE_KIND_SUBTRACT: { + struct mptcp_rbs_value_subtract *subtract_value = + (struct mptcp_rbs_value_subtract *) value; + + info = mptcp_rbs_opt_find_value_info(ctx, value); + + if (info && info->is_const) { + *value_ptr = (struct mptcp_rbs_value *) + mptcp_rbs_value_constint_new(info->const_value); + + subtract_value->free(subtract_value); + } else { + opt_value(ctx, + (struct mptcp_rbs_value **) &subtract_value + ->left_operand); + opt_value(ctx, + (struct mptcp_rbs_value **) &subtract_value + ->right_operand); + } + + return; + } + case VALUE_KIND_MULTIPLY: { + struct mptcp_rbs_value_multiply *multiply_value = + (struct mptcp_rbs_value_multiply *) value; + + info = mptcp_rbs_opt_find_value_info(ctx, value); + + if (info && info->is_const) { + *value_ptr = (struct mptcp_rbs_value *) + mptcp_rbs_value_constint_new(info->const_value); + + multiply_value->free(multiply_value); + } else { + opt_value(ctx, + (struct mptcp_rbs_value **) &multiply_value + ->left_operand); + opt_value(ctx, + (struct mptcp_rbs_value **) &multiply_value + ->right_operand); + } + + return; + } + case VALUE_KIND_DIVIDE: { + struct mptcp_rbs_value_divide *divide_value = + (struct mptcp_rbs_value_divide *) value; + + info = mptcp_rbs_opt_find_value_info(ctx, value); + + if (info && info->is_const) { + *value_ptr = (struct mptcp_rbs_value *) + mptcp_rbs_value_constint_new(info->const_value); + + divide_value->free(divide_value); + } else { + opt_value(ctx, (struct mptcp_rbs_value **) ÷_value + ->left_operand); + opt_value(ctx, (struct mptcp_rbs_value **) ÷_value + ->right_operand); + } + + return; + } + case VALUE_KIND_REMAINDER: { + struct mptcp_rbs_value_remainder *remainder_value = + (struct mptcp_rbs_value_remainder *) value; + + info = mptcp_rbs_opt_find_value_info(ctx, value); + + if (info && info->is_const) { + *value_ptr = (struct mptcp_rbs_value *) + mptcp_rbs_value_constint_new(info->const_value); + + remainder_value->free(remainder_value); + } else { + opt_value(ctx, + (struct mptcp_rbs_value **) &remainder_value + ->left_operand); + 
opt_value(ctx, + (struct mptcp_rbs_value **) &remainder_value + ->right_operand); + } + + return; + } + case VALUE_KIND_IS_NULL: { + struct mptcp_rbs_value_is_null *is_null_value = + (struct mptcp_rbs_value_is_null *) value; + + info = mptcp_rbs_opt_find_value_info(ctx, value); + + if (info && info->is_const) { + if (info->const_value == 1) + *value_ptr = TRUE; + else + *value_ptr = FALSE; + + is_null_value->free(is_null_value); + } else + opt_value(ctx, + (struct mptcp_rbs_value **) &is_null_value + ->operand); + + return; + } + case VALUE_KIND_IS_NOT_NULL: { + struct mptcp_rbs_value_is_not_null *is_not_null_value = + (struct mptcp_rbs_value_is_not_null *) value; + + info = mptcp_rbs_opt_find_value_info(ctx, value); + + if (info && info->is_const) { + if (info->const_value == 1) + *value_ptr = TRUE; + else + *value_ptr = FALSE; + + is_not_null_value->free(is_not_null_value); + } else + opt_value(ctx, + (struct mptcp_rbs_value **) &is_not_null_value + ->operand); + + return; + } + case VALUE_KIND_REG: + case VALUE_KIND_Q: + case VALUE_KIND_QU: + case VALUE_KIND_RQ: + case VALUE_KIND_SUBFLOWS: + case VALUE_KIND_CURRENT_TIME_MS: + case VALUE_KIND_RANDOM: + case VALUE_KIND_SBFLIST_FILTER_SBF: + case VALUE_KIND_SKBLIST_FILTER_SKB: { + /* Cannot be constant */ + return; + } + case VALUE_KIND_SBF_RTT: { + opt_value(ctx, (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_rtt *) value) + ->sbf); + return; + } + case VALUE_KIND_SBF_RTT_MS: { + opt_value(ctx, (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_rtt_ms *) value) + ->sbf); + return; + } + case VALUE_KIND_SBF_RTT_VAR: { + opt_value(ctx, (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_rtt_var *) value) + ->sbf); + return; + } + case VALUE_KIND_SBF_USER: { + opt_value(ctx, (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_user *) value) + ->sbf); + return; + } + case VALUE_KIND_SBF_QUEUED: { + opt_value(ctx, (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_queued *) value) + ->sbf); + return; + } + case VALUE_KIND_SBF_IS_BACKUP: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_is_backup *) value) + ->sbf); + return; + } + case VALUE_KIND_SBF_CWND: { + opt_value(ctx, (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_cwnd *) value) + ->sbf); + return; + } + case VALUE_KIND_SBF_SKBS_IN_FLIGHT: { + opt_value( + ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_skbs_in_flight *) value) + ->sbf); + return; + } + case VALUE_KIND_SBF_LOST_SKBS: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_lost_skbs *) value) + ->sbf); + return; + } + case VALUE_KIND_SBF_HAS_WINDOW_FOR: { + opt_value( + ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_has_window_for *) value) + ->sbf); + opt_value( + ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_has_window_for *) value) + ->skb); + return; + } + case VALUE_KIND_SBF_ID: { + opt_value(ctx, (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_id *) value) + ->sbf); + return; + } + case VALUE_KIND_SBF_DELAY_IN: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_delay_in *) value) + ->sbf); + return; + } + case VALUE_KIND_SBF_DELAY_OUT: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_delay_out *) value) + ->sbf); + return; + } + case VALUE_KIND_SBF_BW_OUT_SEND: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_bw_out_send *) 
value) + ->sbf); + return; + } + case VALUE_KIND_SBF_BW_OUT_ACK: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_bw_out_ack *) value) + ->sbf); + return; + } + case VALUE_KIND_SBF_SSTHRESH: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_ssthresh *) value) + ->sbf); + return; + } + case VALUE_KIND_SBF_THROTTLED: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_throttled *) value) + ->sbf); + return; + } + case VALUE_KIND_SBF_LOSSY: { + opt_value(ctx, (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_lossy *) value) + ->sbf); + return; + } + case VALUE_KIND_SBFLIST_NEXT: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_list_next *) value) + ->list); + return; + } + case VALUE_KIND_SBFLIST_EMPTY: { + struct mptcp_rbs_value_sbf_list_empty *empty_value = + (struct mptcp_rbs_value_sbf_list_empty *) value; + + info = mptcp_rbs_opt_find_value_info(ctx, value); + + if (info && info->is_const) { + if (info->const_value == 1) + *value_ptr = TRUE; + else + *value_ptr = FALSE; + + empty_value->free(empty_value); + } else + opt_value( + ctx, + (struct mptcp_rbs_value **) &empty_value->list); + + return; + } + case VALUE_KIND_SBFLIST_FILTER: { + struct mptcp_rbs_value_sbf_list_filter *filter_value = + (struct mptcp_rbs_value_sbf_list_filter *) value; + + opt_value(ctx, (struct mptcp_rbs_value **) &filter_value->list); + + info = mptcp_rbs_opt_find_value_info( + ctx, (struct mptcp_rbs_value *) filter_value->cond); + + if (info && info->is_const && info->const_value == 1) { + struct mptcp_rbs_value_sbf_list *list = + filter_value->list; + + filter_value->list = NULL; + filter_value->free(filter_value); + + *value_ptr = (struct mptcp_rbs_value *) list; + } else + opt_value( + ctx, + (struct mptcp_rbs_value **) &filter_value->cond); + + return; + } + case VALUE_KIND_SBFLIST_MAX: { + struct mptcp_rbs_value_sbf_list_max *max_value = + (struct mptcp_rbs_value_sbf_list_max *) value; + + opt_value(ctx, (struct mptcp_rbs_value **) &max_value->list); + + info = mptcp_rbs_opt_find_value_info( + ctx, (struct mptcp_rbs_value *) max_value->cond); + + if (info && info->is_const) { + /* Just take the first subflow */ + *value_ptr = (struct mptcp_rbs_value *) + mptcp_rbs_value_sbf_list_get_new( + max_value->list, + (struct mptcp_rbs_value_int *) + mptcp_rbs_value_constint_new(0)); + + max_value->list = NULL; + max_value->free(max_value); + } + + return; + } + case VALUE_KIND_SBFLIST_MIN: { + struct mptcp_rbs_value_sbf_list_min *min_value = + (struct mptcp_rbs_value_sbf_list_min *) value; + + opt_value(ctx, (struct mptcp_rbs_value **) &min_value->list); + + info = mptcp_rbs_opt_find_value_info( + ctx, (struct mptcp_rbs_value *) min_value->cond); + + if (info && info->is_const) { + /* Just take the first subflow */ + *value_ptr = (struct mptcp_rbs_value *) + mptcp_rbs_value_sbf_list_get_new( + min_value->list, + (struct mptcp_rbs_value_int *) + mptcp_rbs_value_constint_new(0)); + + min_value->list = NULL; + min_value->free(min_value); + } + + return; + } + case VALUE_KIND_SBFLIST_GET: { + struct mptcp_rbs_value_sbf_list_get *get_value = + (struct mptcp_rbs_value_sbf_list_get *) value; + + opt_value(ctx, (struct mptcp_rbs_value **) &get_value->list); + opt_value(ctx, (struct mptcp_rbs_value **) &get_value->index); + return; + } + case VALUE_KIND_SBFLIST_COUNT: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_list_count *) value) + ->list); + return; 
+ } + case VALUE_KIND_SBFLIST_SUM: { + struct mptcp_rbs_value_sbf_list_sum *sum_value = + (struct mptcp_rbs_value_sbf_list_sum *) value; + + opt_value(ctx, (struct mptcp_rbs_value **) &sum_value->list); + + info = mptcp_rbs_opt_find_value_info( + ctx, (struct mptcp_rbs_value *) sum_value->cond); + + if (info && info->is_const && info->const_value == 0) { + /* Just 0 */ + *value_ptr = (struct mptcp_rbs_value *) + mptcp_rbs_value_constint_new(0); + + sum_value->free(sum_value); + } + + return; + } + case VALUE_KIND_SKB_SENT_ON: { + struct mptcp_rbs_value_skb_sent_on *sent_on_value = + (struct mptcp_rbs_value_skb_sent_on *) value; + + opt_value(ctx, (struct mptcp_rbs_value **) &sent_on_value->sbf); + opt_value(ctx, (struct mptcp_rbs_value **) &sent_on_value->skb); + return; + } + case VALUE_KIND_SKB_SENT_ON_ALL: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_skb_sent_on_all *) value) + ->skb); + return; + } + case VALUE_KIND_SKB_USER: { + opt_value(ctx, (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_skb_user *) value) + ->skb); + return; + } + case VALUE_KIND_SKB_SEQ: { + opt_value(ctx, (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_skb_seq *) value) + ->skb); + return; + } + case VALUE_KIND_SKB_PSH: { + opt_value(ctx, (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_skb_psh *) value) + ->skb); + return; + } + case VALUE_KIND_SKB_LENGTH: { + opt_value(ctx, (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_skb_length *) value) + ->skb); + return; + } + case VALUE_KIND_SKBLIST_NEXT: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_skb_list_next *) value) + ->list); + return; + } + case VALUE_KIND_SKBLIST_EMPTY: { + struct mptcp_rbs_value_skb_list_empty *empty_value = + (struct mptcp_rbs_value_skb_list_empty *) value; + + info = mptcp_rbs_opt_find_value_info(ctx, value); + + if (info && info->is_const) { + if (info->const_value == 1) + *value_ptr = TRUE; + else + *value_ptr = FALSE; + + empty_value->free(empty_value); + } else + opt_value( + ctx, + (struct mptcp_rbs_value **) &empty_value->list); + + return; + } + case VALUE_KIND_SKBLIST_POP: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_skb_list_pop *) value) + ->list); + return; + } + case VALUE_KIND_SKBLIST_FILTER: { + struct mptcp_rbs_value_skb_list_filter *filter_value = + (struct mptcp_rbs_value_skb_list_filter *) value; + + opt_value(ctx, (struct mptcp_rbs_value **) &filter_value->list); + + info = mptcp_rbs_opt_find_value_info( + ctx, (struct mptcp_rbs_value *) filter_value->cond); + + if (info && info->is_const && info->const_value == 1) { + struct mptcp_rbs_value_skb_list *list = + filter_value->list; + + filter_value->list = NULL; + filter_value->free(filter_value); + + *value_ptr = (struct mptcp_rbs_value *) list; + } else + opt_value( + ctx, + (struct mptcp_rbs_value **) &filter_value->cond); + + return; + } + case VALUE_KIND_SKBLIST_COUNT: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_skb_list_count *) value) + ->list); + return; + } + case VALUE_KIND_SKBLIST_TOP: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_skb_list_top *) value) + ->list); + return; + } + case VALUE_KIND_SKBLIST_GET: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_skb_list_get *) value) + ->list); + return; + } + } +} + +static void opt_smt(struct mptcp_rbs_opt_ctx *ctx, struct mptcp_rbs_smt *smt) +{ + struct mptcp_rbs_opt_value_info *info; + + 
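+ /* Fold the constant operands of this statement. Operands that are already + * known to evaluate to constant NULL are skipped here; see the note at the + * top of opt_value(). + */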
switch (smt->kind) { + case SMT_KIND_DROP: { + struct mptcp_rbs_smt_drop *drop_smt = + (struct mptcp_rbs_smt_drop *) smt; + + info = mptcp_rbs_opt_find_value_info( + ctx, (struct mptcp_rbs_value *) drop_smt->skb); + if (!IS_NULL(info)) + opt_value(ctx, + (struct mptcp_rbs_value **) &drop_smt->skb); + + return; + } + case SMT_KIND_PRINT: { + struct mptcp_rbs_smt_print *print_smt = + (struct mptcp_rbs_smt_print *) smt; + + info = mptcp_rbs_opt_find_value_info( + ctx, (struct mptcp_rbs_value *) print_smt->msg); + if (!IS_NULL(info)) + opt_value(ctx, + (struct mptcp_rbs_value **) &print_smt->msg); + + if (print_smt->arg) { + info = mptcp_rbs_opt_find_value_info( + ctx, (struct mptcp_rbs_value *) print_smt->arg); + if (!IS_NULL(info)) + opt_value(ctx, &print_smt->arg); + } + + return; + } + case SMT_KIND_PUSH: { + struct mptcp_rbs_smt_push *push_smt = + (struct mptcp_rbs_smt_push *) smt; + + info = mptcp_rbs_opt_find_value_info( + ctx, (struct mptcp_rbs_value *) push_smt->sbf); + if (!IS_NULL(info)) + opt_value(ctx, + (struct mptcp_rbs_value **) &push_smt->sbf); + + info = mptcp_rbs_opt_find_value_info( + ctx, (struct mptcp_rbs_value *) push_smt->skb); + if (!IS_NULL(info)) + opt_value(ctx, + (struct mptcp_rbs_value **) &push_smt->skb); + + return; + } + case SMT_KIND_SET: { + struct mptcp_rbs_smt_set *set_smt = + (struct mptcp_rbs_smt_set *) smt; + + info = mptcp_rbs_opt_find_value_info( + ctx, (struct mptcp_rbs_value *) set_smt->value); + if (!IS_NULL(info)) + opt_value(ctx, + (struct mptcp_rbs_value **) &set_smt->value); + + return; + } + case SMT_KIND_SET_USER: { + struct mptcp_rbs_smt_set_user *set_user_smt = + (struct mptcp_rbs_smt_set_user *) smt; + + info = mptcp_rbs_opt_find_value_info( + ctx, (struct mptcp_rbs_value *) set_user_smt->value); + if (!IS_NULL(info)) + opt_value(ctx, + (struct mptcp_rbs_value **) &set_user_smt->value); + + return; + } + case SMT_KIND_VAR: { + struct mptcp_rbs_smt_var *var_smt = + (struct mptcp_rbs_smt_var *) smt; + + info = mptcp_rbs_opt_find_value_info( + ctx, (struct mptcp_rbs_value *) var_smt->value); + if (!IS_NULL(info)) + opt_value(ctx, + (struct mptcp_rbs_value **) &var_smt->value); + + return; + } + case SMT_KIND_VOID: { + struct mptcp_rbs_smt_void *void_smt = + (struct mptcp_rbs_smt_void *) smt; + + if (void_smt->value) { + info = mptcp_rbs_opt_find_value_info( + ctx, (struct mptcp_rbs_value *) void_smt->value); + if (!IS_NULL(info)) + opt_value(ctx, + (struct mptcp_rbs_value **) &void_smt + ->value); + } + + return; + } + case SMT_KIND_EBPF: { + /* Cannot optimize */ + return; + } + } +} + +static void opt_block(struct mptcp_rbs_opt_ctx *ctx, + struct mptcp_rbs_cfg_block *block, + struct mptcp_rbs_cfg_block_list *list) +{ + struct mptcp_rbs_cfg_block *block2; + struct mptcp_rbs_smt *smt; + + /* Check if the block was already visited */ + FOREACH_BLOCK(list, block2, if (block == block2) return ); + ADD_BLOCK(list, block); + + smt = block->first_smt; + while (smt) { + opt_smt(ctx, smt); + smt = smt->next; + } + + if (block->condition) + opt_value(ctx, (struct mptcp_rbs_value **) &block->condition); + if (block->next) + opt_block(ctx, block->next, list); + if (block->next_else) + opt_block(ctx, block->next_else, list); +} + +void mptcp_rbs_opt_cf(struct mptcp_rbs_opt_ctx *ctx) +{ + struct mptcp_rbs_cfg_block_list list; + + INIT_BLOCK_LIST(&list); + opt_block(ctx, ctx->variation->first_block, &list); + FREE_BLOCK_LIST(&list); +} diff --git a/net/mptcp/mptcp_rbs_optimizer_cf.h b/net/mptcp/mptcp_rbs_optimizer_cf.h new file mode 100644 index 
0000000000000..9f18a3dd8606a --- /dev/null +++ b/net/mptcp/mptcp_rbs_optimizer_cf.h @@ -0,0 +1,13 @@ +#ifndef _MPTCP_RBS_OPTIMIZER_CF_H +#define _MPTCP_RBS_OPTIMIZER_CF_H + +struct mptcp_rbs_opt_ctx; + +/** + * Constant Folding: + * Combines constant values + * @ctx: The optimization context + */ +void mptcp_rbs_opt_cf(struct mptcp_rbs_opt_ctx *ctx); + +#endif diff --git a/net/mptcp/mptcp_rbs_optimizer_cve.c b/net/mptcp/mptcp_rbs_optimizer_cve.c new file mode 100644 index 0000000000000..59acb03aaa5d4 --- /dev/null +++ b/net/mptcp/mptcp_rbs_optimizer_cve.c @@ -0,0 +1,1160 @@ +#include "mptcp_rbs_optimizer_cve.h" +#include "mptcp_rbs_cfg.h" +#include "mptcp_rbs_optimizer.h" +#include "mptcp_rbs_scheduler.h" +#include "mptcp_rbs_smt.h" +#include "mptcp_rbs_value.h" +#include + +static void find_var_smts_in_block(struct mptcp_rbs_opt_ctx *ctx, + struct mptcp_rbs_cfg_block *block) +{ + struct mptcp_rbs_smt *smt; + struct mptcp_rbs_smt_var *var_smt; + + smt = block->first_smt; + while (smt) { + if (smt->kind == SMT_KIND_VAR) { + var_smt = (struct mptcp_rbs_smt_var *) smt; + + ctx->var_infos[var_smt->var_number].smt = var_smt; + } + + smt = smt->next; + } +} + +static struct mptcp_rbs_opt_value_info *opt_value(struct mptcp_rbs_opt_ctx *ctx, + struct mptcp_rbs_value *value) +{ + struct mptcp_rbs_opt_value_info *info = NULL; + +#define APPLY_ON_BIN(val, op) \ + struct mptcp_rbs_opt_value_info *left_info; \ + struct mptcp_rbs_opt_value_info *right_info; \ + left_info = \ + opt_value(ctx, (struct mptcp_rbs_value *) (val)->left_operand); \ + right_info = \ + opt_value(ctx, (struct mptcp_rbs_value *) (val)->right_operand); \ + \ + if (left_info && left_info->is_const) { \ + if (left_info->const_value == -1) { \ + info = mptcp_rbs_opt_get_value_info(ctx, value); \ + info->is_const = true; \ + info->const_value = -1; \ + } else if (right_info && right_info->is_const) { \ + info = mptcp_rbs_opt_get_value_info(ctx, value); \ + info->is_const = true; \ + if (right_info->const_value == -1) \ + info->const_value = -1; \ + else \ + op; \ + } \ + } else if (right_info && right_info->is_const && \ + right_info->const_value == -1) { \ + info = mptcp_rbs_opt_get_value_info(ctx, value); \ + info->is_const = true; \ + info->const_value = -1; \ + } \ + \ + return info; + + switch (value->kind) { + case VALUE_KIND_CONSTINT: { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = + ((struct mptcp_rbs_value_constint *) value)->value; + return info; + } + case VALUE_KIND_CONSTSTRING: { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = 0; + return info; + } + case VALUE_KIND_NULL: { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = -1; + return info; + } + case VALUE_KIND_BOOL_VAR: { + struct mptcp_rbs_opt_var_info *var_info; + struct mptcp_rbs_opt_value_info *var_value_info; + + var_info = + &ctx->var_infos[((struct mptcp_rbs_value_int_var *) value) + ->var_number]; + ++var_info->usage; + var_value_info = + mptcp_rbs_opt_find_value_info(ctx, var_info->smt->value); + + if (var_value_info && var_value_info->is_const) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = var_value_info->const_value; + } + + return info; + } + case VALUE_KIND_INT_VAR: { + struct mptcp_rbs_opt_var_info *var_info; + struct mptcp_rbs_opt_value_info *var_value_info; + + var_info = + &ctx->var_infos[((struct mptcp_rbs_value_int_var *) value) + ->var_number]; + 
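+ /* Count the reference and inherit the constant information from the + * defining VAR statement, if there is any. */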
++var_info->usage; + var_value_info = + mptcp_rbs_opt_find_value_info(ctx, var_info->smt->value); + + if (var_value_info && var_value_info->is_const) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = var_value_info->const_value; + } + + return info; + } + case VALUE_KIND_STRING_VAR: { + struct mptcp_rbs_opt_var_info *var_info; + struct mptcp_rbs_opt_value_info *var_value_info; + + var_info = + &ctx->var_infos[((struct mptcp_rbs_value_int_var *) value) + ->var_number]; + ++var_info->usage; + var_value_info = + mptcp_rbs_opt_find_value_info(ctx, var_info->smt->value); + + if (var_value_info && var_value_info->is_const) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = var_value_info->const_value; + } + + return info; + } + case VALUE_KIND_SBF_VAR: { + struct mptcp_rbs_opt_var_info *var_info; + struct mptcp_rbs_opt_value_info *var_value_info; + + var_info = + &ctx->var_infos[((struct mptcp_rbs_value_int_var *) value) + ->var_number]; + ++var_info->usage; + var_value_info = + mptcp_rbs_opt_find_value_info(ctx, var_info->smt->value); + + if (var_value_info && var_value_info->is_const) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = var_value_info->const_value; + } + + return info; + } + case VALUE_KIND_SBFLIST_VAR: { + struct mptcp_rbs_opt_var_info *var_info; + struct mptcp_rbs_opt_value_info *var_value_info; + + var_info = + &ctx->var_infos[((struct mptcp_rbs_value_int_var *) value) + ->var_number]; + ++var_info->usage; + var_value_info = + mptcp_rbs_opt_find_value_info(ctx, var_info->smt->value); + + if (var_value_info && var_value_info->is_const) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = var_value_info->const_value; + } + + return info; + } + case VALUE_KIND_SKB_VAR: { + struct mptcp_rbs_opt_var_info *var_info; + struct mptcp_rbs_opt_value_info *var_value_info; + + var_info = + &ctx->var_infos[((struct mptcp_rbs_value_int_var *) value) + ->var_number]; + ++var_info->usage; + var_value_info = + mptcp_rbs_opt_find_value_info(ctx, var_info->smt->value); + + if (var_value_info && var_value_info->is_const) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = var_value_info->const_value; + } + + return info; + } + case VALUE_KIND_SKBLIST_VAR: { + struct mptcp_rbs_opt_var_info *var_info; + struct mptcp_rbs_opt_value_info *var_value_info; + + var_info = + &ctx->var_infos[((struct mptcp_rbs_value_int_var *) value) + ->var_number]; + ++var_info->usage; + var_value_info = + mptcp_rbs_opt_find_value_info(ctx, var_info->smt->value); + + if (var_value_info && var_value_info->is_const) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = var_value_info->const_value; + } + + return info; + } + case VALUE_KIND_NOT: { + struct mptcp_rbs_opt_value_info *operand_info; + operand_info = opt_value( + ctx, + (struct mptcp_rbs_value *) ((struct mptcp_rbs_value_not *) + value) + ->operand); + + if (operand_info && operand_info->is_const) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + if (operand_info->const_value == -1) + info->const_value = -1; + else + info->const_value = !operand_info->const_value; + } + + return info; + } + case VALUE_KIND_EQUAL: { + APPLY_ON_BIN((struct mptcp_rbs_value_equal *) value, + info->const_value = left_info->const_value == + right_info->const_value) + } + 
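+ /* Except for AND and OR, which need short-circuit handling, the binary + * comparison and arithmetic kinds below follow the same APPLY_ON_BIN pattern + * as EQUAL above: a constant NULL operand makes the result constant NULL and + * two constant operands are folded into a constant result. + */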
case VALUE_KIND_UNEQUAL: { + APPLY_ON_BIN((struct mptcp_rbs_value_unequal *) value, + info->const_value = left_info->const_value != + right_info->const_value) + } + case VALUE_KIND_LESS: { + APPLY_ON_BIN((struct mptcp_rbs_value_less *) value, + info->const_value = left_info->const_value < + right_info->const_value) + } + case VALUE_KIND_LESS_EQUAL: { + APPLY_ON_BIN((struct mptcp_rbs_value_less_equal *) value, + info->const_value = left_info->const_value <= + right_info->const_value) + } + case VALUE_KIND_GREATER: { + APPLY_ON_BIN((struct mptcp_rbs_value_greater *) value, + info->const_value = left_info->const_value > + right_info->const_value) + } + case VALUE_KIND_GREATER_EQUAL: { + APPLY_ON_BIN((struct mptcp_rbs_value_greater_equal *) value, + info->const_value = left_info->const_value >= + right_info->const_value) + } + case VALUE_KIND_AND: { + struct mptcp_rbs_opt_value_info *left_info; + struct mptcp_rbs_opt_value_info *right_info; + left_info = opt_value( + ctx, + (struct mptcp_rbs_value *) ((struct mptcp_rbs_value_and *) + value) + ->left_operand); + right_info = opt_value( + ctx, + (struct mptcp_rbs_value *) ((struct mptcp_rbs_value_and *) + value) + ->right_operand); + + if (left_info && left_info->is_const) { + if (left_info->const_value <= 0) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = 0; + } else if (right_info && right_info->is_const) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = + right_info->const_value == 1; + } + } else if (right_info && right_info->is_const && + right_info->const_value <= 0) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = 0; + } + + return info; + } + case VALUE_KIND_OR: { + struct mptcp_rbs_opt_value_info *left_info; + struct mptcp_rbs_opt_value_info *right_info; + left_info = opt_value( + ctx, + (struct mptcp_rbs_value *) ((struct mptcp_rbs_value_and *) + value) + ->left_operand); + right_info = opt_value( + ctx, + (struct mptcp_rbs_value *) ((struct mptcp_rbs_value_and *) + value) + ->right_operand); + + if (left_info && left_info->is_const) { + if (left_info->const_value == 1) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = 1; + } else if (right_info && right_info->is_const) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = + right_info->const_value == 1; + } + } else if (right_info && right_info->is_const && + right_info->const_value == 1) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = 1; + } + + return info; + } + case VALUE_KIND_ADD: { + APPLY_ON_BIN((struct mptcp_rbs_value_add *) value, { + unsigned int result = + left_info->const_value + right_info->const_value; + info->const_value = result; + }) + } + case VALUE_KIND_SUBTRACT: { + APPLY_ON_BIN((struct mptcp_rbs_value_subtract *) value, { + unsigned int result = + left_info->const_value - right_info->const_value; + info->const_value = result; + }) + } + case VALUE_KIND_MULTIPLY: { + APPLY_ON_BIN((struct mptcp_rbs_value_multiply *) value, { + unsigned int result = + left_info->const_value * right_info->const_value; + info->const_value = result; + }) + } + case VALUE_KIND_DIVIDE: { + APPLY_ON_BIN((struct mptcp_rbs_value_divide *) value, { + if (!right_info->const_value) + info->const_value = -1; + else { + unsigned int result = left_info->const_value / + right_info->const_value; + 
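+ /* The folded quotient is truncated to unsigned int before it is widened + * back into the s64 const_value. */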
info->const_value = result; + } + }) + } + case VALUE_KIND_REMAINDER: { + APPLY_ON_BIN((struct mptcp_rbs_value_remainder *) value, { + if (!right_info->const_value) + info->const_value = -1; + else { + unsigned int result = left_info->const_value % + right_info->const_value; + info->const_value = result; + } + }) + } + case VALUE_KIND_IS_NULL: { + struct mptcp_rbs_opt_value_info *operand_info; + operand_info = opt_value( + ctx, (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_is_null *) value) + ->operand); + + if (operand_info && operand_info->is_const) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = operand_info->const_value == -1; + } else if (((struct mptcp_rbs_value_is_null *) value) + ->operand->kind == VALUE_KIND_REG) { + /* Registers can never hold NULL */ + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = 0; + } + + return info; + } + case VALUE_KIND_IS_NOT_NULL: { + struct mptcp_rbs_opt_value_info *operand_info; + operand_info = opt_value( + ctx, (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_is_not_null *) value) + ->operand); + + if (operand_info && operand_info->is_const) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = operand_info->const_value != -1; + } else if (((struct mptcp_rbs_value_is_not_null *) value) + ->operand->kind == VALUE_KIND_REG) { + /* Registers can never hold NULL */ + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = 1; + } + + return info; + } + case VALUE_KIND_REG: + case VALUE_KIND_Q: + case VALUE_KIND_QU: + case VALUE_KIND_RQ: + case VALUE_KIND_CURRENT_TIME_MS: + case VALUE_KIND_RANDOM: + case VALUE_KIND_SBFLIST_FILTER_SBF: + case VALUE_KIND_SKBLIST_FILTER_SKB: { + /* Cannot be constant */ + return NULL; + } + case VALUE_KIND_SUBFLOWS: { + if (!ctx->variation->sbf_num) + return NULL; + + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = ctx->variation->sbf_num; + + return info; + } + case VALUE_KIND_SBF_RTT: { + struct mptcp_rbs_opt_value_info *sbf_info; + sbf_info = opt_value( + ctx, (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_sbf_rtt *) value) + ->sbf); + + if (sbf_info && sbf_info->is_const && + sbf_info->const_value == -1) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = -1; + } + + return info; + } + case VALUE_KIND_SBF_IS_BACKUP: { + struct mptcp_rbs_opt_value_info *sbf_info; + sbf_info = opt_value( + ctx, + (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_sbf_is_backup *) value) + ->sbf); + + if (sbf_info && sbf_info->is_const && + sbf_info->const_value == -1) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = -1; + } + + return info; + } + case VALUE_KIND_SBF_CWND: { + struct mptcp_rbs_opt_value_info *sbf_info; + sbf_info = opt_value( + ctx, (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_sbf_cwnd *) value) + ->sbf); + + if (sbf_info && sbf_info->is_const && + sbf_info->const_value == -1) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = -1; + } + + return info; + } + case VALUE_KIND_SBF_SKBS_IN_FLIGHT: { + struct mptcp_rbs_opt_value_info *sbf_info; + sbf_info = opt_value( + ctx, (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_sbf_skbs_in_flight *) + value) + ->sbf); + + if (sbf_info && sbf_info->is_const && + 
sbf_info->const_value == -1) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = -1; + } + + return info; + } + case VALUE_KIND_SBF_LOST_SKBS: { + struct mptcp_rbs_opt_value_info *sbf_info; + sbf_info = opt_value( + ctx, + (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_sbf_lost_skbs *) value) + ->sbf); + + if (sbf_info && sbf_info->is_const && + sbf_info->const_value == -1) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = -1; + } + + return info; + } + case VALUE_KIND_SBF_HAS_WINDOW_FOR: { + struct mptcp_rbs_opt_value_info *sbf_info; + struct mptcp_rbs_opt_value_info *skb_info; + sbf_info = opt_value( + ctx, (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_sbf_has_window_for *) + value) + ->sbf); + skb_info = opt_value( + ctx, (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_sbf_has_window_for *) + value) + ->skb); + + if ((sbf_info && sbf_info->is_const && + sbf_info->const_value == -1) || + (skb_info && skb_info->is_const && + skb_info->const_value == -1)) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = -1; + } + + return info; + } + case VALUE_KIND_SBF_ID: { + struct mptcp_rbs_opt_value_info *sbf_info; + sbf_info = opt_value( + ctx, (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_sbf_id *) value) + ->sbf); + + if (sbf_info && sbf_info->is_const && + sbf_info->const_value == -1) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = -1; + } + + return info; + } + case VALUE_KIND_SBF_DELAY_IN: { + struct mptcp_rbs_opt_value_info *sbf_info; + sbf_info = opt_value( + ctx, (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_sbf_delay_in *) value) + ->sbf); + + if (sbf_info && sbf_info->is_const && + sbf_info->const_value == -1) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = -1; + } + + return info; + } + case VALUE_KIND_SBF_DELAY_OUT: { + struct mptcp_rbs_opt_value_info *sbf_info; + sbf_info = opt_value( + ctx, + (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_sbf_delay_out *) value) + ->sbf); + + if (sbf_info && sbf_info->is_const && + sbf_info->const_value == -1) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = -1; + } + + return info; + } + case VALUE_KIND_SBF_BW_OUT_SEND: { + struct mptcp_rbs_opt_value_info *sbf_info; + sbf_info = opt_value( + ctx, + (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_sbf_bw_out_send *) value) + ->sbf); + + if (sbf_info && sbf_info->is_const && + sbf_info->const_value == -1) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = -1; + } + + return info; + } + case VALUE_KIND_SBF_BW_OUT_ACK: { + struct mptcp_rbs_opt_value_info *sbf_info; + sbf_info = opt_value( + ctx, + (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_sbf_bw_out_ack *) value) + ->sbf); + + if (sbf_info && sbf_info->is_const && + sbf_info->const_value == -1) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = -1; + } + + return info; + } + case VALUE_KIND_SBF_SSTHRESH: { + struct mptcp_rbs_opt_value_info *sbf_info; + sbf_info = opt_value( + ctx, (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_sbf_ssthresh *) value) + ->sbf); + + if (sbf_info && sbf_info->is_const && + sbf_info->const_value == -1) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + 
info->is_const = true; + info->const_value = -1; + } + + return info; + } + case VALUE_KIND_SBF_THROTTLED: { + struct mptcp_rbs_opt_value_info *sbf_info; + sbf_info = opt_value( + ctx, + (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_sbf_throttled *) value) + ->sbf); + + if (sbf_info && sbf_info->is_const && + sbf_info->const_value == -1) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = -1; + } + + return info; + } + case VALUE_KIND_SBF_LOSSY: { + struct mptcp_rbs_opt_value_info *sbf_info; + sbf_info = opt_value( + ctx, (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_sbf_lossy *) value) + ->sbf); + + if (sbf_info && sbf_info->is_const && + sbf_info->const_value == -1) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = -1; + } + + return info; + } + case VALUE_KIND_SBFLIST_NEXT: { + struct mptcp_rbs_opt_value_info *sbf_list_info; + sbf_list_info = opt_value( + ctx, + (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_sbf_list_next *) value) + ->list); + + if (sbf_list_info && sbf_list_info->is_const && + sbf_list_info->const_value <= 0) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = -1; + } + + return info; + } + case VALUE_KIND_SBFLIST_EMPTY: { + struct mptcp_rbs_opt_value_info *sbf_list_info; + sbf_list_info = opt_value( + ctx, + (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_sbf_list_empty *) value) + ->list); + + if (sbf_list_info && sbf_list_info->is_const) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + if (sbf_list_info->const_value == -1) + info->const_value = -1; + else if (!sbf_list_info->const_value) + info->const_value = 1; + else + info->const_value = 0; + } + + return info; + } + case VALUE_KIND_SBFLIST_FILTER: { + struct mptcp_rbs_opt_value_info *sbf_list_info; + struct mptcp_rbs_opt_value_info *cond_info; + sbf_list_info = opt_value( + ctx, + (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_sbf_list_filter *) value) + ->list); + cond_info = opt_value( + ctx, + (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_sbf_list_filter *) value) + ->cond); + + if (sbf_list_info && sbf_list_info->is_const) { + if (sbf_list_info->const_value == -1) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = -1; + } else if (cond_info && cond_info->is_const) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + if (cond_info->const_value == -1) + info->const_value = -1; + else if (!cond_info->const_value) + info->const_value = 0; + else + info->const_value = + sbf_list_info->const_value; + } + } else if (cond_info && cond_info->is_const && + cond_info->const_value <= 0) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + if (cond_info->const_value == -1) + info->const_value = -1; + else + info->const_value = 0; + } + + return info; + } + case VALUE_KIND_SBFLIST_MAX: { + struct mptcp_rbs_opt_value_info *sbf_list_info; + struct mptcp_rbs_opt_value_info *cond_info; + sbf_list_info = opt_value( + ctx, (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_sbf_list_max *) value) + ->list); + cond_info = opt_value( + ctx, (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_sbf_list_max *) value) + ->cond); + + if ((sbf_list_info && sbf_list_info->is_const && + sbf_list_info->const_value == -1) || + (cond_info && cond_info->is_const && + cond_info->const_value == -1)) { + info = 
mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = -1; + } + + return info; + } + case VALUE_KIND_SBFLIST_MIN: { + struct mptcp_rbs_opt_value_info *sbf_list_info; + struct mptcp_rbs_opt_value_info *cond_info; + sbf_list_info = opt_value( + ctx, (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_sbf_list_min *) value) + ->list); + cond_info = opt_value( + ctx, (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_sbf_list_min *) value) + ->cond); + + if ((sbf_list_info && sbf_list_info->is_const && + sbf_list_info->const_value == -1) || + (cond_info && cond_info->is_const && + cond_info->const_value == -1)) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = -1; + } + + return info; + } + case VALUE_KIND_SBFLIST_GET: { + struct mptcp_rbs_opt_value_info *sbf_list_info; + struct mptcp_rbs_opt_value_info *index_info; + sbf_list_info = opt_value( + ctx, (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_sbf_list_get *) value) + ->list); + index_info = opt_value( + ctx, (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_sbf_list_get *) value) + ->index); + + if (sbf_list_info && sbf_list_info->is_const) { + if (sbf_list_info->const_value == -1 || + (index_info && index_info->is_const && + index_info->const_value >= + sbf_list_info->const_value)) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = -1; + } + } else if (index_info && index_info->is_const) { + if (index_info->const_value == -1 || + (ctx->variation->sbf_num && + index_info->const_value >= + ctx->variation->sbf_num)) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = -1; + } + } + + return info; + } + case VALUE_KIND_SBFLIST_COUNT: { + struct mptcp_rbs_opt_value_info *sbf_list_info; + sbf_list_info = opt_value( + ctx, + (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_sbf_list_count *) value) + ->list); + + if (sbf_list_info && sbf_list_info->is_const) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = sbf_list_info->const_value; + } + + return info; + } + case VALUE_KIND_SBFLIST_SUM: { + struct mptcp_rbs_opt_value_info *sbf_list_info; + struct mptcp_rbs_opt_value_info *cond_info; + sbf_list_info = opt_value( + ctx, (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_sbf_list_sum *) value) + ->list); + cond_info = opt_value( + ctx, (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_sbf_list_sum *) value) + ->cond); + + if ((sbf_list_info && sbf_list_info->is_const && + sbf_list_info->const_value == -1) || + (cond_info && cond_info->is_const && + cond_info->const_value == -1)) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = -1; + } + + return info; + } + case VALUE_KIND_SKB_SENT_ON: { + struct mptcp_rbs_opt_value_info *skb_info; + struct mptcp_rbs_opt_value_info *sbf_info; + skb_info = opt_value( + ctx, (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_skb_sent_on *) value) + ->skb); + sbf_info = opt_value( + ctx, (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_skb_sent_on *) value) + ->sbf); + + if ((skb_info && skb_info->is_const && + skb_info->const_value == -1) || + (sbf_info && sbf_info->is_const && + sbf_info->const_value == -1)) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = -1; + } + + return info; + } + case VALUE_KIND_SKB_SENT_ON_ALL: { + struct 
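For list values the same const_value field doubles as a length: -1 still means a NULL list, while a non-negative value is the element count known at optimization time. VALUE_KIND_SBFLIST_GET uses this to fold provably out-of-range accesses to NULL, checking either against the folded list length or against the subflow count recorded in the variation (ctx->variation->sbf_num). Below is a minimal sketch of that bounds check, simplified by assuming the index is already a known constant; fold_list_get and list_const are illustrative names.

#include <stdbool.h>
#include <stdio.h>

struct list_const {
	bool is_const;
	long long const_value;	/* -1 = NULL list, otherwise element count */
};

/*
 * Returns true (and marks the result NULL) when the access can be proven
 * out of range at optimization time.
 */
static bool fold_list_get(const struct list_const *list, long long index,
			  long long known_sbf_num, struct list_const *out)
{
	if (list && list->is_const &&
	    (list->const_value == -1 || index >= list->const_value))
		goto fold_null;

	if (index == -1 || (known_sbf_num && index >= known_sbf_num))
		goto fold_null;

	return false;

fold_null:
	out->is_const = true;
	out->const_value = -1;
	return true;
}

int main(void)
{
	struct list_const subflows = { .is_const = true, .const_value = 2 };
	struct list_const result = { 0 };

	if (fold_list_get(&subflows, 5, 2, &result))
		printf("GET folds to NULL\n");
	return 0;
}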
mptcp_rbs_opt_value_info *skb_info; + skb_info = opt_value( + ctx, (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_skb_sent_on *) value) + ->skb); + + if (skb_info && skb_info->is_const && + skb_info->const_value == -1) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = -1; + } + + return info; + } + case VALUE_KIND_SKB_USER: { + struct mptcp_rbs_opt_value_info *skb_info; + skb_info = opt_value( + ctx, (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_skb_user *) value) + ->skb); + + if (skb_info && skb_info->is_const && + skb_info->const_value == -1) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = -1; + } + + return info; + } + case VALUE_KIND_SKBLIST_NEXT: { + struct mptcp_rbs_opt_value_info *skb_list_info; + skb_list_info = opt_value( + ctx, + (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_skb_list_next *) value) + ->list); + + if (skb_list_info && skb_list_info->is_const && + skb_list_info->const_value <= 0) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = -1; + } + + return info; + } + case VALUE_KIND_SKBLIST_EMPTY: { + struct mptcp_rbs_opt_value_info *skb_list_info; + skb_list_info = opt_value( + ctx, + (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_skb_list_empty *) value) + ->list); + + if (skb_list_info && skb_list_info->is_const) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + if (skb_list_info->const_value == -1) + info->const_value = -1; + else if (!skb_list_info->const_value) + info->const_value = 1; + else + info->const_value = 0; + } + + return info; + } + case VALUE_KIND_SKBLIST_POP: { + struct mptcp_rbs_opt_value_info *skb_list_info; + skb_list_info = opt_value( + ctx, (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_skb_list_pop *) value) + ->list); + + if (skb_list_info && skb_list_info->is_const && + skb_list_info->const_value <= 0) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = -1; + } + + return info; + } + case VALUE_KIND_SKBLIST_FILTER: { + struct mptcp_rbs_opt_value_info *skb_list_info; + struct mptcp_rbs_opt_value_info *cond_info; + skb_list_info = opt_value( + ctx, + (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_sbf_list_filter *) value) + ->list); + cond_info = opt_value( + ctx, + (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_sbf_list_filter *) value) + ->cond); + + if (skb_list_info && skb_list_info->is_const) { + if (skb_list_info->const_value == -1) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = -1; + } else if (cond_info && cond_info->is_const) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + if (cond_info->const_value == -1) + info->const_value = -1; + else if (!cond_info->const_value) + info->const_value = 0; + else + info->const_value = + skb_list_info->const_value; + } + } else if (cond_info && cond_info->is_const && + cond_info->const_value <= 0) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + if (cond_info->const_value == -1) + info->const_value = -1; + else + info->const_value = 0; + } + + return info; + } + case VALUE_KIND_SKBLIST_COUNT: { + struct mptcp_rbs_opt_value_info *skb_list_info; + skb_list_info = opt_value( + ctx, + (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_skb_list_count *) value) + ->list); + + if (skb_list_info && 
skb_list_info->is_const) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = skb_list_info->const_value; + } + + return info; + } + case VALUE_KIND_SKBLIST_TOP: { + struct mptcp_rbs_opt_value_info *skb_list_info; + skb_list_info = opt_value( + ctx, (struct mptcp_rbs_value + *) ((struct mptcp_rbs_value_skb_list_top *) value) + ->list); + + if (skb_list_info && skb_list_info->is_const && + skb_list_info->const_value <= 0) { + info = mptcp_rbs_opt_get_value_info(ctx, value); + info->is_const = true; + info->const_value = -1; + } + + return info; + } + default: + return NULL; + } +} + +static void opt_smt(struct mptcp_rbs_opt_ctx *ctx, struct mptcp_rbs_smt *smt) +{ + switch (smt->kind) { + case SMT_KIND_DROP: { + struct mptcp_rbs_smt_drop *drop_smt = + (struct mptcp_rbs_smt_drop *) smt; + + opt_value(ctx, (struct mptcp_rbs_value *) drop_smt->skb); + break; + } + case SMT_KIND_PRINT: { + struct mptcp_rbs_smt_print *print_smt = + (struct mptcp_rbs_smt_print *) smt; + + opt_value(ctx, (struct mptcp_rbs_value *) print_smt->msg); + if (print_smt->arg) + opt_value(ctx, print_smt->arg); + break; + } + case SMT_KIND_PUSH: { + struct mptcp_rbs_smt_push *push_smt = + (struct mptcp_rbs_smt_push *) smt; + + opt_value(ctx, (struct mptcp_rbs_value *) push_smt->sbf); + opt_value(ctx, (struct mptcp_rbs_value *) push_smt->skb); + break; + } + case SMT_KIND_SET: { + struct mptcp_rbs_smt_set *set_smt = + (struct mptcp_rbs_smt_set *) smt; + + opt_value(ctx, (struct mptcp_rbs_value *) set_smt->value); + break; + } + case SMT_KIND_SET_USER: { + struct mptcp_rbs_smt_set_user *set_user_smt = + (struct mptcp_rbs_smt_set_user *) smt; + + opt_value(ctx, (struct mptcp_rbs_value *) set_user_smt->value); + break; + } + case SMT_KIND_VAR: { + struct mptcp_rbs_smt_var *var_smt = + (struct mptcp_rbs_smt_var *) smt; + + opt_value(ctx, var_smt->value); + break; + } + case SMT_KIND_VOID: { + struct mptcp_rbs_smt_void *void_smt = + (struct mptcp_rbs_smt_void *) smt; + + if (void_smt->value) + opt_value(ctx, void_smt->value); + break; + } + case SMT_KIND_EBPF: { + /* Cannot optimize */ + break; + } + } +} + +static void opt_block(struct mptcp_rbs_opt_ctx *ctx, + struct mptcp_rbs_cfg_block *block) +{ + struct mptcp_rbs_smt *smt; + + smt = block->first_smt; + while (smt) { + opt_smt(ctx, smt); + smt = smt->next; + } + + if (block->condition) + opt_value(ctx, (struct mptcp_rbs_value *) block->condition); +} + +void mptcp_rbs_opt_cve(struct mptcp_rbs_opt_ctx *ctx) +{ + struct mptcp_rbs_cfg_block_list list; + struct mptcp_rbs_cfg_block *block; + + /* Clear variable information */ + memset(ctx->var_infos, 0, sizeof(ctx->var_infos)); + + INIT_BLOCK_LIST(&list); + mptcp_rbs_cfg_block_traverse(ctx->variation->first_block, &list); + + /* Find var statements */ + FOREACH_BLOCK(&list, block, find_var_smts_in_block(ctx, block)); + + /* Calculate constant values */ + FOREACH_BLOCK(&list, block, opt_block(ctx, block)); + FREE_BLOCK_LIST(&list); +} diff --git a/net/mptcp/mptcp_rbs_optimizer_cve.h b/net/mptcp/mptcp_rbs_optimizer_cve.h new file mode 100644 index 0000000000000..30857b5cbe615 --- /dev/null +++ b/net/mptcp/mptcp_rbs_optimizer_cve.h @@ -0,0 +1,14 @@ +#ifndef _MPTCP_RBS_OPTIMIZER_CVE_H +#define _MPTCP_RBS_OPTIMIZER_CVE_H + +struct mptcp_rbs_opt_ctx; + +/** + * Constant Value Evaluation: + * Searches for constant values, evaluates them and stores the results inside + * the values' info + * @ctx: The optimization context + */ +void mptcp_rbs_opt_cve(struct mptcp_rbs_opt_ctx *ctx); + +#endif 
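The CVE pass computes one mptcp_rbs_opt_value_info record per value and later passes (such as DCE via mptcp_rbs_opt_find_value_info) look the same records up again by value pointer. The following standalone sketch shows that memoization pattern with a simple pointer-keyed table; the names get_info, memo_entry and the fixed table size are illustrative, not the implementation in the patch.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct info { bool is_const; long long const_value; };

struct memo_entry { const void *key; struct info info; };

static struct memo_entry table[64];
static int table_len;

/* Return the info record for a value, creating it on first use. */
static struct info *get_info(const void *value)
{
	for (int i = 0; i < table_len; i++)
		if (table[i].key == value)
			return &table[i].info;
	table[table_len].key = value;
	return &table[table_len++].info;
}

int main(void)
{
	int dummy_value;			/* stands in for a value node */
	struct info *i = get_info(&dummy_value);

	i->is_const = true;
	i->const_value = -1;
	printf("second lookup sees const=%lld\n",
	       get_info(&dummy_value)->const_value);
	return 0;
}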
diff --git a/net/mptcp/mptcp_rbs_optimizer_dce.c b/net/mptcp/mptcp_rbs_optimizer_dce.c new file mode 100644 index 0000000000000..34ef5b9c1332f --- /dev/null +++ b/net/mptcp/mptcp_rbs_optimizer_dce.c @@ -0,0 +1,183 @@ +#include "mptcp_rbs_optimizer_dce.h" +#include "mptcp_rbs_cfg.h" +#include "mptcp_rbs_optimizer.h" +#include "mptcp_rbs_scheduler.h" +#include "mptcp_rbs_smt.h" +#include "mptcp_rbs_value.h" + +#define IS_NULL(info) ((info) && (info)->is_const && (info)->const_value == -1) + +static void opt_smt(struct mptcp_rbs_opt_ctx *ctx, + struct mptcp_rbs_smt ***smt_ptr) +{ + struct mptcp_rbs_smt *smt = **smt_ptr; + struct mptcp_rbs_opt_value_info *info; + struct mptcp_rbs_opt_value_info *info2; + + switch (smt->kind) { + case SMT_KIND_DROP: { + struct mptcp_rbs_smt_drop *drop_smt = + (struct mptcp_rbs_smt_drop *) smt; + + info = mptcp_rbs_opt_find_value_info( + ctx, (struct mptcp_rbs_value *) drop_smt->skb); + if (!IS_NULL(info)) { + *smt_ptr = &smt->next; + return; + } + + break; + } + case SMT_KIND_PRINT: { + struct mptcp_rbs_smt_print *print_smt = + (struct mptcp_rbs_smt_print *) smt; + + info = mptcp_rbs_opt_find_value_info( + ctx, (struct mptcp_rbs_value *) print_smt->msg); + if (print_smt->arg) + info2 = mptcp_rbs_opt_find_value_info( + ctx, (struct mptcp_rbs_value *) print_smt->arg); + else + info2 = NULL; + + if (!IS_NULL(info) && !IS_NULL(info2)) { + *smt_ptr = &smt->next; + return; + } + + break; + } + case SMT_KIND_PUSH: { + struct mptcp_rbs_smt_push *push_smt = + (struct mptcp_rbs_smt_push *) smt; + + info = mptcp_rbs_opt_find_value_info( + ctx, (struct mptcp_rbs_value *) push_smt->sbf); + info2 = mptcp_rbs_opt_find_value_info( + ctx, (struct mptcp_rbs_value *) push_smt->skb); + + if (!IS_NULL(info) && !IS_NULL(info2)) { + *smt_ptr = &smt->next; + return; + } + + break; + } + case SMT_KIND_SET: { + struct mptcp_rbs_smt_set *set_smt = + (struct mptcp_rbs_smt_set *) smt; + + info = mptcp_rbs_opt_find_value_info( + ctx, (struct mptcp_rbs_value *) set_smt->value); + + if (!IS_NULL(info)) { + *smt_ptr = &smt->next; + return; + } + + break; + } + case SMT_KIND_SET_USER: { + struct mptcp_rbs_smt_set_user *set_user_smt = + (struct mptcp_rbs_smt_set_user *) smt; + + info = mptcp_rbs_opt_find_value_info( + ctx, (struct mptcp_rbs_value *) set_user_smt->value); + + if (!IS_NULL(info)) { + *smt_ptr = &smt->next; + return; + } + + break; + } + case SMT_KIND_VAR: { + struct mptcp_rbs_smt_var *var_smt = + (struct mptcp_rbs_smt_var *) smt; + struct mptcp_rbs_opt_var_info *var_info; + + var_info = &ctx->var_infos[var_smt->var_number]; + if (!var_info || !var_info->smt || var_info->usage) { + *smt_ptr = &smt->next; + return; + } + + break; + } + case SMT_KIND_VOID: { + /* Since VOID is only for measurements we are allowed to remove + * it + */ + break; + } + case SMT_KIND_EBPF: { + /* Cannot optimize */ + return; + } + } + + /* Remove the statement */ + **smt_ptr = smt->next; + smt->free(smt); +} + +static void opt_block(struct mptcp_rbs_opt_ctx *ctx, + struct mptcp_rbs_cfg_block *block) +{ + struct mptcp_rbs_smt **smt; + struct mptcp_rbs_opt_value_info *info; + + smt = &block->first_smt; + while (smt && *smt) { + opt_smt(ctx, &smt); + } + + if (block->condition) { + info = mptcp_rbs_opt_find_value_info( + ctx, (struct mptcp_rbs_value *) block->condition); + + if (info && info->is_const) { + if (info->const_value != 1) + block->next = block->next_else; + + block->next_else = NULL; + block->condition->free(block->condition); + block->condition = NULL; + } + } +} + +void 
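The DCE statement walk above carries a pointer to the current "next" field, so removing a dead statement is a single store into that field and the cursor only advances when a statement is kept. A generic userspace sketch of the same unlink-with-a-pointer-to-pointer idiom, with illustrative types (smt, remove_dead):

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct smt { int id; bool dead; struct smt *next; };

static void remove_dead(struct smt **head)
{
	struct smt **cur = head;

	while (*cur) {
		struct smt *s = *cur;

		if (s->dead) {
			*cur = s->next;	/* unlink, cursor stays put */
			free(s);
		} else {
			cur = &s->next;	/* keep, advance the cursor */
		}
	}
}

int main(void)
{
	struct smt *c = malloc(sizeof(*c)); *c = (struct smt){ 3, false, NULL };
	struct smt *b = malloc(sizeof(*b)); *b = (struct smt){ 2, true, c };
	struct smt *a = malloc(sizeof(*a)); *a = (struct smt){ 1, false, b };

	remove_dead(&a);
	for (struct smt *s = a; s; s = s->next)
		printf("kept %d\n", s->id);
	return 0;
}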
mptcp_rbs_opt_dce(struct mptcp_rbs_opt_ctx *ctx) +{ + struct mptcp_rbs_cfg_block_list list; + struct mptcp_rbs_cfg_block_list list2; + struct mptcp_rbs_cfg_block *block; + struct mptcp_rbs_cfg_block *block2; + bool found; + + INIT_BLOCK_LIST(&list); + INIT_BLOCK_LIST(&list2); + + /* Fill list with blocks */ + mptcp_rbs_cfg_block_traverse(ctx->variation->first_block, &list); + + /* Remove NULL statements and constant ifs */ + FOREACH_BLOCK(&list, block, opt_block(ctx, block)); + + /* Free unused blocks */ + mptcp_rbs_cfg_block_traverse(ctx->variation->first_block, &list2); + FOREACH_BLOCK(&list, block, { + found = false; + FOREACH_BLOCK(&list2, block2, if (block == block2) { + found = true; + break; + }); + if (!found) + mptcp_rbs_cfg_block_free(block); + }); + + FREE_BLOCK_LIST(&list); + FREE_BLOCK_LIST(&list2); +} + +// TODO RBS Compact variables if some were removed diff --git a/net/mptcp/mptcp_rbs_optimizer_dce.h b/net/mptcp/mptcp_rbs_optimizer_dce.h new file mode 100644 index 0000000000000..87d72e2fd29b5 --- /dev/null +++ b/net/mptcp/mptcp_rbs_optimizer_dce.h @@ -0,0 +1,13 @@ +#ifndef _MPTCP_RBS_OPTIMIZER_DCE_H +#define _MPTCP_RBS_OPTIMIZER_DCE_H + +struct mptcp_rbs_opt_ctx; + +/** + * Dead Code Elimination: + * Erases code that is never executed + * @ctx: The optimization context + */ +void mptcp_rbs_opt_dce(struct mptcp_rbs_opt_ctx *ctx); + +#endif diff --git a/net/mptcp/mptcp_rbs_optimizer_ebpf.c b/net/mptcp/mptcp_rbs_optimizer_ebpf.c new file mode 100644 index 0000000000000..fb7f67f0f949c --- /dev/null +++ b/net/mptcp/mptcp_rbs_optimizer_ebpf.c @@ -0,0 +1,4172 @@ +#include "mptcp_rbs_optimizer_ebpf.h" +#include "mptcp_rbs_cfg.h" +#include "mptcp_rbs_ctx.h" +#include "mptcp_rbs_optimizer.h" +#include "mptcp_rbs_optimizer_ebpf_regalloc.h" +#include "mptcp_rbs_queue.h" +#include "mptcp_rbs_sched.h" +#include "mptcp_rbs_scheduler.h" +#include "mptcp_rbs_smt.h" +#include "mptcp_rbs_value.h" +#include + +/** Some fixed temporaries */ +enum { CTX_TMP, VARS_TMP, REGS_TMP, FIXED_TMP_COUNT }; + +/** Information about a filter/min/max/sum variable */ +struct filter_var { + const void *progress; + int temp; +}; + +/* + * Filter var information lists + */ + +DECL_DA(filter_var_list, struct filter_var *); + +#define INIT_FILTER_VAR_LIST(list) INIT_DA(list) + +#define FREE_FILTER_VAR_LIST(list) FREE_DA(list) + +#define PUSH_FILTER_VAR(list, var) ADD_DA_ITEM(list, var) + +#define POP_FILTER_VAR(list) DELETE_DA_ITEM(list, GET_DA_LEN(list) - 1) + +#define FOREACH_FILTER_VAR(list, var, cmds) FOREACH_DA_ITEM(list, var, cmds) + +/** Context for eBPF code generation */ +struct ebpf_ctx { + /** Pointer to the optimization context */ + struct mptcp_rbs_opt_ctx *ctx; + /** Number of used temporaries */ + int used_temps; + /** Map with used temporaries */ + u64 used_temps_map; + /** The buffer for string constants */ + char **strs; + /** Length of strs */ + int strs_len; + /** Capacity of instruction list inside the current block */ + int capacity; + /** The current block */ + struct mptcp_rbs_cfg_block *block; + /** The current eBPF block */ + struct mptcp_rbs_ebpf_block *eblock; + /** List with active filter variables */ + struct filter_var_list filter_var_list; + /** Variable number of a found *_NEXT value or -1 */ + int next_var; + /** NULL eBPF block of the variable with *_NEXT value or NULL */ + struct mptcp_rbs_ebpf_block *next_var_null_eblock; +}; + +/** Information about a block that is stored inside the tag field */ +struct block_info { + /** The corresponding eBPF block of this block */ + struct 
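The block-level part of DCE, opt_block() and mptcp_rbs_opt_dce() above, folds a constant condition down to a single outgoing edge and then frees every block the second traversal no longer reaches. The sketch below models that shape with simplified stand-in types (blk, fold_const_branch, mark); it only follows the surviving next edge, while the real traversal covers both edges.

#include <stdbool.h>
#include <stdio.h>

struct blk {
	struct blk *next, *next_else;
	bool cond_known, cond_value;
	bool reachable;
};

static void fold_const_branch(struct blk *b)
{
	if (!b->cond_known)
		return;
	if (!b->cond_value)		/* condition != 1: take the else edge */
		b->next = b->next_else;
	b->next_else = NULL;		/* the branch is gone either way */
}

static void mark(struct blk *b)
{
	for (; b && !b->reachable; b = b->next)
		b->reachable = true;	/* unmarked blocks would be freed */
}

int main(void)
{
	struct blk then_b = { 0 }, else_b = { 0 };
	struct blk entry = { .next = &then_b, .next_else = &else_b,
			     .cond_known = true, .cond_value = false };

	fold_const_branch(&entry);
	mark(&entry);
	printf("then reachable: %d, else reachable: %d\n",
	       then_b.reachable, else_b.reachable);
	return 0;
}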
mptcp_rbs_ebpf_block *eblock; + /** + * Break eBPF block if this block is the beginning of a foreach loop or + * NULL + */ + struct mptcp_rbs_ebpf_block *break_eblock; + /** + * Continue eBPF block if this block is the beginning of a foreach loop + * or NULL + */ + struct mptcp_rbs_ebpf_block *cont_eblock; + /** Mask with reserved temporaries by a foreach loop */ + u64 reserved_temps_map; +}; + +#define BLOCK_INFO(block) ((struct block_info *) (block)->tag) + +/** + * Adds an eBPF instruction to a block + * @eblock: The block where the instruction should be added + * @capacity: Pointer to the capacity of the block + * @instr: The instruction to add + */ +static void add_instr(struct mptcp_rbs_ebpf_block *eblock, int *capacity, + struct mptcp_rbs_ebpf_instr instr) +{ + if (*capacity == eblock->instr_count) { + *capacity = *capacity == 0 ? 4 : *capacity << 1; + eblock->instrs = + krealloc(eblock->instrs, + *capacity * sizeof(struct mptcp_rbs_ebpf_instr), + GFP_KERNEL); + } + + eblock->instrs[eblock->instr_count] = instr; + ++eblock->instr_count; +} + +#define add_instr_ectx(ectx, instr) \ + add_instr(ectx->eblock, &ectx->capacity, instr) + +#define TEMP_TO_MAP(t) (1 << (t)) + +/** + * Reserves a temporary + * @ectx: The generation context + * @return: The reserved temporary + */ +static int reserve(struct ebpf_ctx *ectx) +{ + int i; + int temp = -1; + int count = 0; + + for (i = 0; i < 64; ++i) { + if (TEMP_TO_MAP(i) & ectx->used_temps_map) + ++count; + else if (temp == -1) + temp = i; + } + + ectx->used_temps_map |= TEMP_TO_MAP(temp); + ectx->used_temps = max(ectx->used_temps, count); + return temp; +} + +/** + * Reserves all temporaries in a bitmap + * @ectx: The generation context + * @reserved_map: The temporary bitmap + */ +static void reserve_all(struct ebpf_ctx *ectx, u64 reserved_map) +{ + BUG_ON(ectx->used_temps_map & reserved_map); + + ectx->used_temps_map |= reserved_map; +} + +/** + * Dereserves one temporary + * @ectx: The generation context + * @t: The temporary to dereserve + */ +static void dereserve(struct ebpf_ctx *ectx, int t) +{ + BUG_ON(!(ectx->used_temps_map & TEMP_TO_MAP(t))); + + ectx->used_temps_map &= ~TEMP_TO_MAP(t); +} + +/** + * Dereserves all temporaries in a bitmap + * @ectx: The generation context + * @reserved_map: The temporary bitmap + */ +static void dereserve_all(struct ebpf_ctx *ectx, u64 reserved_map) +{ + BUG_ON((ectx->used_temps_map & reserved_map) != reserved_map); + + ectx->used_temps_map &= ~reserved_map; +} + +/* + * Functions that can be called from inside eBPF code + */ + +u64 ebpf_printk(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + printk(*((char **) &r1), r2); + return 0; +} + +u64 ebpf_add_drop(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + struct mptcp_rbs_eval_ctx *ctx = *((struct mptcp_rbs_eval_ctx **) &r1); + struct sk_buff *skb = *((struct sk_buff **) &r2); + bool reinject = r3; + + mptcp_rbs_action_new(ctx->rbs_cb->open_actions, false, ACTION_KIND_DROP, + NULL, skb, reinject); + return 0; +} + +u64 ebpf_add_push(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + struct mptcp_rbs_eval_ctx *ctx = *((struct mptcp_rbs_eval_ctx **) &r1); + struct tcp_sock *sbf = *((struct tcp_sock **) &r2); + struct sk_buff *skb = *((struct sk_buff **) &r3); + bool reinject = r4; + + mptcp_rbs_action_new(ctx->rbs_cb->open_actions, false, ACTION_KIND_PUSH, + sbf, skb, reinject); + return 0; +} + +u64 ebpf_ktime_get_raw_ms(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + return ktime_get_raw_ns() / 1000000; +} + +u64 ebpf_random(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ 
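The reserve()/dereserve() helpers above hand out virtual temporaries from a 64-bit bitmap: reservation picks the lowest free bit and release clears it again. A userspace sketch of the same bookkeeping, with illustrative names (reserve_temp, dereserve_temp) and without the high-water-mark tracking in used_temps:

#include <stdint.h>
#include <stdio.h>

static uint64_t used_map;

static int reserve_temp(void)
{
	for (int i = 0; i < 64; i++) {
		if (!(used_map & (UINT64_C(1) << i))) {
			used_map |= UINT64_C(1) << i;
			return i;
		}
	}
	return -1;	/* no temporary left */
}

static void dereserve_temp(int t)
{
	used_map &= ~(UINT64_C(1) << t);
}

int main(void)
{
	int a = reserve_temp();	/* 0 */
	int b = reserve_temp();	/* 1 */

	dereserve_temp(a);
	printf("next after freeing %d: %d (b was %d)\n", a, reserve_temp(), b);
	return 0;
}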
+ unsigned int n; + + get_random_bytes(&n, sizeof(unsigned int)); + return n; +} + +u64 ebpf_has_window_for(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + struct mptcp_rbs_eval_ctx *ctx = *((struct mptcp_rbs_eval_ctx **) &r1); + struct tcp_sock *sbf = *((struct tcp_sock **) &r2); + struct sk_buff *skb = *((struct sk_buff **) &r3); + unsigned int mss_now = tcp_current_mss(ctx->meta_sk); + + /* RBS copied from mptcp_sched.c */ + /* Don't send on this subflow if we bypass the allowed send-window at + * the per-subflow level. Similar to tcp_snd_wnd_test, but manually + * calculated end_seq (because here at this point end_seq is still at + * the meta-level). + */ + if (after(sbf->write_seq + min(skb->len, mss_now), tcp_wnd_end(sbf))) + return 0; + return 1; +} + +u64 ebpf_bw_out_send(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + struct tcp_sock *sbf = *((struct tcp_sock **) &r1); + + return mptcp_rbs_sbf_get_bw_send(mptcp_rbs_get_sbf_cb(sbf)); +} + +u64 ebpf_bw_out_ack(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + struct tcp_sock *sbf = *((struct tcp_sock **) &r1); + + return mptcp_rbs_sbf_get_bw_ack(mptcp_rbs_get_sbf_cb(sbf)); +} + +u64 ebpf_sbf_user(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + struct tcp_sock *sbf = *((struct tcp_sock **) &r1); + + return mptcp_rbs_get_sbf_cb(sbf)->user; +} + +u64 ebpf_rtt_ms(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + struct tcp_sock *sbf = *((struct tcp_sock **) &r1); + + return (sbf->srtt_us >> 3) / 1000; +} + +u64 ebpf_queued(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + struct tcp_sock *sbf = *((struct tcp_sock **) &r1); + + return (sbf->write_seq - sbf->snd_nxt) / sbf->mss_cache; +} + +u64 ebpf_lossy(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + struct tcp_sock *sbf = *((struct tcp_sock **) &r1); + + if (inet_csk((struct sock *) sbf)->icsk_ca_state == TCP_CA_Loss) { + mptcp_debug("sbf_is_available %p loss state -> false\n", sbf); + /* If SACK is disabled, and we got a loss, TCP does not exit + * the loss-state until something above high_seq has been + * acked. (see tcp_try_undo_recovery) + * + * high_seq is the snd_nxt at the moment of the RTO. As soon + * as we have an RTO, we won't push data on the subflow. + * Thus, snd_una can never go beyond high_seq. 
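The window check in ebpf_has_window_for() relies on TCP's modular sequence arithmetic: after(seq1, seq2) is true when seq1 lies beyond seq2 even across 32-bit wraparound, and tcp_wnd_end() is the right edge of the send window. A standalone sketch of that comparison and the resulting check; seq_after, has_window_for and the sample constants are illustrative, not the kernel macros themselves.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Wraparound-safe "a comes after b" on 32-bit sequence numbers. */
static bool seq_after(uint32_t a, uint32_t b)
{
	return (int32_t)(b - a) < 0;
}

/*
 * Sending len bytes starting at write_seq must not pass the window end,
 * mirroring the test quoted from mptcp_sched.c above.
 */
static bool has_window_for(uint32_t write_seq, uint32_t len, uint32_t wnd_end)
{
	return !seq_after(write_seq + len, wnd_end);
}

int main(void)
{
	/* Near the wrap point: 0xffffff00 + 0x200 wraps to 0x100. */
	printf("%d\n", has_window_for(0xffffff00u, 0x200, 0x00000180u)); /* 1 */
	printf("%d\n", has_window_for(0xffffff00u, 0x200, 0x000000c0u)); /* 0 */
	return 0;
}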
+ */ + if (!tcp_is_reno(sbf)) + return true; + else if (sbf->snd_una != sbf->high_seq) + return true; + } + + return false; +} + +u64 ebpf_sent_on_all(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + struct mptcp_rbs_eval_ctx *ctx = *((struct mptcp_rbs_eval_ctx **) &r1); + struct sk_buff *skb = *((struct sk_buff **) &r2); + u32 mask; + struct tcp_sock *sbf; + + mask = TCP_SKB_CB(skb)->path_mask; + sbf = ctx->mpcb->connection_list; + + while (sbf) { + if (!(mask & mptcp_pi_to_flag(sbf->mptcp->path_index))) + return 0; + + sbf = sbf->mptcp->next; + } + + return 1; +} + +u64 ebpf_skb_length(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + struct sk_buff *skb = *((struct sk_buff **) &r1); + + return TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq; +} + +u64 ebpf_skb_seq(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + struct sk_buff *skb = *((struct sk_buff **) &r1); + + return TCP_SKB_CB(skb)->seq; +} + +u64 ebpf_skb_psh(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + struct sk_buff *skb = *((struct sk_buff **) &r1); + + return TCP_SKB_CB(skb)->tcp_flags & TCPHDR_PSH; +} + +u64 ebpf_q_next(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + struct mptcp_rbs_eval_ctx *ctx = *((struct mptcp_rbs_eval_ctx **) &r1); + struct sk_buff *skb_candidate = *((struct sk_buff **) &r2); + struct sk_buff *skb; + + if (skb_candidate) { + if (skb_queue_is_last(&ctx->meta_sk->sk_write_queue, + skb_candidate)) + skb_candidate = NULL; + else + skb_candidate = skb_queue_next( + &ctx->meta_sk->sk_write_queue, skb_candidate); + } else + skb_candidate = ctx->rbs_cb->queue_position; + + skb = mptcp_rbs_next_in_queue(&ctx->meta_sk->sk_write_queue, + skb_candidate); + + return (size_t) skb; +} + +u64 ebpf_qu_next(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + struct mptcp_rbs_eval_ctx *ctx = *((struct mptcp_rbs_eval_ctx **) &r1); + struct sk_buff *skb = *((struct sk_buff **) &r2); + + if (skb) { + if (skb_queue_is_last(&ctx->meta_sk->sk_write_queue, skb)) + skb = NULL; + else { + skb = + skb_queue_next(&ctx->meta_sk->sk_write_queue, skb); + } + } else { + if (ctx->meta_sk->sk_write_queue.qlen == 0) + skb = NULL; + else + skb = skb_peek(&ctx->meta_sk->sk_write_queue); + } + + if (skb == ctx->rbs_cb->queue_position) { + mptcp_debug( + "%s skb %p matches the queue_position, we are at the end\n", + __func__, skb); + skb = NULL; + } + + while (skb && TCP_SKB_CB(skb)->mptcp_rbs.flags_not_in_queue) { + mptcp_debug("%s skips skb %p\n", __func__, skb); + if (skb_queue_is_last(&ctx->meta_sk->sk_write_queue, skb) || + /* Empty because it points to the element in Q */ + skb == ctx->rbs_cb->queue_position) { + skb = NULL; + break; + } else + skb = + skb_queue_next(&ctx->meta_sk->sk_write_queue, skb); + } + + return (size_t) skb; +} + +u64 ebpf_rq_next(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + struct mptcp_rbs_eval_ctx *ctx = *((struct mptcp_rbs_eval_ctx **) &r1); + struct sk_buff *skb_candidate = *((struct sk_buff **) &r2); + struct sk_buff *skb; + + if (skb_candidate) { + if (skb_queue_is_last(&ctx->mpcb->reinject_queue, + skb_candidate)) { + skb_candidate = NULL; + } else { + skb_candidate = skb_queue_next( + &ctx->mpcb->reinject_queue, skb_candidate); + } + } else + skb_candidate = skb_peek(&ctx->mpcb->reinject_queue); + + skb = + mptcp_rbs_next_in_queue(&ctx->mpcb->reinject_queue, skb_candidate); + + return (size_t) skb; +} + +u64 ebpf_subflows_next(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + struct mptcp_rbs_eval_ctx *ctx = *((struct mptcp_rbs_eval_ctx **) &r1); + struct tcp_sock *sbf = *((struct tcp_sock **) &r2); + + if (sbf) + sbf = 
sbf->mptcp->next; + else + sbf = ctx->mpcb->connection_list; + + /* Skip unavailable subflows */ + while (sbf && !mptcp_rbs_sbf_is_available(sbf)) { + sbf = sbf->mptcp->next; + } + + return (size_t) sbf; +} + +u64 ebpf_varlist_expand(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + struct mptcp_rbs_var *var = *((struct mptcp_rbs_var **) &r1); + struct tcp_sock **item = *((struct tcp_sock ***) &r2); + int capacity; + int index; + + BUILD_BUG_ON(offsetof(struct mptcp_rbs_var, sbf_list_value) != + offsetof(struct mptcp_rbs_var, skb_list_value)); + + if (!item) { + index = 0; + capacity = 8; + } else { + index = + (item - var->sbf_list_value) / sizeof(struct tcp_sock *); + capacity = (index + 1) * 2; + } + + var->sbf_list_value = + krealloc(var->sbf_list_value, capacity * sizeof(struct tcp_sock *), + GFP_KERNEL); + memset(&var->sbf_list_value[index], 0, + (capacity - index - 1) * sizeof(struct tcp_sock *)); + var->sbf_list_value[capacity - 1] = (struct tcp_sock *) 1; + + return (size_t) &var->sbf_list_value[index]; +} + +u64 ebpf_skb_list_pop(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + struct mptcp_rbs_eval_ctx *ctx = *((struct mptcp_rbs_eval_ctx **) &r1); + struct sk_buff *skb = *((struct sk_buff **) &r2); + enum mptcp_rbs_value_kind underlying_queue_kind = + (enum mptcp_rbs_value_kind) r3; + + ctx->side_effects = 1; + + if (underlying_queue_kind == VALUE_KIND_Q) { + /* + * Pop an element from Q might be the queue_position or later + */ + if (skb == ctx->rbs_cb->queue_position) { + mptcp_rbs_advance_send_head( + ctx->meta_sk, &ctx->rbs_cb->queue_position); + mptcp_rbs_debug( + "rbs_q_pop returns %p, new queue head %p\n", skb, + ctx->rbs_cb->queue_position); + } else { + /* we can not unlink the packet, as all skbs have to + * stay in the circular buffer */ + mptcp_debug( + "%s sets not_in_queue for packet %p in Q, was %u\n", + __func__, skb, + TCP_SKB_CB(skb)->mptcp_rbs.flags_not_in_queue); + TCP_SKB_CB(skb)->mptcp_rbs.flags_not_in_queue = 1; + } + + return (size_t) skb; + } + + if (underlying_queue_kind == VALUE_KIND_RQ) { + mptcp_debug("%s sets not_in_queue, to_free and to_unlink for " + "packet %p in RQ, was %u\n", + __func__, skb, + TCP_SKB_CB(skb)->mptcp_rbs.flags_not_in_queue); + TCP_SKB_CB(skb)->mptcp_rbs.flags_not_in_queue = 1; + TCP_SKB_CB(skb)->mptcp_rbs.flags_to_free = 1; + TCP_SKB_CB(skb)->mptcp_rbs.flags_to_unlink = 1; + + return (size_t) skb; + } + + if (underlying_queue_kind == VALUE_KIND_QU) { + mptcp_debug( + "%s sets not_in_queue for packet %p in QU, was %u\n", + __func__, skb, + TCP_SKB_CB(skb)->mptcp_rbs.flags_not_in_queue); + TCP_SKB_CB(skb)->mptcp_rbs.flags_not_in_queue = 1; + return (size_t) skb; + } + + BUG_ON(true); + return 0; +} + +static struct bpf_func_proto func_protos[] = { + { + .func = &ebpf_printk, + .gpl_only = false, + .ret_type = RET_VOID, + .arg1_type = ARG_ANYTHING, + .arg2_type = ARG_ANYTHING, + }, + { + .func = &ebpf_add_drop, + .gpl_only = false, + .ret_type = RET_VOID, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, + }, + { + .func = &ebpf_add_push, + .gpl_only = false, + .ret_type = RET_VOID, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_ANYTHING, + }, + { + .func = &ebpf_ktime_get_raw_ms, + .gpl_only = false, + .ret_type = RET_INTEGER, + }, + { + .func = &ebpf_random, + .gpl_only = false, + .ret_type = RET_INTEGER, + }, + { + .func = &ebpf_has_window_for, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = 
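ebpf_varlist_expand() above grows a per-variable pointer array on demand: the array is reallocated to roughly double its size, the new tail is zero-filled (a NULL entry means "no more elements yet"), and a sentinel in the last slot marks the end of the allocation so generated code knows when to call the helper again. A userspace sketch of that layout; expand and END_MARK are illustrative, and a plain void pointer stands in for (struct tcp_sock *)1.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define END_MARK ((void *)1)	/* sentinel stored in the last slot */

/*
 * Grow a NULL-terminated pointer array so that at least one more entry
 * fits behind 'index'; the last slot always holds END_MARK.
 */
static void **expand(void **arr, size_t index, size_t *capacity)
{
	*capacity = arr ? (index + 1) * 2 : 8;
	arr = realloc(arr, *capacity * sizeof(*arr));
	memset(&arr[index], 0, (*capacity - index - 1) * sizeof(*arr));
	arr[*capacity - 1] = END_MARK;
	return arr;
}

int main(void)
{
	size_t cap = 0;
	void **list = expand(NULL, 0, &cap);		/* 8 slots */
	int subflow;

	list[0] = &subflow;
	list = expand(list, 7, &cap);			/* ran into the sentinel */
	printf("capacity now %zu, slot 0 kept: %d\n", cap,
	       list[0] == (void *)&subflow);
	free(list);
	return 0;
}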
ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, + }, + { + .func = &ebpf_bw_out_send, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_ANYTHING, + }, + { + .func = &ebpf_bw_out_ack, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_ANYTHING, + }, + { + .func = &ebpf_rtt_ms, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_ANYTHING, + }, + { + .func = &ebpf_lossy, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_ANYTHING, + }, + { + .func = &ebpf_queued, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_ANYTHING, + }, + { + .func = &ebpf_skb_length, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_ANYTHING, + }, + { + .func = &ebpf_skb_seq, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_ANYTHING, + }, + { + .func = &ebpf_skb_psh, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_ANYTHING, + }, + { + .func = &ebpf_sbf_user, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_ANYTHING, + }, + { + .func = &ebpf_sent_on_all, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + }, + { + .func = &ebpf_q_next, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + }, + { + .func = &ebpf_qu_next, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + }, + { + .func = &ebpf_rq_next, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + }, + { + .func = &ebpf_subflows_next, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + }, + { + .func = &ebpf_varlist_expand, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_ANYTHING, + .arg2_type = ARG_ANYTHING, + }, + { + .func = &ebpf_skb_list_pop, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_ANYTHING, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, + }, +}; + +static const struct bpf_func_proto *get_func_proto(enum bpf_func_id func_id) +{ + int index = func_id - BPF_FUNC_mptcp_rbs_printk; + if (index < 0 || index >= ARRAY_SIZE(func_protos)) + return NULL; + return &func_protos[index]; +} + +static bool is_valid_access(int off, int size, enum bpf_access_type type) +{ + return false; +} + +static struct bpf_verifier_ops bpf_ops = { + .get_func_proto = get_func_proto, + .is_valid_access = is_valid_access, +}; + +static bool gen_value(struct ebpf_ctx *ectx, + const struct mptcp_rbs_value *value, int temp, + struct mptcp_rbs_ebpf_block *null_eblock); + +static bool gen_list_value(struct ebpf_ctx *ectx, + const struct mptcp_rbs_value *value, int temp, + struct mptcp_rbs_ebpf_block *null_eblock, + struct mptcp_rbs_ebpf_block **break_eblock, + struct mptcp_rbs_ebpf_block **cont_eblock, + u64 *reserved_temps_map); + +static bool noinline mptcp_rbs_value_constint_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_constint *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + add_instr_ectx(ectx, EBPF_MOV_IMM(temp, value->value)); + + return false; +} + +static bool noinline mptcp_rbs_value_conststring_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_conststring *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + /* Put the string in strs */ + int len = strlen(value->value); + int idx = ectx->strs_len; + char *new_str; + + ++ectx->strs_len; + 
ectx->strs = + krealloc(ectx->strs, ectx->strs_len * sizeof(char *), GFP_KERNEL); + ectx->strs[idx] = kmalloc(len + 1, GFP_KERNEL); + new_str = ectx->strs[idx]; + memcpy(new_str, value->value, len + 1); + + add_instr_ectx( + ectx, + EBPF_RAW_INSTR(((struct bpf_insn){.code = BPF_LD | BPF_DW | BPF_IMM, + .dst_reg = 0, + .src_reg = 0, + .off = 0, + .imm = (u32)(size_t)(new_str) }), + -1, -1, -1, -1, -1, temp)); + add_instr_ectx( + ectx, EBPF_RAW_INSTR(((struct bpf_insn){ + .code = 0, + .dst_reg = 0, + .src_reg = 0, + .off = 0, + .imm = ((u64)(size_t)(new_str)) >> 32 }), + -1, -1, -1, -1, -1, -1)); + + return false; +} + +static bool noinline mptcp_rbs_value_null_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_null *value, int temp, + struct mptcp_rbs_ebpf_block *null_eblock) +{ + /* TODO We don't need this right? */ + BUG_ON(true); + return false; +} + +static bool noinline mptcp_rbs_value_bool_var_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_bool_var *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + struct mptcp_rbs_ebpf_block *eblock; + + add_instr_ectx( + ectx, + EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(s32)), temp, VARS_TMP, + sizeof(struct mptcp_rbs_var) * value->var_number + + offsetof(struct mptcp_rbs_var, bool_value))); + add_instr_ectx(ectx, EBPF_JMP_IMM(BPF_JEQ, temp, -1)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + + eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + ectx->eblock->next = eblock; + ectx->eblock->next_else = null_eblock; + + ectx->eblock = eblock; + ectx->capacity = 0; + + return true; +} + +static bool noinline mptcp_rbs_value_int_var_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_int_var *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + struct mptcp_rbs_ebpf_block *eblock; + + add_instr_ectx( + ectx, + EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(s64)), temp, VARS_TMP, + sizeof(struct mptcp_rbs_var) * value->var_number + + offsetof(struct mptcp_rbs_var, int_value))); + add_instr_ectx(ectx, EBPF_JMP_IMM(BPF_JEQ, temp, -1)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + + eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + ectx->eblock->next = eblock; + ectx->eblock->next_else = null_eblock; + + ectx->eblock = eblock; + ectx->capacity = 0; + + return true; +} + +static bool noinline mptcp_rbs_value_string_var_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_string_var *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + struct mptcp_rbs_ebpf_block *eblock; + + add_instr_ectx( + ectx, + EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(char *)), temp, VARS_TMP, + sizeof(struct mptcp_rbs_var) * value->var_number + + offsetof(struct mptcp_rbs_var, string_value))); + add_instr_ectx(ectx, EBPF_JMP_IMM(BPF_JEQ, temp, 0)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + + eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + ectx->eblock->next = eblock; + ectx->eblock->next_else = null_eblock; + + ectx->eblock = eblock; + ectx->capacity = 0; + + return true; +} + +static bool noinline mptcp_rbs_value_sbf_var_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_sbf_var *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + struct mptcp_rbs_ebpf_block *eblock; + + add_instr_ectx( + ectx, + EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(struct tcp_sock *)), temp, + VARS_TMP, + sizeof(struct mptcp_rbs_var) * value->var_number + + offsetof(struct mptcp_rbs_var, sbf_value))); + add_instr_ectx(ectx, EBPF_JMP_IMM(BPF_JEQ, temp, 0)); + add_instr_ectx(ectx, 
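String constants are loaded as 64-bit immediates, and the generator above emits them the way eBPF encodes a double-word immediate load: a BPF_LD | BPF_DW | BPF_IMM instruction occupies two slots, the first carrying the low 32 bits of the pointer in imm and the second the high 32 bits. A small sketch of that split and reassembly in plain C, without BPF headers; imm_pair, split64 and join64 are illustrative names.

#include <stdint.h>
#include <stdio.h>

/* Split a 64-bit address the way a double-word immediate load encodes it. */
struct imm_pair { uint32_t lo_imm; uint32_t hi_imm; };

static struct imm_pair split64(uint64_t addr)
{
	return (struct imm_pair){ .lo_imm = (uint32_t)addr,
				  .hi_imm = (uint32_t)(addr >> 32) };
}

static uint64_t join64(struct imm_pair p)
{
	return (uint64_t)p.hi_imm << 32 | p.lo_imm;
}

int main(void)
{
	static const char msg[] = "hello";
	struct imm_pair p = split64((uint64_t)(uintptr_t)msg);

	printf("round trip ok: %d\n",
	       (const char *)(uintptr_t)join64(p) == msg);
	return 0;
}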
EBPF_JMP_OFF()); + + eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + ectx->eblock->next = eblock; + ectx->eblock->next_else = null_eblock; + + ectx->eblock = eblock; + ectx->capacity = 0; + + return true; +} + +static bool noinline mptcp_rbs_value_sbf_list_var_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_sbf_list_var *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + /* This function is only used to determine if the list is NULL */ + struct mptcp_rbs_ebpf_block *eblock; + + add_instr_ectx( + ectx, + EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(struct tcp_sock **)), temp, + VARS_TMP, + sizeof(struct mptcp_rbs_var) * value->var_number + + offsetof(struct mptcp_rbs_var, sbf_list_value))); + add_instr_ectx(ectx, EBPF_JMP_IMM(BPF_JEQ, temp, 0)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + + eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + ectx->eblock->next = eblock; + ectx->eblock->next_else = null_eblock; + + ectx->eblock = eblock; + ectx->capacity = 0; + + return true; +} + +static bool noinline mptcp_rbs_value_sbf_list_var_gen2( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_sbf_list_var *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock, + struct mptcp_rbs_ebpf_block **break_eblock, + struct mptcp_rbs_ebpf_block **cont_eblock, u64 *reserved_temps_map) +{ + int temp_ptr; + int capacity; + struct mptcp_rbs_ebpf_block *start_eblock; + + /* Check if variable is NULL */ + temp_ptr = reserve(ectx); + add_instr_ectx( + ectx, + EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(struct tcp_sock **)), + temp_ptr, VARS_TMP, + sizeof(struct mptcp_rbs_var) * value->var_number + + offsetof(struct mptcp_rbs_var, sbf_list_value))); + add_instr_ectx(ectx, EBPF_JMP_IMM(BPF_JEQ, temp_ptr, 0)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next_else = null_eblock; + ectx->eblock->next = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + ectx->eblock = ectx->eblock->next; + ectx->capacity = 0; + add_instr_ectx( + ectx, EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(struct tcp_sock *)), + temp, temp_ptr, 0)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + + /* Prepare start block */ + start_eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_ATOMIC); + + /* Prepare break block */ + *break_eblock = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_ATOMIC); + + /* Prepare continue block */ + *cont_eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_ATOMIC); + capacity = 0; + add_instr(*cont_eblock, &capacity, + EBPF_ALU_IMM(BPF_ADD, temp_ptr, sizeof(struct tcp_sock *))); + add_instr(*cont_eblock, &capacity, + EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(struct tcp_sock *)), + temp, temp_ptr, 0)); + add_instr(*cont_eblock, &capacity, EBPF_JMP_OFF()); + (*cont_eblock)->next = start_eblock; + + /* while (sbf) { */ + ectx->eblock->next = start_eblock; + ectx->eblock = start_eblock; + ectx->capacity = 0; + add_instr_ectx(ectx, EBPF_JMP_IMM(BPF_JEQ, temp, 0)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_ATOMIC); + ectx->eblock->next_else = *break_eblock; + + ectx->eblock = ectx->eblock->next; + ectx->capacity = 0; + + *reserved_temps_map = TEMP_TO_MAP(temp_ptr); + return true; +} + +static bool noinline mptcp_rbs_value_skb_var_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_skb_var *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + struct mptcp_rbs_ebpf_block *eblock; + + add_instr_ectx( + ectx, + EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(struct 
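The *_var_gen2 helpers above emit the control-flow skeleton of a foreach loop: a start block that tests the current element for NULL, a continue block that advances the element pointer and reloads, and a break block the loop exits into. In C, the generated shape corresponds to the loop below; the types and the summing body are simplified stand-ins for the caller-generated loop body over struct tcp_sock * or struct sk_buff * arrays terminated by a NULL entry.

#include <stdio.h>

/* C equivalent of the generated start/continue/break block structure. */
static long sum_entries(long **list)
{
	long **ptr = list;	/* temp_ptr: reserved for the whole loop */
	long *elem = *ptr;	/* temp: the current element */
	long sum = 0;

	while (elem) {		/* start block: a NULL entry ends the list */
		sum += *elem;	/* loop body generated by the caller */
		++ptr;		/* continue block: advance and reload */
		elem = *ptr;
	}
	return sum;		/* break block continues here */
}

int main(void)
{
	long a = 1, b = 2, c = 3;
	long *list[] = { &a, &b, &c, NULL };

	printf("%ld\n", sum_entries(list));
	return 0;
}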
sk_buff *)), temp, + VARS_TMP, + sizeof(struct mptcp_rbs_var) * value->var_number + + offsetof(struct mptcp_rbs_var, skb_value))); + add_instr_ectx(ectx, EBPF_JMP_IMM(BPF_JEQ, temp, 0)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + + eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + ectx->eblock->next = eblock; + ectx->eblock->next_else = null_eblock; + + ectx->eblock = eblock; + ectx->capacity = 0; + + return true; +} + +static bool noinline mptcp_rbs_value_skb_list_var_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_skb_list_var *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + /* This function is only used to determine if the list is NULL */ + struct mptcp_rbs_ebpf_block *eblock; + + add_instr_ectx( + ectx, + EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(struct sk_buff **)), temp, + VARS_TMP, + sizeof(struct mptcp_rbs_var) * value->var_number + + offsetof(struct mptcp_rbs_var, skb_list_value))); + add_instr_ectx(ectx, EBPF_JMP_IMM(BPF_JEQ, temp, 0)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + + eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + ectx->eblock->next = eblock; + ectx->eblock->next_else = null_eblock; + + ectx->eblock = eblock; + ectx->capacity = 0; + + return true; +} + +static bool noinline mptcp_rbs_value_skb_list_var_gen2( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_skb_list_var *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock, + struct mptcp_rbs_ebpf_block **break_eblock, + struct mptcp_rbs_ebpf_block **cont_eblock, u64 *reserved_temps_map) +{ + int temp_ptr; + int capacity; + struct mptcp_rbs_ebpf_block *start_eblock; + + /* Check if variable is NULL */ + temp_ptr = reserve(ectx); + add_instr_ectx( + ectx, + EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(struct sk_buff **)), temp_ptr, + VARS_TMP, + sizeof(struct mptcp_rbs_var) * value->var_number + + offsetof(struct mptcp_rbs_var, skb_list_value))); + add_instr_ectx(ectx, EBPF_JMP_IMM(BPF_JEQ, temp_ptr, 0)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next_else = null_eblock; + ectx->eblock->next = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + ectx->eblock = ectx->eblock->next; + ectx->capacity = 0; + add_instr_ectx(ectx, + EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(struct sk_buff *)), + temp, temp_ptr, 0)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + + /* Prepare start block */ + start_eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_ATOMIC); + + /* Prepare break block */ + *break_eblock = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_ATOMIC); + + /* Prepare continue block */ + *cont_eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_ATOMIC); + capacity = 0; + add_instr(*cont_eblock, &capacity, + EBPF_ALU_IMM(BPF_ADD, temp_ptr, sizeof(struct sk_buff *))); + add_instr(*cont_eblock, &capacity, + EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(struct sk_buff *)), + temp, temp_ptr, 0)); + add_instr(*cont_eblock, &capacity, EBPF_JMP_OFF()); + (*cont_eblock)->next = start_eblock; + + /* while (skb) { */ + ectx->eblock->next = start_eblock; + ectx->eblock = start_eblock; + ectx->capacity = 0; + add_instr_ectx(ectx, EBPF_JMP_IMM(BPF_JEQ, temp, 0)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_ATOMIC); + ectx->eblock->next_else = *break_eblock; + + ectx->eblock = ectx->eblock->next; + ectx->capacity = 0; + + *reserved_temps_map = TEMP_TO_MAP(temp_ptr); + return true; +} + +static bool noinline mptcp_rbs_value_not_gen( + struct ebpf_ctx *ectx, const struct 
mptcp_rbs_value_not *value, int temp, + struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->operand, + temp, null_eblock); + + add_instr_ectx(ectx, EBPF_ALU_IMM(BPF_XOR, temp, 1)); + + return null_eblock_used; +} + +static bool noinline mptcp_rbs_value_equal_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_equal *value, int temp, + struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + struct mptcp_rbs_ebpf_block *eblock; + struct mptcp_rbs_ebpf_block *last_eblock; + int temp_right; + int capacity; + + null_eblock_used = gen_value( + ectx, (const struct mptcp_rbs_value *) value->left_operand, temp, + null_eblock); + temp_right = reserve(ectx); + null_eblock_used = + gen_value(ectx, + (const struct mptcp_rbs_value *) value->right_operand, + temp_right, null_eblock) || + null_eblock_used; + + add_instr_ectx(ectx, EBPF_JMP_REG(BPF_JNE, temp, temp_right)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + dereserve(ectx, temp_right); + + last_eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + + /* Add instructions to else (unequal) branch */ + eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + capacity = 0; + ectx->eblock->next_else = eblock; + add_instr(eblock, &capacity, EBPF_MOV_IMM(temp, 0)); + add_instr(eblock, &capacity, EBPF_JMP_OFF()); + eblock->next = last_eblock; + + /* Add instructions to then (equal) branch */ + eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + capacity = 0; + ectx->eblock->next = eblock; + add_instr(eblock, &capacity, EBPF_MOV_IMM(temp, 1)); + add_instr(eblock, &capacity, EBPF_JMP_OFF()); + eblock->next = last_eblock; + + /* Set last_block as current */ + ectx->eblock = last_eblock; + ectx->capacity = 0; + + return null_eblock_used; +} + +static bool noinline mptcp_rbs_value_unequal_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_unequal *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + struct mptcp_rbs_ebpf_block *eblock; + struct mptcp_rbs_ebpf_block *last_eblock; + int temp_right; + int capacity; + + null_eblock_used = gen_value( + ectx, (const struct mptcp_rbs_value *) value->left_operand, temp, + null_eblock); + temp_right = reserve(ectx); + null_eblock_used = + gen_value(ectx, + (const struct mptcp_rbs_value *) value->right_operand, + temp_right, null_eblock) || + null_eblock_used; + + add_instr_ectx(ectx, EBPF_JMP_REG(BPF_JEQ, temp, temp_right)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + dereserve(ectx, temp_right); + + last_eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + + /* Add instructions to else (equal) branch */ + eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + capacity = 0; + ectx->eblock->next_else = eblock; + add_instr(eblock, &capacity, EBPF_MOV_IMM(temp, 0)); + add_instr(eblock, &capacity, EBPF_JMP_OFF()); + eblock->next = last_eblock; + + /* Add instructions to then (unequal) branch */ + eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + capacity = 0; + ectx->eblock->next = eblock; + add_instr(eblock, &capacity, EBPF_MOV_IMM(temp, 1)); + add_instr(eblock, &capacity, EBPF_JMP_OFF()); + eblock->next = last_eblock; + + /* Set last_block as current */ + ectx->eblock = last_eblock; + ectx->capacity = 0; + + return null_eblock_used; +} + +static bool noinline mptcp_rbs_value_less_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_less *value, int temp, + struct 
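Because the comparison generators cannot produce a boolean result in a single instruction, each one builds a small diamond: the compare-and-jump block, a branch writing 1, a branch writing 0, and a join block both fall into. The goto-based sketch below mirrors that structure for the equality case; the other comparison generators only change the jump condition and which branch writes 1. equal_diamond is an illustrative name.

#include <stdio.h>

/* Materialize (l == r) through explicit branch blocks, as the generator does. */
static int equal_diamond(long l, long r)
{
	int temp;

	if (l != r)		/* EBPF_JMP_REG(BPF_JNE, temp, temp_right) */
		goto unequal;
	temp = 1;		/* then block: operands were equal */
	goto join;
unequal:
	temp = 0;		/* else block: operands differed */
join:
	return temp;		/* last_eblock */
}

int main(void)
{
	printf("%d %d\n", equal_diamond(4, 4), equal_diamond(4, 5));
	return 0;
}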
mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + struct mptcp_rbs_ebpf_block *eblock; + struct mptcp_rbs_ebpf_block *last_eblock; + int temp_right; + int capacity; + + null_eblock_used = gen_value( + ectx, (const struct mptcp_rbs_value *) value->left_operand, temp, + null_eblock); + temp_right = reserve(ectx); + null_eblock_used = + gen_value(ectx, + (const struct mptcp_rbs_value *) value->right_operand, + temp_right, null_eblock) || + null_eblock_used; + + add_instr_ectx(ectx, EBPF_JMP_REG(BPF_JGE, temp, temp_right)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + dereserve(ectx, temp_right); + + last_eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + + /* Add instructions to else (greater equal) branch */ + eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + capacity = 0; + ectx->eblock->next_else = eblock; + add_instr(eblock, &capacity, EBPF_MOV_IMM(temp, 0)); + add_instr(eblock, &capacity, EBPF_JMP_OFF()); + eblock->next = last_eblock; + + /* Add instructions to then (less) branch */ + eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + capacity = 0; + ectx->eblock->next = eblock; + add_instr(eblock, &capacity, EBPF_MOV_IMM(temp, 1)); + add_instr(eblock, &capacity, EBPF_JMP_OFF()); + eblock->next = last_eblock; + + /* Set last_block as current */ + ectx->eblock = last_eblock; + ectx->capacity = 0; + + return null_eblock_used; +} + +static bool noinline mptcp_rbs_value_less_equal_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_less_equal *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + struct mptcp_rbs_ebpf_block *eblock; + struct mptcp_rbs_ebpf_block *last_eblock; + int temp_right; + int capacity; + + null_eblock_used = gen_value( + ectx, (const struct mptcp_rbs_value *) value->left_operand, temp, + null_eblock); + temp_right = reserve(ectx); + null_eblock_used = + gen_value(ectx, + (const struct mptcp_rbs_value *) value->right_operand, + temp_right, null_eblock) || + null_eblock_used; + + add_instr_ectx(ectx, EBPF_JMP_REG(BPF_JGT, temp, temp_right)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + dereserve(ectx, temp_right); + + last_eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + + /* Add instructions to else (greater) branch */ + eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + capacity = 0; + ectx->eblock->next_else = eblock; + add_instr(eblock, &capacity, EBPF_MOV_IMM(temp, 0)); + add_instr(eblock, &capacity, EBPF_JMP_OFF()); + eblock->next = last_eblock; + + /* Add instructions to then (less equal) branch */ + eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + capacity = 0; + ectx->eblock->next = eblock; + add_instr(eblock, &capacity, EBPF_MOV_IMM(temp, 1)); + add_instr(eblock, &capacity, EBPF_JMP_OFF()); + eblock->next = last_eblock; + + /* Set last_block as current */ + ectx->eblock = last_eblock; + ectx->capacity = 0; + + return null_eblock_used; +} + +static bool noinline mptcp_rbs_value_greater_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_greater *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + struct mptcp_rbs_ebpf_block *eblock; + struct mptcp_rbs_ebpf_block *last_eblock; + int temp_right; + int capacity; + + null_eblock_used = gen_value( + ectx, (const struct mptcp_rbs_value *) value->left_operand, temp, + null_eblock); + temp_right = reserve(ectx); + null_eblock_used = + gen_value(ectx, + (const struct mptcp_rbs_value *) value->right_operand, + 
temp_right, null_eblock) || + null_eblock_used; + + add_instr_ectx(ectx, EBPF_JMP_REG(BPF_JGT, temp, temp_right)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + dereserve(ectx, temp_right); + + last_eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + + /* Add instructions to else (greater) branch */ + eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + capacity = 0; + ectx->eblock->next_else = eblock; + add_instr(eblock, &capacity, EBPF_MOV_IMM(temp, 1)); + add_instr(eblock, &capacity, EBPF_JMP_OFF()); + eblock->next = last_eblock; + + /* Add instructions to then (less equal) branch */ + eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + capacity = 0; + ectx->eblock->next = eblock; + add_instr(eblock, &capacity, EBPF_MOV_IMM(temp, 0)); + add_instr(eblock, &capacity, EBPF_JMP_OFF()); + eblock->next = last_eblock; + + /* Set last_block as current */ + ectx->eblock = last_eblock; + ectx->capacity = 0; + + return null_eblock_used; +} + +static bool noinline mptcp_rbs_value_greater_equal_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_greater_equal *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + struct mptcp_rbs_ebpf_block *eblock; + struct mptcp_rbs_ebpf_block *last_eblock; + int temp_right; + int capacity; + + null_eblock_used = gen_value( + ectx, (const struct mptcp_rbs_value *) value->left_operand, temp, + null_eblock); + temp_right = reserve(ectx); + null_eblock_used = + gen_value(ectx, + (const struct mptcp_rbs_value *) value->right_operand, + temp_right, null_eblock) || + null_eblock_used; + + add_instr_ectx(ectx, EBPF_JMP_REG(BPF_JGE, temp, temp_right)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + dereserve(ectx, temp_right); + + last_eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + + /* Add instructions to else (greater equal) branch */ + eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + capacity = 0; + ectx->eblock->next_else = eblock; + add_instr(eblock, &capacity, EBPF_MOV_IMM(temp, 1)); + add_instr(eblock, &capacity, EBPF_JMP_OFF()); + eblock->next = last_eblock; + + /* Add instructions to then (less) branch */ + eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + capacity = 0; + ectx->eblock->next = eblock; + add_instr(eblock, &capacity, EBPF_MOV_IMM(temp, 0)); + add_instr(eblock, &capacity, EBPF_JMP_OFF()); + eblock->next = last_eblock; + + /* Set last_block as current */ + ectx->eblock = last_eblock; + ectx->capacity = 0; + + return null_eblock_used; +} + +static bool noinline mptcp_rbs_value_and_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_and *value, int temp, + struct mptcp_rbs_ebpf_block *null_eblock) +{ + struct mptcp_rbs_ebpf_block *false_eblock; + int capacity; + + /* Prepare the false block that is used instead of the null block */ + false_eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + capacity = 0; + add_instr(false_eblock, &capacity, EBPF_MOV_IMM(temp, 0)); + add_instr(false_eblock, &capacity, EBPF_JMP_OFF()); + + /* Calculate left operand */ + gen_value(ectx, (const struct mptcp_rbs_value *) value->left_operand, + temp, false_eblock); + + add_instr_ectx(ectx, EBPF_JMP_IMM(BPF_JEQ, temp, 0)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next_else = false_eblock; + ectx->eblock->next = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + ectx->eblock = ectx->eblock->next; + ectx->capacity = 0; + + /* Calculate right operand */ + gen_value(ectx, (const struct 
mptcp_rbs_value *) value->right_operand, + temp, false_eblock); + + /* Create the last block as jump target for the false block */ + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + false_eblock->next = ectx->eblock->next; + ectx->eblock = ectx->eblock->next; + ectx->capacity = 0; + + return false; +} + +static bool noinline mptcp_rbs_value_or_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_or *value, int temp, + struct mptcp_rbs_ebpf_block *null_eblock) +{ + struct mptcp_rbs_ebpf_block *true_eblock; + struct mptcp_rbs_ebpf_block *false_eblock; + int capacity; + + /* Prepare the true block */ + true_eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + capacity = 0; + add_instr(true_eblock, &capacity, EBPF_MOV_IMM(temp, 1)); + add_instr(true_eblock, &capacity, EBPF_JMP_OFF()); + + /* Prepare the false block */ + false_eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + + /* Calculate left operand */ + gen_value(ectx, (const struct mptcp_rbs_value *) value->left_operand, + temp, false_eblock); + + add_instr_ectx(ectx, EBPF_JMP_IMM(BPF_JEQ, temp, 1)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + + ectx->eblock->next_else = true_eblock; + ectx->eblock->next = false_eblock; + ectx->eblock = false_eblock; + ectx->capacity = 0; + + /* Prepare the a new false block */ + false_eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + capacity = 0; + add_instr(false_eblock, &capacity, EBPF_MOV_IMM(temp, 0)); + add_instr(false_eblock, &capacity, EBPF_JMP_OFF()); + + /* Calculate right operand */ + gen_value(ectx, (const struct mptcp_rbs_value *) value->right_operand, + temp, false_eblock); + + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + true_eblock->next = ectx->eblock->next; + false_eblock->next = ectx->eblock->next; + ectx->eblock = ectx->eblock->next; + ectx->capacity = 0; + + return false; +} + +static bool noinline mptcp_rbs_value_add_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_add *value, int temp, + struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + int temp_right; + + null_eblock_used = gen_value( + ectx, (const struct mptcp_rbs_value *) value->left_operand, temp, + null_eblock); + temp_right = reserve(ectx); + null_eblock_used = + gen_value(ectx, + (const struct mptcp_rbs_value *) value->right_operand, + temp_right, null_eblock) || + null_eblock_used; + + add_instr_ectx(ectx, EBPF_ALU32_REG(BPF_ADD, temp, temp_right)); + dereserve(ectx, temp_right); + + return null_eblock_used; +} + +static bool noinline mptcp_rbs_value_subtract_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_subtract *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + int temp_right; + + null_eblock_used = gen_value( + ectx, (const struct mptcp_rbs_value *) value->left_operand, temp, + null_eblock); + temp_right = reserve(ectx); + null_eblock_used = + gen_value(ectx, + (const struct mptcp_rbs_value *) value->right_operand, + temp_right, null_eblock) || + null_eblock_used; + + add_instr_ectx(ectx, EBPF_ALU32_REG(BPF_SUB, temp, temp_right)); + dereserve(ectx, temp_right); + + return null_eblock_used; +} + +static bool noinline mptcp_rbs_value_multiply_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_multiply *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + int temp_right; + + 
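The boolean generators above short-circuit and fold NULL into false: the shared false block (true block for OR) doubles as the NULL target while evaluating an operand, so the right operand is only evaluated when it can still decide the result. A sketch with an explicit "unknown" state standing in for NULL; and_value, right_side and the -1 encoding are illustrative choices for this example only.

#include <stdio.h>

/* -1 models a NULL boolean, 0/1 are ordinary values. */
static int and_value(int left, int (*right)(void))
{
	if (left == -1 || left == 0)	/* NULL or false: shared false block */
		return 0;
	{
		int r = right();	/* only evaluated when left is true */

		return (r == -1 || r == 0) ? 0 : 1;
	}
}

static int right_side(void)
{
	puts("right operand evaluated");
	return 1;
}

int main(void)
{
	printf("%d\n", and_value(-1, right_side));	/* right side skipped */
	printf("%d\n", and_value(1, right_side));	/* right side evaluated */
	return 0;
}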
null_eblock_used = gen_value( + ectx, (const struct mptcp_rbs_value *) value->left_operand, temp, + null_eblock); + temp_right = reserve(ectx); + null_eblock_used = + gen_value(ectx, + (const struct mptcp_rbs_value *) value->right_operand, + temp_right, null_eblock) || + null_eblock_used; + + add_instr_ectx(ectx, EBPF_ALU32_REG(BPF_MUL, temp, temp_right)); + dereserve(ectx, temp_right); + + return null_eblock_used; +} + +static bool noinline mptcp_rbs_value_divide_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_divide *value, int temp, + struct mptcp_rbs_ebpf_block *null_eblock) +{ + int temp_right; + + gen_value(ectx, (const struct mptcp_rbs_value *) value->left_operand, + temp, null_eblock); + temp_right = reserve(ectx); + gen_value(ectx, (const struct mptcp_rbs_value *) value->right_operand, + temp_right, null_eblock); + + /* Check if right operand is 0 */ + add_instr_ectx(ectx, EBPF_JMP_IMM(BPF_JEQ, temp_right, 0)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next_else = null_eblock; + ectx->eblock->next = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + ectx->eblock = ectx->eblock->next; + ectx->capacity = 0; + add_instr_ectx(ectx, EBPF_ALU32_REG(BPF_DIV, temp, temp_right)); + dereserve(ectx, temp_right); + + return true; +} + +static bool noinline mptcp_rbs_value_remainder_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_remainder *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + int temp_right; + + gen_value(ectx, (const struct mptcp_rbs_value *) value->left_operand, + temp, null_eblock); + temp_right = reserve(ectx); + gen_value(ectx, (const struct mptcp_rbs_value *) value->right_operand, + temp_right, null_eblock); + + /* Check if right operand is 0 */ + add_instr_ectx(ectx, EBPF_JMP_IMM(BPF_JEQ, temp_right, 0)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next_else = null_eblock; + ectx->eblock->next = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + ectx->eblock = ectx->eblock->next; + ectx->capacity = 0; + add_instr_ectx(ectx, EBPF_ALU32_REG(BPF_MOD, temp, temp_right)); + dereserve(ectx, temp_right); + + return true; +} + +static bool noinline mptcp_rbs_value_is_null_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_is_null *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + struct mptcp_rbs_ebpf_block *true_eblock; + int capacity; + + /* Prepare true block */ + true_eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + capacity = 0; + add_instr(true_eblock, &capacity, EBPF_MOV_IMM(temp, 1)); + add_instr(true_eblock, &capacity, EBPF_JMP_OFF()); + + /* Calculate the operand */ + if (!gen_value(ectx, (const struct mptcp_rbs_value *) value->operand, + temp, true_eblock)) { + mptcp_rbs_ebpf_block_free(true_eblock); + true_eblock = NULL; + } + + add_instr_ectx(ectx, EBPF_MOV_IMM(temp, 0)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + + ectx->eblock->next = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + if (true_eblock) + true_eblock->next = ectx->eblock->next; + ectx->eblock = ectx->eblock->next; + ectx->capacity = 0; + + return false; +} + +static bool noinline mptcp_rbs_value_is_not_null_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_is_not_null *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + struct mptcp_rbs_ebpf_block *false_eblock; + int capacity; + + /* Prepare false block */ + false_eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + capacity = 0; + add_instr(false_eblock, &capacity, 
EBPF_MOV_IMM(temp, 0)); + add_instr(false_eblock, &capacity, EBPF_JMP_OFF()); + + /* Calculate the operand */ + if (!gen_value(ectx, (const struct mptcp_rbs_value *) value->operand, + temp, false_eblock)) { + mptcp_rbs_ebpf_block_free(false_eblock); + false_eblock = NULL; + } + + add_instr_ectx(ectx, EBPF_MOV_IMM(temp, 1)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + + ectx->eblock->next = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + if (false_eblock) + false_eblock->next = ectx->eblock->next; + ectx->eblock = ectx->eblock->next; + ectx->capacity = 0; + + return false; +} + +static bool noinline mptcp_rbs_value_reg_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_reg *value, int temp, + struct mptcp_rbs_ebpf_block *null_eblock) +{ + add_instr_ectx(ectx, + EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(unsigned int)), + temp, REGS_TMP, + sizeof(unsigned int) * value->reg_number)); + + return false; +} + +static bool noinline mptcp_rbs_value_sbf_list_next_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_sbf_list_next *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + + null_eblock_used = gen_list_value( + ectx, (const struct mptcp_rbs_value *) value->list, temp, + null_eblock, &BLOCK_INFO(ectx->block)->break_eblock, + &BLOCK_INFO(ectx->block)->cont_eblock, + &BLOCK_INFO(ectx->block)->reserved_temps_map); + + return null_eblock_used; +} + +static bool noinline mptcp_rbs_value_skb_list_next_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_skb_list_next *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + + null_eblock_used = gen_list_value( + ectx, (const struct mptcp_rbs_value *) value->list, temp, + null_eblock, &BLOCK_INFO(ectx->block)->break_eblock, + &BLOCK_INFO(ectx->block)->cont_eblock, + &BLOCK_INFO(ectx->block)->reserved_temps_map); + + return null_eblock_used; +} + +/* + * Q sockbuffer list value + */ + +static bool noinline mptcp_rbs_value_q_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_q *value, int temp, + struct mptcp_rbs_ebpf_block *null_eblock) +{ + /* This function is only used to determine if the list is NULL */ + return false; +} + +static bool noinline mptcp_rbs_value_q_gen2( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_q *value, int temp, + struct mptcp_rbs_ebpf_block *null_eblock, + struct mptcp_rbs_ebpf_block **break_eblock, + struct mptcp_rbs_ebpf_block **cont_eblock, u64 *reserved_temps_map) +{ + int capacity; + struct mptcp_rbs_ebpf_block *start_eblock; + + add_instr_ectx(ectx, EBPF_MOV_IMM(temp, 0)); + add_instr_ectx(ectx, + EBPF_CALL(ebpf_q_next, CTX_TMP, temp, -1, -1, -1, temp)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + + /* Prepare start block */ + start_eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + + /* Prepare break block */ + *break_eblock = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + + /* Prepare continue block */ + *cont_eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + capacity = 0; + add_instr(*cont_eblock, &capacity, + EBPF_CALL(ebpf_q_next, CTX_TMP, temp, -1, -1, -1, temp)); + add_instr(*cont_eblock, &capacity, EBPF_JMP_OFF()); + (*cont_eblock)->next = start_eblock; + + /* while (skb) { */ + ectx->eblock->next = start_eblock; + ectx->eblock = start_eblock; + ectx->capacity = 0; + add_instr_ectx(ectx, EBPF_JMP_IMM(BPF_JEQ, temp, 0)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + 
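/*
 * What the Q/QU/RQ/SUBFLOWS *_gen2 generators build is the block-level
 * equivalent of a hand-written iteration over the ebpf_*_next() helpers.
 * A hedged C sketch of the control flow, shown for the Q queue (the other
 * lists differ only in the helper that is called); the loop body is whatever
 * the caller generates between cont_eblock and break_eblock:
 *
 *	skb = ebpf_q_next(ctx, NULL);         // first element
 *	while (skb != NULL) {                 // start_eblock
 *		... caller-generated body ...
 *		skb = ebpf_q_next(ctx, skb);  // cont_eblock
 *	}
 *	                                      // execution resumes at break_eblock
 */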
ectx->eblock->next_else = *break_eblock; + + ectx->eblock = ectx->eblock->next; + ectx->capacity = 0; + + *reserved_temps_map = 0; + return false; +} + +/* + * QU sockbuffer list value + */ + +static bool noinline mptcp_rbs_value_qu_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_qu *value, int temp, + struct mptcp_rbs_ebpf_block *null_eblock) +{ + /* This function is only used to determine if the list is NULL */ + return false; +} + +static bool noinline mptcp_rbs_value_qu_gen2( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_qu *value, int temp, + struct mptcp_rbs_ebpf_block *null_eblock, + struct mptcp_rbs_ebpf_block **break_eblock, + struct mptcp_rbs_ebpf_block **cont_eblock, u64 *reserved_temps_map) +{ + int capacity; + struct mptcp_rbs_ebpf_block *start_eblock; + + add_instr_ectx(ectx, EBPF_MOV_IMM(temp, 0)); + add_instr_ectx( + ectx, EBPF_CALL(ebpf_qu_next, CTX_TMP, temp, -1, -1, -1, temp)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + + /* Prepare start block */ + start_eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + + /* Prepare break block */ + *break_eblock = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + + /* Prepare continue block */ + *cont_eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + capacity = 0; + add_instr(*cont_eblock, &capacity, + EBPF_CALL(ebpf_qu_next, CTX_TMP, temp, -1, -1, -1, temp)); + add_instr(*cont_eblock, &capacity, EBPF_JMP_OFF()); + (*cont_eblock)->next = start_eblock; + + /* while (skb) { */ + ectx->eblock->next = start_eblock; + ectx->eblock = start_eblock; + ectx->capacity = 0; + add_instr_ectx(ectx, EBPF_JMP_IMM(BPF_JEQ, temp, 0)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + ectx->eblock->next_else = *break_eblock; + + ectx->eblock = ectx->eblock->next; + ectx->capacity = 0; + + *reserved_temps_map = 0; + return false; +} + +/* + * RQ sockbuffer list value + */ + +static bool noinline mptcp_rbs_value_rq_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_rq *value, int temp, + struct mptcp_rbs_ebpf_block *null_eblock) +{ + /* This function is only used to determine if the list is NULL */ + return false; +} + +static bool noinline mptcp_rbs_value_rq_gen2( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_rq *value, int temp, + struct mptcp_rbs_ebpf_block *null_eblock, + struct mptcp_rbs_ebpf_block **break_eblock, + struct mptcp_rbs_ebpf_block **cont_eblock, u64 *reserved_temps_map) +{ + int capacity; + struct mptcp_rbs_ebpf_block *start_eblock; + + add_instr_ectx(ectx, EBPF_MOV_IMM(temp, 0)); + add_instr_ectx( + ectx, EBPF_CALL(ebpf_rq_next, CTX_TMP, temp, -1, -1, -1, temp)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + + /* Prepare start block */ + start_eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + + /* Prepare break block */ + *break_eblock = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + + /* Prepare continue block */ + *cont_eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + capacity = 0; + add_instr(*cont_eblock, &capacity, + EBPF_CALL(ebpf_rq_next, CTX_TMP, temp, -1, -1, -1, temp)); + add_instr(*cont_eblock, &capacity, EBPF_JMP_OFF()); + (*cont_eblock)->next = start_eblock; + + /* while (skb) { */ + ectx->eblock->next = start_eblock; + ectx->eblock = start_eblock; + ectx->capacity = 0; + add_instr_ectx(ectx, EBPF_JMP_IMM(BPF_JEQ, temp, 0)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next = + kzalloc(sizeof(struct 
mptcp_rbs_ebpf_block), GFP_KERNEL); + ectx->eblock->next_else = *break_eblock; + + ectx->eblock = ectx->eblock->next; + ectx->capacity = 0; + + *reserved_temps_map = 0; + return false; +} + +/* + * SUBFLOWS subflow list value + */ + +static bool noinline mptcp_rbs_value_subflows_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_subflows *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + /* This function is only used to determine if the list is NULL */ + return false; +} + +static bool noinline mptcp_rbs_value_subflows_gen2( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_subflows *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock, + struct mptcp_rbs_ebpf_block **break_eblock, + struct mptcp_rbs_ebpf_block **cont_eblock, u64 *reserved_temps_map) +{ + int capacity; + struct mptcp_rbs_ebpf_block *start_eblock; + + add_instr_ectx(ectx, EBPF_MOV_IMM(temp, 0)); + add_instr_ectx(ectx, EBPF_CALL(ebpf_subflows_next, CTX_TMP, temp, -1, + -1, -1, temp)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + + /* Prepare start block */ + start_eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + + /* Prepare break block */ + *break_eblock = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + + /* Prepare continue block */ + *cont_eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + capacity = 0; + add_instr( + *cont_eblock, &capacity, + EBPF_CALL(ebpf_subflows_next, CTX_TMP, temp, -1, -1, -1, temp)); + add_instr(*cont_eblock, &capacity, EBPF_JMP_OFF()); + (*cont_eblock)->next = start_eblock; + + /* while (sbf) { */ + ectx->eblock->next = start_eblock; + ectx->eblock = start_eblock; + ectx->capacity = 0; + add_instr_ectx(ectx, EBPF_JMP_IMM(BPF_JEQ, temp, 0)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + ectx->eblock->next_else = *break_eblock; + + ectx->eblock = ectx->eblock->next; + ectx->capacity = 0; + + *reserved_temps_map = 0; + return false; +} + +/* + * CURRENT_TIME_MS integer value + */ + +static bool noinline mptcp_rbs_value_current_time_ms_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_current_time_ms *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + add_instr_ectx( + ectx, EBPF_CALL(ebpf_ktime_get_raw_ms, -1, -1, -1, -1, -1, temp)); + + return false; +} + +/* + * RANDOM integer value + */ + +static bool noinline mptcp_rbs_value_random_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_random *value, int temp, + struct mptcp_rbs_ebpf_block *null_eblock) +{ + add_instr_ectx(ectx, EBPF_CALL(ebpf_random, -1, -1, -1, -1, -1, temp)); + + return false; +} + +/* + * .RTT integer value + */ + +static bool noinline mptcp_rbs_value_sbf_rtt_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_sbf_rtt *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->sbf, temp, + null_eblock); + + add_instr_ectx(ectx, + EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(u32)), temp, temp, + offsetof(struct tcp_sock, srtt_us))); + + return null_eblock_used; +} + +/* + * .RTT_VAR integer value + */ + +static bool noinline mptcp_rbs_value_sbf_rtt_var_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_sbf_rtt_var *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->sbf, temp, + null_eblock); + + 
add_instr_ectx(ectx, + EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(u32)), temp, temp, + offsetof(struct tcp_sock, rttvar_us))); + + return null_eblock_used; +} + +/* + * .RTT_MS integer value + */ +static bool noinline mptcp_rbs_value_sbf_rtt_ms_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_sbf_rtt_ms *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->sbf, temp, + null_eblock); + + add_instr_ectx(ectx, EBPF_CALL(ebpf_rtt_ms, temp, -1, -1, -1, -1, temp)); + + return null_eblock_used; +} + +/* + * .QUEUED integer value + */ +static bool noinline mptcp_rbs_value_sbf_queued_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_sbf_queued *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->sbf, temp, + null_eblock); + + add_instr_ectx(ectx, EBPF_CALL(ebpf_queued, temp, -1, -1, -1, -1, temp)); + + return null_eblock_used; +} + +/* + * .USER integer value + */ + +static bool noinline mptcp_rbs_value_sbf_user_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_sbf_user *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->sbf, temp, + null_eblock); + + add_instr_ectx(ectx, EBPF_CALL(ebpf_sbf_user, temp, -1, -1, -1, -1, temp)); + + return null_eblock_used; +} + +/* + * .IS_BACKUP boolean value + */ + +static bool noinline mptcp_rbs_value_sbf_is_backup_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_sbf_is_backup *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + struct mptcp_tcp_sock bitfields; + int val; + int shift; + int shift2; + int temp2; + + /* We need to find a shift amount to access the low_prio and + * rcv_low_prio bit fields + */ + memset(&bitfields, 0, sizeof(struct mptcp_tcp_sock)); + bitfields.low_prio = 1; + bitfields.rcv_low_prio = 1; + val = *(&bitfields.map_data_len + 1); + shift = 0; + while (!(val & 1)) { + val >>= 1; + ++shift; + } + val >>= 1; + shift2 = shift + 1; + while (!(val & 1)) { + val >>= 1; + ++shift2; + } + + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->sbf, temp, + null_eblock); + + add_instr_ectx( + ectx, + EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(struct mptcp_tcp_sock *)), + temp, temp, offsetof(struct tcp_sock, mptcp))); + add_instr_ectx( + ectx, EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(u16)), temp, temp, + offsetof(struct mptcp_tcp_sock, map_data_len) + + sizeof(u16))); + temp2 = reserve(ectx); + add_instr_ectx(ectx, EBPF_MOV_REG(temp2, temp)); + if (shift) + add_instr_ectx(ectx, EBPF_ALU_IMM(BPF_RSH, temp, shift)); + if (shift2) + add_instr_ectx(ectx, EBPF_ALU_IMM(BPF_RSH, temp2, shift2)); + + add_instr_ectx(ectx, EBPF_ALU_REG(BPF_OR, temp, temp2)); + add_instr_ectx(ectx, EBPF_ALU_IMM(BPF_AND, temp, 1)); + dereserve(ectx, temp2); + + return null_eblock_used; +} + +/* + * .CWND integer value + */ + +static bool noinline mptcp_rbs_value_sbf_cwnd_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_sbf_cwnd *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->sbf, temp, + null_eblock); + + add_instr_ectx(ectx, + EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(u32)), temp, temp, + offsetof(struct tcp_sock, 
snd_cwnd))); + + return null_eblock_used; +} + +/* + * .SKBS_IN_FLIGHT integer value + */ + +static bool noinline mptcp_rbs_value_sbf_skbs_in_flight_gen( + struct ebpf_ctx *ectx, + const struct mptcp_rbs_value_sbf_skbs_in_flight *value, int temp, + struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->sbf, temp, + null_eblock); + + add_instr_ectx(ectx, + EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(u32)), temp, temp, + offsetof(struct tcp_sock, packets_out))); + + return null_eblock_used; +} + +/* + * .LOST_SKBS integer value + */ + +static bool noinline mptcp_rbs_value_sbf_lost_skbs_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_sbf_lost_skbs *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->sbf, temp, + null_eblock); + + add_instr_ectx(ectx, + EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(u32)), temp, temp, + offsetof(struct tcp_sock, lost_out))); + + return null_eblock_used; +} + +/* + * .HAS_WINDOW_FOR boolean value + */ + +static bool noinline mptcp_rbs_value_sbf_has_window_for_gen( + struct ebpf_ctx *ectx, + const struct mptcp_rbs_value_sbf_has_window_for *value, int temp, + struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + int temp_skb; + + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->sbf, temp, + null_eblock); + temp_skb = reserve(ectx); + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->skb, + temp_skb, null_eblock) || + null_eblock_used; + add_instr_ectx(ectx, EBPF_CALL(ebpf_has_window_for, CTX_TMP, temp, + temp_skb, -1, -1, temp)); + dereserve(ectx, temp_skb); + + return null_eblock_used; +} + +/* + * .ID integer value + */ + +static bool noinline mptcp_rbs_value_sbf_id_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_sbf_id *value, int temp, + struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->sbf, temp, + null_eblock); + + add_instr_ectx( + ectx, + EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(struct mptcp_tcp_sock *)), + temp, temp, offsetof(struct tcp_sock, mptcp))); + add_instr_ectx(ectx, + EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(u8)), temp, temp, + offsetof(struct mptcp_tcp_sock, sbf_id))); + + return null_eblock_used; +} + +/* + * .DELAY_IN integer value + */ + +static bool noinline mptcp_rbs_value_sbf_delay_in_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_sbf_delay_in *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->sbf, temp, + null_eblock); + + add_instr_ectx( + ectx, + EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(struct mptcp_tcp_sock *)), + temp, temp, offsetof(struct tcp_sock, mptcp))); + add_instr_ectx( + ectx, + EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(u32)), temp, temp, + offsetof(struct mptcp_tcp_sock, mptcp_sched) + + offsetof(struct mptcp_rbs_sbf_cb, delay_in))); + + return null_eblock_used; +} + +/* + * .DELAY_OUT integer value + */ + +static bool noinline mptcp_rbs_value_sbf_delay_out_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_sbf_delay_out *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->sbf, temp, + 
null_eblock); + + add_instr_ectx( + ectx, + EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(struct mptcp_tcp_sock *)), + temp, temp, offsetof(struct tcp_sock, mptcp))); + add_instr_ectx( + ectx, + EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(u32)), temp, temp, + offsetof(struct mptcp_tcp_sock, mptcp_sched) + + offsetof(struct mptcp_rbs_sbf_cb, delay_out))); + + return null_eblock_used; +} + +/* + * .BW_OUT_ACK integer value + */ + +static bool noinline mptcp_rbs_value_sbf_bw_out_ack_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_sbf_bw_out_ack *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->sbf, temp, + null_eblock); + + add_instr_ectx(ectx, + EBPF_CALL(ebpf_bw_out_ack, temp, -1, -1, -1, -1, temp)); + + return null_eblock_used; +} + +/* + * .BW_OUT_SEND integer value + */ + +static bool noinline mptcp_rbs_value_sbf_bw_out_send_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_sbf_bw_out_send *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->sbf, temp, + null_eblock); + + add_instr_ectx(ectx, + EBPF_CALL(ebpf_bw_out_send, temp, -1, -1, -1, -1, temp)); + + return null_eblock_used; +} + +/* + * .SSTHRESH integer value + */ + +static bool noinline mptcp_rbs_value_sbf_ssthresh_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_sbf_ssthresh *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->sbf, temp, + null_eblock); + + add_instr_ectx(ectx, + EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(u32)), temp, temp, + offsetof(struct tcp_sock, snd_ssthresh))); + + return null_eblock_used; +} + +/* + * .THROTTLED boolean value + */ + +static bool noinline mptcp_rbs_value_sbf_throttled_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_sbf_throttled *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->sbf, temp, + null_eblock); + + add_instr_ectx( + ectx, EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(unsigned long)), temp, + temp, offsetof(struct tcp_sock, tsq_flags))); + add_instr_ectx(ectx, EBPF_ALU_IMM(BPF_RSH, temp, TSQ_THROTTLED)); + add_instr_ectx(ectx, EBPF_ALU_IMM(BPF_AND, temp, 1)); + + return null_eblock_used; +} + +/* + * .LOSSY boolean value + */ + +static bool noinline mptcp_rbs_value_sbf_lossy_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_sbf_lossy *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->sbf, temp, + null_eblock); + + add_instr_ectx(ectx, EBPF_CALL(ebpf_lossy, temp, -1, -1, -1, -1, temp)); + + return null_eblock_used; +} + +/* + * .EMPTY boolean value + */ + +static bool noinline mptcp_rbs_value_sbf_list_empty_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_sbf_list_empty *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + int temp_sbf; + struct mptcp_rbs_ebpf_block *break_eblock; + struct mptcp_rbs_ebpf_block *cont_eblock; + u64 reserved_temps_map; + + /* empty = true; */ + add_instr_ectx(ectx, EBPF_MOV_IMM(temp, 1)); + + temp_sbf = reserve(ectx); + null_eblock_used = gen_list_value( + ectx, (const struct mptcp_rbs_value *) 
value->list, temp_sbf, + null_eblock, &break_eblock, &cont_eblock, &reserved_temps_map); + + /* empty = false; */ + add_instr_ectx(ectx, EBPF_MOV_IMM(temp, 0)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next = break_eblock; + + mptcp_rbs_ebpf_block_free(cont_eblock); + + dereserve(ectx, temp_sbf); + dereserve_all(ectx, reserved_temps_map); + + ectx->eblock = break_eblock; + ectx->capacity = 0; + + return null_eblock_used; +} + +/* + * .FILTER subflow list value + */ + +static bool noinline mptcp_rbs_value_sbf_list_filter_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_sbf_list_filter *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->list, temp, + null_eblock); + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->cond, temp, + null_eblock) || + null_eblock_used; + + return null_eblock_used; +} + +static bool noinline mptcp_rbs_value_sbf_list_filter_gen2( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_sbf_list_filter *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock, + struct mptcp_rbs_ebpf_block **break_eblock, + struct mptcp_rbs_ebpf_block **cont_eblock, u64 *reserved_temps_map) +{ + bool null_eblock_used; + int temp_t; + struct filter_var var; + struct mptcp_rbs_ebpf_block *cont_eblock2; + int capacity; + + null_eblock_used = gen_list_value( + ectx, (const struct mptcp_rbs_value *) value->list, temp, + null_eblock, break_eblock, cont_eblock, reserved_temps_map); + + /* if (cond) */ + var.progress = &value->cur; + var.temp = temp; + PUSH_FILTER_VAR(&ectx->filter_var_list, &var); + temp_t = reserve(ectx); + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->cond, + temp_t, null_eblock) || + null_eblock_used; + add_instr_ectx(ectx, EBPF_JMP_IMM(BPF_JEQ, temp_t, 0)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + dereserve(ectx, temp_t); + POP_FILTER_VAR(&ectx->filter_var_list); + ectx->eblock->next_else = *cont_eblock; + ectx->eblock->next = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + ectx->eblock = ectx->eblock->next; + ectx->capacity = 0; + + /* Create an extra continue block because the value above might free the + * continue block with the assumption that it is not used. 
But actually + * it is used + */ + cont_eblock2 = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + capacity = 0; + add_instr(cont_eblock2, &capacity, EBPF_JMP_OFF()); + cont_eblock2->next = *cont_eblock; + *cont_eblock = cont_eblock2; + + return null_eblock_used; +} + +/* + * Special value holding the actual subflow for FILTER subflow list value + */ + +static bool noinline mptcp_rbs_value_sbf_list_filter_sbf_gen( + struct ebpf_ctx *ectx, + const struct mptcp_rbs_value_sbf_list_filter_sbf *value, int temp, + struct mptcp_rbs_ebpf_block *null_eblock) +{ + struct filter_var *var; + + FOREACH_FILTER_VAR(&ectx->filter_var_list, var, { + if (var->progress == value->cur) { + add_instr_ectx(ectx, EBPF_MOV_REG(temp, var->temp)); + return false; + } + }); + + /* Not found */ + BUG_ON(true); + return false; +} + +/* + * .MAX subflow value + */ + +static bool noinline mptcp_rbs_value_sbf_list_max_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_sbf_list_max *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + int temp_sbf; + struct mptcp_rbs_ebpf_block *break_eblock; + struct mptcp_rbs_ebpf_block *cont_eblock; + u64 reserved_temps_map; + struct filter_var var; + int temp_max; + int temp_t; + + /* temp_max = -1; temp = NULL; */ + temp_max = reserve(ectx); + add_instr_ectx(ectx, EBPF_MOV_IMM(temp_max, -1)); + add_instr_ectx(ectx, EBPF_MOV_IMM(temp, 0)); + + temp_sbf = reserve(ectx); + gen_list_value(ectx, (const struct mptcp_rbs_value *) value->list, + temp_sbf, null_eblock, &break_eblock, &cont_eblock, + &reserved_temps_map); + + /* if (temp_max < item) */ + var.progress = &value->cur; + var.temp = temp_sbf; + temp_t = reserve(ectx); + PUSH_FILTER_VAR(&ectx->filter_var_list, &var); + gen_value(ectx, (const struct mptcp_rbs_value *) value->cond, temp_t, + null_eblock); + add_instr_ectx(ectx, EBPF_JMP_REG(BPF_JSGE, temp_max, temp_t)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + POP_FILTER_VAR(&ectx->filter_var_list); + ectx->eblock->next_else = cont_eblock; + ectx->eblock->next = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + + ectx->eblock = ectx->eblock->next; + ectx->capacity = 0; + add_instr_ectx(ectx, EBPF_MOV_REG(temp_max, temp_t)); + add_instr_ectx(ectx, EBPF_MOV_REG(temp, temp_sbf)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next = cont_eblock; + dereserve(ectx, temp_t); + + ectx->eblock = break_eblock; + ectx->capacity = 0; + + /* Check if list was empty */ + add_instr_ectx(ectx, EBPF_JMP_IMM(BPF_JEQ, temp, 0)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next_else = null_eblock; + ectx->eblock->next = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + ectx->eblock = ectx->eblock->next; + ectx->capacity = 0; + + dereserve(ectx, temp_sbf); + dereserve(ectx, temp_max); + dereserve_all(ectx, reserved_temps_map); + + return true; +} + +/* + * .MIN subflow value + */ + +static bool noinline mptcp_rbs_value_sbf_list_min_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_sbf_list_min *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + int temp_sbf; + struct mptcp_rbs_ebpf_block *break_eblock; + struct mptcp_rbs_ebpf_block *cont_eblock; + u64 reserved_temps_map; + struct filter_var var; + int temp_min; + int temp_t; + + /* temp_min = 0x100000000; temp = NULL; */ + temp_min = reserve(ectx); + add_instr_ectx( + ectx, + EBPF_RAW_INSTR(((struct bpf_insn){.code = BPF_LD | BPF_DW | BPF_IMM, + .dst_reg = temp_min, + .src_reg = 0, + .off = 0, + .imm = 0 }), + -1, -1, -1, -1, -1, temp_min)); + 
add_instr_ectx(ectx, EBPF_RAW_INSTR(((struct bpf_insn){.code = 0, + .dst_reg = 0, + .src_reg = 0, + .off = 0, + .imm = 1 }), + -1, -1, -1, -1, -1, -1)); + add_instr_ectx(ectx, EBPF_MOV_IMM(temp, 0)); + + temp_sbf = reserve(ectx); + gen_list_value(ectx, (const struct mptcp_rbs_value *) value->list, + temp_sbf, null_eblock, &break_eblock, &cont_eblock, + &reserved_temps_map); + + /* if (temp_min > item) */ + var.progress = &value->cur; + var.temp = temp_sbf; + temp_t = reserve(ectx); + PUSH_FILTER_VAR(&ectx->filter_var_list, &var); + gen_value(ectx, (const struct mptcp_rbs_value *) value->cond, temp_t, + null_eblock); + add_instr_ectx(ectx, EBPF_JMP_REG(BPF_JGE, temp_t, temp_min)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + POP_FILTER_VAR(&ectx->filter_var_list); + ectx->eblock->next_else = cont_eblock; + ectx->eblock->next = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + + ectx->eblock = ectx->eblock->next; + ectx->capacity = 0; + add_instr_ectx(ectx, EBPF_MOV_REG(temp_min, temp_t)); + add_instr_ectx(ectx, EBPF_MOV_REG(temp, temp_sbf)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next = cont_eblock; + dereserve(ectx, temp_t); + + ectx->eblock = break_eblock; + ectx->capacity = 0; + + /* Check if list was empty */ + add_instr_ectx(ectx, EBPF_JMP_IMM(BPF_JEQ, temp, 0)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next_else = null_eblock; + ectx->eblock->next = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + ectx->eblock = ectx->eblock->next; + ectx->capacity = 0; + + dereserve(ectx, temp_sbf); + dereserve(ectx, temp_min); + dereserve_all(ectx, reserved_temps_map); + + return true; +} + +/* + * .GET subflow value + */ + +static bool noinline mptcp_rbs_value_sbf_list_get_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_sbf_list_get *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + int temp_i; + struct mptcp_rbs_ebpf_block *break_eblock; + struct mptcp_rbs_ebpf_block *cont_eblock; + u64 reserved_temps_map; + + temp_i = reserve(ectx); + gen_value(ectx, (const struct mptcp_rbs_value *) value->index, temp_i, + null_eblock); + + gen_list_value(ectx, (const struct mptcp_rbs_value *) value->list, temp, + null_eblock, &break_eblock, &cont_eblock, + &reserved_temps_map); + + /* if (i == 0) */ + add_instr_ectx(ectx, EBPF_JMP_IMM(BPF_JEQ, temp_i, 0)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next_else = break_eblock; + ectx->eblock->next = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + ectx->eblock = ectx->eblock->next; + ectx->capacity = 0; + + /* --i; */ + add_instr_ectx(ectx, EBPF_ALU_IMM(BPF_SUB, temp_i, 1)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next = cont_eblock; + + ectx->eblock = break_eblock; + ectx->capacity = 0; + + /* Check if index was found */ + add_instr_ectx(ectx, EBPF_JMP_IMM(BPF_JEQ, temp, 0)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next_else = null_eblock; + ectx->eblock->next = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + ectx->eblock = ectx->eblock->next; + ectx->capacity = 0; + + dereserve(ectx, temp_i); + dereserve_all(ectx, reserved_temps_map); + + return true; +} + +/* + * .COUNT integer value + */ + +static bool noinline mptcp_rbs_value_sbf_list_count_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_sbf_list_count *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + int temp_sbf; + struct mptcp_rbs_ebpf_block *break_eblock; + struct mptcp_rbs_ebpf_block *cont_eblock; + u64 
reserved_temps_map; + + /* i = 0; */ + add_instr_ectx(ectx, EBPF_MOV_IMM(temp, 0)); + + temp_sbf = reserve(ectx); + null_eblock_used = gen_list_value( + ectx, (const struct mptcp_rbs_value *) value->list, temp_sbf, + null_eblock, &break_eblock, &cont_eblock, &reserved_temps_map); + + /* ++i; */ + add_instr_ectx(ectx, EBPF_ALU32_IMM(BPF_ADD, temp, 1)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next = cont_eblock; + + dereserve(ectx, temp_sbf); + dereserve_all(ectx, reserved_temps_map); + + ectx->eblock = break_eblock; + ectx->capacity = 0; + + return null_eblock_used; +} + +/* + * .SUM integer value + */ + +static bool noinline mptcp_rbs_value_sbf_list_sum_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_sbf_list_sum *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + int temp_sbf; + struct mptcp_rbs_ebpf_block *break_eblock; + struct mptcp_rbs_ebpf_block *cont_eblock; + u64 reserved_temps_map; + struct filter_var var; + int temp_t; + + /* sum = 0; */ + add_instr_ectx(ectx, EBPF_MOV_IMM(temp, 0)); + + temp_sbf = reserve(ectx); + null_eblock_used = gen_list_value( + ectx, (const struct mptcp_rbs_value *) value->list, temp_sbf, + null_eblock, &break_eblock, &cont_eblock, &reserved_temps_map); + + /* sum += item; */ + var.progress = &value->cur; + var.temp = temp_sbf; + PUSH_FILTER_VAR(&ectx->filter_var_list, &var); + temp_t = reserve(ectx); + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->cond, + temp_t, null_eblock) || + null_eblock_used; + add_instr_ectx(ectx, EBPF_ALU32_REG(BPF_ADD, temp, temp_t)); + dereserve(ectx, temp_t); + POP_FILTER_VAR(&ectx->filter_var_list); + + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next = cont_eblock; + + dereserve(ectx, temp_sbf); + dereserve_all(ectx, reserved_temps_map); + + ectx->eblock = break_eblock; + ectx->capacity = 0; + + return null_eblock_used; +} + +/* + * .SENT_ON boolean value + */ + +static bool noinline mptcp_rbs_value_skb_sent_on_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_skb_sent_on *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + int temp_sbf; + + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->skb, temp, + null_eblock); + temp_sbf = reserve(ectx); + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->sbf, + temp_sbf, null_eblock) || + null_eblock_used; + + add_instr_ectx( + ectx, + EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(struct mptcp_tcp_sock *)), + temp_sbf, temp_sbf, offsetof(struct tcp_sock, mptcp))); + add_instr_ectx( + ectx, + EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(u8)), temp_sbf, temp_sbf, + offsetof(struct mptcp_tcp_sock, path_index))); + add_instr_ectx(ectx, EBPF_ALU_IMM(BPF_SUB, temp_sbf, 1)); + + add_instr_ectx( + ectx, EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(u32)), temp, temp, + offsetof(struct sk_buff, cb) + + offsetof(struct tcp_skb_cb, path_mask))); + add_instr_ectx(ectx, EBPF_ALU_REG(BPF_RSH, temp, temp_sbf)); + add_instr_ectx(ectx, EBPF_ALU_IMM(BPF_AND, temp, 1)); + dereserve(ectx, temp_sbf); + + return null_eblock_used; +} + +/* + * .SENT_ON_ALL boolean value + */ + +static bool noinline mptcp_rbs_value_skb_sent_on_all_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_skb_sent_on_all *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->skb, temp, + null_eblock); + + add_instr_ectx( + 
ectx, EBPF_CALL(ebpf_sent_on_all, CTX_TMP, temp, -1, -1, -1, temp)); + + return null_eblock_used; +} + + +/* + * .LENGTH integer value + */ + +static bool noinline mptcp_rbs_value_skb_length_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_skb_length *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->skb, temp, + null_eblock); + + add_instr_ectx( + ectx, EBPF_CALL(ebpf_skb_length, temp, -1, -1, -1, -1, temp)); + + return null_eblock_used; +} + + +/* + * .SKB_SEQ integer value + */ + +static bool noinline mptcp_rbs_value_skb_seq_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_skb_seq *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->skb, temp, + null_eblock); + + add_instr_ectx( + ectx, EBPF_CALL(ebpf_skb_seq, temp, -1, -1, -1, -1, temp)); + + return null_eblock_used; +} + + +/* + * .PSH boolean value + */ + +static bool noinline mptcp_rbs_value_skb_psh_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_skb_psh *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->skb, temp, + null_eblock); + + add_instr_ectx( + ectx, EBPF_CALL(ebpf_skb_psh, temp, -1, -1, -1, -1, temp)); + + return null_eblock_used; +} + +/* + * .USER integer value + */ + +static bool noinline mptcp_rbs_value_skb_user_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_skb_user *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + union tcp_skb_cb_rbs bitfields; + int shift; + + /* We need to find a shift amount to access the user bit field */ + bitfields.b = 0; + bitfields.user = 0x1f; + shift = 0; + while (!(bitfields.b & 1)) { + bitfields.b >>= 1; + ++shift; + } + + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->skb, temp, + null_eblock); + + add_instr_ectx( + ectx, EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(u8)), temp, temp, + offsetof(struct sk_buff, cb) + + offsetof(struct tcp_skb_cb, mptcp_rbs))); + if (shift) + add_instr_ectx(ectx, EBPF_ALU_IMM(BPF_RSH, temp, shift)); + if (shift < 3) + add_instr_ectx(ectx, EBPF_ALU_IMM(BPF_AND, temp, 0x1f)); + + return null_eblock_used; +} + +/* + * .EMPTY boolean value + */ + +static bool noinline mptcp_rbs_value_skb_list_empty_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_skb_list_empty *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + int temp_skb; + struct mptcp_rbs_ebpf_block *break_eblock; + struct mptcp_rbs_ebpf_block *cont_eblock; + u64 reserved_temps_map; + + /* empty = true; */ + add_instr_ectx(ectx, EBPF_MOV_IMM(temp, 1)); + + temp_skb = reserve(ectx); + null_eblock_used = gen_list_value( + ectx, (const struct mptcp_rbs_value *) value->list, temp_skb, + null_eblock, &break_eblock, &cont_eblock, &reserved_temps_map); + + /* empty = false; */ + add_instr_ectx(ectx, EBPF_MOV_IMM(temp, 0)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next = break_eblock; + + mptcp_rbs_ebpf_block_free(cont_eblock); + + dereserve(ectx, temp_skb); + dereserve_all(ectx, reserved_temps_map); + + ectx->eblock = break_eblock; + ectx->capacity = 0; + + return null_eblock_used; +} + +/* + * .POP() sockbuffer value + */ + +static bool noinline mptcp_rbs_value_skb_list_pop_gen( + struct 
ebpf_ctx *ectx, const struct mptcp_rbs_value_skb_list_pop *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + struct mptcp_rbs_ebpf_block *break_eblock; + struct mptcp_rbs_ebpf_block *cont_eblock; + u64 reserved_temps_map; + int temp_underlying_queue_kind; + + gen_list_value(ectx, (const struct mptcp_rbs_value *) value->list, temp, + null_eblock, &break_eblock, &cont_eblock, + &reserved_temps_map); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next = break_eblock; + mptcp_rbs_ebpf_block_free(cont_eblock); + + ectx->eblock = break_eblock; + ectx->capacity = 0; + + /* Check if list was empty */ + add_instr_ectx(ectx, EBPF_JMP_IMM(BPF_JEQ, temp, 0)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next_else = null_eblock; + ectx->eblock->next = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + ectx->eblock = ectx->eblock->next; + ectx->capacity = 0; + + dereserve_all(ectx, reserved_temps_map); + + temp_underlying_queue_kind = reserve(ectx); + add_instr_ectx(ectx, EBPF_MOV_IMM(temp_underlying_queue_kind, + value->list->underlying_queue_kind)); + add_instr_ectx(ectx, + EBPF_CALL(ebpf_skb_list_pop, CTX_TMP, temp, + temp_underlying_queue_kind, -1, -1, temp)); + dereserve(ectx, temp_underlying_queue_kind); + return true; +} + +/* + * .FILTER() sockbuffer list value + */ + +static bool noinline mptcp_rbs_value_skb_list_filter_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_skb_list_filter *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->list, temp, + null_eblock); + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->cond, temp, + null_eblock) || + null_eblock_used; + + return null_eblock_used; +} + +static bool noinline mptcp_rbs_value_skb_list_filter_gen2( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_skb_list_filter *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock, + struct mptcp_rbs_ebpf_block **break_eblock, + struct mptcp_rbs_ebpf_block **cont_eblock, u64 *reserved_temps_map) +{ + bool null_eblock_used; + int temp_t; + struct filter_var var; + struct mptcp_rbs_ebpf_block *cont_eblock2; + int capacity; + + null_eblock_used = gen_list_value( + ectx, (const struct mptcp_rbs_value *) value->list, temp, + null_eblock, break_eblock, cont_eblock, reserved_temps_map); + + /* if (cond) */ + var.progress = &value->progress; + var.temp = temp; + PUSH_FILTER_VAR(&ectx->filter_var_list, &var); + temp_t = reserve(ectx); + null_eblock_used = + gen_value(ectx, (const struct mptcp_rbs_value *) value->cond, + temp_t, null_eblock) || + null_eblock_used; + add_instr_ectx(ectx, EBPF_JMP_IMM(BPF_JEQ, temp_t, 0)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + dereserve(ectx, temp_t); + POP_FILTER_VAR(&ectx->filter_var_list); + ectx->eblock->next_else = *cont_eblock; + ectx->eblock->next = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + ectx->eblock = ectx->eblock->next; + ectx->capacity = 0; + + /* Create extra continue block because they value above might free the + * continue block with the assumption that it is not used. 
But actually + * it is used + */ + cont_eblock2 = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + capacity = 0; + add_instr(cont_eblock2, &capacity, EBPF_JMP_OFF()); + cont_eblock2->next = *cont_eblock; + *cont_eblock = cont_eblock2; + + return null_eblock_used; +} + +/* + * Special value holding the actual sockbuffer for FILTER sockbuffer list value + */ + +static bool noinline mptcp_rbs_value_skb_list_filter_skb_gen( + struct ebpf_ctx *ectx, + const struct mptcp_rbs_value_skb_list_filter_skb *value, int temp, + struct mptcp_rbs_ebpf_block *null_eblock) +{ + struct filter_var *var; + + FOREACH_FILTER_VAR(&ectx->filter_var_list, var, { + if (var->progress == value->progress) { + add_instr_ectx(ectx, EBPF_MOV_REG(temp, var->temp)); + return false; + } + }); + + /* Not found */ + BUG_ON(true); + return false; +} + +/* + * .COUNT integer value + */ + +static bool noinline mptcp_rbs_value_skb_list_count_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_skb_list_count *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + bool null_eblock_used; + int temp_skb; + struct mptcp_rbs_ebpf_block *break_eblock; + struct mptcp_rbs_ebpf_block *cont_eblock; + u64 reserved_temps_map; + + /* i = 0; */ + add_instr_ectx(ectx, EBPF_MOV_IMM(temp, 0)); + + temp_skb = reserve(ectx); + null_eblock_used = gen_list_value( + ectx, (const struct mptcp_rbs_value *) value->list, temp_skb, + null_eblock, &break_eblock, &cont_eblock, &reserved_temps_map); + + /* ++i; */ + add_instr_ectx(ectx, EBPF_ALU32_IMM(BPF_ADD, temp, 1)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next = cont_eblock; + + dereserve(ectx, temp_skb); + dereserve_all(ectx, reserved_temps_map); + + ectx->eblock = break_eblock; + ectx->capacity = 0; + + return null_eblock_used; +} + +/* + * .TOP sockbuffer value + */ + +static bool noinline mptcp_rbs_value_skb_list_top_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_skb_list_top *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + struct mptcp_rbs_ebpf_block *break_eblock; + struct mptcp_rbs_ebpf_block *cont_eblock; + u64 reserved_temps_map; + + gen_list_value(ectx, (const struct mptcp_rbs_value *) value->list, temp, + null_eblock, &break_eblock, &cont_eblock, + &reserved_temps_map); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next = break_eblock; + mptcp_rbs_ebpf_block_free(cont_eblock); + + ectx->eblock = break_eblock; + ectx->capacity = 0; + + /* Check if list was empty */ + add_instr_ectx(ectx, EBPF_JMP_IMM(BPF_JEQ, temp, 0)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next_else = null_eblock; + ectx->eblock->next = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + ectx->eblock = ectx->eblock->next; + ectx->capacity = 0; + + dereserve_all(ectx, reserved_temps_map); + + return true; +} + +/* + * .GET sockbuffer value + */ + +static bool noinline mptcp_rbs_value_skb_list_get_gen( + struct ebpf_ctx *ectx, const struct mptcp_rbs_value_skb_list_get *value, + int temp, struct mptcp_rbs_ebpf_block *null_eblock) +{ + printk("%s is not implemented yet for eBPF.", __func__); + BUG_ON(true); + return true; +} + +/** + * Generates the eBPF instructions for a value in the current block. 
For values + * returning lists this function can only check for NULL + * @ectx: The generation context + * @value: The CFG value + * @temp: Temporary where the value should be stored + * @null_eblock: The eBPF block the code should jump to if this value is NULL + * @return: true if the null_eblock was referenced + */ +#pragma GCC diagnostic push +#pragma GCC diagnostic error "-Wswitch" +static bool gen_value(struct ebpf_ctx *ectx, + const struct mptcp_rbs_value *value, int temp, + struct mptcp_rbs_ebpf_block *null_eblock) +{ +#define APPLY_GEN_VALUE(ENUM, STRUCT) \ + case ENUM: \ + return STRUCT##_gen(ectx, (const struct STRUCT *) value, temp, \ + null_eblock); + + switch (value->kind) { + case VALUE_KIND_CONSTINT: + return mptcp_rbs_value_constint_gen( + ectx, (const struct mptcp_rbs_value_constint *) value, temp, + null_eblock); + case VALUE_KIND_CONSTSTRING: + return mptcp_rbs_value_conststring_gen( + ectx, (const struct mptcp_rbs_value_conststring *) value, + temp, null_eblock); + case VALUE_KIND_NULL: + return mptcp_rbs_value_null_gen( + ectx, (const struct mptcp_rbs_value_null *) value, temp, + null_eblock); + case VALUE_KIND_BOOL_VAR: + return mptcp_rbs_value_bool_var_gen( + ectx, (const struct mptcp_rbs_value_bool_var *) value, temp, + null_eblock); + case VALUE_KIND_INT_VAR: + return mptcp_rbs_value_int_var_gen( + ectx, (const struct mptcp_rbs_value_int_var *) value, temp, + null_eblock); + case VALUE_KIND_STRING_VAR: + return mptcp_rbs_value_string_var_gen( + ectx, (const struct mptcp_rbs_value_string_var *) value, + temp, null_eblock); + case VALUE_KIND_SBF_VAR: + return mptcp_rbs_value_sbf_var_gen( + ectx, (const struct mptcp_rbs_value_sbf_var *) value, temp, + null_eblock); + case VALUE_KIND_SBFLIST_VAR: + return mptcp_rbs_value_sbf_list_var_gen( + ectx, (const struct mptcp_rbs_value_sbf_list_var *) value, + temp, null_eblock); + case VALUE_KIND_SKB_VAR: + return mptcp_rbs_value_skb_var_gen( + ectx, (const struct mptcp_rbs_value_skb_var *) value, temp, + null_eblock); + case VALUE_KIND_SKBLIST_VAR: + return mptcp_rbs_value_skb_list_var_gen( + ectx, (const struct mptcp_rbs_value_skb_list_var *) value, + temp, null_eblock); + case VALUE_KIND_NOT: + return mptcp_rbs_value_not_gen( + ectx, (const struct mptcp_rbs_value_not *) value, temp, + null_eblock); + case VALUE_KIND_EQUAL: + return mptcp_rbs_value_equal_gen( + ectx, (const struct mptcp_rbs_value_equal *) value, temp, + null_eblock); + case VALUE_KIND_UNEQUAL: + return mptcp_rbs_value_unequal_gen( + ectx, (const struct mptcp_rbs_value_unequal *) value, temp, + null_eblock); + case VALUE_KIND_LESS: + return mptcp_rbs_value_less_gen( + ectx, (const struct mptcp_rbs_value_less *) value, temp, + null_eblock); + case VALUE_KIND_LESS_EQUAL: + return mptcp_rbs_value_less_equal_gen( + ectx, (const struct mptcp_rbs_value_less_equal *) value, + temp, null_eblock); + case VALUE_KIND_GREATER: + return mptcp_rbs_value_greater_gen( + ectx, (const struct mptcp_rbs_value_greater *) value, temp, + null_eblock); + case VALUE_KIND_GREATER_EQUAL: + return mptcp_rbs_value_greater_equal_gen( + ectx, (const struct mptcp_rbs_value_greater_equal *) value, + temp, null_eblock); + case VALUE_KIND_AND: + return mptcp_rbs_value_and_gen( + ectx, (const struct mptcp_rbs_value_and *) value, temp, + null_eblock); + case VALUE_KIND_OR: + return mptcp_rbs_value_or_gen( + ectx, (const struct mptcp_rbs_value_or *) value, temp, + null_eblock); + case VALUE_KIND_ADD: + return mptcp_rbs_value_add_gen( + ectx, (const struct mptcp_rbs_value_add *) value, temp, + 
null_eblock); + case VALUE_KIND_SUBTRACT: + return mptcp_rbs_value_subtract_gen( + ectx, (const struct mptcp_rbs_value_subtract *) value, temp, + null_eblock); + case VALUE_KIND_MULTIPLY: + return mptcp_rbs_value_multiply_gen( + ectx, (const struct mptcp_rbs_value_multiply *) value, temp, + null_eblock); + case VALUE_KIND_DIVIDE: + return mptcp_rbs_value_divide_gen( + ectx, (const struct mptcp_rbs_value_divide *) value, temp, + null_eblock); + case VALUE_KIND_REMAINDER: + return mptcp_rbs_value_remainder_gen( + ectx, (const struct mptcp_rbs_value_remainder *) value, + temp, null_eblock); + case VALUE_KIND_IS_NULL: + return mptcp_rbs_value_is_null_gen( + ectx, (const struct mptcp_rbs_value_is_null *) value, temp, + null_eblock); + case VALUE_KIND_IS_NOT_NULL: + return mptcp_rbs_value_is_not_null_gen( + ectx, (const struct mptcp_rbs_value_is_not_null *) value, + temp, null_eblock); + case VALUE_KIND_REG: + return mptcp_rbs_value_reg_gen( + ectx, (const struct mptcp_rbs_value_reg *) value, temp, + null_eblock); + case VALUE_KIND_SBFLIST_NEXT: + return mptcp_rbs_value_sbf_list_next_gen( + ectx, (const struct mptcp_rbs_value_sbf_list_next *) value, + temp, null_eblock); + case VALUE_KIND_SKBLIST_NEXT: + return mptcp_rbs_value_skb_list_next_gen( + ectx, (const struct mptcp_rbs_value_skb_list_next *) value, + temp, null_eblock); + +#define RBS_APPLY(ENUM, STR, STRUCT, RETURNTYPE) APPLY_GEN_VALUE(ENUM, STRUCT) +#define RBS_APPLY_ON_SBF(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_GEN_VALUE(ENUM, STRUCT) +#define RBS_APPLY_ON_SBF_LIST(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_GEN_VALUE(ENUM, STRUCT) +#define RBS_APPLY_ON_SKB(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_GEN_VALUE(ENUM, STRUCT) +#define RBS_APPLY_ON_SKB_LIST(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_GEN_VALUE(ENUM, STRUCT) + MPTCP_RBS_VALUE_INFO +#undef RBS_APPLY +#undef RBS_APPLY_ON_SBF +#undef RBS_APPLY_ON_SBF_LIST +#undef RBS_APPLY_ON_SKB +#undef RBS_APPLY_ON_SKB_LIST + } + + return false; +} +#pragma GCC diagnostic pop + +/** + * Generates the eBPF instructions for a list value in the current block + * @ectx: The generation context + * @value: The CFG value + * @temp: Temporary where the item should be stored + * @null_eblock: The eBPF block the code should jump to if this value is NULL + * @break_eblock: Here will be stored the eBPF block that can be jumped to to + * break from the loop + * @cont_eblock: The eBPF block the code can use to continue to the next loop + * iteration. 
Note that this block should directly jump to the start block of + * the loop without any blocks in between + * @reserved_temps_map: Bitmap of the temporaries that were reserved for the loop + * @return: true if the null_eblock was referenced + */ +static bool gen_list_value(struct ebpf_ctx *ectx, + const struct mptcp_rbs_value *value, int temp, + struct mptcp_rbs_ebpf_block *null_eblock, + struct mptcp_rbs_ebpf_block **break_eblock, + struct mptcp_rbs_ebpf_block **cont_eblock, + u64 *reserved_temps_map) +{ +#define APPLY_GEN2_VALUE_TYPE_KIND_BOOL(ENUM, STRUCT) +#define APPLY_GEN2_VALUE_TYPE_KIND_INT(ENUM, STRUCT) +#define APPLY_GEN2_VALUE_TYPE_KIND_STRING(ENUM, STRUCT) +#define APPLY_GEN2_VALUE_TYPE_KIND_SBF(ENUM, STRUCT) +#define APPLY_GEN2_VALUE_TYPE_KIND_SBFLIST(ENUM, STRUCT) \ + case ENUM: \ + return STRUCT##_gen2(ectx, (const struct STRUCT *) value, \ + temp, null_eblock, break_eblock, \ + cont_eblock, reserved_temps_map); +#define APPLY_GEN2_VALUE_TYPE_KIND_SKB(ENUM, STRUCT) +#define APPLY_GEN2_VALUE_TYPE_KIND_SKBLIST(ENUM, STRUCT) \ + APPLY_GEN2_VALUE_TYPE_KIND_SBFLIST(ENUM, STRUCT) +#define APPLY_GEN2_VALUE(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_GEN2_VALUE_##RETURNTYPE(ENUM, STRUCT) + + switch (value->kind) { + case VALUE_KIND_SBFLIST_VAR: + return mptcp_rbs_value_sbf_list_var_gen2( + ectx, (const struct mptcp_rbs_value_sbf_list_var *) value, + temp, null_eblock, break_eblock, cont_eblock, + reserved_temps_map); + case VALUE_KIND_SKBLIST_VAR: + return mptcp_rbs_value_skb_list_var_gen2( + ectx, (const struct mptcp_rbs_value_skb_list_var *) value, + temp, null_eblock, break_eblock, cont_eblock, + reserved_temps_map); +#define RBS_APPLY(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_GEN2_VALUE(ENUM, STR, STRUCT, RETURNTYPE) +#define RBS_APPLY_ON_SBF(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_GEN2_VALUE(ENUM, STR, STRUCT, RETURNTYPE) +#define RBS_APPLY_ON_SBF_LIST(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_GEN2_VALUE(ENUM, STR, STRUCT, RETURNTYPE) +#define RBS_APPLY_ON_SKB(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_GEN2_VALUE(ENUM, STR, STRUCT, RETURNTYPE) +#define RBS_APPLY_ON_SKB_LIST(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_GEN2_VALUE(ENUM, STR, STRUCT, RETURNTYPE) + MPTCP_RBS_VALUE_INFO +#undef RBS_APPLY +#undef RBS_APPLY_ON_SBF +#undef RBS_APPLY_ON_SBF_LIST +#undef RBS_APPLY_ON_SKB +#undef RBS_APPLY_ON_SKB_LIST + default: { + BUG_ON(true); + return false; + } + } +} + +static void gen_smt_drop(struct ebpf_ctx *ectx, + const struct mptcp_rbs_smt_drop *smt) +{ + struct mptcp_rbs_ebpf_block *eblock; + int temp_skb; + int temp_reinject; + + eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + + temp_skb = reserve(ectx); + gen_value(ectx, (struct mptcp_rbs_value *) smt->skb, temp_skb, eblock); + + temp_reinject = reserve(ectx); + add_instr_ectx(ectx, EBPF_MOV_IMM(temp_reinject, smt->skb->reinject)); + add_instr_ectx(ectx, EBPF_CALL(ebpf_add_drop, CTX_TMP, temp_skb, + temp_reinject, -1, -1, -1)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + dereserve(ectx, temp_reinject); + dereserve(ectx, temp_skb); + + ectx->eblock->next = eblock; + ectx->eblock = eblock; + ectx->capacity = 0; +} + +static void gen_smt_print(struct ebpf_ctx *ectx, + const struct mptcp_rbs_smt_print *smt) +{ + struct mptcp_rbs_ebpf_block *eblock; + int temp_str; + int temp_arg = -1; + + eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + + temp_str = reserve(ectx); + gen_value(ectx, (struct mptcp_rbs_value *) smt->msg, temp_str, eblock); + if (smt->arg) { + temp_arg = reserve(ectx); + gen_value(ectx, 
smt->arg, temp_arg, eblock); + } + add_instr_ectx( + ectx, EBPF_CALL(ebpf_printk, temp_str, temp_arg, -1, -1, -1, -1)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + if (temp_arg != -1) + dereserve(ectx, temp_arg); + dereserve(ectx, temp_str); + + ectx->eblock->next = eblock; + ectx->eblock = eblock; + ectx->capacity = 0; +} + +static void gen_smt_push(struct ebpf_ctx *ectx, + const struct mptcp_rbs_smt_push *smt) +{ + struct mptcp_rbs_ebpf_block *eblock; + int temp_sbf; + int temp_skb; + int temp_reinject; + + eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + + temp_sbf = reserve(ectx); + gen_value(ectx, (struct mptcp_rbs_value *) smt->sbf, temp_sbf, eblock); + temp_skb = reserve(ectx); + gen_value(ectx, (struct mptcp_rbs_value *) smt->skb, temp_skb, eblock); + + temp_reinject = reserve(ectx); + add_instr_ectx(ectx, EBPF_MOV_IMM(temp_reinject, smt->skb->reinject)); + add_instr_ectx(ectx, EBPF_CALL(ebpf_add_push, CTX_TMP, temp_sbf, + temp_skb, temp_reinject, -1, -1)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + dereserve(ectx, temp_reinject); + dereserve(ectx, temp_skb); + dereserve(ectx, temp_sbf); + + ectx->eblock->next = eblock; + ectx->eblock = eblock; + ectx->capacity = 0; +} + +static void gen_smt_set(struct ebpf_ctx *ectx, + const struct mptcp_rbs_smt_set *smt) +{ + struct mptcp_rbs_ebpf_block *eblock; + int temp; + + eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + + add_instr_ectx( + ectx, + EBPF_ST_MEM(bytes_to_bpf_size(sizeof(bool)), CTX_TMP, + offsetof(struct mptcp_rbs_eval_ctx, side_effects), 1)); + temp = reserve(ectx); + gen_value(ectx, (struct mptcp_rbs_value *) smt->value, temp, eblock); + + add_instr_ectx(ectx, + EBPF_STX_MEM(bytes_to_bpf_size(sizeof(unsigned int)), + REGS_TMP, temp, + smt->reg_number * sizeof(unsigned int))); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + dereserve(ectx, temp); + + ectx->eblock->next = eblock; + ectx->eblock = eblock; + ectx->capacity = 0; +} + +static void gen_smt_list_var(struct ebpf_ctx *ectx, + const struct mptcp_rbs_smt_var *smt) +{ + struct mptcp_rbs_ebpf_block *null_eblock; + int temp_var; + int temp_item; + int temp_cur; + int temp; + struct mptcp_rbs_ebpf_block *break_eblock; + struct mptcp_rbs_ebpf_block *cont_eblock; + u64 reserved_temps_map; + struct mptcp_rbs_ebpf_block *call_eblock; + int capacity; + + temp_var = reserve(ectx); + temp_item = reserve(ectx); + temp_cur = reserve(ectx); + + /* Prepare null block */ + null_eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + capacity = 0; + add_instr( + null_eblock, &capacity, + EBPF_ST_MEM(bytes_to_bpf_size(sizeof(void *)), VARS_TMP, + sizeof(struct mptcp_rbs_var) * smt->var_number + + offsetof(struct mptcp_rbs_var, sbf_list_value), + 0)); + add_instr(null_eblock, &capacity, EBPF_JMP_OFF()); + + /* Get var pointer */ + add_instr_ectx(ectx, EBPF_MOV_REG(temp_var, VARS_TMP)); + add_instr_ectx( + ectx, EBPF_ALU_IMM(BPF_ADD, temp_var, + sizeof(struct mptcp_rbs_var) * smt->var_number)); + + /* cur = NULL; */ + add_instr_ectx(ectx, EBPF_MOV_IMM(temp_cur, 0)); + add_instr_ectx(ectx, EBPF_CALL(ebpf_varlist_expand, temp_var, temp_cur, + -1, -1, -1, temp_cur)); + + if (gen_list_value(ectx, smt->value, temp_item, null_eblock, + &break_eblock, &cont_eblock, &reserved_temps_map)) + null_eblock->next = break_eblock; + else + mptcp_rbs_ebpf_block_free(null_eblock); + + /* Check if we have to allocate more space */ + temp = reserve(ectx); + add_instr_ectx(ectx, EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(void *)), + temp, temp_cur, 0)); + 
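/* If the value just loaded from the current slot is non-zero, the + * conditional jump emitted next takes the else edge into call_eblock, + * which calls ebpf_varlist_expand() to grow the variable's backing + * array before the item is stored; otherwise control falls through to + * the block that stores the item at *cur and advances cur. + */ +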
add_instr_ectx(ectx, EBPF_JMP_IMM(BPF_JNE, temp, 0)); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + dereserve(ectx, temp); + ectx->eblock->next = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + + /* Fill the call block */ + call_eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + capacity = 0; + add_instr(call_eblock, &capacity, + EBPF_CALL(ebpf_varlist_expand, temp_var, temp_cur, -1, -1, -1, + temp_cur)); + add_instr(call_eblock, &capacity, EBPF_JMP_OFF()); + call_eblock->next = ectx->eblock->next; + ectx->eblock->next_else = call_eblock; + + /* *cur = item; ++cur; */ + ectx->eblock = ectx->eblock->next; + ectx->capacity = 0; + add_instr_ectx(ectx, EBPF_STX_MEM(bytes_to_bpf_size(sizeof(void *)), + temp_cur, temp_item, 0)); + add_instr_ectx(ectx, EBPF_ALU_IMM(BPF_ADD, temp_cur, sizeof(void *))); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next = cont_eblock; + + ectx->eblock = break_eblock; + ectx->capacity = 0; + add_instr_ectx(ectx, EBPF_ST_MEM(bytes_to_bpf_size(sizeof(void *)), + temp_cur, 0, 0)); + + dereserve(ectx, temp_cur); + dereserve(ectx, temp_item); + dereserve(ectx, temp_var); + dereserve_all(ectx, reserved_temps_map); +} + +#pragma GCC diagnostic push +#pragma GCC diagnostic error "-Wswitch" +static void gen_smt_var(struct ebpf_ctx *ectx, + const struct mptcp_rbs_smt_var *smt) +{ + enum mptcp_rbs_type_kind type; + struct mptcp_rbs_ebpf_block *null_eblock; + int capacity; + int temp; + + /* var->type = type; */ + type = mptcp_rbs_value_get_type(smt->value->kind); + add_instr_ectx( + ectx, EBPF_ST_MEM( + bytes_to_bpf_size(sizeof(enum mptcp_rbs_type_kind)), + VARS_TMP, sizeof(struct mptcp_rbs_var) * smt->var_number + + offsetof(struct mptcp_rbs_var, type), + type)); + + /* We do not support lazy evaluation + * var->is_lazy = false; + */ + add_instr_ectx( + ectx, EBPF_ST_MEM(bytes_to_bpf_size(sizeof(bool)), VARS_TMP, + sizeof(struct mptcp_rbs_var) * smt->var_number + + offsetof(struct mptcp_rbs_var, is_lazy), + false)); + + switch (type) { + case TYPE_KIND_NULL: + break; + case TYPE_KIND_BOOL: { + temp = reserve(ectx); + + /* Prepare null block */ + null_eblock = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + capacity = 0; + add_instr(null_eblock, &capacity, EBPF_MOV_IMM(temp, -1)); + add_instr(null_eblock, &capacity, EBPF_JMP_OFF()); + + if (!gen_value(ectx, smt->value, temp, null_eblock)) + mptcp_rbs_ebpf_block_free(null_eblock); + else { + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next = kzalloc( + sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + null_eblock->next = ectx->eblock->next; + ectx->eblock = ectx->eblock->next; + ectx->capacity = 0; + } + + add_instr_ectx( + ectx, EBPF_STX_MEM( + bytes_to_bpf_size(sizeof(s32)), VARS_TMP, temp, + sizeof(struct mptcp_rbs_var) * smt->var_number + + offsetof(struct mptcp_rbs_var, bool_value))); + + dereserve(ectx, temp); + break; + } + case TYPE_KIND_INT: { + temp = reserve(ectx); + + /* Prepare null block */ + null_eblock = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + capacity = 0; + add_instr(null_eblock, &capacity, EBPF_MOV_IMM(temp, -1)); + add_instr(null_eblock, &capacity, EBPF_JMP_OFF()); + + if (!gen_value(ectx, smt->value, temp, null_eblock)) + mptcp_rbs_ebpf_block_free(null_eblock); + else { + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next = kzalloc( + sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + null_eblock->next = ectx->eblock->next; + ectx->eblock = ectx->eblock->next; + ectx->capacity = 0; + } + + add_instr_ectx( + ectx, 
EBPF_STX_MEM( + bytes_to_bpf_size(sizeof(s64)), VARS_TMP, temp, + sizeof(struct mptcp_rbs_var) * smt->var_number + + offsetof(struct mptcp_rbs_var, int_value))); + + dereserve(ectx, temp); + break; + } + case TYPE_KIND_STRING: { + temp = reserve(ectx); + + /* Prepare null block */ + null_eblock = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + capacity = 0; + add_instr(null_eblock, &capacity, EBPF_MOV_IMM(temp, 0)); + add_instr(null_eblock, &capacity, EBPF_JMP_OFF()); + + if (!gen_value(ectx, smt->value, temp, null_eblock)) + mptcp_rbs_ebpf_block_free(null_eblock); + else { + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next = kzalloc( + sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + null_eblock->next = ectx->eblock->next; + ectx->eblock = ectx->eblock->next; + ectx->capacity = 0; + } + + add_instr_ectx( + ectx, + EBPF_STX_MEM( + bytes_to_bpf_size(sizeof(char *)), VARS_TMP, temp, + sizeof(struct mptcp_rbs_var) * smt->var_number + + offsetof(struct mptcp_rbs_var, string_value))); + + dereserve(ectx, temp); + break; + } + case TYPE_KIND_SBF: { + temp = reserve(ectx); + + /* Prepare null block */ + null_eblock = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + capacity = 0; + add_instr(null_eblock, &capacity, EBPF_MOV_IMM(temp, 0)); + add_instr(null_eblock, &capacity, EBPF_JMP_OFF()); + + if (!gen_value(ectx, smt->value, temp, null_eblock)) + mptcp_rbs_ebpf_block_free(null_eblock); + else { + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next = kzalloc( + sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + if (smt->value->kind == VALUE_KIND_SBFLIST_NEXT) + ectx->next_var_null_eblock = null_eblock; + else + null_eblock->next = ectx->eblock->next; + ectx->eblock = ectx->eblock->next; + ectx->capacity = 0; + } + + add_instr_ectx( + ectx, + EBPF_STX_MEM( + bytes_to_bpf_size(sizeof(struct tcp_sock *)), VARS_TMP, + temp, sizeof(struct mptcp_rbs_var) * smt->var_number + + offsetof(struct mptcp_rbs_var, sbf_value))); + + dereserve(ectx, temp); + + /* Set next_var if the value was a *_NEXT call */ + if (smt->value->kind == VALUE_KIND_SBFLIST_NEXT) + ectx->next_var = smt->var_number; + break; + } + case TYPE_KIND_SBFLIST: + case TYPE_KIND_SKBLIST: { + gen_smt_list_var(ectx, smt); + break; + } + case TYPE_KIND_SKB: { + temp = reserve(ectx); + + /* Prepare null block */ + null_eblock = + kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + capacity = 0; + add_instr(null_eblock, &capacity, EBPF_MOV_IMM(temp, 0)); + add_instr(null_eblock, &capacity, EBPF_JMP_OFF()); + + if (!gen_value(ectx, smt->value, temp, null_eblock)) + mptcp_rbs_ebpf_block_free(null_eblock); + else { + add_instr_ectx(ectx, EBPF_JMP_OFF()); + ectx->eblock->next = kzalloc( + sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + if (smt->value->kind == VALUE_KIND_SKBLIST_NEXT) + ectx->next_var_null_eblock = null_eblock; + else + null_eblock->next = ectx->eblock->next; + ectx->eblock = ectx->eblock->next; + ectx->capacity = 0; + } + + add_instr_ectx( + ectx, + EBPF_STX_MEM( + bytes_to_bpf_size(sizeof(struct sk_buff *)), VARS_TMP, + temp, sizeof(struct mptcp_rbs_var) * smt->var_number + + offsetof(struct mptcp_rbs_var, skb_value))); + dereserve(ectx, temp); + + /* Set next_var if the value was a *_NEXT call */ + if (smt->value->kind == VALUE_KIND_SKBLIST_NEXT) + ectx->next_var = smt->var_number; + break; + } + } +} +#pragma GCC diagnostic pop + +/** + * Generates the eBPF instructions for a statement in the current block + * @ectx: The generation context + * @smt: The CFG statement + */ 
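+/* + * gen_smt() below dispatches on smt->kind to the gen_smt_* helpers above, + * which all follow the same shape: allocate a fresh continuation eblock, + * reserve temporaries, evaluate each operand with gen_value() (passing the + * continuation block as the NULL target, so a NULL operand simply skips the + * statement), emit an EBPF_CALL to the matching runtime helper or a store, + * terminate the block with EBPF_JMP_OFF(), release the temporaries and make + * the continuation block the new current block. As a schematic (temporary + * numbers are illustrative only), gen_smt_drop() emits roughly: + * + *   t6 <- <skb value>                  (gen_value) + *   t7 <- reinject flag                (EBPF_MOV_IMM) + *   call ebpf_add_drop(ctx, t6, t7)    (EBPF_CALL) + *   ja   <continuation block>          (EBPF_JMP_OFF) + */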
+#pragma GCC diagnostic push +#pragma GCC diagnostic error "-Wswitch" +static void gen_smt(struct ebpf_ctx *ectx, const struct mptcp_rbs_smt *smt) +{ + switch (smt->kind) { + case SMT_KIND_DROP: { + gen_smt_drop(ectx, (const struct mptcp_rbs_smt_drop *) smt); + break; + } + case SMT_KIND_PRINT: { + gen_smt_print(ectx, (const struct mptcp_rbs_smt_print *) smt); + break; + } + case SMT_KIND_PUSH: { + gen_smt_push(ectx, (const struct mptcp_rbs_smt_push *) smt); + break; + } + case SMT_KIND_SET: { + gen_smt_set(ectx, (const struct mptcp_rbs_smt_set *) smt); + break; + } + case SMT_KIND_VAR: { + gen_smt_var(ectx, (const struct mptcp_rbs_smt_var *) smt); + break; + } + case SMT_KIND_VOID: { + /* We do not generate eBPF code for VOID */ + break; + } + case SMT_KIND_EBPF: { + /* Cannot generate eBPF code from eBPF */ + BUG_ON(true); + break; + } + case SMT_KIND_SET_USER: { + printk("eBPF for set user is not implemented yet\n"); + BUG_ON(true); + break; + } + } +} +#pragma GCC diagnostic pop + +static bool path_exists_helper(const struct mptcp_rbs_cfg_block *a, + const struct mptcp_rbs_cfg_block *b, + struct mptcp_rbs_cfg_block_list *list) +{ + struct mptcp_rbs_cfg_block *block; + FOREACH_BLOCK(list, block, { + if (a == block) + return false; + }); + ADD_BLOCK(list, (struct mptcp_rbs_cfg_block *) a); + + if (a == b) + return true; + if (a->next && path_exists_helper(a->next, b, list)) + return true; + if (a->next_else && path_exists_helper(a->next_else, b, list)) + return true; + + return false; +} + +static bool path_exists(const struct mptcp_rbs_cfg_block *a, + const struct mptcp_rbs_cfg_block *b) +{ + struct mptcp_rbs_cfg_block_list list; + bool found; + + INIT_BLOCK_LIST(&list); + found = path_exists_helper(a, b, &list); + FREE_BLOCK_LIST(&list); + + return found; +} + +/** + * Generates the eBPF instructions for a CFG block and its successors + * @ectx: The generation context + * @block: The CFG block + * @list: List with CFG blocks that were already processed + * @return: The generated eBPF block + */ +static struct mptcp_rbs_ebpf_block *gen_block( + struct ebpf_ctx *ectx, struct mptcp_rbs_cfg_block *block, + struct mptcp_rbs_cfg_block_list *list) +{ + struct mptcp_rbs_cfg_block *block2; + struct mptcp_rbs_cfg_block *old_block; + struct mptcp_rbs_ebpf_block *old_eblock; + int old_capacity; + int old_next_var; + struct mptcp_rbs_ebpf_block *eblock; + struct mptcp_rbs_ebpf_block *else_eblock; + struct mptcp_rbs_smt *smt; + int temp; + int capacity; + + /* Check if the block is already in the list */ + FOREACH_BLOCK(list, block2, { + if (block == block2) + return BLOCK_INFO(block)->eblock; + }); + ADD_BLOCK(list, block); + + /* Remember current block and its capacity */ + old_block = ectx->block; + old_eblock = ectx->eblock; + old_capacity = ectx->capacity; + old_next_var = ectx->next_var; + + /* Create eBPF block */ + eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + block->tag = kzalloc(sizeof(struct block_info), GFP_KERNEL); + BLOCK_INFO(block)->eblock = eblock; + ectx->block = block; + ectx->eblock = eblock; + ectx->capacity = 0; + + /* Generate code of the statements */ + ectx->next_var = -1; + smt = block->first_smt; + while (smt) { + /* Make sure that no statement follows after a var statement + * with *_NEXT value + */ + BUG_ON(ectx->next_var != -1); + + gen_smt(ectx, smt); + smt = smt->next; + } + + /* Evaluate condition if there is one */ + if (block->condition) { + if (ectx->next_var != -1) { + /* Must be part of a foreach loop */ + BUG_ON(block->condition->kind != 
+ VALUE_KIND_IS_NOT_NULL); + BUG_ON(((const struct mptcp_rbs_value_is_not_null *) + block->condition) + ->operand->kind != VALUE_KIND_SBF_VAR && + ((const struct mptcp_rbs_value_is_not_null *) + block->condition) + ->operand->kind != VALUE_KIND_SKB_VAR); + + /* Release temporaries of foreach loop because the else + * branch is outside of it + */ + dereserve_all(ectx, + BLOCK_INFO(block)->reserved_temps_map); + + else_eblock = BLOCK_INFO(block)->break_eblock; + capacity = 0; + add_instr(else_eblock, &capacity, EBPF_JMP_OFF()); + + /* Set the jump target of the null block after the whole + * loop + */ + if (ectx->next_var_null_eblock) + ectx->next_var_null_eblock->next = else_eblock; + } else { + else_eblock = kzalloc( + sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + capacity = 0; + add_instr(else_eblock, &capacity, EBPF_JMP_OFF()); + + temp = reserve(ectx); + gen_value(ectx, + (struct mptcp_rbs_value *) block->condition, + temp, else_eblock); + add_instr_ectx(ectx, EBPF_JMP_IMM(BPF_JEQ, temp, 0)); + dereserve(ectx, temp); + ectx->eblock->next_else = else_eblock; + } + + if (block->next_else) { + else_eblock->next = + gen_block(ectx, block->next_else, list); + } else { + else_eblock->next = kzalloc( + sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + capacity = 0; + add_instr(else_eblock->next, &capacity, EBPF_EXIT()); + } + + if (ectx->next_var != -1) { + /* The next branch is in the foreach loop -> reserve the + * temporaries of the foreach loop again + */ + reserve_all(ectx, + BLOCK_INFO(block)->reserved_temps_map); + } + } else + BUG_ON(ectx->next_var != -1); + + /* Add jump to next block */ + if (block->next) { + /* Check if the jump target is a loop start */ + bool is_loop_start = false; + FOREACH_BLOCK(list, block2, { + if (block->next == block2) { + is_loop_start = + BLOCK_INFO(block2)->break_eblock != NULL; + break; + } + }); + + if (is_loop_start) { + /* If there exists a path from block->next to block + * without using the break block we have to use the + * continue block + */ + if (path_exists(block->next->next, block)) + ectx->eblock->next = + BLOCK_INFO(block->next)->cont_eblock; + else + ectx->eblock->next = + BLOCK_INFO(block->next)->eblock; + } else + ectx->eblock->next = gen_block(ectx, block->next, list); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + } else + add_instr_ectx(ectx, EBPF_EXIT()); + + /* Reset current block and its capacity */ + if (ectx->next_var != -1) { + /* Release temporaries of foreach loop because we are now + * outside of it + */ + dereserve_all(ectx, BLOCK_INFO(block)->reserved_temps_map); + } + + ectx->block = old_block; + ectx->eblock = old_eblock; + ectx->capacity = old_capacity; + ectx->next_var = old_next_var; + + return eblock; +} + +/** + * Generates the eBPF instructions for a CFG + * @ectx: The generation context + * @return: The first eBPF block + */ +static struct mptcp_rbs_ebpf_block *gen(struct ebpf_ctx *ectx) +{ + struct mptcp_rbs_cfg_block_list list; + struct mptcp_rbs_ebpf_block *first_eblock; + struct mptcp_rbs_cfg_block *block; + + /* Generate start block that puts values in fixed temporaries */ + first_eblock = kzalloc(sizeof(struct mptcp_rbs_ebpf_block), GFP_KERNEL); + ectx->eblock = first_eblock; + ectx->capacity = 0; + ectx->used_temps = FIXED_TMP_COUNT; + ectx->used_temps_map = ((u64) -1ll) >> (64 - FIXED_TMP_COUNT); + add_instr_ectx(ectx, EBPF_MOV_RAW_REG(CTX_TMP, BPF_REG_ARG1)); + add_instr_ectx(ectx, EBPF_MOV_REG(VARS_TMP, CTX_TMP)); + add_instr_ectx(ectx, + EBPF_ALU_IMM(BPF_ADD, VARS_TMP, + offsetof(struct 
mptcp_rbs_eval_ctx, vars))); + add_instr_ectx( + ectx, + EBPF_LDX_MEM(bytes_to_bpf_size(sizeof(void *)), REGS_TMP, CTX_TMP, + offsetof(struct mptcp_rbs_eval_ctx, rbs_cb))); + add_instr_ectx(ectx, EBPF_ALU_IMM(BPF_ADD, REGS_TMP, + offsetof(struct mptcp_rbs_cb, regs))); + add_instr_ectx(ectx, EBPF_JMP_OFF()); + + INIT_BLOCK_LIST(&list); + first_eblock->next = + gen_block(ectx, ectx->ctx->variation->first_block, &list); + FOREACH_BLOCK(&list, block, kfree(block->tag)); + FREE_BLOCK_LIST(&list); + + return first_eblock; +} + +void mptcp_rbs_opt_ebpf(struct mptcp_rbs_opt_ctx *ctx) +{ + struct ebpf_ctx ectx; + struct bpf_prog *prog; + struct mptcp_rbs_cfg_block *block; + struct mptcp_rbs_ebpf_block *first_eblock; + + /* Generate code */ + memset(&ectx, 0, sizeof(struct ebpf_ctx)); + ectx.ctx = ctx; + INIT_FILTER_VAR_LIST(&ectx.filter_var_list); + first_eblock = gen(&ectx); + FREE_FILTER_VAR_LIST(&ectx.filter_var_list); + + /* Register functions that can be called from eBPF */ + prog = bpf_prog_alloc(bpf_prog_size(1), 0); + atomic_set(&prog->aux->refcnt, 1); + prog->gpl_compatible = true; + prog->aux->ops = &bpf_ops; + prog->type = BPF_PROG_TYPE_RBS; + + /* Run register allocator */ + prog = mptcp_rbs_ebpf_alloc_regs(first_eblock, ectx.used_temps, prog); + mptcp_rbs_ebpf_blocks_free(first_eblock); + + /* JIT the result */ + bpf_prog_select_runtime(prog); + + /* Create eBPF statement and replace whole CFG with it */ + mptcp_rbs_cfg_blocks_free(ctx->variation->first_block); + + block = kzalloc(sizeof(struct mptcp_rbs_cfg_block), GFP_KERNEL); + block->first_smt = (struct mptcp_rbs_smt *) mptcp_rbs_smt_ebpf_new( + prog, ectx.strs, ectx.strs_len); + ctx->variation->first_block = block; +} + +static struct bpf_prog_type_list rbs_bpf_tl = { + .ops = &bpf_ops, + .type = BPF_PROG_TYPE_RBS, +}; + +static int __init register_ebpf_prog_type(void) +{ + bpf_register_prog_type(&rbs_bpf_tl); + return 0; +} +module_init(register_ebpf_prog_type); diff --git a/net/mptcp/mptcp_rbs_optimizer_ebpf.h b/net/mptcp/mptcp_rbs_optimizer_ebpf.h new file mode 100644 index 0000000000000..988d0bc7ff52b --- /dev/null +++ b/net/mptcp/mptcp_rbs_optimizer_ebpf.h @@ -0,0 +1,13 @@ +#ifndef _MPTCP_RBS_OPTIMIZER_EBPF_H +#define _MPTCP_RBS_OPTIMIZER_EBPF_H + +struct mptcp_rbs_opt_ctx; + +/** + * eBPF code generation: + * Generates eBPF code and replaces the existing CFG with it + * @ctx: The optimization context + */ +void mptcp_rbs_opt_ebpf(struct mptcp_rbs_opt_ctx *ctx); + +#endif diff --git a/net/mptcp/mptcp_rbs_optimizer_ebpf_disasm.c b/net/mptcp/mptcp_rbs_optimizer_ebpf_disasm.c new file mode 100644 index 0000000000000..8ac5cd00b81eb --- /dev/null +++ b/net/mptcp/mptcp_rbs_optimizer_ebpf_disasm.c @@ -0,0 +1,309 @@ +#include "mptcp_rbs_optimizer_ebpf_disasm.h" +#include "mptcp_rbs_parser.h" +#include +#include + +int mptcp_rbs_ebpf_dump(const struct bpf_prog *prog, char *buffer) +{ + int len = 0; + int i; + +#define PRINT(fmt, ...) len += sprintf_null(&buffer, fmt, ##__VA_ARGS__) +#define PRINT_ALU(op) \ + if (BPF_SRC(instr->code)) \ + PRINT(op "%s r%d, r%d\n", cl == BPF_ALU ? "w" : "", \ + instr->dst_reg, instr->src_reg); \ + else \ + PRINT(op "%s r%d, %d\n", cl == BPF_ALU ? 
"w" : "", \ + instr->dst_reg, instr->imm); +#define PRINT_JMP(op) \ + if (BPF_SRC(instr->code)) \ + PRINT(op " r%d, r%d, 0x%x\n", instr->dst_reg, instr->src_reg, \ + (i + instr->off + 1) * sizeof(struct bpf_insn)); \ + else \ + PRINT(op " r%d, %d, 0x%x\n", instr->dst_reg, instr->imm, \ + (i + instr->off + 1) * sizeof(struct bpf_insn)); +#define PRINT_LDX(op) \ + if (instr->off > 0) \ + PRINT(op " r%d, [r%d + %d]\n", instr->dst_reg, instr->src_reg, \ + instr->off); \ + else if (instr->off < 0) \ + PRINT(op " r%d, [r%d - %d]\n", instr->dst_reg, instr->src_reg, \ + instr->off * -1); \ + else \ + PRINT(op " r%d, [r%d]\n", instr->dst_reg, instr->src_reg); +#define PRINT_ST(op) \ + if (instr->off > 0) \ + PRINT(op " [r%d + %d], %d\n", instr->dst_reg, instr->off, \ + instr->imm); \ + else if (instr->off < 0) \ + PRINT(op " [r%d - %d], %d\n", instr->dst_reg, instr->off * -1, \ + instr->imm); \ + else \ + PRINT(op " [r%d], %d\n", instr->dst_reg, instr->imm); +#define PRINT_STX(op) \ + if (instr->off > 0) \ + PRINT(op " [r%d + %d], r%d\n", instr->dst_reg, instr->off, \ + instr->src_reg); \ + else if (instr->off < 0) \ + PRINT(op " [r%d - %d], r%d\n", instr->dst_reg, \ + instr->off * -1, instr->src_reg); \ + else \ + PRINT(op " [r%d], r%d\n", instr->dst_reg, instr->src_reg); + + PRINT("eBPF\n\n"); + for (i = 0; i < prog->len; ++i) { + const struct bpf_insn *instr = &prog->insnsi[i]; + int cl = BPF_CLASS(instr->code); + + PRINT("0x%06x ", i * sizeof(struct bpf_insn)); + + switch (cl) { + case BPF_ALU: + case BPF_ALU64: { + switch (BPF_OP(instr->code)) { + case BPF_ADD: { + PRINT_ALU("add"); + break; + } + case BPF_SUB: { + PRINT_ALU("sub"); + break; + } + case BPF_MUL: { + PRINT_ALU("mul"); + break; + } + case BPF_DIV: { + PRINT_ALU("div"); + break; + } + case BPF_OR: { + PRINT_ALU("or"); + break; + } + case BPF_AND: { + PRINT_ALU("and"); + break; + } + case BPF_LSH: { + PRINT_ALU("lsh"); + break; + } + case BPF_RSH: { + PRINT_ALU("rsh"); + break; + } + case BPF_NEG: { + PRINT("neg%s r%d\n", cl == BPF_ALU ? "w" : "", + instr->dst_reg); + break; + } + case BPF_MOD: { + PRINT_ALU("mod"); + break; + } + case BPF_XOR: { + PRINT_ALU("xor"); + break; + } + case BPF_MOV: { + PRINT_ALU("mov"); + break; + } + case BPF_ARSH: { + PRINT_ALU("arsh"); + break; + } + case BPF_END: { + PRINT("%s%d r%d\n", + BPF_SRC(instr->code) ? 
"be" : "le", + instr->imm, instr->dst_reg); + break; + } + default: { + PRINT("???\n"); + break; + } + } + break; + } + case BPF_JMP: { + switch (BPF_OP(instr->code)) { + case BPF_JA: { + PRINT("ja 0x%x\n", (i + instr->off + 1) * + sizeof(struct bpf_insn)); + break; + } + case BPF_JEQ: { + PRINT_JMP("jeq"); + break; + } + case BPF_JGT: { + PRINT_JMP("jgt"); + break; + } + case BPF_JGE: { + PRINT_JMP("jge"); + break; + } + case BPF_JSET: { + PRINT_JMP("jset"); + break; + } + case BPF_JNE: { + PRINT_JMP("jne"); + break; + } + case BPF_JSGT: { + PRINT_JMP("jsgt"); + break; + } + case BPF_JSGE: { + PRINT_JMP("jsge"); + break; + } + case BPF_CALL: { + PRINT("call %d\n", instr->imm); + break; + } + case BPF_EXIT: { + PRINT("exit\n"); + break; + } + default: { + PRINT("???\n"); + break; + } + } + break; + } + case BPF_LD: { + if (BPF_MODE(instr->code) == BPF_IMM && + BPF_SIZE(instr->code) == BPF_DW && + i + 1 < prog->len) { + ++i; + PRINT("ld r%d, %lld\n", instr->dst_reg, + instr->imm | + (((s64) prog->insnsi[i].imm) << 32)); + } else + PRINT("???\n"); + break; + } + case BPF_LDX: { + if (BPF_MODE(instr->code) == BPF_MEM) { + switch (BPF_SIZE(instr->code)) { + case BPF_W: { + PRINT_LDX("ldxw"); + break; + } + case BPF_H: { + PRINT_LDX("ldxh"); + break; + } + case BPF_B: { + PRINT_LDX("ldxb"); + break; + } + case BPF_DW: { + PRINT_LDX("ldx"); + break; + } + default: { + PRINT("???\n"); + break; + } + } + } else + PRINT("???\n"); + break; + } + case BPF_ST: { + if (BPF_MODE(instr->code) == BPF_MEM) { + switch (BPF_SIZE(instr->code)) { + case BPF_W: { + PRINT_ST("stw"); + break; + } + case BPF_H: { + PRINT_ST("sth"); + break; + } + case BPF_B: { + PRINT_ST("stb"); + break; + } + case BPF_DW: { + PRINT_ST("st"); + break; + } + default: { + PRINT("???\n"); + break; + } + } + } else + PRINT("???\n"); + break; + } + case BPF_STX: { + switch (BPF_MODE(instr->code)) { + case BPF_XADD: { + switch (BPF_SIZE(instr->code)) { + case BPF_W: { + PRINT_STX("xaddw"); + break; + } + case BPF_DW: { + PRINT_STX("xadd"); + break; + } + default: { + PRINT("???\n"); + break; + } + } + break; + } + case BPF_MEM: { + switch (BPF_SIZE(instr->code)) { + case BPF_W: { + PRINT_STX("stxw"); + break; + } + case BPF_H: { + PRINT_STX("stxh"); + break; + } + case BPF_B: { + PRINT_STX("stxb"); + break; + } + case BPF_DW: { + PRINT_STX("stx"); + break; + } + default: { + PRINT("???\n"); + break; + } + } + break; + } + default: { + PRINT("???\n"); + break; + } + } + + break; + } + default: { + PRINT("???\n"); + break; + } + } + } + + return len; +} diff --git a/net/mptcp/mptcp_rbs_optimizer_ebpf_disasm.h b/net/mptcp/mptcp_rbs_optimizer_ebpf_disasm.h new file mode 100644 index 0000000000000..057a1395a4cd4 --- /dev/null +++ b/net/mptcp/mptcp_rbs_optimizer_ebpf_disasm.h @@ -0,0 +1,14 @@ +#ifndef _MPTCP_RBS_OPTIMIZER_EBPF_DISASM_H +#define _MPTCP_RBS_OPTIMIZER_EBPF_DISASM_H + +struct bpf_prog; + +/** + * Writes a string representation of an eBPF program to the given buffer + * @prog: The eBPF program + * @buffer: Pointer to the buffer where the string should be stored or NULL + * Return: Number of written characters + */ +int mptcp_rbs_ebpf_dump(const struct bpf_prog *prog, char *buffer); + +#endif diff --git a/net/mptcp/mptcp_rbs_optimizer_ebpf_lse.c b/net/mptcp/mptcp_rbs_optimizer_ebpf_lse.c new file mode 100644 index 0000000000000..02245f450e907 --- /dev/null +++ b/net/mptcp/mptcp_rbs_optimizer_ebpf_lse.c @@ -0,0 +1,413 @@ +#include "mptcp_rbs_optimizer_ebpf_lse.h" +#include +#include +#include + +struct instr_info { + u8 visited : 1, 
is_jump_target : 1, deleted : 1; + /* 0 = unused, 255 = multiple values */ + u8 reg_states[__MAX_BPF_REG]; +}; + +#define VAR_STATE 255 + +static bool includes(const u8 *reg_states1, const u8 *reg_states2) +{ + int i; + + for (i = 0; i < __MAX_BPF_REG; ++i) { + int reg_state1 = reg_states1[i]; + int reg_state2 = reg_states2[i]; + + if (reg_state1 != reg_state2 && reg_state1 != VAR_STATE && + reg_state2) + return false; + } + + return true; +} + +static void unite(u8 *reg_states_dst, const u8 *reg_states_src) +{ + int i; + + for (i = 0; i < __MAX_BPF_REG; ++i) { + int reg_state_dst = reg_states_dst[i]; + int reg_state_src = reg_states_src[i]; + + if (reg_state_src && reg_state_dst != reg_state_src) { + if (!reg_state_dst) + reg_states_dst[i] = reg_state_src; + else + reg_states_dst[i] = VAR_STATE; + } + } +} + +static void fill_infos(struct bpf_prog *prog, int pos, struct instr_info *infos, + const u8 *start_reg_states) +{ + u8 reg_states[__MAX_BPF_REG]; + if (start_reg_states) + memcpy(reg_states, start_reg_states, sizeof(reg_states)); + else + memset(reg_states, 0, sizeof(reg_states)); + + for (; pos < prog->len; ++pos) { + struct bpf_insn *insn = &prog->insnsi[pos]; + struct instr_info *info = &infos[pos]; + + info->visited = true; + unite(info->reg_states, reg_states); + + switch (BPF_CLASS(insn->code)) { + case BPF_LD: { + reg_states[insn->dst_reg] = VAR_STATE; + break; + } + case BPF_LDX: { + if (insn->src_reg == BPF_REG_10) + reg_states[insn->dst_reg] = + insn->off / -((int) sizeof(u64)) + 1; + else + reg_states[insn->dst_reg] = VAR_STATE; + break; + } + case BPF_ST: + case BPF_STX: { + if (insn->dst_reg == BPF_REG_10) { + int i; + int val = insn->off / -((int) sizeof(u64)) + 1; + + for (i = 0; i < __MAX_BPF_REG; ++i) { + if (i != insn->src_reg && + reg_states[i] == val) + reg_states[i] = VAR_STATE; + } + + if (BPF_CLASS(insn->code) == BPF_STX) + reg_states[insn->src_reg] = val; + } + break; + } + case BPF_ALU: + case BPF_ALU64: { + /* If the 2. 
operand is a constant check for typical + * values that won't change the result + */ + if (!BPF_SRC(insn->code)) { + switch (BPF_OP(insn->code)) { + case BPF_ADD: + case BPF_SUB: + case BPF_OR: + case BPF_LSH: + case BPF_RSH: + case BPF_XOR: + case BPF_ARSH: { + if (!insn->imm) + continue; + break; + } + case BPF_MUL: + case BPF_DIV: { + if (insn->imm == 1) + continue; + break; + } + case BPF_AND: { + if (insn->imm == -1) + continue; + break; + } + } + } + + if (BPF_SRC(insn->code) && + BPF_OP(insn->code) == BPF_MOV) + reg_states[insn->dst_reg] = + reg_states[insn->src_reg]; + else + reg_states[insn->dst_reg] = VAR_STATE; + break; + } + case BPF_JMP: { + switch (BPF_OP(insn->code)) { + case BPF_JA: { + int target_pos = pos + insn->off + 1; + struct instr_info *target_info = + &infos[target_pos]; + + target_info->is_jump_target = true; + + if (target_info->visited && + includes(target_info->reg_states, + reg_states)) + return; + + pos = target_pos - 1; + break; + } + case BPF_JEQ: + case BPF_JGT: + case BPF_JGE: + case BPF_JSET: + case BPF_JNE: + case BPF_JSGT: + case BPF_JSGE: { + int target_pos = pos + insn->off + 1; + struct instr_info *target_info = + &infos[target_pos]; + + target_info->is_jump_target = true; + + if (!target_info->visited || + !includes(target_info->reg_states, + reg_states)) + fill_infos(prog, target_pos, infos, + reg_states); + + break; + } + case BPF_CALL: { + reg_states[BPF_REG_0] = VAR_STATE; + reg_states[BPF_REG_1] = VAR_STATE; + reg_states[BPF_REG_2] = VAR_STATE; + reg_states[BPF_REG_3] = VAR_STATE; + reg_states[BPF_REG_4] = VAR_STATE; + reg_states[BPF_REG_5] = VAR_STATE; + break; + } + case BPF_EXIT: { + return; + } + } + break; + } + } + } +} + +static void print_infos(const struct bpf_prog *prog, + const struct instr_info *infos) +{ + int i; + int j; + + for (i = 0; i < prog->len; ++i) { + const struct instr_info *info = &infos[i]; + + printk("0x%6x v=%d,jt=%d,d=%d", i * 8, info->visited, + info->is_jump_target, info->deleted); + for (j = 0; j < __MAX_BPF_REG; ++j) { + printk(",r%d=%d", j, info->reg_states[j]); + } + printk("\n"); + } +} + +static void find_deletable(struct bpf_prog *prog, struct instr_info *infos) +{ + int pos; + int count = 0; + + for (pos = 0; pos < prog->len; ++pos) { + struct bpf_insn *insn = &prog->insnsi[pos]; + struct instr_info *info = &infos[pos]; + + switch (BPF_CLASS(insn->code)) { + case BPF_LDX: { + if (insn->src_reg == BPF_REG_10 && + info->reg_states[insn->dst_reg] == + insn->off / -((int) sizeof(u64)) + 1) { + info->deleted = true; + ++count; + } + break; + } + case BPF_STX: { + if (insn->dst_reg == BPF_REG_10 && + info->reg_states[insn->src_reg] == + insn->off / -((int) sizeof(u64)) + 1) { + info->deleted = true; + ++count; + } + break; + } + case BPF_ALU: + case BPF_ALU64: { + /* If the 2. 
operand is a constant check for typical + * values that won't change the result + */ + if (!BPF_SRC(insn->code)) { + switch (BPF_OP(insn->code)) { + case BPF_ADD: + case BPF_SUB: + case BPF_OR: + case BPF_LSH: + case BPF_RSH: + case BPF_XOR: + case BPF_ARSH: { + if (!insn->imm) { + info->deleted = true; + ++count; + } + break; + } + case BPF_MUL: + case BPF_DIV: { + if (insn->imm == 1) { + info->deleted = true; + ++count; + } + break; + } + case BPF_AND: { + if (insn->imm == -1) { + info->deleted = true; + ++count; + } + break; + } + } + } else if (BPF_OP(insn->code) == BPF_MOV) { + if (info->reg_states[insn->dst_reg] == + info->reg_states[insn->src_reg] && + info->reg_states[insn->dst_reg] != + VAR_STATE && + info->reg_states[insn->src_reg]) { + info->deleted = true; + ++count; + } + } + + break; + } + } + } + + for (pos = 0; pos < prog->len; ++pos) { + struct bpf_insn *insn = &prog->insnsi[pos]; + struct instr_info *info = &infos[pos]; + + if (BPF_CLASS(insn->code) == BPF_JMP && + BPF_OP(insn->code) >= BPF_JA && + BPF_OP(insn->code) <= BPF_JSGE) { + /* Find jump target that is not deleted and no + * unconditional jump + */ + int target_pos = pos + insn->off + 1; + int pos2; + bool found; + + while (true) { + struct bpf_insn *insn2 = + &prog->insnsi[target_pos]; + + if (BPF_CLASS(insn2->code) == BPF_JMP && + BPF_OP(insn2->code) == BPF_JA) + target_pos += insn2->off + 1; + else if (infos[target_pos].deleted) + ++target_pos; + else + break; + } + + /* If instruction jumps only to the following -> delete + */ + pos2 = min(pos + 1, target_pos); + found = false; + while (pos2 < max(pos + 1, target_pos)) { + if (!infos[pos2].deleted) { + found = true; + break; + } + + ++pos2; + } + + if (!found) { + /* Jump not necessary */ + info->deleted = true; + ++count; + } + } + } +} + +static void delete_deletable(struct bpf_prog *prog, struct instr_info *infos) +{ + struct bpf_insn *new_insns; + struct bpf_insn *new_insn; + int pos; + int len; + + new_insns = kmalloc(sizeof(struct bpf_insn) * prog->len, GFP_KERNEL); + new_insn = new_insns; + + for (pos = 0; pos < prog->len; ++pos) { + struct bpf_insn *insn = &prog->insnsi[pos]; + struct instr_info *info = &infos[pos]; + + if (info->deleted) + continue; + + *new_insn = *insn; + if (BPF_CLASS(new_insn->code) == BPF_JMP) { + /* Find jump target that is not deleted and no + * unconditional jump + */ + int target_pos = pos + new_insn->off + 1; + int pos2; + + while (true) { + insn = &prog->insnsi[target_pos]; + info = &infos[target_pos]; + + if (BPF_CLASS(insn->code) == BPF_JMP && + BPF_OP(insn->code) == BPF_JA) + target_pos += insn->off + 1; + else if (info->deleted) + ++target_pos; + else + break; + } + + /* Fix jump offset */ + new_insn->off = target_pos - pos - 1; + + if (new_insn->off >= 0) { + for (pos2 = pos + 1; pos2 < target_pos; + ++pos2) { + if (infos[pos2].deleted) + --new_insn->off; + } + } else { + for (pos2 = pos - 1; pos2 > target_pos; + --pos2) { + if (infos[pos2].deleted) + ++new_insn->off; + } + } + } + + ++new_insn; + } + + len = new_insn - new_insns; + bpf_prog_realloc(prog, bpf_prog_size(len), 0); + memcpy(prog->insnsi, new_insns, len * sizeof(struct bpf_insn)); + prog->len = len; + kfree(new_insns); +} + +void mptcp_rbs_optimize_ebpf_ld_sts(struct bpf_prog *prog) +{ + struct instr_info *infos; + + infos = kzalloc(sizeof(struct instr_info) * prog->len, GFP_KERNEL); + fill_infos(prog, 0, infos, NULL); + find_deletable(prog, infos); + delete_deletable(prog, infos); + + kfree(infos); +} diff --git a/net/mptcp/mptcp_rbs_optimizer_ebpf_lse.h 
b/net/mptcp/mptcp_rbs_optimizer_ebpf_lse.h new file mode 100644 index 0000000000000..7d01e23f03f18 --- /dev/null +++ b/net/mptcp/mptcp_rbs_optimizer_ebpf_lse.h @@ -0,0 +1,12 @@ +#ifndef _MPTCP_RBS_OPTIMIZER_EBPF_LSE_H +#define _MPTCP_RBS_OPTIMIZER_EBPF_LSE_H + +struct bpf_prog; + +/** + * Removes unnecessary loads and stores inside an eBPF program + * @prog: Pointer to the eBPF program + */ +void mptcp_rbs_optimize_ebpf_ld_sts(struct bpf_prog *prog); + +#endif diff --git a/net/mptcp/mptcp_rbs_optimizer_ebpf_regalloc.c b/net/mptcp/mptcp_rbs_optimizer_ebpf_regalloc.c new file mode 100644 index 0000000000000..fa33012759c4e --- /dev/null +++ b/net/mptcp/mptcp_rbs_optimizer_ebpf_regalloc.c @@ -0,0 +1,1169 @@ +#include "mptcp_rbs_optimizer_ebpf_regalloc.h" +#include "mptcp_rbs_dynarray.h" +#include "mptcp_rbs_optimizer_ebpf_lse.h" +#include +#include + +#define IS_OFF_JMP(insn) \ + (BPF_CLASS((insn)->code) == BPF_JMP && BPF_OP((insn)->code) <= BPF_JSGE) +#define TEMP_TO_BIT(temp) (1ull << (temp)) +#define BIT_DIFF(a, b) ((a) & ~(b)) + +/* + * eBPF block lists + */ + +DECL_DA(block_list, struct mptcp_rbs_ebpf_block *); + +#define INIT_BLOCK_LIST(list) INIT_DA(list) + +#define FREE_BLOCK_LIST(list) FREE_DA(list) + +#define ADD_BLOCK(list, block) ADD_DA_ITEM(list, block) + +#define GET_BLOCK_LIST_LEN(list) GET_DA_LEN(list) + +#define GET_BLOCK(list, index) GET_DA_ITEM(list, index) + +#define FOREACH_BLOCK(list, var, cmds) FOREACH_DA_ITEM(list, var, cmds) + +#define FOREACH_BLOCK_REV(list, var, cmds) FOREACH_DA_ITEM_REV(list, var, cmds) + +/* + * Critical edge block lists + */ + +DECL_DA(edge_block_list, struct edge_block *); + +#define INIT_EDGE_BLOCK_LIST(list) INIT_DA(list) + +#define FREE_EDGE_BLOCK_LIST(list) FREE_DA(list) + +#define ADD_EDGE_BLOCK(list, block) ADD_DA_ITEM(list, block) + +#define GET_EDGE_BLOCK_LIST_LEN(list) GET_DA_LEN(list) + +#define GET_EDGE_BLOCK(list, index) GET_DA_ITEM(list, index) + +enum { + /** Temporary was not written yet */ + TEMP_STATE_NONE, + /** Temporary is spilled */ + TEMP_STATE_SPILLED, + /** Temporary is in register */ + TEMP_STATE_REG +}; + +/** Macro to determine the offset on the stack where the temporary can be + * spilled + */ +#define TEMP_STACK_OFF(temp) ((temp + 1) * -8) + +/** The state of a temporary */ +struct temp_state { + /** + * State of the temporary (see TEMP_STATE_* and the register number it + * is in + */ + u8 state : 2, reg : 6; +}; + +/** A single live range. 
Organized in a linked list */ +struct live_range { + struct live_range *next; + /** + * Start position of the range and a bit determining if the temporary is + * written at the position + */ + u32 def : 1, start : 31; + /** End position of the range - 1 */ + u32 end; +}; + +/** Information about a temporary */ +struct temp_info { + /** Live interval of the temporary */ + struct live_range *live_interval; + /** The previous active live range */ + struct live_range *prev_range; +}; + +/** Context for register allocation */ +struct ctx { + /** The eBPF program where the instructions should be stored */ + struct bpf_prog *prog; + /** List with eBPF blocks in depth first order */ + struct block_list blocks; + /** List with critical edge blocks */ + struct edge_block_list edge_blocks; + /** The current block */ + struct mptcp_rbs_ebpf_block *block; + /** Information of the current block */ + // struct block_info *block_info; + /** Current eBPF instruction */ + struct mptcp_rbs_ebpf_instr *instr; + /** The ARE_CONSISTENT set */ + u64 A; + /** Position of current instruction */ + u32 pos; + /** Information about temporaries */ + struct { + /** Information about temporaries */ + struct temp_info *infos[MAX_TEMPS]; + /** State of temporaries */ + struct temp_state states[MAX_TEMPS]; + /** Number of used temporaries */ + u8 count; + } temps; + /** + * Array to map registers to the temporary that is currently placed in + + * 1. 0 means that no temporary is stored. R10 is reserved for the stack + * pointer + */ + u8 regs[MAX_BPF_REG - 1]; +}; + +/** Block that is inserted by the allocator on a critical edge */ +struct edge_block { + /** Absolute position of the first instruction of the block */ + u32 insn_pos; + /** + * Number of instructions in this block. This is important to calculate + * jump offsets + */ + u16 insn_count; + /** Index of the block in the edge block list */ + u16 idx; +}; + +/** Per block information for register allocation */ +struct block_info { + /** List with predecessor blocks */ + struct block_list preds; + /** USE set to calculate lifetimes */ + u64 use; + /** DEF set to calculate lifetimes */ + u64 def; + /** IN set for the data flow analysis */ + u64 in; + /** OUT set for the data flow analysis */ + u64 out; + /** Local copy of the ARE_CONSISTENT set */ + u64 A; + /** The KILL set */ + u64 K; + /** Absolute position of the first instruction of the block */ + u32 insn_pos; + /** + * Number of instructions in this block. This is important to calculate + * jump offsets + */ + u16 insn_count; + /** Index of the block in the list */ + u16 idx; + /** + * Critical edge block of the else edge of this block. If edge_block. 
+ * insn_count == 0 there is no critical edge block + */ + struct edge_block edge_block; + /** Position of the first instruction of the block */ + u16 pos; + /** true if jump is necessary on the end of the block */ + bool needs_jmp; + /** Information about temporaries */ + struct { + /** State of temporaries when the block is entered */ + struct temp_state entry_states[MAX_TEMPS]; + /** State of temporaries when the block is left */ + struct temp_state exit_states[MAX_TEMPS]; + } temps; +}; + +#define BLOCK_INFO(block) ((struct block_info *) (block)->tag) + +/** + * Traverses through eBPF blocks and stores them inside a list in depth first + * order + * @ctx: The allocator context + * @block: The current block + */ +static void traverse(struct ctx *ctx, struct mptcp_rbs_ebpf_block *block) +{ + struct mptcp_rbs_ebpf_block *block2; + struct block_info *info; + u8 code; + + /* Check if the block is already in the list */ + FOREACH_BLOCK(&ctx->blocks, block2, if (block == block2) return ); + + /* Check if every block has a jump instruction at the end */ + code = block->instrs[block->instr_count - 1].insn.code; + BUG_ON(BPF_CLASS(code) != BPF_JMP); + BUG_ON(BPF_OP(code) != BPF_JA && BPF_OP(code) != BPF_EXIT); + + info = kzalloc(sizeof(struct block_info), GFP_KERNEL); + INIT_BLOCK_LIST(&info->preds); + info->idx = GET_BLOCK_LIST_LEN(&ctx->blocks); + info->pos = ctx->pos; + block->tag = info; + ADD_BLOCK(&ctx->blocks, block); + + ctx->pos += block->instr_count; + + if (block->next) + traverse(ctx, block->next); + if (block->next_else) + traverse(ctx, block->next_else); +} + +/** + * Calculates the lifetimes of all used temporaries and stores them as struct + * lifetime_block_info in the blocks' tag fields + * @ctx: The allocator context + */ +static void calc_lifetimes(struct ctx *ctx) +{ + struct mptcp_rbs_ebpf_block *block; + bool changes; + + /* Calculate USE and DEF sets */ + FOREACH_BLOCK(&ctx->blocks, block, { + struct block_info *info = BLOCK_INFO(block); + struct mptcp_rbs_ebpf_instr *cur; + struct mptcp_rbs_ebpf_instr *end; + + cur = block->instrs; + end = cur + block->instr_count; + for (; cur != end; ++cur) { + u64 read = 0; + u64 write = 0; + int i; + + for (i = 0; i < MAX_ARGS; ++i) { + if (cur->read[i].used) + read |= TEMP_TO_BIT(cur->read[i].temp); + } + if (cur->write.used) + write = TEMP_TO_BIT(cur->write.temp); + + info->use |= ~info->def & read; + info->def |= ~info->use & write; + } + }); + + /* Iterate until no changes for IN(B) were found */ + do { + changes = false; + + FOREACH_BLOCK_REV(&ctx->blocks, block, { + struct block_info *info = BLOCK_INFO(block); + u64 old_in = info->in; + + /* OUT(B) = U IN(S) where S elementof succ(B) */ + info->out = + block->next ? 
BLOCK_INFO(block->next)->in : 0; + if (block->next_else) + info->out |= BLOCK_INFO(block->next_else)->in; + + /* IN(B) = use(B) U (OUT(B) - def(B)) */ + info->in = info->use | BIT_DIFF(info->out, info->def); + + changes = changes || old_in != info->in; + }); + } while (changes); +} + +/** + * Calculates the live ranges of all used temporaries and stores them in the + * context + * @ctx: The allocator context + */ +static void calc_liveranges(struct ctx *ctx) +{ + struct mptcp_rbs_ebpf_block *block; + struct live_range *last_live_ranges[MAX_TEMPS]; + u32 pos; + + /* Calculate the normal life times first */ + calc_lifetimes(ctx); + + /* Calculate the live ranges */ + memset(last_live_ranges, 0, sizeof(last_live_ranges)); + pos = 0; + FOREACH_BLOCK(&ctx->blocks, block, { + struct block_info *info = BLOCK_INFO(block); + struct mptcp_rbs_ebpf_instr *cur; + struct mptcp_rbs_ebpf_instr *end; + struct live_range *range; + int i; + + /* Create live ranges without end position for live temporaries + * at block entry + */ + for (i = 0; i < ctx->temps.count; ++i) { + if (info->in & TEMP_TO_BIT(i)) { + range = kzalloc(sizeof(struct live_range), + GFP_KERNEL); + range->start = pos; + if (last_live_ranges[i]) + last_live_ranges[i]->next = range; + else + ctx->temps.infos[i]->live_interval = + range; + last_live_ranges[i] = range; + } + } + + cur = block->instrs; + end = cur + block->instr_count; + for (; cur != end; ++cur, ++pos) { + for (i = 0; i < MAX_ARGS; ++i) { + if (cur->read[i].used) { + BUG_ON(!last_live_ranges[cur->read[i] + .temp]); + + last_live_ranges[cur->read[i].temp] + ->end = pos - 1; + } + } + + if (cur->write.used) { + /* Check if there is already an open live range + */ + i = cur->write.temp; + + range = last_live_ranges[i]; + if (range && !range->end) { + /* Reuse live range */ + } else { + range = + kzalloc(sizeof(struct live_range), + GFP_KERNEL); + if (last_live_ranges[i]) + last_live_ranges[i]->next = + range; + else + ctx->temps.infos[i] + ->live_interval = range; + last_live_ranges[i] = range; + } + + range->def = true; + range->start = pos; + } + } + + /* Set the end positions of ranges for temporaries that are live + * at the end of the block + */ + for (i = 0; i < ctx->temps.count; ++i) { + if (info->out & TEMP_TO_BIT(i)) { + BUG_ON(!last_live_ranges[i]); + + last_live_ranges[i]->end = pos - 1; + } + } + }); +} + +static void insert_insn(struct ctx *ctx, int idx, struct bpf_insn insn) +{ + if (bpf_prog_size(ctx->prog->len) == ctx->prog->pages * PAGE_SIZE) { + ctx->prog = bpf_prog_realloc( + ctx->prog, bpf_prog_size(ctx->prog->len + 10), 0); + } + + if (idx != ctx->prog->len) + memmove(&ctx->prog->insnsi[idx + 1], &ctx->prog->insnsi[idx], + (ctx->prog->len - idx) * sizeof(struct bpf_insn)); + ctx->prog->insnsi[idx] = insn; + ++ctx->prog->len; +} + +static inline void add_insn(struct ctx *ctx, struct bpf_insn insn) +{ + insert_insn(ctx, ctx->prog->len, insn); +} + +static struct live_range *find_next_range(struct ctx *ctx, int temp) +{ + struct live_range *prev_range = ctx->temps.infos[temp]->prev_range; + struct live_range *range; + + if (prev_range) + range = prev_range->next; + else + range = ctx->temps.infos[temp]->live_interval; + + while (range && range->end + 1 < ctx->pos) { + prev_range = range; + range = range->next; + } + + ctx->temps.infos[temp]->prev_range = prev_range; + return range; +} + +/** + * Spills a register if necessary + * @ctx: The allocation context + * @reg: The register to spill + */ +static void spill_reg(struct ctx *ctx, int reg) +{ + struct 
block_info *info = BLOCK_INFO(ctx->block); + int temp = ((int) ctx->regs[reg]) - 1; + struct live_range *range; + bool store = true; + + if (temp == -1) { + /* Register is empty */ + return; + } + + /* If the register is clean omit the store */ + if (ctx->A & TEMP_TO_BIT(temp)) + store = false; + else { + /* If the temporary's value won't be used in the future omit the + * store + */ + if (!(info->out & TEMP_TO_BIT(temp))) { + range = find_next_range(ctx, temp); + + if (!range || + range->start >= + info->pos + ctx->block->instr_count || + (range->def && range->start >= ctx->pos)) + store = false; + } + } + + if (store) { + add_insn(ctx, BPF_STX_MEM(BPF_DW, BPF_REG_FP, reg, + TEMP_STACK_OFF(temp))); + ++info->insn_count; + ctx->A |= TEMP_TO_BIT(temp); + } + + ctx->temps.states[temp].state = TEMP_STATE_SPILLED; + ctx->regs[reg] = 0; +} + +/** + * Loads a temporary in a certain register. If its state != TEMP_STATE_NONE the + * function will insert a load from the spill location + * @ctx: The allocation context + * @reg: Register that should contain the temporary + * @temp: The temporary that should be loaded into the register + */ +static void load_in_reg(struct ctx *ctx, int reg, int temp) +{ + struct temp_state *state = &ctx->temps.states[temp]; + + BUG_ON(state->state == TEMP_STATE_REG); + + /* Spill register if necessary */ + spill_reg(ctx, reg); + + if (state->state != TEMP_STATE_NONE) { + add_insn(ctx, BPF_LDX_MEM(BPF_DW, reg, BPF_REG_FP, + TEMP_STACK_OFF(temp))); + ++BLOCK_INFO(ctx->block)->insn_count; + ctx->A |= TEMP_TO_BIT(temp); + } else + ctx->A &= ~TEMP_TO_BIT(temp); + + /* Assign register to temporary */ + ctx->regs[reg] = temp + 1; + state->state = TEMP_STATE_REG; + state->reg = reg; +} + +/** + * The heuristic function of the allocator. 
This function decides which register + * to allocate for a temporary + * @ctx: The allocation context + * @temp: The temporary the function should find a register for + * @return: The register to allocate + */ +static int find_reg(struct ctx *ctx, int temp) +{ + struct mptcp_rbs_ebpf_instr *instr = ctx->instr; + int start; + int end; + int reg; + int temp2; + struct live_range *range; + int i; + int prop_reg = -1; + int prop_delta = -1; + int prop_reg2 = -1; + int prop_delta2 = -1; + bool used; + + /* Check the callee saved registers first */ + start = BPF_REG_6; + end = BPF_REG_9; + + while (true) { + for (reg = start; reg <= end; ++reg) { + temp2 = ((int) ctx->regs[reg]) - 1; + + if (temp2 == -1) { + /* Register is free */ + return reg; + } + + /* Check if register is already used by another operand + */ + used = false; + for (i = 0; i < MAX_ARGS; ++i) { + if (instr->read[i].used && + instr->read[i].temp == temp2) { + used = true; + break; + } + } + if (used) + continue; + + /* Check live ranges */ + range = find_next_range(ctx, temp2); + + if (!range) { + /* Best candidate found because the temporary + * won't be used again + */ + return reg; + } + if (range->start > ctx->pos) { + /* Very good candidate found */ + int delta = range->start - ctx->pos; + if (prop_delta < delta) { + prop_reg = reg; + prop_delta = range->start - ctx->pos; + } + } else { + /* Candidate found */ + int delta = range->end - ctx->pos; + if (prop_delta2 < delta) { + prop_reg2 = reg; + prop_delta2 = range->end - ctx->pos; + } + } + } + + if (start == BPF_REG_0) + break; + + /* Next check all other registers except R10 */ + start = BPF_REG_0; + end = BPF_REG_5; + } + + if (prop_reg != -1) + return prop_reg; + + BUG_ON(prop_reg2 == -1); + return prop_reg2; +} + +/** + * Generalized version of load_in_reg where the function decides the register. 
+ * This function ensures that other instruction operands are not spilled + * @ctx: The allocation context + * @temp: The temporary that should be loaded in a register + */ +static void load_in_any_reg(struct ctx *ctx, int temp) +{ + load_in_reg(ctx, find_reg(ctx, temp), temp); +} + +/** + * Allocates registers for the current call instruction + * @ctx: The allocation context + * @insn: The target instruction that will be added to the eBPF program + */ +static void alloc_call_instr_regs(struct ctx *ctx, struct bpf_insn *insn) +{ + struct block_info *info = BLOCK_INFO(ctx->block); + struct mptcp_rbs_ebpf_instr *instr = ctx->instr; + int temp; + struct temp_state *state; + int i; + + /* Parameters */ + for (i = 0; i < MAX_ARGS; ++i) { + if (instr->read[i].used) { + temp = instr->read[i].temp; + state = &ctx->temps.states[temp]; + + if (state->state == TEMP_STATE_REG) { + if (state->reg != BPF_REG_ARG1 + i) { + spill_reg(ctx, BPF_REG_ARG1 + i); + + /* Copy the value */ + add_insn(ctx, + BPF_MOV64_REG(BPF_REG_ARG1 + i, + state->reg)); + ++info->insn_count; + continue; + } + } else { + /* Load the value and spill it to ensure + * that it is not lost during the call + */ + load_in_reg(ctx, BPF_REG_ARG1 + i, temp); + } + } + + spill_reg(ctx, BPF_REG_ARG1 + i); + } + + /* Return value */ + if (instr->write.used) { + temp = instr->write.temp; + state = &ctx->temps.states[temp]; + + if (state->state == TEMP_STATE_REG) { + /* Discard register */ + ctx->regs[state->reg] = 0; + } + state->state = TEMP_STATE_NONE; + + load_in_reg(ctx, BPF_REG_0, temp); + + info->K |= TEMP_TO_BIT(temp); + } else + spill_reg(ctx, BPF_REG_0); +} + +/** + * Allocates registers for the current instruction + * @ctx: The allocation context + * @insn: The target instruction that will be added to the eBPF program + */ +static void alloc_instr_regs(struct ctx *ctx, struct bpf_insn *insn) +{ + struct block_info *info = BLOCK_INFO(ctx->block); + struct mptcp_rbs_ebpf_instr *instr = ctx->instr; + int temp; + int temp2; + struct temp_state *state; + struct temp_state *state2; + + if (BPF_CLASS(insn->code) == BPF_JMP && + BPF_OP(insn->code) == BPF_CALL) { + alloc_call_instr_regs(ctx, insn); + return; + } + + if (instr->read[0].used) { + temp = instr->read[0].temp; + state = &ctx->temps.states[temp]; + + if (instr->write.used && temp != instr->write.temp) { + temp2 = instr->write.temp; + state2 = &ctx->temps.states[temp2]; + + /* Get any register for the written temporary */ + if (state2->state != TEMP_STATE_REG) { + state2->state = TEMP_STATE_NONE; + load_in_any_reg(ctx, temp2); + } + + /* Load value of the read temporary to the written one + */ + if (state->state == TEMP_STATE_REG) + add_insn(ctx, BPF_MOV64_REG(state2->reg, + state->reg)); + else + add_insn(ctx, + BPF_LDX_MEM(BPF_DW, state2->reg, + BPF_REG_FP, + TEMP_STACK_OFF(temp))); + ++info->insn_count; + + insn->dst_reg = state2->reg; + } else { + if (state->state != TEMP_STATE_REG) + load_in_any_reg(ctx, temp); + + insn->dst_reg = state->reg; + } + } + + if (instr->write.used) { + temp = instr->write.temp; + + if (!instr->read[0].used) { + state = &ctx->temps.states[temp]; + + if (state->state != TEMP_STATE_REG) + load_in_any_reg(ctx, temp); + + insn->dst_reg = state->reg; + } + + ctx->A &= ~TEMP_TO_BIT(temp); + info->K |= TEMP_TO_BIT(temp); + } + + if (instr->read[1].used) { + state = &ctx->temps.states[instr->read[1].temp]; + + if (state->state != TEMP_STATE_REG) + load_in_any_reg(ctx, instr->read[1].temp); + + insn->src_reg = state->reg; + } +} + +/** + * Inserts a 
correction that was found during the resolution phase. This + * function might insert critical edge blocks if necessary + * @ctx: The allocation context + * @p: The predecessor block of the edge where the correction should be inserted + * @s: The successor block of the edge where the correction should be inserted + * @insn: The instruction that should be inserted + */ +static void insert_correction(struct ctx *ctx, struct mptcp_rbs_ebpf_block *p, + struct mptcp_rbs_ebpf_block *s, + struct bpf_insn insn) +{ + struct block_info *info = BLOCK_INFO(p); + struct block_info *info2 = BLOCK_INFO(s); + int edge_list_len = GET_EDGE_BLOCK_LIST_LEN(&ctx->edge_blocks); + int i; + + if (p->next_else == s) { + /* Critical edge */ + struct edge_block *edge = &info->edge_block; + + BUG_ON(p->next == s); + + if (!edge->insn_count) { + /* Create edge block */ + struct bpf_insn *cur; + struct bpf_insn *end; + + edge->insn_pos = ctx->prog->len; + edge->insn_count = 1; + edge->idx = edge_list_len; + ADD_EDGE_BLOCK(&ctx->edge_blocks, edge); + add_insn(ctx, BPF_JMP_OFF(info2->idx)); + + /* Find jump in s to p and replace it with jump to new + * edge block + */ + cur = &ctx->prog->insnsi[info->insn_pos + + info->insn_count - 1]; + end = cur - info->insn_count; + + for (; cur != end; --cur) { + if (IS_OFF_JMP(cur) && cur->off == info2->idx) { + cur->off = + GET_BLOCK_LIST_LEN(&ctx->blocks) + + edge->idx; + break; + } + } + } else { + /* Correct following edge block positions */ + for (i = edge->idx + 1; i < edge_list_len; ++i) { + ++GET_EDGE_BLOCK(&ctx->edge_blocks, i) + ->insn_pos; + } + } + + insert_insn(ctx, edge->insn_pos + edge->insn_count - 1, insn); + ++edge->insn_count; + return; + } + + /* Correct following block and edge block positions */ + for (i = info->idx + 1; i < GET_BLOCK_LIST_LEN(&ctx->blocks); ++i) { + ++BLOCK_INFO(GET_BLOCK(&ctx->blocks, i))->insn_pos; + } + + for (i = 0; i < edge_list_len; ++i) { + ++GET_EDGE_BLOCK(&ctx->edge_blocks, i)->insn_pos; + } + + insert_insn(ctx, info->insn_pos + info->insn_count - + (info->needs_jmp ? 1 : 0), + insn); + ++info->insn_count; +} + +/** + * Performes the resolution phase + * @ctx: The allocation context + */ +static void resolve(struct ctx *ctx) +{ + struct mptcp_rbs_ebpf_block *block; + bool changes; + + /* Reset IN and OUT sets for resolution phase */ + FOREACH_BLOCK(&ctx->blocks, block, { + struct block_info *info = BLOCK_INFO(block); + info->in = 0; + info->out = 0; + }); + + /* Perform the dataflow analysis */ + do { + changes = false; + + FOREACH_BLOCK_REV(&ctx->blocks, block, { + struct block_info *info = BLOCK_INFO(block); + u64 old_in = info->in; + + /* OUT(B) = U IN(S) where S elementof succ(B) */ + info->out = + block->next ? BLOCK_INFO(block->next)->in : 0; + if (block->next_else) + info->out |= BLOCK_INFO(block->next_else)->in; + + /* IN(B) = ~K ^ A U (OUT(B) - K) */ + info->in = + (~info->K & info->A) | BIT_DIFF(info->out, info->K); + + changes = changes || old_in != info->in; + }); + } while (changes); + + /* Search different temporary storages among edges */ + FOREACH_BLOCK(&ctx->blocks, block, { + struct block_info *info = BLOCK_INFO(block); + struct mptcp_rbs_ebpf_block *block2; + + FOREACH_BLOCK(&info->preds, block2, { + struct block_info *info2 = BLOCK_INFO(block2); + u64 s = info->in & ~info2->A; + u64 stored_by_move; + int i; + + /* 0. 
Fill ctx->regs with registers of block2 that are + * also in registers in block + */ + memset(&ctx->regs[0], 0, sizeof(ctx->regs)); + for (i = 0; i < ctx->temps.count; ++i) { + struct temp_state *state = + &info->temps.entry_states[i]; + struct temp_state *state2 = + &info2->temps.exit_states[i]; + + if (state2->state == TEMP_STATE_REG && + state->state == TEMP_STATE_REG) + ctx->regs[state2->reg] = i + 1; + } + + /* 1. Insert necessary stores */ + for (i = 0; i < ctx->temps.count; ++i) { + struct temp_state *state = + &info->temps.entry_states[i]; + struct temp_state *state2 = + &info2->temps.exit_states[i]; + + if (state2->state == TEMP_STATE_REG && + (state->state != TEMP_STATE_REG || + s & TEMP_TO_BIT(i)) && + !(info2->A & TEMP_TO_BIT(i))) { + /* Insert store */ + insert_correction( + ctx, block2, block, + BPF_STX_MEM(BPF_DW, BPF_REG_FP, + state2->reg, + TEMP_STACK_OFF(i))); + } + } + + /* 2. Insert necessary moves */ + stored_by_move = 0; + for (i = 0; i < ctx->temps.count; ++i) { + struct temp_state *state = + &info->temps.entry_states[i]; + struct temp_state *state2 = + &info2->temps.exit_states[i]; + + if (!(stored_by_move & TEMP_TO_BIT(i)) && + state2->state == TEMP_STATE_REG && + state->state == TEMP_STATE_REG && + state->reg != state2->reg) { + /* Insert move */ + int reg_temp = + ((int) ctx->regs[state->reg]) - 1; + if (reg_temp != -1) { + /* Other value in register */ + /* TODO We could use registers + * for swapping etc. For now we + * just store the value in + * memory + */ + if (!(info2->A & + TEMP_TO_BIT(reg_temp))) { + insert_correction( + ctx, block2, block, + BPF_STX_MEM( + BPF_DW, + BPF_REG_FP, + state->reg, + TEMP_STACK_OFF( + reg_temp))); + } + stored_by_move |= + TEMP_TO_BIT(reg_temp); + } + + insert_correction( + ctx, block2, block, + BPF_MOV64_REG(state->reg, + state2->reg)); + ctx->regs[state2->reg] = 0; + } + } + + /* 3. 
Insert necessary loads */ + for (i = 0; i < ctx->temps.count; ++i) { + struct temp_state *state = + &info->temps.entry_states[i]; + struct temp_state *state2 = + &info2->temps.exit_states[i]; + + if ((stored_by_move & TEMP_TO_BIT(i) || + state2->state != TEMP_STATE_REG) && + state->state == TEMP_STATE_REG) { + /* Insert load */ + insert_correction( + ctx, block2, block, + BPF_LDX_MEM(BPF_DW, state->reg, + BPF_REG_FP, + TEMP_STACK_OFF(i))); + } + } + }); + }); +} + +/** + * Performs the actual register allocation as described in "Quality and Speed in + * Linear-Scan Register Allocation" written by Omri Traub + * @ctx: The allocation context + */ +static void alloc_regs(struct ctx *ctx) +{ + struct mptcp_rbs_ebpf_block **block_ptr; + struct mptcp_rbs_ebpf_block **block_end; + struct mptcp_rbs_ebpf_block *block; + u32 insn_pos; + + /* Iterate over all instructions and allocate registers */ + ctx->pos = 0; + insn_pos = 0; + block_ptr = ctx->blocks.items; + block_end = ctx->blocks.items + ctx->blocks.len; + for (; block_ptr != block_end; ++block_ptr) { + struct block_info *info; + struct mptcp_rbs_ebpf_instr *end; + struct bpf_insn insn; + + block = *block_ptr; + ctx->block = block; + info = BLOCK_INFO(block); + + /* Set position of block */ + info->insn_pos = insn_pos; + + /* Add block as predecessor */ + if (block->next) + ADD_BLOCK(&BLOCK_INFO(block->next)->preds, block); + if (block->next_else) + ADD_BLOCK(&BLOCK_INFO(block->next_else)->preds, block); + + /* Remember the temporary states on entry */ + memcpy(&info->temps.entry_states, ctx->temps.states, + sizeof(ctx->temps.states)); + + ctx->instr = block->instrs; + end = ctx->instr + block->instr_count; + for (; ctx->instr != end; ++ctx->instr, ++ctx->pos) { + insn = ctx->instr->insn; + alloc_instr_regs(ctx, &insn); + + /* Set jump offsets to the block indexes in the ordered + * list + */ + if (IS_OFF_JMP(&insn)) { + if (BPF_OP(insn.code) == BPF_JA) { + /* JA must be the last instruction in + * the block + */ + BUG_ON(ctx->instr + 1 != end); + BUG_ON(!block->next); + + insn.off = BLOCK_INFO(block->next)->idx; + + /* Do not insert jumps if the next block + * follows directly + */ + info->needs_jmp = + block_ptr + 1 == block_end || + *(block_ptr + 1) != block->next; + if (!info->needs_jmp) + continue; + } else { + /* Jumps except JA must be the second to + * last instructions + */ + BUG_ON(ctx->instr + 1 == end || + ctx->instr + 2 != end); + BUG_ON(!block->next_else); + + insn.off = + BLOCK_INFO(block->next_else)->idx; + } + } + + add_insn(ctx, insn); + ++info->insn_count; + } + + /* Remember the temporary states on exit and store a local copy + * of A + */ + memcpy(&info->temps.exit_states, ctx->temps.states, + sizeof(ctx->temps.states)); + info->A = ctx->A; + + insn_pos += info->insn_count; + } + + /* Resolution phase */ + resolve(ctx); +} + +/** + * Corrects jump instructions to point to the correct destination blocks + * @ctx: The allocation context + */ +static void correct_jmps(struct ctx *ctx) +{ + struct bpf_insn *cur; + struct bpf_insn *end; + int block_count = GET_BLOCK_LIST_LEN(&ctx->blocks); + u32 insn_pos; + + insn_pos = 0; + cur = &ctx->prog->insnsi[0]; + end = cur + ctx->prog->len; + for (; cur != end; ++cur, ++insn_pos) { + if (IS_OFF_JMP(cur)) { + u32 dst; + + if (cur->off >= block_count) + dst = GET_EDGE_BLOCK(&ctx->edge_blocks, + cur->off - block_count) + ->insn_pos; + else + dst = BLOCK_INFO( + GET_BLOCK(&ctx->blocks, cur->off)) + ->insn_pos; + + cur->off = dst - insn_pos - 1; + } + } +} + +struct bpf_prog 
*mptcp_rbs_ebpf_alloc_regs( + struct mptcp_rbs_ebpf_block *first_block, int used_temps, + struct bpf_prog *prog) +{ + struct mptcp_rbs_ebpf_block *block; + struct ctx ctx; + int i; + + /* Initialize the context */ + memset(&ctx, 0, sizeof(struct ctx)); + INIT_BLOCK_LIST(&ctx.blocks); + INIT_EDGE_BLOCK_LIST(&ctx.edge_blocks); + + for (i = 0; i < used_temps; ++i) { + ctx.temps.infos[i] = + kzalloc(sizeof(struct temp_info), GFP_KERNEL); + } + ctx.temps.count = used_temps; + + /* Traverse CFG into list and set program size to total number of + * instructions + */ + traverse(&ctx, first_block); + ctx.prog = bpf_prog_realloc(prog, bpf_prog_size(ctx.pos), 0); + + /* Calculate live ranges of temporaries */ + calc_liveranges(&ctx); + + /* Perform the actual register allocation */ + alloc_regs(&ctx); + + /* Correct offsets of jump instructions */ + correct_jmps(&ctx); + + /* Release the context and the blocks' tag fields */ + FOREACH_BLOCK(&ctx.blocks, block, { + struct block_info *info = BLOCK_INFO(block); + FREE_BLOCK_LIST(&info->preds); + kfree(info); + }); + FREE_BLOCK_LIST(&ctx.blocks); + FREE_EDGE_BLOCK_LIST(&ctx.edge_blocks); + for (i = 0; i < used_temps; ++i) { + struct temp_info *info = ctx.temps.infos[i]; + + while (info->live_interval) { + struct live_range *range = info->live_interval; + info->live_interval = range->next; + kfree(range); + } + + kfree(info); + } + + mptcp_rbs_optimize_ebpf_ld_sts(ctx.prog); + return ctx.prog; +} + +void mptcp_rbs_ebpf_block_free(struct mptcp_rbs_ebpf_block *block) +{ + kfree(block->instrs); + kfree(block); +} + +static void simple_traverse(struct mptcp_rbs_ebpf_block *block, + struct block_list *list) +{ + struct mptcp_rbs_ebpf_block *block2; + + /* Check if the block is already in the list */ + FOREACH_BLOCK(list, block2, if (block == block2) return ); + ADD_BLOCK(list, block); + + if (block->next) + simple_traverse(block->next, list); + if (block->next_else) + simple_traverse(block->next_else, list); +} + +void mptcp_rbs_ebpf_blocks_free(struct mptcp_rbs_ebpf_block *first_block) +{ + struct block_list list; + struct mptcp_rbs_ebpf_block *block; + + INIT_BLOCK_LIST(&list); + simple_traverse(first_block, &list); + + FOREACH_BLOCK(&list, block, mptcp_rbs_ebpf_block_free(block)); + FREE_BLOCK_LIST(&list); +} diff --git a/net/mptcp/mptcp_rbs_optimizer_ebpf_regalloc.h b/net/mptcp/mptcp_rbs_optimizer_ebpf_regalloc.h new file mode 100644 index 0000000000000..83d25b01d2843 --- /dev/null +++ b/net/mptcp/mptcp_rbs_optimizer_ebpf_regalloc.h @@ -0,0 +1,219 @@ +#ifndef _MPTCP_RBS_OPTIMIZER_EBPF_REGALLOC_H +#define _MPTCP_RBS_OPTIMIZER_EBPF_REGALLOC_H + +#include +#include + +#define MAX_ARGS 5 +#define MAX_TEMPS ((MAX_BPF_STACK / 8) > 64 ? 64 : (MAX_BPF_STACK / 8)) + +/* + * Macros for common instructions + */ + +/* Unconditional jumps, goto pc + off16 */ +#define BPF_JMP_OFF(OFF) \ + ((struct bpf_insn){.code = BPF_JMP | BPF_K, \ + .dst_reg = 0, \ + .src_reg = 0, \ + .off = OFF, \ + .imm = 0 }) + +/** ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */ +#define EBPF_ALU_REG(OP, DST, SRC) \ + ((struct mptcp_rbs_ebpf_instr){ \ + .insn = BPF_ALU64_REG(OP, 0, 0), \ + .read = {[0] = {.used = 1, .temp = DST }, \ + [1] = {.used = 1, .temp = SRC }, \ + [2 ... 
4] = {.used = 0, .temp = 0 } }, \ + .write = {.used = 1, .temp = DST } }) + +/** ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */ +#define EBPF_ALU32_REG(OP, DST, SRC) \ + ((struct mptcp_rbs_ebpf_instr){ \ + .insn = BPF_ALU32_REG(OP, 0, 0), \ + .read = {[0] = {.used = 1, .temp = DST }, \ + [1] = {.used = 1, .temp = SRC }, \ + [2 ... 4] = {.used = 0, .temp = 0 } }, \ + .write = {.used = 1, .temp = DST } }) + +/** ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32 */ +#define EBPF_ALU_IMM(OP, DST, IMM) \ + ((struct mptcp_rbs_ebpf_instr){ \ + .insn = BPF_ALU64_IMM(OP, 0, IMM), \ + .read = {[0] = {.used = 1, .temp = DST }, \ + [1 ... 4] = {.used = 0, .temp = 0 } }, \ + .write = {.used = 1, .temp = DST } }) + +/** ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32 */ +#define EBPF_ALU32_IMM(OP, DST, IMM) \ + ((struct mptcp_rbs_ebpf_instr){ \ + .insn = BPF_ALU32_IMM(OP, 0, IMM), \ + .read = {[0] = {.used = 1, .temp = DST }, \ + [1 ... 4] = {.used = 0, .temp = 0 } }, \ + .write = {.used = 1, .temp = DST } }) + +/** Short form of mov, dst_reg = src_reg */ +#define EBPF_MOV_REG(DST, SRC) \ + ((struct mptcp_rbs_ebpf_instr){ \ + .insn = BPF_MOV64_REG(0, 0), \ + .read = {[0] = {.used = 0, .temp = 0 }, \ + [1] = {.used = 1, .temp = SRC }, \ + [2 ... 4] = {.used = 0, .temp = 0 } }, \ + .write = {.used = 1, .temp = DST } }) + +/** Short form of mov, dst_reg = src_reg where src is a "real" BPF register */ +#define EBPF_MOV_RAW_REG(DST, SRC) \ + ((struct mptcp_rbs_ebpf_instr){ \ + .insn = BPF_MOV64_REG(0, SRC), \ + .read = {[0 ... 4] = {.used = 0, .temp = 0 } }, \ + .write = {.used = 1, .temp = DST } }) + +/** Short form of mov, dst_reg = imm32 */ +#define EBPF_MOV_IMM(DST, IMM) \ + ((struct mptcp_rbs_ebpf_instr){ \ + .insn = BPF_MOV64_IMM(0, IMM), \ + .read = {[0 ... 4] = {.used = 0, .temp = 0 } }, \ + .write = {.used = 1, .temp = DST } }) + +/** Memory load, dst_reg = *(uint *) (src_reg + off16) */ +#define EBPF_LDX_MEM(SIZE, DST, SRC, OFF) \ + ((struct mptcp_rbs_ebpf_instr){ \ + .insn = BPF_LDX_MEM(SIZE, 0, 0, OFF), \ + .read = {[0] = {.used = 0, .temp = 0 }, \ + [1] = {.used = 1, .temp = SRC }, \ + [2 ... 4] = {.used = 0, .temp = 0 } }, \ + .write = {.used = 1, .temp = DST } }) + +/** Memory store, *(uint *) (dst_reg + off16) = src_reg */ +#define EBPF_STX_MEM(SIZE, DST, SRC, OFF) \ + ((struct mptcp_rbs_ebpf_instr){ \ + .insn = BPF_STX_MEM(SIZE, 0, 0, OFF), \ + .read = {[0] = {.used = 1, .temp = DST }, \ + [1] = {.used = 1, .temp = SRC }, \ + [2 ... 4] = {.used = 0, .temp = 0 } }, \ + .write = {.used = 0, .temp = 0 } }) + +/** Memory store, *(uint *) (dst_reg + off16) = imm32 */ +#define EBPF_ST_MEM(SIZE, DST, OFF, IMM) \ + ((struct mptcp_rbs_ebpf_instr){ \ + .insn = BPF_ST_MEM(SIZE, 0, OFF, IMM), \ + .read = {[0] = {.used = 1, .temp = DST }, \ + [1 ... 4] = {.used = 0, .temp = 0 } }, \ + .write = {.used = 0, .temp = 0 } }) + +/** + * Conditional jumps against registers, if (dst_reg 'op' src_reg) goto pc + + * off16. Note that the jump target is given by the next/next_else fields of the + * owning eBPF block + */ +#define EBPF_JMP_REG(OP, DST, SRC) \ + ((struct mptcp_rbs_ebpf_instr){ \ + .insn = BPF_JMP_REG(OP, 0, 0, 0), \ + .read = {[0] = {.used = 1, .temp = DST }, \ + [1] = {.used = 1, .temp = SRC }, \ + [2 ... 4] = {.used = 0, .temp = 0 } }, \ + .write = {.used = 0, .temp = 0 } }) + +/** + * Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + + * off16. 
Note that the jump target is given by the next/next_else fields of the + * owning eBPF block + */ +#define EBPF_JMP_IMM(OP, DST, IMM) \ + ((struct mptcp_rbs_ebpf_instr){ \ + .insn = BPF_JMP_IMM(OP, 0, IMM, 0), \ + .read = {[0] = {.used = 1, .temp = DST }, \ + [1 ... 4] = {.used = 0, .temp = 0 } }, \ + .write = {.used = 0, .temp = 0 } }) + +/** + * Unconditional jumps, goto pc + off16. Note that the jump target is given by + * the next/next_else fields of the owning eBPF block + */ +#define EBPF_JMP_OFF() \ + ((struct mptcp_rbs_ebpf_instr){ \ + .insn = BPF_JMP_OFF(0), \ + .read = {[0 ... 4] = {.used = 0, .temp = 0 } }, \ + .write = {.used = 0, .temp = 0 } }) + +/** Function call */ +#define EBPF_CALL(FUNC, ARG1, ARG2, ARG3, ARG4, ARG5, RES) \ + EBPF_RAW_INSTR(BPF_EMIT_CALL(FUNC), ARG1, ARG2, ARG3, ARG4, ARG5, RES) + +/** Program exit */ +#define EBPF_EXIT() \ + ((struct mptcp_rbs_ebpf_instr){ \ + .insn = BPF_EXIT_INSN(), \ + .read = {[0 ... 4] = {.used = 0, .temp = 0 } }, \ + .write = {.used = 0, .temp = 0 } }) + +/** Raw code statement block */ +#define EBPF_RAW_INSTR(INSN, R1, R2, R3, R4, R5, W) \ + ((struct mptcp_rbs_ebpf_instr){ \ + .insn = INSN, \ + .read = {[0] = {.used = R1 != -1, .temp = R1 }, \ + [1] = {.used = R2 != -1, .temp = R2 }, \ + [2] = {.used = R3 != -1, .temp = R3 }, \ + [3] = {.used = R4 != -1, .temp = R4 }, \ + [4] = {.used = R5 != -1, .temp = R5 } }, \ + .write = {.used = W != -1, .temp = W } }) + +struct bpf_prog; + +/** Information about an used temporary */ +struct mptcp_rbs_ebpf_instr_temp_info { + u8 used : 1, temp : 7; +}; + +/** A single eBPF instruction using temporaries instead of "real" registers */ +struct mptcp_rbs_ebpf_instr { + struct bpf_insn insn; + struct mptcp_rbs_ebpf_instr_temp_info read[MAX_ARGS]; + struct mptcp_rbs_ebpf_instr_temp_info write; +}; + +/** A single eBPF block */ +struct mptcp_rbs_ebpf_block { + /** Number of instructions in the block */ + int instr_count; + /** Tag for various values during register allocation */ + void *tag; + /** Array of instructions inside the block */ + struct mptcp_rbs_ebpf_instr *instrs; + /** + * Pointer to the next block or NULL. This field describes the offset + * of a ja instruction + */ + struct mptcp_rbs_ebpf_block *next; + /** + * Pointer to the next alternative block or NULL. 
This field describes + * the offset of all jump instructions with an offset except ja + */ + struct mptcp_rbs_ebpf_block *next_else; +}; + +/** + * Performes the register allocation + * @first_block: The first eBPF block + * @used_temps: Number of used temporaries + * @prog: eBPF program where the resulting code should be stored + * @return: The eBPF program with the resulting code + */ +struct bpf_prog *mptcp_rbs_ebpf_alloc_regs( + struct mptcp_rbs_ebpf_block *first_block, int used_temps, + struct bpf_prog *prog); + +/** + * Releases a single eBPF block + * @block: The eBPF block + */ +void mptcp_rbs_ebpf_block_free(struct mptcp_rbs_ebpf_block *block); + +/** + * Releases all eBPF blocks in a CFG + * @first_block: The first eBPF block + */ +void mptcp_rbs_ebpf_blocks_free(struct mptcp_rbs_ebpf_block *first_block); + +#endif diff --git a/net/mptcp/mptcp_rbs_optimizer_lu.c b/net/mptcp/mptcp_rbs_optimizer_lu.c new file mode 100644 index 0000000000000..ae4068d6145d4 --- /dev/null +++ b/net/mptcp/mptcp_rbs_optimizer_lu.c @@ -0,0 +1,199 @@ +#include "mptcp_rbs_optimizer_lu.h" +#include "mptcp_rbs_cfg.h" +#include "mptcp_rbs_optimizer.h" +#include "mptcp_rbs_scheduler.h" +#include "mptcp_rbs_smt.h" +#include "mptcp_rbs_value.h" + +static struct mptcp_rbs_smt_var *find_next( + struct mptcp_rbs_cfg_block *block, struct mptcp_rbs_cfg_block_list *list, + struct mptcp_rbs_cfg_block_list *done_list, + struct mptcp_rbs_cfg_block **found_block) +{ + struct mptcp_rbs_cfg_block *block2; + struct mptcp_rbs_smt_var *var_smt; + struct mptcp_rbs_smt *smt; + bool already_done = false; + + /* Check if the block was already visited */ + FOREACH_BLOCK(list, block2, if (block == block2) return NULL); + ADD_BLOCK(list, block); + + /* Check if this block holds a NEXT value that was already processed */ + FOREACH_BLOCK(done_list, block2, if (block == block2) { + already_done = true; + break; + }); + + if (!already_done) { + smt = block->first_smt; + while (smt) { + if (smt->kind == SMT_KIND_VAR) { + var_smt = (struct mptcp_rbs_smt_var *) smt; + + if (var_smt->value->kind == + VALUE_KIND_SBFLIST_NEXT && + ((struct mptcp_rbs_value_sbf_list_next *) + var_smt->value) + ->list->kind == + VALUE_KIND_SBFLIST_VAR) { + ADD_BLOCK(done_list, block); + *found_block = block; + return var_smt; + } + } + + smt = smt->next; + } + } + + if (block->next) { + var_smt = find_next(block->next, list, done_list, found_block); + if (var_smt) + return var_smt; + } + + if (block->next_else) { + var_smt = + find_next(block->next_else, list, done_list, found_block); + if (var_smt) + return var_smt; + } + + return NULL; +} + +struct clone_ctx { + int var_number; + int list_var_number; + int i; +}; + +static struct mptcp_rbs_value *clone_user(void *user_ctx, + const struct mptcp_rbs_value *value) +{ + struct clone_ctx *ctx = user_ctx; + const struct mptcp_rbs_value_sbf_var *var_value; + + if (value->kind != VALUE_KIND_SBF_VAR) + return NULL; + + var_value = (const struct mptcp_rbs_value_sbf_var *) value; + if (var_value->var_number != ctx->var_number) + return NULL; + + return (struct mptcp_rbs_value *) mptcp_rbs_value_sbf_list_get_new( + (struct mptcp_rbs_value_sbf_list *) + mptcp_rbs_value_sbf_list_var_new(ctx->list_var_number), + (struct mptcp_rbs_value_int *) mptcp_rbs_value_constint_new( + ctx->i)); +} + +static void clone_smts(struct mptcp_rbs_cfg_block *block, + const struct mptcp_rbs_smt *smt_template, int var_number, + int list_var_number, int i) +{ + struct clone_ctx clone_ctx; + + clone_ctx.var_number = var_number; + 
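+	/* Descriptive note (editorial, derived from clone_user() above):
+	 * these fields parameterize the clone callback. Every reference to
+	 * the loop's subflow variable (var_number) found while cloning the
+	 * template statements is rewritten into a GET(i) access on the
+	 * subflow list variable, so each unrolled copy of the loop body
+	 * operates on a fixed subflow index.
+	 */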
clone_ctx.list_var_number = list_var_number; + clone_ctx.i = i; + + while (smt_template) { + struct mptcp_rbs_smt *clone; + + clone = + mptcp_rbs_smt_clone(smt_template, &clone_ctx, clone_user); + + mptcp_rbs_cfg_block_append(block, clone); + smt_template = smt_template->next; + } +} + +static void unroll(struct mptcp_rbs_opt_ctx *ctx, + struct mptcp_rbs_smt_var *var_smt, + struct mptcp_rbs_cfg_block *block) +{ + int list_var_number; + struct mptcp_rbs_smt *smt_template; + struct mptcp_rbs_cfg_block *next_block = block->next; + int i; + + list_var_number = + ((struct mptcp_rbs_value_sbf_list_var + *) ((struct mptcp_rbs_value_sbf_list_next *) var_smt->value) + ->list) + ->var_number; + smt_template = next_block->first_smt; + next_block->first_smt = NULL; + + for (i = 0; i < ctx->variation->sbf_num; ++i) { + clone_smts(next_block, smt_template, var_smt->var_number, + list_var_number, i); + } + smt_template->free(smt_template); + + block->condition->free(block->condition); + block->condition = NULL; + block->next->next = block->next_else; + block->next_else = NULL; +} + +static bool find_loop(struct mptcp_rbs_opt_ctx *ctx, + struct mptcp_rbs_cfg_block_list *done_list) +{ + struct mptcp_rbs_cfg_block_list list; + struct mptcp_rbs_smt_var *var_smt; + struct mptcp_rbs_cfg_block *block; + + INIT_BLOCK_LIST(&list); + var_smt = + find_next(ctx->variation->first_block, &list, done_list, &block); + + if (var_smt && block->condition && + block->condition->kind == VALUE_KIND_IS_NOT_NULL) { + struct mptcp_rbs_value_is_not_null *cond = + (struct mptcp_rbs_value_is_not_null *) block->condition; + + if (cond->operand->kind == VALUE_KIND_SBF_VAR && + ((struct mptcp_rbs_value_sbf_var *) cond->operand) + ->var_number == var_smt->var_number) { + /* TODO Support unrolling of ifs etc. 
+ clone blocks until goto to loop block is found + + - Search for block with goto = loop block and + remember it + - Set the tag to a special value and the goto to NULL + - Find the tag in the cloned ones and set the goto + */ + + /* Check if the loop is short enough to be + * unrolled */ + if (block->next && block->next->next == block) { + /* Can be unrolled */ + unroll(ctx, var_smt, block); + } + } + } + + FREE_BLOCK_LIST(&list); + return var_smt != NULL; +} + +void mptcp_rbs_opt_lu(struct mptcp_rbs_opt_ctx *ctx) +{ + struct mptcp_rbs_cfg_block_list list; + + if (!ctx->variation->sbf_num) { + /* Loop unrolling is only possible with a fixed number of + * subflows + */ + return; + } + + INIT_BLOCK_LIST(&list); + while (find_loop(ctx, &list)) { + /* Do nothing */ + } + FREE_BLOCK_LIST(&list); +} diff --git a/net/mptcp/mptcp_rbs_optimizer_lu.h b/net/mptcp/mptcp_rbs_optimizer_lu.h new file mode 100644 index 0000000000000..bcb6b055f2e48 --- /dev/null +++ b/net/mptcp/mptcp_rbs_optimizer_lu.h @@ -0,0 +1,13 @@ +#ifndef _MPTCP_RBS_OPTIMIZER_LU_H +#define _MPTCP_RBS_OPTIMIZER_LU_H + +struct mptcp_rbs_opt_ctx; + +/** + * Loop Unrolling: + * Unrolls FOREACH loops over subflows if possible + * @ctx: The optimization context + */ +void mptcp_rbs_opt_lu(struct mptcp_rbs_opt_ctx *ctx); + +#endif diff --git a/net/mptcp/mptcp_rbs_optimizer_vi.c b/net/mptcp/mptcp_rbs_optimizer_vi.c new file mode 100644 index 0000000000000..4e3a545484b32 --- /dev/null +++ b/net/mptcp/mptcp_rbs_optimizer_vi.c @@ -0,0 +1,462 @@ +#include "mptcp_rbs_optimizer_vi.h" +#include "mptcp_rbs_cfg.h" +#include "mptcp_rbs_optimizer.h" +#include "mptcp_rbs_scheduler.h" +#include "mptcp_rbs_smt.h" +#include "mptcp_rbs_value.h" + +static void opt_value(struct mptcp_rbs_opt_ctx *ctx, + struct mptcp_rbs_value **value_ptr) +{ + struct mptcp_rbs_value *value = *value_ptr; + +#define APPLY_ON_BIN(val) \ + opt_value(ctx, (struct mptcp_rbs_value **) &(val)->left_operand); \ + opt_value(ctx, (struct mptcp_rbs_value **) &(val)->right_operand); \ + break; + + switch (value->kind) { + case VALUE_KIND_CONSTINT: + case VALUE_KIND_CONSTSTRING: + case VALUE_KIND_NULL: + break; + case VALUE_KIND_BOOL_VAR: + case VALUE_KIND_INT_VAR: + case VALUE_KIND_STRING_VAR: + case VALUE_KIND_SBF_VAR: + case VALUE_KIND_SKB_VAR: + case VALUE_KIND_SKBLIST_VAR: { + /* Right now we only support inlining of SUBFLOWS list values */ + break; + } + case VALUE_KIND_SBFLIST_VAR: { + int var_index = + ((struct mptcp_rbs_value_sbf_list_var *) value)->var_number; + struct mptcp_rbs_opt_var_info *info = + &ctx->var_infos[var_index]; + + if (info->smt && + info->smt->value->kind == VALUE_KIND_SUBFLOWS) { + *value_ptr = + mptcp_rbs_value_clone(info->smt->value, NULL, NULL); + value->free(value); + --info->usage; + } + + break; + } + case VALUE_KIND_NOT: { + opt_value(ctx, (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_not *) value) + ->operand); + break; + } + case VALUE_KIND_EQUAL: { + APPLY_ON_BIN((struct mptcp_rbs_value_equal *) value) + } + case VALUE_KIND_UNEQUAL: { + APPLY_ON_BIN((struct mptcp_rbs_value_unequal *) value) + } + case VALUE_KIND_LESS: { + APPLY_ON_BIN((struct mptcp_rbs_value_less *) value) + } + case VALUE_KIND_LESS_EQUAL: { + APPLY_ON_BIN((struct mptcp_rbs_value_less_equal *) value) + } + case VALUE_KIND_GREATER: { + APPLY_ON_BIN((struct mptcp_rbs_value_greater *) value) + } + case VALUE_KIND_GREATER_EQUAL: { + APPLY_ON_BIN((struct mptcp_rbs_value_greater_equal *) value) + } + case VALUE_KIND_AND: { + APPLY_ON_BIN((struct mptcp_rbs_value_and *) value) + 
} + case VALUE_KIND_OR: { + APPLY_ON_BIN((struct mptcp_rbs_value_or *) value) + } + case VALUE_KIND_ADD: { + APPLY_ON_BIN((struct mptcp_rbs_value_add *) value) + } + case VALUE_KIND_SUBTRACT: { + APPLY_ON_BIN((struct mptcp_rbs_value_subtract *) value) + } + case VALUE_KIND_MULTIPLY: { + APPLY_ON_BIN((struct mptcp_rbs_value_multiply *) value) + } + case VALUE_KIND_DIVIDE: { + APPLY_ON_BIN((struct mptcp_rbs_value_divide *) value) + } + case VALUE_KIND_REMAINDER: { + APPLY_ON_BIN((struct mptcp_rbs_value_remainder *) value) + } + case VALUE_KIND_IS_NULL: { + opt_value(ctx, (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_is_null *) value) + ->operand); + break; + } + case VALUE_KIND_IS_NOT_NULL: { + opt_value(ctx, (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_is_not_null *) value) + ->operand); + break; + } + case VALUE_KIND_REG: + case VALUE_KIND_Q: + case VALUE_KIND_QU: + case VALUE_KIND_RQ: + case VALUE_KIND_CURRENT_TIME_MS: + case VALUE_KIND_RANDOM: + case VALUE_KIND_SBFLIST_FILTER_SBF: + case VALUE_KIND_SKBLIST_FILTER_SKB: + case VALUE_KIND_SUBFLOWS: { + /* Cannot use variables */ + break; + } + case VALUE_KIND_SBF_RTT: { + opt_value(ctx, (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_rtt *) value) + ->sbf); + break; + } + case VALUE_KIND_SBF_IS_BACKUP: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_is_backup *) value) + ->sbf); + break; + } + case VALUE_KIND_SBF_CWND: { + opt_value(ctx, (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_cwnd *) value) + ->sbf); + break; + } + case VALUE_KIND_SBF_SKBS_IN_FLIGHT: { + opt_value( + ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_skbs_in_flight *) value) + ->sbf); + break; + } + case VALUE_KIND_SBF_LOST_SKBS: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_lost_skbs *) value) + ->sbf); + break; + } + case VALUE_KIND_SBF_HAS_WINDOW_FOR: { + opt_value( + ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_has_window_for *) value) + ->sbf); + opt_value( + ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_has_window_for *) value) + ->skb); + break; + } + case VALUE_KIND_SBF_ID: { + opt_value(ctx, (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_id *) value) + ->sbf); + break; + } + case VALUE_KIND_SBF_DELAY_IN: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_delay_in *) value) + ->sbf); + break; + } + case VALUE_KIND_SBF_DELAY_OUT: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_delay_out *) value) + ->sbf); + break; + } + case VALUE_KIND_SBF_BW_OUT_SEND: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_bw_out_send *) value) + ->sbf); + break; + } + case VALUE_KIND_SBF_BW_OUT_ACK: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_bw_out_ack *) value) + ->sbf); + break; + } + case VALUE_KIND_SBF_SSTHRESH: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_ssthresh *) value) + ->sbf); + break; + } + case VALUE_KIND_SBF_THROTTLED: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_throttled *) value) + ->sbf); + break; + } + case VALUE_KIND_SBF_LOSSY: { + opt_value(ctx, (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_lossy *) value) + ->sbf); + break; + } + case VALUE_KIND_SBFLIST_NEXT: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct 
mptcp_rbs_value_sbf_list_next *) value) + ->list); + break; + } + case VALUE_KIND_SBFLIST_EMPTY: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_list_empty *) value) + ->list); + break; + } + case VALUE_KIND_SBFLIST_FILTER: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_list_filter *) value) + ->list); + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_list_filter *) value) + ->cond); + break; + } + case VALUE_KIND_SBFLIST_MAX: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_list_max *) value) + ->list); + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_list_max *) value) + ->cond); + break; + } + case VALUE_KIND_SBFLIST_MIN: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_list_min *) value) + ->list); + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_list_min *) value) + ->cond); + break; + } + case VALUE_KIND_SBFLIST_GET: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_list_get *) value) + ->list); + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_list_get *) value) + ->index); + break; + } + case VALUE_KIND_SBFLIST_COUNT: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_list_count *) value) + ->list); + break; + } + case VALUE_KIND_SBFLIST_SUM: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_list_sum *) value) + ->list); + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_list_sum *) value) + ->cond); + break; + } + case VALUE_KIND_SKB_SENT_ON: { + opt_value(ctx, (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_skb_sent_on *) value) + ->skb); + opt_value(ctx, (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_skb_sent_on *) value) + ->sbf); + break; + } + case VALUE_KIND_SKB_SENT_ON_ALL: { + opt_value(ctx, (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_skb_sent_on *) value) + ->skb); + break; + } + case VALUE_KIND_SKB_USER: { + opt_value(ctx, (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_skb_user *) value) + ->skb); + break; + } + case VALUE_KIND_SKBLIST_NEXT: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_skb_list_next *) value) + ->list); + break; + } + case VALUE_KIND_SKBLIST_EMPTY: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_skb_list_empty *) value) + ->list); + break; + } + case VALUE_KIND_SKBLIST_POP: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_skb_list_pop *) value) + ->list); + break; + } + case VALUE_KIND_SKBLIST_FILTER: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_list_filter *) value) + ->list); + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_sbf_list_filter *) value) + ->cond); + break; + } + case VALUE_KIND_SKBLIST_COUNT: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_skb_list_count *) value) + ->list); + break; + } + case VALUE_KIND_SKBLIST_TOP: { + opt_value(ctx, + (struct mptcp_rbs_value **) &( + (struct mptcp_rbs_value_skb_list_top *) value) + ->list); + break; + } + default: + break; + } +} + +static void opt_smt(struct mptcp_rbs_opt_ctx *ctx, struct mptcp_rbs_smt *smt) +{ + switch (smt->kind) { + case SMT_KIND_DROP: { + struct mptcp_rbs_smt_drop *drop_smt = + 
(struct mptcp_rbs_smt_drop *) smt; + + opt_value(ctx, (struct mptcp_rbs_value **) &drop_smt->skb); + break; + } + case SMT_KIND_PRINT: { + struct mptcp_rbs_smt_print *print_smt = + (struct mptcp_rbs_smt_print *) smt; + + opt_value(ctx, (struct mptcp_rbs_value **) &print_smt->msg); + if (print_smt->arg) + opt_value(ctx, &print_smt->arg); + break; + } + case SMT_KIND_PUSH: { + struct mptcp_rbs_smt_push *push_smt = + (struct mptcp_rbs_smt_push *) smt; + + opt_value(ctx, (struct mptcp_rbs_value **) &push_smt->sbf); + opt_value(ctx, (struct mptcp_rbs_value **) &push_smt->skb); + break; + } + case SMT_KIND_SET: { + struct mptcp_rbs_smt_set *set_smt = + (struct mptcp_rbs_smt_set *) smt; + + opt_value(ctx, (struct mptcp_rbs_value **) &set_smt->value); + break; + } + case SMT_KIND_SET_USER: { + struct mptcp_rbs_smt_set_user *set_user_smt = + (struct mptcp_rbs_smt_set_user *) smt; + + opt_value(ctx, (struct mptcp_rbs_value **) &set_user_smt->value); + break; + } + case SMT_KIND_VAR: { + struct mptcp_rbs_smt_var *var_smt = + (struct mptcp_rbs_smt_var *) smt; + + ctx->var_infos[var_smt->var_number].smt = var_smt; + opt_value(ctx, &var_smt->value); + break; + } + case SMT_KIND_VOID: { + struct mptcp_rbs_smt_void *void_smt = + (struct mptcp_rbs_smt_void *) smt; + + if (void_smt->value) + opt_value(ctx, &void_smt->value); + break; + } + case SMT_KIND_EBPF: { + /* Cannot optimize */ + break; + } + } +} + +static void opt_block(struct mptcp_rbs_opt_ctx *ctx, + struct mptcp_rbs_cfg_block *block, + struct mptcp_rbs_cfg_block_list *list) +{ + struct mptcp_rbs_cfg_block *block2; + struct mptcp_rbs_smt *smt; + + /* Check if the block was already visited */ + FOREACH_BLOCK(list, block2, if (block == block2) return ); + ADD_BLOCK(list, block); + + smt = block->first_smt; + while (smt) { + opt_smt(ctx, smt); + smt = smt->next; + } + + if (block->condition) + opt_value(ctx, (struct mptcp_rbs_value **) &block->condition); + if (block->next) + opt_block(ctx, block->next, list); + if (block->next_else) + opt_block(ctx, block->next_else, list); +} + +void mptcp_rbs_opt_vi(struct mptcp_rbs_opt_ctx *ctx) +{ + struct mptcp_rbs_cfg_block_list list; + + INIT_BLOCK_LIST(&list); + opt_block(ctx, ctx->variation->first_block, &list); + FREE_BLOCK_LIST(&list); +} diff --git a/net/mptcp/mptcp_rbs_optimizer_vi.h b/net/mptcp/mptcp_rbs_optimizer_vi.h new file mode 100644 index 0000000000000..b449b7ba0cbc7 --- /dev/null +++ b/net/mptcp/mptcp_rbs_optimizer_vi.h @@ -0,0 +1,13 @@ +#ifndef _MPTCP_RBS_OPTIMIZER_VI_H +#define _MPTCP_RBS_OPTIMIZER_VI_H + +struct mptcp_rbs_opt_ctx; + +/** + * Variable inlining: + * Inlines the value of a variable directly where the variable is used + * @ctx: The optimization context + */ +void mptcp_rbs_opt_vi(struct mptcp_rbs_opt_ctx *ctx); + +#endif diff --git a/net/mptcp/mptcp_rbs_parser.c b/net/mptcp/mptcp_rbs_parser.c new file mode 100644 index 0000000000000..cde89b2299cf2 --- /dev/null +++ b/net/mptcp/mptcp_rbs_parser.c @@ -0,0 +1,1933 @@ +#include "mptcp_rbs_parser.h" +#include "mptcp_rbs_cfg.h" +#include "mptcp_rbs_ctx.h" +#include "mptcp_rbs_scheduler.h" +#include "mptcp_rbs_smt.h" +#include "mptcp_rbs_value_parser.h" + +/* Macro to get the name of the parse function of a value */ +#define PARSE_FUNC(STRUCT) STRUCT##_parse + +/* Macro to ignore a value */ +#define APPLY_IGNORE_VALUE(ENUM, STR, STRUCT, RETURNTYPE) + +/* Macro to parse custom values without owner */ +#define APPLY_PARSE_VALUE(ENUM, STR, STRUCT, RETURNTYPE) \ + if (!strcmp(str, STR)) { \ + struct STRUCT *value = 
PARSE_FUNC(STRUCT)(ctx); \ + if (!value) \ + return NULL; \ + return (struct mptcp_rbs_value *) value; \ + } + +/* Macro to parse custom values with a subflow owner */ +#define APPLY_PARSE_SBF_VALUE(ENUM, STR, STRUCT, RETURNTYPE) \ + if (!strcmp(str, STR)) { \ + struct STRUCT *value2 = PARSE_FUNC(STRUCT)( \ + ctx, (struct mptcp_rbs_value_sbf *) value); \ + if (!value2) { \ + value->free(value); \ + return NULL; \ + } \ + value = (struct mptcp_rbs_value *) value2; \ + break; \ + } + +/* Macro to parse custom values with a subflow list owner */ +#define APPLY_PARSE_SBF_LIST_VALUE(ENUM, STR, STRUCT, RETURNTYPE) \ + if (!strcmp(str, STR)) { \ + struct STRUCT *value2 = PARSE_FUNC(STRUCT)( \ + ctx, (struct mptcp_rbs_value_sbf_list *) value); \ + if (!value2) { \ + value->free(value); \ + return NULL; \ + } \ + value = (struct mptcp_rbs_value *) value2; \ + break; \ + } + +/* Macro to parse custom values with a sockbuffer owner */ +#define APPLY_PARSE_SKB_VALUE(ENUM, STR, STRUCT, RETURNTYPE) \ + if (!strcmp(str, STR)) { \ + struct STRUCT *value2 = PARSE_FUNC(STRUCT)( \ + ctx, (struct mptcp_rbs_value_skb *) value); \ + if (!value2) { \ + value->free(value); \ + return NULL; \ + } \ + value = (struct mptcp_rbs_value *) value2; \ + break; \ + } + +/* Macro to parse custom values with a sockbuffer list owner */ +#define APPLY_PARSE_SKB_LIST_VALUE(ENUM, STR, STRUCT, RETURNTYPE) \ + if (!strcmp(str, STR)) { \ + struct STRUCT *value2 = PARSE_FUNC(STRUCT)( \ + ctx, (struct mptcp_rbs_value_skb_list *) value); \ + if (!value2) { \ + value->free(value); \ + return NULL; \ + } \ + value = (struct mptcp_rbs_value *) value2; \ + break; \ + } + +static bool expect_token(struct parse_ctx *ctx, enum mptcp_rbs_token_kind kind, + struct mptcp_rbs_token *token) +{ + if (!mptcp_rbs_get_next_token(&ctx->str, &ctx->position, &ctx->line, + &ctx->line_position, token)) { + printk("%s\n", mptcp_rbs_get_last_error()); + return false; + } + + if (token->kind != kind) { + char s1[TOKEN_BUFFER_LEN]; + char s2[TOKEN_BUFFER_LEN]; + + memset(s1, 0, TOKEN_BUFFER_LEN); + memset(s2, 0, TOKEN_BUFFER_LEN); + + mptcp_rbs_token_kind_to_string(kind, s1); + mptcp_rbs_token_to_string(token, s2); + + printk("%d: Token %s expected but %s found\n", token->position, + s1, s2); + return false; + } + + return true; +} + +static bool lookahead_token(struct parse_ctx *ctx, + struct mptcp_rbs_token *token) +{ + return mptcp_rbs_get_next_token_lookahead( + ctx->str, ctx->position, ctx->line, ctx->line_position, token); +} + +int sprintf_null(char **buf, const char *fmt, ...) 
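+/*
+ * printf-style helper: if buf is non-NULL and points to a valid buffer,
+ * the formatted text is written there and *buf is advanced past it;
+ * otherwise only the length that would have been written is computed via
+ * vsnprintf(NULL, 0, ...). The character count is returned in both cases.
+ */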
+{ + int n; + + va_list args; + va_start(args, fmt); + + if (buf && *buf) { + n = vsprintf(*buf, fmt, args); + *buf += n; + } else + n = vsnprintf(NULL, 0, fmt, args); + + va_end(args); + + return n; +} + +static struct mptcp_rbs_value *parse_value(struct parse_ctx *ctx, + int *value_position); + +static struct mptcp_rbs_value *parse_value_base(struct parse_ctx *ctx, + int *value_position) +{ + struct mptcp_rbs_token token; + + if (!mptcp_rbs_get_next_token(&ctx->str, &ctx->position, &ctx->line, + &ctx->line_position, &token)) { + printk("%s\n", mptcp_rbs_get_last_error()); + return NULL; + } + + *value_position = token.position; + + switch (token.kind) { + case TOKEN_KIND_NUMBER: + return (struct mptcp_rbs_value *) mptcp_rbs_value_constint_new( + token.number); + case TOKEN_KIND_STRING: + return (struct mptcp_rbs_value *) + mptcp_rbs_value_conststring_new(strclone(token.string)); + case TOKEN_KIND_NULL: + return (struct mptcp_rbs_value *) mptcp_rbs_value_null_new(); + case TOKEN_KIND_IDENT: { + const char *str = token.string; + struct repl *repl; + struct var *var; + + /* Might be register */ + if (str[0] == 'R' && str[1] >= '0' && str[1] <= '9' && + str[2] == 0) { + if (str[1] < '1' || + str[1] > ('0' + MPTCP_RBS_REG_COUNT)) { + printk("%d: Register name %s is invalid\n", + token.position, token.string); + return NULL; + } + + return (struct mptcp_rbs_value *) + mptcp_rbs_value_reg_new(str[1] - '1'); + } + +#define RBS_APPLY(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_PARSE_VALUE(ENUM, STR, STRUCT, RETURNTYPE) +#define RBS_APPLY_ON_SBF(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_IGNORE_VALUE(ENUM, STR, STRUCT, RETURNTYPE) +#define RBS_APPLY_ON_SBF_LIST(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_IGNORE_VALUE(ENUM, STR, STRUCT, RETURNTYPE) +#define RBS_APPLY_ON_SKB(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_IGNORE_VALUE(ENUM, STR, STRUCT, RETURNTYPE) +#define RBS_APPLY_ON_SKB_LIST(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_IGNORE_VALUE(ENUM, STR, STRUCT, RETURNTYPE) + MPTCP_RBS_VALUE_INFO +#undef RBS_APPLY +#undef RBS_APPLY_ON_SBF +#undef RBS_APPLY_ON_SBF_LIST +#undef RBS_APPLY_ON_SKB +#undef RBS_APPLY_ON_SKB_LIST + + /* Might be replacement */ + FOREACH_REPL(&ctx->repls, repl, if (!strcmp(str, repl->name)) { + return repl->new_value(repl->tag); + }); + + /* Might be variable */ + FOREACH_STACK_VAR( + &ctx->var_stack, var, if (!strcmp(str, var->name)) { + switch (var->type) { + case TYPE_KIND_NULL: + return (struct mptcp_rbs_value *) + mptcp_rbs_value_null_new(); + case TYPE_KIND_BOOL: + return (struct mptcp_rbs_value *) + mptcp_rbs_value_bool_var_new( + var->var_number); + case TYPE_KIND_INT: + return (struct mptcp_rbs_value *) + mptcp_rbs_value_int_var_new( + var->var_number); + case TYPE_KIND_STRING: + return (struct mptcp_rbs_value *) + mptcp_rbs_value_string_var_new( + var->var_number); + case TYPE_KIND_SBF: + return (struct mptcp_rbs_value *) + mptcp_rbs_value_sbf_var_new( + var->var_number); + case TYPE_KIND_SBFLIST: + return (struct mptcp_rbs_value *) + mptcp_rbs_value_sbf_list_var_new( + var->var_number); + case TYPE_KIND_SKB: + return (struct mptcp_rbs_value *) + mptcp_rbs_value_skb_var_new( + var->var_number, var->reinject); + case TYPE_KIND_SKBLIST: + return (struct mptcp_rbs_value *) + mptcp_rbs_value_skb_list_var_new( + var->var_number, + var->underlying_queue_kind); + } + }); + + /* Not found */ + printk("%d:%d (%d): Unknown function/property %s\n", token.line, + token.line_position, token.position, str); + return NULL; + } + case TOKEN_KIND_OPEN_BRACKET: { + struct mptcp_rbs_value 
*inner = + parse_value(ctx, value_position); + if (!inner) + return NULL; + + *value_position = token.position; + + /* ) must follow */ + if (!expect_token(ctx, TOKEN_KIND_CLOSE_BRACKET, &token)) { + inner->free(inner); + return NULL; + } + + return inner; + } + default: { + char s1[TOKEN_BUFFER_LEN]; + + memset(s1, 0, TOKEN_BUFFER_LEN); + mptcp_rbs_token_to_string(&token, s1); + + printk("%d: Value expected but %s found\n", token.position, s1); + return NULL; + } + } +} + +static struct mptcp_rbs_value *parse_value_dot(struct parse_ctx *ctx, + int *value_position) +{ + struct mptcp_rbs_token token; + struct mptcp_rbs_value *value; + + if (!lookahead_token(ctx, &token)) { + printk("%s\n", mptcp_rbs_get_last_error()); + return NULL; + } + + value = parse_value_base(ctx, value_position); + if (!value) + return NULL; + + while (true) { + /* . might follow */ + if (!lookahead_token(ctx, &token)) { + printk("%s\n", mptcp_rbs_get_last_error()); + value->free(value); + return NULL; + } + + if (token.kind != TOKEN_KIND_DOT) + break; + + mptcp_rbs_get_next_token(&ctx->str, &ctx->position, &ctx->line, + &ctx->line_position, &token); + + /* Identifier, SET_USER or PUSH must follow */ + if (!lookahead_token(ctx, &token)) { + value->free(value); + return NULL; + } + + if (token.kind == TOKEN_KIND_PUSH || token.kind == TOKEN_KIND_SET_USER) { + if (mptcp_rbs_value_get_type(value->kind) != + TYPE_KIND_SBF) { + printk( + "%d: Unknown function/property %s for %s\n", + token.position, token.string, + mptcp_rbs_type_get_name( + mptcp_rbs_value_get_type(value->kind))); + value->free(value); + return NULL; + } + + return value; + } + + if (!expect_token(ctx, TOKEN_KIND_IDENT, &token)) { + value->free(value); + return NULL; + } + + switch (mptcp_rbs_value_get_type(value->kind)) { + case TYPE_KIND_SBF: { + const char *str = token.string; + +#define RBS_APPLY(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_IGNORE_VALUE(ENUM, STR, STRUCT, RETURNTYPE) +#define RBS_APPLY_ON_SBF(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_PARSE_SBF_VALUE(ENUM, STR, STRUCT, RETURNTYPE) +#define RBS_APPLY_ON_SBF_LIST(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_IGNORE_VALUE(ENUM, STR, STRUCT, RETURNTYPE) +#define RBS_APPLY_ON_SKB(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_IGNORE_VALUE(ENUM, STR, STRUCT, RETURNTYPE) +#define RBS_APPLY_ON_SKB_LIST(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_IGNORE_VALUE(ENUM, STR, STRUCT, RETURNTYPE) + MPTCP_RBS_VALUE_INFO +#undef RBS_APPLY +#undef RBS_APPLY_ON_SBF +#undef RBS_APPLY_ON_SBF_LIST +#undef RBS_APPLY_ON_SKB +#undef RBS_APPLY_ON_SKB_LIST + + /* Not found */ + printk("%d: Unknown function/property %s for subflow\n", + token.position, str); + value->free(value); + return NULL; + } + case TYPE_KIND_SBFLIST: { + const char *str = token.string; + +#define RBS_APPLY(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_IGNORE_VALUE(ENUM, STR, STRUCT, RETURNTYPE) +#define RBS_APPLY_ON_SBF(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_IGNORE_VALUE(ENUM, STR, STRUCT, RETURNTYPE) +#define RBS_APPLY_ON_SBF_LIST(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_PARSE_SBF_LIST_VALUE(ENUM, STR, STRUCT, RETURNTYPE) +#define RBS_APPLY_ON_SKB(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_IGNORE_VALUE(ENUM, STR, STRUCT, RETURNTYPE) +#define RBS_APPLY_ON_SKB_LIST(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_IGNORE_VALUE(ENUM, STR, STRUCT, RETURNTYPE) + MPTCP_RBS_VALUE_INFO +#undef RBS_APPLY +#undef RBS_APPLY_ON_SBF +#undef RBS_APPLY_ON_SBF_LIST +#undef RBS_APPLY_ON_SKB +#undef RBS_APPLY_ON_SKB_LIST + + /* Not found */ + printk("%d:%d (%d): Unknown function/property 
%s for " + "subflow " + "list\n", + token.line, token.line_position, token.position, + str); + value->free(value); + return NULL; + } + case TYPE_KIND_SKB: { + const char *str = token.string; + +#define RBS_APPLY(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_IGNORE_VALUE(ENUM, STR, STRUCT, RETURNTYPE) +#define RBS_APPLY_ON_SBF(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_IGNORE_VALUE(ENUM, STR, STRUCT, RETURNTYPE) +#define RBS_APPLY_ON_SBF_LIST(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_IGNORE_VALUE(ENUM, STR, STRUCT, RETURNTYPE) +#define RBS_APPLY_ON_SKB(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_PARSE_SKB_VALUE(ENUM, STR, STRUCT, RETURNTYPE) +#define RBS_APPLY_ON_SKB_LIST(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_IGNORE_VALUE(ENUM, STR, STRUCT, RETURNTYPE) + MPTCP_RBS_VALUE_INFO +#undef RBS_APPLY +#undef RBS_APPLY_ON_SBF +#undef RBS_APPLY_ON_SBF_LIST +#undef RBS_APPLY_ON_SKB +#undef RBS_APPLY_ON_SKB_LIST + + /* Not found */ + printk( + "%d: Unknown function/property %s for sockbuffer\n", + token.position, str); + value->free(value); + return NULL; + } + case TYPE_KIND_SKBLIST: { + const char *str = token.string; + +#define RBS_APPLY(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_IGNORE_VALUE(ENUM, STR, STRUCT, RETURNTYPE) +#define RBS_APPLY_ON_SBF(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_IGNORE_VALUE(ENUM, STR, STRUCT, RETURNTYPE) +#define RBS_APPLY_ON_SBF_LIST(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_IGNORE_VALUE(ENUM, STR, STRUCT, RETURNTYPE) +#define RBS_APPLY_ON_SKB(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_IGNORE_VALUE(ENUM, STR, STRUCT, RETURNTYPE) +#define RBS_APPLY_ON_SKB_LIST(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_PARSE_SKB_LIST_VALUE(ENUM, STR, STRUCT, RETURNTYPE) + MPTCP_RBS_VALUE_INFO +#undef RBS_APPLY +#undef RBS_APPLY_ON_SBF +#undef RBS_APPLY_ON_SBF_LIST +#undef RBS_APPLY_ON_SKB +#undef RBS_APPLY_ON_SKB_LIST + + /* Not found */ + printk("%d: Unknown function/property %s for " + "sockbuffer list\n", + token.position, str); + value->free(value); + return NULL; + } + default: { + printk("%d: Unknown function/property %s for %s\n", + token.position, token.string, + mptcp_rbs_type_get_name( + mptcp_rbs_value_get_type(value->kind))); + value->free(value); + return NULL; + } + } + } + + return value; +} + +static struct mptcp_rbs_value *parse_value_not(struct parse_ctx *ctx, + int *value_position) +{ + struct mptcp_rbs_value *inner; + struct mptcp_rbs_token token; + bool negate; + + /* ! might follow */ + if (!lookahead_token(ctx, &token)) { + printk("%s\n", mptcp_rbs_get_last_error()); + return NULL; + } + + negate = token.kind == TOKEN_KIND_NOT; + if (negate) + mptcp_rbs_get_next_token(&ctx->str, &ctx->position, &ctx->line, + &ctx->line_position, &token); + + inner = parse_value_dot(ctx, value_position); + if (!inner) + return NULL; + + if (negate) { + enum mptcp_rbs_type_kind type = + mptcp_rbs_value_get_type(inner->kind); + + if (type != TYPE_KIND_BOOL) { + printk("%d: ! 
operator cannot be applied on %s\n", + token.position, mptcp_rbs_type_get_name(type)); + inner->free(inner); + return NULL; + } + + return (struct mptcp_rbs_value *) mptcp_rbs_value_not_new( + (struct mptcp_rbs_value_bool *) inner); + } + + return inner; +} + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wswitch" +static struct mptcp_rbs_value *parse_value_multiply(struct parse_ctx *ctx, + int *value_position) +{ + struct mptcp_rbs_value *left_value; + struct mptcp_rbs_value *right_value; + enum mptcp_rbs_type_kind left_type; + enum mptcp_rbs_type_kind right_type; + int right_value_position; + struct mptcp_rbs_token token; + + left_value = parse_value_not(ctx, value_position); + if (!left_value) + return NULL; + + while (true) { + /* *, / or % might follow */ + if (!lookahead_token(ctx, &token)) { + printk("%s\n", mptcp_rbs_get_last_error()); + left_value->free(left_value); + return NULL; + } + if (token.kind != TOKEN_KIND_MUL && + token.kind != TOKEN_KIND_DIV && + token.kind != TOKEN_KIND_REM) + break; + + mptcp_rbs_get_next_token(&ctx->str, &ctx->position, &ctx->line, + &ctx->line_position, &token); + + right_value = parse_value_not(ctx, &right_value_position); + if (!right_value) { + left_value->free(left_value); + return NULL; + } + + left_type = mptcp_rbs_value_get_type(left_value->kind); + right_type = mptcp_rbs_value_get_type(right_value->kind); + + if (left_type != TYPE_KIND_INT || right_type != TYPE_KIND_INT) { + switch (token.kind) { + case TOKEN_KIND_MUL: { + printk("%d: * operator cannot be applied on %s " + "and %s\n", + token.position, + mptcp_rbs_type_get_name(left_type), + mptcp_rbs_type_get_name(right_type)); + break; + } + case TOKEN_KIND_DIV: { + printk("%d: / operator cannot be applied on %s " + "and %s\n", + token.position, + mptcp_rbs_type_get_name(left_type), + mptcp_rbs_type_get_name(right_type)); + break; + } + case TOKEN_KIND_REM: { + printk("%d: %% operator cannot be applied on " + "%s and %s\n", + token.position, + mptcp_rbs_type_get_name(left_type), + mptcp_rbs_type_get_name(right_type)); + break; + } + } + + left_value->free(left_value); + right_value->free(right_value); + return NULL; + } + + switch (token.kind) { + case TOKEN_KIND_MUL: { + left_value = (struct mptcp_rbs_value *) + mptcp_rbs_value_multiply_new( + (struct mptcp_rbs_value_int *) left_value, + (struct mptcp_rbs_value_int *) right_value); + break; + } + case TOKEN_KIND_DIV: { + left_value = (struct mptcp_rbs_value *) + mptcp_rbs_value_divide_new( + (struct mptcp_rbs_value_int *) left_value, + (struct mptcp_rbs_value_int *) right_value); + break; + } + case TOKEN_KIND_REM: { + left_value = (struct mptcp_rbs_value *) + mptcp_rbs_value_remainder_new( + (struct mptcp_rbs_value_int *) left_value, + (struct mptcp_rbs_value_int *) right_value); + break; + } + } + } + + return left_value; +} +#pragma GCC diagnostic pop + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wswitch" +static struct mptcp_rbs_value *parse_value_add(struct parse_ctx *ctx, + int *value_position) +{ + struct mptcp_rbs_value *left_value; + struct mptcp_rbs_value *right_value; + enum mptcp_rbs_type_kind left_type; + enum mptcp_rbs_type_kind right_type; + int right_value_position; + struct mptcp_rbs_token token; + + left_value = parse_value_multiply(ctx, value_position); + if (!left_value) + return NULL; + + while (true) { + /* + or - might follow */ + if (!lookahead_token(ctx, &token)) { + printk("%s\n", mptcp_rbs_get_last_error()); + left_value->free(left_value); + return NULL; + } + if (token.kind != 
TOKEN_KIND_ADD && + token.kind != TOKEN_KIND_SUB) + break; + + mptcp_rbs_get_next_token(&ctx->str, &ctx->position, &ctx->line, + &ctx->line_position, &token); + + right_value = parse_value_multiply(ctx, &right_value_position); + if (!right_value) { + left_value->free(left_value); + return NULL; + } + + left_type = mptcp_rbs_value_get_type(left_value->kind); + right_type = mptcp_rbs_value_get_type(right_value->kind); + + if (left_type != TYPE_KIND_INT || right_type != TYPE_KIND_INT) { + switch (token.kind) { + case TOKEN_KIND_ADD: { + printk("%d: + operator cannot be applied on %s " + "and %s\n", + token.position, + mptcp_rbs_type_get_name(left_type), + mptcp_rbs_type_get_name(right_type)); + break; + } + case TOKEN_KIND_SUB: { + printk("%d: - operator cannot be applied on %s " + "and %s\n", + token.position, + mptcp_rbs_type_get_name(left_type), + mptcp_rbs_type_get_name(right_type)); + break; + } + } + + left_value->free(left_value); + right_value->free(right_value); + return NULL; + } + + switch (token.kind) { + case TOKEN_KIND_ADD: { + left_value = + (struct mptcp_rbs_value *) mptcp_rbs_value_add_new( + (struct mptcp_rbs_value_int *) left_value, + (struct mptcp_rbs_value_int *) right_value); + break; + } + case TOKEN_KIND_SUB: { + left_value = (struct mptcp_rbs_value *) + mptcp_rbs_value_subtract_new( + (struct mptcp_rbs_value_int *) left_value, + (struct mptcp_rbs_value_int *) right_value); + break; + } + } + } + + return left_value; +} +#pragma GCC diagnostic pop + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wswitch" +static struct mptcp_rbs_value *parse_value_cmp(struct parse_ctx *ctx, + int *value_position) +{ + struct mptcp_rbs_value *left_value; + struct mptcp_rbs_value *right_value; + enum mptcp_rbs_type_kind left_type; + enum mptcp_rbs_type_kind right_type; + int right_value_position; + struct mptcp_rbs_token token; + + left_value = parse_value_add(ctx, value_position); + if (!left_value) + return NULL; + + while (true) { + /* =, !=, <, <=, > or >= might follow */ + if (!lookahead_token(ctx, &token)) { + printk("%s\n", mptcp_rbs_get_last_error()); + left_value->free(left_value); + return NULL; + } + if (token.kind != TOKEN_KIND_EQUAL && + token.kind != TOKEN_KIND_UNEQUAL && + token.kind != TOKEN_KIND_LESS && + token.kind != TOKEN_KIND_LESS_EQUAL && + token.kind != TOKEN_KIND_GREATER && + token.kind != TOKEN_KIND_GREATER_EQUAL) + break; + + mptcp_rbs_get_next_token(&ctx->str, &ctx->position, &ctx->line, + &ctx->line_position, &token); + + right_value = parse_value_add(ctx, &right_value_position); + if (!right_value) { + left_value->free(left_value); + return NULL; + } + + left_type = mptcp_rbs_value_get_type(left_value->kind); + right_type = mptcp_rbs_value_get_type(right_value->kind); + + switch (token.kind) { + case TOKEN_KIND_EQUAL: { + if (right_type == TYPE_KIND_NULL) { + left_value = (struct mptcp_rbs_value *) + mptcp_rbs_value_is_null_new(left_value); + + right_value->free(right_value); + break; + } + + if (left_type != TYPE_KIND_INT || + right_type != TYPE_KIND_INT) { + printk("%d: = operator cannot be applied on %s " + "and %s\n", + token.position, + mptcp_rbs_type_get_name(left_type), + mptcp_rbs_type_get_name(right_type)); + left_value->free(left_value); + right_value->free(right_value); + return NULL; + } + + left_value = (struct mptcp_rbs_value *) + mptcp_rbs_value_equal_new( + (struct mptcp_rbs_value_int *) left_value, + (struct mptcp_rbs_value_int *) right_value); + break; + } + case TOKEN_KIND_UNEQUAL: { + if (right_type == TYPE_KIND_NULL) { + 
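+			/* As in the '=' case above, a comparison against the
+			 * NULL literal is lowered to a dedicated IS_NOT_NULL
+			 * value instead of an integer comparison.
+			 */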
left_value = (struct mptcp_rbs_value *) + mptcp_rbs_value_is_not_null_new(left_value); + + right_value->free(right_value); + break; + } + + if (left_type != TYPE_KIND_INT || + right_type != TYPE_KIND_INT) { + printk("%d: != operator cannot be applied on " + "%s and %s\n", + token.position, + mptcp_rbs_type_get_name(left_type), + mptcp_rbs_type_get_name(right_type)); + left_value->free(left_value); + right_value->free(right_value); + return NULL; + } + + left_value = (struct mptcp_rbs_value *) + mptcp_rbs_value_unequal_new( + (struct mptcp_rbs_value_int *) left_value, + (struct mptcp_rbs_value_int *) right_value); + break; + } + case TOKEN_KIND_LESS: { + if (left_type != TYPE_KIND_INT || + right_type != TYPE_KIND_INT) { + printk("%d: < operator cannot be applied on %s " + "and %s\n", + token.position, + mptcp_rbs_type_get_name(left_type), + mptcp_rbs_type_get_name(right_type)); + left_value->free(left_value); + right_value->free(right_value); + return NULL; + } + + left_value = + (struct mptcp_rbs_value *) mptcp_rbs_value_less_new( + (struct mptcp_rbs_value_int *) left_value, + (struct mptcp_rbs_value_int *) right_value); + break; + } + case TOKEN_KIND_LESS_EQUAL: { + if (left_type != TYPE_KIND_INT || + right_type != TYPE_KIND_INT) { + printk("%d: <= operator cannot be applied on " + "%s and %s\n", + token.position, + mptcp_rbs_type_get_name(left_type), + mptcp_rbs_type_get_name(right_type)); + left_value->free(left_value); + right_value->free(right_value); + return NULL; + } + + left_value = (struct mptcp_rbs_value *) + mptcp_rbs_value_less_equal_new( + (struct mptcp_rbs_value_int *) left_value, + (struct mptcp_rbs_value_int *) right_value); + break; + } + case TOKEN_KIND_GREATER: { + if (left_type != TYPE_KIND_INT || + right_type != TYPE_KIND_INT) { + printk("%d: > operator cannot be applied on %s " + "and %s\n", + token.position, + mptcp_rbs_type_get_name(left_type), + mptcp_rbs_type_get_name(right_type)); + left_value->free(left_value); + right_value->free(right_value); + return NULL; + } + + left_value = (struct mptcp_rbs_value *) + mptcp_rbs_value_greater_new( + (struct mptcp_rbs_value_int *) left_value, + (struct mptcp_rbs_value_int *) right_value); + break; + } + case TOKEN_KIND_GREATER_EQUAL: { + if (left_type != TYPE_KIND_INT || + right_type != TYPE_KIND_INT) { + printk("%d: >= operator cannot be applied on " + "%s and %s\n", + token.position, + mptcp_rbs_type_get_name(left_type), + mptcp_rbs_type_get_name(right_type)); + left_value->free(left_value); + right_value->free(right_value); + return NULL; + } + + left_value = (struct mptcp_rbs_value *) + mptcp_rbs_value_greater_equal_new( + (struct mptcp_rbs_value_int *) left_value, + (struct mptcp_rbs_value_int *) right_value); + break; + } + } + } + + return left_value; +} +#pragma GCC diagnostic pop + +static struct mptcp_rbs_value *parse_value_and(struct parse_ctx *ctx, + int *value_position) +{ + struct mptcp_rbs_value *left_value; + struct mptcp_rbs_value *right_value; + enum mptcp_rbs_type_kind left_type; + enum mptcp_rbs_type_kind right_type; + int right_value_position; + struct mptcp_rbs_token token; + + left_value = parse_value_cmp(ctx, value_position); + if (!left_value) + return NULL; + + while (true) { + /* OR might follow */ + if (!lookahead_token(ctx, &token)) { + printk("%s\n", mptcp_rbs_get_last_error()); + left_value->free(left_value); + return NULL; + } + if (token.kind != TOKEN_KIND_AND) + break; + + mptcp_rbs_get_next_token(&ctx->str, &ctx->position, &ctx->line, + &ctx->line_position, &token); + + right_value = 
parse_value_cmp(ctx, &right_value_position); + if (!right_value) { + left_value->free(left_value); + return NULL; + } + + left_type = mptcp_rbs_value_get_type(left_value->kind); + right_type = mptcp_rbs_value_get_type(right_value->kind); + + if (left_type != TYPE_KIND_BOOL || + right_type != TYPE_KIND_BOOL) { + printk( + "%d: AND operator cannot be applied on %s and %s\n", + token.position, mptcp_rbs_type_get_name(left_type), + mptcp_rbs_type_get_name(right_type)); + left_value->free(left_value); + right_value->free(right_value); + return NULL; + } + + left_value = (struct mptcp_rbs_value *) mptcp_rbs_value_and_new( + (struct mptcp_rbs_value_bool *) left_value, + (struct mptcp_rbs_value_bool *) right_value); + } + + return left_value; +} + +static struct mptcp_rbs_value *parse_value_allow_pop(struct parse_ctx *ctx, + int *value_position) +{ + struct mptcp_rbs_value *left_value; + struct mptcp_rbs_value *right_value; + enum mptcp_rbs_type_kind left_type; + enum mptcp_rbs_type_kind right_type; + int right_value_position; + struct mptcp_rbs_token token; + + left_value = parse_value_and(ctx, value_position); + if (!left_value) + return NULL; + + while (true) { + /* OR might follow */ + if (!lookahead_token(ctx, &token)) { + printk("%s\n", mptcp_rbs_get_last_error()); + left_value->free(left_value); + return NULL; + } + if (token.kind != TOKEN_KIND_OR) + break; + + mptcp_rbs_get_next_token(&ctx->str, &ctx->position, &ctx->line, + &ctx->line_position, &token); + + right_value = parse_value_and(ctx, &right_value_position); + if (!right_value) { + left_value->free(left_value); + return NULL; + } + + left_type = mptcp_rbs_value_get_type(left_value->kind); + right_type = mptcp_rbs_value_get_type(right_value->kind); + + if (left_type != TYPE_KIND_BOOL || + right_type != TYPE_KIND_BOOL) { + printk( + "%d: OR operator cannot be applied on %s and %s\n", + token.position, mptcp_rbs_type_get_name(left_type), + mptcp_rbs_type_get_name(right_type)); + left_value->free(left_value); + right_value->free(right_value); + return NULL; + } + + left_value = (struct mptcp_rbs_value *) mptcp_rbs_value_or_new( + (struct mptcp_rbs_value_bool *) left_value, + (struct mptcp_rbs_value_bool *) right_value); + } + + return left_value; +} + +static struct mptcp_rbs_value *parse_value(struct parse_ctx *ctx, + int *value_position) +{ + struct mptcp_rbs_value *value; + + value = parse_value_allow_pop(ctx, value_position); + if (value && value->kind == VALUE_KIND_SKBLIST_POP) { + printk("%d: POP can only be used inside DROP or PUSH\n", + *value_position); + value->free(value); + return NULL; + } + + return value; +} + +static struct mptcp_rbs_value_bool *parse_value_bool(struct parse_ctx *ctx) +{ + struct mptcp_rbs_value *result; + enum mptcp_rbs_type_kind type; + int value_position; + + result = parse_value(ctx, &value_position); + if (!result) + return NULL; + + type = mptcp_rbs_value_get_type(result->kind); + + if (type != TYPE_KIND_BOOL) { + printk("%d: Boolean value expected but %s found\n", + value_position, mptcp_rbs_type_get_name(type)); + result->free(result); + return NULL; + } + + return (struct mptcp_rbs_value_bool *) result; +} + +static struct mptcp_rbs_value_int *parse_value_int(struct parse_ctx *ctx) +{ + struct mptcp_rbs_value *result; + enum mptcp_rbs_type_kind type; + int value_position; + + result = parse_value(ctx, &value_position); + if (!result) + return NULL; + + type = mptcp_rbs_value_get_type(result->kind); + + if (type != TYPE_KIND_INT) { + printk("%d: Integer value expected but %s found\n", + 
value_position, mptcp_rbs_type_get_name(type)); + result->free(result); + return NULL; + } + + return (struct mptcp_rbs_value_int *) result; +} + +static struct mptcp_rbs_value_string *parse_value_string(struct parse_ctx *ctx) +{ + struct mptcp_rbs_value *result; + enum mptcp_rbs_type_kind type; + int value_position; + + result = parse_value(ctx, &value_position); + if (!result) + return NULL; + + type = mptcp_rbs_value_get_type(result->kind); + + if (type != TYPE_KIND_STRING) { + printk("%d: String value expected but %s found\n", + value_position, mptcp_rbs_type_get_name(type)); + result->free(result); + return NULL; + } + + return (struct mptcp_rbs_value_string *) result; +} + +static struct mptcp_rbs_value_sbf *parse_value_sbf(struct parse_ctx *ctx) +{ + struct mptcp_rbs_value *result; + enum mptcp_rbs_type_kind type; + int value_position; + + result = parse_value(ctx, &value_position); + if (!result) + return NULL; + + type = mptcp_rbs_value_get_type(result->kind); + + if (type != TYPE_KIND_SBF) { + printk("%d: Subflow value expected but %s found\n", + value_position, mptcp_rbs_type_get_name(type)); + result->free(result); + return NULL; + } + + return (struct mptcp_rbs_value_sbf *) result; +} + +static struct mptcp_rbs_value_skb *parse_value_skb(struct parse_ctx *ctx) +{ + struct mptcp_rbs_value *result; + enum mptcp_rbs_type_kind type; + int value_position; + + result = parse_value(ctx, &value_position); + if (!result) + return NULL; + + type = mptcp_rbs_value_get_type(result->kind); + + if (type != TYPE_KIND_SKB) { + printk("%d: Sockbuffer value expected but %s found\n", + value_position, mptcp_rbs_type_get_name(type)); + result->free(result); + return NULL; + } + + return (struct mptcp_rbs_value_skb *) result; +} + +static struct mptcp_rbs_value_skb *parse_value_skb_allow_pop( + struct parse_ctx *ctx) +{ + struct mptcp_rbs_value *result; + enum mptcp_rbs_type_kind type; + int value_position; + + result = parse_value_allow_pop(ctx, &value_position); + if (!result) + return NULL; + + type = mptcp_rbs_value_get_type(result->kind); + + if (type != TYPE_KIND_SKB) { + printk("%d: Sockbuffer value expected but %s found\n", + value_position, mptcp_rbs_type_get_name(type)); + result->free(result); + return NULL; + } + + return (struct mptcp_rbs_value_skb *) result; +} + +static bool parse_smt(struct parse_ctx *ctx, struct mptcp_rbs_cfg_block **block, + struct mptcp_rbs_smt **last_smt, bool *return_found); + +static bool parse_smt_drop(struct parse_ctx *ctx, + struct mptcp_rbs_cfg_block **block, + struct mptcp_rbs_smt **last_smt) +{ + struct mptcp_rbs_token token; + struct mptcp_rbs_value_skb *value; + struct mptcp_rbs_smt_drop *smt; + + mptcp_rbs_get_next_token(&ctx->str, &ctx->position, &ctx->line, + &ctx->line_position, &token); + + /* ( must follow */ + if (!expect_token(ctx, TOKEN_KIND_OPEN_BRACKET, &token)) + return false; + + /* SKB value must follow */ + value = parse_value_skb_allow_pop(ctx); + if (!value) + return false; + + /* ) must follow */ + if (!expect_token(ctx, TOKEN_KIND_CLOSE_BRACKET, &token)) { + value->free(value); + return false; + } + + /* ; must follow */ + if (!expect_token(ctx, TOKEN_KIND_SEMICOLON, &token)) { + value->free(value); + return false; + } + + smt = mptcp_rbs_smt_drop_new(value); + if (*last_smt) + (*last_smt)->next = (struct mptcp_rbs_smt *) smt; + else + (*block)->first_smt = (struct mptcp_rbs_smt *) smt; + *last_smt = (struct mptcp_rbs_smt *) smt; + + return true; +} + +static bool parse_smts(struct parse_ctx *ctx, + struct mptcp_rbs_cfg_block 
**block, char *var_name, + int var_number, enum mptcp_rbs_type_kind var_type, + bool reinject, bool *return_found) +{ + bool result = true; + struct var *var; + struct var_list vars; + struct mptcp_rbs_token token; + struct mptcp_rbs_smt *last_smt = NULL; + + /* { must follow */ + if (!expect_token(ctx, TOKEN_KIND_OPEN_CURLY, &token)) + return false; + + INIT_VAR_LIST(&vars); + PUSH_VAR_LIST(&ctx->var_stack, &vars); + if (var_name) { + var = var_new(var_name, var_number, var_type, &reinject, NULL); + ADD_VAR(&vars, var); + } + + while (true) { + if (!parse_smt(ctx, block, &last_smt, return_found)) { + result = false; + break; + } + + /* } might follow */ + if (!lookahead_token(ctx, &token)) { + result = false; + break; + } + if (token.kind == TOKEN_KIND_CLOSE_CURLY || *return_found) { + /* } must have followed */ + if (!expect_token(ctx, TOKEN_KIND_CLOSE_CURLY, &token)) + result = false; + break; + } + } + + POP_VAR_LIST(&ctx->var_stack); + FOREACH_VAR(&vars, var, var_free(var)); + FREE_VAR_LIST(&vars); + return result; +} + +/* + * FOREACH loops are translated into ifs and gotos as follows: + * + * FOREACH (VAR x IN y) { + * z; + * } + * + * --> + * + * b0: + * VAR v1 = y; + * GOTO b1; + * + * b1: + * VAR x = v1.NEXT(); + * if (x != NULL) GOTO b2 ELSE GOTO b3; + * + * b2: + * z; + * GOTO b1; + * + * b3: + * ... + */ +static bool parse_smt_foreach(struct parse_ctx *ctx, + struct mptcp_rbs_cfg_block **block, + struct mptcp_rbs_smt **last_smt, + bool *return_found) +{ + struct mptcp_rbs_token token; + struct mptcp_rbs_token ident_token; + int value_position; + struct mptcp_rbs_value *value; + enum mptcp_rbs_type_kind type; + int var_number; + struct mptcp_rbs_smt_var *var_smt; + struct mptcp_rbs_cfg_block *next_block; + struct mptcp_rbs_cfg_block *last_block; + enum mptcp_rbs_value_kind underlying_queue_kind; + + mptcp_rbs_get_next_token(&ctx->str, &ctx->position, &ctx->line, + &ctx->line_position, &token); + + /* ( must follow */ + if (!expect_token(ctx, TOKEN_KIND_OPEN_BRACKET, &token)) + return false; + + /* VAR must follow */ + if (!expect_token(ctx, TOKEN_KIND_VAR, &token)) + return false; + + /* Identifier must follow */ + if (!expect_token(ctx, TOKEN_KIND_IDENT, &ident_token)) + return false; + + /* IN must follow */ + if (!expect_token(ctx, TOKEN_KIND_IN, &token)) + return false; + + /* Subflow or sockbuffer list must follow */ + value = parse_value(ctx, &value_position); + if (!value) + return false; + underlying_queue_kind = ctx->underlying_queue_kind; + + type = mptcp_rbs_value_get_type(value->kind); + if (type != TYPE_KIND_SBFLIST && type != TYPE_KIND_SKBLIST) { + printk("%d: List value expected but %s found\n", value_position, + mptcp_rbs_type_get_name(type)); + value->free(value); + return false; + } + + /* Create VAR v1 = y; */ + var_number = ctx->var_index; + var_smt = mptcp_rbs_smt_var_new(var_number, false, value); + ++ctx->var_index; + if (*last_smt) + (*last_smt)->next = (struct mptcp_rbs_smt *) var_smt; + else + (*block)->first_smt = (struct mptcp_rbs_smt *) var_smt; + + /* ) must follow */ + if (!expect_token(ctx, TOKEN_KIND_CLOSE_BRACKET, &token)) + return false; + + /* b1: */ + next_block = kzalloc(sizeof(struct mptcp_rbs_cfg_block), GFP_KERNEL); + (*block)->next = next_block; + *block = next_block; + + /* VAR x = v1.NEXT(); */ + if (type == TYPE_KIND_SBFLIST) { + struct mptcp_rbs_value_sbf_list_var *var_value = + mptcp_rbs_value_sbf_list_var_new(var_number); + + value = (struct mptcp_rbs_value *) + mptcp_rbs_value_sbf_list_next_new( + (struct mptcp_rbs_value_sbf_list 
*) var_value); + } else { + struct mptcp_rbs_value_skb_list_var *var_value = + mptcp_rbs_value_skb_list_var_new(var_number, + underlying_queue_kind); + + value = (struct mptcp_rbs_value *) + mptcp_rbs_value_skb_list_next_new( + (struct mptcp_rbs_value_skb_list *) var_value); + } + + var_number = ctx->var_index; + var_smt = mptcp_rbs_smt_var_new(var_number, false, value); + ++ctx->var_index; + (*block)->first_smt = (struct mptcp_rbs_smt *) var_smt; + + /* if (x != NULL) GOTO b2 ELSE GOTO b3; */ + if (type == TYPE_KIND_SBFLIST) { + value = (struct mptcp_rbs_value *) mptcp_rbs_value_sbf_var_new( + var_number); + type = TYPE_KIND_SBF; + } else { + value = (struct mptcp_rbs_value *) mptcp_rbs_value_skb_var_new( + var_number, underlying_queue_kind == VALUE_KIND_RQ); + type = TYPE_KIND_SKB; + } + + (*block)->condition = + (struct mptcp_rbs_value_bool *) mptcp_rbs_value_is_not_null_new( + value); + + /* b2: */ + next_block = kzalloc(sizeof(struct mptcp_rbs_cfg_block), GFP_KERNEL); + (*block)->next = next_block; + + /* Statements must follow */ + if (!parse_smts(ctx, &next_block, ident_token.string, var_number, type, + underlying_queue_kind == VALUE_KIND_RQ, return_found)) + return false; + + /* b3: */ + last_block = kzalloc(sizeof(struct mptcp_rbs_cfg_block), GFP_KERNEL); + (*block)->next_else = last_block; + + if (*return_found) { + /* GOTO b3; */ + next_block->next = last_block; + } else { + /* GOTO b1; */ + next_block->next = *block; + } + + *block = last_block; + *last_smt = NULL; + return true; +} + +static bool parse_smt_if(struct parse_ctx *ctx, + struct mptcp_rbs_cfg_block **block, + struct mptcp_rbs_smt **last_smt, bool *return_found) +{ + struct mptcp_rbs_token token; + struct mptcp_rbs_value_bool *value; + struct mptcp_rbs_cfg_block *branch_block; + struct mptcp_rbs_cfg_block *next_block = NULL; + bool if_return_found; + bool else_return_found = false; + + mptcp_rbs_get_next_token(&ctx->str, &ctx->position, &ctx->line, + &ctx->line_position, &token); + + /* ( must follow */ + if (!expect_token(ctx, TOKEN_KIND_OPEN_BRACKET, &token)) + return false; + + /* Boolean value must follow */ + value = parse_value_bool(ctx); + if (!value) + return false; + (*block)->condition = value; + + /* ) must follow */ + if (!expect_token(ctx, TOKEN_KIND_CLOSE_BRACKET, &token)) + return false; + + branch_block = kzalloc(sizeof(struct mptcp_rbs_cfg_block), GFP_KERNEL); + (*block)->next = branch_block; + + /* Statements must follow */ + if (!parse_smts(ctx, &branch_block, NULL, 0, TYPE_KIND_NULL, false, + &if_return_found)) + return false; + + if (!if_return_found) { + next_block = + kzalloc(sizeof(struct mptcp_rbs_cfg_block), GFP_KERNEL); + branch_block->next = next_block; + } + + /* else might follow */ + if (!lookahead_token(ctx, &token)) + return false; + + if (token.kind == TOKEN_KIND_ELSE) { + mptcp_rbs_get_next_token(&ctx->str, &ctx->position, &ctx->line, + &ctx->line_position, &token); + + /* if might follow */ + if (!lookahead_token(ctx, &token)) + return false; + + branch_block = + kzalloc(sizeof(struct mptcp_rbs_cfg_block), GFP_KERNEL); + (*block)->next_else = branch_block; + + if (token.kind == TOKEN_KIND_IF) { + if (!parse_smt_if(ctx, &branch_block, last_smt, + &else_return_found)) + return false; + } else { + /* Statements must follow */ + if (!parse_smts(ctx, &branch_block, NULL, 0, + TYPE_KIND_NULL, false, + &else_return_found)) + return false; + } + + if (!else_return_found) { + if (!next_block) + next_block = + kzalloc(sizeof(struct mptcp_rbs_cfg_block), + GFP_KERNEL); + branch_block->next = 
next_block; + } + } else { + if (!next_block) + next_block = kzalloc(sizeof(struct mptcp_rbs_cfg_block), + GFP_KERNEL); + + (*block)->next_else = next_block; + } + + *block = next_block; + *last_smt = NULL; + *return_found = if_return_found && else_return_found; + return true; +} + +static bool parse_smt_print(struct parse_ctx *ctx, + struct mptcp_rbs_cfg_block **block, + struct mptcp_rbs_smt **last_smt) +{ + struct mptcp_rbs_token token; + struct mptcp_rbs_value_string *value; + struct mptcp_rbs_value *arg_value = NULL; + struct mptcp_rbs_smt_print *smt; + + mptcp_rbs_get_next_token(&ctx->str, &ctx->position, &ctx->line, + &ctx->line_position, &token); + + /* ( must follow */ + if (!expect_token(ctx, TOKEN_KIND_OPEN_BRACKET, &token)) + return false; + + /* String value must follow */ + value = parse_value_string(ctx); + if (!value) + return false; + + /* , might follow */ + if (!lookahead_token(ctx, &token)) { + value->free(value); + return false; + } + + if (token.kind == TOKEN_KIND_COMMA) { + int dummy1; + mptcp_rbs_get_next_token(&ctx->str, &ctx->position, &ctx->line, + &ctx->line_position, &token); + + /* Value must follow */ + arg_value = parse_value(ctx, &dummy1); + if (!arg_value) { + value->free(value); + return false; + } + } + + /* ) must follow */ + if (!expect_token(ctx, TOKEN_KIND_CLOSE_BRACKET, &token)) { + value->free(value); + if (arg_value) + arg_value->free(arg_value); + return false; + } + + /* ; must follow */ + if (!expect_token(ctx, TOKEN_KIND_SEMICOLON, &token)) { + value->free(value); + if (arg_value) + arg_value->free(arg_value); + return false; + } + + smt = mptcp_rbs_smt_print_new(value, arg_value); + if (*last_smt) + (*last_smt)->next = (struct mptcp_rbs_smt *) smt; + else + (*block)->first_smt = (struct mptcp_rbs_smt *) smt; + *last_smt = (struct mptcp_rbs_smt *) smt; + + return true; +} + +static bool parse_smt_return(struct parse_ctx *ctx, + struct mptcp_rbs_cfg_block **block) +{ + struct mptcp_rbs_token token; + + mptcp_rbs_get_next_token(&ctx->str, &ctx->position, &ctx->line, + &ctx->line_position, &token); + + /* ; must follow */ + if (!expect_token(ctx, TOKEN_KIND_SEMICOLON, &token)) + return false; + + /* Do nothing because the next pointer of the block is + * already set to NULL + */ + return true; +} + +static bool parse_smt_set(struct parse_ctx *ctx, + struct mptcp_rbs_cfg_block **block, + struct mptcp_rbs_smt **last_smt) +{ + struct mptcp_rbs_token token; + int reg_number; + struct mptcp_rbs_value_int *value; + struct mptcp_rbs_smt_set *smt; + + mptcp_rbs_get_next_token(&ctx->str, &ctx->position, &ctx->line, + &ctx->line_position, &token); + + /* ( must follow */ + if (!expect_token(ctx, TOKEN_KIND_OPEN_BRACKET, &token)) + return false; + + /* R1 - R6 must follow */ + if (!expect_token(ctx, TOKEN_KIND_IDENT, &token)) + return false; + if (strlen(token.string) != 2 || token.string[0] != 'R' || + token.string[1] < '1' || + token.string[1] > ('0' + MPTCP_RBS_REG_COUNT)) { + printk("%d: Register name %s is invalid\n", token.position, + token.string); + return false; + } + reg_number = token.string[1] - '1'; + + /* , must follow */ + if (!expect_token(ctx, TOKEN_KIND_COMMA, &token)) + return false; + + /* Integer value must follow */ + value = parse_value_int(ctx); + if (!value) + return false; + + /* ) must follow */ + if (!expect_token(ctx, TOKEN_KIND_CLOSE_BRACKET, &token)) { + value->free(value); + return false; + } + + /* ; must follow */ + if (!expect_token(ctx, TOKEN_KIND_SEMICOLON, &token)) { + value->free(value); + return false; + } + + smt = 
mptcp_rbs_smt_set_new(reg_number, value); + if (*last_smt) + (*last_smt)->next = (struct mptcp_rbs_smt *) smt; + else + (*block)->first_smt = (struct mptcp_rbs_smt *) smt; + *last_smt = (struct mptcp_rbs_smt *) smt; + + return true; +} + +static bool parse_smt_var(struct parse_ctx *ctx, + struct mptcp_rbs_cfg_block **block, + struct mptcp_rbs_smt **last_smt) +{ + struct mptcp_rbs_token token; + struct var_list *vars; + struct var *var; + struct mptcp_rbs_value *value; + int dummy1; + struct mptcp_rbs_smt_var *smt; + + mptcp_rbs_get_next_token(&ctx->str, &ctx->position, &ctx->line, + &ctx->line_position, &token); + + /* Identifier must follow */ + if (!expect_token(ctx, TOKEN_KIND_IDENT, &token)) + return false; + + /* Check if identifier is already used */ + vars = GET_VAR_LIST_STACK_TOP(&ctx->var_stack); + FOREACH_VAR(vars, var, if (!strcmp(token.string, var->name)) { + printk("%d: Variable %s is already declared\n", token.position, + token.string); + return false; + }); + + var = var_new(token.string, ctx->var_index, TYPE_KIND_NULL, NULL, NULL); + ADD_VAR(vars, var); + ++ctx->var_index; + + /* = must follow */ + if (!expect_token(ctx, TOKEN_KIND_ASSIGN, &token)) + return false; + + /* Value must follow */ + value = parse_value(ctx, &dummy1); + if (!value) + return false; + + var->type = mptcp_rbs_value_get_type(value->kind); + if (var->type == TYPE_KIND_SKB) + var->reinject = + ((struct mptcp_rbs_value_skb *) value)->reinject; + else if (var->type == TYPE_KIND_SKBLIST) + var->underlying_queue_kind = ctx->underlying_queue_kind; + + /* ; must follow */ + if (!expect_token(ctx, TOKEN_KIND_SEMICOLON, &token)) { + value->free(value); + return false; + } + + smt = mptcp_rbs_smt_var_new(var->var_number, false, value); + if (*last_smt) + (*last_smt)->next = (struct mptcp_rbs_smt *) smt; + else + (*block)->first_smt = (struct mptcp_rbs_smt *) smt; + *last_smt = (struct mptcp_rbs_smt *) smt; + + return true; +} + +static bool parse_smt_void(struct parse_ctx *ctx, + struct mptcp_rbs_cfg_block **block, + struct mptcp_rbs_smt **last_smt) +{ + struct mptcp_rbs_token token; + struct mptcp_rbs_value *value = NULL; + int dummy1; + struct mptcp_rbs_smt_void *smt; + + mptcp_rbs_get_next_token(&ctx->str, &ctx->position, &ctx->line, + &ctx->line_position, &token); + + /* ( must follow */ + if (!expect_token(ctx, TOKEN_KIND_OPEN_BRACKET, &token)) + return false; + + /* Value might follow */ + if (!lookahead_token(ctx, &token)) + return false; + if (token.kind != TOKEN_KIND_CLOSE_BRACKET) { + value = parse_value(ctx, &dummy1); + if (!value) + return false; + } + + /* ) must follow */ + if (!expect_token(ctx, TOKEN_KIND_CLOSE_BRACKET, &token)) { + if (value) + value->free(value); + return false; + } + + /* ; must follow */ + if (!expect_token(ctx, TOKEN_KIND_SEMICOLON, &token)) { + if (value) + value->free(value); + return false; + } + + smt = mptcp_rbs_smt_void_new(value); + if (*last_smt) + (*last_smt)->next = (struct mptcp_rbs_smt *) smt; + else + (*block)->first_smt = (struct mptcp_rbs_smt *) smt; + *last_smt = (struct mptcp_rbs_smt *) smt; + + return true; +} + +static bool parse_smt_other(struct parse_ctx *ctx, + struct mptcp_rbs_cfg_block **block, + struct mptcp_rbs_smt **last_smt) +{ + struct mptcp_rbs_token token; + int value_position; + struct mptcp_rbs_value *value = parse_value(ctx, &value_position); + struct mptcp_rbs_smt *smt; + + if (!value) + return false; + + /* PUSH might follow. In this case the previous . 
was + * already parsed + */ + if (!lookahead_token(ctx, &token)) { + value->free(value); + return false; + } + + if (token.kind == TOKEN_KIND_PUSH) { + struct mptcp_rbs_value_skb *skb_value; + + mptcp_rbs_get_next_token(&ctx->str, &ctx->position, &ctx->line, + &ctx->line_position, &token); + + /* ( must follow */ + if (!expect_token(ctx, TOKEN_KIND_OPEN_BRACKET, &token)) { + value->free(value); + return false; + } + + /* SKB value must follow */ + skb_value = parse_value_skb_allow_pop(ctx); + if (!skb_value) { + value->free(value); + return false; + } + + /* ) must follow */ + if (!expect_token(ctx, TOKEN_KIND_CLOSE_BRACKET, &token)) { + value->free(value); + skb_value->free(skb_value); + return false; + } + + smt = (struct mptcp_rbs_smt *) mptcp_rbs_smt_push_new( + (struct mptcp_rbs_value_sbf *) value, skb_value); + } else if (token.kind == TOKEN_KIND_SET_USER) { + struct mptcp_rbs_value_int *int_value; + + mptcp_rbs_get_next_token(&ctx->str, &ctx->position, &ctx->line, + &ctx->line_position, &token); + + /* ( must follow */ + if (!expect_token(ctx, TOKEN_KIND_OPEN_BRACKET, &token)) { + value->free(value); + return false; + } + + /* Integer value must follow */ + int_value = parse_value_int(ctx); + if (!int_value) { + value->free(value); + return false; + } + + /* ) must follow */ + if (!expect_token(ctx, TOKEN_KIND_CLOSE_BRACKET, &token)) { + value->free(value); + int_value->free(int_value); + return false; + } + + smt = (struct mptcp_rbs_smt *) mptcp_rbs_smt_set_user_new( + (struct mptcp_rbs_value_sbf *) value, int_value); + } else { + printk("%d: Values cannot stand alone\n", value_position); + value->free(value); + return false; + } + + /* ; must follow */ + if (!expect_token(ctx, TOKEN_KIND_SEMICOLON, &token)) { + smt->free(smt); + return false; + } + + if (*last_smt) + (*last_smt)->next = smt; + else + (*block)->first_smt = smt; + *last_smt = smt; + + return true; +} + +static bool parse_smt(struct parse_ctx *ctx, struct mptcp_rbs_cfg_block **block, + struct mptcp_rbs_smt **last_smt, bool *return_found) +{ + struct mptcp_rbs_token token; + + if (!lookahead_token(ctx, &token)) + return false; + + switch (token.kind) { + case TOKEN_KIND_DROP: { + *return_found = false; + return parse_smt_drop(ctx, block, last_smt); + } + case TOKEN_KIND_FOREACH: + return parse_smt_foreach(ctx, block, last_smt, return_found); + case TOKEN_KIND_IF: + return parse_smt_if(ctx, block, last_smt, return_found); + case TOKEN_KIND_PRINT: { + *return_found = false; + return parse_smt_print(ctx, block, last_smt); + } + case TOKEN_KIND_RETURN: { + *return_found = true; + return parse_smt_return(ctx, block); + } + case TOKEN_KIND_SET: { + *return_found = false; + return parse_smt_set(ctx, block, last_smt); + } + case TOKEN_KIND_VAR: { + *return_found = false; + return parse_smt_var(ctx, block, last_smt); + } +#ifdef MPTCP_RBS_MEASURE + case TOKEN_KIND_VOID: { + *return_found = false; + return parse_smt_void(ctx, block, last_smt); + } +#endif + default: { + *return_found = false; + return parse_smt_other(ctx, block, last_smt); + } + } +} + +struct mptcp_rbs_scheduler *mptcp_rbs_scheduler_parse(const char *str) +{ + struct mptcp_rbs_scheduler *scheduler = + kzalloc(sizeof(struct mptcp_rbs_scheduler), GFP_KERNEL); + struct mptcp_rbs_cfg_block *block = + kzalloc(sizeof(struct mptcp_rbs_cfg_block), GFP_KERNEL); + struct mptcp_rbs_smt *last_smt = NULL; + struct mptcp_rbs_token token; + struct var *var; + struct var_list vars; + struct parse_ctx ctx; + ctx.str = str; + ctx.position = 0; + ctx.line = 0; + 
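/*
 * The parse_value_multiply() / parse_value_add() / parse_value_cmp() /
 * parse_value_and() / parse_value_allow_pop() chain above is a classic
 * recursive-descent expression parser with one function per precedence
 * level: each level first parses the next tighter-binding level, then
 * loops for as long as one of its own operator tokens follows.  Below is
 * a minimal, self-contained userspace sketch of that shape (toy grammar
 * with integers, '+' and '*' only; every name in the sketch is
 * illustrative and does not exist in the patch).
 */
#include <ctype.h>
#include <stdio.h>

static const char *p;			/* cursor into the input string */

static long parse_primary(void)
{
	long v = 0;

	while (isdigit((unsigned char) *p))
		v = v * 10 + (*p++ - '0');
	return v;
}

static long parse_mul(void)		/* shaped like parse_value_multiply() */
{
	long left = parse_primary();

	while (*p == '*') {		/* loop while a '*' token follows */
		p++;
		left *= parse_primary();
	}
	return left;
}

static long parse_add(void)		/* shaped like parse_value_add() */
{
	long left = parse_mul();	/* tighter-binding level first */

	while (*p == '+') {
		p++;
		left += parse_mul();
	}
	return left;
}

int main(void)
{
	p = "2+3*4";
	printf("%ld\n", parse_add());	/* prints 14 */
	return 0;
}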
ctx.line_position = 0; + ctx.var_index = 0; + scheduler->variations[0].first_block = block; + + /* SCHEDULER must follow */ + if (!expect_token(&ctx, TOKEN_KIND_SCHEDULER, &token)) { + mptcp_rbs_scheduler_free(scheduler); + return NULL; + } + + /* Identifier must follow */ + if (!expect_token(&ctx, TOKEN_KIND_IDENT, &token)) { + mptcp_rbs_scheduler_free(scheduler); + return NULL; + } + scheduler->name = strclone(token.string); + /* ; must follow */ + if (!expect_token(&ctx, TOKEN_KIND_SEMICOLON, &token)) { + mptcp_rbs_scheduler_free(scheduler); + return NULL; + } + + INIT_REPL_STACK(&ctx.repls); + INIT_VAR_LIST(&vars); + INIT_VAR_LIST_STACK(&ctx.var_stack); + PUSH_VAR_LIST(&ctx.var_stack, &vars); + + while (true) { + bool return_found; + /* Check if end is found */ + if (!lookahead_token(&ctx, &token)) { + printk("%s\n", mptcp_rbs_get_last_error()); + mptcp_rbs_scheduler_free(scheduler); + scheduler = NULL; + break; + } + if (token.kind == TOKEN_KIND_EOD) + break; + + /* Statement must follow */ + if (!parse_smt(&ctx, &block, &last_smt, &return_found)) { + mptcp_rbs_scheduler_free(scheduler); + scheduler = NULL; + break; + } + + if (return_found) { + /* End found */ + if (!expect_token(&ctx, TOKEN_KIND_EOD, &token)) { + mptcp_rbs_scheduler_free(scheduler); + scheduler = NULL; + } + break; + } + } + + FREE_REPL_STACK(&ctx.repls); + FOREACH_VAR(&vars, var, var_free(var)); + FREE_VAR_LIST(&vars); + FREE_VAR_LIST_STACK(&ctx.var_stack); + + if (scheduler) { + scheduler->variations[0].used_vars = ctx.var_index; + if (ctx.var_index > MPTCP_RBS_MAX_VAR_COUNT) { + printk("Scheduler cannot be parsed because too many " + "variables are used\n"); + mptcp_rbs_scheduler_free(scheduler); + scheduler = NULL; + } + } + + return scheduler; +} diff --git a/net/mptcp/mptcp_rbs_parser.h b/net/mptcp/mptcp_rbs_parser.h new file mode 100644 index 0000000000000..6467cb85ddbd3 --- /dev/null +++ b/net/mptcp/mptcp_rbs_parser.h @@ -0,0 +1,23 @@ +#ifndef _MPTCP_RBS_PARSER_H +#define _MPTCP_RBS_PARSER_H + +/* Define this macro to enable VOID statements */ +#define MPTCP_RBS_MEASURE + +struct mptcp_rbs_scheduler; + +/* + * Formats a given string with arguments, stores it inside of the given + * buffer and returns the number of written characters. The buffer pointer is + * enhanced to the next character after the last written character. 
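/*
 * parse_smt_foreach() and parse_smt_if() above never build a loop or if
 * AST node; they lower control flow directly onto CFG blocks whose
 * condition, next and next_else fields are wired up while parsing, and a
 * FOREACH back-edge is simply 'next' pointing at the loop-header block
 * again (the b0..b3 layout described in the comment before
 * parse_smt_foreach()).  The sketch below models that wiring with a
 * simplified stand-in struct; the real mptcp_rbs_cfg_block definition
 * lives in mptcp_rbs_cfg.h, which is not part of this hunk.
 */
#include <stdlib.h>

struct toy_block {
	struct toy_block *next;		/* taken when condition is true/absent */
	struct toy_block *next_else;	/* taken when condition is false */
	int has_condition;
};

/* Wire up   b0 -> b1; b1 ? b2 : b3; b2 -> b1   as parse_smt_foreach() does. */
static struct toy_block *build_foreach_cfg(void)
{
	struct toy_block *b0 = calloc(1, sizeof(*b0));	/* init: VAR v1 = y */
	struct toy_block *b1 = calloc(1, sizeof(*b1));	/* header: x = v1.NEXT() */
	struct toy_block *b2 = calloc(1, sizeof(*b2));	/* loop body */
	struct toy_block *b3 = calloc(1, sizeof(*b3));	/* after the loop */

	b0->next = b1;
	b1->has_condition = 1;		/* if (x != NULL) ... */
	b1->next = b2;			/* ... run the body ... */
	b1->next_else = b3;		/* ... else leave the loop */
	b2->next = b1;			/* back-edge to the header */
	return b0;
}

int main(void)
{
	struct toy_block *b0 = build_foreach_cfg();
	struct toy_block *b1 = b0->next, *b2 = b1->next, *b3 = b1->next_else;

	free(b0); free(b1); free(b2); free(b3);
	return 0;
}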
If NULL or + * a pointer to NULL is passed the function only calculates the length + */ +int sprintf_null(char **buf, const char *fmt, ...); + +/* + * Tries to build a scheduler from a string + * @return: The parsed scheduler or NULL + */ +struct mptcp_rbs_scheduler *mptcp_rbs_scheduler_parse(const char *str); + +#endif diff --git a/net/mptcp/mptcp_rbs_queue.c b/net/mptcp/mptcp_rbs_queue.c new file mode 100644 index 0000000000000..2b795fd3a68a0 --- /dev/null +++ b/net/mptcp/mptcp_rbs_queue.c @@ -0,0 +1,72 @@ +#include "mptcp_rbs_queue.h" +#include + +/* + * some helper for the queue structures + */ + +void mptcp_rbs_advance_send_head(struct sock *sk, struct sk_buff **skb) +{ + if (tcp_skb_is_last(sk, *skb)) + *skb = NULL; + else { + /* we have to reset mptcp_rbs_in_queue as it will NOW be in QU */ + TCP_SKB_CB(*skb)->mptcp_rbs.flags_not_in_queue = 0; + + *skb = tcp_write_queue_next(sk, *skb); + } +} + +unsigned int mptcp_rbs_q_size(struct sock *sk, struct sk_buff *queue_position) +{ + struct sk_buff *initial_qp = queue_position; + + unsigned int i = 0; + while (queue_position) { + i++; + + if (tcp_skb_is_last(sk, queue_position)) { + break; + } + queue_position = queue_position->next; + + if (i > 1000) { + printk("## rbs_q_size for sk %p and queue position %p " + "with sk_write_queue %p of size %u aborted with " + "more than 1000 elements... might be an " + "infinite loop\n", + sk, initial_qp, &sk->sk_write_queue, + sk->sk_write_queue.qlen); + + break; + } + } + return i; +} + +/* + * returns size + */ +unsigned int mptcp_rbs_print_queue(struct sock *sk, struct sk_buff *skb) +{ + unsigned int i = 0; + + for (i = 0; i < 10; i++) { + if (!skb) { + return i; + } + + if (i < 10) { + mptcp_debug("sk_buff queue %p seq %10u and end_seq " + "%10u and len %10u\n", + skb, TCP_SKB_CB(skb)->seq, + TCP_SKB_CB(skb)->end_seq, skb->len); + } + + if (tcp_skb_is_last(sk, skb)) + return i + 1; + + skb = tcp_write_queue_next(sk, skb); + } + return i; +} diff --git a/net/mptcp/mptcp_rbs_queue.h b/net/mptcp/mptcp_rbs_queue.h new file mode 100644 index 0000000000000..ef29c65886907 --- /dev/null +++ b/net/mptcp/mptcp_rbs_queue.h @@ -0,0 +1,12 @@ +#ifndef _MPTCP_RBS_QUEUE_H +#define _MPTCP_RBS_QUEUE_H + +#include + +void mptcp_rbs_advance_send_head(struct sock *sk, struct sk_buff **skb); + +unsigned int mptcp_rbs_q_size(struct sock *sk, struct sk_buff *queue_position); + +unsigned int mptcp_rbs_print_queue(struct sock *sk, struct sk_buff *skb); + +#endif diff --git a/net/mptcp/mptcp_rbs_sched.c b/net/mptcp/mptcp_rbs_sched.c new file mode 100644 index 0000000000000..3c5931647ca62 --- /dev/null +++ b/net/mptcp/mptcp_rbs_sched.c @@ -0,0 +1,914 @@ +/* Rule-based MPTCP Scheduler */ + +#include "mptcp_rbs_sched.h" +#include "mptcp_rbs_cfg.h" +#include "mptcp_rbs_ctx.h" +#include "mptcp_rbs_exec.h" +#include "mptcp_rbs_optimizer.h" +#include "mptcp_rbs_parser.h" +#include "mptcp_rbs_queue.h" +#include "mptcp_rbs_scheduler.h" +#include "mptcp_rbs_user.h" + +#include +#include +#include +#include // required for after(...) 
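/*
 * The sprintf_null() helper declared in mptcp_rbs_parser.h above enables
 * the usual two-pass "measure, allocate, print" pattern: call it with a
 * NULL buffer (or a pointer to NULL) to obtain the length, then call it
 * again with a real cursor to emit the text while advancing the cursor.
 * Below is a hypothetical userspace sketch of that contract built on
 * vsnprintf(); it is not the kernel implementation, and all names are
 * illustrative.
 */
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>

static int sprintf_null_demo(char **buf, const char *fmt, ...)
{
	va_list args;
	int len;

	va_start(args, fmt);
	if (!buf || !*buf) {
		len = vsnprintf(NULL, 0, fmt, args);	/* only measure */
	} else {
		len = vsprintf(*buf, fmt, args);	/* really print */
		*buf += len;		/* advance past the written text */
	}
	va_end(args);
	return len;
}

int main(void)
{
	char *buf, *pos;
	int len = sprintf_null_demo(NULL, "SCHEDULER %s;\n", "simple");

	buf = malloc(len + 1);
	pos = buf;
	sprintf_null_demo(&pos, "SCHEDULER %s;\n", "simple");
	printf("%d bytes: %s", len, buf);
	free(buf);
	return 0;
}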
+#include +#include + +static const char *default_rules = "SCHEDULER simple;\n" + "VAR sbfCandidates = SUBFLOWS.FILTER(sbf => sbf.CWND > \n" + " sbf.SKBS_IN_FLIGHT + sbf.QUEUED AND !sbf.THROTTLED AND !sbf.LOSSY);\n" + "IF (sbfCandidates.EMPTY) { RETURN; }\n" + "IF (!RQ.EMPTY) {\n" + " sbfCandidates.GET(0).PUSH(RQ.POP());\n" + " RETURN;\n" + "}\n" + "IF (!Q.EMPTY) {\n" + " sbfCandidates.GET(0).PUSH(Q.POP());\n" + "}"; + +/* Linked list with all schedulers */ +static struct mptcp_rbs_scheduler *schedulers; +/* The default scheduler */ +static struct mptcp_rbs_scheduler *default_scheduler; + +/* Parameters to control scheduler */ +bool mptcp_rbs_extended_msgs __read_mostly = false; +module_param(mptcp_rbs_extended_msgs, bool, 0644); +MODULE_PARM_DESC(mptcp_rbs_extended_msgs, "Should we give a bit more dmesg's?"); + +/* Parameters to control the advanced ooo receive ops */ +bool mptcp_ooo_opt __read_mostly = false; +module_param(mptcp_ooo_opt, bool, 0644); +MODULE_PARM_DESC(mptcp_ooo_opt, "Should we run the advanced ooo receive ops?"); + +/* Parameters to turn off CWND */ +bool ignoreSbfCwndConfig __read_mostly = false; +module_param(ignoreSbfCwndConfig, bool, 0644); +MODULE_PARM_DESC(ignoreSbfCwndConfig, "Ignore congestion control."); + +static bool mptcp_rbs_clean_reinject_queue __read_mostly = true; +module_param(mptcp_rbs_clean_reinject_queue, bool, 0644); +MODULE_PARM_DESC(mptcp_rbs_clean_reinject_queue, + "Should the reinjection queue be cleaned?"); + +static bool mptcp_rbs_check_for_gaps_in_seq __read_mostly = false; +module_param(mptcp_rbs_check_for_gaps_in_seq, bool, 0644); +MODULE_PARM_DESC( + mptcp_rbs_check_for_gaps_in_seq, + "Should the scheduler check if there is a gap in the sequence numbers?"); + +static bool mptcp_rbs_check_for_work_conservingness __read_mostly = false; +module_param(mptcp_rbs_check_for_work_conservingness, bool, 0644); +MODULE_PARM_DESC( + mptcp_rbs_check_for_work_conservingness, + "Should the scheduler check if the program is work conserving?"); + +u32 mptcp_ooo_number_matches = 0; + +static bool mptcp_rbs_is_available(const struct sock *sk, + const struct sk_buff *skb, + bool zero_wnd_test, bool cwnd_test) +{ + const struct tcp_sock *tp = tcp_sk(sk); + + /* Set of states for which we are allowed to send data */ + if (!mptcp_sk_can_send(sk)) + return false; + + /* We do not send data on this subflow unless it is + * fully established, i.e. the 4th ack has been received. + */ + if (tp->mptcp->pre_established) + return false; + + if (tp->pf) + return false; + + if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss) { + /* If SACK is disabled, and we got a loss, TCP does not exit + * the loss-state until something above high_seq has been acked. + * (see tcp_try_undo_recovery) + * + * high_seq is the snd_nxt at the moment of the RTO. As soon + * as we have an RTO, we won't push data on the subflow. + * Thus, snd_una can never go beyond high_seq. + */ + if (!tcp_is_reno(tp)) + return false; + else if (tp->snd_una != tp->high_seq) + return false; + } + + if (!tp->mptcp->fully_established) { + /* Make sure that we send in-order data */ + if (skb && tp->mptcp->second_packet && + tp->mptcp->last_end_data_seq != TCP_SKB_CB(skb)->seq) + return false; + } + return true; +} + +/* Are we not allowed to reinject this skb on tp? */ +static int mptcp_rbs_dont_reinject_skb(const struct tcp_sock *tp, + const struct sk_buff *skb) +{ + /* If the skb has already been enqueued in this sk, try to find + * another one. 
+ */ + return skb && + /* Has the skb already been enqueued into this subsocket? */ + mptcp_pi_to_flag(tp->mptcp->path_index) & + TCP_SKB_CB(skb)->path_mask; +} + +static struct sock *mptcp_rbs_get_available_subflow(struct sock *meta_sk, + struct sk_buff *skb, + bool zero_wnd_test) +{ + const struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb; + struct sock *sk, *bestsk = NULL, *backupsk = NULL; + + mptcp_rbs_debug( + "### asked for available subflow for meta_sk %p and skb %p with " + "zerownd %u called from %pS\n", + meta_sk, skb, zero_wnd_test, __builtin_return_address(0)); + + /* Answer data_fin on same subflow!!! */ + if (meta_sk->sk_shutdown & RCV_SHUTDOWN && skb && + mptcp_is_data_fin(skb)) { + mptcp_for_each_sk(mpcb, sk) + { + if (tcp_sk(sk)->mptcp->path_index == + mpcb->dfin_path_index && + mptcp_rbs_is_available(sk, skb, zero_wnd_test, + true)) + return sk; + } + } + + /* First, find the best subflow */ + mptcp_for_each_sk(mpcb, sk) + { + struct tcp_sock *tp = tcp_sk(sk); + + if (!mptcp_rbs_is_available(sk, skb, zero_wnd_test, true)) + continue; + + if (mptcp_rbs_dont_reinject_skb(tp, skb)) { + backupsk = sk; + continue; + } + + bestsk = sk; + } + + if (bestsk) { + sk = bestsk; + } else if (backupsk) { + /* It has been sent on all subflows once - let's give it a + * chance again by restarting its pathmask. + */ + if (skb) + TCP_SKB_CB(skb)->path_mask = 0; + sk = backupsk; + } + + mptcp_rbs_debug("returning with sk %p", sk); + return sk; +} + +/* only call this if open action is empty, as we might otherwise free packets + * which are still in open_action */ +static void clean_up_reinjection_queue(struct tcp_sock *meta_tp) +{ + struct mptcp_cb *mpcb = meta_tp->mpcb; + struct sk_buff *skb = skb_peek(&mpcb->reinject_queue); + u32 counter = 0; + mptcp_rbs_debug("%s for rq %p with peek %p and size %u\n", __func__, + &mpcb->reinject_queue, skb, mpcb->reinject_queue.qlen); + + while (skb) { + struct sk_buff *tmp = skb; + counter++; + + if (counter == 1000) { + printk("%s found more than %u packets in rq with qlen " + "%u\n", + __func__, counter, mpcb->reinject_queue.qlen); + } else if (counter > 10000) { + printk("%s finished it with %u packets in rq and qlen " + "%u\n", + __func__, counter, mpcb->reinject_queue.qlen); + break; + } + + if (skb_queue_is_last(&mpcb->reinject_queue, skb)) { + skb = NULL; + mptcp_rbs_debug("%s for rq %p next in rq is NULL\n", + __func__, &mpcb->reinject_queue); + } else { + skb = skb_queue_next(&mpcb->reinject_queue, skb); + mptcp_rbs_debug("%s for rq %p next in rq is %p\n", + __func__, &mpcb->reinject_queue, skb); + } + + if (after(meta_tp->snd_una, TCP_SKB_CB(tmp)->end_seq) || + (TCP_SKB_CB(tmp)->mptcp_rbs.flags_to_unlink && + TCP_SKB_CB(tmp)->mptcp_rbs.flags_to_free)) { + + /* Segment already reached the peer, remove it */ + mptcp_rbs_debug( + "rbs_clean_up_reinjection queue removes skb %p " + "with end_seq %u seq %u and snd_una %u with next " + "skb %p with to_unlink %u and with to_free %u and " + "not_in_queue %u\n", + tmp, TCP_SKB_CB(tmp)->end_seq, TCP_SKB_CB(tmp)->seq, + meta_tp->snd_una, skb, + TCP_SKB_CB(tmp)->mptcp_rbs.flags_to_unlink, + TCP_SKB_CB(tmp)->mptcp_rbs.flags_to_free, + TCP_SKB_CB(tmp)->mptcp_rbs.flags_not_in_queue); + + __skb_unlink(tmp, &mpcb->reinject_queue); + __kfree_skb(tmp); + } + } +} + +u32 get_number_of_available_subflows(struct tcp_sock *meta_tp) +{ + struct mptcp_cb *mpcb = meta_tp->mpcb; + struct sock *sk; + u32 result = 0; + + mptcp_for_each_sk(mpcb, sk) + { + struct tcp_sock *tp = (struct tcp_sock *) sk; + if 
(mptcp_rbs_sbf_is_available(tp)) { + result++; + } + } + + return result; +} + +u32 get_number_of_available_subflows_with_cwnd(struct tcp_sock *meta_tp) +{ + struct mptcp_cb *mpcb = meta_tp->mpcb; + struct sock *sk; + u32 result = 0; + + mptcp_for_each_sk(mpcb, sk) + { + struct tcp_sock *tp = (struct tcp_sock *) sk; + if (mptcp_rbs_sbf_is_available(tp) && + tp->packets_out < tp->snd_cwnd) + result++; + } + + return result; +} + +static struct sk_buff *process_actions(struct tcp_sock *meta_tp, int *reinject, + struct sock **sbf) +{ + struct mptcp_rbs_cb *rbs_cb = mptcp_rbs_get_cb(meta_tp); + enum mptcp_rbs_action_kind kind; + struct sk_buff *skb; + unsigned int end_seq; + + FOREACH_ACTION( + rbs_cb->open_actions, kind, *((struct tcp_sock **) sbf), skb, + end_seq, *reinject, { + if (kind == ACTION_KIND_PUSH) { + mptcp_rbs_debug("Answer with OPEN PUSH ACTION with " + "skb %p on sbf %p (reinjection " + "%i)\n", + skb, *sbf, *reinject); + + /* + * The packet might be already acknowledged, so we + * check + * the sequence numbers + */ + if (!after(end_seq, meta_tp->snd_una)) { + mptcp_rbs_debug("rbs recovered from " + "acknowledged seq without " + "touching skb\n"); + continue; + } + + return skb; + } else if (kind == ACTION_KIND_DROP) { + mptcp_rbs_debug("Execute OPEN DROP ACTION with skb " + "%p (reinjection %i)\n", + skb, *reinject); + /* nothing to do... TODO: remove from open action + * table + */ + } else + BUG_ON(true); + }); + + *reinject = false; + *sbf = NULL; + return NULL; +} + +#ifdef CONFIG_MPTCP_RBSMEASURE + +/* Functions for measurements with SystemTap */ + +void noinline mptcp_rbs_scheduler_opt(const char *sched_name, int sbf_num, + int status) +{ + asm("nop"); +} + +void noinline mptcp_rbs_scheduler_switch(const struct sock *meta_sk, + const char *sched_name, + int old_sbf_num, int sbf_num) +{ + asm("nop"); +} + +#endif + +/* Determines if a scheduler is currently optimized. + * 0 = no + * 1 = yes - the following static variables are currently filled with data + * 2 = yes - the following static variables hold valid data + */ +static atomic_t optimizing = ATOMIC_INIT(0); +/* The scheduler to optimize */ +static struct mptcp_rbs_scheduler *opt_scheduler; +/* Number of subflows the scheduler should be optimized for */ +static u32 opt_sbf_num; +/* Index of the variation where the optimized code should be stored */ +static int opt_variation_idx; + +/* Number of calls necessary to start an subflow number dependent optimization. 
+ * If the number of subflows changes the counter should restart + */ +#define MIN_CALLS_TO_OPT 10 + +static int opt_thread_func(void *data) +{ + struct mptcp_rbs_scheduler_variation variation; + + while (true) { + if (atomic_read(&optimizing) != 2) { + msleep(2); + continue; + } + + barrier(); + + mptcp_rbs_debug("optimizing scheduler %s for %d subflows\n", + opt_scheduler->name, opt_sbf_num); + +#ifdef CONFIG_MPTCP_RBSMEASURE + mptcp_rbs_scheduler_opt(opt_scheduler->name, opt_sbf_num, 1); +#endif + + /* Copy the default variation */ + variation.first_block = mptcp_rbs_cfg_blocks_clone( + opt_scheduler->variations[0].first_block, NULL, NULL); + variation.used_vars = opt_scheduler->variations[0].used_vars; + variation.sbf_num = opt_sbf_num; + + /* Apply optimizations */ + mptcp_rbs_optimize(&variation, &opt_scheduler->del, opt_sbf_num, + mptcp_rbs_opts_enabled == 2); + + /* "Publish" the optimized variation */ + opt_scheduler->variations[opt_variation_idx].used_vars = + variation.used_vars; + opt_scheduler->variations[opt_variation_idx].sbf_num = + variation.sbf_num; + opt_scheduler->variations[opt_variation_idx].first_block = + variation.first_block; + +#ifdef CONFIG_MPTCP_RBSMEASURE + mptcp_rbs_scheduler_opt(opt_scheduler->name, opt_sbf_num, 0); +#endif + + mptcp_rbs_debug("scheduler %s was optimized for %d subflows\n", + opt_scheduler->name, opt_sbf_num); + + atomic_set(&optimizing, 0); + } + + return 0; +} + +struct sk_buff *mptcp_rbs_next_segment(struct sock *meta_sk, int *reinject, + struct sock **subsk, unsigned int *limit) +{ + struct tcp_sock *meta_tp = tcp_sk(meta_sk); + struct mptcp_rbs_cb *rbs_cb = mptcp_rbs_get_cb(meta_tp); + struct mptcp_rbs_eval_ctx ctx; + struct sk_buff *skb; + u32 number_of_subflows; + u64 begin_time; + u64 begin_time2; + int i; + unsigned int number_of_evaluations = 0; + struct mptcp_rbs_scheduler_variation *old_variation; + + mptcp_rbs_debug("rbs meta_sk->send_head = %p, own queue = %p , " + "packets in flight %i , cwnd %i, wnd %i and q size %u " + "packets_out %u\n", + tcp_send_head(meta_sk), rbs_cb->queue_position, + meta_tp->packets_out, meta_tp->snd_cwnd, + (tcp_wnd_end(meta_tp) - meta_tp->write_seq), + mptcp_rbs_q_size(meta_sk, rbs_cb->queue_position), + meta_tp->packets_out); + + // TODO check that we have at least window? + + if (!rbs_cb || !rbs_cb->scheduler) { + mptcp_rbs_debug("rbs_cb or scheduler invalid\n"); + return NULL; + } + + if(rbs_cb->execution_bucket == 0) { + printk("%s: WARNING: Execution bucket exceeded for scheduler %s. 
Now aborting new call.", __func__, rbs_cb->scheduler->name); + return NULL; + } + +#ifdef CONFIG_MPTCP_RBSMEASURE + /* first time measurment */ + begin_time = __native_read_tsc(); +#endif + + /* + * If we still have open actions from previous rule evaluations, + * simply return their results in order + */ + skb = process_actions(meta_tp, reinject, subsk); + if (skb) { +#ifdef CONFIG_MPTCP_RBSMEASURE + rbs_cb->scheduler->total_time_oa_skb += + __native_read_tsc() - begin_time; + rbs_cb->scheduler->total_count_oa_skb++; +#endif + + /* we want to run all existing checks after the rule execution + * to ensure + * that we do not miss a packet for the gap test */ + goto after_rbs_exec; + } + + mptcp_rbs_debug("rbs scheduler no open actions\n"); + + /* only clean the queue if we are sure there is no open action which + * might use it */ + if (mptcp_rbs_clean_reinject_queue) + clean_up_reinjection_queue(meta_tp); + + number_of_subflows = get_number_of_available_subflows(meta_tp); + old_variation = rbs_cb->variation; + rbs_cb->variation = &rbs_cb->scheduler->variations[0]; + + if (mptcp_rbs_opts_enabled) { + /* Check if there is a specific variation */ + for (i = 1; i < MPTCP_RBS_VARIATION_COUNT; ++i) { + if (!rbs_cb->scheduler->variations[i].first_block) + break; + + if (rbs_cb->scheduler->variations[i].sbf_num == + number_of_subflows) { + rbs_cb->variation = + &rbs_cb->scheduler->variations[i]; + break; + } + } + + /* If we have place for another optimized variation */ + if (i != MPTCP_RBS_VARIATION_COUNT) { + if (rbs_cb->last_number_of_subflows != + number_of_subflows) { + rbs_cb->last_number_of_subflows = + number_of_subflows; + rbs_cb->calls_since_sbf_change = 0; + } else if (rbs_cb->calls_since_sbf_change > + MIN_CALLS_TO_OPT && + number_of_subflows && + rbs_cb->variation == + &rbs_cb->scheduler->variations[0]) { + /* We should optimize for this number of + * subflows + */ + if (!atomic_cmpxchg(&optimizing, 0, 1)) { + opt_scheduler = rbs_cb->scheduler; + opt_sbf_num = number_of_subflows; + opt_variation_idx = i; + barrier(); + atomic_inc(&optimizing); + } + rbs_cb->calls_since_sbf_change = 0; + } else + ++rbs_cb->calls_since_sbf_change; + } + } + + if (old_variation != rbs_cb->variation) { +#ifdef CONFIG_MPTCP_RBSMEASURE + mptcp_rbs_scheduler_switch(meta_sk, rbs_cb->scheduler->name, + old_variation->sbf_num, + rbs_cb->variation->sbf_num); +#endif + + mptcp_rbs_debug( + "switching for %p to scheduler optimized for %d subflows\n", + meta_sk, rbs_cb->variation->sbf_num); + } + + /* + * We repeat the execution till it returns a packet in case + * the rule execution had side effects. + * + * A side effect is the change of a register or the execution of + * a POP operation. + * + * This is repeated at most X times, to ensure that + * we terminate in case of unsuitable schedulers. 
+ */ + do { + /* Prepare context */ + memset(&ctx, 0, sizeof(struct mptcp_rbs_eval_ctx)); + ctx.meta_sk = meta_sk; + ctx.mpcb = meta_tp->mpcb; + ctx.rbs_cb = rbs_cb; + ctx.side_effects = 0; + + /* Increase execution counter */ + ++rbs_cb->exec_count; + +#ifdef CONFIG_MPTCP_RBSMEASURE + begin_time2 = __native_read_tsc(); +#endif + + /* Execute the rules and apply new actions if there are any */ + mptcp_rbs_exec(&ctx); + skb = process_actions(meta_tp, reinject, subsk); + +#ifdef CONFIG_MPTCP_RBSMEASURE + if (skb) { + rbs_cb->scheduler->total_exec_count_skb++; + rbs_cb->scheduler->total_exec_time_skb += + __native_read_tsc() - begin_time2; + } else { + rbs_cb->scheduler->total_exec_count_no_skb++; + rbs_cb->scheduler->total_exec_time_no_skb += + __native_read_tsc() - begin_time2; + } +#endif + + number_of_evaluations++; + + if(number_of_evaluations >= 5) { + printk("%s: WARNING: Exceeded 5 evaluations for scheduler %s. Now aborting.", __func__, rbs_cb->scheduler->name); + break; + } + } while(!skb && ctx.side_effects); + +#ifdef CONFIG_MPTCP_RBSMEASURE + if (skb) { + rbs_cb->scheduler->total_time_noa_skb += + __native_read_tsc() - begin_time; + rbs_cb->scheduler->total_count_noa_skb++; + } else { + rbs_cb->scheduler->total_time_noa_no_skb += + __native_read_tsc() - begin_time; + rbs_cb->scheduler->total_count_noa_no_skb++; + } +#endif + +after_rbs_exec: + + if (mptcp_rbs_check_for_gaps_in_seq && skb) { + /* + * is there a gap between the beginning of + * this packet and the highest_seq without a gap? + */ + if (rbs_cb->highest_seq + 1 < TCP_SKB_CB(skb)->seq) { + printk("RBS GAP CHECK found gab for meta_sk %p with " + "current packet %p and its seq %u and current " + "highest_seq %u\n", + meta_tp, skb, TCP_SKB_CB(skb)->seq, + rbs_cb->highest_seq); + } + + /* we mention every gap only once, increase highest_seq? */ + if (rbs_cb->highest_seq > TCP_SKB_CB(skb)->end_seq) { + // still the highest, nothing to do + } else { + rbs_cb->highest_seq = TCP_SKB_CB(skb)->end_seq; + } + } + + /* + * if the skb is null but there are still packets in one of the queues, + * print warning. + * Note that the queue might have changed during rule execution, but the + * state at the end is sufficent. + */ + if (mptcp_rbs_check_for_work_conservingness && !skb && + (rbs_cb->queue_position || meta_tp->mpcb->reinject_queue.qlen)) { + u32 number_of_subflows_with_cwnd = + get_number_of_available_subflows_with_cwnd(meta_tp); + + if (number_of_subflows_with_cwnd) { + printk("RBS WORK CONSERVINESS CHECK found problem for " + "meta_sk %p with Q.TOP %p and RQ.COUNT %u and " + "number_of_evaluations %u and number of " + "available sbf %u with cwnd %u\n", + __func__, rbs_cb->queue_position, + meta_tp->mpcb->reinject_queue.qlen, + number_of_evaluations, number_of_subflows, + number_of_subflows_with_cwnd); + } + } + + if(skb) { + rbs_cb->execution_bucket--; + if(rbs_cb->execution_bucket == 0) { + printk("%s: WARNING: Execution bucket exceeded for scheduler %s. 
Now aborting.", __func__, rbs_cb->scheduler->name); + } + } + + return skb; +} + +void mptcp_rbs_sbf_bw_init(struct mptcp_rbs_sbf_cb *sbf_cb) +{ + sbf_cb->bw_out_bytes = 0; + sbf_cb->bw_out_last_update_ns = 0; + sbf_cb->bw_ack_bytes = 0; + sbf_cb->bw_ack_last_update_ns = 0; +} + +static void mptcp_rbs_init(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct sock *meta_sk = tp->mpcb->meta_sk; + struct tcp_sock *meta_tp = tcp_sk(meta_sk); + struct mptcp_rbs_cb *rbs_cb = mptcp_rbs_get_cb(meta_tp); + + if (!rbs_cb->open_actions) + rbs_cb->open_actions = + kzalloc(sizeof(struct mptcp_rbs_actions), GFP_KERNEL); + + if (!rbs_cb->scheduler) { + mptcp_rbs_debug("mptcp_rbs_init for sk %p with meta_sk %p\n", + sk, meta_sk); + rbs_cb->scheduler = default_scheduler; + rbs_cb->variation = &default_scheduler->variations[0]; + ++default_scheduler->usage; + + rbs_cb->highest_seq = meta_tp->snd_una; + + rbs_cb->execution_bucket = 1000; // intital bucket + mptcp_debug("%s init highest seq with last una %u\n", __func__, rbs_cb->highest_seq); + + if (!meta_tp->nonagle) + // during development, this is REALLY important, so + // don't disable it + printk("Warning: Nagle could cause performance " + "issues in combination with RBS\n"); + } else { + mptcp_rbs_debug("mptcp_rbs_init for sk %p with meta_sk %p has " + "already scheduler\n", + sk, meta_sk); + } + + if (meta_sk != sk) { + mptcp_rbs_debug("mptcp_rbs_init for sbf %p with meta_sk %p\n", + sk, meta_sk); + mptcp_rbs_sbf_bw_init( + (struct mptcp_rbs_sbf_cb *) &tp->mptcp->mptcp_sched[0]); + } + + mptcp_debug("%s for sk %p with meta_sk %p\n", __func__, sk, meta_sk); +} + +static void mptcp_rbs_release(struct sock *sk) +{ + if (is_meta_sk(sk)) { + struct mptcp_rbs_cb *rbs_cb = mptcp_rbs_get_cb(tcp_sk(sk)); + + /* If the meta socket is released the scheduler is no longer + * used + */ + --mptcp_rbs_get_cb(tcp_sk(sk))->scheduler->usage; + + kfree(rbs_cb->open_actions); + } + + printk("Releasing %p with is_meta=%d\n", sk, is_meta_sk(sk)); +} + +static void mptcp_rbs_recover_skb(struct sock *meta_sk, struct sock *subsk, + struct sk_buff *skb, bool reinject) +{ + struct tcp_sock *meta_tp = tcp_sk(meta_sk); + struct mptcp_rbs_cb *rbs_cb = mptcp_rbs_get_cb(meta_tp); + + mptcp_rbs_debug("rbs scheduler recover with rejected skb %p on sbf " + "%p with existing open action %p\n", + skb, subsk, rbs_cb->open_actions); + + /* skb has highest prio, insert at the beginning */ + mptcp_rbs_action_new(rbs_cb->open_actions, true, ACTION_KIND_PUSH, + tcp_sk(subsk), skb, reinject); + + // TODO Should this really be the default CFG? 
+ // default_rule_set->recovered_count++; +} + +/* type 0 = out, 1 = ack, 2 = ack on skb to get options */ +static void mptcp_rbs_update_stats(struct sock *sk, const struct sk_buff *skb, + unsigned int bytes, unsigned int type) +{ + struct tcp_sock *tp = tcp_sk(sk); + + switch (type) { + case 0: { + struct sock *meta_sk = tp->mpcb->meta_sk; + struct tcp_sock *meta_tp = tcp_sk(meta_sk); + struct mptcp_rbs_cb *rbs_cb = mptcp_rbs_get_cb(meta_tp); + rbs_cb->scheduler->total_bytes_sent += bytes; + + mptcp_debug("rbs adds %u bytes for sk %p on bw out\n", bytes, sk); + mptcp_rbs_sbf_bw_send_add(tp, bytes); + break; + } + case 1: { + mptcp_debug("rbs adds %u bytes for sk %p on bw ack\n", bytes, sk); + mptcp_rbs_sbf_bw_ack_add(tp, bytes); + break; + } + case 2: { + mptcp_rbs_sbf_delay_update(tp, skb); + break; + } + } +} + +struct mptcp_rbs_scheduler *mptcp_rbs_scheduler_get_default(void) +{ + return default_scheduler; +} + +void mptcp_rbs_scheduler_set_default(struct mptcp_rbs_scheduler *scheduler) +{ + default_scheduler = scheduler; +} + +struct mptcp_rbs_scheduler *mptcp_rbs_scheduler_get_registered(void) +{ + return schedulers; +} + +struct mptcp_rbs_scheduler *mptcp_rbs_scheduler_find(const char *name) +{ + struct mptcp_rbs_scheduler *tmp = schedulers; + + while (tmp) { + if (!strcmp(tmp->name, name)) + return tmp; + tmp = tmp->next; + } + + return NULL; +} + +bool mptcp_rbs_scheduler_register(struct mptcp_rbs_scheduler *scheduler) +{ + struct mptcp_rbs_scheduler *tmp; + + /* Check if a scheduler with the same name is already registered */ + tmp = schedulers; + while (tmp) { + if (!strcmp(tmp->name, scheduler->name)) + return false; + tmp = tmp->next; + } + + scheduler->next = schedulers; + schedulers = scheduler; + return true; +} + +void mptcp_rbs_scheduler_unregister(struct mptcp_rbs_scheduler *scheduler) +{ + struct mptcp_rbs_scheduler *cur; + struct mptcp_rbs_scheduler *next; + + if (!schedulers) + return; + if (schedulers == scheduler) { + schedulers = schedulers->next; + return; + } + + cur = schedulers; + next = cur->next; + while (next) { + if (next == scheduler) { + cur->next = scheduler->next; + break; + } + + cur = next; + next = cur->next; + } +} + +struct mptcp_rbs_scheduler *mptcp_rbs_scheduler_get(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct mptcp_rbs_cb *rbs_cb = mptcp_rbs_get_cb(tp); + + return rbs_cb->scheduler; +} + +bool mptcp_rbs_scheduler_set(struct sock *sk, const char *name) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct mptcp_rbs_cb *rbs_cb = mptcp_rbs_get_cb(tp); + struct mptcp_rbs_scheduler *scheduler; + + if (name) + scheduler = mptcp_rbs_scheduler_find(name); + else + scheduler = default_scheduler; + + if (!scheduler) + return false; + + ++scheduler->usage; + --rbs_cb->scheduler->usage; + rbs_cb->scheduler = scheduler; + rbs_cb->variation = &scheduler->variations[0]; + return true; +} + +static struct mptcp_sched_ops mptcp_sched_rbs = { + .get_subflow = mptcp_rbs_get_available_subflow, + .next_segment = mptcp_rbs_next_segment, + .init = mptcp_rbs_init, + .release = mptcp_rbs_release, + .name = "rbs", + .owner = THIS_MODULE, + .recover_skb = mptcp_rbs_recover_skb, + .update_stats = mptcp_rbs_update_stats, +}; + +static int __init rbs_register(void) +{ + BUILD_BUG_ON(sizeof(struct mptcp_rbs_cb) > MPTCP_SCHED_DATA_SIZE); + BUILD_BUG_ON(sizeof(struct mptcp_rbs_sbf_cb) > MPTCP_SCHED_SIZE); + + if (mptcp_register_scheduler(&mptcp_sched_rbs)) + return -1; + + /* Load default scheduler */ + default_scheduler = mptcp_rbs_scheduler_parse(default_rules); 
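/*
 * mptcp_rbs_scheduler_register()/_unregister()/_find() above keep all
 * parsed schedulers on a simple singly linked list headed by
 * 'schedulers' and reject duplicate names on registration.  The toy,
 * self-contained model below mirrors that list handling; names are
 * illustrative, and the head special case of the kernel code is folded
 * into a pointer-to-pointer walk.
 */
#include <stdbool.h>
#include <string.h>

struct sched_entry {
	struct sched_entry *next;
	const char *name;
};

static struct sched_entry *registry;

static bool sched_register(struct sched_entry *s)
{
	struct sched_entry *tmp;

	for (tmp = registry; tmp; tmp = tmp->next)
		if (!strcmp(tmp->name, s->name))
			return false;		/* name already taken */

	s->next = registry;			/* push on front, like the patch */
	registry = s;
	return true;
}

static void sched_unregister(struct sched_entry *s)
{
	struct sched_entry **link;

	for (link = &registry; *link; link = &(*link)->next) {
		if (*link == s) {
			*link = s->next;
			break;
		}
	}
}

int main(void)
{
	struct sched_entry a = { .name = "rbs_default" };
	struct sched_entry dup = { .name = "rbs_default" };
	bool ok;

	ok = sched_register(&a);	/* true */
	ok = sched_register(&dup);	/* false: duplicate name rejected */
	(void) ok;
	sched_unregister(&a);
	return 0;
}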
+ BUG_ON(!default_scheduler); + mptcp_rbs_scheduler_register(default_scheduler); + + /* Register proc for stats */ + mptcp_rbs_user_interface_init(); + +#ifdef CONFIG_MPTCP_RBSOPT + /* Start optimize thread */ + kthread_run(&opt_thread_func, NULL, "mptcp_rbs_opt"); +#endif + + return 0; +} + +static void rbs_unregister(void) +{ + /* Release all schedulers */ + while (schedulers) { + struct mptcp_rbs_scheduler *tmp = schedulers; + schedulers = schedulers->next; + mptcp_rbs_scheduler_free(tmp); + } + + mptcp_unregister_scheduler(&mptcp_sched_rbs); +} + +module_init(rbs_register); +module_exit(rbs_unregister); + +MODULE_AUTHOR("Alexander Froemmgen, Tobias Erbshaeusser"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Rule-based MPTCP Scheduler"); +MODULE_VERSION("0.89"); diff --git a/net/mptcp/mptcp_rbs_sched.h b/net/mptcp/mptcp_rbs_sched.h new file mode 100644 index 0000000000000..5224fa5cb0e51 --- /dev/null +++ b/net/mptcp/mptcp_rbs_sched.h @@ -0,0 +1,44 @@ +#ifndef _MPTCP_RBS_SCHED_H +#define _MPTCP_RBS_SCHED_H + +#include "mptcp_rbs_ctx.h" + +struct mptcp_rbs_scheduler; + +extern bool mptcp_rbs_extended_msgs; + +extern bool mptcp_ooo_opt; + +extern u32 mptcp_ooo_number_matches; + +extern bool ignoreSbfCwndConfig; + +#define mptcp_rbs_debug(fmt, args...) \ + do { \ + if (mptcp_rbs_extended_msgs) \ + mptcp_debug(fmt, ##args); \ + } while (0) + +struct sk_buff *mptcp_rbs_next_segment(struct sock *meta_sk, int *reinject, + struct sock **subsk, + unsigned int *limit); + +struct mptcp_rbs_scheduler *mptcp_rbs_scheduler_get_default(void); + +void mptcp_rbs_scheduler_set_default(struct mptcp_rbs_scheduler *scheduler); + +struct mptcp_rbs_scheduler *mptcp_rbs_scheduler_get_registered(void); + +struct mptcp_rbs_scheduler *mptcp_rbs_scheduler_find(const char *name); + +bool mptcp_rbs_scheduler_register(struct mptcp_rbs_scheduler *scheduler); + +void mptcp_rbs_scheduler_unregister(struct mptcp_rbs_scheduler *scheduler); + +struct mptcp_rbs_scheduler *mptcp_rbs_scheduler_get(struct sock *sk); + +bool mptcp_rbs_scheduler_set(struct sock *sk, const char *name); + +bool mptcp_rbs_sbf_is_available(struct tcp_sock* sbf); + +#endif diff --git a/net/mptcp/mptcp_rbs_scheduler.c b/net/mptcp/mptcp_rbs_scheduler.c new file mode 100644 index 0000000000000..ce8b0b1883ba5 --- /dev/null +++ b/net/mptcp/mptcp_rbs_scheduler.c @@ -0,0 +1,41 @@ +#include "mptcp_rbs_scheduler.h" +#include "mptcp_rbs_cfg.h" +#include "mptcp_rbs_parser.h" +#include + +void mptcp_rbs_scheduler_variation_free( + struct mptcp_rbs_scheduler_variation *variation) +{ + if (variation->first_block) + mptcp_rbs_cfg_blocks_free(variation->first_block); +} + +void mptcp_rbs_scheduler_free(struct mptcp_rbs_scheduler *scheduler) +{ + int i; + + for (i = 0; i < MPTCP_RBS_VARIATION_COUNT; ++i) { + if (!scheduler->variations[i].first_block) + break; + + mptcp_rbs_scheduler_variation_free(&scheduler->variations[i]); + } + + kfree(scheduler->name); + kfree(scheduler); +} + +int mptcp_rbs_scheduler_print(const struct mptcp_rbs_scheduler *scheduler, + int variation, char *buffer) +{ + int len; + + BUG_ON(variation < 0 || variation >= MPTCP_RBS_VARIATION_COUNT); + BUG_ON(!scheduler->variations[variation].first_block); + + len = sprintf_null(&buffer, "SCHEDULER %s;\n\n", scheduler->name); + len += mptcp_rbs_cfg_blocks_print( + scheduler->variations[variation].first_block, buffer); + + return len; +} diff --git a/net/mptcp/mptcp_rbs_scheduler.h b/net/mptcp/mptcp_rbs_scheduler.h new file mode 100644 index 0000000000000..7b7b3be3f2520 --- /dev/null +++ 
b/net/mptcp/mptcp_rbs_scheduler.h @@ -0,0 +1,76 @@ +#ifndef _MPTCP_RBS_SCHEDULER_H +#define _MPTCP_RBS_SCHEDULER_H + +#include + +struct mptcp_rbs_cfg_block; + +#define MPTCP_RBS_VARIATION_COUNT 8 + +/* Struct for RBS scheduler variations that are created by the optimizer */ +struct mptcp_rbs_scheduler_variation { + /* The first block of the CFG or NULL if the variation is not used yet + */ + struct mptcp_rbs_cfg_block *first_block; + /* Number of total used variables */ + u8 used_vars; + /* Determines for how many subflows this variation is optimized. Might + * be 0 if the variation is not optimized for subflows + */ + u8 sbf_num; + +#ifdef CONFIG_MPTCP_RBSMEASURE + /* Number of executions */ + u64 exec_count; + /* Total execution time */ + u64 total_time; +#endif +}; + +/* Struct for RBS schedulers */ +struct mptcp_rbs_scheduler { + /* The next scheduler or NULL */ + struct mptcp_rbs_scheduler *next; + /* Name of the scheduler */ + char *name; + /* Array with different variations of the scheduler. The first entry is + * not optimized for a certain number of subflows + */ + struct mptcp_rbs_scheduler_variation + variations[MPTCP_RBS_VARIATION_COUNT]; + /* Number of usages */ + int usage; + /* Determines if the scheduler should be deleted */ + bool del; + +#ifdef CONFIG_MPTCP_RBSMEASURE + u64 total_count_noa_no_skb; + u64 total_time_noa_no_skb; + + u64 total_count_oa_skb; + u64 total_time_oa_skb; + + u64 total_count_noa_skb; + u64 total_time_noa_skb; + + u64 total_exec_count_no_skb; + u64 total_exec_time_no_skb; + + u64 total_exec_count_skb; + u64 total_exec_time_skb; +#endif + /* Total bytes pushed by the scheduler. Used to analyse the + * overhead of redundancy. + */ + u64 total_bytes_sent; +}; + +void mptcp_rbs_scheduler_variation_free( + struct mptcp_rbs_scheduler_variation *variation); + +void mptcp_rbs_scheduler_free(struct mptcp_rbs_scheduler *scheduler); + +int mptcp_rbs_scheduler_print(const struct mptcp_rbs_scheduler *scheduler, + int variation, char *buffer); + +#endif diff --git a/net/mptcp/mptcp_rbs_smt.c b/net/mptcp/mptcp_rbs_smt.c new file mode 100644 index 0000000000000..08ae504e1d7b7 --- /dev/null +++ b/net/mptcp/mptcp_rbs_smt.c @@ -0,0 +1,887 @@ +#include "mptcp_rbs_smt.h" +#include "mptcp_rbs_ctx.h" +#include "mptcp_rbs_optimizer_ebpf_disasm.h" +#include "mptcp_rbs_parser.h" +#include "mptcp_rbs_sched.h" +#include "mptcp_rbs_value.h" +#include +#include + +struct mptcp_rbs_smt_drop *mptcp_rbs_smt_drop_new( + struct mptcp_rbs_value_skb *skb) +{ + struct mptcp_rbs_smt_drop *smt; + + smt = kzalloc(sizeof(struct mptcp_rbs_smt_drop), GFP_KERNEL); + smt->kind = SMT_KIND_DROP; + smt->free = mptcp_rbs_smt_drop_free; + smt->execute = mptcp_rbs_smt_drop_execute; + smt->skb = skb; + + return smt; +} + +void mptcp_rbs_smt_drop_free(struct mptcp_rbs_smt_drop *self) +{ + MPTCP_RBS_VALUE_FREE(self->skb); + kfree(self); +} + +void mptcp_rbs_smt_drop_execute(struct mptcp_rbs_smt_drop *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + struct sk_buff *skb; + + skb = self->skb->execute(self->skb, ctx); + if (!skb) + return; + + ctx->rbs_cb->execution_bucket += 5; + + mptcp_rbs_action_new(ctx->rbs_cb->open_actions, false, ACTION_KIND_DROP, + NULL, skb, self->skb->reinject); +} + +struct mptcp_rbs_smt_drop *mptcp_rbs_smt_drop_clone( + const struct mptcp_rbs_smt_drop *smt, void *user_ctx, + mptcp_rbs_value_clone_user_func user_func) +{ + struct mptcp_rbs_smt_drop *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_smt_drop), GFP_KERNEL); + *clone = *smt; + clone->next = NULL; + clone->skb = (struct 
mptcp_rbs_value_skb *) mptcp_rbs_value_clone( + (struct mptcp_rbs_value *) clone->skb, user_ctx, user_func); + + return clone; +} + +struct mptcp_rbs_smt_print *mptcp_rbs_smt_print_new( + struct mptcp_rbs_value_string *msg, struct mptcp_rbs_value *arg) +{ + struct mptcp_rbs_smt_print *smt; + + smt = kzalloc(sizeof(struct mptcp_rbs_smt_print), GFP_KERNEL); + smt->kind = SMT_KIND_PRINT; + smt->free = mptcp_rbs_smt_print_free; + smt->execute = mptcp_rbs_smt_print_execute; + smt->msg = msg; + smt->arg = arg; + + return smt; +} + +void mptcp_rbs_smt_print_free(struct mptcp_rbs_smt_print *self) +{ + MPTCP_RBS_VALUE_FREE(self->msg); + MPTCP_RBS_VALUE_FREE(self->arg); + kfree(self); +} + +#pragma GCC diagnostic push +#pragma GCC diagnostic error "-Wswitch" +void mptcp_rbs_smt_print_execute(struct mptcp_rbs_smt_print *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + char *msg = self->msg->execute(self->msg, ctx); + char str[512]; + struct inet_sock *isk = inet_sk(ctx->meta_sk); + + if (!msg) + return; + if (!self->arg) { + printk("ProgMP %p %08X:%04X %08X:%04X: %s\n", ctx->meta_sk, + isk->inet_rcv_saddr, + ntohs(isk->inet_sport), + isk->inet_daddr, + ntohs(isk->inet_dport), + msg); + return; + } + + /* build prefix */ + memset(str, 0, sizeof(str)); + snprintf(str, sizeof(str), "ProgMP %p %08X:%04X %08X:%04X: %s\n", ctx->meta_sk, + isk->inet_rcv_saddr, + ntohs(isk->inet_sport), + isk->inet_daddr, + ntohs(isk->inet_dport), + msg); + msg = str; + + switch (mptcp_rbs_value_get_type(self->arg->kind)) { + case TYPE_KIND_BOOL: { + struct mptcp_rbs_value_bool *arg = + (struct mptcp_rbs_value_bool *) self->arg; + s32 value = arg->execute(arg, ctx); + if (value != -1) + printk(msg, value != 0); + break; + } + case TYPE_KIND_INT: { + struct mptcp_rbs_value_int *arg = + (struct mptcp_rbs_value_int *) self->arg; + s64 value = arg->execute(arg, ctx); + if (value != -1) + printk(msg, (unsigned int) value); + break; + } + case TYPE_KIND_STRING: { + struct mptcp_rbs_value_string *arg = + (struct mptcp_rbs_value_string *) self->arg; + char *value = arg->execute(arg, ctx); + if (value) + printk(msg, value); + break; + } + case TYPE_KIND_NULL: { + printk(msg, NULL); + break; + } + case TYPE_KIND_SBF: { + struct mptcp_rbs_value_sbf *arg = + (struct mptcp_rbs_value_sbf *) self->arg; + struct tcp_sock *value = arg->execute(arg, ctx); + if (value) { + printk(msg, value, value->mptcp->sbf_id); + } else { + printk(msg, 0, 0); + } + break; + } + case TYPE_KIND_SKB: { + struct mptcp_rbs_value_skb *arg = + (struct mptcp_rbs_value_skb *) self->arg; + struct sk_buff *value = arg->execute(arg, ctx); + if (value) { + printk(msg, value, TCP_SKB_CB(value)->seq, + TCP_SKB_CB(value)->end_seq); + } else { + printk(msg, 0, 0, 0); + } + break; + } + case TYPE_KIND_SBFLIST: + case TYPE_KIND_SKBLIST: + /* Not possible */ + break; + } +} +#pragma GCC diagnostic pop + +struct mptcp_rbs_smt_print *mptcp_rbs_smt_print_clone( + const struct mptcp_rbs_smt_print *smt, void *user_ctx, + mptcp_rbs_value_clone_user_func user_func) +{ + struct mptcp_rbs_smt_print *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_smt_print), GFP_KERNEL); + *clone = *smt; + clone->next = NULL; + clone->msg = (struct mptcp_rbs_value_string *) mptcp_rbs_value_clone( + (struct mptcp_rbs_value *) clone->msg, user_ctx, user_func); + if (clone->arg) + clone->arg = + mptcp_rbs_value_clone(clone->arg, user_ctx, user_func); + + return clone; +} + +struct mptcp_rbs_smt_push *mptcp_rbs_smt_push_new( + struct mptcp_rbs_value_sbf *sbf, struct mptcp_rbs_value_skb *skb) +{ + struct 
mptcp_rbs_smt_push *smt; + + smt = kzalloc(sizeof(struct mptcp_rbs_smt_push), GFP_KERNEL); + smt->kind = SMT_KIND_PUSH; + smt->free = mptcp_rbs_smt_push_free; + smt->execute = mptcp_rbs_smt_push_execute; + smt->sbf = sbf; + smt->skb = skb; + + return smt; +} + +void mptcp_rbs_smt_push_free(struct mptcp_rbs_smt_push *self) +{ + MPTCP_RBS_VALUE_FREE(self->sbf); + MPTCP_RBS_VALUE_FREE(self->skb); + kfree(self); +} + +void mptcp_rbs_smt_push_execute(struct mptcp_rbs_smt_push *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + struct tcp_sock *sbf; + struct sk_buff *skb; + + sbf = self->sbf->execute(self->sbf, ctx); + if (!sbf) + return; + + skb = self->skb->execute(self->skb, ctx); + if (!skb) + return; + + ctx->rbs_cb->execution_bucket += 5; + + mptcp_rbs_action_new(ctx->rbs_cb->open_actions, false, ACTION_KIND_PUSH, + sbf, skb, self->skb->reinject); +} + +struct mptcp_rbs_smt_push *mptcp_rbs_smt_push_clone( + const struct mptcp_rbs_smt_push *smt, void *user_ctx, + mptcp_rbs_value_clone_user_func user_func) +{ + struct mptcp_rbs_smt_push *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_smt_push), GFP_KERNEL); + *clone = *smt; + clone->next = NULL; + clone->sbf = (struct mptcp_rbs_value_sbf *) mptcp_rbs_value_clone( + (struct mptcp_rbs_value *) clone->sbf, user_ctx, user_func); + clone->skb = (struct mptcp_rbs_value_skb *) mptcp_rbs_value_clone( + (struct mptcp_rbs_value *) clone->skb, user_ctx, user_func); + + return clone; +} + +struct mptcp_rbs_smt_set_user *mptcp_rbs_smt_set_user_new( + struct mptcp_rbs_value_sbf *sbf, struct mptcp_rbs_value_int *value) +{ + struct mptcp_rbs_smt_set_user *smt; + + smt = kzalloc(sizeof(struct mptcp_rbs_smt_set_user), GFP_KERNEL); + smt->kind = SMT_KIND_SET_USER; + smt->free = mptcp_rbs_smt_set_user_free; + smt->execute = mptcp_rbs_smt_set_user_execute; + smt->sbf = sbf; + smt->value = value; + + return smt; +} + +void mptcp_rbs_smt_set_user_free(struct mptcp_rbs_smt_set_user *self) +{ + MPTCP_RBS_VALUE_FREE(self->sbf); + MPTCP_RBS_VALUE_FREE(self->value); + kfree(self); +} + +void mptcp_rbs_smt_set_user_execute(struct mptcp_rbs_smt_set_user *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + struct tcp_sock *sbf; + s64 val; + + sbf = self->sbf->execute(self->sbf, ctx); + if (!sbf) + return; + + val = self->value->execute(self->value, ctx); + + /* even eval to null is a side effect */ + ctx->side_effects = 1; + + if (val != -1) { +// *((unsigned long*)&sbf->mptcp->mptcp_sched[0]) = val; + mptcp_rbs_get_sbf_cb(sbf)->user = val; + } +} + +struct mptcp_rbs_smt_set_user *mptcp_rbs_smt_set_user_clone( + const struct mptcp_rbs_smt_set_user *smt, void *user_ctx, + mptcp_rbs_value_clone_user_func user_func) +{ + struct mptcp_rbs_smt_set_user *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_smt_set_user), GFP_KERNEL); + *clone = *smt; + clone->next = NULL; + clone->sbf = (struct mptcp_rbs_value_sbf *) mptcp_rbs_value_clone( + (struct mptcp_rbs_value *) clone->sbf, user_ctx, user_func); + clone->value = (struct mptcp_rbs_value_int *) mptcp_rbs_value_clone( + (struct mptcp_rbs_value *) clone->value, user_ctx, user_func); + + return clone; +} + +struct mptcp_rbs_smt_set *mptcp_rbs_smt_set_new( + int reg_number, struct mptcp_rbs_value_int *value) +{ + struct mptcp_rbs_smt_set *smt; + + smt = kzalloc(sizeof(struct mptcp_rbs_smt_set), GFP_KERNEL); + smt->kind = SMT_KIND_SET; + smt->free = mptcp_rbs_smt_set_free; + smt->execute = mptcp_rbs_smt_set_execute; + smt->reg_number = reg_number; + smt->value = value; + + return smt; +} + +void mptcp_rbs_smt_set_free(struct 
mptcp_rbs_smt_set *self) +{ + MPTCP_RBS_VALUE_FREE(self->value); + kfree(self); +} + +void mptcp_rbs_smt_set_execute(struct mptcp_rbs_smt_set *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + s64 val = self->value->execute(self->value, ctx); + + /* even eval to null is a side effect */ + ctx->side_effects = 1; + + if (val != -1) + ctx->rbs_cb->regs[self->reg_number] = val; +} + +struct mptcp_rbs_smt_set *mptcp_rbs_smt_set_clone( + const struct mptcp_rbs_smt_set *smt, void *user_ctx, + mptcp_rbs_value_clone_user_func user_func) +{ + struct mptcp_rbs_smt_set *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_smt_set), GFP_KERNEL); + *clone = *smt; + clone->next = NULL; + clone->value = (struct mptcp_rbs_value_int *) mptcp_rbs_value_clone( + (struct mptcp_rbs_value *) clone->value, user_ctx, user_func); + + return clone; +} + +struct mptcp_rbs_smt_var *mptcp_rbs_smt_var_new(int var_number, bool is_lazy, + struct mptcp_rbs_value *value) +{ + struct mptcp_rbs_smt_var *smt; + + smt = kzalloc(sizeof(struct mptcp_rbs_smt_var), GFP_KERNEL); + smt->kind = SMT_KIND_VAR; + smt->free = mptcp_rbs_smt_var_free; + smt->execute = mptcp_rbs_smt_var_execute; + smt->var_number = var_number; + smt->is_lazy = is_lazy; + smt->value = value; + + return smt; +} + +void mptcp_rbs_smt_var_free(struct mptcp_rbs_smt_var *self) +{ + MPTCP_RBS_VALUE_FREE(self->value); + kfree(self); +} + +static struct tcp_sock **sbf_list_store(struct mptcp_rbs_value_sbf_list *value, + struct mptcp_rbs_eval_ctx *ctx) +{ + int len = 0; + bool is_null; + void *prev = NULL; + struct tcp_sock *sbf; + struct tcp_sock **list; + struct tcp_sock **tmp; + + sbf = value->execute(value, ctx, &prev, &is_null); + if (is_null) + return NULL; + + while (sbf) { + ++len; + sbf = value->execute(value, ctx, &prev, &is_null); + } + + list = kmalloc(sizeof(struct tcp_sock *) * (len + 1), GFP_ATOMIC); + if (!list) { + mptcp_rbs_debug("WARNING: Cannot allocate %zu bytes to store a " + "subflow list inside a variable. Setting " + "variable value to NULL\n", + sizeof(struct tcp_sock *) * (len + 1)); + return NULL; + } + + tmp = list; + prev = NULL; + sbf = value->execute(value, ctx, &prev, &is_null); + while (sbf) { + *tmp = sbf; + ++tmp; + sbf = value->execute(value, ctx, &prev, &is_null); + } + *tmp = NULL; /* implicit end with NULL */ + +if(len == 0) { + printk("%s allocates space for %d subflows\n", __func__, len); +} else if(len == 1) { + printk("%s allocates space for %d subflows, with first one %p\n", __func__, len, *list); +} else if(len > 1) { + printk("%s allocates space for %d subflows, with first one %p, second %p\n", __func__, len, *list, *(list + 1)); +} + + return list; +} + +static struct sk_buff **skb_list_store(struct mptcp_rbs_value_skb_list *value, + struct mptcp_rbs_eval_ctx *ctx) +{ + int len = 0; + bool is_null; + void *prev = NULL; + struct sk_buff *skb; + struct sk_buff **list; + struct sk_buff **tmp; + + skb = value->execute(value, ctx, &prev, &is_null); + if (is_null) + return NULL; + + while (skb) { + ++len; + skb = value->execute(value, ctx, &prev, &is_null); + } + + list = kmalloc(sizeof(struct sk_buff *) * (len + 1), GFP_ATOMIC); + if (!list) { + mptcp_rbs_debug("WARNING: Cannot allocate %zu bytes to store a " + "sockbuffer list inside a variable. 
Setting " + "variable value to NULL\n", + sizeof(struct sk_buff *) * (len + 1)); + return NULL; + } + + tmp = list; + prev = NULL; + skb = value->execute(value, ctx, &prev, &is_null); + while (skb) { + *tmp = skb; + ++tmp; + skb = value->execute(value, ctx, &prev, &is_null); + } + *tmp = NULL; + + return list; +} + +#pragma GCC diagnostic push +#pragma GCC diagnostic error "-Wswitch" +void mptcp_rbs_smt_var_execute(struct mptcp_rbs_smt_var *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + struct mptcp_rbs_var *var = &ctx->vars[self->var_number]; + enum mptcp_rbs_type_kind type = + mptcp_rbs_value_get_type(self->value->kind); + + var->type = type; + var->is_lazy = self->is_lazy; + + if (self->is_lazy) + var->lazy_value = self->value; + else { + switch (type) { + case TYPE_KIND_NULL: + break; + case TYPE_KIND_BOOL: { + struct mptcp_rbs_value_bool *bool_value = + (struct mptcp_rbs_value_bool *) self->value; + + var->bool_value = bool_value->execute(bool_value, ctx); + break; + } + case TYPE_KIND_INT: { + struct mptcp_rbs_value_int *int_value = + (struct mptcp_rbs_value_int *) self->value; + + var->int_value = int_value->execute(int_value, ctx); + break; + } + case TYPE_KIND_STRING: { + struct mptcp_rbs_value_string *string_value = + (struct mptcp_rbs_value_string *) self->value; + + var->string_value = + string_value->execute(string_value, ctx); + break; + } + case TYPE_KIND_SBF: { + struct mptcp_rbs_value_sbf *sbf_value = + (struct mptcp_rbs_value_sbf *) self->value; + + var->sbf_value = sbf_value->execute(sbf_value, ctx); + break; + } + case TYPE_KIND_SBFLIST: { + struct mptcp_rbs_value_sbf_list *sbf_list_value = + (struct mptcp_rbs_value_sbf_list *) self->value; +printk("%s for meta_sk %p\n", __func__, ctx->mpcb->meta_sk); + var->sbf_list_value = + sbf_list_store(sbf_list_value, ctx); + break; + } + case TYPE_KIND_SKB: { + struct mptcp_rbs_value_skb *skb_value = + (struct mptcp_rbs_value_skb *) self->value; + + var->skb_value = skb_value->execute(skb_value, ctx); + break; + } + case TYPE_KIND_SKBLIST: { + struct mptcp_rbs_value_skb_list *skb_list_value = + (struct mptcp_rbs_value_skb_list *) self->value; + + var->skb_list_value = + skb_list_store(skb_list_value, ctx); + break; + } + } + } +} +#pragma GCC diagnostic pop + +struct mptcp_rbs_smt_var *mptcp_rbs_smt_var_clone( + const struct mptcp_rbs_smt_var *smt, void *user_ctx, + mptcp_rbs_value_clone_user_func user_func) +{ + struct mptcp_rbs_smt_var *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_smt_var), GFP_KERNEL); + *clone = *smt; + clone->next = NULL; + clone->value = mptcp_rbs_value_clone(clone->value, user_ctx, user_func); + + return clone; +} + +struct mptcp_rbs_smt_void *mptcp_rbs_smt_void_new(struct mptcp_rbs_value *value) +{ + struct mptcp_rbs_smt_void *smt; + + smt = kzalloc(sizeof(struct mptcp_rbs_smt_void), GFP_KERNEL); + smt->kind = SMT_KIND_VOID; + smt->free = mptcp_rbs_smt_void_free; + smt->execute = mptcp_rbs_smt_void_execute; + smt->value = value; + + return smt; +} + +void mptcp_rbs_smt_void_free(struct mptcp_rbs_smt_void *self) +{ + MPTCP_RBS_VALUE_FREE(self->value); + kfree(self); +} + +#pragma GCC diagnostic push +#pragma GCC diagnostic error "-Wswitch" +void mptcp_rbs_smt_void_execute(struct mptcp_rbs_smt_void *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + if (!self->value) + return; + + switch (mptcp_rbs_value_get_type(self->value->kind)) { + case TYPE_KIND_NULL: { + /* Do nothing */ + break; + } + case TYPE_KIND_BOOL: { + struct mptcp_rbs_value_bool *value = + (struct mptcp_rbs_value_bool *) self->value; + + 
value->execute(value, ctx); + break; + } + case TYPE_KIND_INT: { + struct mptcp_rbs_value_int *value = + (struct mptcp_rbs_value_int *) self->value; + + value->execute(value, ctx); + break; + } + case TYPE_KIND_STRING: { + struct mptcp_rbs_value_string *value = + (struct mptcp_rbs_value_string *) self->value; + + value->execute(value, ctx); + break; + } + case TYPE_KIND_SBF: { + struct mptcp_rbs_value_sbf *value = + (struct mptcp_rbs_value_sbf *) self->value; + + value->execute(value, ctx); + break; + } + case TYPE_KIND_SBFLIST: { + struct mptcp_rbs_value_sbf_list *value = + (struct mptcp_rbs_value_sbf_list *) self->value; + void *prev = NULL; + bool is_null; + + value->execute(value, ctx, &prev, &is_null); + break; + } + case TYPE_KIND_SKB: { + struct mptcp_rbs_value_skb *value = + (struct mptcp_rbs_value_skb *) self->value; + + value->execute(value, ctx); + break; + } + case TYPE_KIND_SKBLIST: { + struct mptcp_rbs_value_skb_list *value = + (struct mptcp_rbs_value_skb_list *) self->value; + void *prev = NULL; + bool is_null; + + value->execute(value, ctx, &prev, &is_null); + break; + } + } +} +#pragma GCC diagnostic pop + +struct mptcp_rbs_smt_void *mptcp_rbs_smt_void_clone( + const struct mptcp_rbs_smt_void *smt, void *user_ctx, + mptcp_rbs_value_clone_user_func user_func) +{ + struct mptcp_rbs_smt_void *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_smt_void), GFP_KERNEL); + *clone = *smt; + clone->next = NULL; + if (clone->value) + clone->value = + mptcp_rbs_value_clone(clone->value, user_ctx, user_func); + + return clone; +} + +struct mptcp_rbs_smt_ebpf *mptcp_rbs_smt_ebpf_new(struct bpf_prog *prog, + char **strs, int strs_len) +{ + struct mptcp_rbs_smt_ebpf *smt; + + smt = kzalloc(sizeof(struct mptcp_rbs_smt_ebpf), GFP_KERNEL); + smt->kind = SMT_KIND_EBPF; + smt->free = mptcp_rbs_smt_ebpf_free; + smt->execute = mptcp_rbs_smt_ebpf_execute; + smt->prog = prog; + smt->strs = strs; + smt->strs_len = strs_len; + + return smt; +} + +void mptcp_rbs_smt_ebpf_free(struct mptcp_rbs_smt_ebpf *self) +{ + int i; + + if (self->prog) + bpf_prog_free(self->prog); + if (self->strs) { + for (i = 0; i < self->strs_len; ++i) { + kfree(self->strs[i]); + } + kfree(self->strs); + } + kfree(self); +} + +void mptcp_rbs_smt_ebpf_execute(struct mptcp_rbs_smt_ebpf *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + BPF_PROG_RUN(self->prog, (struct sk_buff *) ctx); +} + +void mptcp_rbs_smts_free(struct mptcp_rbs_smt *smt) +{ + while (smt) { + struct mptcp_rbs_smt *old_smt = smt; + smt = smt->next; + old_smt->free(old_smt); + } +} + +#pragma GCC diagnostic push +#pragma GCC diagnostic error "-Wswitch" +#pragma GCC diagnostic ignored "-Wreturn-type" +struct mptcp_rbs_smt *mptcp_rbs_smt_clone( + const struct mptcp_rbs_smt *smt, void *user_ctx, + mptcp_rbs_value_clone_user_func user_func) +{ + switch (smt->kind) { + case SMT_KIND_DROP: + return (struct mptcp_rbs_smt *) mptcp_rbs_smt_drop_clone( + (const struct mptcp_rbs_smt_drop *) smt, user_ctx, + user_func); + case SMT_KIND_PRINT: + return (struct mptcp_rbs_smt *) mptcp_rbs_smt_print_clone( + (const struct mptcp_rbs_smt_print *) smt, user_ctx, + user_func); + case SMT_KIND_PUSH: + return (struct mptcp_rbs_smt *) mptcp_rbs_smt_push_clone( + (const struct mptcp_rbs_smt_push *) smt, user_ctx, + user_func); + case SMT_KIND_SET_USER: + return (struct mptcp_rbs_smt *) mptcp_rbs_smt_set_user_clone( + (const struct mptcp_rbs_smt_set_user*) smt, user_ctx, + user_func); + case SMT_KIND_SET: + return (struct mptcp_rbs_smt *) mptcp_rbs_smt_set_clone( + (const struct 
mptcp_rbs_smt_set *) smt, user_ctx, + user_func); + case SMT_KIND_VAR: + return (struct mptcp_rbs_smt *) mptcp_rbs_smt_var_clone( + (const struct mptcp_rbs_smt_var *) smt, user_ctx, + user_func); + case SMT_KIND_VOID: + return (struct mptcp_rbs_smt *) mptcp_rbs_smt_void_clone( + (const struct mptcp_rbs_smt_void *) smt, user_ctx, + user_func); + case SMT_KIND_EBPF: { + /* This should never be called */ + BUG_ON(true); + return NULL; + } + } +} +#pragma GCC diagnostic pop + +#pragma GCC diagnostic push +#pragma GCC diagnostic error "-Wswitch" +#pragma GCC diagnostic ignored "-Wreturn-type" +int mptcp_rbs_smt_print(const struct mptcp_rbs_smt *smt, char *buffer) +{ + int len; + int tmp_len; + + switch (smt->kind) { + case SMT_KIND_DROP: { + const struct mptcp_rbs_smt_drop *drop = + (const struct mptcp_rbs_smt_drop *) smt; + + len = sprintf_null(&buffer, "DROP("); + tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) drop->skb, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + len += sprintf_null(&buffer, ");"); + return len; + } + case SMT_KIND_PRINT: { + const struct mptcp_rbs_smt_print *print = + (const struct mptcp_rbs_smt_print *) smt; + + len = sprintf_null(&buffer, "PRINT("); + tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) print->msg, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + if (print->arg) { + len += sprintf_null(&buffer, ", "); + tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) print->arg, + buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + } + len += sprintf_null(&buffer, ");"); + return len; + } + case SMT_KIND_PUSH: { + const struct mptcp_rbs_smt_push *push = + (const struct mptcp_rbs_smt_push *) smt; + + len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) push->sbf, buffer); + if (buffer) + buffer += len; + len += sprintf_null(&buffer, ".PUSH("); + tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) push->skb, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + len += sprintf_null(&buffer, ");"); + return len; + } + case SMT_KIND_SET_USER: { + const struct mptcp_rbs_smt_set_user *set_user = + (const struct mptcp_rbs_smt_set_user *) smt; + + len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) set_user->sbf, buffer); + if (buffer) + buffer += len; + len += sprintf_null(&buffer, ".SET_USER("); + tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) set_user->value, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + len += sprintf_null(&buffer, ");"); + return len; + } + case SMT_KIND_SET: { + const struct mptcp_rbs_smt_set *set = + (const struct mptcp_rbs_smt_set *) smt; + + len = sprintf_null(&buffer, "SET(R%d, ", set->reg_number + 1); + tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) set->value, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + len += sprintf_null(&buffer, ");"); + return len; + } + case SMT_KIND_VAR: { + const struct mptcp_rbs_smt_var *var = + (const struct mptcp_rbs_smt_var *) smt; + + len = sprintf_null(&buffer, "VAR v%d = ", var->var_number + 1); + tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) var->value, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + if (var->is_lazy) + len += sprintf_null(&buffer, " LAZY"); + len += sprintf_null(&buffer, ";"); + return len; + } + case SMT_KIND_VOID: { + const struct mptcp_rbs_smt_void *void_ = + (const struct mptcp_rbs_smt_void *) smt; + + len = sprintf_null(&buffer, 
"VOID"); + if (void_->value) { + len += sprintf_null(&buffer, "("); + tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) void_->value, + buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + len += sprintf_null(&buffer, ")"); + } + len += sprintf_null(&buffer, ";"); + return len; + } + case SMT_KIND_EBPF: { + const struct mptcp_rbs_smt_ebpf *ebpf = + (const struct mptcp_rbs_smt_ebpf *) smt; + + return mptcp_rbs_ebpf_dump(ebpf->prog, buffer); + } + } +} +#pragma GCC diagnostic pop diff --git a/net/mptcp/mptcp_rbs_smt.h b/net/mptcp/mptcp_rbs_smt.h new file mode 100644 index 0000000000000..d328a9f8b6676 --- /dev/null +++ b/net/mptcp/mptcp_rbs_smt.h @@ -0,0 +1,221 @@ +#ifndef _MPTCP_RBS_SMT_H +#define _MPTCP_RBS_SMT_H + +#include + +struct bpf_prog; +struct mptcp_rbs_eval_ctx; + +#ifndef MPTCP_RBS_CLONE_USER_FUNC_DEFINED +#define MPTCP_RBS_CLONE_USER_FUNC_DEFINED +typedef struct mptcp_rbs_value *(*mptcp_rbs_value_clone_user_func)( + void *user_ctx, const struct mptcp_rbs_value *value); +#endif + +/* Enumeration of statement kinds */ +enum mptcp_rbs_smt_kind { + SMT_KIND_DROP, + SMT_KIND_PRINT, + SMT_KIND_PUSH, + SMT_KIND_SET_USER, + SMT_KIND_SET, + SMT_KIND_VAR, + SMT_KIND_VOID, + SMT_KIND_EBPF +}; + +/* Base struct to store a single statement */ +struct mptcp_rbs_smt { + struct mptcp_rbs_smt *next; + enum mptcp_rbs_smt_kind kind; + void (*free)(struct mptcp_rbs_smt *self); + void (*execute)(struct mptcp_rbs_smt *self, + struct mptcp_rbs_eval_ctx *ctx); +}; + +/* Struct to store a drop statement */ +struct mptcp_rbs_smt_drop { + struct mptcp_rbs_smt *next; + enum mptcp_rbs_smt_kind kind; + void (*free)(struct mptcp_rbs_smt_drop *self); + void (*execute)(struct mptcp_rbs_smt_drop *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_skb *skb; +}; + +struct mptcp_rbs_smt_drop *mptcp_rbs_smt_drop_new( + struct mptcp_rbs_value_skb *skb); +void mptcp_rbs_smt_drop_free(struct mptcp_rbs_smt_drop *self); +void mptcp_rbs_smt_drop_execute(struct mptcp_rbs_smt_drop *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_smt_drop *mptcp_rbs_smt_drop_clone( + const struct mptcp_rbs_smt_drop *smt, void *user_ctx, + mptcp_rbs_value_clone_user_func user_func); + +/* Struct to store a print statement */ +struct mptcp_rbs_smt_print { + struct mptcp_rbs_smt *next; + enum mptcp_rbs_smt_kind kind; + void (*free)(struct mptcp_rbs_smt_print *self); + void (*execute)(struct mptcp_rbs_smt_print *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_string *msg; + struct mptcp_rbs_value *arg; +}; + +struct mptcp_rbs_smt_print *mptcp_rbs_smt_print_new( + struct mptcp_rbs_value_string *msg, struct mptcp_rbs_value *arg); +void mptcp_rbs_smt_print_free(struct mptcp_rbs_smt_print *self); +void mptcp_rbs_smt_print_execute(struct mptcp_rbs_smt_print *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_smt_print *mptcp_rbs_smt_print_clone( + const struct mptcp_rbs_smt_print *smt, void *user_ctx, + mptcp_rbs_value_clone_user_func user_func); + +/* Struct to store a push statement */ +struct mptcp_rbs_smt_push { + struct mptcp_rbs_smt *next; + enum mptcp_rbs_smt_kind kind; + void (*free)(struct mptcp_rbs_smt_push *self); + void (*execute)(struct mptcp_rbs_smt_push *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_sbf *sbf; + struct mptcp_rbs_value_skb *skb; +}; + +struct mptcp_rbs_smt_push *mptcp_rbs_smt_push_new( + struct mptcp_rbs_value_sbf *sbf, struct mptcp_rbs_value_skb *skb); +void mptcp_rbs_smt_push_free(struct mptcp_rbs_smt_push *self); +void 
mptcp_rbs_smt_push_execute(struct mptcp_rbs_smt_push *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_smt_push *mptcp_rbs_smt_push_clone( + const struct mptcp_rbs_smt_push *smt, void *user_ctx, + mptcp_rbs_value_clone_user_func user_func); + +/* Struct to store a set_user statement */ +struct mptcp_rbs_smt_set_user { + struct mptcp_rbs_smt *next; + enum mptcp_rbs_smt_kind kind; + void (*free)(struct mptcp_rbs_smt_set_user *self); + void (*execute)(struct mptcp_rbs_smt_set_user *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_sbf *sbf; + struct mptcp_rbs_value_int *value; +}; + +struct mptcp_rbs_smt_set_user *mptcp_rbs_smt_set_user_new( + struct mptcp_rbs_value_sbf *sbf, struct mptcp_rbs_value_int *value); +void mptcp_rbs_smt_set_user_free(struct mptcp_rbs_smt_set_user *self); +void mptcp_rbs_smt_set_user_execute(struct mptcp_rbs_smt_set_user *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_smt_set_user *mptcp_rbs_smt_set_user_clone( + const struct mptcp_rbs_smt_set_user *smt, void *user_ctx, + mptcp_rbs_value_clone_user_func user_func); + +/* Struct to store a set statement */ +struct mptcp_rbs_smt_set { + struct mptcp_rbs_smt *next; + enum mptcp_rbs_smt_kind kind; + void (*free)(struct mptcp_rbs_smt_set *self); + void (*execute)(struct mptcp_rbs_smt_set *self, + struct mptcp_rbs_eval_ctx *ctx); + int reg_number; + struct mptcp_rbs_value_int *value; +}; + +struct mptcp_rbs_smt_set *mptcp_rbs_smt_set_new( + int reg_number, struct mptcp_rbs_value_int *value); +void mptcp_rbs_smt_set_free(struct mptcp_rbs_smt_set *self); +void mptcp_rbs_smt_set_execute(struct mptcp_rbs_smt_set *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_smt_set *mptcp_rbs_smt_set_clone( + const struct mptcp_rbs_smt_set *smt, void *user_ctx, + mptcp_rbs_value_clone_user_func user_func); + +/* Struct to store a var statement */ +struct mptcp_rbs_smt_var { + struct mptcp_rbs_smt *next; + enum mptcp_rbs_smt_kind kind; + void (*free)(struct mptcp_rbs_smt_var *self); + void (*execute)(struct mptcp_rbs_smt_var *self, + struct mptcp_rbs_eval_ctx *ctx); + int var_number; + bool is_lazy; + struct mptcp_rbs_value *value; +}; + +struct mptcp_rbs_smt_var *mptcp_rbs_smt_var_new(int var_number, bool is_lazy, + struct mptcp_rbs_value *value); +void mptcp_rbs_smt_var_free(struct mptcp_rbs_smt_var *self); +void mptcp_rbs_smt_var_execute(struct mptcp_rbs_smt_var *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_smt_var *mptcp_rbs_smt_var_clone( + const struct mptcp_rbs_smt_var *smt, void *user_ctx, + mptcp_rbs_value_clone_user_func user_func); + +/* Struct to store a void statement. This statement is only used for + * measurements! 
+ */ +struct mptcp_rbs_smt_void { + struct mptcp_rbs_smt *next; + enum mptcp_rbs_smt_kind kind; + void (*free)(struct mptcp_rbs_smt_void *self); + void (*execute)(struct mptcp_rbs_smt_void *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value *value; +}; + +struct mptcp_rbs_smt_void *mptcp_rbs_smt_void_new( + struct mptcp_rbs_value *value); +void mptcp_rbs_smt_void_free(struct mptcp_rbs_smt_void *self); +void mptcp_rbs_smt_void_execute(struct mptcp_rbs_smt_void *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_smt_void *mptcp_rbs_smt_void_clone( + const struct mptcp_rbs_smt_void *smt, void *user_ctx, + mptcp_rbs_value_clone_user_func user_func); + +/* Struct to store generated eBPF code */ +struct mptcp_rbs_smt_ebpf { + struct mptcp_rbs_smt *next; + enum mptcp_rbs_smt_kind kind; + void (*free)(struct mptcp_rbs_smt_ebpf *self); + void (*execute)(struct mptcp_rbs_smt_ebpf *self, + struct mptcp_rbs_eval_ctx *ctx); + struct bpf_prog *prog; + char **strs; + int strs_len; +}; + +struct mptcp_rbs_smt_ebpf *mptcp_rbs_smt_ebpf_new(struct bpf_prog *prog, + char **strs, int strs_len); +void mptcp_rbs_smt_ebpf_free(struct mptcp_rbs_smt_ebpf *self); +void mptcp_rbs_smt_ebpf_execute(struct mptcp_rbs_smt_ebpf *self, + struct mptcp_rbs_eval_ctx *ctx); + +/* + * Releases all statements starting with the given one + */ +void mptcp_rbs_smts_free(struct mptcp_rbs_smt *smt); + +/* + * Creates a copy of a statement and all its subvalues + * @smt: The statement to copy + * @user_ctx: User context for the user function or NULL + * @user_func: Function that is executed for each value or NULL. If this + * function returns a value other than NULL the current value is replaced with + * it instead of cloned + * Return: The new instance + */ +struct mptcp_rbs_smt *mptcp_rbs_smt_clone( + const struct mptcp_rbs_smt *smt, void *user_ctx, + mptcp_rbs_value_clone_user_func user_func); + +/* + * Writes a string representation of a statement to the given buffer + * @smt: The statement + * @buffer: Pointer to the buffer where the string should be stored or NULL + * @return: Number of written characters + */ +int mptcp_rbs_smt_print(const struct mptcp_rbs_smt *smt, char *buffer); + +#endif diff --git a/net/mptcp/mptcp_rbs_type.c b/net/mptcp/mptcp_rbs_type.c new file mode 100644 index 0000000000000..22150f753a953 --- /dev/null +++ b/net/mptcp/mptcp_rbs_type.c @@ -0,0 +1,18 @@ +#include "mptcp_rbs_type.h" + +/* Array to map mptcp_rbs_type_kind items to their names */ +static const char *type_names[] = { + "null", + "boolean", + "integer", + "string", + "subflow", + "subflow list", + "sockbuffer", + "sockbuffer list" +}; + +const char *mptcp_rbs_type_get_name(enum mptcp_rbs_type_kind type) +{ + return type_names[type]; +} diff --git a/net/mptcp/mptcp_rbs_type.h b/net/mptcp/mptcp_rbs_type.h new file mode 100644 index 0000000000000..591d802f9cf35 --- /dev/null +++ b/net/mptcp/mptcp_rbs_type.h @@ -0,0 +1,21 @@ +#ifndef _MPTCP_RBS_TYPE_H +#define _MPTCP_RBS_TYPE_H + +/* Enumeration of type kinds */ +enum mptcp_rbs_type_kind { + TYPE_KIND_NULL, + TYPE_KIND_BOOL, + TYPE_KIND_INT, + TYPE_KIND_STRING, + TYPE_KIND_SBF, + TYPE_KIND_SBFLIST, + TYPE_KIND_SKB, + TYPE_KIND_SKBLIST +}; + +/* + * Returns the name of a type + */ +const char *mptcp_rbs_type_get_name(enum mptcp_rbs_type_kind type); + +#endif diff --git a/net/mptcp/mptcp_rbs_user.c b/net/mptcp/mptcp_rbs_user.c new file mode 100644 index 0000000000000..eb9e54d9db843 --- /dev/null +++ b/net/mptcp/mptcp_rbs_user.c @@ -0,0 +1,932 @@ +#include "mptcp_rbs_user.h" 
+#include "mptcp_rbs_optimizer.h" +#include "mptcp_rbs_parser.h" +#include "mptcp_rbs_sched.h" +#include "mptcp_rbs_scheduler.h" +#include <../fs/proc/internal.h> +#include +#include +#include + +struct rbs_dir; + +struct rbs_sub_dir { + struct rbs_sub_dir *next; + struct mptcp_rbs_scheduler *scheduler; + struct proc_dir_entry *dir; +}; + +struct rbs_dir { + struct rbs_dir *next; + struct proc_dir_entry *dir; + struct rbs_sub_dir *first_sub_dir; +}; + +static struct rbs_dir *first_rbs_dir = NULL; +static DEFINE_MUTEX(delete_mutex); +static struct mptcp_rbs_scheduler *schedulers_to_delete = NULL; + +static void link_rbs_sub_dir(struct rbs_dir *dir, struct rbs_sub_dir *sub_dir) +{ + sub_dir->next = dir->first_sub_dir; + dir->first_sub_dir = sub_dir; +} + +static void unlink_rbs_sub_dir(struct rbs_dir *dir, struct rbs_sub_dir *sub_dir) +{ + struct rbs_sub_dir *tmp = dir->first_sub_dir; + struct rbs_sub_dir *prev = NULL; + + while (tmp != sub_dir) { + if (!tmp) + return; + + prev = tmp; + tmp = tmp->next; + } + + if (!prev) + dir->first_sub_dir = sub_dir->next; + else + prev->next = sub_dir->next; + + sub_dir->next = NULL; +} + +static void link_rbs_dir(struct rbs_dir *dir) +{ + dir->next = first_rbs_dir; + first_rbs_dir = dir; +} + +static void unlink_rbs_dir(struct rbs_dir *dir) +{ + struct rbs_dir *tmp = first_rbs_dir; + struct rbs_dir *prev = NULL; + + while (tmp != dir) { + if (!tmp) + return; + + prev = tmp; + tmp = tmp->next; + } + + if (!prev) + first_rbs_dir = dir->next; + else + prev->next = dir->next; + + dir->next = NULL; +} + +static void link_scheduler_to_delete(struct mptcp_rbs_scheduler *scheduler) +{ + struct mptcp_rbs_scheduler *tmp; + + mutex_lock(&delete_mutex); + + /* Check if the scheduler is already in the list */ + tmp = schedulers_to_delete; + while (tmp) { + if (tmp == scheduler) + break; + tmp = tmp->next; + } + + if (tmp != scheduler) { + scheduler->next = schedulers_to_delete; + schedulers_to_delete = scheduler; + } + + mutex_unlock(&delete_mutex); +} + +static bool create_scheduler_sub_dir(struct rbs_dir *rbs_dir, + struct mptcp_rbs_scheduler *scheduler); +static void remove_scheduler_sub_dir(struct rbs_dir *rbs_dir, + struct mptcp_rbs_scheduler *scheduler); + +static void delete_scheduler(struct work_struct *work) +{ + struct mptcp_rbs_scheduler *scheduler; + struct rbs_dir *rbs_dir; + + mutex_lock(&delete_mutex); + + while (schedulers_to_delete) { + scheduler = schedulers_to_delete->next; + + /* Delete the proc entries for the scheduler in all rbs + * directories + */ + rbs_dir = first_rbs_dir; + while (rbs_dir) { + remove_scheduler_sub_dir(rbs_dir, schedulers_to_delete); + rbs_dir = rbs_dir->next; + } + + /* Remove the CFG */ + mptcp_rbs_scheduler_free(schedulers_to_delete); + + schedulers_to_delete = scheduler; + } + + mutex_unlock(&delete_mutex); +} + +static DECLARE_WORK(delete_task, delete_scheduler); + +/* + * info proc entry + */ + +static int info_show(struct seq_file *seq, void *v) +{ + seq_printf(seq, "Number of OOO advanced hits %u\n", mptcp_ooo_number_matches); + + return 0; +} + +static int info_open(struct inode *inode, struct file *file) +{ + return single_open(file, info_show, PDE_DATA(inode)); +} + +static const struct file_operations info_file_ops = { + .owner = THIS_MODULE, + .open = info_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +/* + * schedulers proc entry + */ + +static int schedulers_show(struct seq_file *seq, void *v) +{ + const struct mptcp_rbs_scheduler *scheduler = + 
mptcp_rbs_scheduler_get_registered(); + + seq_printf(seq, "id: scheduler name\n"); + while (scheduler) { + seq_printf(seq, "%p: %s\n", scheduler, scheduler->name); + scheduler = scheduler->next; + } + + return 0; +} + +static int schedulers_open(struct inode *inode, struct file *file) +{ + return single_open(file, schedulers_show, PDE_DATA(inode)); +} + +struct strbuffer { + char *start; + int len; +}; + +static ssize_t schedulers_write(struct file *file, const char __user *buf, + size_t size, loff_t *offset) +{ + struct strbuffer *buffer = + ((struct seq_file *) file->private_data)->private; + + if (!size) + return 0; + + if (buffer) { + char *new_start = + krealloc(buffer->start, buffer->len + size + 1, GFP_ATOMIC); + if (!new_start) { + printk("RBS: Could not allocate memory for scheduler " + "code\n"); + return -1; + } + + buffer->start = new_start; + } else { + buffer = kmalloc(sizeof(struct strbuffer), GFP_KERNEL); + if (!buffer) { + printk("RBS: Could not allocate memory for scheduler " + "code\n"); + return -1; + } + + buffer->len = 0; + buffer->start = kmalloc(size + 1, GFP_KERNEL); + if (!buffer->start) { + kfree(buffer); + printk("RBS: Could not allocate memory for scheduler " + "code\n"); + return -1; + } + } + ((struct seq_file *) file->private_data)->private = buffer; + + buffer->start[buffer->len + size] = 0; + copy_from_user(&buffer->start[buffer->len], buf, size); + buffer->len += size; + + return size; +} + +static int schedulers_release(struct inode *inode, struct file *file) +{ + struct strbuffer *buffer = + ((struct seq_file *) file->private_data)->private; + struct mptcp_rbs_scheduler *scheduler; + struct rbs_dir *rbs_dir; + + if (!buffer) + return 0; + + /* Parse the scheduler */ + scheduler = mptcp_rbs_scheduler_parse(buffer->start); + kfree(buffer->start); + kfree(buffer); + if (!scheduler) + return -1; + + /* Optimize the scheduler */ + if (mptcp_rbs_opts_enabled) { + mptcp_rbs_optimize(&scheduler->variations[0], &scheduler->del, + 0, false); + } + + /* Add the scheduler */ + if (!strcmp(scheduler->name, "info") || + !strcmp(scheduler->name, "schedulers") || + !strcmp(scheduler->name, "default") || + !mptcp_rbs_scheduler_register(scheduler)) { + mptcp_rbs_scheduler_free(scheduler); + return -2; + } + + /* Create the proc entries for the scheduler in every rbs directory */ + rbs_dir = first_rbs_dir; + while (rbs_dir) { + create_scheduler_sub_dir(rbs_dir, scheduler); + rbs_dir = rbs_dir->next; + } + + return 0; +} + +static const struct file_operations schedulers_file_ops = { + .owner = THIS_MODULE, + .open = schedulers_open, + .read = seq_read, + .write = schedulers_write, + .llseek = seq_lseek, + .release = schedulers_release, +}; + +/* + * default proc entry + */ + +static int default_show(struct seq_file *seq, void *v) +{ + seq_printf(seq, "%s\n", mptcp_rbs_scheduler_get_default()->name); + + return 0; +} + +static int default_open(struct inode *inode, struct file *file) +{ + return single_open(file, default_show, PDE_DATA(inode)); +} + +static ssize_t default_write(struct file *file, const char __user *buf, + size_t size, loff_t *offset) +{ + char *str; + char *trimmed_str; + struct mptcp_rbs_scheduler *scheduler = NULL; + const struct mptcp_rbs_scheduler *tmp = + mptcp_rbs_scheduler_get_registered(); + + if (size == 0) + return 0; + + str = kzalloc(size + 1, GFP_KERNEL); + copy_from_user(str, buf, size); + trimmed_str = strim(str); + + /* Find scheduler with the given name */ + while (tmp) { + if (!strcmp(trimmed_str, tmp->name)) { + scheduler = (struct 
mptcp_rbs_scheduler *) tmp; + break; + } + + tmp = tmp->next; + } + kfree(str); + + if (!scheduler) + return -1; + + mptcp_rbs_scheduler_set_default(scheduler); + return size; +} + +static const struct file_operations default_file_ops = { + .owner = THIS_MODULE, + .open = default_open, + .read = seq_read, + .write = default_write, + .llseek = seq_lseek, + .release = single_release, +}; + +/* + * opt proc entry + */ + +int mptcp_rbs_opts_enabled = +#ifdef CONFIG_MPTCP_RBSOPT +#ifdef CONFIG_MPTCP_RBSEBPF + 2 +#else + 1 +#endif +#else + 0 +#endif + ; + +static int opt_show(struct seq_file *seq, void *v) +{ + seq_printf(seq, "%d\n", mptcp_rbs_opts_enabled); + return 0; +} + +static int opt_open(struct inode *inode, struct file *file) +{ + return single_open(file, opt_show, PDE_DATA(inode)); +} + +static ssize_t opt_write(struct file *file, const char __user *buf, size_t size, + loff_t *offset) +{ + char *str; + char *trimmed_str; + + if (size == 0) + return 0; + + str = kzalloc(size + 1, GFP_KERNEL); + copy_from_user(str, buf, size); + trimmed_str = strim(str); + + if (!strcmp(trimmed_str, "0")) + mptcp_rbs_opts_enabled = 0; +#ifdef CONFIG_MPTCP_RBSOPT + else if (!strcmp(trimmed_str, "1")) + mptcp_rbs_opts_enabled = 1; +#ifdef CONFIG_MPTCP_RBSEBPF + else if (!strcmp(trimmed_str, "2")) + mptcp_rbs_opts_enabled = 2; +#endif +#endif + else + return -1; + + return size; +} + +static const struct file_operations opt_file_ops = { + .owner = THIS_MODULE, + .open = opt_open, + .read = seq_read, + .write = opt_write, + .llseek = seq_lseek, + .release = single_release, +}; + +/* + * measurements proc entry for default and round robin scheduler + */ + +extern u64 total_default_time_skb; +extern u64 total_default_time_no_skb; +extern u64 total_default_count_skb; +extern u64 total_default_count_no_skb; + +extern u64 total_rr_time_skb; +extern u64 total_rr_time_no_skb; +extern u64 total_rr_count_skb; +extern u64 total_rr_count_no_skb; + +static int measurements2_show(struct seq_file *seq, void *v) +{ + seq_printf(seq, "Default:\n"); + seq_printf(seq, " state executions time\n"); + +#ifdef CONFIG_MPTCP_RBSMEASURE + seq_printf(seq, " no_skb %13llu %13llu\n", + total_default_count_no_skb, total_default_time_no_skb); + seq_printf(seq, " skb %13llu %13llu\n", total_default_count_skb, + total_default_time_skb); +#endif + + seq_printf(seq, "\n"); + seq_printf(seq, "Round Robin:\n"); + seq_printf(seq, " state executions time\n"); + +#ifdef CONFIG_MPTCP_RBSMEASURE + seq_printf(seq, " no_skb %13llu %13llu\n", total_rr_count_no_skb, + total_rr_time_no_skb); + seq_printf(seq, " skb %13llu %13llu\n", total_rr_count_skb, + total_rr_time_skb); +#endif + + return 0; +} + +static int measurements2_open(struct inode *inode, struct file *file) +{ + return single_open(file, measurements2_show, PDE_DATA(inode)); +} + +static ssize_t measurements2_write(struct file *file, const char __user *buf, + size_t size, loff_t *offset) +{ + char *str; + char *trimmed_str; + bool b; + + if (size == 0) + return 0; + + str = kzalloc(size + 1, GFP_KERNEL); + copy_from_user(str, buf, size); + trimmed_str = strim(str); + + /* Check if value == "0" */ + if (!strtobool(trimmed_str, &b) && !b) { +#ifdef CONFIG_MPTCP_RBSMEASURE + total_default_time_skb = 0; + total_default_time_no_skb = 0; + total_default_count_skb = 0; + total_default_count_no_skb = 0; + + total_rr_time_skb = 0; + total_rr_time_no_skb = 0; + total_rr_count_skb = 0; + total_rr_count_no_skb = 0; +#endif + kfree(str); + return size; + } + kfree(str); + + return -1; +} + +static const 
struct file_operations measurements2_file_ops = { + .owner = THIS_MODULE, + .open = measurements2_open, + .read = seq_read, + .write = measurements2_write, + .llseek = seq_lseek, + .release = single_release, +}; + +/* + * delete proc entry (per scheduler) + */ + +static int delete_show(struct seq_file *seq, void *v) +{ + seq_printf(seq, "0\n"); + + return 0; +} + +static int delete_open(struct inode *inode, struct file *file) +{ + return single_open(file, delete_show, PDE_DATA(inode)); +} + +static ssize_t delete_write(struct file *file, const char __user *buf, + size_t size, loff_t *offset) +{ + struct mptcp_rbs_scheduler *scheduler = + ((struct seq_file *) file->private_data)->private; + char *str; + char *trimmed_str; + bool b; + + if (size == 0) + return 0; + + str = kzalloc(size + 1, GFP_KERNEL); + copy_from_user(str, buf, size); + trimmed_str = strim(str); + + /* Check if value == "1" */ + if (!strtobool(trimmed_str, &b) && b) { + scheduler->del = true; + kfree(str); + return size; + } + kfree(str); + + return -1; +} + +static int delete_release(struct inode *inode, struct file *file) +{ + struct mptcp_rbs_scheduler *scheduler = PDE_DATA(inode); + int result = single_release(inode, file); + + if (scheduler->del) { + /* Check if the scheduler is in use */ + if (scheduler->usage > 0 || + scheduler == mptcp_rbs_scheduler_get_default()) { + /* Cannot delete the scheduler */ + scheduler->del = false; + } else { + /* Remove the scheduler from usable ones */ + mptcp_rbs_scheduler_unregister(scheduler); + + /* Put the scheduler in the queue of schedulers to + * delete + */ + link_scheduler_to_delete(scheduler); + + /* Schedule the delete task because we cannot delete the + * delete proc entry here + */ + schedule_work(&delete_task); + } + } + + return result; +} + +static const struct file_operations delete_file_ops = { + .owner = THIS_MODULE, + .open = delete_open, + .read = seq_read, + .write = delete_write, + .llseek = seq_lseek, + .release = delete_release, +}; + +/* + * dump proc entry (per scheduler) + */ + +static int dump_show(struct seq_file *seq, void *v) +{ + const struct mptcp_rbs_scheduler *scheduler = seq->private; + int len = 0; + int variation_count = 0; + int i; + int pos; + char *str; + + for (i = 0; i < MPTCP_RBS_VARIATION_COUNT; ++i) { + if (!scheduler->variations[i].first_block) + break; + + len += strlen("Subflows :\n") + (i > 9 ? 
2 : 1) + 1 + + mptcp_rbs_scheduler_print(scheduler, i, NULL); + ++variation_count; + } + + str = kzalloc(len + 1, GFP_KERNEL); + if (!str) { + seq_printf(seq, "\n"); + return 0; + } + + pos = 0; + for (i = 0; i < variation_count; ++i) { + pos += sprintf(&str[pos], "Subflows %d:\n", + scheduler->variations[i].sbf_num); + pos += mptcp_rbs_scheduler_print(scheduler, i, &str[pos]); + pos += sprintf(&str[pos], "\n"); + } + + seq_printf(seq, "%s", str); + kfree(str); + + return 0; +} + +static int dump_open(struct inode *inode, struct file *file) +{ + return single_open(file, dump_show, PDE_DATA(inode)); +} + +static const struct file_operations dump_file_ops = { + .owner = THIS_MODULE, + .open = dump_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +/* + * measurements proc entry (per scheduler) + */ + +static int measurements_show(struct seq_file *seq, void *v) +{ + const struct mptcp_rbs_scheduler *scheduler = seq->private; +#ifdef CONFIG_MPTCP_RBSMEASURE + int i; +#endif + + seq_printf(seq, " subflows executions time\n"); + +#ifdef CONFIG_MPTCP_RBSMEASURE + for (i = 0; i < MPTCP_RBS_VARIATION_COUNT; ++i) { + if (!scheduler->variations[i].first_block) + break; + + seq_printf(seq, "%10d %13llu %13llu\n", + scheduler->variations[i].sbf_num, + scheduler->variations[i].exec_count, + scheduler->variations[i].total_time); + } +#endif + + seq_printf(seq, "\n"); + seq_printf(seq, " state executions time\n"); + +#ifdef CONFIG_MPTCP_RBSMEASURE + seq_printf(seq, "noa_no_skb %13llu %13llu\n", + scheduler->total_count_noa_no_skb, + scheduler->total_time_noa_no_skb); + seq_printf(seq, " noa_skb %13llu %13llu\n", + scheduler->total_count_noa_skb, + scheduler->total_time_noa_skb); + seq_printf(seq, " oa_skb %13llu %13llu\n", + scheduler->total_count_oa_skb, scheduler->total_time_oa_skb); + seq_printf(seq, "execno_skb %13llu %13llu\n", + scheduler->total_exec_count_no_skb, + scheduler->total_exec_time_no_skb); + seq_printf(seq, " exec_skb %13llu %13llu\n", + scheduler->total_exec_count_skb, + scheduler->total_exec_time_skb); +#endif + + return 0; +} + +/* + * scheduler info proc entry (per scheduler) + */ + +static int scheduler_info_show(struct seq_file *seq, void *v) +{ + const struct mptcp_rbs_scheduler *scheduler = seq->private; + + seq_printf(seq, "Total bytes sent %llu\n", scheduler->total_bytes_sent); + return 0; +} + +static int measurements_open(struct inode *inode, struct file *file) +{ + return single_open(file, measurements_show, PDE_DATA(inode)); +} + +static ssize_t measurements_write(struct file *file, const char __user *buf, + size_t size, loff_t *offset) +{ + struct mptcp_rbs_scheduler *scheduler = + ((struct seq_file *) file->private_data)->private; + char *str; + char *trimmed_str; + bool b; +#ifdef CONFIG_MPTCP_RBSMEASURE + int i; +#endif + + if (size == 0) + return 0; + + str = kzalloc(size + 1, GFP_KERNEL); + copy_from_user(str, buf, size); + trimmed_str = strim(str); + + /* Check if value == "0" */ + if (!strtobool(trimmed_str, &b) && !b) { +#ifdef CONFIG_MPTCP_RBSMEASURE + for (i = 0; i < MPTCP_RBS_VARIATION_COUNT; ++i) { + if (!scheduler->variations[i].first_block) + break; + + scheduler->variations[i].exec_count = 0; + scheduler->variations[i].total_time = 0; + } + + scheduler->total_count_noa_no_skb = 0; + scheduler->total_time_noa_no_skb = 0; + scheduler->total_count_noa_skb = 0; + scheduler->total_time_noa_skb = 0; + scheduler->total_count_oa_skb = 0; + scheduler->total_time_oa_skb = 0; + scheduler->total_exec_count_no_skb = 0; + 
scheduler->total_exec_time_no_skb = 0; + scheduler->total_exec_count_skb = 0; + scheduler->total_exec_time_skb = 0; +#endif + kfree(str); + return size; + } + kfree(str); + + return -1; +} + +static const struct file_operations measurements_file_ops = { + .owner = THIS_MODULE, + .open = measurements_open, + .read = seq_read, + .write = measurements_write, + .llseek = seq_lseek, + .release = single_release, +}; + +static int scheduler_info_open(struct inode *inode, struct file *file) +{ + return single_open(file, scheduler_info_show, PDE_DATA(inode)); +} + +static const struct file_operations scheduler_info_file_ops = { + .owner = THIS_MODULE, + .open = scheduler_info_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static bool create_scheduler_sub_dir(struct rbs_dir *rbs_dir, + struct mptcp_rbs_scheduler *scheduler) +{ + struct proc_dir_entry *dir; + struct rbs_sub_dir *rbs_sub_dir; + + dir = proc_mkdir(scheduler->name, rbs_dir->dir); + if (!dir) + return false; + + rbs_sub_dir = kmalloc(sizeof(struct rbs_sub_dir), GFP_KERNEL); + rbs_sub_dir->scheduler = scheduler; + rbs_sub_dir->dir = dir; + link_rbs_sub_dir(rbs_dir, rbs_sub_dir); + + mutex_lock(&delete_mutex); + proc_create_data("delete", S_IRUGO, dir, &delete_file_ops, scheduler); + proc_create_data("dump", S_IRUGO, dir, &dump_file_ops, scheduler); + proc_create_data("measurements", S_IRUGO, dir, &measurements_file_ops, + scheduler); + proc_create_data("info", S_IRUGO, dir, &scheduler_info_file_ops, scheduler); + mutex_unlock(&delete_mutex); + + return true; +} + +static void remove_scheduler_sub_dir(struct rbs_dir *rbs_dir, + struct mptcp_rbs_scheduler *scheduler) +{ + struct rbs_sub_dir *rbs_sub_dir = rbs_dir->first_sub_dir; + + while (rbs_sub_dir) { + if (rbs_sub_dir->scheduler == scheduler) { + remove_proc_entry("delete", rbs_sub_dir->dir); + remove_proc_entry("dump", rbs_sub_dir->dir); + remove_proc_entry("measurements", rbs_sub_dir->dir); + remove_proc_entry("info", rbs_sub_dir->dir); + remove_proc_entry(scheduler->name, rbs_dir->dir); + + unlink_rbs_sub_dir(rbs_dir, rbs_sub_dir); + kfree(rbs_sub_dir); + break; + } + + rbs_sub_dir = rbs_sub_dir->next; + } +} + +static int init_subsys(struct net *net) +{ +#ifndef NS3 + struct rbs_dir *rbs_dir; + struct proc_dir_entry *dir; + struct mptcp_rbs_scheduler *scheduler; + + rbs_dir = kmalloc(sizeof(struct rbs_dir), GFP_KERNEL); + + dir = proc_mkdir_data("rbs", 0, net->mptcp.proc_net_mptcp, rbs_dir); + if (!dir) { + kfree(rbs_dir); + return -ENOMEM; + } + + if (!proc_create_data("info", S_IRUGO, dir, &info_file_ops, net)) { + remove_proc_entry("rbs", net->mptcp.proc_net_mptcp); + kfree(rbs_dir); + return -ENOMEM; + } + + if (!proc_create_data("schedulers", S_IRUGO, dir, &schedulers_file_ops, + NULL)) { + remove_proc_entry("info", dir); + remove_proc_entry("rbs", net->mptcp.proc_net_mptcp); + kfree(rbs_dir); + return -ENOMEM; + } + + if (!proc_create("default", S_IRUGO, dir, &default_file_ops)) { + remove_proc_entry("schedulers", dir); + remove_proc_entry("info", dir); + remove_proc_entry("rbs", net->mptcp.proc_net_mptcp); + kfree(rbs_dir); + return -ENOMEM; + } + + if (!proc_create("opt", S_IRUGO, dir, &opt_file_ops)) { + remove_proc_entry("schedulers", dir); + remove_proc_entry("info", dir); + remove_proc_entry("default", dir); + remove_proc_entry("rbs", net->mptcp.proc_net_mptcp); + kfree(rbs_dir); + return -ENOMEM; + } + + if (!proc_create("measurements", S_IRUGO, dir, + &measurements2_file_ops)) { + remove_proc_entry("schedulers", dir); + 
remove_proc_entry("info", dir); + remove_proc_entry("default", dir); + remove_proc_entry("opt", dir); + remove_proc_entry("rbs", net->mptcp.proc_net_mptcp); + kfree(rbs_dir); + return -ENOMEM; + } + + rbs_dir->dir = dir; + rbs_dir->first_sub_dir = NULL; + link_rbs_dir(rbs_dir); + net->mptcp.proc_net_mptcp_rbs = dir; + + scheduler = mptcp_rbs_scheduler_get_registered(); + while (scheduler) { + create_scheduler_sub_dir(rbs_dir, scheduler); + scheduler = scheduler->next; + } +#endif + + return 0; +} + +static void exit_subsys(struct net *net) +{ +#ifndef NS3 + struct rbs_dir *rbs_dir; + struct mptcp_rbs_scheduler *scheduler; + + if (!net->mptcp.proc_net_mptcp_rbs) + return; + + rbs_dir = net->mptcp.proc_net_mptcp_rbs->data; + + scheduler = mptcp_rbs_scheduler_get_registered(); + while (scheduler) { + remove_scheduler_sub_dir(rbs_dir, scheduler); + scheduler = scheduler->next; + } + + remove_proc_entry("info", rbs_dir->dir); + remove_proc_entry("schedulers", rbs_dir->dir); + remove_proc_entry("default", rbs_dir->dir); + remove_proc_entry("opt", rbs_dir->dir); + remove_proc_entry("measurements", rbs_dir->dir); + remove_proc_entry("rbs", net->mptcp.proc_net_mptcp); + net->mptcp.proc_net_mptcp_rbs = NULL; + + unlink_rbs_dir(rbs_dir); + kfree(rbs_dir); +#endif +} + +static struct pernet_operations proc_ops = { + .init = init_subsys, + .exit = exit_subsys, +}; + +bool mptcp_rbs_user_interface_init(void) +{ + return !register_pernet_subsys(&proc_ops); +} diff --git a/net/mptcp/mptcp_rbs_user.h b/net/mptcp/mptcp_rbs_user.h new file mode 100644 index 0000000000000..ccb23589f4370 --- /dev/null +++ b/net/mptcp/mptcp_rbs_user.h @@ -0,0 +1,19 @@ +#ifndef _MPTCP_RBS_USER_H +#define _MPTCP_RBS_USER_H + +#include + +/* Variable to indicate that optimizations are enabled. + * 0: Optimizations are disabled + * 1: CFG based optimizations enabled + * 2: CFG based optimizations + eBPF code generation enabled + */ +extern int mptcp_rbs_opts_enabled; + +/* + * Initializes the user proc interface + * @return: false if an error occurred + */ +bool mptcp_rbs_user_interface_init(void); + +#endif diff --git a/net/mptcp/mptcp_rbs_value.c b/net/mptcp/mptcp_rbs_value.c new file mode 100644 index 0000000000000..6d34946ad4bb3 --- /dev/null +++ b/net/mptcp/mptcp_rbs_value.c @@ -0,0 +1,5518 @@ +#include "mptcp_rbs_value.h" +#include "mptcp_rbs_ctx.h" +#include "mptcp_rbs_lexer.h" +#include "mptcp_rbs_parser.h" +#include "mptcp_rbs_queue.h" +#include "mptcp_rbs_sched.h" +#include + +/* Macro to clone values */ +#define APPLY_CLONE_VALUE(ENUM, STR, STRUCT, RETURNTYPE) \ + case ENUM: { \ + return (struct mptcp_rbs_value *) STRUCT##_clone( \ + ctx, (const struct STRUCT *) value); \ + } + +/* Macro to print values */ +#define APPLY_PRINT_VALUE(ENUM, STR, STRUCT, RETURNTYPE) \ + case ENUM: { \ + return STRUCT##_print((const struct STRUCT *) value, buffer); \ + } + +/* Macro to get the type of a value */ +#define APPLY_GET_VALUE_TYPE(ENUM, STR, STRUCT, RETURNTYPE) \ + case ENUM: \ + return RETURNTYPE; + +/* Context only for cloning. This is necessary since we cannot simply copy + * sbf/skb progress references to filters. Instead we have to replace the + * pointer. 
Note that we assume that no more than MAX_NESTING filters are nested + */ +struct mptcp_rbs_value_clone_ctx { +#define MAX_NESTING 10 + struct { + const void *repl; + void *repl_with; + } repls[MAX_NESTING]; + void *user_ctx; + mptcp_rbs_value_clone_user_func user_func; +}; + +struct mptcp_rbs_value *mptcp_rbs_value_clone_ex( + struct mptcp_rbs_value_clone_ctx *ctx, const struct mptcp_rbs_value *value); + +#define CLONE(val) \ + val = (typeof(val)) mptcp_rbs_value_clone_ex( \ + ctx, (struct mptcp_rbs_value *) val) + +struct mptcp_rbs_value_constint *mptcp_rbs_value_constint_new(unsigned int num) +{ + struct mptcp_rbs_value_constint *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_constint), GFP_KERNEL); + value->kind = VALUE_KIND_CONSTINT; + value->free = &mptcp_rbs_value_constint_free; + value->execute = &mptcp_rbs_value_constint_execute; + value->value = num; + + return value; +} + +void mptcp_rbs_value_constint_free(struct mptcp_rbs_value_constint *self) +{ + kfree(self); +} + +s64 mptcp_rbs_value_constint_execute(struct mptcp_rbs_value_constint *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + return self->value; +} + +struct mptcp_rbs_value_constint *mptcp_rbs_value_constint_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_constint *value) +{ + struct mptcp_rbs_value_constint *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_constint), GFP_KERNEL); + *clone = *value; + + return clone; +} + +int mptcp_rbs_value_constint_print(const struct mptcp_rbs_value_constint *value, + char *buffer) +{ + return sprintf_null(&buffer, "%u", value->value); +} + +struct mptcp_rbs_value_conststring *mptcp_rbs_value_conststring_new(char *str) +{ + struct mptcp_rbs_value_conststring *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_conststring), GFP_KERNEL); + value->kind = VALUE_KIND_CONSTSTRING; + value->free = &mptcp_rbs_value_conststring_free; + value->execute = &mptcp_rbs_value_conststring_execute; + value->value = str; + + return value; +} + +void mptcp_rbs_value_conststring_free(struct mptcp_rbs_value_conststring *self) +{ + kfree(self->value); + kfree(self); +} + +char *mptcp_rbs_value_conststring_execute( + struct mptcp_rbs_value_conststring *self, struct mptcp_rbs_eval_ctx *ctx) +{ + return self->value; +} + +struct mptcp_rbs_value_conststring *mptcp_rbs_value_conststring_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_conststring *value) +{ + struct mptcp_rbs_value_conststring *clone; + int len; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_conststring), GFP_KERNEL); + *clone = *value; + len = strlen(value->value); + clone->value = kmalloc(len + 1, GFP_KERNEL); + memcpy(clone->value, value->value, len + 1); + + return clone; +} + +int mptcp_rbs_value_conststring_print( + const struct mptcp_rbs_value_conststring *value, char *buffer) +{ + int len = 0; + char *str; + + len = replace_with_escape_chars(value->value, false); + if (!buffer) + return len + 2; + + str = kmalloc(len + 1, GFP_KERNEL); + memcpy(str, value->value, strlen(value->value) + 1); + replace_with_escape_chars(str, true); + len = sprintf_null(&buffer, "\"%s\"", str); + kfree(str); + return len; +} + +struct mptcp_rbs_value_null *mptcp_rbs_value_null_new(void) +{ + struct mptcp_rbs_value_null *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_null), GFP_KERNEL); + value->kind = VALUE_KIND_NULL; + value->free = &mptcp_rbs_value_null_free; + value->execute = &mptcp_rbs_value_null_execute; + + return value; +} + +void 
mptcp_rbs_value_null_free(struct mptcp_rbs_value_null *self) +{ + kfree(self); +} + +s32 mptcp_rbs_value_null_execute(struct mptcp_rbs_value_null *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + return -1; +} + +struct mptcp_rbs_value_null *mptcp_rbs_value_null_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_null *value) +{ + struct mptcp_rbs_value_null *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_null), GFP_KERNEL); + *clone = *value; + + return clone; +} + +int mptcp_rbs_value_null_print(const struct mptcp_rbs_value_null *value, + char *buffer) +{ + return sprintf_null(&buffer, "NULL"); +} + +struct mptcp_rbs_value_bool_var *mptcp_rbs_value_bool_var_new(int var_number) +{ + struct mptcp_rbs_value_bool_var *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_bool_var), GFP_KERNEL); + value->kind = VALUE_KIND_BOOL_VAR; + value->free = mptcp_rbs_value_bool_var_free; + value->execute = mptcp_rbs_value_bool_var_execute; + value->var_number = var_number; + + return value; +} + +void mptcp_rbs_value_bool_var_free(struct mptcp_rbs_value_bool_var *self) +{ + kfree(self); +} + +s32 mptcp_rbs_value_bool_var_execute(struct mptcp_rbs_value_bool_var *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + struct mptcp_rbs_var *var = &ctx->vars[self->var_number]; + + if (var->is_lazy) { + struct mptcp_rbs_value_bool *value = + (struct mptcp_rbs_value_bool *) var->lazy_value; + var->bool_value = value->execute(value, ctx); + var->is_lazy = false; + } + + return var->bool_value; +} + +struct mptcp_rbs_value_bool_var *mptcp_rbs_value_bool_var_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_bool_var *value) +{ + struct mptcp_rbs_value_bool_var *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_bool_var), GFP_KERNEL); + *clone = *value; + + return clone; +} + +int mptcp_rbs_value_bool_var_print(const struct mptcp_rbs_value_bool_var *value, + char *buffer) +{ + return sprintf_null(&buffer, "v%d", value->var_number + 1); +} + +struct mptcp_rbs_value_int_var *mptcp_rbs_value_int_var_new(int var_number) +{ + struct mptcp_rbs_value_int_var *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_int_var), GFP_KERNEL); + value->kind = VALUE_KIND_INT_VAR; + value->free = mptcp_rbs_value_int_var_free; + value->execute = mptcp_rbs_value_int_var_execute; + value->var_number = var_number; + + return value; +} + +void mptcp_rbs_value_int_var_free(struct mptcp_rbs_value_int_var *self) +{ + kfree(self); +} + +s64 mptcp_rbs_value_int_var_execute(struct mptcp_rbs_value_int_var *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + struct mptcp_rbs_var *var = &ctx->vars[self->var_number]; + + if (var->is_lazy) { + struct mptcp_rbs_value_int *value = + (struct mptcp_rbs_value_int *) var->lazy_value; + var->int_value = value->execute(value, ctx); + var->is_lazy = false; + } + + return var->int_value; +} + +struct mptcp_rbs_value_int_var *mptcp_rbs_value_int_var_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_int_var *value) +{ + struct mptcp_rbs_value_int_var *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_int_var), GFP_KERNEL); + *clone = *value; + + return clone; +} + +int mptcp_rbs_value_int_var_print(const struct mptcp_rbs_value_int_var *value, + char *buffer) +{ + return sprintf_null(&buffer, "v%d", value->var_number + 1); +} + +struct mptcp_rbs_value_string_var *mptcp_rbs_value_string_var_new( + int var_number) +{ + struct mptcp_rbs_value_string_var *value; + + value = kzalloc(sizeof(struct 
mptcp_rbs_value_string_var), GFP_KERNEL); + value->kind = VALUE_KIND_STRING_VAR; + value->free = mptcp_rbs_value_string_var_free; + value->execute = mptcp_rbs_value_string_var_execute; + value->var_number = var_number; + + return value; +} + +void mptcp_rbs_value_string_var_free(struct mptcp_rbs_value_string_var *self) +{ + kfree(self); +} + +char *mptcp_rbs_value_string_var_execute( + struct mptcp_rbs_value_string_var *self, struct mptcp_rbs_eval_ctx *ctx) +{ + struct mptcp_rbs_var *var = &ctx->vars[self->var_number]; + + if (var->is_lazy) { + struct mptcp_rbs_value_string *value = + (struct mptcp_rbs_value_string *) var->lazy_value; + var->string_value = value->execute(value, ctx); + var->is_lazy = false; + } + + return var->string_value; +} + +struct mptcp_rbs_value_string_var *mptcp_rbs_value_string_var_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_string_var *value) +{ + struct mptcp_rbs_value_string_var *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_string_var), GFP_KERNEL); + *clone = *value; + + return clone; +} + +int mptcp_rbs_value_string_var_print( + const struct mptcp_rbs_value_string_var *value, char *buffer) +{ + return sprintf_null(&buffer, "v%d", value->var_number + 1); +} + +struct mptcp_rbs_value_sbf_var *mptcp_rbs_value_sbf_var_new(int var_number) +{ + struct mptcp_rbs_value_sbf_var *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_sbf_var), GFP_KERNEL); + value->kind = VALUE_KIND_SBF_VAR; + value->free = mptcp_rbs_value_sbf_var_free; + value->execute = mptcp_rbs_value_sbf_var_execute; + value->var_number = var_number; + + return value; +} + +void mptcp_rbs_value_sbf_var_free(struct mptcp_rbs_value_sbf_var *self) +{ + kfree(self); +} + +struct tcp_sock *mptcp_rbs_value_sbf_var_execute( + struct mptcp_rbs_value_sbf_var *self, struct mptcp_rbs_eval_ctx *ctx) +{ + struct mptcp_rbs_var *var = &ctx->vars[self->var_number]; + + if (var->is_lazy) { + struct mptcp_rbs_value_sbf *value = + (struct mptcp_rbs_value_sbf *) var->lazy_value; + var->sbf_value = value->execute(value, ctx); + var->is_lazy = false; + } + + return var->sbf_value; +} + +struct mptcp_rbs_value_sbf_var *mptcp_rbs_value_sbf_var_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_var *value) +{ + struct mptcp_rbs_value_sbf_var *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_sbf_var), GFP_KERNEL); + *clone = *value; + + return clone; +} + +int mptcp_rbs_value_sbf_var_print(const struct mptcp_rbs_value_sbf_var *value, + char *buffer) +{ + return sprintf_null(&buffer, "v%d", value->var_number + 1); +} + +struct mptcp_rbs_value_sbf_list_var *mptcp_rbs_value_sbf_list_var_new( + int var_number) +{ + struct mptcp_rbs_value_sbf_list_var *value; + + value = + kzalloc(sizeof(struct mptcp_rbs_value_sbf_list_var), GFP_KERNEL); + value->kind = VALUE_KIND_SBFLIST_VAR; + value->free = mptcp_rbs_value_sbf_list_var_free; + value->execute = mptcp_rbs_value_sbf_list_var_execute; + value->var_number = var_number; + + return value; +} + +void mptcp_rbs_value_sbf_list_var_free( + struct mptcp_rbs_value_sbf_list_var *self) +{ + kfree(self); +} + +struct tcp_sock *mptcp_rbs_value_sbf_list_var_execute( + struct mptcp_rbs_value_sbf_list_var *self, struct mptcp_rbs_eval_ctx *ctx, + void **prev, bool *is_null) +{ + struct mptcp_rbs_var *var = &ctx->vars[self->var_number]; + struct tcp_sock **entry; + +printk("%s self %p *prev %p var->is_lazy %u coming from %pS for meta_sk %p\n", __func__, self, *prev, var->is_lazy, __builtin_return_address(0), 
ctx->mpcb->meta_sk); + + if (var->is_lazy) { + struct mptcp_rbs_value_sbf_list *value = + (struct mptcp_rbs_value_sbf_list *) var->lazy_value; + return value->execute(value, ctx, prev, is_null); + } + + if (!var->sbf_list_value) { + *is_null = true; + return NULL; + } + *is_null = false; + + if (*prev) + entry = ((struct tcp_sock **) *prev) + 1; + else + entry = var->sbf_list_value; + + if (*entry) + *prev = entry; + +printk("%s returns %p with is at %p\n", __func__, *entry, entry); + + return *entry; +} + +struct mptcp_rbs_value_sbf_list_var *mptcp_rbs_value_sbf_list_var_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_list_var *value) +{ + struct mptcp_rbs_value_sbf_list_var *clone; + + clone = + kmalloc(sizeof(struct mptcp_rbs_value_sbf_list_var), GFP_KERNEL); + *clone = *value; + + return clone; +} + +int mptcp_rbs_value_sbf_list_var_print( + const struct mptcp_rbs_value_sbf_list_var *value, char *buffer) +{ + return sprintf_null(&buffer, "v%d", value->var_number + 1); +} + +struct mptcp_rbs_value_skb_var *mptcp_rbs_value_skb_var_new(int var_number, + bool reinject) +{ + struct mptcp_rbs_value_skb_var *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_skb_var), GFP_KERNEL); + value->kind = VALUE_KIND_SKB_VAR; + value->free = mptcp_rbs_value_skb_var_free; + value->execute = mptcp_rbs_value_skb_var_execute; + value->reinject = reinject; + value->var_number = var_number; + + return value; +} + +void mptcp_rbs_value_skb_var_free(struct mptcp_rbs_value_skb_var *self) +{ + kfree(self); +} + +struct sk_buff *mptcp_rbs_value_skb_var_execute( + struct mptcp_rbs_value_skb_var *self, struct mptcp_rbs_eval_ctx *ctx) +{ + struct mptcp_rbs_var *var = &ctx->vars[self->var_number]; + + if (var->is_lazy) { + struct mptcp_rbs_value_skb *value = + (struct mptcp_rbs_value_skb *) var->lazy_value; + var->skb_value = value->execute(value, ctx); + var->is_lazy = false; + } + + return var->skb_value; +} + +struct mptcp_rbs_value_skb_var *mptcp_rbs_value_skb_var_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_skb_var *value) +{ + struct mptcp_rbs_value_skb_var *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_skb_var), GFP_KERNEL); + *clone = *value; + + return clone; +} + +int mptcp_rbs_value_skb_var_print(const struct mptcp_rbs_value_skb_var *value, + char *buffer) +{ + return sprintf_null(&buffer, "v%d", value->var_number + 1); +} + +struct mptcp_rbs_value_skb_list_var *mptcp_rbs_value_skb_list_var_new( + int var_number, enum mptcp_rbs_value_kind underlying_queue_kind) +{ + struct mptcp_rbs_value_skb_list_var *value; + + value = + kzalloc(sizeof(struct mptcp_rbs_value_skb_list_var), GFP_KERNEL); + value->kind = VALUE_KIND_SKBLIST_VAR; + value->free = mptcp_rbs_value_skb_list_var_free; + value->execute = mptcp_rbs_value_skb_list_var_execute; + value->var_number = var_number; + value->underlying_queue_kind = underlying_queue_kind; + + return value; +} + +void mptcp_rbs_value_skb_list_var_free( + struct mptcp_rbs_value_skb_list_var *self) +{ + kfree(self); +} + +struct sk_buff *mptcp_rbs_value_skb_list_var_execute( + struct mptcp_rbs_value_skb_list_var *self, struct mptcp_rbs_eval_ctx *ctx, + void **prev, bool *is_null) +{ + struct mptcp_rbs_var *var = &ctx->vars[self->var_number]; + struct sk_buff **entry; + + if (var->is_lazy) { + struct mptcp_rbs_value_skb_list *value = + (struct mptcp_rbs_value_skb_list *) var->lazy_value; + return value->execute(value, ctx, prev, is_null); + } + + if (!var->skb_list_value) { + *is_null = true; 
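+	/* List values use a cursor protocol: *prev carries the iteration
+	 * state between calls, and *is_null distinguishes a NULL list (true)
+	 * from an exhausted one (false with a NULL return).
+	 */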
+ return NULL; + } + *is_null = false; + + if (*prev) + entry = ((struct sk_buff **) *prev) + 1; + else + entry = var->skb_list_value; + + if (*entry) + *prev = entry; + + return *entry; +} + +struct mptcp_rbs_value_skb_list_var *mptcp_rbs_value_skb_list_var_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_skb_list_var *value) +{ + struct mptcp_rbs_value_skb_list_var *clone; + + clone = + kmalloc(sizeof(struct mptcp_rbs_value_skb_list_var), GFP_KERNEL); + *clone = *value; + + return clone; +} + +int mptcp_rbs_value_skb_list_var_print( + const struct mptcp_rbs_value_skb_list_var *value, char *buffer) +{ + return sprintf_null(&buffer, "v%d", value->var_number + 1); +} + +struct mptcp_rbs_value_not *mptcp_rbs_value_not_new( + struct mptcp_rbs_value_bool *operand) +{ + struct mptcp_rbs_value_not *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_not), GFP_KERNEL); + value->kind = VALUE_KIND_NOT; + value->free = &mptcp_rbs_value_not_free; + value->execute = &mptcp_rbs_value_not_execute; + value->operand = operand; + + return value; +} + +void mptcp_rbs_value_not_free(struct mptcp_rbs_value_not *self) +{ + MPTCP_RBS_VALUE_FREE(self->operand); + kfree(self); +} + +s32 mptcp_rbs_value_not_execute(struct mptcp_rbs_value_not *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + s32 b = self->operand->execute(self->operand, ctx); + + if (b == 0) + return 1; + if (b == -1) + return -1; + return 0; +} + +struct mptcp_rbs_value_not *mptcp_rbs_value_not_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_not *value) +{ + struct mptcp_rbs_value_not *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_not), GFP_KERNEL); + *clone = *value; + CLONE(clone->operand); + + return clone; +} + +int mptcp_rbs_value_not_print(const struct mptcp_rbs_value_not *value, + char *buffer) +{ + int len = sprintf_null(&buffer, "!"); + int tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->operand, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + return len; +} + +struct mptcp_rbs_value_equal *mptcp_rbs_value_equal_new( + struct mptcp_rbs_value_int *left_operand, + struct mptcp_rbs_value_int *right_operand) +{ + struct mptcp_rbs_value_equal *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_equal), GFP_KERNEL); + value->kind = VALUE_KIND_EQUAL; + value->free = &mptcp_rbs_value_equal_free; + value->execute = &mptcp_rbs_value_equal_execute; + value->left_operand = left_operand; + value->right_operand = right_operand; + + return value; +} + +void mptcp_rbs_value_equal_free(struct mptcp_rbs_value_equal *self) +{ + MPTCP_RBS_VALUE_FREE(self->left_operand); + MPTCP_RBS_VALUE_FREE(self->right_operand); + kfree(self); +} + +s32 mptcp_rbs_value_equal_execute(struct mptcp_rbs_value_equal *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + s64 left = self->left_operand->execute(self->left_operand, ctx); + s64 right = self->right_operand->execute(self->right_operand, ctx); + + if (left == right) + return 1; + return 0; +} + +struct mptcp_rbs_value_equal *mptcp_rbs_value_equal_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_equal *value) +{ + struct mptcp_rbs_value_equal *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_equal), GFP_KERNEL); + *clone = *value; + CLONE(clone->left_operand); + CLONE(clone->right_operand); + + return clone; +} + +int mptcp_rbs_value_equal_print(const struct mptcp_rbs_value_equal *value, + char *buffer) +{ + int len = sprintf_null(&buffer, "("); + int tmp_len = 
mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->left_operand, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, " == "); + + tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->right_operand, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, ")"); + return len; +} + +struct mptcp_rbs_value_unequal *mptcp_rbs_value_unequal_new( + struct mptcp_rbs_value_int *left_operand, + struct mptcp_rbs_value_int *right_operand) +{ + struct mptcp_rbs_value_unequal *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_unequal), GFP_KERNEL); + value->kind = VALUE_KIND_UNEQUAL; + value->free = &mptcp_rbs_value_unequal_free; + value->execute = &mptcp_rbs_value_unequal_execute; + value->left_operand = left_operand; + value->right_operand = right_operand; + + return value; +} + +void mptcp_rbs_value_unequal_free(struct mptcp_rbs_value_unequal *self) +{ + MPTCP_RBS_VALUE_FREE(self->left_operand); + MPTCP_RBS_VALUE_FREE(self->right_operand); + kfree(self); +} + +s32 mptcp_rbs_value_unequal_execute(struct mptcp_rbs_value_unequal *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + s64 left = self->left_operand->execute(self->left_operand, ctx); + s64 right = self->right_operand->execute(self->right_operand, ctx); + + if (left == right) + return 0; + return 1; +} + +struct mptcp_rbs_value_unequal *mptcp_rbs_value_unequal_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_unequal *value) +{ + struct mptcp_rbs_value_unequal *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_unequal), GFP_KERNEL); + *clone = *value; + CLONE(clone->left_operand); + CLONE(clone->right_operand); + + return clone; +} + +int mptcp_rbs_value_unequal_print(const struct mptcp_rbs_value_unequal *value, + char *buffer) +{ + int len = sprintf_null(&buffer, "("); + int tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->left_operand, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, " != "); + + tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->right_operand, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, ")"); + return len; +} + +struct mptcp_rbs_value_less *mptcp_rbs_value_less_new( + struct mptcp_rbs_value_int *left_operand, + struct mptcp_rbs_value_int *right_operand) +{ + struct mptcp_rbs_value_less *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_less), GFP_KERNEL); + value->kind = VALUE_KIND_LESS; + value->free = &mptcp_rbs_value_less_free; + value->execute = &mptcp_rbs_value_less_execute; + value->left_operand = left_operand; + value->right_operand = right_operand; + + return value; +} + +void mptcp_rbs_value_less_free(struct mptcp_rbs_value_less *self) +{ + MPTCP_RBS_VALUE_FREE(self->left_operand); + MPTCP_RBS_VALUE_FREE(self->right_operand); + kfree(self); +} + +s32 mptcp_rbs_value_less_execute(struct mptcp_rbs_value_less *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + s64 left = self->left_operand->execute(self->left_operand, ctx); + s64 right = self->right_operand->execute(self->right_operand, ctx); + + if (left == -1 || right == -1) + return -1; + if (left < right) + return 1; + return 0; +} + +struct mptcp_rbs_value_less *mptcp_rbs_value_less_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_less *value) +{ + struct mptcp_rbs_value_less *clone; + + clone = kmalloc(sizeof(struct 
mptcp_rbs_value_less), GFP_KERNEL); + *clone = *value; + CLONE(clone->left_operand); + CLONE(clone->right_operand); + + return clone; +} + +int mptcp_rbs_value_less_print(const struct mptcp_rbs_value_less *value, + char *buffer) +{ + int len = sprintf_null(&buffer, "("); + int tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->left_operand, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, " < "); + + tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->right_operand, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, ")"); + return len; +} + +struct mptcp_rbs_value_less_equal *mptcp_rbs_value_less_equal_new( + struct mptcp_rbs_value_int *left_operand, + struct mptcp_rbs_value_int *right_operand) +{ + struct mptcp_rbs_value_less_equal *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_less_equal), GFP_KERNEL); + value->kind = VALUE_KIND_LESS_EQUAL; + value->free = &mptcp_rbs_value_less_equal_free; + value->execute = &mptcp_rbs_value_less_equal_execute; + value->left_operand = left_operand; + value->right_operand = right_operand; + + return value; +} + +void mptcp_rbs_value_less_equal_free(struct mptcp_rbs_value_less_equal *self) +{ + MPTCP_RBS_VALUE_FREE(self->left_operand); + MPTCP_RBS_VALUE_FREE(self->right_operand); + kfree(self); +} + +s32 mptcp_rbs_value_less_equal_execute(struct mptcp_rbs_value_less_equal *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + s64 left = self->left_operand->execute(self->left_operand, ctx); + s64 right = self->right_operand->execute(self->right_operand, ctx); + + if (left == -1 || right == -1) + return -1; + if (left <= right) + return 1; + return 0; +} + +struct mptcp_rbs_value_less_equal *mptcp_rbs_value_less_equal_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_less_equal *value) +{ + struct mptcp_rbs_value_less_equal *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_less_equal), GFP_KERNEL); + *clone = *value; + CLONE(clone->left_operand); + CLONE(clone->right_operand); + + return clone; +} + +int mptcp_rbs_value_less_equal_print( + const struct mptcp_rbs_value_less_equal *value, char *buffer) +{ + int len = sprintf_null(&buffer, "("); + int tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->left_operand, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, " <= "); + + tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->right_operand, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, ")"); + return len; +} + +struct mptcp_rbs_value_greater *mptcp_rbs_value_greater_new( + struct mptcp_rbs_value_int *left_operand, + struct mptcp_rbs_value_int *right_operand) +{ + struct mptcp_rbs_value_greater *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_greater), GFP_KERNEL); + value->kind = VALUE_KIND_GREATER; + value->free = &mptcp_rbs_value_greater_free; + value->execute = &mptcp_rbs_value_greater_execute; + value->left_operand = left_operand; + value->right_operand = right_operand; + + return value; +} + +void mptcp_rbs_value_greater_free(struct mptcp_rbs_value_greater *self) +{ + MPTCP_RBS_VALUE_FREE(self->left_operand); + MPTCP_RBS_VALUE_FREE(self->right_operand); + kfree(self); +} + +s32 mptcp_rbs_value_greater_execute(struct mptcp_rbs_value_greater *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + s64 left = 
self->left_operand->execute(self->left_operand, ctx); + s64 right = self->right_operand->execute(self->right_operand, ctx); + + if (left == -1 || right == -1) + return -1; + if (left > right) + return 1; + return 0; +} + +struct mptcp_rbs_value_greater *mptcp_rbs_value_greater_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_greater *value) +{ + struct mptcp_rbs_value_greater *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_greater), GFP_KERNEL); + *clone = *value; + CLONE(clone->left_operand); + CLONE(clone->right_operand); + + return clone; +} + +int mptcp_rbs_value_greater_print(const struct mptcp_rbs_value_greater *value, + char *buffer) +{ + int len = sprintf_null(&buffer, "("); + int tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->left_operand, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, " > "); + + tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->right_operand, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, ")"); + return len; +} + +struct mptcp_rbs_value_greater_equal *mptcp_rbs_value_greater_equal_new( + struct mptcp_rbs_value_int *left_operand, + struct mptcp_rbs_value_int *right_operand) +{ + struct mptcp_rbs_value_greater_equal *value; + + value = + kzalloc(sizeof(struct mptcp_rbs_value_greater_equal), GFP_KERNEL); + value->kind = VALUE_KIND_GREATER_EQUAL; + value->free = &mptcp_rbs_value_greater_equal_free; + value->execute = &mptcp_rbs_value_greater_equal_execute; + value->left_operand = left_operand; + value->right_operand = right_operand; + + return value; +} + +void mptcp_rbs_value_greater_equal_free( + struct mptcp_rbs_value_greater_equal *self) +{ + MPTCP_RBS_VALUE_FREE(self->left_operand); + MPTCP_RBS_VALUE_FREE(self->right_operand); + kfree(self); +} + +s32 mptcp_rbs_value_greater_equal_execute( + struct mptcp_rbs_value_greater_equal *self, struct mptcp_rbs_eval_ctx *ctx) +{ + s64 left = self->left_operand->execute(self->left_operand, ctx); + s64 right = self->right_operand->execute(self->right_operand, ctx); + + if (left == -1 || right == -1) + return -1; + if (left >= right) + return 1; + return 0; +} + +struct mptcp_rbs_value_greater_equal *mptcp_rbs_value_greater_equal_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_greater_equal *value) +{ + struct mptcp_rbs_value_greater_equal *clone; + + clone = + kmalloc(sizeof(struct mptcp_rbs_value_greater_equal), GFP_KERNEL); + *clone = *value; + CLONE(clone->left_operand); + CLONE(clone->right_operand); + + return clone; +} + +int mptcp_rbs_value_greater_equal_print( + const struct mptcp_rbs_value_greater_equal *value, char *buffer) +{ + int len = sprintf_null(&buffer, "("); + int tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->left_operand, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, " >= "); + + tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->right_operand, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, ")"); + return len; +} + +struct mptcp_rbs_value_and *mptcp_rbs_value_and_new( + struct mptcp_rbs_value_bool *left_operand, + struct mptcp_rbs_value_bool *right_operand) +{ + struct mptcp_rbs_value_and *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_and), GFP_KERNEL); + value->kind = VALUE_KIND_AND; + value->free = &mptcp_rbs_value_and_free; + 
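+	/* Boolean values are ternary: 1 = true, 0 = false, -1 = NULL. AND
+	 * collapses a NULL operand to false (val <= 0 below), whereas the
+	 * ordering comparisons above return -1 when either operand is NULL.
+	 */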
value->execute = &mptcp_rbs_value_and_execute; + value->left_operand = left_operand; + value->right_operand = right_operand; + + return value; +} + +void mptcp_rbs_value_and_free(struct mptcp_rbs_value_and *self) +{ + MPTCP_RBS_VALUE_FREE(self->left_operand); + MPTCP_RBS_VALUE_FREE(self->right_operand); + kfree(self); +} + +s32 mptcp_rbs_value_and_execute(struct mptcp_rbs_value_and *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + s32 val = self->left_operand->execute(self->left_operand, ctx); + if (val <= 0) + return 0; + + val = self->right_operand->execute(self->right_operand, ctx); + if (val <= 0) + return 0; + + return 1; +} + +struct mptcp_rbs_value_and *mptcp_rbs_value_and_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_and *value) +{ + struct mptcp_rbs_value_and *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_and), GFP_KERNEL); + *clone = *value; + CLONE(clone->left_operand); + CLONE(clone->right_operand); + + return clone; +} + +int mptcp_rbs_value_and_print(const struct mptcp_rbs_value_and *value, + char *buffer) +{ + int len = sprintf_null(&buffer, "("); + int tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->left_operand, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, " AND "); + + tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->right_operand, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, ")"); + return len; +} + +struct mptcp_rbs_value_or *mptcp_rbs_value_or_new( + struct mptcp_rbs_value_bool *left_operand, + struct mptcp_rbs_value_bool *right_operand) +{ + struct mptcp_rbs_value_or *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_or), GFP_KERNEL); + value->kind = VALUE_KIND_OR; + value->free = &mptcp_rbs_value_or_free; + value->execute = &mptcp_rbs_value_or_execute; + value->left_operand = left_operand; + value->right_operand = right_operand; + + return value; +} + +void mptcp_rbs_value_or_free(struct mptcp_rbs_value_or *self) +{ + MPTCP_RBS_VALUE_FREE(self->left_operand); + MPTCP_RBS_VALUE_FREE(self->right_operand); + kfree(self); +} + +s32 mptcp_rbs_value_or_execute(struct mptcp_rbs_value_or *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + s32 val = self->left_operand->execute(self->left_operand, ctx); + if (val == 1) + return 1; + + val = self->right_operand->execute(self->right_operand, ctx); + if (val == 1) + return 1; + + return 0; +} + +struct mptcp_rbs_value_or *mptcp_rbs_value_or_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_or *value) +{ + struct mptcp_rbs_value_or *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_or), GFP_KERNEL); + *clone = *value; + CLONE(clone->left_operand); + CLONE(clone->right_operand); + + return clone; +} + +int mptcp_rbs_value_or_print(const struct mptcp_rbs_value_or *value, + char *buffer) +{ + int len = sprintf_null(&buffer, "("); + int tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->left_operand, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, " OR "); + + tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->right_operand, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, ")"); + return len; +} + +struct mptcp_rbs_value_add *mptcp_rbs_value_add_new( + struct mptcp_rbs_value_int *left_operand, + struct mptcp_rbs_value_int *right_operand) +{ + struct mptcp_rbs_value_add 
*value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_add), GFP_KERNEL); + value->kind = VALUE_KIND_ADD; + value->free = &mptcp_rbs_value_add_free; + value->execute = &mptcp_rbs_value_add_execute; + value->left_operand = left_operand; + value->right_operand = right_operand; + + return value; +} + +void mptcp_rbs_value_add_free(struct mptcp_rbs_value_add *self) +{ + MPTCP_RBS_VALUE_FREE(self->left_operand); + MPTCP_RBS_VALUE_FREE(self->right_operand); + kfree(self); +} + +s64 mptcp_rbs_value_add_execute(struct mptcp_rbs_value_add *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + s64 val = self->left_operand->execute(self->left_operand, ctx); + unsigned int result; + + if (val == -1) + return -1; + result = val; + + val = self->right_operand->execute(self->right_operand, ctx); + if (val == -1) + return -1; + result += val; + + return result; +} + +struct mptcp_rbs_value_add *mptcp_rbs_value_add_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_add *value) +{ + struct mptcp_rbs_value_add *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_add), GFP_KERNEL); + *clone = *value; + CLONE(clone->left_operand); + CLONE(clone->right_operand); + + return clone; +} + +int mptcp_rbs_value_add_print(const struct mptcp_rbs_value_add *value, + char *buffer) +{ + int len = sprintf_null(&buffer, "("); + int tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->left_operand, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, " + "); + + tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->right_operand, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, ")"); + return len; +} + +struct mptcp_rbs_value_subtract *mptcp_rbs_value_subtract_new( + struct mptcp_rbs_value_int *left_operand, + struct mptcp_rbs_value_int *right_operand) +{ + struct mptcp_rbs_value_subtract *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_subtract), GFP_KERNEL); + value->kind = VALUE_KIND_SUBTRACT; + value->free = &mptcp_rbs_value_subtract_free; + value->execute = &mptcp_rbs_value_subtract_execute; + value->left_operand = left_operand; + value->right_operand = right_operand; + + return value; +} + +void mptcp_rbs_value_subtract_free(struct mptcp_rbs_value_subtract *self) +{ + MPTCP_RBS_VALUE_FREE(self->left_operand); + MPTCP_RBS_VALUE_FREE(self->right_operand); + kfree(self); +} + +s64 mptcp_rbs_value_subtract_execute(struct mptcp_rbs_value_subtract *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + s64 val = self->left_operand->execute(self->left_operand, ctx); + unsigned int result; + + if (val == -1) + return -1; + result = val; + + val = self->right_operand->execute(self->right_operand, ctx); + if (val == -1) + return -1; + result -= val; + + return result; +} + +struct mptcp_rbs_value_subtract *mptcp_rbs_value_subtract_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_subtract *value) +{ + struct mptcp_rbs_value_subtract *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_subtract), GFP_KERNEL); + *clone = *value; + CLONE(clone->left_operand); + CLONE(clone->right_operand); + + return clone; +} + +int mptcp_rbs_value_subtract_print(const struct mptcp_rbs_value_subtract *value, + char *buffer) +{ + int len = sprintf_null(&buffer, "("); + int tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->left_operand, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, " - "); 
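+	/* All print routines follow a two-pass convention: called with a NULL
+	 * buffer they only report the length that would be written, which is
+	 * why the buffer pointer is advanced only when it is non-NULL.
+	 */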
+ + tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->right_operand, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, ")"); + return len; +} + +struct mptcp_rbs_value_multiply *mptcp_rbs_value_multiply_new( + struct mptcp_rbs_value_int *left_operand, + struct mptcp_rbs_value_int *right_operand) +{ + struct mptcp_rbs_value_multiply *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_multiply), GFP_KERNEL); + value->kind = VALUE_KIND_MULTIPLY; + value->free = &mptcp_rbs_value_multiply_free; + value->execute = &mptcp_rbs_value_multiply_execute; + value->left_operand = left_operand; + value->right_operand = right_operand; + + return value; +} + +void mptcp_rbs_value_multiply_free(struct mptcp_rbs_value_multiply *self) +{ + MPTCP_RBS_VALUE_FREE(self->left_operand); + MPTCP_RBS_VALUE_FREE(self->right_operand); + kfree(self); +} + +s64 mptcp_rbs_value_multiply_execute(struct mptcp_rbs_value_multiply *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + s64 val = self->left_operand->execute(self->left_operand, ctx); + unsigned int result; + + if (val == -1) + return -1; + result = val; + + val = self->right_operand->execute(self->right_operand, ctx); + if (val == -1) + return -1; + result *= val; + + return result; +} + +struct mptcp_rbs_value_multiply *mptcp_rbs_value_multiply_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_multiply *value) +{ + struct mptcp_rbs_value_multiply *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_multiply), GFP_KERNEL); + *clone = *value; + CLONE(clone->left_operand); + CLONE(clone->right_operand); + + return clone; +} + +int mptcp_rbs_value_multiply_print(const struct mptcp_rbs_value_multiply *value, + char *buffer) +{ + int len = sprintf_null(&buffer, "("); + int tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->left_operand, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, " * "); + + tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->right_operand, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, ")"); + return len; +} + +struct mptcp_rbs_value_divide *mptcp_rbs_value_divide_new( + struct mptcp_rbs_value_int *left_operand, + struct mptcp_rbs_value_int *right_operand) +{ + struct mptcp_rbs_value_divide *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_divide), GFP_KERNEL); + value->kind = VALUE_KIND_DIVIDE; + value->free = &mptcp_rbs_value_divide_free; + value->execute = &mptcp_rbs_value_divide_execute; + value->left_operand = left_operand; + value->right_operand = right_operand; + + return value; +} + +void mptcp_rbs_value_divide_free(struct mptcp_rbs_value_divide *self) +{ + MPTCP_RBS_VALUE_FREE(self->left_operand); + MPTCP_RBS_VALUE_FREE(self->right_operand); + kfree(self); +} + +s64 mptcp_rbs_value_divide_execute(struct mptcp_rbs_value_divide *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + s64 val = self->left_operand->execute(self->left_operand, ctx); + unsigned int result; + + if (val == -1) + return -1; + result = val; + + val = self->right_operand->execute(self->right_operand, ctx); + if (val == -1 || !val) + return -1; + result /= val; + + return result; +} + +struct mptcp_rbs_value_divide *mptcp_rbs_value_divide_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_divide *value) +{ + struct mptcp_rbs_value_divide *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_divide), 
GFP_KERNEL); + *clone = *value; + CLONE(clone->left_operand); + CLONE(clone->right_operand); + + return clone; +} + +int mptcp_rbs_value_divide_print(const struct mptcp_rbs_value_divide *value, + char *buffer) +{ + int len = sprintf_null(&buffer, "("); + int tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->left_operand, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, " / "); + + tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->right_operand, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, ")"); + return len; +} + +struct mptcp_rbs_value_remainder *mptcp_rbs_value_remainder_new( + struct mptcp_rbs_value_int *left_operand, + struct mptcp_rbs_value_int *right_operand) +{ + struct mptcp_rbs_value_remainder *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_remainder), GFP_KERNEL); + value->kind = VALUE_KIND_REMAINDER; + value->free = &mptcp_rbs_value_remainder_free; + value->execute = &mptcp_rbs_value_remainder_execute; + value->left_operand = left_operand; + value->right_operand = right_operand; + + return value; +} + +void mptcp_rbs_value_remainder_free(struct mptcp_rbs_value_remainder *self) +{ + MPTCP_RBS_VALUE_FREE(self->left_operand); + MPTCP_RBS_VALUE_FREE(self->right_operand); + kfree(self); +} + +s64 mptcp_rbs_value_remainder_execute(struct mptcp_rbs_value_remainder *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + s64 val = self->left_operand->execute(self->left_operand, ctx); + unsigned int result; + + if (val == -1) + return -1; + result = val; + + val = self->right_operand->execute(self->right_operand, ctx); + if (val == -1 || !val) + return -1; + result %= val; + + return result; +} + +struct mptcp_rbs_value_remainder *mptcp_rbs_value_remainder_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_remainder *value) +{ + struct mptcp_rbs_value_remainder *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_remainder), GFP_KERNEL); + *clone = *value; + CLONE(clone->left_operand); + CLONE(clone->right_operand); + + return clone; +} + +int mptcp_rbs_value_remainder_print( + const struct mptcp_rbs_value_remainder *value, char *buffer) +{ + int len = sprintf_null(&buffer, "("); + int tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->left_operand, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, " % "); + + tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->right_operand, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, ")"); + return len; +} + +struct mptcp_rbs_value_is_null *mptcp_rbs_value_is_null_new( + struct mptcp_rbs_value *operand) +{ + struct mptcp_rbs_value_is_null *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_is_null), GFP_KERNEL); + value->kind = VALUE_KIND_IS_NULL; + value->free = &mptcp_rbs_value_is_null_free; + value->execute = &mptcp_rbs_value_is_null_execute; + value->operand = operand; + + return value; +} + +void mptcp_rbs_value_is_null_free(struct mptcp_rbs_value_is_null *self) +{ + MPTCP_RBS_VALUE_FREE(self->operand); + kfree(self); +} + +#pragma GCC diagnostic push +#pragma GCC diagnostic error "-Wswitch" +#pragma GCC diagnostic ignored "-Wreturn-type" +s32 mptcp_rbs_value_is_null_execute(struct mptcp_rbs_value_is_null *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + switch (mptcp_rbs_value_get_type(self->operand->kind)) { + case 
TYPE_KIND_NULL: + return 1; + case TYPE_KIND_BOOL: { + struct mptcp_rbs_value_bool *bool_value = + (struct mptcp_rbs_value_bool *) self->operand; + + return bool_value->execute(bool_value, ctx) == -1; + } + case TYPE_KIND_INT: { + struct mptcp_rbs_value_int *int_value = + (struct mptcp_rbs_value_int *) self->operand; + + return int_value->execute(int_value, ctx) == -1; + } + case TYPE_KIND_STRING: { + struct mptcp_rbs_value_string *string_value = + (struct mptcp_rbs_value_string *) self->operand; + + return string_value->execute(string_value, ctx) == NULL; + } + case TYPE_KIND_SBF: { + struct mptcp_rbs_value_sbf *sbf_value = + (struct mptcp_rbs_value_sbf *) self->operand; + + return sbf_value->execute(sbf_value, ctx) == NULL; + } + case TYPE_KIND_SBFLIST: { + struct mptcp_rbs_value_sbf_list *sbf_list_value = + (struct mptcp_rbs_value_sbf_list *) self->operand; + void *prev = NULL; + bool is_null; + + sbf_list_value->execute(sbf_list_value, ctx, &prev, &is_null); + return is_null; + } + case TYPE_KIND_SKB: { + struct mptcp_rbs_value_skb *skb_value = + (struct mptcp_rbs_value_skb *) self->operand; + + return skb_value->execute(skb_value, ctx) == NULL; + } + case TYPE_KIND_SKBLIST: { + struct mptcp_rbs_value_skb_list *skb_list_value = + (struct mptcp_rbs_value_skb_list *) self->operand; + void *prev = NULL; + bool is_null; + + skb_list_value->execute(skb_list_value, ctx, &prev, &is_null); + return is_null; + } + } +} +#pragma GCC diagnostic pop + +struct mptcp_rbs_value_is_null *mptcp_rbs_value_is_null_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_is_null *value) +{ + struct mptcp_rbs_value_is_null *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_is_null), GFP_KERNEL); + *clone = *value; + CLONE(clone->operand); + + return clone; +} + +int mptcp_rbs_value_is_null_print(const struct mptcp_rbs_value_is_null *value, + char *buffer) +{ + int len = sprintf_null(&buffer, "("); + int tmp_len = mptcp_rbs_value_print(value->operand, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, " == NULL)"); + return len; +} + +struct mptcp_rbs_value_is_not_null *mptcp_rbs_value_is_not_null_new( + struct mptcp_rbs_value *operand) +{ + struct mptcp_rbs_value_is_not_null *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_is_not_null), GFP_KERNEL); + value->kind = VALUE_KIND_IS_NOT_NULL; + value->free = &mptcp_rbs_value_is_not_null_free; + value->execute = &mptcp_rbs_value_is_not_null_execute; + value->operand = operand; + + return value; +} + +void mptcp_rbs_value_is_not_null_free(struct mptcp_rbs_value_is_not_null *self) +{ + MPTCP_RBS_VALUE_FREE(self->operand); + kfree(self); +} + +#pragma GCC diagnostic push +#pragma GCC diagnostic error "-Wswitch" +#pragma GCC diagnostic ignored "-Wreturn-type" +s32 mptcp_rbs_value_is_not_null_execute( + struct mptcp_rbs_value_is_not_null *self, struct mptcp_rbs_eval_ctx *ctx) +{ + switch (mptcp_rbs_value_get_type(self->operand->kind)) { + case TYPE_KIND_NULL: + return 0; + case TYPE_KIND_BOOL: { + struct mptcp_rbs_value_bool *bool_value = + (struct mptcp_rbs_value_bool *) self->operand; + + return bool_value->execute(bool_value, ctx) != -1; + } + case TYPE_KIND_INT: { + struct mptcp_rbs_value_int *int_value = + (struct mptcp_rbs_value_int *) self->operand; + + return int_value->execute(int_value, ctx) != -1; + } + case TYPE_KIND_STRING: { + struct mptcp_rbs_value_string *string_value = + (struct mptcp_rbs_value_string *) self->operand; + + return string_value->execute(string_value, 
ctx) != NULL; + } + case TYPE_KIND_SBF: { + struct mptcp_rbs_value_sbf *sbf_value = + (struct mptcp_rbs_value_sbf *) self->operand; + + return sbf_value->execute(sbf_value, ctx) != NULL; + } + case TYPE_KIND_SBFLIST: { + struct mptcp_rbs_value_sbf_list *sbf_list_value = + (struct mptcp_rbs_value_sbf_list *) self->operand; + void *prev = NULL; + bool is_null; + + sbf_list_value->execute(sbf_list_value, ctx, &prev, &is_null); + return !is_null; + } + case TYPE_KIND_SKB: { + struct mptcp_rbs_value_skb *skb_value = + (struct mptcp_rbs_value_skb *) self->operand; + + return skb_value->execute(skb_value, ctx) != NULL; + } + case TYPE_KIND_SKBLIST: { + struct mptcp_rbs_value_skb_list *skb_list_value = + (struct mptcp_rbs_value_skb_list *) self->operand; + void *prev = NULL; + bool is_null; + + skb_list_value->execute(skb_list_value, ctx, &prev, &is_null); + return !is_null; + } + } +} +#pragma GCC diagnostic pop + +struct mptcp_rbs_value_is_not_null *mptcp_rbs_value_is_not_null_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_is_not_null *value) +{ + struct mptcp_rbs_value_is_not_null *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_is_not_null), GFP_KERNEL); + *clone = *value; + CLONE(clone->operand); + + return clone; +} + +int mptcp_rbs_value_is_not_null_print( + const struct mptcp_rbs_value_is_not_null *value, char *buffer) +{ + int len = sprintf_null(&buffer, "("); + int tmp_len = mptcp_rbs_value_print(value->operand, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, " != NULL)"); + return len; +} + +struct mptcp_rbs_value_reg *mptcp_rbs_value_reg_new(int reg_number) +{ + struct mptcp_rbs_value_reg *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_reg), GFP_KERNEL); + value->kind = VALUE_KIND_REG; + value->free = mptcp_rbs_value_reg_free; + value->execute = mptcp_rbs_value_reg_execute; + value->reg_number = reg_number; + + return value; +} + +void mptcp_rbs_value_reg_free(struct mptcp_rbs_value_reg *self) +{ + kfree(self); +} + +s64 mptcp_rbs_value_reg_execute(struct mptcp_rbs_value_reg *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + return ctx->rbs_cb->regs[self->reg_number]; +} + +struct mptcp_rbs_value_reg *mptcp_rbs_value_reg_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_reg *value) +{ + struct mptcp_rbs_value_reg *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_reg), GFP_KERNEL); + *clone = *value; + + return clone; +} + +int mptcp_rbs_value_reg_print(const struct mptcp_rbs_value_reg *value, + char *buffer) +{ + return sprintf_null(&buffer, "R%d", value->reg_number + 1); +} + +struct mptcp_rbs_value_q *mptcp_rbs_value_q_new(void) +{ + struct mptcp_rbs_value_q *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_q), GFP_KERNEL); + value->kind = VALUE_KIND_Q; + value->free = &mptcp_rbs_value_q_free; + value->execute = &mptcp_rbs_value_q_execute; + value->underlying_queue_kind = VALUE_KIND_Q; + + return value; +} + +/* skip packets which should not be in the queue */ +struct sk_buff *mptcp_rbs_next_in_queue(struct sk_buff_head *queue, + struct sk_buff *skb) +{ + while (skb && TCP_SKB_CB(skb)->mptcp_rbs.flags_not_in_queue) { + mptcp_debug("%s skipping skb %p with seq %u and end_seq %u as " + "it not in queue\n", + __func__, skb, TCP_SKB_CB(skb)->seq, + TCP_SKB_CB(skb)->end_seq); + + if (skb_queue_is_last(queue, skb)) + return NULL; + else + skb = skb_queue_next(queue, skb); + } + + return skb; +} + +void mptcp_rbs_value_q_free(struct mptcp_rbs_value_q 
*self) +{ + kfree(self); +} + +struct sk_buff *mptcp_rbs_value_q_execute(struct mptcp_rbs_value_q *self, + struct mptcp_rbs_eval_ctx *ctx, + void **prev, bool *is_null) +{ + struct sk_buff *skb_candidate; + struct sk_buff *skb_result; + + if (*prev) { + skb_candidate = (struct sk_buff *) *prev; + if (skb_queue_is_last(&ctx->meta_sk->sk_write_queue, + skb_candidate)) + skb_candidate = NULL; + else + skb_candidate = skb_queue_next( + &ctx->meta_sk->sk_write_queue, skb_candidate); + } else { + skb_candidate = ctx->rbs_cb->queue_position; + } + + skb_result = mptcp_rbs_next_in_queue(&ctx->meta_sk->sk_write_queue, + skb_candidate); + + *is_null = false; + if (skb_result) + *prev = skb_result; + + return skb_result; +} + +struct mptcp_rbs_value_q *mptcp_rbs_value_q_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_q *value) +{ + struct mptcp_rbs_value_q *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_q), GFP_KERNEL); + *clone = *value; + + return clone; +} + +int mptcp_rbs_value_q_print(const struct mptcp_rbs_value_q *value, char *buffer) +{ + return sprintf_null(&buffer, "Q"); +} + +struct mptcp_rbs_value_qu *mptcp_rbs_value_qu_new(void) +{ + struct mptcp_rbs_value_qu *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_qu), GFP_KERNEL); + value->kind = VALUE_KIND_QU; + value->free = &mptcp_rbs_value_qu_free; + value->execute = &mptcp_rbs_value_qu_execute; + value->underlying_queue_kind = VALUE_KIND_QU; + + return value; +} + +void mptcp_rbs_value_qu_free(struct mptcp_rbs_value_qu *self) +{ + kfree(self); +} + +struct sk_buff *mptcp_rbs_value_qu_execute(struct mptcp_rbs_value_qu *self, + struct mptcp_rbs_eval_ctx *ctx, + void **prev, bool *is_null) +{ + struct sk_buff *skb; + + if (*prev) { + skb = (struct sk_buff *) *prev; + if (skb_queue_is_last(&ctx->meta_sk->sk_write_queue, skb)) + skb = NULL; + else { + skb = + skb_queue_next(&ctx->meta_sk->sk_write_queue, skb); + } + } else { + if (ctx->meta_sk->sk_write_queue.qlen == 0) + /* queue is empty, rq is empty */ + skb = NULL; + else + /* start with write_queue.next */ + skb = skb_peek(&ctx->meta_sk->sk_write_queue); + } + + mptcp_debug("%s with prev %p has a candidate of %p\n", __func__, *prev, + skb); + + // TODO in the old version, we also checked for skb->next == + // queue_position + if (skb == ctx->rbs_cb->queue_position) { + mptcp_debug( + "%s skb %p matches the queue_position, we are at the end\n", + __func__, skb); + skb = NULL; + } + + // we can not use the approach of Q and RQ, as we have to check for + // queue_position + // skb_result = next_in_queue(&ctx->meta_sk->sk_write_queue, + // skb_candidate); + + while (skb && TCP_SKB_CB(skb)->mptcp_rbs.flags_not_in_queue) { + mptcp_debug("%s skips skb %p\n", __func__, skb); + if (skb_queue_is_last(&ctx->meta_sk->sk_write_queue, skb) || + /* Empty because it points to the element in Q */ + skb == ctx->rbs_cb->queue_position) { + skb = NULL; + break; + } else + skb = + skb_queue_next(&ctx->meta_sk->sk_write_queue, skb); + } + + *is_null = false; + if (skb) + *prev = skb; + + return skb; +} + +struct mptcp_rbs_value_qu *mptcp_rbs_value_qu_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_qu *value) +{ + struct mptcp_rbs_value_qu *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_qu), GFP_KERNEL); + *clone = *value; + + return clone; +} + +int mptcp_rbs_value_qu_print(const struct mptcp_rbs_value_qu *value, + char *buffer) +{ + return sprintf_null(&buffer, "QU"); +} + +struct mptcp_rbs_value_rq 
*mptcp_rbs_value_rq_new(void) +{ + struct mptcp_rbs_value_rq *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_rq), GFP_KERNEL); + value->kind = VALUE_KIND_RQ; + value->free = &mptcp_rbs_value_rq_free; + value->execute = &mptcp_rbs_value_rq_execute; + value->underlying_queue_kind = VALUE_KIND_RQ; + + return value; +} + +void mptcp_rbs_value_rq_free(struct mptcp_rbs_value_rq *self) +{ + kfree(self); +} + +struct sk_buff *mptcp_rbs_value_rq_execute(struct mptcp_rbs_value_rq *self, + struct mptcp_rbs_eval_ctx *ctx, + void **prev, bool *is_null) +{ + struct sk_buff *skb_candidate; + struct sk_buff *skb_result; + + if (*prev) { + skb_candidate = (struct sk_buff *) *prev; + if (skb_queue_is_last(&ctx->mpcb->reinject_queue, + skb_candidate)) { + skb_candidate = NULL; + } else { + skb_candidate = skb_queue_next( + &ctx->mpcb->reinject_queue, skb_candidate); + } + } else { + skb_candidate = skb_peek(&ctx->mpcb->reinject_queue); + } + + skb_result = + mptcp_rbs_next_in_queue(&ctx->mpcb->reinject_queue, skb_candidate); + + mptcp_debug("%s with candidate %p and prev %p for rq %p with rq.len %u " + "has result %p\n", + __func__, skb_candidate, *prev, &ctx->mpcb->reinject_queue, + ctx->mpcb->reinject_queue.qlen, skb_result); + + *is_null = false; + if (skb_result) + *prev = skb_result; + + return skb_result; +} + +struct mptcp_rbs_value_rq *mptcp_rbs_value_rq_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_rq *value) +{ + struct mptcp_rbs_value_rq *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_rq), GFP_KERNEL); + *clone = *value; + + return clone; +} + +int mptcp_rbs_value_rq_print(const struct mptcp_rbs_value_rq *value, + char *buffer) +{ + return sprintf_null(&buffer, "RQ"); +} + +struct mptcp_rbs_value_subflows *mptcp_rbs_value_subflows_new(void) +{ + struct mptcp_rbs_value_subflows *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_subflows), GFP_KERNEL); + value->kind = VALUE_KIND_SUBFLOWS; + value->free = mptcp_rbs_value_subflows_free; + value->execute = mptcp_rbs_value_subflows_execute; + + return value; +} + +void mptcp_rbs_value_subflows_free(struct mptcp_rbs_value_subflows *self) +{ + kfree(self); +} + +bool mptcp_rbs_sbf_is_available(struct tcp_sock *sbf) +{ + /* Set of states for which we are allowed to send data */ + if (!mptcp_sk_can_send((struct sock *) sbf)) { + mptcp_debug("sbf_is_available %p can not send -> false\n", sbf); + return false; + } + + /* We do not send data on this subflow unless it is + * fully established, i.e. the 4th ack has been received. + */ + if (sbf->mptcp->pre_established) { + mptcp_debug("sbf_is_available %p preestablished -> false\n", + sbf); + return false; + } + + if (sbf->pf) { + mptcp_debug("sbf_is_available %p pf -> false\n", sbf); + return false; + } + + /*if (inet_csk((struct sock *) sbf)->icsk_ca_state == TCP_CA_Loss) { + mptcp_debug("sbf_is_available %p loss state -> false\n", sbf);*/ + /* If SACK is disabled, and we got a loss, TCP does not exit + * the loss-state until something above high_seq has been + * acked. (see tcp_try_undo_recovery) + * + * high_seq is the snd_nxt at the moment of the RTO. As soon + * as we have an RTO, we won't push data on the subflow. + * Thus, snd_una can never go beyond high_seq. + */ +/* if (!tcp_is_reno(sbf)) + return false; + else if (sbf->snd_una != sbf->high_seq) + return false; + }*/ + + /* If TSQ is already throttling us, do not send on this subflow. When + * TSQ gets cleared the subflow becomes eligible again. 
+ */ +/* + moved this test to a seperate prop + + if (test_bit(TSQ_THROTTLED, &sbf->tsq_flags)) { + mptcp_debug("sbf_is_available %p tso throttle -> false\n", sbf); + return false; + }*/ + + return true; +} + +struct tcp_sock *mptcp_rbs_value_subflows_execute( + struct mptcp_rbs_value_subflows *self, struct mptcp_rbs_eval_ctx *ctx, + void **prev, bool *is_null) +{ + struct tcp_sock *sbf; + +//printk("%s self %p prev %p called\n", __func__, self, prev); + + if (*prev) + sbf = ((struct tcp_sock *) *prev)->mptcp->next; + else + sbf = ctx->mpcb->connection_list; + + /* Skip unavailable subflows */ + while (sbf && !mptcp_rbs_sbf_is_available(sbf)) { + printk("%s skips sbf %p for meta_sk %p coming from %pS\n", __func__, sbf, ctx->mpcb->meta_sk, __builtin_return_address(0)); + sbf = sbf->mptcp->next; + } + + *is_null = false; + if (sbf) + *prev = sbf; + +//printk("%s returns %p\n", __func__, sbf); + + return sbf; +} + +struct mptcp_rbs_value_subflows *mptcp_rbs_value_subflows_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_subflows *value) +{ + struct mptcp_rbs_value_subflows *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_subflows), GFP_KERNEL); + *clone = *value; + + return clone; +} + +int mptcp_rbs_value_subflows_print(const struct mptcp_rbs_value_subflows *value, + char *buffer) +{ + return sprintf_null(&buffer, "SUBFLOWS"); +} + +struct mptcp_rbs_value_current_time_ms *mptcp_rbs_value_current_time_ms_new( + void) +{ + struct mptcp_rbs_value_current_time_ms *value; + + value = + kzalloc(sizeof(struct mptcp_rbs_value_current_time_ms), GFP_KERNEL); + value->kind = VALUE_KIND_CURRENT_TIME_MS; + value->free = mptcp_rbs_value_current_time_ms_free; + value->execute = mptcp_rbs_value_current_time_ms_execute; + + return value; +} + +void mptcp_rbs_value_current_time_ms_free( + struct mptcp_rbs_value_current_time_ms *self) +{ + kfree(self); +} + +s64 mptcp_rbs_value_current_time_ms_execute( + struct mptcp_rbs_value_current_time_ms *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + u64 ct = ktime_get_raw_ns(); + u64 tp6 = 1000000; + return ct / tp6; +} + +struct mptcp_rbs_value_current_time_ms *mptcp_rbs_value_current_time_ms_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_current_time_ms *value) +{ + struct mptcp_rbs_value_current_time_ms *clone; + + clone = + kmalloc(sizeof(struct mptcp_rbs_value_current_time_ms), GFP_KERNEL); + *clone = *value; + + return clone; +} + +int mptcp_rbs_value_current_time_ms_print( + const struct mptcp_rbs_value_current_time_ms *value, char *buffer) +{ + return sprintf_null(&buffer, "CURRENT_TIME_MS"); +} + +struct mptcp_rbs_value_random *mptcp_rbs_value_random_new(void) +{ + struct mptcp_rbs_value_random *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_random), GFP_KERNEL); + value->kind = VALUE_KIND_RANDOM; + value->free = mptcp_rbs_value_random_free; + value->execute = mptcp_rbs_value_random_execute; + + return value; +} + +void mptcp_rbs_value_random_free(struct mptcp_rbs_value_random *self) +{ + kfree(self); +} + +s64 mptcp_rbs_value_random_execute(struct mptcp_rbs_value_random *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + unsigned int n; + + get_random_bytes(&n, sizeof(unsigned int)); + return n; +} + +struct mptcp_rbs_value_random *mptcp_rbs_value_random_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_random *value) +{ + struct mptcp_rbs_value_random *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_random), GFP_KERNEL); + *clone = *value; + + return 
clone; +} + +int mptcp_rbs_value_random_print(const struct mptcp_rbs_value_random *value, + char *buffer) +{ + return sprintf_null(&buffer, "RANDOM"); +} + +struct mptcp_rbs_value_sbf_rtt *mptcp_rbs_value_sbf_rtt_new( + struct mptcp_rbs_value_sbf *sbf) +{ + struct mptcp_rbs_value_sbf_rtt *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_sbf_rtt), GFP_KERNEL); + value->kind = VALUE_KIND_SBF_RTT; + value->free = mptcp_rbs_value_sbf_rtt_free; + value->execute = mptcp_rbs_value_sbf_rtt_execute; + value->sbf = sbf; + + return value; +} + +void mptcp_rbs_value_sbf_rtt_free(struct mptcp_rbs_value_sbf_rtt *self) +{ + MPTCP_RBS_VALUE_FREE(self->sbf); + kfree(self); +} + +s64 mptcp_rbs_value_sbf_rtt_execute(struct mptcp_rbs_value_sbf_rtt *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + struct tcp_sock *sbf; + + sbf = self->sbf->execute(self->sbf, ctx); + if (!sbf) + return -1; + + return sbf->srtt_us; +} + +struct mptcp_rbs_value_sbf_rtt *mptcp_rbs_value_sbf_rtt_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_rtt *value) +{ + struct mptcp_rbs_value_sbf_rtt *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_sbf_rtt), GFP_KERNEL); + *clone = *value; + CLONE(clone->sbf); + + return clone; +} + +int mptcp_rbs_value_sbf_rtt_print(const struct mptcp_rbs_value_sbf_rtt *value, + char *buffer) +{ + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->sbf, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".RTT"); + return len; +} + +struct mptcp_rbs_value_sbf_rtt_ms *mptcp_rbs_value_sbf_rtt_ms_new( + struct mptcp_rbs_value_sbf *sbf) +{ + struct mptcp_rbs_value_sbf_rtt_ms *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_sbf_rtt_ms), GFP_KERNEL); + value->kind = VALUE_KIND_SBF_RTT_MS; + value->free = mptcp_rbs_value_sbf_rtt_ms_free; + value->execute = mptcp_rbs_value_sbf_rtt_ms_execute; + value->sbf = sbf; + + return value; +} + +void mptcp_rbs_value_sbf_rtt_ms_free(struct mptcp_rbs_value_sbf_rtt_ms *self) +{ + MPTCP_RBS_VALUE_FREE(self->sbf); + kfree(self); +} + +s64 mptcp_rbs_value_sbf_rtt_ms_execute(struct mptcp_rbs_value_sbf_rtt_ms *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + struct tcp_sock *sbf; + + sbf = self->sbf->execute(self->sbf, ctx); + if (!sbf) + return -1; + + // we now are _MS + return (sbf->srtt_us >> 3) / 1000; +} + +struct mptcp_rbs_value_sbf_rtt_ms *mptcp_rbs_value_sbf_rtt_ms_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_rtt_ms *value) +{ + struct mptcp_rbs_value_sbf_rtt_ms *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_sbf_rtt_ms), GFP_KERNEL); + *clone = *value; + CLONE(clone->sbf); + + return clone; +} + +int mptcp_rbs_value_sbf_rtt_ms_print(const struct mptcp_rbs_value_sbf_rtt_ms *value, + char *buffer) +{ + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->sbf, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".RTT_MS"); + return len; +} + +struct mptcp_rbs_value_sbf_rtt_var *mptcp_rbs_value_sbf_rtt_var_new( + struct mptcp_rbs_value_sbf *sbf) +{ + struct mptcp_rbs_value_sbf_rtt_var *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_sbf_rtt_var), GFP_KERNEL); + value->kind = VALUE_KIND_SBF_RTT_VAR; + value->free = mptcp_rbs_value_sbf_rtt_var_free; + value->execute = mptcp_rbs_value_sbf_rtt_var_execute; + value->sbf = sbf; + + return value; +} + +void mptcp_rbs_value_sbf_rtt_var_free(struct mptcp_rbs_value_sbf_rtt_var *self) +{ + MPTCP_RBS_VALUE_FREE(self->sbf); + kfree(self); +} + 
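+/*
+ * Note on units for the RTT properties above and below: the kernel keeps
+ * the smoothed RTT in tp->srtt_us left-shifted by 3, i.e. 8 * RTT in
+ * microseconds.  RTT therefore returns that raw fixed-point value, while
+ * RTT_MS undoes the scaling first via (srtt_us >> 3) / 1000.  As a worked
+ * example, srtt_us == 200000 means 200000 / 8 = 25000 us, reported as
+ * 25 ms.  RTT_VAR below simply returns the raw tp->rttvar_us field.
+ */
+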
+s64 mptcp_rbs_value_sbf_rtt_var_execute(struct mptcp_rbs_value_sbf_rtt_var *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + struct tcp_sock *sbf; + + sbf = self->sbf->execute(self->sbf, ctx); + if (!sbf) + return -1; + + return sbf->rttvar_us; +} + +struct mptcp_rbs_value_sbf_rtt_var *mptcp_rbs_value_sbf_rtt_var_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_rtt_var *value) +{ + struct mptcp_rbs_value_sbf_rtt_var *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_sbf_rtt_var), GFP_KERNEL); + *clone = *value; + CLONE(clone->sbf); + + return clone; +} + +int mptcp_rbs_value_sbf_rtt_var_print(const struct mptcp_rbs_value_sbf_rtt_var *value, + char *buffer) +{ + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->sbf, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".RTT_VAR"); + return len; +} + +struct mptcp_rbs_value_sbf_user *mptcp_rbs_value_sbf_user_new( + struct mptcp_rbs_value_sbf *sbf) +{ + struct mptcp_rbs_value_sbf_user *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_sbf_user), GFP_KERNEL); + value->kind = VALUE_KIND_SBF_USER; + value->free = mptcp_rbs_value_sbf_user_free; + value->execute = mptcp_rbs_value_sbf_user_execute; + value->sbf = sbf; + + return value; +} + +void mptcp_rbs_value_sbf_user_free(struct mptcp_rbs_value_sbf_user *self) +{ + MPTCP_RBS_VALUE_FREE(self->sbf); + kfree(self); +} + +s64 mptcp_rbs_value_sbf_user_execute(struct mptcp_rbs_value_sbf_user *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + struct tcp_sock *sbf; + + sbf = self->sbf->execute(self->sbf, ctx); + if (!sbf) + return -1; + +// return *((s64*) &sbf->mptcp->mptcp_sched[0]); + return mptcp_rbs_get_sbf_cb(sbf)->user; +} + +struct mptcp_rbs_value_sbf_user *mptcp_rbs_value_sbf_user_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_user *value) +{ + struct mptcp_rbs_value_sbf_user *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_sbf_user), GFP_KERNEL); + *clone = *value; + CLONE(clone->sbf); + + return clone; +} + +int mptcp_rbs_value_sbf_user_print(const struct mptcp_rbs_value_sbf_user *value, + char *buffer) +{ + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->sbf, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".USER"); + return len; +} + +struct mptcp_rbs_value_sbf_is_backup *mptcp_rbs_value_sbf_is_backup_new( + struct mptcp_rbs_value_sbf *sbf) +{ + struct mptcp_rbs_value_sbf_is_backup *value; + + value = + kzalloc(sizeof(struct mptcp_rbs_value_sbf_is_backup), GFP_KERNEL); + value->kind = VALUE_KIND_SBF_IS_BACKUP; + value->free = mptcp_rbs_value_sbf_is_backup_free; + value->execute = mptcp_rbs_value_sbf_is_backup_execute; + value->sbf = sbf; + + return value; +} + +void mptcp_rbs_value_sbf_is_backup_free( + struct mptcp_rbs_value_sbf_is_backup *self) +{ + MPTCP_RBS_VALUE_FREE(self->sbf); + kfree(self); +} + +s32 mptcp_rbs_value_sbf_is_backup_execute( + struct mptcp_rbs_value_sbf_is_backup *self, struct mptcp_rbs_eval_ctx *ctx) +{ + struct tcp_sock *sbf; + + sbf = self->sbf->execute(self->sbf, ctx); + if (!sbf) + return -1; + + return sbf->mptcp->low_prio || sbf->mptcp->rcv_low_prio; +} + +struct mptcp_rbs_value_sbf_is_backup *mptcp_rbs_value_sbf_is_backup_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_is_backup *value) +{ + struct mptcp_rbs_value_sbf_is_backup *clone; + + clone = + kmalloc(sizeof(struct mptcp_rbs_value_sbf_is_backup), GFP_KERNEL); + *clone = *value; + 
CLONE(clone->sbf); + + return clone; +} + +int mptcp_rbs_value_sbf_is_backup_print( + const struct mptcp_rbs_value_sbf_is_backup *value, char *buffer) +{ + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->sbf, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".IS_BACKUP"); + return len; +} + +struct mptcp_rbs_value_sbf_cwnd *mptcp_rbs_value_sbf_cwnd_new( + struct mptcp_rbs_value_sbf *sbf) +{ + struct mptcp_rbs_value_sbf_cwnd *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_sbf_cwnd), GFP_KERNEL); + value->kind = VALUE_KIND_SBF_CWND; + value->free = mptcp_rbs_value_sbf_cwnd_free; + value->execute = mptcp_rbs_value_sbf_cwnd_execute; + value->sbf = sbf; + + return value; +} + +void mptcp_rbs_value_sbf_cwnd_free(struct mptcp_rbs_value_sbf_cwnd *self) +{ + MPTCP_RBS_VALUE_FREE(self->sbf); + kfree(self); +} + +s64 mptcp_rbs_value_sbf_cwnd_execute(struct mptcp_rbs_value_sbf_cwnd *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + struct tcp_sock *sbf; + + sbf = self->sbf->execute(self->sbf, ctx); + if (!sbf) + return -1; + + return sbf->snd_cwnd; +} + +struct mptcp_rbs_value_sbf_cwnd *mptcp_rbs_value_sbf_cwnd_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_cwnd *value) +{ + struct mptcp_rbs_value_sbf_cwnd *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_sbf_cwnd), GFP_KERNEL); + *clone = *value; + CLONE(clone->sbf); + + return clone; +} + +int mptcp_rbs_value_sbf_cwnd_print(const struct mptcp_rbs_value_sbf_cwnd *value, + char *buffer) +{ + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->sbf, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".CWND"); + return len; +} + +struct mptcp_rbs_value_sbf_queued *mptcp_rbs_value_sbf_queued_new( + struct mptcp_rbs_value_sbf *sbf) +{ + struct mptcp_rbs_value_sbf_queued *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_sbf_queued), GFP_KERNEL); + value->kind = VALUE_KIND_SBF_QUEUED; + value->free = mptcp_rbs_value_sbf_queued_free; + value->execute = mptcp_rbs_value_sbf_queued_execute; + value->sbf = sbf; + + return value; +} + +void mptcp_rbs_value_sbf_queued_free(struct mptcp_rbs_value_sbf_queued *self) +{ + MPTCP_RBS_VALUE_FREE(self->sbf); + kfree(self); +} + +s64 mptcp_rbs_value_sbf_queued_execute(struct mptcp_rbs_value_sbf_queued *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + struct tcp_sock *sbf; + + sbf = self->sbf->execute(self->sbf, ctx); + if (!sbf) + return -1; + + return (sbf->write_seq - sbf->snd_nxt) / sbf->mss_cache; +} + +struct mptcp_rbs_value_sbf_queued *mptcp_rbs_value_sbf_queued_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_queued *value) +{ + struct mptcp_rbs_value_sbf_queued *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_sbf_queued), GFP_KERNEL); + *clone = *value; + CLONE(clone->sbf); + + return clone; +} + +int mptcp_rbs_value_sbf_queued_print(const struct mptcp_rbs_value_sbf_queued *value, + char *buffer) +{ + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->sbf, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".QUEUED"); + return len; +} + +struct mptcp_rbs_value_sbf_skbs_in_flight * +mptcp_rbs_value_sbf_skbs_in_flight_new(struct mptcp_rbs_value_sbf *sbf) +{ + struct mptcp_rbs_value_sbf_skbs_in_flight *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_sbf_skbs_in_flight), + GFP_KERNEL); + value->kind = VALUE_KIND_SBF_SKBS_IN_FLIGHT; + value->free = 
mptcp_rbs_value_sbf_skbs_in_flight_free; + value->execute = mptcp_rbs_value_sbf_skbs_in_flight_execute; + value->sbf = sbf; + + return value; +} + +void mptcp_rbs_value_sbf_skbs_in_flight_free( + struct mptcp_rbs_value_sbf_skbs_in_flight *self) +{ + MPTCP_RBS_VALUE_FREE(self->sbf); + kfree(self); +} + +s64 mptcp_rbs_value_sbf_skbs_in_flight_execute( + struct mptcp_rbs_value_sbf_skbs_in_flight *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + struct tcp_sock *sbf; + + sbf = self->sbf->execute(self->sbf, ctx); + if (!sbf) + return -1; + + return sbf->packets_out; +} + +struct mptcp_rbs_value_sbf_skbs_in_flight * +mptcp_rbs_value_sbf_skbs_in_flight_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_skbs_in_flight *value) +{ + struct mptcp_rbs_value_sbf_skbs_in_flight *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_sbf_skbs_in_flight), + GFP_KERNEL); + *clone = *value; + CLONE(clone->sbf); + + return clone; +} + +int mptcp_rbs_value_sbf_skbs_in_flight_print( + const struct mptcp_rbs_value_sbf_skbs_in_flight *value, char *buffer) +{ + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->sbf, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".SKBS_IN_FLIGHT"); + return len; +} + +struct mptcp_rbs_value_sbf_lost_skbs *mptcp_rbs_value_sbf_lost_skbs_new( + struct mptcp_rbs_value_sbf *sbf) +{ + struct mptcp_rbs_value_sbf_lost_skbs *value; + + value = + kzalloc(sizeof(struct mptcp_rbs_value_sbf_lost_skbs), GFP_KERNEL); + value->kind = VALUE_KIND_SBF_LOST_SKBS; + value->free = mptcp_rbs_value_sbf_lost_skbs_free; + value->execute = mptcp_rbs_value_sbf_lost_skbs_execute; + value->sbf = sbf; + + return value; +} + +void mptcp_rbs_value_sbf_lost_skbs_free( + struct mptcp_rbs_value_sbf_lost_skbs *self) +{ + MPTCP_RBS_VALUE_FREE(self->sbf); + kfree(self); +} + +s64 mptcp_rbs_value_sbf_lost_skbs_execute( + struct mptcp_rbs_value_sbf_lost_skbs *self, struct mptcp_rbs_eval_ctx *ctx) +{ + struct tcp_sock *sbf; + + sbf = self->sbf->execute(self->sbf, ctx); + if (!sbf) + return -1; + + return sbf->lost_out; +} + +struct mptcp_rbs_value_sbf_lost_skbs *mptcp_rbs_value_sbf_lost_skbs_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_lost_skbs *value) +{ + struct mptcp_rbs_value_sbf_lost_skbs *clone; + + clone = + kmalloc(sizeof(struct mptcp_rbs_value_sbf_lost_skbs), GFP_KERNEL); + *clone = *value; + CLONE(clone->sbf); + + return clone; +} + +int mptcp_rbs_value_sbf_lost_skbs_print( + const struct mptcp_rbs_value_sbf_lost_skbs *value, char *buffer) +{ + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->sbf, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".LOST_SKBS"); + return len; +} + +struct mptcp_rbs_value_sbf_has_window_for * +mptcp_rbs_value_sbf_has_window_for_new(struct mptcp_rbs_value_sbf *sbf, + struct mptcp_rbs_value_skb *skb) +{ + struct mptcp_rbs_value_sbf_has_window_for *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_sbf_has_window_for), + GFP_KERNEL); + value->kind = VALUE_KIND_SBF_HAS_WINDOW_FOR; + value->free = mptcp_rbs_value_sbf_has_window_for_free; + value->execute = mptcp_rbs_value_sbf_has_window_for_execute; + value->sbf = sbf; + value->skb = skb; + + return value; +} + +void mptcp_rbs_value_sbf_has_window_for_free( + struct mptcp_rbs_value_sbf_has_window_for *self) +{ + MPTCP_RBS_VALUE_FREE(self->sbf); + MPTCP_RBS_VALUE_FREE(self->skb); + kfree(self); +} + +s32 mptcp_rbs_value_sbf_has_window_for_execute( + struct 
mptcp_rbs_value_sbf_has_window_for *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + unsigned int mss_now = tcp_current_mss(ctx->meta_sk); + struct tcp_sock *sbf; + struct sk_buff *skb; + + sbf = self->sbf->execute(self->sbf, ctx); + if (!sbf) + return -1; + + skb = self->skb->execute(self->skb, ctx); + if (!skb) + return -1; + + /* RBS copied from mptcp_sched.c */ + /* Don't send on this subflow if we bypass the allowed send-window at + * the per-subflow level. Similar to tcp_snd_wnd_test, but manually + * calculated end_seq (because here at this point end_seq is still at + * the meta-level). + */ + if (after(sbf->write_seq + min(skb->len, mss_now), tcp_wnd_end(sbf))) + return 0; + return 1; +} + +struct mptcp_rbs_value_sbf_has_window_for * +mptcp_rbs_value_sbf_has_window_for_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_has_window_for *value) +{ + struct mptcp_rbs_value_sbf_has_window_for *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_sbf_has_window_for), + GFP_KERNEL); + *clone = *value; + CLONE(clone->sbf); + CLONE(clone->skb); + + return clone; +} + +int mptcp_rbs_value_sbf_has_window_for_print( + const struct mptcp_rbs_value_sbf_has_window_for *value, char *buffer) +{ + int tmp_len; + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->sbf, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".HAS_WINDOW_FOR("); + + tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->skb, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, ")"); + return len; +} + +struct mptcp_rbs_value_sbf_id *mptcp_rbs_value_sbf_id_new( + struct mptcp_rbs_value_sbf *sbf) +{ + struct mptcp_rbs_value_sbf_id *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_sbf_id), GFP_KERNEL); + value->kind = VALUE_KIND_SBF_ID; + value->free = mptcp_rbs_value_sbf_id_free; + value->execute = mptcp_rbs_value_sbf_id_execute; + value->sbf = sbf; + + return value; +} + +void mptcp_rbs_value_sbf_id_free(struct mptcp_rbs_value_sbf_id *self) +{ + MPTCP_RBS_VALUE_FREE(self->sbf); + kfree(self); +} + +s64 mptcp_rbs_value_sbf_id_execute(struct mptcp_rbs_value_sbf_id *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + struct tcp_sock *sbf; + + sbf = self->sbf->execute(self->sbf, ctx); + if (!sbf) + return -1; + + return sbf->mptcp->sbf_id; +} + +struct mptcp_rbs_value_sbf_id *mptcp_rbs_value_sbf_id_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_id *value) +{ + struct mptcp_rbs_value_sbf_id *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_sbf_id), GFP_KERNEL); + *clone = *value; + CLONE(clone->sbf); + + return clone; +} + +int mptcp_rbs_value_sbf_id_print(const struct mptcp_rbs_value_sbf_id *value, + char *buffer) +{ + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->sbf, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".ID"); + return len; +} + +/* some helper for delay calculation */ + +void mptcp_rbs_sbf_delay_update(struct tcp_sock *tp, const struct sk_buff *skb) +{ + /* Recalculate delays */ + /* Size considerations: we subtract two u32 values, the result might + * have a sign (requires 33 bit) + * However, we can safely ignore the highest bit of the u32 values + * (0x80000000), + * add 1 << 32 (0x80000000and) for the subtraction and store it as u32. 
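+ *
+ * Concretely, the bias added below is first_bit_set = 0x80000000 (2^31)
+ * and both operands are first masked to their lower 31 bits.  Worked
+ * example of the wrap-around case: tcp_time_stamp = 0x80000005 and
+ * rcv_tsval = 0x7FFFFFFE mask to 0x5 and 0x7FFFFFFE, and
+ * (0x80000000 + 0x5) - 0x7FFFFFFE = 0x7, the expected 7-tick difference;
+ * without a wrap the result simply carries the constant 2^31 bias.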
+ */ + struct mptcp_rbs_sbf_cb *sbf_cb = mptcp_rbs_get_sbf_cb(tp); + const unsigned int first_bit_set = 0x80000000; + const unsigned int first_bit_not_set = 0x7FFFFFFF; + sbf_cb->delay_in = + (first_bit_set + (tcp_time_stamp & first_bit_not_set)) - + (tp->rx_opt.rcv_tsval & first_bit_not_set); + sbf_cb->delay_out = + (first_bit_set + (tp->rx_opt.rcv_tsval & first_bit_not_set)) - + (tp->rx_opt.rcv_tsecr & first_bit_not_set); +// printk("rcv_tsval %u and rcv_tsecr %u lead to delay out %u and delay in %u\n", tp->rx_opt.rcv_tsval, tp->rx_opt.rcv_tsecr, sbf_cb->delay_out, sbf_cb->delay_in); +} + +/* delay out */ + +struct mptcp_rbs_value_sbf_delay_out *mptcp_rbs_value_sbf_delay_out_new( + struct mptcp_rbs_value_sbf *sbf) +{ + struct mptcp_rbs_value_sbf_delay_out *value; + + value = + kzalloc(sizeof(struct mptcp_rbs_value_sbf_delay_out), GFP_KERNEL); + value->kind = VALUE_KIND_SBF_DELAY_OUT; + value->free = mptcp_rbs_value_sbf_delay_out_free; + value->execute = mptcp_rbs_value_sbf_delay_out_execute; + value->sbf = sbf; + + return value; +} + +void mptcp_rbs_value_sbf_delay_out_free( + struct mptcp_rbs_value_sbf_delay_out *self) +{ + MPTCP_RBS_VALUE_FREE(self->sbf); + kfree(self); +} + +s64 mptcp_rbs_value_sbf_delay_out_execute( + struct mptcp_rbs_value_sbf_delay_out *self, struct mptcp_rbs_eval_ctx *ctx) +{ + struct tcp_sock *sbf; + + sbf = self->sbf->execute(self->sbf, ctx); + if (!sbf) + return -1; + + return mptcp_rbs_get_sbf_cb(sbf)->delay_out; +} + +struct mptcp_rbs_value_sbf_delay_out *mptcp_rbs_value_sbf_delay_out_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_delay_out *value) +{ + struct mptcp_rbs_value_sbf_delay_out *clone; + + clone = + kmalloc(sizeof(struct mptcp_rbs_value_sbf_delay_out), GFP_KERNEL); + *clone = *value; + CLONE(clone->sbf); + + return clone; +} + +int mptcp_rbs_value_sbf_delay_out_print( + const struct mptcp_rbs_value_sbf_delay_out *value, char *buffer) +{ + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->sbf, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".DELAY_OUT"); + return len; +} + +/* delay in */ + +struct mptcp_rbs_value_sbf_delay_in *mptcp_rbs_value_sbf_delay_in_new( + struct mptcp_rbs_value_sbf *sbf) +{ + struct mptcp_rbs_value_sbf_delay_in *value; + + value = + kzalloc(sizeof(struct mptcp_rbs_value_sbf_delay_in), GFP_KERNEL); + value->kind = VALUE_KIND_SBF_DELAY_IN; + value->free = mptcp_rbs_value_sbf_delay_in_free; + value->execute = mptcp_rbs_value_sbf_delay_in_execute; + value->sbf = sbf; + + return value; +} + +void mptcp_rbs_value_sbf_delay_in_free( + struct mptcp_rbs_value_sbf_delay_in *self) +{ + MPTCP_RBS_VALUE_FREE(self->sbf); + kfree(self); +} + +s64 mptcp_rbs_value_sbf_delay_in_execute( + struct mptcp_rbs_value_sbf_delay_in *self, struct mptcp_rbs_eval_ctx *ctx) +{ + struct tcp_sock *sbf; + + sbf = self->sbf->execute(self->sbf, ctx); + if (!sbf) + return -1; + + return mptcp_rbs_get_sbf_cb(sbf)->delay_in; +} + +struct mptcp_rbs_value_sbf_delay_in *mptcp_rbs_value_sbf_delay_in_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_delay_in *value) +{ + struct mptcp_rbs_value_sbf_delay_in *clone; + + clone = + kmalloc(sizeof(struct mptcp_rbs_value_sbf_delay_in), GFP_KERNEL); + *clone = *value; + CLONE(clone->sbf); + + return clone; +} + +int mptcp_rbs_value_sbf_delay_in_print( + const struct mptcp_rbs_value_sbf_delay_in *value, char *buffer) +{ + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->sbf, 
buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".DELAY_IN"); + return len; +} + +/* some helper for bw calculation */ + +u64 mptcp_rbs_sbf_get_bw_send(struct mptcp_rbs_sbf_cb *sbf_cb) +{ + u64 RBS_BW_INTERVAL_NS = 1000000000; // one second + u64 diff = ktime_get_raw_ns() - sbf_cb->bw_out_last_update_ns; + + /* how much time is gone since the last update */ + if (diff > RBS_BW_INTERVAL_NS) + return 0; + + /* only use the portion of one second */ + return (RBS_BW_INTERVAL_NS - diff) * sbf_cb->bw_out_bytes / + RBS_BW_INTERVAL_NS; +} + +u64 mptcp_rbs_sbf_get_bw_ack(struct mptcp_rbs_sbf_cb *sbf_cb) +{ + u64 RBS_BW_INTERVAL_NS = 1000000000; // one second + u64 diff = ktime_get_raw_ns() - sbf_cb->bw_ack_last_update_ns; + + /* how much time is gone since the last update */ + if (diff > RBS_BW_INTERVAL_NS) + return 0; + + /* only use the portion of one second */ + return (RBS_BW_INTERVAL_NS - diff) * sbf_cb->bw_ack_bytes / + RBS_BW_INTERVAL_NS; +} + +void mptcp_rbs_sbf_bw_add(u64 *last_update_ns, u64 *bytes_in_cb, + unsigned int bytes) +{ + u64 ct = ktime_get_raw_ns(); + u64 RBS_BW_INTERVAL_NS = 1000000000; // one second + + mptcp_debug("rbs_bw compares %llu and %llu, or more precise, the diff " + "is %llu\n", + ct, (*last_update_ns), (ct - *last_update_ns)); + + if (bytes == 0) // nothing to do + return; + + if (ct - *last_update_ns < + RBS_BW_INTERVAL_NS) { // delta t less than a second + u64 delta_t = ct - (*last_update_ns); + *bytes_in_cb = (*bytes_in_cb) * (RBS_BW_INTERVAL_NS - delta_t) / + RBS_BW_INTERVAL_NS + + bytes; + mptcp_debug("rbs_bw sets bw to %llu with delta_t %llu\n", + *bytes_in_cb, delta_t); + } else { + *bytes_in_cb = bytes; + mptcp_debug( + "rbs_bw sets new value after more than 1 second to %u\n", + bytes); + } + + *last_update_ns = ct; +} + +void mptcp_rbs_sbf_bw_send_add(struct tcp_sock *tp, unsigned int bytes) +{ + struct mptcp_rbs_sbf_cb *sbf_cb = mptcp_rbs_get_sbf_cb(tp); + mptcp_rbs_sbf_bw_add(&sbf_cb->bw_out_last_update_ns, + &sbf_cb->bw_out_bytes, bytes); +} + +void mptcp_rbs_sbf_bw_ack_add(struct tcp_sock *tp, unsigned int bytes) +{ + struct mptcp_rbs_sbf_cb *sbf_cb = mptcp_rbs_get_sbf_cb(tp); + mptcp_rbs_sbf_bw_add(&sbf_cb->bw_ack_last_update_ns, + &sbf_cb->bw_ack_bytes, bytes); +} + +/* bw out ack */ + +struct mptcp_rbs_value_sbf_bw_out_ack *mptcp_rbs_value_sbf_bw_out_ack_new( + struct mptcp_rbs_value_sbf *sbf) +{ + struct mptcp_rbs_value_sbf_bw_out_ack *value; + + value = + kzalloc(sizeof(struct mptcp_rbs_value_sbf_bw_out_ack), GFP_KERNEL); + value->kind = VALUE_KIND_SBF_BW_OUT_ACK; + value->free = mptcp_rbs_value_sbf_bw_out_ack_free; + value->execute = mptcp_rbs_value_sbf_bw_out_ack_execute; + value->sbf = sbf; + + return value; +} + +void mptcp_rbs_value_sbf_bw_out_ack_free( + struct mptcp_rbs_value_sbf_bw_out_ack *self) +{ + MPTCP_RBS_VALUE_FREE(self->sbf); + kfree(self); +} + +s64 mptcp_rbs_value_sbf_bw_out_ack_execute( + struct mptcp_rbs_value_sbf_bw_out_ack *self, struct mptcp_rbs_eval_ctx *ctx) +{ + struct tcp_sock *sbf; + + sbf = self->sbf->execute(self->sbf, ctx); + if (!sbf) + return -1; + + return mptcp_rbs_sbf_get_bw_ack(mptcp_rbs_get_sbf_cb(sbf)); +} + +struct mptcp_rbs_value_sbf_bw_out_ack *mptcp_rbs_value_sbf_bw_out_ack_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_bw_out_ack *value) +{ + struct mptcp_rbs_value_sbf_bw_out_ack *clone; + + clone = + kmalloc(sizeof(struct mptcp_rbs_value_sbf_bw_out_ack), GFP_KERNEL); + *clone = *value; + CLONE(clone->sbf); + + return clone; +} + +int 
mptcp_rbs_value_sbf_bw_out_ack_print( + const struct mptcp_rbs_value_sbf_bw_out_ack *value, char *buffer) +{ + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->sbf, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".BW_OUT_ACK"); + return len; +} + +/* bw out send */ + +struct mptcp_rbs_value_sbf_bw_out_send *mptcp_rbs_value_sbf_bw_out_send_new( + struct mptcp_rbs_value_sbf *sbf) +{ + struct mptcp_rbs_value_sbf_bw_out_send *value; + + value = + kzalloc(sizeof(struct mptcp_rbs_value_sbf_bw_out_send), GFP_KERNEL); + value->kind = VALUE_KIND_SBF_BW_OUT_SEND; + value->free = mptcp_rbs_value_sbf_bw_out_send_free; + value->execute = mptcp_rbs_value_sbf_bw_out_send_execute; + value->sbf = sbf; + + return value; +} + +void mptcp_rbs_value_sbf_bw_out_send_free( + struct mptcp_rbs_value_sbf_bw_out_send *self) +{ + MPTCP_RBS_VALUE_FREE(self->sbf); + kfree(self); +} + +s64 mptcp_rbs_value_sbf_bw_out_send_execute( + struct mptcp_rbs_value_sbf_bw_out_send *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + struct tcp_sock *sbf; + + sbf = self->sbf->execute(self->sbf, ctx); + if (!sbf) + return -1; + + return mptcp_rbs_sbf_get_bw_send(mptcp_rbs_get_sbf_cb(sbf)); +} + +struct mptcp_rbs_value_sbf_bw_out_send *mptcp_rbs_value_sbf_bw_out_send_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_bw_out_send *value) +{ + struct mptcp_rbs_value_sbf_bw_out_send *clone; + + clone = + kmalloc(sizeof(struct mptcp_rbs_value_sbf_bw_out_send), GFP_KERNEL); + *clone = *value; + CLONE(clone->sbf); + + return clone; +} + +int mptcp_rbs_value_sbf_bw_out_send_print( + const struct mptcp_rbs_value_sbf_bw_out_send *value, char *buffer) +{ + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->sbf, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".BW_OUT_SEND"); + return len; +} + +/* slow start threshold ssthresh */ + +struct mptcp_rbs_value_sbf_ssthresh *mptcp_rbs_value_sbf_ssthresh_new( + struct mptcp_rbs_value_sbf *sbf) +{ + struct mptcp_rbs_value_sbf_ssthresh *value; + + value = + kzalloc(sizeof(struct mptcp_rbs_value_sbf_ssthresh), GFP_KERNEL); + value->kind = VALUE_KIND_SBF_SSTHRESH; + value->free = mptcp_rbs_value_sbf_ssthresh_free; + value->execute = mptcp_rbs_value_sbf_ssthresh_execute; + value->sbf = sbf; + + return value; +} + +void mptcp_rbs_value_sbf_ssthresh_free( + struct mptcp_rbs_value_sbf_ssthresh *self) +{ + MPTCP_RBS_VALUE_FREE(self->sbf); + kfree(self); +} + +s64 mptcp_rbs_value_sbf_ssthresh_execute( + struct mptcp_rbs_value_sbf_ssthresh *self, struct mptcp_rbs_eval_ctx *ctx) +{ + struct tcp_sock *sbf; + + sbf = self->sbf->execute(self->sbf, ctx); + if (!sbf) + return -1; + + return sbf->snd_ssthresh; +} + +struct mptcp_rbs_value_sbf_ssthresh *mptcp_rbs_value_sbf_ssthresh_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_ssthresh *value) +{ + struct mptcp_rbs_value_sbf_ssthresh *clone; + + clone = + kmalloc(sizeof(struct mptcp_rbs_value_sbf_ssthresh), GFP_KERNEL); + *clone = *value; + CLONE(clone->sbf); + + return clone; +} + +int mptcp_rbs_value_sbf_ssthresh_print( + const struct mptcp_rbs_value_sbf_ssthresh *value, char *buffer) +{ + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->sbf, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".SSTHRESH"); + return len; +} + +struct mptcp_rbs_value_sbf_throttled *mptcp_rbs_value_sbf_throttled_new( + struct mptcp_rbs_value_sbf *sbf) +{ + struct 
mptcp_rbs_value_sbf_throttled *value; + + value = + kzalloc(sizeof(struct mptcp_rbs_value_sbf_throttled), GFP_KERNEL); + value->kind = VALUE_KIND_SBF_THROTTLED; + value->free = mptcp_rbs_value_sbf_throttled_free; + value->execute = mptcp_rbs_value_sbf_throttled_execute; + value->sbf = sbf; + + return value; +} + +void mptcp_rbs_value_sbf_throttled_free( + struct mptcp_rbs_value_sbf_throttled *self) +{ + MPTCP_RBS_VALUE_FREE(self->sbf); + kfree(self); +} + +s32 mptcp_rbs_value_sbf_throttled_execute( + struct mptcp_rbs_value_sbf_throttled *self, struct mptcp_rbs_eval_ctx *ctx) +{ + struct tcp_sock *sbf; + + sbf = self->sbf->execute(self->sbf, ctx); + if (!sbf) + return -1; + + return test_bit(TSQ_THROTTLED, &sbf->tsq_flags); +} + +struct mptcp_rbs_value_sbf_throttled *mptcp_rbs_value_sbf_throttled_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_throttled *value) +{ + struct mptcp_rbs_value_sbf_throttled *clone; + + clone = + kmalloc(sizeof(struct mptcp_rbs_value_sbf_throttled), GFP_KERNEL); + *clone = *value; + CLONE(clone->sbf); + + return clone; +} + +int mptcp_rbs_value_sbf_throttled_print( + const struct mptcp_rbs_value_sbf_throttled *value, char *buffer) +{ + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->sbf, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".THROTTLED"); + return len; +} + +struct mptcp_rbs_value_sbf_lossy *mptcp_rbs_value_sbf_lossy_new( + struct mptcp_rbs_value_sbf *sbf) +{ + struct mptcp_rbs_value_sbf_lossy *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_sbf_lossy), GFP_KERNEL); + value->kind = VALUE_KIND_SBF_LOSSY; + value->free = mptcp_rbs_value_sbf_lossy_free; + value->execute = mptcp_rbs_value_sbf_lossy_execute; + value->sbf = sbf; + + return value; +} + +void mptcp_rbs_value_sbf_lossy_free(struct mptcp_rbs_value_sbf_lossy *self) +{ + MPTCP_RBS_VALUE_FREE(self->sbf); + kfree(self); +} + +s32 mptcp_rbs_value_sbf_lossy_execute(struct mptcp_rbs_value_sbf_lossy *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + struct tcp_sock *sbf; + + sbf = self->sbf->execute(self->sbf, ctx); + if (!sbf) + return -1; + + if (inet_csk((struct sock *) sbf)->icsk_ca_state == TCP_CA_Loss) { + mptcp_debug("sbf_is_available %p loss state -> false\n", sbf); + /* If SACK is disabled, and we got a loss, TCP does not exit + * the loss-state until something above high_seq has been + * acked. (see tcp_try_undo_recovery) + * + * high_seq is the snd_nxt at the moment of the RTO. As soon + * as we have an RTO, we won't push data on the subflow. + * Thus, snd_una can never go beyond high_seq. 
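+ *
+ * The check below therefore reports the subflow as LOSSY whenever it is
+ * in TCP_CA_Loss and SACK is enabled, and, in the Reno (no-SACK) case,
+ * only while snd_una has not yet caught up with high_seq.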
+ */ + if (!tcp_is_reno(sbf)) + return true; + else if (sbf->snd_una != sbf->high_seq) + return true; + } + + return false; +} + +struct mptcp_rbs_value_sbf_lossy *mptcp_rbs_value_sbf_lossy_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_lossy *value) +{ + struct mptcp_rbs_value_sbf_lossy *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_sbf_lossy), GFP_KERNEL); + *clone = *value; + CLONE(clone->sbf); + + return clone; +} + +int mptcp_rbs_value_sbf_lossy_print( + const struct mptcp_rbs_value_sbf_lossy *value, char *buffer) +{ + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->sbf, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".LOSSY"); + return len; +} + +struct mptcp_rbs_value_sbf_list_next *mptcp_rbs_value_sbf_list_next_new( + struct mptcp_rbs_value_sbf_list *list) +{ + struct mptcp_rbs_value_sbf_list_next *value; + + value = + kzalloc(sizeof(struct mptcp_rbs_value_sbf_list_next), GFP_KERNEL); + value->kind = VALUE_KIND_SBFLIST_NEXT; + value->free = mptcp_rbs_value_sbf_list_next_free; + value->execute = mptcp_rbs_value_sbf_list_next_execute; + value->list = list; + + return value; +} + +void mptcp_rbs_value_sbf_list_next_free( + struct mptcp_rbs_value_sbf_list_next *self) +{ + MPTCP_RBS_VALUE_FREE(self->list); + kfree(self); +} + +struct tcp_sock *mptcp_rbs_value_sbf_list_next_execute( + struct mptcp_rbs_value_sbf_list_next *self, struct mptcp_rbs_eval_ctx *ctx) +{ + struct tcp_sock *sbf; + +printk("%s for meta_sk %p with self %p coming from %pS with self->exec_count %u and rbs_cb->exec_count %u and prev %p and isnull %d\n", __func__, ctx->mpcb->meta_sk, self, __builtin_return_address(0), self->exec_count, ctx->rbs_cb->exec_count, self->prev, self->is_null); + + if (self->exec_count != ctx->rbs_cb->exec_count) { + self->prev = NULL; + self->is_null = false; + self->exec_count = ctx->rbs_cb->exec_count; + } + if (self->is_null) + return NULL; + + sbf = self->list->execute(self->list, ctx, &self->prev, &self->is_null); + if (!sbf) { + self->prev = NULL; + self->is_null = true; + + /* If we have nested loops we have to make sure that next time + * we visit this value the first item of the list is returned + */ + --self->exec_count; + } + + return sbf; +} + +struct mptcp_rbs_value_sbf_list_next *mptcp_rbs_value_sbf_list_next_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_list_next *value) +{ + struct mptcp_rbs_value_sbf_list_next *clone; + + clone = + kmalloc(sizeof(struct mptcp_rbs_value_sbf_list_next), GFP_KERNEL); + *clone = *value; + CLONE(clone->list); + + return clone; +} + +int mptcp_rbs_value_sbf_list_next_print( + const struct mptcp_rbs_value_sbf_list_next *value, char *buffer) +{ + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->list, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".NEXT()"); + return len; +} + +struct mptcp_rbs_value_sbf_list_empty *mptcp_rbs_value_sbf_list_empty_new( + struct mptcp_rbs_value_sbf_list *list) +{ + struct mptcp_rbs_value_sbf_list_empty *value; + + value = + kzalloc(sizeof(struct mptcp_rbs_value_sbf_list_empty), GFP_KERNEL); + value->kind = VALUE_KIND_SBFLIST_EMPTY; + value->free = mptcp_rbs_value_sbf_list_empty_free; + value->execute = mptcp_rbs_value_sbf_list_empty_execute; + value->list = list; + + return value; +} + +void mptcp_rbs_value_sbf_list_empty_free( + struct mptcp_rbs_value_sbf_list_empty *self) +{ + MPTCP_RBS_VALUE_FREE(self->list); + kfree(self); +} + 
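+/*
+ * All subflow list values share one enumeration protocol: execute() is
+ * called repeatedly with the same prev/is_null cursor until it returns
+ * NULL, and *is_null distinguishes "the list itself is null" from "no
+ * (more) elements".  A minimal consumer, the pattern used by EMPTY,
+ * COUNT, SUM, MIN and MAX in this file, looks like:
+ *
+ *   void *prev = NULL;
+ *   bool is_null;
+ *   struct tcp_sock *sbf = list->execute(list, ctx, &prev, &is_null);
+ *
+ *   while (sbf)
+ *           sbf = list->execute(list, ctx, &prev, &is_null);
+ */
+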
+s32 mptcp_rbs_value_sbf_list_empty_execute( + struct mptcp_rbs_value_sbf_list_empty *self, struct mptcp_rbs_eval_ctx *ctx) +{ + void *prev = NULL; + bool is_null; + struct tcp_sock *sbf; + + sbf = self->list->execute(self->list, ctx, &prev, &is_null); + + if (is_null) + return -1; + return sbf ? 0 : 1; +} + +struct mptcp_rbs_value_sbf_list_empty *mptcp_rbs_value_sbf_list_empty_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_list_empty *value) +{ + struct mptcp_rbs_value_sbf_list_empty *clone; + + clone = + kmalloc(sizeof(struct mptcp_rbs_value_sbf_list_empty), GFP_KERNEL); + *clone = *value; + CLONE(clone->list); + + return clone; +} + +int mptcp_rbs_value_sbf_list_empty_print( + const struct mptcp_rbs_value_sbf_list_empty *value, char *buffer) +{ + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->list, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".EMPTY"); + return len; +} + +struct mptcp_rbs_value_sbf_list_filter *mptcp_rbs_value_sbf_list_filter_new( + void) +{ + struct mptcp_rbs_value_sbf_list_filter *value; + + value = + kzalloc(sizeof(struct mptcp_rbs_value_sbf_list_filter), GFP_KERNEL); + value->kind = VALUE_KIND_SBFLIST_FILTER; + value->free = mptcp_rbs_value_sbf_list_filter_free; + value->execute = mptcp_rbs_value_sbf_list_filter_execute; + /* value->list and value->cond are set later */ + + return value; +} + +void mptcp_rbs_value_sbf_list_filter_free( + struct mptcp_rbs_value_sbf_list_filter *self) +{ + MPTCP_RBS_VALUE_FREE(self->list); + MPTCP_RBS_VALUE_FREE(self->cond); + kfree(self); +} + +struct tcp_sock *mptcp_rbs_value_sbf_list_filter_execute( + struct mptcp_rbs_value_sbf_list_filter *self, + struct mptcp_rbs_eval_ctx *ctx, void **prev, bool *is_null) +{ + struct tcp_sock *sbf; + s32 b; + + sbf = self->list->execute(self->list, ctx, prev, is_null); + if (*is_null) + return NULL; + + while (sbf) { + self->cur = sbf; + b = self->cond->execute(self->cond, ctx); + if (b > 0) + break; + + sbf = self->list->execute(self->list, ctx, prev, is_null); + } + + return sbf; +} + +struct mptcp_rbs_value_sbf_list_filter *mptcp_rbs_value_sbf_list_filter_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_list_filter *value) +{ + struct mptcp_rbs_value_sbf_list_filter *clone; + int i; + + clone = + kmalloc(sizeof(struct mptcp_rbs_value_sbf_list_filter), GFP_KERNEL); + *clone = *value; + CLONE(clone->list); + + for (i = 0; i < MAX_NESTING; ++i) { + if (!ctx->repls[i].repl) + break; + } + BUG_ON(i == MAX_NESTING); + + ctx->repls[i].repl = &value->cur; + ctx->repls[i].repl_with = &clone->cur; + CLONE(clone->cond); + ctx->repls[i].repl = NULL; + + return clone; +} + +int mptcp_rbs_value_sbf_list_filter_print( + const struct mptcp_rbs_value_sbf_list_filter *value, char *buffer) +{ + int tmp_len; + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->list, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".FILTER(v%p => ", &value->cur); + + tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->cond, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, ")"); + return len; +} + +struct mptcp_rbs_value_sbf_list_filter_sbf * +mptcp_rbs_value_sbf_list_filter_sbf_new(struct tcp_sock **cur) +{ + struct mptcp_rbs_value_sbf_list_filter_sbf *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_sbf_list_filter_sbf), + GFP_KERNEL); + value->kind = VALUE_KIND_SBFLIST_FILTER_SBF; 
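+ /* FILTER_SBF is the bound loop variable of an enclosing FILTER/MIN/MAX/
+ * SUM value: it owns no subflow itself but reads the current element
+ * through the enclosing value's cur pointer, which the clone path
+ * re-targets via ctx->repls. */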
+ value->free = mptcp_rbs_value_sbf_list_filter_sbf_free; + value->execute = mptcp_rbs_value_sbf_list_filter_sbf_execute; + value->cur = cur; + + return value; +} + +void mptcp_rbs_value_sbf_list_filter_sbf_free( + struct mptcp_rbs_value_sbf_list_filter_sbf *self) +{ + kfree(self); +} + +struct tcp_sock *mptcp_rbs_value_sbf_list_filter_sbf_execute( + struct mptcp_rbs_value_sbf_list_filter_sbf *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + return *self->cur; +} + +struct mptcp_rbs_value_sbf_list_filter_sbf * +mptcp_rbs_value_sbf_list_filter_sbf_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_list_filter_sbf *value) +{ + struct mptcp_rbs_value_sbf_list_filter_sbf *clone; + int i; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_sbf_list_filter_sbf), + GFP_KERNEL); + *clone = *value; + + for (i = 0; i < MAX_NESTING; ++i) { + if (clone->cur == ctx->repls[i].repl) { + clone->cur = ctx->repls[i].repl_with; + break; + } + } + + return clone; +} + +int mptcp_rbs_value_sbf_list_filter_sbf_print( + const struct mptcp_rbs_value_sbf_list_filter_sbf *value, char *buffer) +{ + return sprintf_null(&buffer, "v%p", value->cur); +} + +struct mptcp_rbs_value_sbf_list_max *mptcp_rbs_value_sbf_list_max_new(void) +{ + struct mptcp_rbs_value_sbf_list_max *value; + + value = + kzalloc(sizeof(struct mptcp_rbs_value_sbf_list_max), GFP_KERNEL); + value->kind = VALUE_KIND_SBFLIST_MAX; + value->free = mptcp_rbs_value_sbf_list_max_free; + value->execute = mptcp_rbs_value_sbf_list_max_execute; + /* value->list and value->cond are set later */ + + return value; +} + +void mptcp_rbs_value_sbf_list_max_free( + struct mptcp_rbs_value_sbf_list_max *self) +{ + MPTCP_RBS_VALUE_FREE(self->list); + MPTCP_RBS_VALUE_FREE(self->cond); + kfree(self); +} + +struct tcp_sock *mptcp_rbs_value_sbf_list_max_execute( + struct mptcp_rbs_value_sbf_list_max *self, struct mptcp_rbs_eval_ctx *ctx) +{ + void *prev = NULL; + bool is_null; + struct tcp_sock *sbf; + s64 value; + struct tcp_sock *max_sbf = NULL; + s64 max_value = -1; + + sbf = self->list->execute(self->list, ctx, &prev, &is_null); + if (is_null) + return NULL; + + while (sbf) { + self->cur = sbf; + value = self->cond->execute(self->cond, ctx); + if (value != -1 && value > max_value) { + max_value = value; + max_sbf = sbf; + } + + sbf = self->list->execute(self->list, ctx, &prev, &is_null); + } + + return max_sbf; +} + +struct mptcp_rbs_value_sbf_list_max *mptcp_rbs_value_sbf_list_max_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_list_max *value) +{ + struct mptcp_rbs_value_sbf_list_max *clone; + int i; + + clone = + kmalloc(sizeof(struct mptcp_rbs_value_sbf_list_max), GFP_KERNEL); + *clone = *value; + CLONE(clone->list); + + for (i = 0; i < MAX_NESTING; ++i) { + if (!ctx->repls[i].repl) + break; + } + BUG_ON(i == MAX_NESTING); + + ctx->repls[i].repl = &value->cur; + ctx->repls[i].repl_with = &clone->cur; + CLONE(clone->cond); + ctx->repls[i].repl = NULL; + + return clone; +} + +int mptcp_rbs_value_sbf_list_max_print( + const struct mptcp_rbs_value_sbf_list_max *value, char *buffer) +{ + int tmp_len; + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->list, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".MAX(v%p => ", &value->cur); + + tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->cond, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, ")"); + return len; +} + +struct 
mptcp_rbs_value_sbf_list_min *mptcp_rbs_value_sbf_list_min_new(void) +{ + struct mptcp_rbs_value_sbf_list_min *value; + + value = + kzalloc(sizeof(struct mptcp_rbs_value_sbf_list_min), GFP_KERNEL); + value->kind = VALUE_KIND_SBFLIST_MIN; + value->free = mptcp_rbs_value_sbf_list_min_free; + value->execute = mptcp_rbs_value_sbf_list_min_execute; + /* value->list and value->cond are set later */ + + return value; +} + +void mptcp_rbs_value_sbf_list_min_free( + struct mptcp_rbs_value_sbf_list_min *self) +{ + MPTCP_RBS_VALUE_FREE(self->list); + MPTCP_RBS_VALUE_FREE(self->cond); + kfree(self); +} + +struct tcp_sock *mptcp_rbs_value_sbf_list_min_execute( + struct mptcp_rbs_value_sbf_list_min *self, struct mptcp_rbs_eval_ctx *ctx) +{ + void *prev = NULL; + bool is_null; + struct tcp_sock *sbf; + s64 value; + struct tcp_sock *min_sbf = NULL; + s64 min_value = 0xFFFFFFFFll; + + sbf = self->list->execute(self->list, ctx, &prev, &is_null); + if (is_null) + return NULL; + + while (sbf) { + self->cur = sbf; + value = self->cond->execute(self->cond, ctx); + if (value != -1 && value < min_value) { + min_value = value; + min_sbf = sbf; + } + + sbf = self->list->execute(self->list, ctx, &prev, &is_null); + } + + return min_sbf; +} + +struct mptcp_rbs_value_sbf_list_min *mptcp_rbs_value_sbf_list_min_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_list_min *value) +{ + struct mptcp_rbs_value_sbf_list_min *clone; + int i; + + clone = + kmalloc(sizeof(struct mptcp_rbs_value_sbf_list_min), GFP_KERNEL); + *clone = *value; + CLONE(clone->list); + + for (i = 0; i < MAX_NESTING; ++i) { + if (!ctx->repls[i].repl) + break; + } + BUG_ON(i == MAX_NESTING); + + ctx->repls[i].repl = &value->cur; + ctx->repls[i].repl_with = &clone->cur; + CLONE(clone->cond); + ctx->repls[i].repl = NULL; + + return clone; +} + +int mptcp_rbs_value_sbf_list_min_print( + const struct mptcp_rbs_value_sbf_list_min *value, char *buffer) +{ + int tmp_len; + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->list, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".MIN(v%p => ", &value->cur); + + tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->cond, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, ")"); + return len; +} + +struct mptcp_rbs_value_sbf_list_get *mptcp_rbs_value_sbf_list_get_new( + struct mptcp_rbs_value_sbf_list *list, struct mptcp_rbs_value_int *index) +{ + struct mptcp_rbs_value_sbf_list_get *value; + + value = + kzalloc(sizeof(struct mptcp_rbs_value_sbf_list_get), GFP_KERNEL); + value->kind = VALUE_KIND_SBFLIST_GET; + value->free = mptcp_rbs_value_sbf_list_get_free; + value->execute = mptcp_rbs_value_sbf_list_get_execute; + value->list = list; + value->index = index; + + return value; +} + +void mptcp_rbs_value_sbf_list_get_free( + struct mptcp_rbs_value_sbf_list_get *self) +{ + MPTCP_RBS_VALUE_FREE(self->list); + MPTCP_RBS_VALUE_FREE(self->index); + kfree(self); +} + +struct tcp_sock *mptcp_rbs_value_sbf_list_get_execute( + struct mptcp_rbs_value_sbf_list_get *self, struct mptcp_rbs_eval_ctx *ctx) +{ + void *prev = NULL; + bool is_null; + struct tcp_sock *sbf; + s64 idx; + + sbf = self->list->execute(self->list, ctx, &prev, &is_null); + idx = self->index->execute(self->index, ctx); + + if (is_null || idx < 0) + return NULL; + + while (sbf && idx) { + --idx; + sbf = self->list->execute(self->list, ctx, &prev, &is_null); + } + + return sbf; +} + +struct 
mptcp_rbs_value_sbf_list_get *mptcp_rbs_value_sbf_list_get_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_list_get *value) +{ + struct mptcp_rbs_value_sbf_list_get *clone; + + clone = + kmalloc(sizeof(struct mptcp_rbs_value_sbf_list_get), GFP_KERNEL); + *clone = *value; + CLONE(clone->list); + CLONE(clone->index); + + return clone; +} + +int mptcp_rbs_value_sbf_list_get_print( + const struct mptcp_rbs_value_sbf_list_get *value, char *buffer) +{ + int tmp_len; + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->list, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".GET("); + + tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->index, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, ")"); + + return len; +} + +struct mptcp_rbs_value_sbf_list_count *mptcp_rbs_value_sbf_list_count_new( + struct mptcp_rbs_value_sbf_list *list) +{ + struct mptcp_rbs_value_sbf_list_count *value; + + value = + kzalloc(sizeof(struct mptcp_rbs_value_sbf_list_count), GFP_KERNEL); + value->kind = VALUE_KIND_SBFLIST_COUNT; + value->free = mptcp_rbs_value_sbf_list_count_free; + value->execute = mptcp_rbs_value_sbf_list_count_execute; + value->list = list; + + return value; +} + +void mptcp_rbs_value_sbf_list_count_free( + struct mptcp_rbs_value_sbf_list_count *self) +{ + MPTCP_RBS_VALUE_FREE(self->list); + kfree(self); +} + +s64 mptcp_rbs_value_sbf_list_count_execute( + struct mptcp_rbs_value_sbf_list_count *self, struct mptcp_rbs_eval_ctx *ctx) +{ + struct tcp_sock *sbf; + void *prev = NULL; + bool is_null; + int n = 0; + + sbf = self->list->execute(self->list, ctx, &prev, &is_null); + if (is_null) + return -1; + + while (sbf) { + ++n; + sbf = self->list->execute(self->list, ctx, &prev, &is_null); + } + + return n; +} + +struct mptcp_rbs_value_sbf_list_count *mptcp_rbs_value_sbf_list_count_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_list_count *value) +{ + struct mptcp_rbs_value_sbf_list_count *clone; + + clone = + kmalloc(sizeof(struct mptcp_rbs_value_sbf_list_count), GFP_KERNEL); + *clone = *value; + CLONE(clone->list); + + return clone; +} + +int mptcp_rbs_value_sbf_list_count_print( + const struct mptcp_rbs_value_sbf_list_count *value, char *buffer) +{ + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->list, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".COUNT"); + return len; +} + +struct mptcp_rbs_value_sbf_list_sum *mptcp_rbs_value_sbf_list_sum_new(void) +{ + struct mptcp_rbs_value_sbf_list_sum *value; + + value = + kzalloc(sizeof(struct mptcp_rbs_value_sbf_list_sum), GFP_KERNEL); + value->kind = VALUE_KIND_SBFLIST_SUM; + value->free = mptcp_rbs_value_sbf_list_sum_free; + value->execute = mptcp_rbs_value_sbf_list_sum_execute; + /* value->list and value->cond are set later */ + + return value; +} + +void mptcp_rbs_value_sbf_list_sum_free( + struct mptcp_rbs_value_sbf_list_sum *self) +{ + MPTCP_RBS_VALUE_FREE(self->list); + MPTCP_RBS_VALUE_FREE(self->cond); + kfree(self); +} + +s64 mptcp_rbs_value_sbf_list_sum_execute( + struct mptcp_rbs_value_sbf_list_sum *self, struct mptcp_rbs_eval_ctx *ctx) +{ + void *prev = NULL; + bool is_null; + struct tcp_sock *sbf; + s64 sum = 0; + s64 value; + + sbf = self->list->execute(self->list, ctx, &prev, &is_null); + if (is_null) + return -1; + + while (sbf) { + self->cur = sbf; + value = self->cond->execute(self->cond, ctx); + if 
(value != -1) + sum += value; + + sbf = self->list->execute(self->list, ctx, &prev, &is_null); + } + + return sum; +} + +struct mptcp_rbs_value_sbf_list_sum *mptcp_rbs_value_sbf_list_sum_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_list_sum *value) +{ + struct mptcp_rbs_value_sbf_list_sum *clone; + int i; + + clone = + kmalloc(sizeof(struct mptcp_rbs_value_sbf_list_sum), GFP_KERNEL); + *clone = *value; + CLONE(clone->list); + + for (i = 0; i < MAX_NESTING; ++i) { + if (!ctx->repls[i].repl) + break; + } + BUG_ON(i == MAX_NESTING); + + ctx->repls[i].repl = &value->cur; + ctx->repls[i].repl_with = &clone->cur; + CLONE(clone->cond); + ctx->repls[i].repl = NULL; + + return clone; +} + +int mptcp_rbs_value_sbf_list_sum_print( + const struct mptcp_rbs_value_sbf_list_sum *value, char *buffer) +{ + int tmp_len; + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->list, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".SUM(v%p => ", &value->cur); + + tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->cond, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, ")"); + return len; +} + +struct mptcp_rbs_value_skb_list_next *mptcp_rbs_value_skb_list_next_new( + struct mptcp_rbs_value_skb_list *list) +{ + struct mptcp_rbs_value_skb_list_next *value; + + value = + kzalloc(sizeof(struct mptcp_rbs_value_skb_list_next), GFP_KERNEL); + value->kind = VALUE_KIND_SKBLIST_NEXT; + value->free = mptcp_rbs_value_skb_list_next_free; + value->execute = mptcp_rbs_value_skb_list_next_execute; + value->list = list; + value->reinject = list->underlying_queue_kind == VALUE_KIND_RQ; + + return value; +} + +void mptcp_rbs_value_skb_list_next_free( + struct mptcp_rbs_value_skb_list_next *self) +{ + MPTCP_RBS_VALUE_FREE(self->list); + kfree(self); +} + +struct sk_buff *mptcp_rbs_value_skb_list_next_execute( + struct mptcp_rbs_value_skb_list_next *self, struct mptcp_rbs_eval_ctx *ctx) +{ + struct sk_buff *skb; + + if (self->exec_count != ctx->rbs_cb->exec_count) { + self->prev = NULL; + self->is_null = false; + self->exec_count = ctx->rbs_cb->exec_count; + } + if (self->is_null) + return NULL; + + skb = self->list->execute(self->list, ctx, &self->prev, &self->is_null); + if (!skb) { + self->prev = NULL; + self->is_null = true; + + /* If we have nested loops we have to make sure that next time + * we visit this value the first item of the list is returned + */ + --self->exec_count; + } + + return skb; +} + +struct mptcp_rbs_value_skb_list_next *mptcp_rbs_value_skb_list_next_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_skb_list_next *value) +{ + struct mptcp_rbs_value_skb_list_next *clone; + + clone = + kmalloc(sizeof(struct mptcp_rbs_value_skb_list_next), GFP_KERNEL); + *clone = *value; + CLONE(clone->list); + + return clone; +} + +int mptcp_rbs_value_skb_list_next_print( + const struct mptcp_rbs_value_skb_list_next *value, char *buffer) +{ + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->list, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".NEXT()"); + return len; +} + +struct mptcp_rbs_value_skb_sent_on *mptcp_rbs_value_skb_sent_on_new( + struct mptcp_rbs_value_skb *skb, struct mptcp_rbs_value_sbf *sbf) +{ + struct mptcp_rbs_value_skb_sent_on *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_skb_sent_on), GFP_KERNEL); + value->kind = VALUE_KIND_SKB_SENT_ON; + value->free = 
mptcp_rbs_value_skb_sent_on_free; + value->execute = mptcp_rbs_value_skb_sent_on_execute; + value->skb = skb; + value->sbf = sbf; + + return value; +} + +void mptcp_rbs_value_skb_sent_on_free(struct mptcp_rbs_value_skb_sent_on *self) +{ + MPTCP_RBS_VALUE_FREE(self->skb); + MPTCP_RBS_VALUE_FREE(self->sbf); + kfree(self); +} + +s32 mptcp_rbs_value_skb_sent_on_execute( + struct mptcp_rbs_value_skb_sent_on *self, struct mptcp_rbs_eval_ctx *ctx) +{ + struct sk_buff *skb; + struct tcp_sock *sbf; + + skb = self->skb->execute(self->skb, ctx); + if (!skb) + return -1; + + sbf = self->sbf->execute(self->sbf, ctx); + if (!sbf) + return -1; + + return mptcp_pi_to_flag(sbf->mptcp->path_index) & + TCP_SKB_CB(skb)->path_mask; +} + +struct mptcp_rbs_value_skb_sent_on *mptcp_rbs_value_skb_sent_on_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_skb_sent_on *value) +{ + struct mptcp_rbs_value_skb_sent_on *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_skb_sent_on), GFP_KERNEL); + *clone = *value; + CLONE(clone->sbf); + CLONE(clone->skb); + + return clone; +} + +int mptcp_rbs_value_skb_sent_on_print( + const struct mptcp_rbs_value_skb_sent_on *value, char *buffer) +{ + int tmp_len; + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->skb, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".SENT_ON("); + + tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->sbf, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, ")"); + return len; +} + +struct mptcp_rbs_value_skb_sent_on_all *mptcp_rbs_value_skb_sent_on_all_new( + struct mptcp_rbs_value_skb *skb) +{ + struct mptcp_rbs_value_skb_sent_on_all *value; + + value = + kzalloc(sizeof(struct mptcp_rbs_value_skb_sent_on_all), GFP_KERNEL); + value->kind = VALUE_KIND_SKB_SENT_ON_ALL; + value->free = mptcp_rbs_value_skb_sent_on_all_free; + value->execute = mptcp_rbs_value_skb_sent_on_all_execute; + value->skb = skb; + + return value; +} + +void mptcp_rbs_value_skb_sent_on_all_free( + struct mptcp_rbs_value_skb_sent_on_all *self) +{ + MPTCP_RBS_VALUE_FREE(self->skb); + kfree(self); +} + +s32 mptcp_rbs_value_skb_sent_on_all_execute( + struct mptcp_rbs_value_skb_sent_on_all *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + struct sk_buff *skb; + u32 mask; + struct tcp_sock *sbf; + + skb = self->skb->execute(self->skb, ctx); + if (!skb) + return -1; + + mask = TCP_SKB_CB(skb)->path_mask; + sbf = ctx->mpcb->connection_list; + + while (sbf) { + if (!(mask & mptcp_pi_to_flag(sbf->mptcp->path_index))) + return 0; + + sbf = sbf->mptcp->next; + } + + return 1; +} + +struct mptcp_rbs_value_skb_sent_on_all *mptcp_rbs_value_skb_sent_on_all_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_skb_sent_on_all *value) +{ + struct mptcp_rbs_value_skb_sent_on_all *clone; + + clone = + kmalloc(sizeof(struct mptcp_rbs_value_skb_sent_on_all), GFP_KERNEL); + *clone = *value; + CLONE(clone->skb); + + return clone; +} + +int mptcp_rbs_value_skb_sent_on_all_print( + const struct mptcp_rbs_value_skb_sent_on_all *value, char *buffer) +{ + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->skb, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".SENT_ON_ALL"); + return len; +} + +struct mptcp_rbs_value_skb_user *mptcp_rbs_value_skb_user_new( + struct mptcp_rbs_value_skb *skb) +{ + struct mptcp_rbs_value_skb_user *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_skb_user), 
GFP_KERNEL); + value->kind = VALUE_KIND_SKB_USER; + value->free = mptcp_rbs_value_skb_user_free; + value->execute = mptcp_rbs_value_skb_user_execute; + value->skb = skb; + + return value; +} + +void mptcp_rbs_value_skb_user_free(struct mptcp_rbs_value_skb_user *self) +{ + MPTCP_RBS_VALUE_FREE(self->skb); + kfree(self); +} + +s64 mptcp_rbs_value_skb_user_execute(struct mptcp_rbs_value_skb_user *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + struct sk_buff *skb; + + skb = self->skb->execute(self->skb, ctx); + if (!skb) + return -1; + + return TCP_SKB_CB(skb)->mptcp_rbs.user; +} + +struct mptcp_rbs_value_skb_user *mptcp_rbs_value_skb_user_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_skb_user *value) +{ + struct mptcp_rbs_value_skb_user *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_skb_user), GFP_KERNEL); + *clone = *value; + CLONE(clone->skb); + + return clone; +} + +int mptcp_rbs_value_skb_user_print(const struct mptcp_rbs_value_skb_user *value, + char *buffer) +{ + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->skb, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".USER"); + return len; +} + +struct mptcp_rbs_value_skb_seq *mptcp_rbs_value_skb_seq_new( + struct mptcp_rbs_value_skb *skb) +{ + struct mptcp_rbs_value_skb_seq *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_skb_seq), GFP_KERNEL); + value->kind = VALUE_KIND_SKB_SEQ; + value->free = mptcp_rbs_value_skb_seq_free; + value->execute = mptcp_rbs_value_skb_seq_execute; + value->skb = skb; + + return value; +} + +void mptcp_rbs_value_skb_seq_free(struct mptcp_rbs_value_skb_seq *self) +{ + MPTCP_RBS_VALUE_FREE(self->skb); + kfree(self); +} + +s64 mptcp_rbs_value_skb_seq_execute(struct mptcp_rbs_value_skb_seq *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + struct sk_buff *skb; + + skb = self->skb->execute(self->skb, ctx); + if (!skb) + return -1; + + return TCP_SKB_CB(skb)->seq; +} + +struct mptcp_rbs_value_skb_seq *mptcp_rbs_value_skb_seq_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_skb_seq *value) +{ + struct mptcp_rbs_value_skb_seq *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_skb_seq), GFP_KERNEL); + *clone = *value; + CLONE(clone->skb); + + return clone; +} + +int mptcp_rbs_value_skb_seq_print(const struct mptcp_rbs_value_skb_seq *value, + char *buffer) +{ + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->skb, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".SEQ"); + return len; +} + +struct mptcp_rbs_value_skb_psh *mptcp_rbs_value_skb_psh_new( + struct mptcp_rbs_value_skb *skb) +{ + struct mptcp_rbs_value_skb_psh *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_skb_psh), GFP_KERNEL); + value->kind = VALUE_KIND_SKB_PSH; + value->free = mptcp_rbs_value_skb_psh_free; + value->execute = mptcp_rbs_value_skb_psh_execute; + value->skb = skb; + + return value; +} + +void mptcp_rbs_value_skb_psh_free(struct mptcp_rbs_value_skb_psh *self) +{ + MPTCP_RBS_VALUE_FREE(self->skb); + kfree(self); +} + +s32 mptcp_rbs_value_skb_psh_execute(struct mptcp_rbs_value_skb_psh *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + struct sk_buff *skb; + + skb = self->skb->execute(self->skb, ctx); + if (!skb) + return -1; + + return TCP_SKB_CB(skb)->tcp_flags & TCPHDR_PSH; +} + +struct mptcp_rbs_value_skb_psh *mptcp_rbs_value_skb_psh_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_skb_psh *value) +{ + struct 
mptcp_rbs_value_skb_psh *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_skb_psh), GFP_KERNEL); + *clone = *value; + CLONE(clone->skb); + + return clone; +} + +int mptcp_rbs_value_skb_psh_print(const struct mptcp_rbs_value_skb_psh *value, + char *buffer) +{ + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->skb, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".PSH"); + return len; +} + +struct mptcp_rbs_value_skb_length *mptcp_rbs_value_skb_length_new( + struct mptcp_rbs_value_skb *skb) +{ + struct mptcp_rbs_value_skb_length *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_skb_length), GFP_KERNEL); + value->kind = VALUE_KIND_SKB_LENGTH; + value->free = mptcp_rbs_value_skb_length_free; + value->execute = mptcp_rbs_value_skb_length_execute; + value->skb = skb; + + return value; +} + +void mptcp_rbs_value_skb_length_free(struct mptcp_rbs_value_skb_length *self) +{ + MPTCP_RBS_VALUE_FREE(self->skb); + kfree(self); +} + +s64 mptcp_rbs_value_skb_length_execute(struct mptcp_rbs_value_skb_length *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + struct sk_buff *skb; + + skb = self->skb->execute(self->skb, ctx); + if (!skb) + return -1; + + return TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq; +} + +struct mptcp_rbs_value_skb_length *mptcp_rbs_value_skb_length_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_skb_length *value) +{ + struct mptcp_rbs_value_skb_length *clone; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_skb_length), GFP_KERNEL); + *clone = *value; + CLONE(clone->skb); + + return clone; +} + +int mptcp_rbs_value_skb_length_print(const struct mptcp_rbs_value_skb_length *value, + char *buffer) +{ + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->skb, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".LENGTH"); + return len; +} + +struct mptcp_rbs_value_skb_list_empty *mptcp_rbs_value_skb_list_empty_new( + struct mptcp_rbs_value_skb_list *list) +{ + struct mptcp_rbs_value_skb_list_empty *value; + + value = + kzalloc(sizeof(struct mptcp_rbs_value_skb_list_empty), GFP_KERNEL); + value->kind = VALUE_KIND_SKBLIST_EMPTY; + value->free = mptcp_rbs_value_skb_list_empty_free; + value->execute = mptcp_rbs_value_skb_list_empty_execute; + value->list = list; + + return value; +} + +void mptcp_rbs_value_skb_list_empty_free( + struct mptcp_rbs_value_skb_list_empty *self) +{ + MPTCP_RBS_VALUE_FREE(self->list); + kfree(self); +} + +s32 mptcp_rbs_value_skb_list_empty_execute( + struct mptcp_rbs_value_skb_list_empty *self, struct mptcp_rbs_eval_ctx *ctx) +{ + void *prev = NULL; + bool is_null; + struct sk_buff *skb; + + skb = self->list->execute(self->list, ctx, &prev, &is_null); + + if (is_null) + return -1; + return skb ? 
0 : 1; +} + +struct mptcp_rbs_value_skb_list_empty *mptcp_rbs_value_skb_list_empty_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_skb_list_empty *value) +{ + struct mptcp_rbs_value_skb_list_empty *clone; + + clone = + kmalloc(sizeof(struct mptcp_rbs_value_skb_list_empty), GFP_KERNEL); + *clone = *value; + CLONE(clone->list); + + return clone; +} + +int mptcp_rbs_value_skb_list_empty_print( + const struct mptcp_rbs_value_skb_list_empty *value, char *buffer) +{ + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->list, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".EMPTY"); + return len; +} + +struct mptcp_rbs_value_skb_list_pop *mptcp_rbs_value_skb_list_pop_new( + struct mptcp_rbs_value_skb_list *list) +{ + struct mptcp_rbs_value_skb_list_pop *value; + + value = + kzalloc(sizeof(struct mptcp_rbs_value_skb_list_pop), GFP_KERNEL); + value->kind = VALUE_KIND_SKBLIST_POP; + value->free = mptcp_rbs_value_skb_list_pop_free; + value->execute = mptcp_rbs_value_skb_list_pop_execute; + value->list = list; + value->reinject = list->underlying_queue_kind == VALUE_KIND_RQ; + + return value; +} + +void mptcp_rbs_value_skb_list_pop_free( + struct mptcp_rbs_value_skb_list_pop *self) +{ + MPTCP_RBS_VALUE_FREE(self->list); + kfree(self); +} + +struct sk_buff *mptcp_rbs_value_skb_list_pop_execute( + struct mptcp_rbs_value_skb_list_pop *self, struct mptcp_rbs_eval_ctx *ctx) +{ + void *prev = NULL; + bool is_null; + struct sk_buff *skb = + self->list->execute(self->list, ctx, &prev, &is_null); + + if (is_null || !skb) + return NULL; + + /* after this point, we are sure that we execute a pop */ + ctx->side_effects = 1; + + if (self->list->underlying_queue_kind == VALUE_KIND_Q) { + /* + * Pop an element from Q might be the queue_position or later + */ + if (skb == ctx->rbs_cb->queue_position) { + mptcp_rbs_advance_send_head( + ctx->meta_sk, &ctx->rbs_cb->queue_position); + mptcp_rbs_debug( + "rbs_q_pop returns %p, new queue head %p\n", skb, + ctx->rbs_cb->queue_position); + } else { + /* we can not unlink the packet, as all skbs have to + * stay in the circular buffer */ + mptcp_debug( + "%s sets not_in_queue for packet %p in Q, was %u\n", + __func__, skb, + TCP_SKB_CB(skb)->mptcp_rbs.flags_not_in_queue); + TCP_SKB_CB(skb)->mptcp_rbs.flags_not_in_queue = 1; + } + + return skb; + } + + if (self->list->underlying_queue_kind == VALUE_KIND_RQ) { + mptcp_debug("%s sets not_in_queue, to_free and to_unlink for " + "packet %p in RQ, was %u\n", + __func__, skb, + TCP_SKB_CB(skb)->mptcp_rbs.flags_not_in_queue); + TCP_SKB_CB(skb)->mptcp_rbs.flags_not_in_queue = 1; + TCP_SKB_CB(skb)->mptcp_rbs.flags_to_free = 1; + TCP_SKB_CB(skb)->mptcp_rbs.flags_to_unlink = 1; + + return skb; + } + + if (self->list->underlying_queue_kind == VALUE_KIND_QU) { + mptcp_debug( + "%s sets not_in_queue for packet %p in QU, was %u\n", + __func__, skb, + TCP_SKB_CB(skb)->mptcp_rbs.flags_not_in_queue); + TCP_SKB_CB(skb)->mptcp_rbs.flags_not_in_queue = 1; + return skb; + } + + mptcp_rbs_debug("mptcp_rbs_value_skb_list_pop_execute on " + "unexpected list kind %u\n", + self->list->kind); + return NULL; +} + +struct mptcp_rbs_value_skb_list_pop *mptcp_rbs_value_skb_list_pop_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_skb_list_pop *value) +{ + struct mptcp_rbs_value_skb_list_pop *clone; + + clone = + kmalloc(sizeof(struct mptcp_rbs_value_skb_list_pop), GFP_KERNEL); + *clone = *value; + CLONE(clone->list); + + return clone; +} + +int 
mptcp_rbs_value_skb_list_pop_print( + const struct mptcp_rbs_value_skb_list_pop *value, char *buffer) +{ + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->list, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".POP()"); + return len; +} + +struct mptcp_rbs_value_skb_list_filter *mptcp_rbs_value_skb_list_filter_new( + void) +{ + struct mptcp_rbs_value_skb_list_filter *value; + + value = + kzalloc(sizeof(struct mptcp_rbs_value_skb_list_filter), GFP_KERNEL); + value->kind = VALUE_KIND_SKBLIST_FILTER; + value->free = mptcp_rbs_value_skb_list_filter_free; + value->execute = mptcp_rbs_value_skb_list_filter_execute; + /* value->list and value->cond are set later */ + + return value; +} + +void mptcp_rbs_value_skb_list_filter_free( + struct mptcp_rbs_value_skb_list_filter *self) +{ + MPTCP_RBS_VALUE_FREE(self->list); + MPTCP_RBS_VALUE_FREE(self->cond); + kfree(self); +} + +struct sk_buff *mptcp_rbs_value_skb_list_filter_execute( + struct mptcp_rbs_value_skb_list_filter *self, + struct mptcp_rbs_eval_ctx *ctx, void **prev, bool *is_null) +{ + struct sk_buff *skb; + s32 b; + + skb = self->list->execute(self->list, ctx, prev, is_null); + if (*is_null) + return NULL; + + while (skb) { + self->progress.cur = skb; + b = self->cond->execute(self->cond, ctx); + if (b > 0) + break; + + skb = self->list->execute(self->list, ctx, prev, is_null); + } + + return skb; +} + +struct mptcp_rbs_value_skb_list_filter *mptcp_rbs_value_skb_list_filter_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_skb_list_filter *value) +{ + struct mptcp_rbs_value_skb_list_filter *clone; + int i; + + clone = + kmalloc(sizeof(struct mptcp_rbs_value_skb_list_filter), GFP_KERNEL); + *clone = *value; + CLONE(clone->list); + + for (i = 0; i < MAX_NESTING; ++i) { + if (!ctx->repls[i].repl) + break; + } + BUG_ON(i == MAX_NESTING); + + ctx->repls[i].repl = &value->progress; + ctx->repls[i].repl_with = &clone->progress; + CLONE(clone->cond); + ctx->repls[i].repl = NULL; + + return clone; +} + +int mptcp_rbs_value_skb_list_filter_print( + const struct mptcp_rbs_value_skb_list_filter *value, char *buffer) +{ + int tmp_len; + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->list, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".FILTER(v%p => ", &value->progress); + + tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->cond, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, ")"); + return len; +} + +struct mptcp_rbs_value_skb_list_filter_skb * +mptcp_rbs_value_skb_list_filter_skb_new( + struct mptcp_rbs_value_skb_list_filter_progress *progress) +{ + struct mptcp_rbs_value_skb_list_filter_skb *value; + + value = kzalloc(sizeof(struct mptcp_rbs_value_skb_list_filter_skb), + GFP_KERNEL); + value->kind = VALUE_KIND_SKBLIST_FILTER_SKB; + value->free = mptcp_rbs_value_skb_list_filter_skb_free; + value->execute = mptcp_rbs_value_skb_list_filter_skb_execute; + value->progress = progress; + value->reinject = progress->reinject; + + return value; +} + +void mptcp_rbs_value_skb_list_filter_skb_free( + struct mptcp_rbs_value_skb_list_filter_skb *self) +{ + kfree(self); +} + +struct sk_buff *mptcp_rbs_value_skb_list_filter_skb_execute( + struct mptcp_rbs_value_skb_list_filter_skb *self, + struct mptcp_rbs_eval_ctx *ctx) +{ + return self->progress->cur; +} + +struct mptcp_rbs_value_skb_list_filter_skb * +mptcp_rbs_value_skb_list_filter_skb_clone( + struct 
mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_skb_list_filter_skb *value) +{ + struct mptcp_rbs_value_skb_list_filter_skb *clone; + int i; + + clone = kmalloc(sizeof(struct mptcp_rbs_value_skb_list_filter_skb), + GFP_KERNEL); + *clone = *value; + + for (i = 0; i < MAX_NESTING; ++i) { + if (clone->progress == ctx->repls[i].repl) { + clone->progress = ctx->repls[i].repl_with; + break; + } + } + BUG_ON(i == MAX_NESTING); + + return clone; +} + +int mptcp_rbs_value_skb_list_filter_skb_print( + const struct mptcp_rbs_value_skb_list_filter_skb *value, char *buffer) +{ + return sprintf_null(&buffer, "v%p", value->progress); +} + +struct mptcp_rbs_value_skb_list_count *mptcp_rbs_value_skb_list_count_new( + struct mptcp_rbs_value_skb_list *list) +{ + struct mptcp_rbs_value_skb_list_count *value; + + value = + kzalloc(sizeof(struct mptcp_rbs_value_skb_list_count), GFP_KERNEL); + value->kind = VALUE_KIND_SKBLIST_COUNT; + value->free = mptcp_rbs_value_skb_list_count_free; + value->execute = mptcp_rbs_value_skb_list_count_execute; + value->list = list; + + return value; +} + +void mptcp_rbs_value_skb_list_count_free( + struct mptcp_rbs_value_skb_list_count *self) +{ + MPTCP_RBS_VALUE_FREE(self->list); + kfree(self); +} + +s64 mptcp_rbs_value_skb_list_count_execute( + struct mptcp_rbs_value_skb_list_count *self, struct mptcp_rbs_eval_ctx *ctx) +{ + struct sk_buff *skb; + void *prev = NULL; + bool is_null; + int n = 0; + + skb = self->list->execute(self->list, ctx, &prev, &is_null); + if (is_null) + return -1; + + while (skb) { + ++n; + skb = self->list->execute(self->list, ctx, &prev, &is_null); + } + + return n; +} + +struct mptcp_rbs_value_skb_list_count *mptcp_rbs_value_skb_list_count_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_skb_list_count *value) +{ + struct mptcp_rbs_value_skb_list_count *clone; + + clone = + kmalloc(sizeof(struct mptcp_rbs_value_skb_list_count), GFP_KERNEL); + *clone = *value; + CLONE(clone->list); + + return clone; +} + +int mptcp_rbs_value_skb_list_count_print( + const struct mptcp_rbs_value_skb_list_count *value, char *buffer) +{ + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->list, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".COUNT"); + return len; +} + +struct mptcp_rbs_value_skb_list_top *mptcp_rbs_value_skb_list_top_new( + struct mptcp_rbs_value_skb_list *list) +{ + struct mptcp_rbs_value_skb_list_top *value; + + value = + kzalloc(sizeof(struct mptcp_rbs_value_skb_list_top), GFP_KERNEL); + value->kind = VALUE_KIND_SKBLIST_TOP; + value->free = mptcp_rbs_value_skb_list_top_free; + value->execute = mptcp_rbs_value_skb_list_top_execute; + value->list = list; + value->reinject = list->underlying_queue_kind == VALUE_KIND_RQ; + + return value; +} + +void mptcp_rbs_value_skb_list_top_free( + struct mptcp_rbs_value_skb_list_top *self) +{ + MPTCP_RBS_VALUE_FREE(self->list); + kfree(self); +} + +struct sk_buff *mptcp_rbs_value_skb_list_top_execute( + struct mptcp_rbs_value_skb_list_top *self, struct mptcp_rbs_eval_ctx *ctx) +{ + void *prev = NULL; + bool is_null; + struct sk_buff *skb = + self->list->execute(self->list, ctx, &prev, &is_null); + + /* + * IMPORTANT: do not unset TCP_SKB_CB(skb)->mptcp_rbs_... here! + * only POP might set it, once it is set, it remains forever! 
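+	 * TOP itself does not mark the sockbuffer and sets no side effects;
+	 * it only peeks at the first element of the list.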
+ */ + + if (is_null) + return NULL; + + return skb; +} + +struct mptcp_rbs_value_skb_list_top *mptcp_rbs_value_skb_list_top_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_skb_list_top *value) +{ + struct mptcp_rbs_value_skb_list_top *clone; + + clone = + kmalloc(sizeof(struct mptcp_rbs_value_skb_list_top), GFP_KERNEL); + *clone = *value; + CLONE(clone->list); + + return clone; +} + +int mptcp_rbs_value_skb_list_top_print( + const struct mptcp_rbs_value_skb_list_top *value, char *buffer) +{ + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->list, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".TOP"); + return len; +} + +struct mptcp_rbs_value_skb_list_get *mptcp_rbs_value_skb_list_get_new( + struct mptcp_rbs_value_skb_list *list, struct mptcp_rbs_value_int *index) +{ + struct mptcp_rbs_value_skb_list_get *value; + + value = + kzalloc(sizeof(struct mptcp_rbs_value_skb_list_get), GFP_KERNEL); + value->kind = VALUE_KIND_SKBLIST_GET; + value->free = mptcp_rbs_value_skb_list_get_free; + value->execute = mptcp_rbs_value_skb_list_get_execute; + value->list = list; + value->index = index; + + return value; +} + +void mptcp_rbs_value_skb_list_get_free( + struct mptcp_rbs_value_skb_list_get *self) +{ + MPTCP_RBS_VALUE_FREE(self->list); + MPTCP_RBS_VALUE_FREE(self->index); + kfree(self); +} + +struct sk_buff *mptcp_rbs_value_skb_list_get_execute( + struct mptcp_rbs_value_skb_list_get *self, struct mptcp_rbs_eval_ctx *ctx) +{ + void *prev = NULL; + bool is_null; + struct sk_buff *skb; + s64 idx; + + skb = self->list->execute(self->list, ctx, &prev, &is_null); + idx = self->index->execute(self->index, ctx); + + /* + * IMPORTANT: do not unset TCP_SKB_CB(skb)->mptcp_rbs_... here! + * only POP might set it, once it is set, it remains forever! 
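+	 * GET has no side effects of its own either; it merely advances the
+	 * list iterator until the requested index is reached.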
+ */ + + if (is_null || idx < 0) + return NULL; + + while (skb && idx) { + idx--; + skb = self->list->execute(self->list, ctx, &prev, &is_null); + } + + return skb; +} + +struct mptcp_rbs_value_skb_list_get *mptcp_rbs_value_skb_list_get_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_skb_list_get *value) +{ + struct mptcp_rbs_value_skb_list_get *clone; + + clone = + kmalloc(sizeof(struct mptcp_rbs_value_skb_list_get), GFP_KERNEL); + *clone = *value; + CLONE(clone->list); + CLONE(clone->index); + + return clone; +} + +int mptcp_rbs_value_skb_list_get_print( + const struct mptcp_rbs_value_skb_list_get *value, char *buffer) +{ + int tmp_len; + int len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->list, buffer); + if (buffer) + buffer += len; + + len += sprintf_null(&buffer, ".GET("); + + tmp_len = mptcp_rbs_value_print( + (const struct mptcp_rbs_value *) value->index, buffer); + len += tmp_len; + if (buffer) + buffer += tmp_len; + + len += sprintf_null(&buffer, ")"); + + return len; +} + +#pragma GCC diagnostic push +#pragma GCC diagnostic error "-Wswitch" +#pragma GCC diagnostic ignored "-Wreturn-type" +enum mptcp_rbs_type_kind mptcp_rbs_value_get_type( + enum mptcp_rbs_value_kind kind) +{ + switch (kind) { + /* Built-in values */ + case VALUE_KIND_CONSTINT: + return TYPE_KIND_INT; + case VALUE_KIND_CONSTSTRING: + return TYPE_KIND_STRING; + case VALUE_KIND_NULL: + return TYPE_KIND_NULL; + case VALUE_KIND_BOOL_VAR: + return TYPE_KIND_BOOL; + case VALUE_KIND_INT_VAR: + return TYPE_KIND_INT; + case VALUE_KIND_STRING_VAR: + return TYPE_KIND_STRING; + case VALUE_KIND_SBF_VAR: + return TYPE_KIND_SBF; + case VALUE_KIND_SBFLIST_VAR: + return TYPE_KIND_SBFLIST; + case VALUE_KIND_SKB_VAR: + return TYPE_KIND_SKB; + case VALUE_KIND_SKBLIST_VAR: + return TYPE_KIND_SKBLIST; + case VALUE_KIND_NOT: + case VALUE_KIND_EQUAL: + case VALUE_KIND_UNEQUAL: + case VALUE_KIND_LESS: + case VALUE_KIND_LESS_EQUAL: + case VALUE_KIND_GREATER: + case VALUE_KIND_GREATER_EQUAL: + case VALUE_KIND_AND: + case VALUE_KIND_OR: + return TYPE_KIND_BOOL; + case VALUE_KIND_ADD: + case VALUE_KIND_SUBTRACT: + case VALUE_KIND_MULTIPLY: + case VALUE_KIND_DIVIDE: + case VALUE_KIND_REMAINDER: + return TYPE_KIND_INT; + case VALUE_KIND_IS_NULL: + case VALUE_KIND_IS_NOT_NULL: + return TYPE_KIND_BOOL; + case VALUE_KIND_REG: + return TYPE_KIND_INT; + case VALUE_KIND_SBFLIST_NEXT: + return TYPE_KIND_SBF; + case VALUE_KIND_SKBLIST_NEXT: + return TYPE_KIND_SKB; + +/* Custom values */ +#define RBS_APPLY(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_GET_VALUE_TYPE(ENUM, STR, STRUCT, RETURNTYPE) +#define RBS_APPLY_ON_SBF(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_GET_VALUE_TYPE(ENUM, STR, STRUCT, RETURNTYPE) +#define RBS_APPLY_ON_SBF_LIST(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_GET_VALUE_TYPE(ENUM, STR, STRUCT, RETURNTYPE) +#define RBS_APPLY_ON_SKB(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_GET_VALUE_TYPE(ENUM, STR, STRUCT, RETURNTYPE) +#define RBS_APPLY_ON_SKB_LIST(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_GET_VALUE_TYPE(ENUM, STR, STRUCT, RETURNTYPE) + MPTCP_RBS_VALUE_INFO +#undef RBS_APPLY +#undef RBS_APPLY_ON_SBF +#undef RBS_APPLY_ON_SBF_LIST +#undef RBS_APPLY_ON_SKB +#undef RBS_APPLY_ON_SKB_LIST + } +} +#pragma GCC diagnostic pop + +#pragma GCC diagnostic push +#pragma GCC diagnostic error "-Wswitch" +#pragma GCC diagnostic ignored "-Wreturn-type" +struct mptcp_rbs_value *mptcp_rbs_value_clone_ex( + struct mptcp_rbs_value_clone_ctx *ctx, const struct mptcp_rbs_value *value) +{ + if (ctx->user_func) { + 
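+		/*
+		 * Give the caller-supplied hook the first chance to clone this
+		 * value; if it returns NULL we fall back to the kind-specific
+		 * clone functions in the switch below.
+		 */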
struct mptcp_rbs_value *clone; + + clone = ctx->user_func(ctx->user_ctx, value); + if (clone) + return clone; + } + + switch (value->kind) { + /* Built-in values */ + APPLY_CLONE_VALUE(VALUE_KIND_CONSTINT, , + mptcp_rbs_value_constint, ) + APPLY_CLONE_VALUE(VALUE_KIND_CONSTSTRING, , + mptcp_rbs_value_conststring, ) + APPLY_CLONE_VALUE(VALUE_KIND_NULL, , mptcp_rbs_value_null, ) + APPLY_CLONE_VALUE(VALUE_KIND_BOOL_VAR, , + mptcp_rbs_value_bool_var, ) + APPLY_CLONE_VALUE(VALUE_KIND_INT_VAR, , + mptcp_rbs_value_int_var, ) + APPLY_CLONE_VALUE(VALUE_KIND_STRING_VAR, , + mptcp_rbs_value_string_var, ) + APPLY_CLONE_VALUE(VALUE_KIND_SBF_VAR, , + mptcp_rbs_value_sbf_var, ) + APPLY_CLONE_VALUE(VALUE_KIND_SBFLIST_VAR, , + mptcp_rbs_value_sbf_list_var, ) + APPLY_CLONE_VALUE(VALUE_KIND_SKB_VAR, , + mptcp_rbs_value_skb_var, ) + APPLY_CLONE_VALUE(VALUE_KIND_SKBLIST_VAR, , + mptcp_rbs_value_skb_list_var, ) + APPLY_CLONE_VALUE(VALUE_KIND_NOT, , mptcp_rbs_value_not, ) + APPLY_CLONE_VALUE(VALUE_KIND_EQUAL, , mptcp_rbs_value_equal, ) + APPLY_CLONE_VALUE(VALUE_KIND_UNEQUAL, , + mptcp_rbs_value_unequal, ) + APPLY_CLONE_VALUE(VALUE_KIND_LESS, , mptcp_rbs_value_less, ) + APPLY_CLONE_VALUE(VALUE_KIND_LESS_EQUAL, , + mptcp_rbs_value_less_equal, ) + APPLY_CLONE_VALUE(VALUE_KIND_GREATER, , + mptcp_rbs_value_greater, ) + APPLY_CLONE_VALUE(VALUE_KIND_GREATER_EQUAL, , + mptcp_rbs_value_greater_equal, ) + APPLY_CLONE_VALUE(VALUE_KIND_AND, , mptcp_rbs_value_and, ) + APPLY_CLONE_VALUE(VALUE_KIND_OR, , mptcp_rbs_value_or, ) + APPLY_CLONE_VALUE(VALUE_KIND_ADD, , mptcp_rbs_value_add, ) + APPLY_CLONE_VALUE(VALUE_KIND_SUBTRACT, , + mptcp_rbs_value_subtract, ) + APPLY_CLONE_VALUE(VALUE_KIND_MULTIPLY, , + mptcp_rbs_value_multiply, ) + APPLY_CLONE_VALUE(VALUE_KIND_DIVIDE, , mptcp_rbs_value_divide, ) + APPLY_CLONE_VALUE(VALUE_KIND_REMAINDER, , + mptcp_rbs_value_remainder, ) + APPLY_CLONE_VALUE(VALUE_KIND_IS_NULL, , + mptcp_rbs_value_is_null, ) + APPLY_CLONE_VALUE(VALUE_KIND_IS_NOT_NULL, , + mptcp_rbs_value_is_not_null, ) + APPLY_CLONE_VALUE(VALUE_KIND_REG, , mptcp_rbs_value_reg, ) + APPLY_CLONE_VALUE(VALUE_KIND_SBFLIST_NEXT, , + mptcp_rbs_value_sbf_list_next, ) + APPLY_CLONE_VALUE(VALUE_KIND_SKBLIST_NEXT, , + mptcp_rbs_value_skb_list_next, ) + +/* Custom values */ +#define RBS_APPLY(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_CLONE_VALUE(ENUM, STR, STRUCT, RETURNTYPE) +#define RBS_APPLY_ON_SBF(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_CLONE_VALUE(ENUM, STR, STRUCT, RETURNTYPE) +#define RBS_APPLY_ON_SBF_LIST(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_CLONE_VALUE(ENUM, STR, STRUCT, RETURNTYPE) +#define RBS_APPLY_ON_SKB(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_CLONE_VALUE(ENUM, STR, STRUCT, RETURNTYPE) +#define RBS_APPLY_ON_SKB_LIST(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_CLONE_VALUE(ENUM, STR, STRUCT, RETURNTYPE) + MPTCP_RBS_VALUE_INFO +#undef RBS_APPLY +#undef RBS_APPLY_ON_SBF +#undef RBS_APPLY_ON_SBF_LIST +#undef RBS_APPLY_ON_SKB +#undef RBS_APPLY_ON_SKB_LIST + } +} +#pragma GCC diagnostic pop + +struct mptcp_rbs_value *mptcp_rbs_value_clone( + const struct mptcp_rbs_value *value, void *user_ctx, + mptcp_rbs_value_clone_user_func user_func) +{ + struct mptcp_rbs_value_clone_ctx ctx; + + memset(&ctx, 0, sizeof(struct mptcp_rbs_value_clone_ctx)); + ctx.user_ctx = user_ctx; + ctx.user_func = user_func; + return mptcp_rbs_value_clone_ex(&ctx, value); +} + +#pragma GCC diagnostic push +#pragma GCC diagnostic error "-Wswitch" +#pragma GCC diagnostic ignored "-Wreturn-type" +int mptcp_rbs_value_print(const struct mptcp_rbs_value *value, char 
*buffer) +{ + switch (value->kind) { + /* Built-in values */ + APPLY_PRINT_VALUE(VALUE_KIND_CONSTINT, , + mptcp_rbs_value_constint, ) + APPLY_PRINT_VALUE(VALUE_KIND_CONSTSTRING, , + mptcp_rbs_value_conststring, ) + APPLY_PRINT_VALUE(VALUE_KIND_NULL, , mptcp_rbs_value_null, ) + APPLY_PRINT_VALUE(VALUE_KIND_BOOL_VAR, , + mptcp_rbs_value_bool_var, ) + APPLY_PRINT_VALUE(VALUE_KIND_INT_VAR, , + mptcp_rbs_value_int_var, ) + APPLY_PRINT_VALUE(VALUE_KIND_STRING_VAR, , + mptcp_rbs_value_string_var, ) + APPLY_PRINT_VALUE(VALUE_KIND_SBF_VAR, , + mptcp_rbs_value_sbf_var, ) + APPLY_PRINT_VALUE(VALUE_KIND_SBFLIST_VAR, , + mptcp_rbs_value_sbf_list_var, ) + APPLY_PRINT_VALUE(VALUE_KIND_SKB_VAR, , + mptcp_rbs_value_skb_var, ) + APPLY_PRINT_VALUE(VALUE_KIND_SKBLIST_VAR, , + mptcp_rbs_value_skb_list_var, ) + APPLY_PRINT_VALUE(VALUE_KIND_NOT, , mptcp_rbs_value_not, ) + APPLY_PRINT_VALUE(VALUE_KIND_EQUAL, , mptcp_rbs_value_equal, ) + APPLY_PRINT_VALUE(VALUE_KIND_UNEQUAL, , + mptcp_rbs_value_unequal, ) + APPLY_PRINT_VALUE(VALUE_KIND_LESS, , mptcp_rbs_value_less, ) + APPLY_PRINT_VALUE(VALUE_KIND_LESS_EQUAL, , + mptcp_rbs_value_less_equal, ) + APPLY_PRINT_VALUE(VALUE_KIND_GREATER, , + mptcp_rbs_value_greater, ) + APPLY_PRINT_VALUE(VALUE_KIND_GREATER_EQUAL, , + mptcp_rbs_value_greater_equal, ) + APPLY_PRINT_VALUE(VALUE_KIND_AND, , mptcp_rbs_value_and, ) + APPLY_PRINT_VALUE(VALUE_KIND_OR, , mptcp_rbs_value_or, ) + APPLY_PRINT_VALUE(VALUE_KIND_ADD, , mptcp_rbs_value_add, ) + APPLY_PRINT_VALUE(VALUE_KIND_SUBTRACT, , + mptcp_rbs_value_subtract, ) + APPLY_PRINT_VALUE(VALUE_KIND_MULTIPLY, , + mptcp_rbs_value_multiply, ) + APPLY_PRINT_VALUE(VALUE_KIND_DIVIDE, , mptcp_rbs_value_divide, ) + APPLY_PRINT_VALUE(VALUE_KIND_REMAINDER, , + mptcp_rbs_value_remainder, ) + APPLY_PRINT_VALUE(VALUE_KIND_IS_NULL, , + mptcp_rbs_value_is_null, ) + APPLY_PRINT_VALUE(VALUE_KIND_IS_NOT_NULL, , + mptcp_rbs_value_is_not_null, ) + APPLY_PRINT_VALUE(VALUE_KIND_REG, , mptcp_rbs_value_reg, ) + APPLY_PRINT_VALUE(VALUE_KIND_SBFLIST_NEXT, , + mptcp_rbs_value_sbf_list_next, ) + APPLY_PRINT_VALUE(VALUE_KIND_SKBLIST_NEXT, , + mptcp_rbs_value_skb_list_next, ) + +/* Custom values */ +#define RBS_APPLY(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_PRINT_VALUE(ENUM, STR, STRUCT, RETURNTYPE) +#define RBS_APPLY_ON_SBF(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_PRINT_VALUE(ENUM, STR, STRUCT, RETURNTYPE) +#define RBS_APPLY_ON_SBF_LIST(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_PRINT_VALUE(ENUM, STR, STRUCT, RETURNTYPE) +#define RBS_APPLY_ON_SKB(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_PRINT_VALUE(ENUM, STR, STRUCT, RETURNTYPE) +#define RBS_APPLY_ON_SKB_LIST(ENUM, STR, STRUCT, RETURNTYPE) \ + APPLY_PRINT_VALUE(ENUM, STR, STRUCT, RETURNTYPE) + MPTCP_RBS_VALUE_INFO +#undef RBS_APPLY +#undef RBS_APPLY_ON_SBF +#undef RBS_APPLY_ON_SBF_LIST +#undef RBS_APPLY_ON_SKB +#undef RBS_APPLY_ON_SKB_LIST + } +} +#pragma GCC diagnostic pop diff --git a/net/mptcp/mptcp_rbs_value.h b/net/mptcp/mptcp_rbs_value.h new file mode 100644 index 0000000000000..981762cab698f --- /dev/null +++ b/net/mptcp/mptcp_rbs_value.h @@ -0,0 +1,2098 @@ +#ifndef _MPTCP_RBS_AST_H +#define _MPTCP_RBS_AST_H + +#include "mptcp_rbs_type.h" +#include + +struct mptcp_rbs_eval_ctx; +struct mptcp_rbs_sbf_cb; +struct mptcp_rbs_value_clone_ctx; +struct sk_buff_head; + +/* + * The following macro describes all values that can be parsed. + * To add a value, add a new line to the macro with the following content: + * <apply macro>( + * <value kind enum>, + * <name string>, + * <value struct>, + * <return type kind> + * ) + * + * The <apply macro> determines the value on which the new value can be + * applied.
For example RQ.getvalue has the macro RBS_APPLY_ON_SKB_LIST because + * getvalue is applied on RQ of type sockbuffer list. Possible macros are: + * RBS_APPLY if the value needs no other value to be applied + * RBS_APPLY_ON_SBF + * RBS_APPLY_ON_SBF_LIST + * RBS_APPLY_ON_SKB + * RBS_APPLY_ON_SKB_LIST + */ +#define MPTCP_RBS_VALUE_INFO \ + RBS_APPLY(VALUE_KIND_Q, "Q", mptcp_rbs_value_q, TYPE_KIND_SKBLIST) \ + RBS_APPLY(VALUE_KIND_QU, "QU", mptcp_rbs_value_qu, TYPE_KIND_SKBLIST) \ + RBS_APPLY(VALUE_KIND_RQ, "RQ", mptcp_rbs_value_rq, TYPE_KIND_SKBLIST) \ + RBS_APPLY(VALUE_KIND_SUBFLOWS, "SUBFLOWS", mptcp_rbs_value_subflows, \ + TYPE_KIND_SBFLIST) \ + RBS_APPLY(VALUE_KIND_CURRENT_TIME_MS, "CURRENT_TIME_MS", \ + mptcp_rbs_value_current_time_ms, TYPE_KIND_INT) \ + RBS_APPLY(VALUE_KIND_RANDOM, "RANDOM", mptcp_rbs_value_random, \ + TYPE_KIND_INT) \ + RBS_APPLY_ON_SBF(VALUE_KIND_SBF_RTT, "RTT", mptcp_rbs_value_sbf_rtt, \ + TYPE_KIND_INT) \ + RBS_APPLY_ON_SBF(VALUE_KIND_SBF_RTT_MS, "RTT_MS", mptcp_rbs_value_sbf_rtt_ms, \ + TYPE_KIND_INT) \ + RBS_APPLY_ON_SBF(VALUE_KIND_SBF_RTT_VAR, "RTT_VAR", mptcp_rbs_value_sbf_rtt_var, \ + TYPE_KIND_INT) \ + RBS_APPLY_ON_SBF(VALUE_KIND_SBF_USER, "USER", mptcp_rbs_value_sbf_user, \ + TYPE_KIND_INT) \ + RBS_APPLY_ON_SBF(VALUE_KIND_SBF_IS_BACKUP, "IS_BACKUP", \ + mptcp_rbs_value_sbf_is_backup, TYPE_KIND_BOOL) \ + RBS_APPLY_ON_SBF(VALUE_KIND_SBF_CWND, "CWND", \ + mptcp_rbs_value_sbf_cwnd, TYPE_KIND_INT) \ + RBS_APPLY_ON_SBF(VALUE_KIND_SBF_QUEUED, "QUEUED", \ + mptcp_rbs_value_sbf_queued, TYPE_KIND_INT) \ + RBS_APPLY_ON_SBF(VALUE_KIND_SBF_SKBS_IN_FLIGHT, "SKBS_IN_FLIGHT", \ + mptcp_rbs_value_sbf_skbs_in_flight, TYPE_KIND_INT) \ + RBS_APPLY_ON_SBF(VALUE_KIND_SBF_LOST_SKBS, "LOST_SKBS", \ + mptcp_rbs_value_sbf_lost_skbs, TYPE_KIND_INT) \ + RBS_APPLY_ON_SBF(VALUE_KIND_SBF_HAS_WINDOW_FOR, "HAS_WINDOW_FOR", \ + mptcp_rbs_value_sbf_has_window_for, TYPE_KIND_BOOL) \ + RBS_APPLY_ON_SBF(VALUE_KIND_SBF_ID, "ID", mptcp_rbs_value_sbf_id, \ + TYPE_KIND_INT) \ + RBS_APPLY_ON_SBF(VALUE_KIND_SBF_DELAY_IN, "DELAY_IN", \ + mptcp_rbs_value_sbf_delay_in, TYPE_KIND_INT) \ + RBS_APPLY_ON_SBF(VALUE_KIND_SBF_DELAY_OUT, "DELAY_OUT", \ + mptcp_rbs_value_sbf_delay_out, TYPE_KIND_INT) \ + RBS_APPLY_ON_SBF(VALUE_KIND_SBF_BW_OUT_ACK, "BW_OUT_ACK", \ + mptcp_rbs_value_sbf_bw_out_ack, TYPE_KIND_INT) \ + RBS_APPLY_ON_SBF(VALUE_KIND_SBF_BW_OUT_SEND, "BW_OUT_SEND", \ + mptcp_rbs_value_sbf_bw_out_send, TYPE_KIND_INT) \ + RBS_APPLY_ON_SBF(VALUE_KIND_SBF_SSTHRESH, "SSTHRESH", \ + mptcp_rbs_value_sbf_ssthresh, TYPE_KIND_INT) \ + RBS_APPLY_ON_SBF(VALUE_KIND_SBF_THROTTLED, "THROTTLED", \ + mptcp_rbs_value_sbf_throttled, TYPE_KIND_BOOL) \ + RBS_APPLY_ON_SBF(VALUE_KIND_SBF_LOSSY, "LOSSY", \ + mptcp_rbs_value_sbf_lossy, TYPE_KIND_BOOL) \ + RBS_APPLY_ON_SBF_LIST(VALUE_KIND_SBFLIST_EMPTY, "EMPTY", \ + mptcp_rbs_value_sbf_list_empty, TYPE_KIND_BOOL) \ + RBS_APPLY_ON_SBF_LIST(VALUE_KIND_SBFLIST_FILTER, "FILTER", \ + mptcp_rbs_value_sbf_list_filter, \ + TYPE_KIND_SBFLIST) \ + RBS_APPLY(VALUE_KIND_SBFLIST_FILTER_SBF, "", \ + mptcp_rbs_value_sbf_list_filter_sbf, TYPE_KIND_SBF) \ + RBS_APPLY_ON_SBF_LIST(VALUE_KIND_SBFLIST_MAX, "MAX", \ + mptcp_rbs_value_sbf_list_max, TYPE_KIND_SBF) \ + RBS_APPLY_ON_SBF_LIST(VALUE_KIND_SBFLIST_MIN, "MIN", \ + mptcp_rbs_value_sbf_list_min, TYPE_KIND_SBF) \ + RBS_APPLY_ON_SBF_LIST(VALUE_KIND_SBFLIST_GET, "GET", \ + mptcp_rbs_value_sbf_list_get, TYPE_KIND_SBF) \ + RBS_APPLY_ON_SBF_LIST(VALUE_KIND_SBFLIST_COUNT, "COUNT", \ + mptcp_rbs_value_sbf_list_count, TYPE_KIND_INT) \ + 
RBS_APPLY_ON_SBF_LIST(VALUE_KIND_SBFLIST_SUM, "SUM", \ + mptcp_rbs_value_sbf_list_sum, TYPE_KIND_INT) \ + RBS_APPLY_ON_SKB(VALUE_KIND_SKB_SENT_ON, "SENT_ON", \ + mptcp_rbs_value_skb_sent_on, TYPE_KIND_BOOL) \ + RBS_APPLY_ON_SKB(VALUE_KIND_SKB_SENT_ON_ALL, "SENT_ON_ALL", \ + mptcp_rbs_value_skb_sent_on_all, TYPE_KIND_BOOL) \ + RBS_APPLY_ON_SKB(VALUE_KIND_SKB_USER, "USER", \ + mptcp_rbs_value_skb_user, TYPE_KIND_INT) \ + RBS_APPLY_ON_SKB(VALUE_KIND_SKB_SEQ, "SEQ", \ + mptcp_rbs_value_skb_seq, TYPE_KIND_INT) \ + RBS_APPLY_ON_SKB(VALUE_KIND_SKB_PSH, "PSH", \ + mptcp_rbs_value_skb_psh, TYPE_KIND_BOOL) \ + RBS_APPLY_ON_SKB(VALUE_KIND_SKB_LENGTH, "LENGTH", \ + mptcp_rbs_value_skb_length, TYPE_KIND_INT) \ + RBS_APPLY_ON_SKB_LIST(VALUE_KIND_SKBLIST_EMPTY, "EMPTY", \ + mptcp_rbs_value_skb_list_empty, TYPE_KIND_BOOL) \ + RBS_APPLY_ON_SKB_LIST(VALUE_KIND_SKBLIST_POP, "POP", \ + mptcp_rbs_value_skb_list_pop, TYPE_KIND_SKB) \ + RBS_APPLY_ON_SKB_LIST(VALUE_KIND_SKBLIST_FILTER, "FILTER", \ + mptcp_rbs_value_skb_list_filter, \ + TYPE_KIND_SKBLIST) \ + RBS_APPLY(VALUE_KIND_SKBLIST_FILTER_SKB, "", \ + mptcp_rbs_value_skb_list_filter_skb, TYPE_KIND_SKB) \ + RBS_APPLY_ON_SKB_LIST(VALUE_KIND_SKBLIST_COUNT, "COUNT", \ + mptcp_rbs_value_skb_list_count, TYPE_KIND_INT) \ + RBS_APPLY_ON_SKB_LIST(VALUE_KIND_SKBLIST_TOP, "TOP", \ + mptcp_rbs_value_skb_list_top, TYPE_KIND_SKB) \ + RBS_APPLY_ON_SKB_LIST(VALUE_KIND_SKBLIST_GET, "GET", \ + mptcp_rbs_value_skb_list_get, TYPE_KIND_SKB) + +enum mptcp_rbs_value_kind { + /* Literals */ + VALUE_KIND_CONSTINT, + VALUE_KIND_CONSTSTRING, + VALUE_KIND_NULL, + + /* Used variables */ + VALUE_KIND_BOOL_VAR, + VALUE_KIND_INT_VAR, + VALUE_KIND_STRING_VAR, + VALUE_KIND_SBF_VAR, + VALUE_KIND_SBFLIST_VAR, + VALUE_KIND_SKB_VAR, + VALUE_KIND_SKBLIST_VAR, + + /* Operators */ + VALUE_KIND_NOT, + VALUE_KIND_EQUAL, + VALUE_KIND_UNEQUAL, + VALUE_KIND_LESS, + VALUE_KIND_LESS_EQUAL, + VALUE_KIND_GREATER, + VALUE_KIND_GREATER_EQUAL, + VALUE_KIND_AND, + VALUE_KIND_OR, + VALUE_KIND_ADD, + VALUE_KIND_SUBTRACT, + VALUE_KIND_MULTIPLY, + VALUE_KIND_DIVIDE, + VALUE_KIND_REMAINDER, + VALUE_KIND_IS_NULL, + VALUE_KIND_IS_NOT_NULL, + + /* Registers */ + VALUE_KIND_REG, + + /* Functions & properties */ + VALUE_KIND_Q, + VALUE_KIND_QU, + VALUE_KIND_RQ, + VALUE_KIND_SUBFLOWS, + VALUE_KIND_CURRENT_TIME_MS, + VALUE_KIND_RANDOM, + + /* Functions & properties on subflows */ + VALUE_KIND_SBF_RTT, + VALUE_KIND_SBF_RTT_MS, + VALUE_KIND_SBF_RTT_VAR, + VALUE_KIND_SBF_USER, + VALUE_KIND_SBF_IS_BACKUP, + VALUE_KIND_SBF_CWND, + VALUE_KIND_SBF_QUEUED, + VALUE_KIND_SBF_SKBS_IN_FLIGHT, + VALUE_KIND_SBF_LOST_SKBS, + VALUE_KIND_SBF_HAS_WINDOW_FOR, + VALUE_KIND_SBF_ID, + VALUE_KIND_SBF_DELAY_IN, + VALUE_KIND_SBF_DELAY_OUT, + VALUE_KIND_SBF_BW_OUT_SEND, + VALUE_KIND_SBF_BW_OUT_ACK, + VALUE_KIND_SBF_SSTHRESH, + VALUE_KIND_SBF_THROTTLED, + VALUE_KIND_SBF_LOSSY, + + /* Functions & properties on subflow lists */ + VALUE_KIND_SBFLIST_NEXT, + VALUE_KIND_SBFLIST_EMPTY, + VALUE_KIND_SBFLIST_FILTER, + VALUE_KIND_SBFLIST_FILTER_SBF, + VALUE_KIND_SBFLIST_MAX, + VALUE_KIND_SBFLIST_MIN, + VALUE_KIND_SBFLIST_GET, + VALUE_KIND_SBFLIST_COUNT, + VALUE_KIND_SBFLIST_SUM, + + /* Functions & properties on sockbuffers */ + VALUE_KIND_SKB_SENT_ON, + VALUE_KIND_SKB_SENT_ON_ALL, + VALUE_KIND_SKB_USER, + VALUE_KIND_SKB_SEQ, + VALUE_KIND_SKB_PSH, + VALUE_KIND_SKB_LENGTH, + + /* Functions & properties on sockbuffer lists */ + VALUE_KIND_SKBLIST_NEXT, + VALUE_KIND_SKBLIST_EMPTY, + VALUE_KIND_SKBLIST_POP, + VALUE_KIND_SKBLIST_FILTER, + 
VALUE_KIND_SKBLIST_FILTER_SKB, + VALUE_KIND_SKBLIST_COUNT, + VALUE_KIND_SKBLIST_TOP, + VALUE_KIND_SKBLIST_GET +}; + +/* Macro to release values */ +#define MPTCP_RBS_VALUE_FREE(val) \ + if (val) \ + val->free(val); + +/* + * Base struct for all values + */ +struct mptcp_rbs_value { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value *self); +}; + +/* + * Base struct for boolean values. The execute function returns -1 on null, 0 on + * false and 1 on true + */ +struct mptcp_rbs_value_bool { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_bool *self); + s32 (*execute)(struct mptcp_rbs_value_bool *self, + struct mptcp_rbs_eval_ctx *ctx); +}; + +/* + * Base struct for integer values. The execute function returns -1 on null and + * positive values < 2^32 on success + */ +struct mptcp_rbs_value_int { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_int *self); + s64 (*execute)(struct mptcp_rbs_value_int *self, + struct mptcp_rbs_eval_ctx *ctx); +}; + +/* + * Base struct for string values. The execute function returns NULL on null + */ +struct mptcp_rbs_value_string { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_string *self); + char *(*execute)(struct mptcp_rbs_value_string *self, + struct mptcp_rbs_eval_ctx *ctx); +}; + +/* + * Base struct for subflow values. The execute function returns NULL on null + */ +struct mptcp_rbs_value_sbf { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_sbf *self); + struct tcp_sock *(*execute)(struct mptcp_rbs_value_sbf *self, + struct mptcp_rbs_eval_ctx *ctx); +}; + +/* + * Base struct for subflow list values + */ +struct mptcp_rbs_value_sbf_list { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_sbf_list *self); + struct tcp_sock *(*execute)(struct mptcp_rbs_value_sbf_list *self, + struct mptcp_rbs_eval_ctx *ctx, void **prev, + bool *is_null); +}; + +/* + * Base struct for sockbuffer values. 
The execute function returns NULL on null + */ +struct mptcp_rbs_value_skb { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_skb *self); + struct sk_buff *(*execute)(struct mptcp_rbs_value_skb *self, + struct mptcp_rbs_eval_ctx *ctx); + bool reinject; +}; + +/* + * Base struct for sockbuffer list values + */ +struct mptcp_rbs_value_skb_list { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_skb_list *self); + struct sk_buff *(*execute)(struct mptcp_rbs_value_skb_list *self, + struct mptcp_rbs_eval_ctx *ctx, void **prev, + bool *is_null); + enum mptcp_rbs_value_kind underlying_queue_kind; +}; + +/* + * Integer literal value + */ +struct mptcp_rbs_value_constint { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_constint *self); + s64 (*execute)(struct mptcp_rbs_value_constint *self, + struct mptcp_rbs_eval_ctx *ctx); + unsigned int value; +}; + +struct mptcp_rbs_value_constint *mptcp_rbs_value_constint_new(unsigned int num); +void mptcp_rbs_value_constint_free(struct mptcp_rbs_value_constint *self); +s64 mptcp_rbs_value_constint_execute(struct mptcp_rbs_value_constint *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_constint *mptcp_rbs_value_constint_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_constint *value); +int mptcp_rbs_value_constint_print(const struct mptcp_rbs_value_constint *value, + char *buffer); + +/* + * String literal value + */ +struct mptcp_rbs_value_conststring { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_conststring *self); + char *(*execute)(struct mptcp_rbs_value_conststring *self, + struct mptcp_rbs_eval_ctx *ctx); + char *value; +}; + +struct mptcp_rbs_value_conststring *mptcp_rbs_value_conststring_new(char *str); +void mptcp_rbs_value_conststring_free(struct mptcp_rbs_value_conststring *self); +char *mptcp_rbs_value_conststring_execute( + struct mptcp_rbs_value_conststring *self, struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_conststring *mptcp_rbs_value_conststring_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_conststring *value); +int mptcp_rbs_value_conststring_print( + const struct mptcp_rbs_value_conststring *value, char *buffer); + +/* + * NULL literal value + */ +struct mptcp_rbs_value_null { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_null *self); + s32 (*execute)(struct mptcp_rbs_value_null *self, + struct mptcp_rbs_eval_ctx *ctx); +}; + +struct mptcp_rbs_value_null *mptcp_rbs_value_null_new(void); +void mptcp_rbs_value_null_free(struct mptcp_rbs_value_null *self); +s32 mptcp_rbs_value_null_execute(struct mptcp_rbs_value_null *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_null *mptcp_rbs_value_null_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_null *value); +int mptcp_rbs_value_null_print(const struct mptcp_rbs_value_null *value, + char *buffer); + +/* + * Boolean variable value + */ +struct mptcp_rbs_value_bool_var { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_bool_var *self); + s32 (*execute)(struct mptcp_rbs_value_bool_var *self, + struct mptcp_rbs_eval_ctx *ctx); + int var_number; +}; + +struct mptcp_rbs_value_bool_var *mptcp_rbs_value_bool_var_new(int var_number); +void mptcp_rbs_value_bool_var_free(struct mptcp_rbs_value_bool_var *self); +s32 mptcp_rbs_value_bool_var_execute(struct mptcp_rbs_value_bool_var *self, + struct mptcp_rbs_eval_ctx *ctx); +struct 
mptcp_rbs_value_bool_var *mptcp_rbs_value_bool_var_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_bool_var *value); +int mptcp_rbs_value_bool_var_print(const struct mptcp_rbs_value_bool_var *value, + char *buffer); + +/* + * Integer variable value + */ +struct mptcp_rbs_value_int_var { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_int_var *self); + s64 (*execute)(struct mptcp_rbs_value_int_var *self, + struct mptcp_rbs_eval_ctx *ctx); + int var_number; +}; + +struct mptcp_rbs_value_int_var *mptcp_rbs_value_int_var_new(int var_number); +void mptcp_rbs_value_int_var_free(struct mptcp_rbs_value_int_var *self); +s64 mptcp_rbs_value_int_var_execute(struct mptcp_rbs_value_int_var *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_int_var *mptcp_rbs_value_int_var_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_int_var *value); +int mptcp_rbs_value_int_var_print(const struct mptcp_rbs_value_int_var *value, + char *buffer); + +/* + * String variable value + */ +struct mptcp_rbs_value_string_var { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_string_var *self); + char *(*execute)(struct mptcp_rbs_value_string_var *self, + struct mptcp_rbs_eval_ctx *ctx); + int var_number; +}; + +struct mptcp_rbs_value_string_var *mptcp_rbs_value_string_var_new( + int var_number); +void mptcp_rbs_value_string_var_free(struct mptcp_rbs_value_string_var *self); +char *mptcp_rbs_value_string_var_execute( + struct mptcp_rbs_value_string_var *self, struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_string_var *mptcp_rbs_value_string_var_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_string_var *value); +int mptcp_rbs_value_string_var_print( + const struct mptcp_rbs_value_string_var *value, char *buffer); + +/* + * Subflow variable value + */ +struct mptcp_rbs_value_sbf_var { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_sbf_var *self); + struct tcp_sock *(*execute)(struct mptcp_rbs_value_sbf_var *self, + struct mptcp_rbs_eval_ctx *ctx); + int var_number; +}; + +struct mptcp_rbs_value_sbf_var *mptcp_rbs_value_sbf_var_new(int var_number); +void mptcp_rbs_value_sbf_var_free(struct mptcp_rbs_value_sbf_var *self); +struct tcp_sock *mptcp_rbs_value_sbf_var_execute( + struct mptcp_rbs_value_sbf_var *self, struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_sbf_var *mptcp_rbs_value_sbf_var_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_var *value); +int mptcp_rbs_value_sbf_var_print(const struct mptcp_rbs_value_sbf_var *value, + char *buffer); + +/* + * Subflow list variable value + */ +struct mptcp_rbs_value_sbf_list_var { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_sbf_list_var *self); + struct tcp_sock *(*execute)(struct mptcp_rbs_value_sbf_list_var *self, + struct mptcp_rbs_eval_ctx *ctx, void **prev, + bool *is_null); + int var_number; +}; + +struct mptcp_rbs_value_sbf_list_var *mptcp_rbs_value_sbf_list_var_new( + int var_number); +void mptcp_rbs_value_sbf_list_var_free( + struct mptcp_rbs_value_sbf_list_var *self); +struct tcp_sock *mptcp_rbs_value_sbf_list_var_execute( + struct mptcp_rbs_value_sbf_list_var *self, struct mptcp_rbs_eval_ctx *ctx, + void **prev, bool *is_null); +struct mptcp_rbs_value_sbf_list_var *mptcp_rbs_value_sbf_list_var_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_list_var *value); +int 
mptcp_rbs_value_sbf_list_var_print( + const struct mptcp_rbs_value_sbf_list_var *value, char *buffer); + +/* + * Sockbuffer variable value + */ +struct mptcp_rbs_value_skb_var { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_skb_var *self); + struct sk_buff *(*execute)(struct mptcp_rbs_value_skb_var *self, + struct mptcp_rbs_eval_ctx *ctx); + bool reinject; + int var_number; +}; + +struct mptcp_rbs_value_skb_var *mptcp_rbs_value_skb_var_new(int var_number, + bool reinject); +void mptcp_rbs_value_skb_var_free(struct mptcp_rbs_value_skb_var *self); +struct sk_buff *mptcp_rbs_value_skb_var_execute( + struct mptcp_rbs_value_skb_var *self, struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_skb_var *mptcp_rbs_value_skb_var_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_skb_var *value); +int mptcp_rbs_value_skb_var_print(const struct mptcp_rbs_value_skb_var *value, + char *buffer); + +/* + * Sockbuffer list variable value + */ +struct mptcp_rbs_value_skb_list_var { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_skb_list_var *self); + struct sk_buff *(*execute)(struct mptcp_rbs_value_skb_list_var *self, + struct mptcp_rbs_eval_ctx *ctx, void **prev, + bool *is_null); + enum mptcp_rbs_value_kind underlying_queue_kind; + int var_number; +}; + +struct mptcp_rbs_value_skb_list_var *mptcp_rbs_value_skb_list_var_new( + int var_number, enum mptcp_rbs_value_kind underlying_queue_kind); +void mptcp_rbs_value_skb_list_var_free( + struct mptcp_rbs_value_skb_list_var *self); +struct sk_buff *mptcp_rbs_value_skb_list_var_execute( + struct mptcp_rbs_value_skb_list_var *self, struct mptcp_rbs_eval_ctx *ctx, + void **prev, bool *is_null); +struct mptcp_rbs_value_skb_list_var *mptcp_rbs_value_skb_list_var_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_skb_list_var *value); +int mptcp_rbs_value_skb_list_var_print( + const struct mptcp_rbs_value_skb_list_var *value, char *buffer); + +/* + * NOT operator value + */ +struct mptcp_rbs_value_not { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_not *self); + s32 (*execute)(struct mptcp_rbs_value_not *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_bool *operand; +}; + +struct mptcp_rbs_value_not *mptcp_rbs_value_not_new( + struct mptcp_rbs_value_bool *operand); +void mptcp_rbs_value_not_free(struct mptcp_rbs_value_not *self); +s32 mptcp_rbs_value_not_execute(struct mptcp_rbs_value_not *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_not *mptcp_rbs_value_not_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_not *value); +int mptcp_rbs_value_not_print(const struct mptcp_rbs_value_not *value, + char *buffer); + +/* + * == operator value + */ +struct mptcp_rbs_value_equal { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_equal *self); + s32 (*execute)(struct mptcp_rbs_value_equal *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_int *left_operand; + struct mptcp_rbs_value_int *right_operand; +}; + +struct mptcp_rbs_value_equal *mptcp_rbs_value_equal_new( + struct mptcp_rbs_value_int *left_operand, + struct mptcp_rbs_value_int *right_operand); +void mptcp_rbs_value_equal_free(struct mptcp_rbs_value_equal *self); +s32 mptcp_rbs_value_equal_execute(struct mptcp_rbs_value_equal *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_equal *mptcp_rbs_value_equal_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct 
mptcp_rbs_value_equal *value); +int mptcp_rbs_value_equal_print(const struct mptcp_rbs_value_equal *value, + char *buffer); + +/* + * != operator value + */ +struct mptcp_rbs_value_unequal { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_unequal *self); + s32 (*execute)(struct mptcp_rbs_value_unequal *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_int *left_operand; + struct mptcp_rbs_value_int *right_operand; +}; + +struct mptcp_rbs_value_unequal *mptcp_rbs_value_unequal_new( + struct mptcp_rbs_value_int *left_operand, + struct mptcp_rbs_value_int *right_operand); +void mptcp_rbs_value_unequal_free(struct mptcp_rbs_value_unequal *self); +s32 mptcp_rbs_value_unequal_execute(struct mptcp_rbs_value_unequal *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_unequal *mptcp_rbs_value_unequal_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_unequal *value); +int mptcp_rbs_value_unequal_print(const struct mptcp_rbs_value_unequal *value, + char *buffer); + +/* + * < operator value + */ +struct mptcp_rbs_value_less { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_less *self); + s32 (*execute)(struct mptcp_rbs_value_less *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_int *left_operand; + struct mptcp_rbs_value_int *right_operand; +}; + +struct mptcp_rbs_value_less *mptcp_rbs_value_less_new( + struct mptcp_rbs_value_int *left_operand, + struct mptcp_rbs_value_int *right_operand); +void mptcp_rbs_value_less_free(struct mptcp_rbs_value_less *self); +s32 mptcp_rbs_value_less_execute(struct mptcp_rbs_value_less *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_less *mptcp_rbs_value_less_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_less *value); +int mptcp_rbs_value_less_print(const struct mptcp_rbs_value_less *value, + char *buffer); + +/* + * <= operator value + */ +struct mptcp_rbs_value_less_equal { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_less_equal *self); + s32 (*execute)(struct mptcp_rbs_value_less_equal *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_int *left_operand; + struct mptcp_rbs_value_int *right_operand; +}; + +struct mptcp_rbs_value_less_equal *mptcp_rbs_value_less_equal_new( + struct mptcp_rbs_value_int *left_operand, + struct mptcp_rbs_value_int *right_operand); +void mptcp_rbs_value_less_equal_free(struct mptcp_rbs_value_less_equal *self); +s32 mptcp_rbs_value_less_equal_execute(struct mptcp_rbs_value_less_equal *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_less_equal *mptcp_rbs_value_less_equal_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_less_equal *value); +int mptcp_rbs_value_less_equal_print( + const struct mptcp_rbs_value_less_equal *value, char *buffer); + +/* + * > operator value + */ +struct mptcp_rbs_value_greater { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_greater *self); + s32 (*execute)(struct mptcp_rbs_value_greater *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_int *left_operand; + struct mptcp_rbs_value_int *right_operand; +}; + +struct mptcp_rbs_value_greater *mptcp_rbs_value_greater_new( + struct mptcp_rbs_value_int *left_operand, + struct mptcp_rbs_value_int *right_operand); +void mptcp_rbs_value_greater_free(struct mptcp_rbs_value_greater *self); +s32 mptcp_rbs_value_greater_execute(struct mptcp_rbs_value_greater *self, + struct 
mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_greater *mptcp_rbs_value_greater_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_greater *value); +int mptcp_rbs_value_greater_print(const struct mptcp_rbs_value_greater *value, + char *buffer); + +/* + * >= operator value + */ +struct mptcp_rbs_value_greater_equal { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_greater_equal *self); + s32 (*execute)(struct mptcp_rbs_value_greater_equal *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_int *left_operand; + struct mptcp_rbs_value_int *right_operand; +}; + +struct mptcp_rbs_value_greater_equal *mptcp_rbs_value_greater_equal_new( + struct mptcp_rbs_value_int *left_operand, + struct mptcp_rbs_value_int *right_operand); +void mptcp_rbs_value_greater_equal_free( + struct mptcp_rbs_value_greater_equal *self); +s32 mptcp_rbs_value_greater_equal_execute( + struct mptcp_rbs_value_greater_equal *self, struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_greater_equal *mptcp_rbs_value_greater_equal_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_greater_equal *value); +int mptcp_rbs_value_greater_equal_print( + const struct mptcp_rbs_value_greater_equal *value, char *buffer); + +/* + * AND operator value + */ +struct mptcp_rbs_value_and { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_and *self); + s32 (*execute)(struct mptcp_rbs_value_and *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_bool *left_operand; + struct mptcp_rbs_value_bool *right_operand; +}; + +struct mptcp_rbs_value_and *mptcp_rbs_value_and_new( + struct mptcp_rbs_value_bool *left_operand, + struct mptcp_rbs_value_bool *right_operand); +void mptcp_rbs_value_and_free(struct mptcp_rbs_value_and *self); +s32 mptcp_rbs_value_and_execute(struct mptcp_rbs_value_and *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_and *mptcp_rbs_value_and_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_and *value); +int mptcp_rbs_value_and_print(const struct mptcp_rbs_value_and *value, + char *buffer); + +/* + * OR operator value + */ +struct mptcp_rbs_value_or { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_or *self); + s32 (*execute)(struct mptcp_rbs_value_or *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_bool *left_operand; + struct mptcp_rbs_value_bool *right_operand; +}; + +struct mptcp_rbs_value_or *mptcp_rbs_value_or_new( + struct mptcp_rbs_value_bool *left_operand, + struct mptcp_rbs_value_bool *right_operand); +void mptcp_rbs_value_or_free(struct mptcp_rbs_value_or *self); +s32 mptcp_rbs_value_or_execute(struct mptcp_rbs_value_or *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_or *mptcp_rbs_value_or_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_or *value); +int mptcp_rbs_value_or_print(const struct mptcp_rbs_value_or *value, + char *buffer); + +/* + * + operator value + */ +struct mptcp_rbs_value_add { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_add *self); + s64 (*execute)(struct mptcp_rbs_value_add *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_int *left_operand; + struct mptcp_rbs_value_int *right_operand; +}; + +struct mptcp_rbs_value_add *mptcp_rbs_value_add_new( + struct mptcp_rbs_value_int *left_operand, + struct mptcp_rbs_value_int *right_operand); +void mptcp_rbs_value_add_free(struct mptcp_rbs_value_add 
*self); +s64 mptcp_rbs_value_add_execute(struct mptcp_rbs_value_add *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_add *mptcp_rbs_value_add_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_add *value); +int mptcp_rbs_value_add_print(const struct mptcp_rbs_value_add *value, + char *buffer); + +/* + * - operator value + */ +struct mptcp_rbs_value_subtract { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_subtract *self); + s64 (*execute)(struct mptcp_rbs_value_subtract *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_int *left_operand; + struct mptcp_rbs_value_int *right_operand; +}; + +struct mptcp_rbs_value_subtract *mptcp_rbs_value_subtract_new( + struct mptcp_rbs_value_int *left_operand, + struct mptcp_rbs_value_int *right_operand); +void mptcp_rbs_value_subtract_free(struct mptcp_rbs_value_subtract *self); +s64 mptcp_rbs_value_subtract_execute(struct mptcp_rbs_value_subtract *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_subtract *mptcp_rbs_value_subtract_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_subtract *value); +int mptcp_rbs_value_subtract_print(const struct mptcp_rbs_value_subtract *value, + char *buffer); + +/* + * * operator value + */ +struct mptcp_rbs_value_multiply { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_multiply *self); + s64 (*execute)(struct mptcp_rbs_value_multiply *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_int *left_operand; + struct mptcp_rbs_value_int *right_operand; +}; + +struct mptcp_rbs_value_multiply *mptcp_rbs_value_multiply_new( + struct mptcp_rbs_value_int *left_operand, + struct mptcp_rbs_value_int *right_operand); +void mptcp_rbs_value_multiply_free(struct mptcp_rbs_value_multiply *self); +s64 mptcp_rbs_value_multiply_execute(struct mptcp_rbs_value_multiply *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_multiply *mptcp_rbs_value_multiply_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_multiply *value); +int mptcp_rbs_value_multiply_print(const struct mptcp_rbs_value_multiply *value, + char *buffer); + +/* + * / operator value + */ +struct mptcp_rbs_value_divide { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_divide *self); + s64 (*execute)(struct mptcp_rbs_value_divide *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_int *left_operand; + struct mptcp_rbs_value_int *right_operand; +}; + +struct mptcp_rbs_value_divide *mptcp_rbs_value_divide_new( + struct mptcp_rbs_value_int *left_operand, + struct mptcp_rbs_value_int *right_operand); +void mptcp_rbs_value_divide_free(struct mptcp_rbs_value_divide *self); +s64 mptcp_rbs_value_divide_execute(struct mptcp_rbs_value_divide *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_divide *mptcp_rbs_value_divide_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_divide *value); +int mptcp_rbs_value_divide_print(const struct mptcp_rbs_value_divide *value, + char *buffer); + +/* + * % operator value + */ +struct mptcp_rbs_value_remainder { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_remainder *self); + s64 (*execute)(struct mptcp_rbs_value_remainder *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_int *left_operand; + struct mptcp_rbs_value_int *right_operand; +}; + +struct mptcp_rbs_value_remainder *mptcp_rbs_value_remainder_new( + struct 
mptcp_rbs_value_int *left_operand, + struct mptcp_rbs_value_int *right_operand); +void mptcp_rbs_value_remainder_free(struct mptcp_rbs_value_remainder *self); +s64 mptcp_rbs_value_remainder_execute(struct mptcp_rbs_value_remainder *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_remainder *mptcp_rbs_value_remainder_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_remainder *value); +int mptcp_rbs_value_remainder_print( + const struct mptcp_rbs_value_remainder *value, char *buffer); + +/* + * == null operator value + */ +struct mptcp_rbs_value_is_null { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_is_null *self); + s32 (*execute)(struct mptcp_rbs_value_is_null *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value *operand; +}; + +struct mptcp_rbs_value_is_null *mptcp_rbs_value_is_null_new( + struct mptcp_rbs_value *operand); +void mptcp_rbs_value_is_null_free(struct mptcp_rbs_value_is_null *self); +s32 mptcp_rbs_value_is_null_execute(struct mptcp_rbs_value_is_null *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_is_null *mptcp_rbs_value_is_null_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_is_null *value); +int mptcp_rbs_value_is_null_print(const struct mptcp_rbs_value_is_null *value, + char *buffer); + +/* + * != null operator value + */ +struct mptcp_rbs_value_is_not_null { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_is_not_null *self); + s32 (*execute)(struct mptcp_rbs_value_is_not_null *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value *operand; +}; + +struct mptcp_rbs_value_is_not_null *mptcp_rbs_value_is_not_null_new( + struct mptcp_rbs_value *operand); +void mptcp_rbs_value_is_not_null_free(struct mptcp_rbs_value_is_not_null *self); +s32 mptcp_rbs_value_is_not_null_execute( + struct mptcp_rbs_value_is_not_null *self, struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_is_not_null *mptcp_rbs_value_is_not_null_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_is_not_null *value); +int mptcp_rbs_value_is_not_null_print( + const struct mptcp_rbs_value_is_not_null *value, char *buffer); + +/* + * R1-6 integer values + */ +struct mptcp_rbs_value_reg { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_reg *self); + s64 (*execute)(struct mptcp_rbs_value_reg *self, + struct mptcp_rbs_eval_ctx *ctx); + int reg_number; +}; + +struct mptcp_rbs_value_reg *mptcp_rbs_value_reg_new(int reg_number); +void mptcp_rbs_value_reg_free(struct mptcp_rbs_value_reg *self); +s64 mptcp_rbs_value_reg_execute(struct mptcp_rbs_value_reg *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_reg *mptcp_rbs_value_reg_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_reg *value); +int mptcp_rbs_value_reg_print(const struct mptcp_rbs_value_reg *value, + char *buffer); + +/* + * Q sockbuffer list value + */ +struct mptcp_rbs_value_q { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_q *self); + struct sk_buff *(*execute)(struct mptcp_rbs_value_q *self, + struct mptcp_rbs_eval_ctx *ctx, void **prev, + bool *is_null); + enum mptcp_rbs_value_kind underlying_queue_kind; +}; + +struct mptcp_rbs_value_q *mptcp_rbs_value_q_new(void); +void mptcp_rbs_value_q_free(struct mptcp_rbs_value_q *self); +struct sk_buff *mptcp_rbs_value_q_execute(struct mptcp_rbs_value_q *self, + struct mptcp_rbs_eval_ctx *ctx, + void **prev, bool *is_null); 
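+/*
+ * Note on usage: like all list values, Q is consumed as an iterator by
+ * calling execute() repeatedly with a cursor. A minimal sketch of the
+ * pattern used by COUNT, EMPTY and FILTER in mptcp_rbs_value.c (q and ctx
+ * are placeholder names):
+ *
+ *	void *prev = NULL;
+ *	bool is_null;
+ *	struct sk_buff *skb = q->execute(q, ctx, &prev, &is_null);
+ *
+ *	if (is_null)
+ *		return -1;	(the list itself evaluated to null)
+ *	while (skb) {
+ *		(... use skb ...)
+ *		skb = q->execute(q, ctx, &prev, &is_null);
+ *	}
+ */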
+struct mptcp_rbs_value_q *mptcp_rbs_value_q_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_q *value); +int mptcp_rbs_value_q_print(const struct mptcp_rbs_value_q *value, + char *buffer); + +/* + * QU sockbuffer list value + */ +struct mptcp_rbs_value_qu { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_qu *self); + struct sk_buff *(*execute)(struct mptcp_rbs_value_qu *self, + struct mptcp_rbs_eval_ctx *ctx, void **prev, + bool *is_null); + enum mptcp_rbs_value_kind underlying_queue_kind; +}; + +struct mptcp_rbs_value_qu *mptcp_rbs_value_qu_new(void); +void mptcp_rbs_value_qu_free(struct mptcp_rbs_value_qu *self); +struct sk_buff *mptcp_rbs_value_qu_execute(struct mptcp_rbs_value_qu *self, + struct mptcp_rbs_eval_ctx *ctx, + void **prev, bool *is_null); +struct mptcp_rbs_value_qu *mptcp_rbs_value_qu_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_qu *value); +int mptcp_rbs_value_qu_print(const struct mptcp_rbs_value_qu *value, + char *buffer); + +/* + * RQ sockbuffer list value + */ +struct mptcp_rbs_value_rq { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_rq *self); + struct sk_buff *(*execute)(struct mptcp_rbs_value_rq *self, + struct mptcp_rbs_eval_ctx *ctx, void **prev, + bool *is_null); + enum mptcp_rbs_value_kind underlying_queue_kind; +}; + +struct mptcp_rbs_value_rq *mptcp_rbs_value_rq_new(void); +void mptcp_rbs_value_rq_free(struct mptcp_rbs_value_rq *self); +struct sk_buff *mptcp_rbs_value_rq_execute(struct mptcp_rbs_value_rq *self, + struct mptcp_rbs_eval_ctx *ctx, + void **prev, bool *is_null); +struct mptcp_rbs_value_rq *mptcp_rbs_value_rq_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_rq *value); +int mptcp_rbs_value_rq_print(const struct mptcp_rbs_value_rq *value, + char *buffer); + +/* + * SUBFLOWS subflow list value + */ +struct mptcp_rbs_value_subflows { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_subflows *self); + struct tcp_sock *(*execute)(struct mptcp_rbs_value_subflows *self, + struct mptcp_rbs_eval_ctx *ctx, void **prev, + bool *is_null); +}; + +struct mptcp_rbs_value_subflows *mptcp_rbs_value_subflows_new(void); +void mptcp_rbs_value_subflows_free(struct mptcp_rbs_value_subflows *self); +struct tcp_sock *mptcp_rbs_value_subflows_execute( + struct mptcp_rbs_value_subflows *self, struct mptcp_rbs_eval_ctx *ctx, + void **prev, bool *is_null); +struct mptcp_rbs_value_subflows *mptcp_rbs_value_subflows_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_subflows *value); +int mptcp_rbs_value_subflows_print(const struct mptcp_rbs_value_subflows *value, + char *buffer); + +/* + * CURRENT_TIME_MS integer value + */ +struct mptcp_rbs_value_current_time_ms { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_current_time_ms *self); + s64 (*execute)(struct mptcp_rbs_value_current_time_ms *self, + struct mptcp_rbs_eval_ctx *ctx); +}; + +struct mptcp_rbs_value_current_time_ms *mptcp_rbs_value_current_time_ms_new( + void); +void mptcp_rbs_value_current_time_ms_free( + struct mptcp_rbs_value_current_time_ms *self); +s64 mptcp_rbs_value_current_time_ms_execute( + struct mptcp_rbs_value_current_time_ms *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_current_time_ms *mptcp_rbs_value_current_time_ms_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_current_time_ms *value); +int mptcp_rbs_value_current_time_ms_print( + 
const struct mptcp_rbs_value_current_time_ms *value, char *buffer); + +/* + * RANDOM integer value + */ +struct mptcp_rbs_value_random { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_random *self); + s64 (*execute)(struct mptcp_rbs_value_random *self, + struct mptcp_rbs_eval_ctx *ctx); +}; + +struct mptcp_rbs_value_random *mptcp_rbs_value_random_new(void); +void mptcp_rbs_value_random_free(struct mptcp_rbs_value_random *self); +s64 mptcp_rbs_value_random_execute(struct mptcp_rbs_value_random *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_random *mptcp_rbs_value_random_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_random *value); +int mptcp_rbs_value_random_print(const struct mptcp_rbs_value_random *value, + char *buffer); + +/* + * .RTT integer value + */ +struct mptcp_rbs_value_sbf_rtt { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_sbf_rtt *self); + s64 (*execute)(struct mptcp_rbs_value_sbf_rtt *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_sbf *sbf; +}; + +struct mptcp_rbs_value_sbf_rtt *mptcp_rbs_value_sbf_rtt_new( + struct mptcp_rbs_value_sbf *sbf); +void mptcp_rbs_value_sbf_rtt_free(struct mptcp_rbs_value_sbf_rtt *self); +s64 mptcp_rbs_value_sbf_rtt_execute(struct mptcp_rbs_value_sbf_rtt *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_sbf_rtt *mptcp_rbs_value_sbf_rtt_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_rtt *value); +int mptcp_rbs_value_sbf_rtt_print(const struct mptcp_rbs_value_sbf_rtt *value, + char *buffer); + +/* + * .RTT_MS integer value + */ +struct mptcp_rbs_value_sbf_rtt_ms { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_sbf_rtt_ms *self); + s64 (*execute)(struct mptcp_rbs_value_sbf_rtt_ms *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_sbf *sbf; +}; + +struct mptcp_rbs_value_sbf_rtt_ms *mptcp_rbs_value_sbf_rtt_ms_new( + struct mptcp_rbs_value_sbf *sbf); +void mptcp_rbs_value_sbf_rtt_ms_free(struct mptcp_rbs_value_sbf_rtt_ms *self); +s64 mptcp_rbs_value_sbf_rtt_ms_execute(struct mptcp_rbs_value_sbf_rtt_ms *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_sbf_rtt_ms *mptcp_rbs_value_sbf_rtt_ms_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_rtt_ms *value); +int mptcp_rbs_value_sbf_rtt_ms_print(const struct mptcp_rbs_value_sbf_rtt_ms *value, + char *buffer); + +/* + * .RTT_VAR integer value + */ +struct mptcp_rbs_value_sbf_rtt_var { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_sbf_rtt_var *self); + s64 (*execute)(struct mptcp_rbs_value_sbf_rtt_var *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_sbf *sbf; +}; + +struct mptcp_rbs_value_sbf_rtt_var *mptcp_rbs_value_sbf_rtt_var_new( + struct mptcp_rbs_value_sbf *sbf); +void mptcp_rbs_value_sbf_rtt_var_free(struct mptcp_rbs_value_sbf_rtt_var *self); +s64 mptcp_rbs_value_sbf_rtt_var_execute(struct mptcp_rbs_value_sbf_rtt_var *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_sbf_rtt_var *mptcp_rbs_value_sbf_rtt_var_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_rtt_var *value); +int mptcp_rbs_value_sbf_rtt_var_print(const struct mptcp_rbs_value_sbf_rtt_var *value, + char *buffer); + + /* + * .USER integer value + */ +struct mptcp_rbs_value_sbf_user { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_sbf_user *self); + s64 
(*execute)(struct mptcp_rbs_value_sbf_user *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_sbf *sbf; +}; + +struct mptcp_rbs_value_sbf_user *mptcp_rbs_value_sbf_user_new( + struct mptcp_rbs_value_sbf *sbf); +void mptcp_rbs_value_sbf_user_free(struct mptcp_rbs_value_sbf_user *self); +s64 mptcp_rbs_value_sbf_user_execute(struct mptcp_rbs_value_sbf_user *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_sbf_user *mptcp_rbs_value_sbf_user_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_user *value); +int mptcp_rbs_value_sbf_user_print(const struct mptcp_rbs_value_sbf_user *value, + char *buffer); + +/* + * .IS_BACKUP boolean value + */ +struct mptcp_rbs_value_sbf_is_backup { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_sbf_is_backup *self); + s32 (*execute)(struct mptcp_rbs_value_sbf_is_backup *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_sbf *sbf; +}; + +struct mptcp_rbs_value_sbf_is_backup *mptcp_rbs_value_sbf_is_backup_new( + struct mptcp_rbs_value_sbf *sbf); +void mptcp_rbs_value_sbf_is_backup_free( + struct mptcp_rbs_value_sbf_is_backup *self); +s32 mptcp_rbs_value_sbf_is_backup_execute( + struct mptcp_rbs_value_sbf_is_backup *self, struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_sbf_is_backup *mptcp_rbs_value_sbf_is_backup_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_is_backup *value); +int mptcp_rbs_value_sbf_is_backup_print( + const struct mptcp_rbs_value_sbf_is_backup *value, char *buffer); +/* + * .THROTTLED boolean value + */ +struct mptcp_rbs_value_sbf_throttled { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_sbf_throttled *self); + s32 (*execute)(struct mptcp_rbs_value_sbf_throttled *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_sbf *sbf; +}; + +struct mptcp_rbs_value_sbf_throttled *mptcp_rbs_value_sbf_throttled_new( + struct mptcp_rbs_value_sbf *sbf); +void mptcp_rbs_value_sbf_throttled_free( + struct mptcp_rbs_value_sbf_throttled *self); +s32 mptcp_rbs_value_sbf_throttled_execute( + struct mptcp_rbs_value_sbf_throttled *self, struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_sbf_throttled *mptcp_rbs_value_sbf_throttled_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_throttled *value); +int mptcp_rbs_value_sbf_throttled_print( + const struct mptcp_rbs_value_sbf_throttled *value, char *buffer); + +/* + * .CWND integer value + */ +struct mptcp_rbs_value_sbf_cwnd { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_sbf_cwnd *self); + s64 (*execute)(struct mptcp_rbs_value_sbf_cwnd *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_sbf *sbf; +}; + +struct mptcp_rbs_value_sbf_cwnd *mptcp_rbs_value_sbf_cwnd_new( + struct mptcp_rbs_value_sbf *sbf); +void mptcp_rbs_value_sbf_cwnd_free(struct mptcp_rbs_value_sbf_cwnd *self); +s64 mptcp_rbs_value_sbf_cwnd_execute(struct mptcp_rbs_value_sbf_cwnd *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_sbf_cwnd *mptcp_rbs_value_sbf_cwnd_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_cwnd *value); +int mptcp_rbs_value_sbf_cwnd_print(const struct mptcp_rbs_value_sbf_cwnd *value, + char *buffer); + +/* + * .QUEUED integer value + */ +struct mptcp_rbs_value_sbf_queued { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_sbf_queued *self); + s64 (*execute)(struct mptcp_rbs_value_sbf_queued *self, + 
struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_sbf *sbf; +}; + +struct mptcp_rbs_value_sbf_queued *mptcp_rbs_value_sbf_queued_new( + struct mptcp_rbs_value_sbf *sbf); +void mptcp_rbs_value_sbf_queued_free(struct mptcp_rbs_value_sbf_queued *self); +s64 mptcp_rbs_value_sbf_queued_execute(struct mptcp_rbs_value_sbf_queued *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_sbf_queued *mptcp_rbs_value_sbf_queued_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_queued *value); +int mptcp_rbs_value_sbf_queued_print(const struct mptcp_rbs_value_sbf_queued *value, + char *buffer); + +/* + * .SKBS_IN_FLIGHT integer value + */ +struct mptcp_rbs_value_sbf_skbs_in_flight { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_sbf_skbs_in_flight *self); + s64 (*execute)(struct mptcp_rbs_value_sbf_skbs_in_flight *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_sbf *sbf; +}; + +struct mptcp_rbs_value_sbf_skbs_in_flight * +mptcp_rbs_value_sbf_skbs_in_flight_new(struct mptcp_rbs_value_sbf *sbf); +void mptcp_rbs_value_sbf_skbs_in_flight_free( + struct mptcp_rbs_value_sbf_skbs_in_flight *self); +s64 mptcp_rbs_value_sbf_skbs_in_flight_execute( + struct mptcp_rbs_value_sbf_skbs_in_flight *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_sbf_skbs_in_flight * +mptcp_rbs_value_sbf_skbs_in_flight_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_skbs_in_flight *value); +int mptcp_rbs_value_sbf_skbs_in_flight_print( + const struct mptcp_rbs_value_sbf_skbs_in_flight *value, char *buffer); + +/* + * .LOST_SKBS integer value + */ +struct mptcp_rbs_value_sbf_lost_skbs { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_sbf_lost_skbs *self); + s64 (*execute)(struct mptcp_rbs_value_sbf_lost_skbs *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_sbf *sbf; +}; + +struct mptcp_rbs_value_sbf_lost_skbs *mptcp_rbs_value_sbf_lost_skbs_new( + struct mptcp_rbs_value_sbf *sbf); +void mptcp_rbs_value_sbf_lost_skbs_free( + struct mptcp_rbs_value_sbf_lost_skbs *self); +s64 mptcp_rbs_value_sbf_lost_skbs_execute( + struct mptcp_rbs_value_sbf_lost_skbs *self, struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_sbf_lost_skbs *mptcp_rbs_value_sbf_lost_skbs_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_lost_skbs *value); +int mptcp_rbs_value_sbf_lost_skbs_print( + const struct mptcp_rbs_value_sbf_lost_skbs *value, char *buffer); + +/* + * .HAS_WINDOW_FOR boolean value + */ +struct mptcp_rbs_value_sbf_has_window_for { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_sbf_has_window_for *self); + s32 (*execute)(struct mptcp_rbs_value_sbf_has_window_for *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_sbf *sbf; + struct mptcp_rbs_value_skb *skb; +}; + +struct mptcp_rbs_value_sbf_has_window_for * +mptcp_rbs_value_sbf_has_window_for_new(struct mptcp_rbs_value_sbf *sbf, + struct mptcp_rbs_value_skb *skb); +void mptcp_rbs_value_sbf_has_window_for_free( + struct mptcp_rbs_value_sbf_has_window_for *self); +s32 mptcp_rbs_value_sbf_has_window_for_execute( + struct mptcp_rbs_value_sbf_has_window_for *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_sbf_has_window_for * +mptcp_rbs_value_sbf_has_window_for_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_has_window_for *value); +int mptcp_rbs_value_sbf_has_window_for_print( + const struct 
mptcp_rbs_value_sbf_has_window_for *value, char *buffer); + +/* + * .ID integer value + */ +struct mptcp_rbs_value_sbf_id { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_sbf_id *self); + s64 (*execute)(struct mptcp_rbs_value_sbf_id *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_sbf *sbf; +}; + +struct mptcp_rbs_value_sbf_id *mptcp_rbs_value_sbf_id_new( + struct mptcp_rbs_value_sbf *sbf); +void mptcp_rbs_value_sbf_id_free(struct mptcp_rbs_value_sbf_id *self); +s64 mptcp_rbs_value_sbf_id_execute(struct mptcp_rbs_value_sbf_id *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_sbf_id *mptcp_rbs_value_sbf_id_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_id *value); +int mptcp_rbs_value_sbf_id_print(const struct mptcp_rbs_value_sbf_id *value, + char *buffer); + +/* + * .DELAY_IN integer value + */ +struct mptcp_rbs_value_sbf_delay_in { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_sbf_delay_in *self); + s64 (*execute)(struct mptcp_rbs_value_sbf_delay_in *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_sbf *sbf; +}; + +struct mptcp_rbs_value_sbf_delay_in *mptcp_rbs_value_sbf_delay_in_new( + struct mptcp_rbs_value_sbf *sbf); +void mptcp_rbs_value_sbf_delay_in_free( + struct mptcp_rbs_value_sbf_delay_in *self); +s64 mptcp_rbs_value_sbf_delay_in_execute( + struct mptcp_rbs_value_sbf_delay_in *self, struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_sbf_delay_in *mptcp_rbs_value_sbf_delay_in_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_delay_in *value); +int mptcp_rbs_value_sbf_delay_in_print( + const struct mptcp_rbs_value_sbf_delay_in *value, char *buffer); + +/* + * .DELAY_OUT integer value + */ +struct mptcp_rbs_value_sbf_delay_out { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_sbf_delay_out *self); + s64 (*execute)(struct mptcp_rbs_value_sbf_delay_out *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_sbf *sbf; +}; + +struct mptcp_rbs_value_sbf_delay_out *mptcp_rbs_value_sbf_delay_out_new( + struct mptcp_rbs_value_sbf *sbf); +void mptcp_rbs_value_sbf_delay_out_free( + struct mptcp_rbs_value_sbf_delay_out *self); +s64 mptcp_rbs_value_sbf_delay_out_execute( + struct mptcp_rbs_value_sbf_delay_out *self, struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_sbf_delay_out *mptcp_rbs_value_sbf_delay_out_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_delay_out *value); +int mptcp_rbs_value_sbf_delay_out_print( + const struct mptcp_rbs_value_sbf_delay_out *value, char *buffer); + +/* some helper */ + +void mptcp_rbs_sbf_delay_update(struct tcp_sock *tp, const struct sk_buff *skb); + +void mptcp_rbs_sbf_bw_ack_add(struct tcp_sock *tp, unsigned int bytes); + +void mptcp_rbs_sbf_bw_send_add(struct tcp_sock *tp, unsigned int bytes); + +/* + * .BW_OUT_SEND integer value + */ +struct mptcp_rbs_value_sbf_bw_out_send { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_sbf_bw_out_send *self); + s64 (*execute)(struct mptcp_rbs_value_sbf_bw_out_send *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_sbf *sbf; +}; + +struct mptcp_rbs_value_sbf_bw_out_send *mptcp_rbs_value_sbf_bw_out_send_new( + struct mptcp_rbs_value_sbf *sbf); +void mptcp_rbs_value_sbf_bw_out_send_free( + struct mptcp_rbs_value_sbf_bw_out_send *self); +s64 mptcp_rbs_value_sbf_bw_out_send_execute( + struct mptcp_rbs_value_sbf_bw_out_send *self, + 
struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_sbf_bw_out_send *mptcp_rbs_value_sbf_bw_out_send_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_bw_out_send *value); +int mptcp_rbs_value_sbf_bw_out_send_print( + const struct mptcp_rbs_value_sbf_bw_out_send *value, char *buffer); + +/* + * .BW_OUT_ACK integer value + */ +struct mptcp_rbs_value_sbf_bw_out_ack { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_sbf_bw_out_ack *self); + s64 (*execute)(struct mptcp_rbs_value_sbf_bw_out_ack *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_sbf *sbf; +}; + +struct mptcp_rbs_value_sbf_bw_out_ack *mptcp_rbs_value_sbf_bw_out_ack_new( + struct mptcp_rbs_value_sbf *sbf); +void mptcp_rbs_value_sbf_bw_out_ack_free( + struct mptcp_rbs_value_sbf_bw_out_ack *self); +s64 mptcp_rbs_value_sbf_bw_out_ack_execute( + struct mptcp_rbs_value_sbf_bw_out_ack *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_sbf_bw_out_ack *mptcp_rbs_value_sbf_bw_out_ack_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_bw_out_ack *value); +int mptcp_rbs_value_sbf_bw_out_ack_print( + const struct mptcp_rbs_value_sbf_bw_out_ack *value, char *buffer); + +/* + * .SSTHRESH integer value + */ +struct mptcp_rbs_value_sbf_ssthresh { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_sbf_ssthresh *self); + s64 (*execute)(struct mptcp_rbs_value_sbf_ssthresh *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_sbf *sbf; +}; + +struct mptcp_rbs_value_sbf_ssthresh *mptcp_rbs_value_sbf_ssthresh_new( + struct mptcp_rbs_value_sbf *sbf); +void mptcp_rbs_value_sbf_ssthresh_free( + struct mptcp_rbs_value_sbf_ssthresh *self); +s64 mptcp_rbs_value_sbf_ssthresh_execute( + struct mptcp_rbs_value_sbf_ssthresh *self, struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_sbf_ssthresh *mptcp_rbs_value_sbf_ssthresh_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_ssthresh *value); +int mptcp_rbs_value_sbf_ssthresh_print( + const struct mptcp_rbs_value_sbf_ssthresh *value, char *buffer); + +/* + * .LOSSY boolean value + */ +struct mptcp_rbs_value_sbf_lossy { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_sbf_lossy *self); + s32 (*execute)(struct mptcp_rbs_value_sbf_lossy *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_sbf *sbf; +}; + +struct mptcp_rbs_value_sbf_lossy *mptcp_rbs_value_sbf_lossy_new( + struct mptcp_rbs_value_sbf *sbf); +void mptcp_rbs_value_sbf_lossy_free(struct mptcp_rbs_value_sbf_lossy *self); +s32 mptcp_rbs_value_sbf_lossy_execute(struct mptcp_rbs_value_sbf_lossy *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_sbf_lossy *mptcp_rbs_value_sbf_lossy_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_lossy *value); +int mptcp_rbs_value_sbf_lossy_print( + const struct mptcp_rbs_value_sbf_lossy *value, char *buffer); + +/* + * .NEXT subflow value + */ +struct mptcp_rbs_value_sbf_list_next { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_sbf_list_next *self); + struct tcp_sock *(*execute)(struct mptcp_rbs_value_sbf_list_next *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_sbf_list *list; + void *prev; + /* + * The next 2 fields ensure that prev is correctly reset after the + * foreach loop finished + */ + u32 exec_count; + bool is_null; +}; + +struct mptcp_rbs_value_sbf_list_next *mptcp_rbs_value_sbf_list_next_new( 
+ struct mptcp_rbs_value_sbf_list *list); +void mptcp_rbs_value_sbf_list_next_free( + struct mptcp_rbs_value_sbf_list_next *self); +struct tcp_sock *mptcp_rbs_value_sbf_list_next_execute( + struct mptcp_rbs_value_sbf_list_next *self, struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_sbf_list_next *mptcp_rbs_value_sbf_list_next_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_list_next *value); +int mptcp_rbs_value_sbf_list_next_print( + const struct mptcp_rbs_value_sbf_list_next *value, char *buffer); + +/* + * .EMPTY boolean value + */ +struct mptcp_rbs_value_sbf_list_empty { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_sbf_list_empty *self); + s32 (*execute)(struct mptcp_rbs_value_sbf_list_empty *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_sbf_list *list; +}; + +struct mptcp_rbs_value_sbf_list_empty *mptcp_rbs_value_sbf_list_empty_new( + struct mptcp_rbs_value_sbf_list *list); +void mptcp_rbs_value_sbf_list_empty_free( + struct mptcp_rbs_value_sbf_list_empty *self); +s32 mptcp_rbs_value_sbf_list_empty_execute( + struct mptcp_rbs_value_sbf_list_empty *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_sbf_list_empty *mptcp_rbs_value_sbf_list_empty_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_list_empty *value); +int mptcp_rbs_value_sbf_list_empty_print( + const struct mptcp_rbs_value_sbf_list_empty *value, char *buffer); + +/* + * .FILTER subflow list value + */ +struct mptcp_rbs_value_sbf_list_filter { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_sbf_list_filter *self); + struct tcp_sock *(*execute)( + struct mptcp_rbs_value_sbf_list_filter *self, + struct mptcp_rbs_eval_ctx *ctx, void **prev, bool *is_null); + struct mptcp_rbs_value_sbf_list *list; + struct mptcp_rbs_value_bool *cond; + struct tcp_sock *cur; +}; + +struct mptcp_rbs_value_sbf_list_filter *mptcp_rbs_value_sbf_list_filter_new( + void); +void mptcp_rbs_value_sbf_list_filter_free( + struct mptcp_rbs_value_sbf_list_filter *self); +struct tcp_sock *mptcp_rbs_value_sbf_list_filter_execute( + struct mptcp_rbs_value_sbf_list_filter *self, + struct mptcp_rbs_eval_ctx *ctx, void **prev, bool *is_null); +struct mptcp_rbs_value_sbf_list_filter *mptcp_rbs_value_sbf_list_filter_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_list_filter *value); +int mptcp_rbs_value_sbf_list_filter_print( + const struct mptcp_rbs_value_sbf_list_filter *value, char *buffer); + +/* + * Special value holding the actual subflow for FILTER subflow list value + */ +struct mptcp_rbs_value_sbf_list_filter_sbf { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_sbf_list_filter_sbf *self); + struct tcp_sock *(*execute)( + struct mptcp_rbs_value_sbf_list_filter_sbf *self, + struct mptcp_rbs_eval_ctx *ctx); + struct tcp_sock **cur; +}; + +struct mptcp_rbs_value_sbf_list_filter_sbf * +mptcp_rbs_value_sbf_list_filter_sbf_new(struct tcp_sock **cur); +void mptcp_rbs_value_sbf_list_filter_sbf_free( + struct mptcp_rbs_value_sbf_list_filter_sbf *self); +struct tcp_sock *mptcp_rbs_value_sbf_list_filter_sbf_execute( + struct mptcp_rbs_value_sbf_list_filter_sbf *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_sbf_list_filter_sbf * +mptcp_rbs_value_sbf_list_filter_sbf_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_list_filter_sbf *value); +int mptcp_rbs_value_sbf_list_filter_sbf_print( + const struct 
mptcp_rbs_value_sbf_list_filter_sbf *value, char *buffer); + +/* + * .MAX subflow value + */ +struct mptcp_rbs_value_sbf_list_max { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_sbf_list_max *self); + struct tcp_sock *(*execute)(struct mptcp_rbs_value_sbf_list_max *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_sbf_list *list; + struct mptcp_rbs_value_int *cond; + struct tcp_sock *cur; +}; + +struct mptcp_rbs_value_sbf_list_max *mptcp_rbs_value_sbf_list_max_new(void); +void mptcp_rbs_value_sbf_list_max_free( + struct mptcp_rbs_value_sbf_list_max *self); +struct tcp_sock *mptcp_rbs_value_sbf_list_max_execute( + struct mptcp_rbs_value_sbf_list_max *self, struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_sbf_list_max *mptcp_rbs_value_sbf_list_max_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_list_max *value); +int mptcp_rbs_value_sbf_list_max_print( + const struct mptcp_rbs_value_sbf_list_max *value, char *buffer); + +/* + * .MIN subflow value + */ +struct mptcp_rbs_value_sbf_list_min { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_sbf_list_min *self); + struct tcp_sock *(*execute)(struct mptcp_rbs_value_sbf_list_min *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_sbf_list *list; + struct mptcp_rbs_value_int *cond; + struct tcp_sock *cur; +}; + +struct mptcp_rbs_value_sbf_list_min *mptcp_rbs_value_sbf_list_min_new(void); +void mptcp_rbs_value_sbf_list_min_free( + struct mptcp_rbs_value_sbf_list_min *self); +struct tcp_sock *mptcp_rbs_value_sbf_list_min_execute( + struct mptcp_rbs_value_sbf_list_min *self, struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_sbf_list_min *mptcp_rbs_value_sbf_list_min_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_list_min *value); +int mptcp_rbs_value_sbf_list_min_print( + const struct mptcp_rbs_value_sbf_list_min *value, char *buffer); + +/* + * .GET subflow value + */ +struct mptcp_rbs_value_sbf_list_get { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_sbf_list_get *self); + struct tcp_sock *(*execute)(struct mptcp_rbs_value_sbf_list_get *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_sbf_list *list; + struct mptcp_rbs_value_int *index; +}; + +struct mptcp_rbs_value_sbf_list_get *mptcp_rbs_value_sbf_list_get_new( + struct mptcp_rbs_value_sbf_list *list, struct mptcp_rbs_value_int *index); +void mptcp_rbs_value_sbf_list_get_free( + struct mptcp_rbs_value_sbf_list_get *self); +struct tcp_sock *mptcp_rbs_value_sbf_list_get_execute( + struct mptcp_rbs_value_sbf_list_get *self, struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_sbf_list_get *mptcp_rbs_value_sbf_list_get_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_list_get *value); +int mptcp_rbs_value_sbf_list_get_print( + const struct mptcp_rbs_value_sbf_list_get *value, char *buffer); + +/* + * .COUNT integer value + */ +struct mptcp_rbs_value_sbf_list_count { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_sbf_list_count *self); + s64 (*execute)(struct mptcp_rbs_value_sbf_list_count *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_sbf_list *list; +}; + +struct mptcp_rbs_value_sbf_list_count *mptcp_rbs_value_sbf_list_count_new( + struct mptcp_rbs_value_sbf_list *list); +void mptcp_rbs_value_sbf_list_count_free( + struct mptcp_rbs_value_sbf_list_count *self); +s64 mptcp_rbs_value_sbf_list_count_execute( + struct 
mptcp_rbs_value_sbf_list_count *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_sbf_list_count *mptcp_rbs_value_sbf_list_count_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_list_count *value); +int mptcp_rbs_value_sbf_list_count_print( + const struct mptcp_rbs_value_sbf_list_count *value, char *buffer); + +/* + * .SUM integer value + */ +struct mptcp_rbs_value_sbf_list_sum { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_sbf_list_sum *self); + s64 (*execute)(struct mptcp_rbs_value_sbf_list_sum *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_sbf_list *list; + struct mptcp_rbs_value_int *cond; + struct tcp_sock *cur; +}; + +struct mptcp_rbs_value_sbf_list_sum *mptcp_rbs_value_sbf_list_sum_new(void); +void mptcp_rbs_value_sbf_list_sum_free( + struct mptcp_rbs_value_sbf_list_sum *self); +s64 mptcp_rbs_value_sbf_list_sum_execute( + struct mptcp_rbs_value_sbf_list_sum *self, struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_sbf_list_sum *mptcp_rbs_value_sbf_list_sum_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_sbf_list_sum *value); +int mptcp_rbs_value_sbf_list_sum_print( + const struct mptcp_rbs_value_sbf_list_sum *value, char *buffer); + +/* + * .NEXT sockbuffer value + */ +struct mptcp_rbs_value_skb_list_next { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_skb_list_next *self); + struct sk_buff *(*execute)(struct mptcp_rbs_value_skb_list_next *self, + struct mptcp_rbs_eval_ctx *ctx); + bool reinject; + struct mptcp_rbs_value_skb_list *list; + void *prev; + /* + * The next 2 fields ensure that prev is correctly reset after the + * foreach loop finished + */ + u32 exec_count; + bool is_null; +}; + +struct mptcp_rbs_value_skb_list_next *mptcp_rbs_value_skb_list_next_new( + struct mptcp_rbs_value_skb_list *list); +void mptcp_rbs_value_skb_list_next_free( + struct mptcp_rbs_value_skb_list_next *self); +struct sk_buff *mptcp_rbs_value_skb_list_next_execute( + struct mptcp_rbs_value_skb_list_next *self, struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_skb_list_next *mptcp_rbs_value_skb_list_next_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_skb_list_next *value); +int mptcp_rbs_value_skb_list_next_print( + const struct mptcp_rbs_value_skb_list_next *value, char *buffer); + +/* + * .SENT_ON boolean value + */ +struct mptcp_rbs_value_skb_sent_on { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_skb_sent_on *self); + s32 (*execute)(struct mptcp_rbs_value_skb_sent_on *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_skb *skb; + struct mptcp_rbs_value_sbf *sbf; +}; + +struct mptcp_rbs_value_skb_sent_on *mptcp_rbs_value_skb_sent_on_new( + struct mptcp_rbs_value_skb *skb, struct mptcp_rbs_value_sbf *sbf); +void mptcp_rbs_value_skb_sent_on_free(struct mptcp_rbs_value_skb_sent_on *self); +s32 mptcp_rbs_value_skb_sent_on_execute( + struct mptcp_rbs_value_skb_sent_on *self, struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_skb_sent_on *mptcp_rbs_value_skb_sent_on_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_skb_sent_on *value); +int mptcp_rbs_value_skb_sent_on_print( + const struct mptcp_rbs_value_skb_sent_on *value, char *buffer); + +/* + * .SENT_ON_ALL boolean value + */ +struct mptcp_rbs_value_skb_sent_on_all { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_skb_sent_on_all *self); + s32 
(*execute)(struct mptcp_rbs_value_skb_sent_on_all *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_skb *skb; +}; + +struct mptcp_rbs_value_skb_sent_on_all *mptcp_rbs_value_skb_sent_on_all_new( + struct mptcp_rbs_value_skb *skb); +void mptcp_rbs_value_skb_sent_on_all_free( + struct mptcp_rbs_value_skb_sent_on_all *self); +s32 mptcp_rbs_value_skb_sent_on_all_execute( + struct mptcp_rbs_value_skb_sent_on_all *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_skb_sent_on_all *mptcp_rbs_value_skb_sent_on_all_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_skb_sent_on_all *value); +int mptcp_rbs_value_skb_sent_on_all_print( + const struct mptcp_rbs_value_skb_sent_on_all *value, char *buffer); + +/* + * .USER integer value + */ +struct mptcp_rbs_value_skb_user { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_skb_user *self); + s64 (*execute)(struct mptcp_rbs_value_skb_user *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_skb *skb; +}; + +struct mptcp_rbs_value_skb_user *mptcp_rbs_value_skb_user_new( + struct mptcp_rbs_value_skb *skb); +void mptcp_rbs_value_skb_user_free(struct mptcp_rbs_value_skb_user *self); +s64 mptcp_rbs_value_skb_user_execute(struct mptcp_rbs_value_skb_user *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_skb_user *mptcp_rbs_value_skb_user_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_skb_user *value); +int mptcp_rbs_value_skb_user_print(const struct mptcp_rbs_value_skb_user *value, + char *buffer); + +/* + * .SEQ integer value + */ +struct mptcp_rbs_value_skb_seq { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_skb_seq *self); + s64 (*execute)(struct mptcp_rbs_value_skb_seq *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_skb *skb; +}; + +struct mptcp_rbs_value_skb_seq *mptcp_rbs_value_skb_seq_new( + struct mptcp_rbs_value_skb *skb); +void mptcp_rbs_value_skb_seq_free(struct mptcp_rbs_value_skb_seq *self); +s64 mptcp_rbs_value_skb_seq_execute(struct mptcp_rbs_value_skb_seq *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_skb_seq *mptcp_rbs_value_skb_seq_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_skb_seq *value); +int mptcp_rbs_value_skb_seq_print(const struct mptcp_rbs_value_skb_seq *value, + char *buffer); + +/* + * .PSH boolean value + */ +struct mptcp_rbs_value_skb_psh { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_skb_psh *self); + s32 (*execute)(struct mptcp_rbs_value_skb_psh *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_skb *skb; + struct mptcp_rbs_value_sbf *sbf; +}; + +struct mptcp_rbs_value_skb_psh *mptcp_rbs_value_skb_psh_new( + struct mptcp_rbs_value_skb *skb); +void mptcp_rbs_value_skb_psh_free(struct mptcp_rbs_value_skb_psh *self); +s32 mptcp_rbs_value_skb_psh_execute( + struct mptcp_rbs_value_skb_psh *self, struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_skb_psh *mptcp_rbs_value_skb_psh_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_skb_psh *value); +int mptcp_rbs_value_skb_psh_print( + const struct mptcp_rbs_value_skb_psh *value, char *buffer); + +/* + * .LENGTH integer value + */ +struct mptcp_rbs_value_skb_length { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_skb_length *self); + s64 (*execute)(struct mptcp_rbs_value_skb_length *self, + struct mptcp_rbs_eval_ctx *ctx); + struct 
mptcp_rbs_value_skb *skb; +}; + +struct mptcp_rbs_value_skb_length *mptcp_rbs_value_skb_length_new( + struct mptcp_rbs_value_skb *skb); +void mptcp_rbs_value_skb_length_free(struct mptcp_rbs_value_skb_length *self); +s64 mptcp_rbs_value_skb_length_execute(struct mptcp_rbs_value_skb_length *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_skb_length *mptcp_rbs_value_skb_length_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_skb_length *value); +int mptcp_rbs_value_skb_length_print(const struct mptcp_rbs_value_skb_length *value, + char *buffer); + +/* + * .EMPTY boolean value + */ +struct mptcp_rbs_value_skb_list_empty { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_skb_list_empty *self); + s32 (*execute)(struct mptcp_rbs_value_skb_list_empty *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_skb_list *list; +}; + +struct mptcp_rbs_value_skb_list_empty *mptcp_rbs_value_skb_list_empty_new( + struct mptcp_rbs_value_skb_list *list); +void mptcp_rbs_value_skb_list_empty_free( + struct mptcp_rbs_value_skb_list_empty *self); +s32 mptcp_rbs_value_skb_list_empty_execute( + struct mptcp_rbs_value_skb_list_empty *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_skb_list_empty *mptcp_rbs_value_skb_list_empty_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_skb_list_empty *value); +int mptcp_rbs_value_skb_list_empty_print( + const struct mptcp_rbs_value_skb_list_empty *value, char *buffer); + +/* + * .POP() sockbuffer value + */ +struct mptcp_rbs_value_skb_list_pop { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_skb_list_pop *self); + struct sk_buff *(*execute)(struct mptcp_rbs_value_skb_list_pop *self, + struct mptcp_rbs_eval_ctx *ctx); + bool reinject; + struct mptcp_rbs_value_skb_list *list; +}; + +struct mptcp_rbs_value_skb_list_pop *mptcp_rbs_value_skb_list_pop_new( + struct mptcp_rbs_value_skb_list *list); +void mptcp_rbs_value_skb_list_pop_free( + struct mptcp_rbs_value_skb_list_pop *self); +struct sk_buff *mptcp_rbs_value_skb_list_pop_execute( + struct mptcp_rbs_value_skb_list_pop *self, struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_skb_list_pop *mptcp_rbs_value_skb_list_pop_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_skb_list_pop *value); +int mptcp_rbs_value_skb_list_pop_print( + const struct mptcp_rbs_value_skb_list_pop *value, char *buffer); + +/* + * .FILTER sockbuffer list value + */ + +struct mptcp_rbs_value_skb_list_filter_progress { + struct sk_buff *cur; + bool reinject; +}; + +struct mptcp_rbs_value_skb_list_filter { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_skb_list_filter *self); + struct sk_buff *(*execute)(struct mptcp_rbs_value_skb_list_filter *self, + struct mptcp_rbs_eval_ctx *ctx, void **prev, + bool *is_null); + enum mptcp_rbs_value_kind underlying_queue_kind; + struct mptcp_rbs_value_skb_list *list; + struct mptcp_rbs_value_bool *cond; + struct mptcp_rbs_value_skb_list_filter_progress progress; +}; + +struct mptcp_rbs_value_skb_list_filter *mptcp_rbs_value_skb_list_filter_new( + void); +void mptcp_rbs_value_skb_list_filter_free( + struct mptcp_rbs_value_skb_list_filter *self); +struct sk_buff *mptcp_rbs_value_skb_list_filter_execute( + struct mptcp_rbs_value_skb_list_filter *self, + struct mptcp_rbs_eval_ctx *ctx, void **prev, bool *is_null); +struct mptcp_rbs_value_skb_list_filter *mptcp_rbs_value_skb_list_filter_clone( + struct 
mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_skb_list_filter *value); +int mptcp_rbs_value_skb_list_filter_print( + const struct mptcp_rbs_value_skb_list_filter *value, char *buffer); + +/* + * Special value holding the actual sockbuffer for FILTER sockbuffer list value + */ +struct mptcp_rbs_value_skb_list_filter_skb { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_skb_list_filter_skb *self); + struct sk_buff *(*execute)( + struct mptcp_rbs_value_skb_list_filter_skb *self, + struct mptcp_rbs_eval_ctx *ctx); + bool reinject; + struct mptcp_rbs_value_skb_list_filter_progress *progress; +}; + +struct mptcp_rbs_value_skb_list_filter_skb * +mptcp_rbs_value_skb_list_filter_skb_new( + struct mptcp_rbs_value_skb_list_filter_progress *progress); +void mptcp_rbs_value_skb_list_filter_skb_free( + struct mptcp_rbs_value_skb_list_filter_skb *self); +struct sk_buff *mptcp_rbs_value_skb_list_filter_skb_execute( + struct mptcp_rbs_value_skb_list_filter_skb *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_skb_list_filter_skb * +mptcp_rbs_value_skb_list_filter_skb_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_skb_list_filter_skb *value); +int mptcp_rbs_value_skb_list_filter_skb_print( + const struct mptcp_rbs_value_skb_list_filter_skb *value, char *buffer); + +/* + * .COUNT integer value + */ +struct mptcp_rbs_value_skb_list_count { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_skb_list_count *self); + s64 (*execute)(struct mptcp_rbs_value_skb_list_count *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_skb_list *list; +}; + +struct mptcp_rbs_value_skb_list_count *mptcp_rbs_value_skb_list_count_new( + struct mptcp_rbs_value_skb_list *list); +void mptcp_rbs_value_skb_list_count_free( + struct mptcp_rbs_value_skb_list_count *self); +s64 mptcp_rbs_value_skb_list_count_execute( + struct mptcp_rbs_value_skb_list_count *self, + struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_skb_list_count *mptcp_rbs_value_skb_list_count_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_skb_list_count *value); +int mptcp_rbs_value_skb_list_count_print( + const struct mptcp_rbs_value_skb_list_count *value, char *buffer); + +/* + * .TOP sockbuffer value + */ +struct mptcp_rbs_value_skb_list_top { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_skb_list_top *self); + struct sk_buff *(*execute)(struct mptcp_rbs_value_skb_list_top *self, + struct mptcp_rbs_eval_ctx *ctx); + bool reinject; + struct mptcp_rbs_value_skb_list *list; +}; + +struct mptcp_rbs_value_skb_list_top *mptcp_rbs_value_skb_list_top_new( + struct mptcp_rbs_value_skb_list *list); +void mptcp_rbs_value_skb_list_top_free( + struct mptcp_rbs_value_skb_list_top *self); +struct sk_buff *mptcp_rbs_value_skb_list_top_execute( + struct mptcp_rbs_value_skb_list_top *self, struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_skb_list_top *mptcp_rbs_value_skb_list_top_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_skb_list_top *value); +int mptcp_rbs_value_skb_list_top_print( + const struct mptcp_rbs_value_skb_list_top *value, char *buffer); + +/* + * .GET sockbuffer value + */ +struct mptcp_rbs_value_skb_list_get { + enum mptcp_rbs_value_kind kind; + void (*free)(struct mptcp_rbs_value_skb_list_get *self); + struct sk_buff *(*execute)(struct mptcp_rbs_value_skb_list_get *self, + struct mptcp_rbs_eval_ctx *ctx); + struct mptcp_rbs_value_skb_list 
*list; + struct mptcp_rbs_value_int *index; +}; + +struct mptcp_rbs_value_skb_list_get *mptcp_rbs_value_skb_list_get_new( + struct mptcp_rbs_value_skb_list *list, struct mptcp_rbs_value_int *index); +void mptcp_rbs_value_skb_list_get_free( + struct mptcp_rbs_value_skb_list_get *self); +struct sk_buff *mptcp_rbs_value_skb_list_get_execute( + struct mptcp_rbs_value_skb_list_get *self, struct mptcp_rbs_eval_ctx *ctx); +struct mptcp_rbs_value_skb_list_get *mptcp_rbs_value_skb_list_get_clone( + struct mptcp_rbs_value_clone_ctx *ctx, + const struct mptcp_rbs_value_skb_list_get *value); +int mptcp_rbs_value_skb_list_get_print( + const struct mptcp_rbs_value_skb_list_get *value, char *buffer); + +/* + * Returns the returned type of a given value kind + */ +enum mptcp_rbs_type_kind mptcp_rbs_value_get_type( + enum mptcp_rbs_value_kind kind); + +#ifndef MPTCP_RBS_CLONE_USER_FUNC_DEFINED +#define MPTCP_RBS_CLONE_USER_FUNC_DEFINED +typedef struct mptcp_rbs_value *(*mptcp_rbs_value_clone_user_func)( + void *user_ctx, const struct mptcp_rbs_value *value); +#endif + +/* + * Creates a copy of a value and all its subvalues + * @value: The value to copy + * @user_ctx: User context for the user function or NULL + * @user_func: Function that is executed for each value or NULL. If this + * function returns a value other than NULL the current value is replaced with + * it instead of cloned + * Return: The new instance + */ +struct mptcp_rbs_value *mptcp_rbs_value_clone( + const struct mptcp_rbs_value *value, void *user_ctx, + mptcp_rbs_value_clone_user_func user_func); + +/* + * Writes a string representation of a value to the given buffer + * @value: The value + * @buffer: Pointer to the buffer where the string should be stored or NULL + * Return: Number of written characters + */ +int mptcp_rbs_value_print(const struct mptcp_rbs_value *value, char *buffer); + +/* some helper for bw calculation */ + +u64 mptcp_rbs_sbf_get_bw_send(struct mptcp_rbs_sbf_cb *sbf_cb); +u64 mptcp_rbs_sbf_get_bw_ack(struct mptcp_rbs_sbf_cb *sbf_cb); + +struct sk_buff *mptcp_rbs_next_in_queue(struct sk_buff_head *queue, + struct sk_buff *skb); + +#endif diff --git a/net/mptcp/mptcp_rbs_value_parser.h b/net/mptcp/mptcp_rbs_value_parser.h new file mode 100644 index 0000000000000..07852709f6514 --- /dev/null +++ b/net/mptcp/mptcp_rbs_value_parser.h @@ -0,0 +1,928 @@ +#ifndef _MPTCP_RBS_VALUE_PARSER_H +#define _MPTCP_RBS_VALUE_PARSER_H + +#include "mptcp_rbs_dynarray.h" +#include "mptcp_rbs_lexer.h" +#include "mptcp_rbs_value.h" +#include +#include + +/* + * Clones a string by allocating memory and copying the content + */ +static char *strclone(const char *str) +{ + int len; + char *result; + + if (!str) + return NULL; + + len = strlen(str); + result = kzalloc(len + 1, GFP_KERNEL); + memcpy(result, str, len); + return result; +} + +/* + * Type to manage variables + */ +struct var { + char *name; + int var_number; + enum mptcp_rbs_type_kind type; + union { + bool reinject; + enum mptcp_rbs_value_kind underlying_queue_kind; + }; +}; + +static struct var *var_new(char *name, int var_number, + enum mptcp_rbs_type_kind type, bool *reinject, + enum mptcp_rbs_value_kind *underlying_queue_kind) +{ + struct var *var; + + var = kzalloc(sizeof(struct var), GFP_KERNEL); + var->name = strclone(name); + var->var_number = var_number; + var->type = type; + if (reinject) + var->reinject = *reinject; + else if (underlying_queue_kind) + var->underlying_queue_kind = *underlying_queue_kind; + + return var; +} + +static void var_free(struct var *var) +{ + 
kfree(var->name); + kfree(var); +} + +/* + * Variable lists + */ + +DECL_DA(var_list, struct var *); + +#define INIT_VAR_LIST(list) INIT_DA(list) + +#define FREE_VAR_LIST(list) FREE_DA(list) + +#define ADD_VAR(list, var) ADD_DA_ITEM(list, var) + +#define FOREACH_VAR(list, var_, cmds) FOREACH_DA_ITEM(list, var_, cmds) + +/* + * Variable list stacks + */ + +DECL_DA(var_list_stack, struct var_list *); + +#define INIT_VAR_LIST_STACK(stack) INIT_DA(stack) + +#define FREE_VAR_LIST_STACK(stack) FREE_DA(stack) + +#define PUSH_VAR_LIST(stack, list) ADD_DA_ITEM(stack, list) + +#define POP_VAR_LIST(stack) DELETE_DA_ITEM(stack, GET_DA_LEN(stack) - 1) + +#define GET_VAR_LIST_STACK_TOP(stack) GET_DA_ITEM(stack, GET_DA_LEN(stack) - 1) + +#define FOREACH_STACK_VAR(stack, var, cmds) \ + do { \ + struct var_list *__list; \ + FOREACH_DA_ITEM_REV(stack, __list, \ + FOREACH_VAR(__list, var, cmds)); \ + } while (0) + +/* + * Type and functions to manage a stack of replacements. Replacements can be + * registered by values to replace identifiers in sub values with values. This + * is useful for values like FILTER(s => ...) + */ + +typedef struct mptcp_rbs_value *(*new_repl_value_func)(void *tag); + +struct repl { + char *name; + new_repl_value_func new_value; + void *tag; +}; + +/* + * Replacement stacks + */ + +DECL_DA(repl_stack, struct repl *); + +#define INIT_REPL_STACK(stack) INIT_DA(stack) + +#define FREE_REPL_STACK(stack) FREE_DA(stack) + +#define PUSH_REPL(stack, repl) ADD_DA_ITEM(stack, repl) + +#define POP_REPL(stack) DELETE_DA_ITEM(stack, GET_DA_LEN(stack) - 1) + +#define FOREACH_REPL(stack, var, cmds) FOREACH_DA_ITEM_REV(stack, var, cmds) + +struct parse_ctx { + char const *str; + int position; + int line; + int line_position; + struct repl_stack repls; + struct var_list_stack var_stack; + /* Index of the next free variable */ + int var_index; + enum mptcp_rbs_value_kind underlying_queue_kind; +}; + +static bool expect_token(struct parse_ctx *ctx, enum mptcp_rbs_token_kind kind, + struct mptcp_rbs_token *token); +static bool lookahead_token(struct parse_ctx *ctx, + struct mptcp_rbs_token *token); +static struct mptcp_rbs_value_bool *parse_value_bool(struct parse_ctx *ctx); +static struct mptcp_rbs_value_int *parse_value_int(struct parse_ctx *ctx); +static struct mptcp_rbs_value_string *parse_value_string(struct parse_ctx *ctx); +static struct mptcp_rbs_value_sbf *parse_value_sbf(struct parse_ctx *ctx); +static struct mptcp_rbs_value_skb *parse_value_skb(struct parse_ctx *ctx); + +/* + * Q sockbuffer list value + */ + +static struct mptcp_rbs_value_q *mptcp_rbs_value_q_parse(struct parse_ctx *ctx) +{ + ctx->underlying_queue_kind = VALUE_KIND_Q; + return mptcp_rbs_value_q_new(); +} + +/* + * QU sockbuffer list value + */ + +static struct mptcp_rbs_value_qu *mptcp_rbs_value_qu_parse( + struct parse_ctx *ctx) +{ + ctx->underlying_queue_kind = VALUE_KIND_QU; + return mptcp_rbs_value_qu_new(); +} + +/* + * RQ sockbuffer list value + */ + +static struct mptcp_rbs_value_rq *mptcp_rbs_value_rq_parse( + struct parse_ctx *ctx) +{ + ctx->underlying_queue_kind = VALUE_KIND_RQ; + return mptcp_rbs_value_rq_new(); +} + +/* + * .RTT integer value + */ + +static struct mptcp_rbs_value_sbf_rtt *mptcp_rbs_value_sbf_rtt_parse( + struct parse_ctx *ctx, struct mptcp_rbs_value_sbf *sbf) +{ + return mptcp_rbs_value_sbf_rtt_new(sbf); +} + +/* + * .RTT_MS integer value + */ + +static struct mptcp_rbs_value_sbf_rtt_ms *mptcp_rbs_value_sbf_rtt_ms_parse( + struct parse_ctx *ctx, struct mptcp_rbs_value_sbf *sbf) +{ + return 
mptcp_rbs_value_sbf_rtt_ms_new(sbf); +} + +/* + * .RTT_VAR integer value + */ + +static struct mptcp_rbs_value_sbf_rtt_var *mptcp_rbs_value_sbf_rtt_var_parse( + struct parse_ctx *ctx, struct mptcp_rbs_value_sbf *sbf) +{ + return mptcp_rbs_value_sbf_rtt_var_new(sbf); +} + +/* + * .USER integer value + */ + +static struct mptcp_rbs_value_sbf_user *mptcp_rbs_value_sbf_user_parse( + struct parse_ctx *ctx, struct mptcp_rbs_value_sbf *sbf) +{ + return mptcp_rbs_value_sbf_user_new(sbf); +} + +/* + * .IS_BACKUP boolean value + */ + +static struct mptcp_rbs_value_sbf_is_backup * +mptcp_rbs_value_sbf_is_backup_parse(struct parse_ctx *ctx, + struct mptcp_rbs_value_sbf *sbf) +{ + return mptcp_rbs_value_sbf_is_backup_new(sbf); +} + +/* + * .CWND integer value + */ + +static struct mptcp_rbs_value_sbf_cwnd *mptcp_rbs_value_sbf_cwnd_parse( + struct parse_ctx *ctx, struct mptcp_rbs_value_sbf *sbf) +{ + return mptcp_rbs_value_sbf_cwnd_new(sbf); +} + +/* + * .QUEUED integer value + */ + +static struct mptcp_rbs_value_sbf_queued *mptcp_rbs_value_sbf_queued_parse( + struct parse_ctx *ctx, struct mptcp_rbs_value_sbf *sbf) +{ + return mptcp_rbs_value_sbf_queued_new(sbf); +} + +/* + * .SKBS_IN_FLIGHT integer value + */ + +static struct mptcp_rbs_value_sbf_skbs_in_flight * +mptcp_rbs_value_sbf_skbs_in_flight_parse(struct parse_ctx *ctx, + struct mptcp_rbs_value_sbf *sbf) +{ + return mptcp_rbs_value_sbf_skbs_in_flight_new(sbf); +} + +/* + * .LOST_SKBS integer value + */ + +static struct mptcp_rbs_value_sbf_lost_skbs * +mptcp_rbs_value_sbf_lost_skbs_parse(struct parse_ctx *ctx, + struct mptcp_rbs_value_sbf *sbf) +{ + return mptcp_rbs_value_sbf_lost_skbs_new(sbf); +} + +/* + * .HAS_WINDOW_FOR boolean value + */ + +static struct mptcp_rbs_value_sbf_has_window_for * +mptcp_rbs_value_sbf_has_window_for_parse(struct parse_ctx *ctx, + struct mptcp_rbs_value_sbf *sbf) +{ + struct mptcp_rbs_token token; + struct mptcp_rbs_value_skb *skb; + + /* ( must follow */ + if (!expect_token(ctx, TOKEN_KIND_OPEN_BRACKET, &token)) + return NULL; + + /* Sockbuffer value must follow */ + skb = parse_value_skb(ctx); + if (!skb) + return NULL; + + /* ) must follow */ + if (!expect_token(ctx, TOKEN_KIND_CLOSE_BRACKET, &token)) { + skb->free(skb); + return NULL; + } + + return mptcp_rbs_value_sbf_has_window_for_new(sbf, skb); +} + +/* + * .ID integer value + */ + +static struct mptcp_rbs_value_sbf_id *mptcp_rbs_value_sbf_id_parse( + struct parse_ctx *ctx, struct mptcp_rbs_value_sbf *sbf) +{ + return mptcp_rbs_value_sbf_id_new(sbf); +} + +/* + * .DELAY_IN integer value + */ + +static struct mptcp_rbs_value_sbf_delay_in *mptcp_rbs_value_sbf_delay_in_parse( + struct parse_ctx *ctx, struct mptcp_rbs_value_sbf *sbf) +{ + return mptcp_rbs_value_sbf_delay_in_new(sbf); +} + +/* + * .DELAY_OUT integer value + */ + +static struct mptcp_rbs_value_sbf_delay_out * +mptcp_rbs_value_sbf_delay_out_parse(struct parse_ctx *ctx, + struct mptcp_rbs_value_sbf *sbf) +{ + return mptcp_rbs_value_sbf_delay_out_new(sbf); +} + +/* + * .BW_OUT_SEND integer value + */ + +static struct mptcp_rbs_value_sbf_bw_out_send * +mptcp_rbs_value_sbf_bw_out_send_parse(struct parse_ctx *ctx, + struct mptcp_rbs_value_sbf *sbf) +{ + return mptcp_rbs_value_sbf_bw_out_send_new(sbf); +} + +/* + * .BW_OUT_ACK integer value + */ + +static struct mptcp_rbs_value_sbf_bw_out_ack * +mptcp_rbs_value_sbf_bw_out_ack_parse(struct parse_ctx *ctx, + struct mptcp_rbs_value_sbf *sbf) +{ + return mptcp_rbs_value_sbf_bw_out_ack_new(sbf); +} + +/* + * .SSTHRESH integer value + */ + +static struct 
mptcp_rbs_value_sbf_ssthresh *mptcp_rbs_value_sbf_ssthresh_parse( + struct parse_ctx *ctx, struct mptcp_rbs_value_sbf *sbf) +{ + return mptcp_rbs_value_sbf_ssthresh_new(sbf); +} + +/* + * .THROTTLED boolean value + */ + +static struct mptcp_rbs_value_sbf_throttled * +mptcp_rbs_value_sbf_throttled_parse(struct parse_ctx *ctx, + struct mptcp_rbs_value_sbf *sbf) +{ + return mptcp_rbs_value_sbf_throttled_new(sbf); +} + +/* + * .LOSSY boolean value + */ + +static struct mptcp_rbs_value_sbf_lossy *mptcp_rbs_value_sbf_lossy_parse( + struct parse_ctx *ctx, struct mptcp_rbs_value_sbf *sbf) +{ + return mptcp_rbs_value_sbf_lossy_new(sbf); +} + +/* + * SUBFLOWS subflow list value + */ + +static struct mptcp_rbs_value_subflows *mptcp_rbs_value_subflows_parse( + struct parse_ctx *ctx) +{ + return mptcp_rbs_value_subflows_new(); +} + +/* + * CURRENT_TIME_MS integer value + */ + +static struct mptcp_rbs_value_current_time_ms * +mptcp_rbs_value_current_time_ms_parse(struct parse_ctx *ctx) +{ + return mptcp_rbs_value_current_time_ms_new(); +} + +/* + * RANDOM integer value + */ + +static struct mptcp_rbs_value_random *mptcp_rbs_value_random_parse( + struct parse_ctx *ctx) +{ + return mptcp_rbs_value_random_new(); +} + +/* + * .EMPTY boolean value + */ + +static struct mptcp_rbs_value_sbf_list_empty * +mptcp_rbs_value_sbf_list_empty_parse(struct parse_ctx *ctx, + struct mptcp_rbs_value_sbf_list *list) +{ + return mptcp_rbs_value_sbf_list_empty_new(list); +} + +/* + * .FILTER subflow list value + */ + +static struct mptcp_rbs_value_sbf_list_filter * +mptcp_rbs_value_sbf_list_filter_parse(struct parse_ctx *ctx, + struct mptcp_rbs_value_sbf_list *list) +{ + struct mptcp_rbs_token token; + struct mptcp_rbs_token ident_token; + struct mptcp_rbs_value_sbf_list_filter *value; + struct repl repl; + struct mptcp_rbs_value_bool *cond; + + /* ( must follow */ + if (!expect_token(ctx, TOKEN_KIND_OPEN_BRACKET, &token)) + return NULL; + + /* Identifier must follow */ + if (!expect_token(ctx, TOKEN_KIND_IDENT, &ident_token)) + return NULL; + + /* => must follow */ + if (!expect_token(ctx, TOKEN_KIND_ASSIGN, &token)) + return NULL; + if (!expect_token(ctx, TOKEN_KIND_GREATER, &token)) + return NULL; + + /* Install replacement */ + value = mptcp_rbs_value_sbf_list_filter_new(); + repl.name = ident_token.string; + repl.new_value = + (new_repl_value_func) mptcp_rbs_value_sbf_list_filter_sbf_new; + repl.tag = &value->cur; + PUSH_REPL(&ctx->repls, &repl); + + /* Boolean value must follow */ + cond = parse_value_bool(ctx); + POP_REPL(&ctx->repls); + if (!cond) { + mptcp_rbs_value_sbf_list_filter_free(value); + return NULL; + } + value->cond = cond; + + /* ) must follow */ + if (!expect_token(ctx, TOKEN_KIND_CLOSE_BRACKET, &token)) { + value->free(value); + return NULL; + } + value->list = list; + + return value; +} + +/* + * Special value holding the actual subflow for FILTER subflow list value + */ + +static struct mptcp_rbs_value_sbf_list_filter_sbf * +mptcp_rbs_value_sbf_list_filter_sbf_parse(struct parse_ctx *ctx) +{ + /* This should never be called because this value cannot be parsed */ + BUG_ON(true); + return NULL; +} + +/* + * .MAX subflow value + */ + +static struct mptcp_rbs_value_sbf_list_max *mptcp_rbs_value_sbf_list_max_parse( + struct parse_ctx *ctx, struct mptcp_rbs_value_sbf_list *list) +{ + struct mptcp_rbs_token token; + struct mptcp_rbs_token ident_token; + struct mptcp_rbs_value_sbf_list_max *value; + struct repl repl; + struct mptcp_rbs_value_int *cond; + + /* ( must follow */ + if (!expect_token(ctx, 
TOKEN_KIND_OPEN_BRACKET, &token)) + return NULL; + + /* Identifier must follow */ + if (!expect_token(ctx, TOKEN_KIND_IDENT, &ident_token)) + return NULL; + + /* => must follow */ + if (!expect_token(ctx, TOKEN_KIND_ASSIGN, &token)) + return NULL; + if (!expect_token(ctx, TOKEN_KIND_GREATER, &token)) + return NULL; + + /* Install replacement */ + value = mptcp_rbs_value_sbf_list_max_new(); + repl.name = ident_token.string; + repl.new_value = + (new_repl_value_func) mptcp_rbs_value_sbf_list_filter_sbf_new; + repl.tag = &value->cur; + PUSH_REPL(&ctx->repls, &repl); + + /* Integer value must follow */ + cond = parse_value_int(ctx); + POP_REPL(&ctx->repls); + if (!cond) { + mptcp_rbs_value_sbf_list_max_free(value); + return NULL; + } + value->cond = cond; + + /* ) must follow */ + if (!expect_token(ctx, TOKEN_KIND_CLOSE_BRACKET, &token)) { + value->free(value); + return NULL; + } + value->list = list; + + return value; +} + +/* + * .MIN subflow value + */ + +static struct mptcp_rbs_value_sbf_list_min *mptcp_rbs_value_sbf_list_min_parse( + struct parse_ctx *ctx, struct mptcp_rbs_value_sbf_list *list) +{ + struct mptcp_rbs_token token; + struct mptcp_rbs_token ident_token; + struct mptcp_rbs_value_sbf_list_min *value; + struct repl repl; + struct mptcp_rbs_value_int *cond; + + /* ( must follow */ + if (!expect_token(ctx, TOKEN_KIND_OPEN_BRACKET, &token)) + return NULL; + + /* Identifier must follow */ + if (!expect_token(ctx, TOKEN_KIND_IDENT, &ident_token)) + return NULL; + + /* => must follow */ + if (!expect_token(ctx, TOKEN_KIND_ASSIGN, &token)) + return NULL; + if (!expect_token(ctx, TOKEN_KIND_GREATER, &token)) + return NULL; + + /* Install replacement */ + value = mptcp_rbs_value_sbf_list_min_new(); + repl.name = ident_token.string; + repl.new_value = + (new_repl_value_func) mptcp_rbs_value_sbf_list_filter_sbf_new; + repl.tag = &value->cur; + PUSH_REPL(&ctx->repls, &repl); + + /* Integer value must follow */ + cond = parse_value_int(ctx); + POP_REPL(&ctx->repls); + if (!cond) { + mptcp_rbs_value_sbf_list_min_free(value); + return NULL; + } + value->cond = cond; + + /* ) must follow */ + if (!expect_token(ctx, TOKEN_KIND_CLOSE_BRACKET, &token)) { + value->free(value); + return NULL; + } + value->list = list; + + return value; +} + +/* + * .GET subflow value + */ + +static struct mptcp_rbs_value_sbf_list_get *mptcp_rbs_value_sbf_list_get_parse( + struct parse_ctx *ctx, struct mptcp_rbs_value_sbf_list *list) +{ + struct mptcp_rbs_token token; + struct mptcp_rbs_value_int *index; + + /* ( must follow */ + if (!expect_token(ctx, TOKEN_KIND_OPEN_BRACKET, &token)) + return NULL; + + /* Integer value must follow */ + index = parse_value_int(ctx); + if (!index) + return NULL; + + /* ) must follow */ + if (!expect_token(ctx, TOKEN_KIND_CLOSE_BRACKET, &token)) { + index->free(index); + return NULL; + } + + return mptcp_rbs_value_sbf_list_get_new(list, index); +} + +/* + * .COUNT integer value + */ + +static struct mptcp_rbs_value_sbf_list_count * +mptcp_rbs_value_sbf_list_count_parse(struct parse_ctx *ctx, + struct mptcp_rbs_value_sbf_list *list) +{ + return mptcp_rbs_value_sbf_list_count_new(list); +} + +/* + * .SUM integer value + */ + +static struct mptcp_rbs_value_sbf_list_sum *mptcp_rbs_value_sbf_list_sum_parse( + struct parse_ctx *ctx, struct mptcp_rbs_value_sbf_list *list) +{ + struct mptcp_rbs_token token; + struct mptcp_rbs_token ident_token; + struct mptcp_rbs_value_sbf_list_sum *value; + struct repl repl; + struct mptcp_rbs_value_int *cond; + + /* ( must follow */ + if (!expect_token(ctx, 
TOKEN_KIND_OPEN_BRACKET, &token)) + return NULL; + + /* Identifier must follow */ + if (!expect_token(ctx, TOKEN_KIND_IDENT, &ident_token)) + return NULL; + + /* => must follow */ + if (!expect_token(ctx, TOKEN_KIND_ASSIGN, &token)) + return NULL; + if (!expect_token(ctx, TOKEN_KIND_GREATER, &token)) + return NULL; + + /* Install replacement */ + value = mptcp_rbs_value_sbf_list_sum_new(); + repl.name = ident_token.string; + repl.new_value = + (new_repl_value_func) mptcp_rbs_value_sbf_list_filter_sbf_new; + repl.tag = &value->cur; + PUSH_REPL(&ctx->repls, &repl); + + /* Integer value must follow */ + cond = parse_value_int(ctx); + POP_REPL(&ctx->repls); + if (!cond) { + mptcp_rbs_value_sbf_list_sum_free(value); + return NULL; + } + value->cond = cond; + + /* ) must follow */ + if (!expect_token(ctx, TOKEN_KIND_CLOSE_BRACKET, &token)) { + value->free(value); + return NULL; + } + value->list = list; + + return value; +} + +/* + * .SENT_ON boolean value + */ + +static struct mptcp_rbs_value_skb_sent_on *mptcp_rbs_value_skb_sent_on_parse( + struct parse_ctx *ctx, struct mptcp_rbs_value_skb *skb) +{ + struct mptcp_rbs_token token; + struct mptcp_rbs_value_sbf *sbf; + + /* ( must follow */ + if (!expect_token(ctx, TOKEN_KIND_OPEN_BRACKET, &token)) + return NULL; + + /* Subflow value must follow */ + sbf = parse_value_sbf(ctx); + if (!sbf) + return NULL; + + /* ) must follow */ + if (!expect_token(ctx, TOKEN_KIND_CLOSE_BRACKET, &token)) { + sbf->free(sbf); + return NULL; + } + + return mptcp_rbs_value_skb_sent_on_new(skb, sbf); +} + +/* + * .SENT_ON_ALL boolean value + */ + +static struct mptcp_rbs_value_skb_sent_on_all * +mptcp_rbs_value_skb_sent_on_all_parse(struct parse_ctx *ctx, + struct mptcp_rbs_value_skb *skb) +{ + return mptcp_rbs_value_skb_sent_on_all_new(skb); +} + +/* + * .USER integer value + */ + +static struct mptcp_rbs_value_skb_user *mptcp_rbs_value_skb_user_parse( + struct parse_ctx *ctx, struct mptcp_rbs_value_skb *skb) +{ + return mptcp_rbs_value_skb_user_new(skb); +} + +/* + * .SEQ integer value + */ + +static struct mptcp_rbs_value_skb_seq *mptcp_rbs_value_skb_seq_parse( + struct parse_ctx *ctx, struct mptcp_rbs_value_skb *skb) +{ + return mptcp_rbs_value_skb_seq_new(skb); +} + +/* + * .PSH integer value + */ + +static struct mptcp_rbs_value_skb_psh *mptcp_rbs_value_skb_psh_parse( + struct parse_ctx *ctx, struct mptcp_rbs_value_skb *skb) +{ + return mptcp_rbs_value_skb_psh_new(skb); +} + +/* + * .LENGTH integer value + */ + +static struct mptcp_rbs_value_skb_length *mptcp_rbs_value_skb_length_parse( + struct parse_ctx *ctx, struct mptcp_rbs_value_skb *skb) +{ + return mptcp_rbs_value_skb_length_new(skb); +} + +/* + * .EMPTY boolean value + */ + +static struct mptcp_rbs_value_skb_list_empty * +mptcp_rbs_value_skb_list_empty_parse(struct parse_ctx *ctx, + struct mptcp_rbs_value_skb_list *list) +{ + return mptcp_rbs_value_skb_list_empty_new(list); +} + +/* + * .POP() sockbuffer value + */ + +static struct mptcp_rbs_value_skb_list_pop *mptcp_rbs_value_skb_list_pop_parse( + struct parse_ctx *ctx, struct mptcp_rbs_value_skb_list *list) +{ + struct mptcp_rbs_token token; + + /* ( must follow */ + if (!expect_token(ctx, TOKEN_KIND_OPEN_BRACKET, &token)) + return NULL; + + /* ) must follow */ + if (!expect_token(ctx, TOKEN_KIND_CLOSE_BRACKET, &token)) + return NULL; + + return mptcp_rbs_value_skb_list_pop_new(list); +} + +/* + * .FILTER sockbuffer list value + */ + +static struct mptcp_rbs_value_skb_list_filter * +mptcp_rbs_value_skb_list_filter_parse(struct parse_ctx *ctx, + struct 
mptcp_rbs_value_skb_list *list) +{ + struct mptcp_rbs_token token; + struct mptcp_rbs_token ident_token; + struct mptcp_rbs_value_skb_list_filter *value; + struct repl repl; + struct mptcp_rbs_value_bool *cond; + + /* ( must follow */ + if (!expect_token(ctx, TOKEN_KIND_OPEN_BRACKET, &token)) + return NULL; + + /* Identifier must follow */ + if (!expect_token(ctx, TOKEN_KIND_IDENT, &ident_token)) + return NULL; + + /* => must follow */ + if (!expect_token(ctx, TOKEN_KIND_ASSIGN, &token)) + return NULL; + if (!expect_token(ctx, TOKEN_KIND_GREATER, &token)) + return NULL; + + /* Install replacement */ + value = mptcp_rbs_value_skb_list_filter_new(); + value->progress.reinject = list->underlying_queue_kind == VALUE_KIND_RQ; + repl.name = ident_token.string; + repl.new_value = + (new_repl_value_func) mptcp_rbs_value_skb_list_filter_skb_new; + repl.tag = &value->progress; + PUSH_REPL(&ctx->repls, &repl); + + /* Boolean value must follow */ + cond = parse_value_bool(ctx); + POP_REPL(&ctx->repls); + if (!cond) { + mptcp_rbs_value_skb_list_filter_free(value); + return NULL; + } + value->cond = cond; + + /* ) must follow */ + if (!expect_token(ctx, TOKEN_KIND_CLOSE_BRACKET, &token)) { + value->free(value); + return NULL; + } + value->list = list; + value->underlying_queue_kind = list->underlying_queue_kind; + + return value; +} + +/* + * Special value holding the actual sockbuffer for FILTER sockbuffer list value + */ + +static struct mptcp_rbs_value_skb_list_filter_skb * +mptcp_rbs_value_skb_list_filter_skb_parse(struct parse_ctx *ctx) +{ + /* This should never be called because this value cannot be parsed */ + BUG_ON(true); + return NULL; +} + +/* + * .COUNT integer value + */ + +static struct mptcp_rbs_value_skb_list_count * +mptcp_rbs_value_skb_list_count_parse(struct parse_ctx *ctx, + struct mptcp_rbs_value_skb_list *list) +{ + return mptcp_rbs_value_skb_list_count_new(list); +} + +/* + * .TOP sockbuffer value + */ + +static struct mptcp_rbs_value_skb_list_top *mptcp_rbs_value_skb_list_top_parse( + struct parse_ctx *ctx, struct mptcp_rbs_value_skb_list *list) +{ + return mptcp_rbs_value_skb_list_top_new(list); +} + +/* + * .GET sockbuffer value + */ + +static struct mptcp_rbs_value_skb_list_get *mptcp_rbs_value_skb_list_get_parse( + struct parse_ctx *ctx, struct mptcp_rbs_value_skb_list *list) +{ + struct mptcp_rbs_token token; + struct mptcp_rbs_value_int *index; + + /* ( must follow */ + if (!expect_token(ctx, TOKEN_KIND_OPEN_BRACKET, &token)) + return NULL; + + /* Integer value must follow */ + index = parse_value_int(ctx); + if (!index) + return NULL; + + /* ) must follow */ + if (!expect_token(ctx, TOKEN_KIND_CLOSE_BRACKET, &token)) { + index->free(index); + return NULL; + } + + return mptcp_rbs_value_skb_list_get_new(list, index); +} + +#endif diff --git a/net/mptcp/mptcp_rbs_var.c b/net/mptcp/mptcp_rbs_var.c new file mode 100644 index 0000000000000..785cb83ab1601 --- /dev/null +++ b/net/mptcp/mptcp_rbs_var.c @@ -0,0 +1,29 @@ +#include "mptcp_rbs_var.h" +#include + +#pragma GCC diagnostic push +#pragma GCC diagnostic error "-Wswitch" +void mptcp_rbs_var_free(struct mptcp_rbs_var *self) +{ + if (self->is_lazy) + return; + + switch (self->type) { + case TYPE_KIND_NULL: + case TYPE_KIND_BOOL: + case TYPE_KIND_INT: + case TYPE_KIND_STRING: + case TYPE_KIND_SBF: + case TYPE_KIND_SKB: + break; + case TYPE_KIND_SBFLIST: { + kfree(self->sbf_list_value); + break; + } + case TYPE_KIND_SKBLIST: { + kfree(self->skb_list_value); + break; + } + } +} +#pragma GCC diagnostic pop diff --git 
a/net/mptcp/mptcp_rbs_var.h b/net/mptcp/mptcp_rbs_var.h new file mode 100644 index 0000000000000..e96ff58adcb02 --- /dev/null +++ b/net/mptcp/mptcp_rbs_var.h @@ -0,0 +1,31 @@ +#ifndef _MPTCP_RBS_VAR_H +#define _MPTCP_RBS_VAR_H + +#include "mptcp_rbs_type.h" +#include "mptcp_rbs_value.h" +#include + +#define MPTCP_RBS_MAX_VAR_COUNT 24 + +/* Struct for a single variable */ +struct mptcp_rbs_var { + enum mptcp_rbs_type_kind type; + bool is_lazy; + union { + s32 bool_value; + s64 int_value; + char *string_value; + struct tcp_sock *sbf_value; + struct tcp_sock **sbf_list_value; + struct sk_buff *skb_value; + struct sk_buff **skb_list_value; + struct mptcp_rbs_value *lazy_value; + }; +}; + +/* + * Releases a variable struct + */ +void mptcp_rbs_var_free(struct mptcp_rbs_var *self); + +#endif diff --git a/net/mptcp/mptcp_rr.c b/net/mptcp/mptcp_rr.c index 8910ba9e6052e..e36871c9eb186 100644 --- a/net/mptcp/mptcp_rr.c +++ b/net/mptcp/mptcp_rr.c @@ -2,6 +2,7 @@ #include #include +#include static unsigned char num_segments __read_mostly = 1; module_param(num_segments, byte, 0644); @@ -171,11 +172,17 @@ static struct sk_buff *__mptcp_rr_next_segment(const struct sock *meta_sk, int * return skb; } +u64 total_rr_time_skb; +u64 total_rr_time_no_skb; +u64 total_rr_count_skb; +u64 total_rr_count_no_skb; + static struct sk_buff *mptcp_rr_next_segment(struct sock *meta_sk, int *reinject, struct sock **subsk, unsigned int *limit) { + u64 begin_time = __native_read_tsc(); const struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb; struct sock *sk_it, *choose_sk = NULL; struct sk_buff *skb = __mptcp_rr_next_segment(meta_sk, reinject); @@ -185,13 +192,23 @@ static struct sk_buff *mptcp_rr_next_segment(struct sock *meta_sk, /* As we set it, we have to reset it as well. */ *limit = 0; - if (!skb) + if (!skb) { + total_rr_time_no_skb += __native_read_tsc() - begin_time; + total_rr_count_no_skb++; return NULL; + } if (*reinject) { *subsk = rr_get_available_subflow(meta_sk, skb, false); - if (!*subsk) + if (!*subsk) { + total_rr_time_no_skb += __native_read_tsc() - begin_time; + total_rr_count_no_skb++; + return NULL; + } + + total_rr_time_skb += __native_read_tsc() - begin_time; + total_rr_count_skb++; return skb; } @@ -252,8 +269,12 @@ static struct sk_buff *mptcp_rr_next_segment(struct sock *meta_sk, struct tcp_sock *choose_tp = tcp_sk(choose_sk); struct rrsched_priv *rsp = rrsched_get_priv(choose_tp); - if (!mptcp_rr_is_available(choose_sk, skb, false, true)) + if (!mptcp_rr_is_available(choose_sk, skb, false, true)) { + total_rr_time_no_skb += __native_read_tsc() - begin_time; + total_rr_count_no_skb++; + return NULL; + } *subsk = choose_sk; mss_now = tcp_current_mss(*subsk); @@ -264,9 +285,15 @@ static struct sk_buff *mptcp_rr_next_segment(struct sock *meta_sk, else rsp->quota++; + total_rr_time_skb += __native_read_tsc() - begin_time; + total_rr_count_skb++; + return skb; } + total_rr_time_no_skb += __native_read_tsc() - begin_time; + total_rr_count_no_skb++; + return NULL; } diff --git a/net/mptcp/mptcp_sched.c b/net/mptcp/mptcp_sched.c index 2e1d08a6e930f..42ecbef333653 100644 --- a/net/mptcp/mptcp_sched.c +++ b/net/mptcp/mptcp_sched.c @@ -2,10 +2,14 @@ #include #include +#include +#include static DEFINE_SPINLOCK(mptcp_sched_list_lock); static LIST_HEAD(mptcp_sched_list); +static LIST_HEAD(mptcp_sched_select_list); + struct defsched_priv { u32 last_rbuf_opti; }; @@ -384,11 +388,17 @@ static struct sk_buff *__mptcp_next_segment(struct sock *meta_sk, int *reinject) return skb; } +u64 total_default_time_skb; +u64 
total_default_time_no_skb; +u64 total_default_count_skb; +u64 total_default_count_no_skb; + static struct sk_buff *mptcp_next_segment(struct sock *meta_sk, int *reinject, struct sock **subsk, unsigned int *limit) { + u64 begin_time = __native_read_tsc(); struct sk_buff *skb = __mptcp_next_segment(meta_sk, reinject); unsigned int mss_now; struct tcp_sock *subtp; @@ -398,12 +408,18 @@ static struct sk_buff *mptcp_next_segment(struct sock *meta_sk, /* As we set it, we have to reset it as well. */ *limit = 0; - if (!skb) + if (!skb) { + total_default_time_no_skb += __native_read_tsc() - begin_time; + total_default_count_no_skb++; return NULL; + } *subsk = get_available_subflow(meta_sk, skb, false); - if (!*subsk) + if (!*subsk) { + total_default_time_no_skb += __native_read_tsc() - begin_time; + total_default_count_no_skb++; return NULL; + } subtp = tcp_sk(*subsk); mss_now = tcp_current_mss(*subsk); @@ -412,13 +428,21 @@ static struct sk_buff *mptcp_next_segment(struct sock *meta_sk, skb = mptcp_rcv_buf_optimization(*subsk, 1); if (skb) *reinject = -1; - else + else { + total_default_time_no_skb += __native_read_tsc() - begin_time; + total_default_count_no_skb++; + return NULL; + } } /* No splitting required, as we will only send one single segment */ - if (skb->len <= mss_now) + if (skb->len <= mss_now) { + total_default_time_skb += __native_read_tsc() - begin_time; + total_default_count_skb++; + return skb; + } /* The following is similar to tcp_mss_split_point, but * we do not care about nagle, because we will anyways @@ -432,8 +456,12 @@ static struct sk_buff *mptcp_next_segment(struct sock *meta_sk, if (!gso_max_segs) /* No gso supported on the subflow's NIC */ gso_max_segs = 1; max_segs = min_t(unsigned int, tcp_cwnd_test(subtp, skb), gso_max_segs); - if (!max_segs) + if (!max_segs) { + total_default_time_no_skb += __native_read_tsc() - begin_time; + total_default_count_no_skb++; + return NULL; + } max_len = mss_now * max_segs; window = tcp_wnd_end(subtp) - subtp->write_seq; @@ -446,6 +474,9 @@ static struct sk_buff *mptcp_next_segment(struct sock *meta_sk, /* Or, take the window */ *limit = needed; + total_default_time_skb += __native_read_tsc() - begin_time; + total_default_count_skb++; + return skb; } @@ -464,7 +495,7 @@ struct mptcp_sched_ops mptcp_sched_default = { .owner = THIS_MODULE, }; -static struct mptcp_sched_ops *mptcp_sched_find(const char *name) +struct mptcp_sched_ops *mptcp_sched_find(const char *name) { struct mptcp_sched_ops *e; @@ -545,9 +576,92 @@ int mptcp_set_default_scheduler(const char *name) return ret; } -void mptcp_init_scheduler(struct mptcp_cb *mpcb) +/* Select scheduler @name for connections matching @dstip and @sport until @till_time_s (seconds). */ +int mptcp_set_default_scheduler_for_tuple(const char *name, __be32 dstip, + __be16 sport, unsigned long till_time_s) { struct mptcp_sched_ops *sched; + int ret = -ENOENT; + + spin_lock(&mptcp_sched_list_lock); + sched = mptcp_sched_find(name); + mptcp_debug("afr: found scheduler %p for %s\n", sched, name); +#ifdef CONFIG_MODULES + if (!sched && capable(CAP_NET_ADMIN)) { + spin_unlock(&mptcp_sched_list_lock); + + request_module("mptcp_%s", name); + spin_lock(&mptcp_sched_list_lock); + sched = mptcp_sched_find(name); + } +#endif + mptcp_debug("afr: found scheduler %p for %s after module\n", sched, name); + if (sched) { + struct mptcp_sched_select *sched_select = kzalloc(sizeof(struct mptcp_sched_select), GFP_ATOMIC); + + if (!sched_select) { + spin_unlock(&mptcp_sched_list_lock); + return -ENOMEM; + } + + sched_select->dstip = dstip; + sched_select->sport = sport; + sched_select->till_time_s = till_time_s; + sched_select->sched_ops = sched; + + list_add(&sched_select->list,
&mptcp_sched_select_list); + ret = 0; + } else { + pr_info("%s is not available\n", name); + } + spin_unlock(&mptcp_sched_list_lock); + + return ret; +} + +/* Return the scheduler selected for the given dstip/sport tuple, or NULL; expired entries are removed from the list. */ +struct mptcp_sched_ops *mptcp_sched_find_for_tuple(__be32 dstip, __be16 sport_end) +{ + struct mptcp_sched_select *e; + struct mptcp_sched_select *n; + struct timespec ts; + __be16 sport = ntohs(sport_end); + + getnstimeofday(&ts); + + mptcp_debug("afr: searching for ip %i.%i.%i.%i and port %i in list at time %ld\n", dstip & 0x000000FF, + (dstip & 0x0000FF00)>>8, + (dstip & 0x00FF0000)>>16, + (dstip & 0xFF000000)>>24, + sport, ts.tv_sec); + list_for_each_entry_safe(e, n, &mptcp_sched_select_list, list) { + mptcp_debug("afr: comparing with ip %i.%i.%i.%i and port %i in list with sched %p\n", e->dstip & 0x000000FF, + (e->dstip & 0x0000FF00)>>8, + (e->dstip & 0x00FF0000)>>16, + (e->dstip & 0xFF000000)>>24, + e->sport, e->sched_ops); + if (e->till_time_s < ts.tv_sec) { + mptcp_debug("afr: removing selection scheduler\n"); + list_del(&e->list); + } + + if (e->dstip == dstip && e->sport == sport) + return e->sched_ops; + } + + return NULL; +} + +void mptcp_init_scheduler(struct mptcp_cb *mpcb) +{ + struct mptcp_sched_ops *sched = NULL; + + /* afr tests to set scheduler specific for port */ + struct inet_sock *isk_tmp = inet_sk(mpcb->master_sk); + + /* local address and remote port select the scheduler */ + mptcp_debug("afr new scheduler for %u:%i\n", isk_tmp->inet_saddr, isk_tmp->inet_dport); + sched = mptcp_sched_find_for_tuple(isk_tmp->inet_saddr, isk_tmp->inet_dport); + mptcp_debug("afr found %p\n", sched); + + if (sched) { + mpcb->sched_ops = sched; + return; + } rcu_read_lock(); list_for_each_entry_rcu(sched, &mptcp_sched_list, list) { diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index f1acb0f60dc35..9bfe551ca62bd 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -575,6 +575,8 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch) struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl = htb_classify(skb, sch, &ret); + + //printk("afr: htb_enqueue skb %p for sch %p with class %p and q.qlen %u\n", skb, sch, cl, sch->q.qlen); + if (cl == HTB_DIRECT) { /* enqueue to helper queue */ if (q->direct_queue.qlen < q->direct_qlen) { @@ -847,6 +849,7 @@ static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, const int prio, } skb = cl->un.leaf.q->dequeue(cl->un.leaf.q); + //printk("afr: htb_dequeue_tree found skb in cl->un.leaf.q %p with qlen %u\n", skb, cl->un.leaf.q->q.qlen); if (likely(skb != NULL)) break; @@ -885,6 +888,7 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch) /* try to dequeue direct packets as high prio (!)
to minimize cpu work */ skb = __skb_dequeue(&q->direct_queue); + //printk("afr: htb_dequeue found skb %p early\n", skb); if (skb != NULL) { ok: qdisc_bstats_update(sch, skb); @@ -921,6 +925,7 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch) m |= 1 << prio; skb = htb_dequeue_tree(q, prio, level); + //printk("afr: htb_dequeue found skb %p late\n", skb); if (likely(skb != NULL)) goto ok; } @@ -973,13 +978,18 @@ static void htb_reset(struct Qdisc *sch) struct htb_class *cl; unsigned int i; + //printk("afr: htb_reset for sch %p with qlen %u\n", sch, sch->q.qlen); + for (i = 0; i < q->clhash.hashsize; i++) { hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) { if (cl->level) memset(&cl->un.inner, 0, sizeof(cl->un.inner)); else { - if (cl->un.leaf.q) + if (cl->un.leaf.q) { + //printk("afr: htb_reset for sch %p in loop reset class %p with q %u\n", sch, cl, cl->un.leaf.q->q.qlen); + qdisc_reset(cl->un.leaf.q); + } INIT_LIST_HEAD(&cl->un.leaf.drop_list); } cl->prio_activity = 0; @@ -1025,6 +1035,7 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt) if (!opt) return -EINVAL; + //printk("afr: htb_init for qdisc %p\n", sch); err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy); if (err < 0) return err; @@ -1232,6 +1243,7 @@ static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl, static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl) { + //printk("afr: htb_destroy class %p for sched %p\n", cl, sch); if (!cl->level) { WARN_ON(!cl->un.leaf.q); qdisc_destroy(cl->un.leaf.q); @@ -1248,6 +1260,7 @@ static void htb_destroy(struct Qdisc *sch) struct htb_class *cl; unsigned int i; + //printk("afr: htb_destroy sch %p\n", sch); cancel_work_sync(&q->work); qdisc_watchdog_cancel(&q->watchdog); /* This line used to be after htb_destroy_class call below @@ -1267,6 +1280,7 @@ static void htb_destroy(struct Qdisc *sch) htb_destroy_class(sch, cl); } qdisc_class_hash_destroy(&q->clhash); + //printk("afr: htb_destroy sch %p with direct_qlen %u and qlen %u\n", sch, q->direct_qlen, q->direct_queue.qlen); __skb_queue_purge(&q->direct_queue); } @@ -1277,7 +1291,7 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg) unsigned int qlen; struct Qdisc *new_q = NULL; int last_child = 0; - + //printk("afr: htb_delete for sch %p\n", sch); /* TODO: why don't allow to delete subtree ? references ? does * tc subsys guarantee us that in htb_destroy it holds no class * refs so that we can remove children safely there ? @@ -1344,6 +1358,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, struct tc_htb_opt *hopt; u64 rate64, ceil64; + //printk("afr: htb_change class with id %u for sch %p\n", classid, sch); + /* extract all subattrs from opt attr */ if (!opt) goto failure; @@ -1479,7 +1495,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, } rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0; - + //printk("afr: htb_change to new rate64 %llu for sch %p\n", rate64, sch); ceil64 = tb[TCA_HTB_CEIL64] ? nla_get_u64(tb[TCA_HTB_CEIL64]) : 0; psched_ratecfg_precompute(&cl->rate, &hopt->rate, rate64);