From 98afe6dfdef0ef9df6e21cdd9d977bfc6147b0a9 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 14 Apr 2014 16:58:55 -0400 Subject: [PATCH 01/24] user namespace: fix incorrect memory barriers commit e79323bd87808fdfbc68ce6c5371bd224d9672ee upstream. smp_read_barrier_depends() can be used if there is data dependency between the readers - i.e. if the read operation after the barrier uses address that was obtained from the read operation before the barrier. In this file, there is only control dependency, no data dependecy, so the use of smp_read_barrier_depends() is incorrect. The code could fail in the following way: * the cpu predicts that idx < entries is true and starts executing the body of the for loop * the cpu fetches map->extent[0].first and map->extent[0].count * the cpu fetches map->nr_extents * the cpu verifies that idx < extents is true, so it commits the instructions in the body of the for loop The problem is that in this scenario, the cpu read map->extent[0].first and map->nr_extents in the wrong order. We need a full read memory barrier to prevent it. Signed-off-by: Mikulas Patocka Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/user_namespace.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 9064b919a406..9bea1d7dd21f 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -148,7 +148,7 @@ static u32 map_id_range_down(struct uid_gid_map *map, u32 id, u32 count) /* Find the matching extent */ extents = map->nr_extents; - smp_read_barrier_depends(); + smp_rmb(); for (idx = 0; idx < extents; idx++) { first = map->extent[idx].first; last = first + map->extent[idx].count - 1; @@ -172,7 +172,7 @@ static u32 map_id_down(struct uid_gid_map *map, u32 id) /* Find the matching extent */ extents = map->nr_extents; - smp_read_barrier_depends(); + smp_rmb(); for (idx = 0; idx < extents; idx++) { first = map->extent[idx].first; last = first + map->extent[idx].count - 1; @@ -195,7 +195,7 @@ static u32 map_id_up(struct uid_gid_map *map, u32 id) /* Find the matching extent */ extents = map->nr_extents; - smp_read_barrier_depends(); + smp_rmb(); for (idx = 0; idx < extents; idx++) { first = map->extent[idx].lower_first; last = first + map->extent[idx].count - 1; @@ -611,9 +611,8 @@ static ssize_t map_write(struct file *file, const char __user *buf, * were written before the count of the extents. * * To achieve this smp_wmb() is used on guarantee the write - * order and smp_read_barrier_depends() is guaranteed that we - * don't have crazy architectures returning stale data. - * + * order and smp_rmb() is guaranteed that we don't have crazy + * architectures returning stale data. */ mutex_lock(&id_map_mutex); From 96a405de04abddf003b59467d277595e3ba02449 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 14 Apr 2014 09:46:50 -0500 Subject: [PATCH 02/24] Char: ipmi_bt_sm, fix infinite loop commit a94cdd1f4d30f12904ab528152731fb13a812a16 upstream. In read_all_bytes, we do unsigned char i; ... bt->read_data[0] = BMC2HOST; bt->read_count = bt->read_data[0]; ... for (i = 1; i <= bt->read_count; i++) bt->read_data[i] = BMC2HOST; If bt->read_data[0] == bt->read_count == 255, we loop infinitely in the 'for' loop. Make 'i' an 'int' instead of 'char' to get rid of the overflow and finish the loop after 255 iterations every time. Signed-off-by: Jiri Slaby Reported-and-debugged-by: Rui Hui Dian Cc: Tomas Cech Cc: Corey Minyard Cc: Signed-off-by: Corey Minyard Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/char/ipmi/ipmi_bt_sm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/ipmi/ipmi_bt_sm.c b/drivers/char/ipmi/ipmi_bt_sm.c index a22a7a502740..8156cafad11a 100644 --- a/drivers/char/ipmi/ipmi_bt_sm.c +++ b/drivers/char/ipmi/ipmi_bt_sm.c @@ -352,7 +352,7 @@ static inline void write_all_bytes(struct si_sm_data *bt) static inline int read_all_bytes(struct si_sm_data *bt) { - unsigned char i; + unsigned int i; /* * length is "framing info", minimum = 4: NetFn, Seq, Cmd, cCode. From d7a834fb18764f2451847eaafa27f1b70ca6a381 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Wed, 12 Mar 2014 14:44:33 -0400 Subject: [PATCH 03/24] x86: Adjust irq remapping quirk for older revisions of 5500/5520 chipsets commit 6f8a1b335fde143b7407036e2368d3cd6eb55674 upstream. Commit 03bbcb2e7e2 (iommu/vt-d: add quirk for broken interrupt remapping on 55XX chipsets) properly disables irq remapping on the 5500/5520 chipsets that don't correctly perform that feature. However, when I wrote it, I followed the errata sheet linked in that commit too closely, and explicitly tied the activation of the quirk to revision 0x13 of the chip, under the assumption that earlier revisions were not in the field. Recently a system was reported to be suffering from this remap bug and the quirk hadn't triggered, because the revision id register read at a lower value that 0x13, so the quirk test failed improperly. Given this, it seems only prudent to adjust this quirk so that any revision less than 0x13 has the quirk asserted. [ tglx: Removed the 0x12 comparison of pci id 3405 as this is covered by the <= 0x13 check already ] Signed-off-by: Neil Horman Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: x86@kernel.org Link: http://lkml.kernel.org/r/1394649873-14913-1-git-send-email-nhorman@tuxdriver.com Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/early-quirks.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 63bdb29b2549..4f7c82cdd0f5 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -202,18 +202,15 @@ static void __init intel_remapping_check(int num, int slot, int func) revision = read_pci_config_byte(num, slot, func, PCI_REVISION_ID); /* - * Revision 13 of all triggering devices id in this quirk have - * a problem draining interrupts when irq remapping is enabled, - * and should be flagged as broken. Additionally revisions 0x12 - * and 0x22 of device id 0x3405 has this problem. + * Revision <= 13 of all triggering devices id in this quirk + * have a problem draining interrupts when irq remapping is + * enabled, and should be flagged as broken. Additionally + * revision 0x22 of device id 0x3405 has this problem. */ - if (revision == 0x13) + if (revision <= 0x13) set_irq_remapping_broken(); - else if ((device == 0x3405) && - ((revision == 0x12) || - (revision == 0x22))) + else if (device == 0x3405 && revision == 0x22) set_irq_remapping_broken(); - } #define QFLAG_APPLY_ONCE 0x1 From 7a63f58358152cb2757cae6184bb39a25e3069d7 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Thu, 13 Mar 2014 15:30:39 +0000 Subject: [PATCH 04/24] staging: comedi: 8255_pci: initialize MITE data window commit 268d1e799663b795cba15c64f5d29407786a9dd4 upstream. According to National Instruments' PCI-DIO-96/PXI-6508/PCI-6503 User Manual, the physical address in PCI BAR1 needs to be OR'ed with 0x80 and written to register offset 0xC0 in the "MITE" registers (BAR0). Do so during initialization of the National Instruments boards handled by the "8255_pci" driver. The boards were previously handled by the "ni_pcidio" driver, where the initialization was done by `mite_setup()` in the "mite" module. The "mite" module comes with too much extra baggage for the "8255_pci" driver to deal with so use a local, simpler initialization function. Signed-off-by: Ian Abbott Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/8255_pci.c | 34 +++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/drivers/staging/comedi/drivers/8255_pci.c b/drivers/staging/comedi/drivers/8255_pci.c index 05bcf0dffb8c..e54031c558e8 100644 --- a/drivers/staging/comedi/drivers/8255_pci.c +++ b/drivers/staging/comedi/drivers/8255_pci.c @@ -59,6 +59,7 @@ Configuration Options: not applicable, uses PCI auto config #include "../comedidev.h" #include "8255.h" +#include "mite.h" enum pci_8255_boardid { BOARD_ADLINK_PCI7224, @@ -82,6 +83,7 @@ struct pci_8255_boardinfo { const char *name; int dio_badr; int n_8255; + unsigned int has_mite:1; }; static const struct pci_8255_boardinfo pci_8255_boards[] = { @@ -129,36 +131,43 @@ static const struct pci_8255_boardinfo pci_8255_boards[] = { .name = "ni_pci-dio-96", .dio_badr = 1, .n_8255 = 4, + .has_mite = 1, }, [BOARD_NI_PCIDIO96B] = { .name = "ni_pci-dio-96b", .dio_badr = 1, .n_8255 = 4, + .has_mite = 1, }, [BOARD_NI_PXI6508] = { .name = "ni_pxi-6508", .dio_badr = 1, .n_8255 = 4, + .has_mite = 1, }, [BOARD_NI_PCI6503] = { .name = "ni_pci-6503", .dio_badr = 1, .n_8255 = 1, + .has_mite = 1, }, [BOARD_NI_PCI6503B] = { .name = "ni_pci-6503b", .dio_badr = 1, .n_8255 = 1, + .has_mite = 1, }, [BOARD_NI_PCI6503X] = { .name = "ni_pci-6503x", .dio_badr = 1, .n_8255 = 1, + .has_mite = 1, }, [BOARD_NI_PXI_6503] = { .name = "ni_pxi-6503", .dio_badr = 1, .n_8255 = 1, + .has_mite = 1, }, }; @@ -166,6 +175,25 @@ struct pci_8255_private { void __iomem *mmio_base; }; +static int pci_8255_mite_init(struct pci_dev *pcidev) +{ + void __iomem *mite_base; + u32 main_phys_addr; + + /* ioremap the MITE registers (BAR 0) temporarily */ + mite_base = pci_ioremap_bar(pcidev, 0); + if (!mite_base) + return -ENOMEM; + + /* set data window to main registers (BAR 1) */ + main_phys_addr = pci_resource_start(pcidev, 1); + writel(main_phys_addr | WENAB, mite_base + MITE_IODWBSR); + + /* finished with MITE registers */ + iounmap(mite_base); + return 0; +} + static int pci_8255_mmio(int dir, int port, int data, unsigned long iobase) { void __iomem *mmio_base = (void __iomem *)iobase; @@ -205,6 +233,12 @@ static int pci_8255_auto_attach(struct comedi_device *dev, if (ret) return ret; + if (board->has_mite) { + ret = pci_8255_mite_init(pcidev); + if (ret) + return ret; + } + is_mmio = (pci_resource_flags(pcidev, board->dio_badr) & IORESOURCE_MEM) != 0; if (is_mmio) { From f5b4cbf53a1da6f90b4ac8c5389d6c8528507e8b Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Thu, 27 Feb 2014 12:30:51 +0100 Subject: [PATCH 05/24] tty: Set correct tty name in 'active' sysfs attribute commit 723abd87f6e536f1353c8f64f621520bc29523a3 upstream. The 'active' sysfs attribute should refer to the currently active tty devices the console is running on, not the currently active console. The console structure doesn't refer to any device in sysfs, only the tty the console is running on has. So we need to print out the tty names in 'active', not the console names. There is one special-case, which is tty0. If the console is directed to it, we want 'tty0' to show up in the file, so user-space knows that the messages get forwarded to the active VT. The ->device() callback would resolve tty0, though. Hence, treat it special and don't call into the VT layer to resolve it (plymouth is known to depend on it). Cc: Lennart Poettering Cc: Kay Sievers Cc: Jiri Slaby Signed-off-by: Werner Fink Signed-off-by: Hannes Reinecke Signed-off-by: David Herrmann Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_io.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 59d26ef538d8..3723c0ebb316 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -1267,12 +1267,13 @@ static void pty_line_name(struct tty_driver *driver, int index, char *p) * * Locking: None */ -static void tty_line_name(struct tty_driver *driver, int index, char *p) +static ssize_t tty_line_name(struct tty_driver *driver, int index, char *p) { if (driver->flags & TTY_DRIVER_UNNUMBERED_NODE) - strcpy(p, driver->name); + return sprintf(p, "%s", driver->name); else - sprintf(p, "%s%d", driver->name, index + driver->name_base); + return sprintf(p, "%s%d", driver->name, + index + driver->name_base); } /** @@ -3538,9 +3539,19 @@ static ssize_t show_cons_active(struct device *dev, if (i >= ARRAY_SIZE(cs)) break; } - while (i--) - count += sprintf(buf + count, "%s%d%c", - cs[i]->name, cs[i]->index, i ? ' ':'\n'); + while (i--) { + int index = cs[i]->index; + struct tty_driver *drv = cs[i]->device(cs[i], &index); + + /* don't resolve tty0 as some programs depend on it */ + if (drv && (cs[i]->index > 0 || drv->major != TTY_MAJOR)) + count += tty_line_name(drv, index, buf + count); + else + count += sprintf(buf + count, "%s%d", + cs[i]->name, cs[i]->index); + + count += sprintf(buf + count, "%c", i ? ' ':'\n'); + } console_unlock(); return count; From a15c36b2de489e2388819b85ccb933b747a18b70 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 2 Apr 2014 17:45:05 +0200 Subject: [PATCH 06/24] pid_namespace: pidns_get() should check task_active_pid_ns() != NULL commit d23082257d83e4bc89727d5aedee197e907999d2 upstream. pidns_get()->get_pid_ns() can hit ns == NULL. This task_struct can't go away, but task_active_pid_ns(task) is NULL if release_task(task) was already called. Alternatively we could change get_pid_ns(ns) to check ns != NULL, but it seems that other callers are fine. Signed-off-by: Oleg Nesterov Cc: Eric W. Biederman ebiederm@xmission.com> Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/pid_namespace.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 6917e8edb48e..e32703d5e0ab 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -312,7 +312,9 @@ static void *pidns_get(struct task_struct *task) struct pid_namespace *ns; rcu_read_lock(); - ns = get_pid_ns(task_active_pid_ns(task)); + ns = task_active_pid_ns(task); + if (ns) + get_pid_ns(ns); rcu_read_unlock(); return ns; From 2dbecc73f3b8aa4a6d4c0465eb276ba08161e79d Mon Sep 17 00:00:00 2001 From: Claudio Takahasi Date: Thu, 25 Jul 2013 16:34:24 -0300 Subject: [PATCH 07/24] Bluetooth: Fix removing Long Term Key commit 5981a8821b774ada0be512fd9bad7c241e17657e upstream. This patch fixes authentication failure on LE link re-connection when BlueZ acts as slave (peripheral). LTK is removed from the internal list after its first use causing PIN or Key missing reply when re-connecting the link. The LE Long Term Key Request event indicates that the master is attempting to encrypt or re-encrypt the link. Pre-condition: BlueZ host paired and running as slave. How to reproduce(master): 1) Establish an ACL LE encrypted link 2) Disconnect the link 3) Try to re-establish the ACL LE encrypted link (fails) > HCI Event: LE Meta Event (0x3e) plen 19 LE Connection Complete (0x01) Status: Success (0x00) Handle: 64 Role: Slave (0x01) ... @ Device Connected: 00:02:72:DC:29:C9 (1) flags 0x0000 > HCI Event: LE Meta Event (0x3e) plen 13 LE Long Term Key Request (0x05) Handle: 64 Random number: 875be18439d9aa37 Encryption diversifier: 0x76ed < HCI Command: LE Long Term Key Request Reply (0x08|0x001a) plen 18 Handle: 64 Long term key: 2aa531db2fce9f00a0569c7d23d17409 > HCI Event: Command Complete (0x0e) plen 6 LE Long Term Key Request Reply (0x08|0x001a) ncmd 1 Status: Success (0x00) Handle: 64 > HCI Event: Encryption Change (0x08) plen 4 Status: Success (0x00) Handle: 64 Encryption: Enabled with AES-CCM (0x01) ... @ Device Disconnected: 00:02:72:DC:29:C9 (1) reason 3 < HCI Command: LE Set Advertise Enable (0x08|0x000a) plen 1 Advertising: Enabled (0x01) > HCI Event: Command Complete (0x0e) plen 4 LE Set Advertise Enable (0x08|0x000a) ncmd 1 Status: Success (0x00) > HCI Event: LE Meta Event (0x3e) plen 19 LE Connection Complete (0x01) Status: Success (0x00) Handle: 64 Role: Slave (0x01) ... @ Device Connected: 00:02:72:DC:29:C9 (1) flags 0x0000 > HCI Event: LE Meta Event (0x3e) plen 13 LE Long Term Key Request (0x05) Handle: 64 Random number: 875be18439d9aa37 Encryption diversifier: 0x76ed < HCI Command: LE Long Term Key Request Neg Reply (0x08|0x001b) plen 2 Handle: 64 > HCI Event: Command Complete (0x0e) plen 6 LE Long Term Key Request Neg Reply (0x08|0x001b) ncmd 1 Status: Success (0x00) Handle: 64 > HCI Event: Disconnect Complete (0x05) plen 4 Status: Success (0x00) Handle: 64 Reason: Authentication Failure (0x05) @ Device Disconnected: 00:02:72:DC:29:C9 (1) reason 0 Signed-off-by: Claudio Takahasi Signed-off-by: Johan Hedberg Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/hci_event.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index dcaa6dbbab2c..cfca44f8d048 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3619,7 +3619,13 @@ static void hci_le_ltk_request_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_send_cmd(hdev, HCI_OP_LE_LTK_REPLY, sizeof(cp), &cp); - if (ltk->type & HCI_SMP_STK) { + /* Ref. Bluetooth Core SPEC pages 1975 and 2004. STK is a + * temporary key used to encrypt a connection following + * pairing. It is used during the Encrypted Session Setup to + * distribute the keys. Later, security can be re-established + * using a distributed LTK. + */ + if (ltk->type == HCI_SMP_STK_SLAVE) { list_del(<k->list); kfree(ltk); } From 39305a6ac73ca6e8349773d032cdb5336d42196f Mon Sep 17 00:00:00 2001 From: Derek Basehore Date: Thu, 3 Apr 2014 14:46:22 -0700 Subject: [PATCH 08/24] backing_dev: fix hung task on sync commit 6ca738d60c563d5c6cf6253ee4b8e76fa77b2b9e upstream. bdi_wakeup_thread_delayed() used the mod_delayed_work() function to schedule work to writeback dirty inodes. The problem with this is that it can delay work that is scheduled for immediate execution, such as the work from sync_inodes_sb(). This can happen since mod_delayed_work() can now steal work from a work_queue. This fixes the problem by using queue_delayed_work() instead. This is a regression caused by commit 839a8e8660b6 ("writeback: replace custom worker pool implementation with unbound workqueue"). The reason that this causes a problem is that laptop-mode will change the delay, dirty_writeback_centisecs, to 60000 (10 minutes) by default. In the case that bdi_wakeup_thread_delayed() races with sync_inodes_sb(), sync will be stopped for 10 minutes and trigger a hung task. Even if dirty_writeback_centisecs is not long enough to cause a hung task, we still don't want to delay sync for that long. We fix the problem by using queue_delayed_work() when we want to schedule writeback sometime in future. This function doesn't change the timer if it is already armed. For the same reason, we also change bdi_writeback_workfn() to immediately queue the work again in the case that the work_list is not empty. The same problem can happen if the sync work is run on the rescue worker. [jack@suse.cz: update changelog, add comment, use bdi_wakeup_thread_delayed()] Signed-off-by: Derek Basehore Reviewed-by: Jan Kara Cc: Alexander Viro Reviewed-by: Tejun Heo Cc: Greg Kroah-Hartman Cc: "Darrick J. Wong" Cc: Derek Basehore Cc: Kees Cook Cc: Benson Leung Cc: Sonny Rao Cc: Luigi Semenzato Cc: Jens Axboe Cc: Dave Chinner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/fs-writeback.c | 8 ++++---- mm/backing-dev.c | 5 ++++- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index e3ab1e4dc442..f79f641de4ff 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1038,10 +1038,10 @@ void bdi_writeback_workfn(struct work_struct *work) trace_writeback_pages_written(pages_written); } - if (!list_empty(&bdi->work_list) || - (wb_has_dirty_io(wb) && dirty_writeback_interval)) - queue_delayed_work(bdi_wq, &wb->dwork, - msecs_to_jiffies(dirty_writeback_interval * 10)); + if (!list_empty(&bdi->work_list)) + mod_delayed_work(bdi_wq, &wb->dwork, 0); + else if (wb_has_dirty_io(wb) && dirty_writeback_interval) + bdi_wakeup_thread_delayed(bdi); current->flags &= ~PF_SWAPWRITE; } diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 502517492258..95e437435788 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -287,13 +287,16 @@ int bdi_has_dirty_io(struct backing_dev_info *bdi) * Note, we wouldn't bother setting up the timer, but this function is on the * fast-path (used by '__mark_inode_dirty()'), so we save few context switches * by delaying the wake-up. + * + * We have to be careful not to postpone flush work if it is scheduled for + * earlier. Thus we use queue_delayed_work(). */ void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi) { unsigned long timeout; timeout = msecs_to_jiffies(dirty_writeback_interval * 10); - mod_delayed_work(bdi_wq, &bdi->wb.dwork, timeout); + queue_delayed_work(bdi_wq, &bdi->wb.dwork, timeout); } /* From bf0972039ddc483a9cb79edae73076c635876568 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 3 Apr 2014 14:46:23 -0700 Subject: [PATCH 09/24] bdi: avoid oops on device removal commit 5acda9d12dcf1ad0d9a5a2a7c646de3472fa7555 upstream. After commit 839a8e8660b6 ("writeback: replace custom worker pool implementation with unbound workqueue") when device is removed while we are writing to it we crash in bdi_writeback_workfn() -> set_worker_desc() because bdi->dev is NULL. This can happen because even though bdi_unregister() cancels all pending flushing work, nothing really prevents new ones from being queued from balance_dirty_pages() or other places. Fix the problem by clearing BDI_registered bit in bdi_unregister() and checking it before scheduling of any flushing work. Fixes: 839a8e8660b6777e7fe4e80af1a048aebe2b5977 Reviewed-by: Tejun Heo Signed-off-by: Jan Kara Cc: Derek Basehore Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/fs-writeback.c | 23 ++++++++++++++++++----- include/linux/backing-dev.h | 2 +- mm/backing-dev.c | 13 +++++++++---- 3 files changed, 28 insertions(+), 10 deletions(-) diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index f79f641de4ff..387213ac2608 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -87,16 +87,29 @@ static inline struct inode *wb_inode(struct list_head *head) #define CREATE_TRACE_POINTS #include +static void bdi_wakeup_thread(struct backing_dev_info *bdi) +{ + spin_lock_bh(&bdi->wb_lock); + if (test_bit(BDI_registered, &bdi->state)) + mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0); + spin_unlock_bh(&bdi->wb_lock); +} + static void bdi_queue_work(struct backing_dev_info *bdi, struct wb_writeback_work *work) { trace_writeback_queue(bdi, work); spin_lock_bh(&bdi->wb_lock); + if (!test_bit(BDI_registered, &bdi->state)) { + if (work->done) + complete(work->done); + goto out_unlock; + } list_add_tail(&work->list, &bdi->work_list); - spin_unlock_bh(&bdi->wb_lock); - mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0); +out_unlock: + spin_unlock_bh(&bdi->wb_lock); } static void @@ -112,7 +125,7 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages, work = kzalloc(sizeof(*work), GFP_ATOMIC); if (!work) { trace_writeback_nowork(bdi); - mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0); + bdi_wakeup_thread(bdi); return; } @@ -159,7 +172,7 @@ void bdi_start_background_writeback(struct backing_dev_info *bdi) * writeback as soon as there is no other work to do. */ trace_writeback_wake_background(bdi); - mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0); + bdi_wakeup_thread(bdi); } /* @@ -1016,7 +1029,7 @@ void bdi_writeback_workfn(struct work_struct *work) current->flags |= PF_SWAPWRITE; if (likely(!current_is_workqueue_rescuer() || - list_empty(&bdi->bdi_list))) { + !test_bit(BDI_registered, &bdi->state))) { /* * The normal path. Keep writing back @bdi until its * work_list is empty. Note that this path is also taken diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index c3881553f7d1..4cfdbf28fc6a 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -95,7 +95,7 @@ struct backing_dev_info { unsigned int max_ratio, max_prop_frac; struct bdi_writeback wb; /* default writeback info for this bdi */ - spinlock_t wb_lock; /* protects work_list */ + spinlock_t wb_lock; /* protects work_list & wb.dwork scheduling */ struct list_head work_list; diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 95e437435788..eea1a9dfac38 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -296,7 +296,10 @@ void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi) unsigned long timeout; timeout = msecs_to_jiffies(dirty_writeback_interval * 10); - queue_delayed_work(bdi_wq, &bdi->wb.dwork, timeout); + spin_lock_bh(&bdi->wb_lock); + if (test_bit(BDI_registered, &bdi->state)) + queue_delayed_work(bdi_wq, &bdi->wb.dwork, timeout); + spin_unlock_bh(&bdi->wb_lock); } /* @@ -309,9 +312,6 @@ static void bdi_remove_from_list(struct backing_dev_info *bdi) spin_unlock_bh(&bdi_lock); synchronize_rcu_expedited(); - - /* bdi_list is now unused, clear it to mark @bdi dying */ - INIT_LIST_HEAD(&bdi->bdi_list); } int bdi_register(struct backing_dev_info *bdi, struct device *parent, @@ -362,6 +362,11 @@ static void bdi_wb_shutdown(struct backing_dev_info *bdi) */ bdi_remove_from_list(bdi); + /* Make sure nobody queues further work */ + spin_lock_bh(&bdi->wb_lock); + clear_bit(BDI_registered, &bdi->state); + spin_unlock_bh(&bdi->wb_lock); + /* * Drain work list and shutdown the delayed_work. At this point, * @bdi->bdi_list is empty telling bdi_Writeback_workfn() that @bdi From 19fc37ed7b9e8cdde28597adb9714d6f863985b4 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Wed, 5 Feb 2014 16:34:38 +0900 Subject: [PATCH 10/24] Btrfs: skip submitting barrier for missing device commit f88ba6a2a44ee98e8d59654463dc157bb6d13c43 upstream. I got an error on v3.13: BTRFS error (device sdf1) in write_all_supers:3378: errno=-5 IO failure (errors while submitting device barriers.) how to reproduce: > mkfs.btrfs -f -d raid1 /dev/sdf1 /dev/sdf2 > wipefs -a /dev/sdf2 > mount -o degraded /dev/sdf1 /mnt > btrfs balance start -f -sconvert=single -mconvert=single -dconvert=single /mnt The reason of the error is that barrier_all_devices() failed to submit barrier to the missing device. However it is clear that we cannot do anything on missing device, and also it is not necessary to care chunks on the missing device. This patch stops sending/waiting barrier if device is missing. Signed-off-by: Hidetoshi Seto Signed-off-by: Josef Bacik Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/disk-io.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b8b60b660c8f..4354b9127713 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3161,6 +3161,8 @@ static int barrier_all_devices(struct btrfs_fs_info *info) /* send down all the barriers */ head = &info->fs_devices->devices; list_for_each_entry_rcu(dev, head, dev_list) { + if (dev->missing) + continue; if (!dev->bdev) { errors_send++; continue; @@ -3175,6 +3177,8 @@ static int barrier_all_devices(struct btrfs_fs_info *info) /* wait for all the barriers */ list_for_each_entry_rcu(dev, head, dev_list) { + if (dev->missing) + continue; if (!dev->bdev) { errors_wait++; continue; From cbe099ed98247f60f18a828edd1b0bd07560b460 Mon Sep 17 00:00:00 2001 From: Eric Whitney Date: Wed, 19 Feb 2014 18:52:39 -0500 Subject: [PATCH 11/24] ext4: fix error return from ext4_ext_handle_uninitialized_extents() commit ce37c42919608e96ade3748fe23c3062a0a966c5 upstream. Commit 3779473246 breaks the return of error codes from ext4_ext_handle_uninitialized_extents() in ext4_ext_map_blocks(). A portion of the patch assigns that function's signed integer return value to an unsigned int. Consequently, negatively valued error codes are lost and can be treated as a bogus allocated block count. Signed-off-by: Eric Whitney Signed-off-by: "Theodore Ts'o" Signed-off-by: Greg Kroah-Hartman --- fs/ext4/extents.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index a2b625e279db..141d0fce318d 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4032,7 +4032,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, struct ext4_extent newex, *ex, *ex2; struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); ext4_fsblk_t newblock = 0; - int free_on_err = 0, err = 0, depth; + int free_on_err = 0, err = 0, depth, ret; unsigned int allocated = 0, offset = 0; unsigned int allocated_clusters = 0; struct ext4_allocation_request ar; @@ -4093,9 +4093,13 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, if (!ext4_ext_is_uninitialized(ex)) goto out; - allocated = ext4_ext_handle_uninitialized_extents( + ret = ext4_ext_handle_uninitialized_extents( handle, inode, map, path, flags, allocated, newblock); + if (ret < 0) + err = ret; + else + allocated = ret; goto out3; } } From 8459d9c808d91f2170e83ac45fef6225392d98db Mon Sep 17 00:00:00 2001 From: Eric Whitney Date: Thu, 13 Mar 2014 23:34:16 -0400 Subject: [PATCH 12/24] ext4: fix partial cluster handling for bigalloc file systems commit c06344939422bbd032ac967223a7863de57496b5 upstream. Commit 9cb00419fa, which enables hole punching for bigalloc file systems, exposed a bug introduced by commit 6ae06ff51e in an earlier release. When run on a bigalloc file system, xfstests generic/013, 068, 075, 083, 091, 100, 112, 127, 263, 269, and 270 fail with e2fsck errors or cause kernel error messages indicating that previously freed blocks are being freed again. The latter commit optimizes the selection of the starting extent in ext4_ext_rm_leaf() when hole punching by beginning with the extent supplied in the path argument rather than with the last extent in the leaf node (as is still done when truncating). However, the code in rm_leaf that initially sets partial_cluster to track cluster sharing on extent boundaries is only guaranteed to run if rm_leaf starts with the last node in the leaf. Consequently, partial_cluster is not correctly initialized when hole punching, and a cluster on the boundary of a punched region that should be retained may instead be deallocated. Signed-off-by: Eric Whitney Signed-off-by: "Theodore Ts'o" Signed-off-by: Greg Kroah-Hartman --- fs/ext4/extents.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 141d0fce318d..84d817b842a8 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2511,6 +2511,27 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, ex_ee_block = le32_to_cpu(ex->ee_block); ex_ee_len = ext4_ext_get_actual_len(ex); + /* + * If we're starting with an extent other than the last one in the + * node, we need to see if it shares a cluster with the extent to + * the right (towards the end of the file). If its leftmost cluster + * is this extent's rightmost cluster and it is not cluster aligned, + * we'll mark it as a partial that is not to be deallocated. + */ + + if (ex != EXT_LAST_EXTENT(eh)) { + ext4_fsblk_t current_pblk, right_pblk; + long long current_cluster, right_cluster; + + current_pblk = ext4_ext_pblock(ex) + ex_ee_len - 1; + current_cluster = (long long)EXT4_B2C(sbi, current_pblk); + right_pblk = ext4_ext_pblock(ex + 1); + right_cluster = (long long)EXT4_B2C(sbi, right_pblk); + if (current_cluster == right_cluster && + EXT4_PBLK_COFF(sbi, right_pblk)) + *partial_cluster = -right_cluster; + } + trace_ext4_ext_rm_leaf(inode, start, ex, *partial_cluster); while (ex >= EXT_FIRST_EXTENT(eh) && From 5734144cd1e407869063a82d9c741e1092c354db Mon Sep 17 00:00:00 2001 From: Kamlakant Patel Date: Mon, 6 Jan 2014 19:06:54 +0530 Subject: [PATCH 13/24] jffs2: Fix segmentation fault found in stress test commit 3367da5610c50e6b83f86d366d72b41b350b06a2 upstream. Creating a large file on a JFFS2 partition sometimes crashes with this call trace: [ 306.476000] CPU 13 Unable to handle kernel paging request at virtual address c0000000dfff8002, epc == ffffffffc03a80a8, ra == ffffffffc03a8044 [ 306.488000] Oops[#1]: [ 306.488000] Cpu 13 [ 306.492000] $ 0 : 0000000000000000 0000000000000000 0000000000008008 0000000000008007 [ 306.500000] $ 4 : c0000000dfff8002 000000000000009f c0000000e0007cde c0000000ee95fa58 [ 306.508000] $ 8 : 0000000000000001 0000000000008008 0000000000010000 ffffffffffff8002 [ 306.516000] $12 : 0000000000007fa9 000000000000ff0e 000000000000ff0f 80e55930aebb92bb [ 306.524000] $16 : c0000000e0000000 c0000000ee95fa5c c0000000efc80000 ffffffffc09edd70 [ 306.532000] $20 : ffffffffc2b60000 c0000000ee95fa58 0000000000000000 c0000000efc80000 [ 306.540000] $24 : 0000000000000000 0000000000000004 [ 306.548000] $28 : c0000000ee950000 c0000000ee95f738 0000000000000000 ffffffffc03a8044 [ 306.556000] Hi : 00000000000574a5 [ 306.560000] Lo : 6193b7a7e903d8c9 [ 306.564000] epc : ffffffffc03a80a8 jffs2_rtime_compress+0x98/0x198 [ 306.568000] Tainted: G W [ 306.572000] ra : ffffffffc03a8044 jffs2_rtime_compress+0x34/0x198 [ 306.580000] Status: 5000f8e3 KX SX UX KERNEL EXL IE [ 306.584000] Cause : 00800008 [ 306.588000] BadVA : c0000000dfff8002 [ 306.592000] PrId : 000c1100 (Netlogic XLP) [ 306.596000] Modules linked in: [ 306.596000] Process dd (pid: 170, threadinfo=c0000000ee950000, task=c0000000ee6e0858, tls=0000000000c47490) [ 306.608000] Stack : 7c547f377ddc7ee4 7ffc7f967f5d7fae 7f617f507fc37ff4 7e7d7f817f487f5f 7d8e7fec7ee87eb3 7e977ff27eec7f9e 7d677ec67f917f67 7f3d7e457f017ed7 7fd37f517f867eb2 7fed7fd17ca57e1d 7e5f7fe87f257f77 7fd77f0d7ede7fdb 7fba7fef7e197f99 7fde7fe07ee37eb5 7f5c7f8c7fc67f65 7f457fb87f847e93 7f737f3e7d137cd9 7f8e7e9c7fc47d25 7dbb7fac7fb67e52 7ff17f627da97f64 7f6b7df77ffa7ec5 80057ef17f357fb3 7f767fa27dfc7fd5 7fe37e8e7fd07e53 7e227fcf7efb7fa1 7f547e787fa87fcc 7fcb7fc57f5a7ffb 7fc07f6c7ea97e80 7e2d7ed17e587ee0 7fb17f9d7feb7f31 7f607e797e887faa 7f757fdd7c607ff3 7e877e657ef37fbd 7ec17fd67fe67ff7 7ff67f797ff87dc4 7eef7f3a7c337fa6 7fe57fc97ed87f4b 7ebe7f097f0b8003 7fe97e2a7d997cba 7f587f987f3c7fa9 ... [ 306.676000] Call Trace: [ 306.680000] [] jffs2_rtime_compress+0x98/0x198 [ 306.684000] [] jffs2_selected_compress+0x110/0x230 [ 306.692000] [] jffs2_compress+0x5c/0x388 [ 306.696000] [] jffs2_write_inode_range+0xd8/0x388 [ 306.704000] [] jffs2_write_end+0x16c/0x2d0 [ 306.708000] [] generic_file_buffered_write+0xf8/0x2b8 [ 306.716000] [] __generic_file_aio_write+0x1ac/0x350 [ 306.720000] [] generic_file_aio_write+0x80/0x168 [ 306.728000] [] do_sync_write+0x94/0xf8 [ 306.732000] [] vfs_write+0xa4/0x1a0 [ 306.736000] [] SyS_write+0x50/0x90 [ 306.744000] [] handle_sys+0x180/0x1a0 [ 306.748000] [ 306.748000] Code: 020b202d 0205282d 90a50000 <90840000> 14a40038 00000000 0060602d 0000282d 016c5823 [ 306.760000] ---[ end trace 79dd088435be02d0 ]--- Segmentation fault This crash is caused because the 'positions' is declared as an array of signed short. The value of position is in the range 0..65535, and will be converted to a negative number when the position is greater than 32767 and causes a corruption and crash. Changing the definition to 'unsigned short' fixes this issue Signed-off-by: Jayachandran C Signed-off-by: Kamlakant Patel Signed-off-by: Brian Norris Signed-off-by: Greg Kroah-Hartman --- fs/jffs2/compr_rtime.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/jffs2/compr_rtime.c b/fs/jffs2/compr_rtime.c index 16a5047903a6..406d9cc84ba8 100644 --- a/fs/jffs2/compr_rtime.c +++ b/fs/jffs2/compr_rtime.c @@ -33,7 +33,7 @@ static int jffs2_rtime_compress(unsigned char *data_in, unsigned char *cpage_out, uint32_t *sourcelen, uint32_t *dstlen) { - short positions[256]; + unsigned short positions[256]; int outpos = 0; int pos=0; @@ -74,7 +74,7 @@ static int jffs2_rtime_decompress(unsigned char *data_in, unsigned char *cpage_out, uint32_t srclen, uint32_t destlen) { - short positions[256]; + unsigned short positions[256]; int outpos = 0; int pos=0; From 16668c2b1a12ae83427f78b2be165865d782df0b Mon Sep 17 00:00:00 2001 From: Ajesh Kunhipurayil Vijayan Date: Mon, 6 Jan 2014 19:06:55 +0530 Subject: [PATCH 14/24] jffs2: Fix crash due to truncation of csize commit 41bf1a24c1001f4d0d41a78e1ac575d2f14789d7 upstream. mounting JFFS2 partition sometimes crashes with this call trace: [ 1322.240000] Kernel bug detected[#1]: [ 1322.244000] Cpu 2 [ 1322.244000] $ 0 : 0000000000000000 0000000000000018 000000003ff00070 0000000000000001 [ 1322.252000] $ 4 : 0000000000000000 c0000000f3980150 0000000000000000 0000000000010000 [ 1322.260000] $ 8 : ffffffffc09cd5f8 0000000000000001 0000000000000088 c0000000ed300de8 [ 1322.268000] $12 : e5e19d9c5f613a45 ffffffffc046d464 0000000000000000 66227ba5ea67b74e [ 1322.276000] $16 : c0000000f1769c00 c0000000ed1e0200 c0000000f3980150 0000000000000000 [ 1322.284000] $20 : c0000000f3a80000 00000000fffffffc c0000000ed2cfbd8 c0000000f39818f0 [ 1322.292000] $24 : 0000000000000004 0000000000000000 [ 1322.300000] $28 : c0000000ed2c0000 c0000000ed2cfab8 0000000000010000 ffffffffc039c0b0 [ 1322.308000] Hi : 000000000000023c [ 1322.312000] Lo : 000000000003f802 [ 1322.316000] epc : ffffffffc039a9f8 check_tn_node+0x88/0x3b0 [ 1322.320000] Not tainted [ 1322.324000] ra : ffffffffc039c0b0 jffs2_do_read_inode_internal+0x1250/0x1e48 [ 1322.332000] Status: 5400f8e3 KX SX UX KERNEL EXL IE [ 1322.336000] Cause : 00800034 [ 1322.340000] PrId : 000c1004 (Netlogic XLP) [ 1322.344000] Modules linked in: [ 1322.348000] Process jffs2_gcd_mtd7 (pid: 264, threadinfo=c0000000ed2c0000, task=c0000000f0e68dd8, tls=0000000000000000) [ 1322.356000] Stack : c0000000f1769e30 c0000000ed010780 c0000000ed010780 c0000000ed300000 c0000000f1769c00 c0000000f3980150 c0000000f3a80000 00000000fffffffc c0000000ed2cfbd8 ffffffffc039c0b0 ffffffffc09c6340 0000000000001000 0000000000000dec ffffffffc016c9d8 c0000000f39805a0 c0000000f3980180 0000008600000000 0000000000000000 0000000000000000 0000000000000000 0001000000000dec c0000000f1769d98 c0000000ed2cfb18 0000000000010000 0000000000010000 0000000000000044 c0000000f3a80000 c0000000f1769c00 c0000000f3d207a8 c0000000f1769d98 c0000000f1769de0 ffffffffc076f9c0 0000000000000009 0000000000000000 0000000000000000 ffffffffc039cf90 0000000000000017 ffffffffc013fbdc 0000000000000001 000000010003e61c ... [ 1322.424000] Call Trace: [ 1322.428000] [] check_tn_node+0x88/0x3b0 [ 1322.432000] [] jffs2_do_read_inode_internal+0x1250/0x1e48 [ 1322.440000] [] jffs2_do_crccheck_inode+0x70/0xd0 [ 1322.448000] [] jffs2_garbage_collect_pass+0x160/0x870 [ 1322.452000] [] jffs2_garbage_collect_thread+0xdc/0x1f0 [ 1322.460000] [] kthread+0xb8/0xc0 [ 1322.464000] [] kernel_thread_helper+0x10/0x18 [ 1322.472000] [ 1322.472000] Code: 67bd0050 94a4002c 2c830001 <00038036> de050218 2403fffc 0080a82d 00431824 24630044 [ 1322.480000] ---[ end trace b052bb90e97dfbf5 ]--- The variable csize in structure jffs2_tmp_dnode_info is of type uint16_t, but it is used to hold the compressed data length(csize) which is declared as uint32_t. So, when the value of csize exceeds 16bits, it gets truncated when assigned to tn->csize. This is causing a kernel BUG. Changing the definition of csize in jffs2_tmp_dnode_info to uint32_t fixes the issue. Signed-off-by: Ajesh Kunhipurayil Vijayan Signed-off-by: Kamlakant Patel Signed-off-by: Brian Norris Signed-off-by: Greg Kroah-Hartman --- fs/jffs2/nodelist.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h index e4619b00f7c5..fa35ff79ab35 100644 --- a/fs/jffs2/nodelist.h +++ b/fs/jffs2/nodelist.h @@ -231,7 +231,7 @@ struct jffs2_tmp_dnode_info uint32_t version; uint32_t data_crc; uint32_t partial_crc; - uint16_t csize; + uint32_t csize; uint16_t overlapped; }; From 3a196f46fc0d37392ba686df66b94a1edc0b8523 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 12 Feb 2014 12:44:56 -0800 Subject: [PATCH 15/24] jffs2: avoid soft-lockup in jffs2_reserve_space_gc() commit 13b546d96207c131eeae15dc7b26c6e7d0f1cad7 upstream. We triggered soft-lockup under stress test on 2.6.34 kernel. BUG: soft lockup - CPU#1 stuck for 60009ms! [lockf2.test:14488] ... [] (jffs2_do_reserve_space+0x420/0x440 [jffs2]) [] (jffs2_reserve_space_gc+0x34/0x78 [jffs2]) [] (jffs2_garbage_collect_dnode.isra.3+0x264/0x478 [jffs2]) [] (jffs2_garbage_collect_pass+0x9c0/0xe4c [jffs2]) [] (jffs2_reserve_space+0x104/0x2a8 [jffs2]) [] (jffs2_write_inode_range+0x5c/0x4d4 [jffs2]) [] (jffs2_write_end+0x198/0x2c0 [jffs2]) [] (generic_file_buffered_write+0x158/0x200) [] (__generic_file_aio_write+0x3a4/0x414) [] (generic_file_aio_write+0x5c/0xbc) [] (do_sync_write+0x98/0xd4) [] (vfs_write+0xa8/0x150) [] (sys_write+0x3c/0xc0)] Fix this by adding a cond_resched() in the while loop. [akpm@linux-foundation.org: don't initialize `ret'] Signed-off-by: Li Zefan Cc: David Woodhouse Cc: Artem Bityutskiy Signed-off-by: Andrew Morton Signed-off-by: Brian Norris Signed-off-by: Greg Kroah-Hartman --- fs/jffs2/nodemgmt.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/fs/jffs2/nodemgmt.c b/fs/jffs2/nodemgmt.c index 03310721712f..41789e6fa6fe 100644 --- a/fs/jffs2/nodemgmt.c +++ b/fs/jffs2/nodemgmt.c @@ -211,20 +211,25 @@ out: int jffs2_reserve_space_gc(struct jffs2_sb_info *c, uint32_t minsize, uint32_t *len, uint32_t sumsize) { - int ret = -EAGAIN; + int ret; minsize = PAD(minsize); jffs2_dbg(1, "%s(): Requested 0x%x bytes\n", __func__, minsize); - spin_lock(&c->erase_completion_lock); - while(ret == -EAGAIN) { + while (true) { + spin_lock(&c->erase_completion_lock); ret = jffs2_do_reserve_space(c, minsize, len, sumsize); if (ret) { jffs2_dbg(1, "%s(): looping, ret is %d\n", __func__, ret); } + spin_unlock(&c->erase_completion_lock); + + if (ret == -EAGAIN) + cond_resched(); + else + break; } - spin_unlock(&c->erase_completion_lock); if (!ret) ret = jffs2_prealloc_raw_node_refs(c, c->nextblock, 1); From cc8ece834baaac84cffafad31dc25c1f73ef6add Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 12 Feb 2014 12:44:57 -0800 Subject: [PATCH 16/24] jffs2: remove from wait queue after schedule() commit 3ead9578443b66ddb3d50ed4f53af8a0c0298ec5 upstream. @wait is a local variable, so if we don't remove it from the wait queue list, later wake_up() may end up accessing invalid memory. This was spotted by eyes. Signed-off-by: Li Zefan Cc: David Woodhouse Cc: Artem Bityutskiy Signed-off-by: Andrew Morton Signed-off-by: Brian Norris Signed-off-by: Greg Kroah-Hartman --- fs/jffs2/nodemgmt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/jffs2/nodemgmt.c b/fs/jffs2/nodemgmt.c index 41789e6fa6fe..b6bd4affd9ad 100644 --- a/fs/jffs2/nodemgmt.c +++ b/fs/jffs2/nodemgmt.c @@ -179,6 +179,7 @@ int jffs2_reserve_space(struct jffs2_sb_info *c, uint32_t minsize, spin_unlock(&c->erase_completion_lock); schedule(); + remove_wait_queue(&c->erase_wait, &wait); } else spin_unlock(&c->erase_completion_lock); } else if (ret) From 3297037a222f69637a961f49b72daa6a755299db Mon Sep 17 00:00:00 2001 From: oftedal Date: Fri, 18 Oct 2013 22:28:29 +0200 Subject: [PATCH 17/24] sparc: PCI: Fix incorrect address calculation of PCI Bridge windows on Simba-bridges [ Upstream commit 557fc5873ef178c4b3e1e36a42db547ecdc43f9b ] The SIMBA APB Bridges lacks the 'ranges' of-property describing the PCI I/O and memory areas located beneath the bridge. Faking this information has been performed by reading range registers in the APB bridge, and calculating the corresponding areas. In commit 01f94c4a6ced476ce69b895426fc29bfc48c69bd ("Fix sabre pci controllers with new probing scheme.") a bug was introduced into this calculation, causing the PCI memory areas to be calculated incorrectly: The shift size was set to be identical for I/O and MEM ranges, which is incorrect. This patch set the shift size of the MEM range back to the value used before 01f94c4a6ced476ce69b895426fc29bfc48c69bd. Signed-off-by: Kjetil Oftedal Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- arch/sparc/kernel/pci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c index baf4366e2d6a..906cbf0f8608 100644 --- a/arch/sparc/kernel/pci.c +++ b/arch/sparc/kernel/pci.c @@ -399,8 +399,8 @@ static void apb_fake_ranges(struct pci_dev *dev, apb_calc_first_last(map, &first, &last); res = bus->resource[1]; res->flags = IORESOURCE_MEM; - region.start = (first << 21); - region.end = (last << 21) + ((1 << 21) - 1); + region.start = (first << 29); + region.end = (last << 29) + ((1 << 29) - 1); pcibios_bus_to_resource(dev, res, ®ion); } From 667a65c16786d9a97c2b7f2b76945b8694755e32 Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Mon, 16 Dec 2013 15:01:00 -0600 Subject: [PATCH 18/24] Revert "sparc64: Fix __copy_{to,from}_user_inatomic defines." [ Upstream commit 16932237f2978a2265662f8de4af743b1f55a209 ] This reverts commit 145e1c0023585e0e8f6df22316308ec61c5066b2. This commit broke the behavior of __copy_from_user_inatomic when it is only partially successful. Instead of returning the number of bytes not copied, it now returns 1. This translates to the wrong value being returned by iov_iter_copy_from_user_atomic. xfstests generic/246 and LTP writev01 both fail on btrfs and nfs because of this. Signed-off-by: Dave Kleikamp Cc: Hugh Dickins Cc: David S. Miller Cc: sparclinux@vger.kernel.org Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- arch/sparc/include/asm/uaccess_64.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/sparc/include/asm/uaccess_64.h b/arch/sparc/include/asm/uaccess_64.h index e562d3caee57..ad7e178337f1 100644 --- a/arch/sparc/include/asm/uaccess_64.h +++ b/arch/sparc/include/asm/uaccess_64.h @@ -262,8 +262,8 @@ extern unsigned long __must_check __clear_user(void __user *, unsigned long); extern __must_check long strlen_user(const char __user *str); extern __must_check long strnlen_user(const char __user *str, long n); -#define __copy_to_user_inatomic ___copy_to_user -#define __copy_from_user_inatomic ___copy_from_user +#define __copy_to_user_inatomic __copy_to_user +#define __copy_from_user_inatomic __copy_from_user struct pt_regs; extern unsigned long compute_effective_address(struct pt_regs *, From 10538045303f58a17b1e59b8f9f0ebc7cf067d31 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Thu, 13 Feb 2014 13:57:44 -0500 Subject: [PATCH 19/24] sparc32: fix build failure for arch_jump_label_transform [ Upstream commit 4f6500fff5f7644a03c46728fd7ef0f62fa6940b ] In arch/sparc/Kernel/Makefile, we see: obj-$(CONFIG_SPARC64) += jump_label.o However, the Kconfig selects HAVE_ARCH_JUMP_LABEL unconditionally for all SPARC. This in turn leads to the following failure when doing allmodconfig coverage builds: kernel/built-in.o: In function `__jump_label_update': jump_label.c:(.text+0x8560c): undefined reference to `arch_jump_label_transform' kernel/built-in.o: In function `arch_jump_label_transform_static': (.text+0x85cf4): undefined reference to `arch_jump_label_transform' make: *** [vmlinux] Error 1 Change HAVE_ARCH_JUMP_LABEL to be conditional on SPARC64 so that it matches the Makefile. Signed-off-by: Paul Gortmaker Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- arch/sparc/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 9ac9f1666339..2668b3142fa2 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -25,7 +25,7 @@ config SPARC select RTC_DRV_M48T59 select HAVE_DMA_ATTRS select HAVE_DMA_API_DEBUG - select HAVE_ARCH_JUMP_LABEL + select HAVE_ARCH_JUMP_LABEL if SPARC64 select HAVE_GENERIC_HARDIRQS select GENERIC_IRQ_SHOW select ARCH_WANT_IPC_PARSE_VERSION From 4c0d2e83fdc6743ba7d76ac2d2a311b2e2cbc1d7 Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Fri, 14 Mar 2014 10:42:01 -0500 Subject: [PATCH 20/24] sparc64: don't treat 64-bit syscall return codes as 32-bit [ Upstream commit 1535bd8adbdedd60a0ee62e28fd5225d66434371 ] When checking a system call return code for an error, linux_sparc_syscall was sign-extending the lower 32-bit value and comparing it to -ERESTART_RESTARTBLOCK. lseek can return valid return codes whose lower 32-bits alone would indicate a failure (such as 4G-1). Use the whole 64-bit value to check for errors. Only the 32-bit path should sign extend the lower 32-bit value. Signed-off-by: Dave Kleikamp Acked-by: Bob Picco Acked-by: Allen Pais Cc: David S. Miller Cc: sparclinux@vger.kernel.org Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- arch/sparc/kernel/syscalls.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/sparc/kernel/syscalls.S b/arch/sparc/kernel/syscalls.S index 73ec8a798d95..c79c687fbe1e 100644 --- a/arch/sparc/kernel/syscalls.S +++ b/arch/sparc/kernel/syscalls.S @@ -189,7 +189,8 @@ linux_sparc_syscall32: mov %i0, %l5 ! IEU1 5: call %l7 ! CTI Group brk forced srl %i5, 0, %o5 ! IEU1 - ba,a,pt %xcc, 3f + ba,pt %xcc, 3f + sra %o0, 0, %o0 /* Linux native system calls enter here... */ .align 32 @@ -217,7 +218,6 @@ linux_sparc_syscall: 3: stx %o0, [%sp + PTREGS_OFF + PT_V9_I0] ret_sys_call: ldx [%sp + PTREGS_OFF + PT_V9_TSTATE], %g3 - sra %o0, 0, %o0 mov %ulo(TSTATE_XCARRY | TSTATE_ICARRY), %g2 sllx %g2, 32, %g2 From adf6de0646c0b2bda53874970b076a4f0ea63cd5 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 24 Mar 2014 14:45:12 -0400 Subject: [PATCH 21/24] sparc64: Make sure %pil interrupts are enabled during hypervisor yield. [ Upstream commit cb3042d609e30e6144024801c89be3925106752b ] In arch_cpu_idle() we must enable %pil based interrupts before potentially invoking the hypervisor cpu yield call. As per the Hypervisor API documentation for cpu_yield: Interrupts which are blocked by some mechanism other that pstate.ie (for example %pil) are not guaranteed to cause a return from this service. It seems that only first generation Niagara chips are hit by this bug. My best guess is that later chips implement this in hardware and wake up anyways from %pil events, whereas in first generation chips the yield is implemented completely in hypervisor code and requires %pil to be enabled in order to wake properly from this call. Fixes: 87fa05aeb3a5 ("sparc: Use generic idle loop") Reported-by: Fabio M. Di Nitto Reported-by: Jan Engelhardt Tested-by: Jan Engelhardt Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- arch/sparc/kernel/process_64.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index baebab215492..b9cc9763faf4 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -57,9 +57,12 @@ void arch_cpu_idle(void) { if (tlb_type != hypervisor) { touch_nmi_watchdog(); + local_irq_enable(); } else { unsigned long pstate; + local_irq_enable(); + /* The sun4v sleeping code requires that we have PSTATE.IE cleared over * the cpu sleep hypervisor call. */ @@ -81,7 +84,6 @@ void arch_cpu_idle(void) : "=&r" (pstate) : "i" (PSTATE_IE)); } - local_irq_enable(); } #ifdef CONFIG_HOTPLUG_CPU From a756e3d4c71d3de28373076da2552260741f3d0d Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 7 Apr 2014 15:38:41 -0700 Subject: [PATCH 22/24] wait: fix reparent_leader() vs EXIT_DEAD->EXIT_ZOMBIE race commit dfccbb5e49a621c1b21a62527d61fc4305617aca upstream. wait_task_zombie() first does EXIT_ZOMBIE->EXIT_DEAD transition and drops tasklist_lock. If this task is not the natural child and it is traced, we change its state back to EXIT_ZOMBIE for ->real_parent. The last transition is racy, this is even documented in 50b8d257486a "ptrace: partially fix the do_wait(WEXITED) vs EXIT_DEAD->EXIT_ZOMBIE race". wait_consider_task() tries to detect this transition and clear ->notask_error but we can't rely on ptrace_reparented(), debugger can exit and do ptrace_unlink() before its sub-thread sets EXIT_ZOMBIE. And there is another problem which were missed before: this transition can also race with reparent_leader() which doesn't reset >exit_signal if EXIT_DEAD, assuming that this task must be reaped by someone else. So the tracee can be re-parented with ->exit_signal != SIGCHLD, and if /sbin/init doesn't use __WALL it becomes unreapable. Change reparent_leader() to update ->exit_signal even if EXIT_DEAD. Note: this is the simple temporary hack for -stable, it doesn't try to solve all problems, it will be reverted by the next changes. Signed-off-by: Oleg Nesterov Reported-by: Jan Kratochvil Reported-by: Michal Schmidt Tested-by: Michal Schmidt Cc: Al Viro Cc: Lennart Poettering Cc: Roland McGrath Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/exit.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/kernel/exit.c b/kernel/exit.c index 7bb73f9d09db..80495a1ed651 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -570,9 +570,6 @@ static void reparent_leader(struct task_struct *father, struct task_struct *p, struct list_head *dead) { list_move_tail(&p->sibling, &p->real_parent->children); - - if (p->exit_state == EXIT_DEAD) - return; /* * If this is a threaded reparent there is no need to * notify anyone anything has happened. @@ -580,9 +577,19 @@ static void reparent_leader(struct task_struct *father, struct task_struct *p, if (same_thread_group(p->real_parent, father)) return; - /* We don't want people slaying init. */ + /* + * We don't want people slaying init. + * + * Note: we do this even if it is EXIT_DEAD, wait_task_zombie() + * can change ->exit_state to EXIT_ZOMBIE. If this is the final + * state, do_notify_parent() was already called and ->exit_signal + * doesn't matter. + */ p->exit_signal = SIGCHLD; + if (p->exit_state == EXIT_DEAD) + return; + /* If it has exited notify the new parent about this child's death. */ if (!p->ptrace && p->exit_state == EXIT_ZOMBIE && thread_group_empty(p)) { From 1673978155f56ce8e1bcabacb9ee09464b5de728 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 7 Apr 2014 15:38:29 -0700 Subject: [PATCH 23/24] exit: call disassociate_ctty() before exit_task_namespaces() commit c39df5fa37b0623589508c95515b4aa1531c524e upstream. Commit 8aac62706ada ("move exit_task_namespaces() outside of exit_notify()") breaks pppd and the exiting service crashes the kernel: BUG: unable to handle kernel NULL pointer dereference at 0000000000000028 IP: ppp_register_channel+0x13/0x20 [ppp_generic] Call Trace: ppp_asynctty_open+0x12b/0x170 [ppp_async] tty_ldisc_open.isra.2+0x27/0x60 tty_ldisc_hangup+0x1e3/0x220 __tty_hangup+0x2c4/0x440 disassociate_ctty+0x61/0x270 do_exit+0x7f2/0xa50 ppp_register_channel() needs ->net_ns and current->nsproxy == NULL. Move disassociate_ctty() before exit_task_namespaces(), it doesn't make sense to delay it after perf_event_exit_task() or cgroup_exit(). This also allows to use task_work_add() inside the (nontrivial) code paths in disassociate_ctty(). Investigated by Peter Hurley. Signed-off-by: Oleg Nesterov Reported-by: Sree Harsha Totakura Cc: Peter Hurley Cc: Sree Harsha Totakura Cc: "Eric W. Biederman" Cc: Jeff Dike Cc: Ingo Molnar Cc: Andrey Vagin Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/exit.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/kernel/exit.c b/kernel/exit.c index 80495a1ed651..6682b2ea5b11 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -801,6 +801,8 @@ void do_exit(long code) exit_shm(tsk); exit_files(tsk); exit_fs(tsk); + if (group_dead) + disassociate_ctty(1); exit_task_namespaces(tsk); exit_task_work(tsk); check_stack_usage(); @@ -816,13 +818,9 @@ void do_exit(long code) cgroup_exit(tsk, 1); - if (group_dead) - disassociate_ctty(1); - module_put(task_thread_info(tsk)->exec_domain->module); proc_exit_connector(tsk); - /* * FIXME: do that only when needed, using sched_exit tracepoint */ From bdec4322971737d2d70087b6cbfd9d39e781e114 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 26 Apr 2014 17:16:33 -0700 Subject: [PATCH 24/24] Linux 3.10.38 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index bd9fb5b72fc0..bd51b50a567b 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 3 PATCHLEVEL = 10 -SUBLEVEL = 37 +SUBLEVEL = 38 EXTRAVERSION = NAME = TOSSUG Baby Fish