From b9978c27454cca3a0ba73577c23c15401d30e833 Mon Sep 17 00:00:00 2001 From: Orlando Arias Date: Tue, 16 May 2017 15:34:00 -0400 Subject: [PATCH 01/52] sparc: Fix -Wstringop-overflow warning [ Upstream commit deba804c90642c8ed0f15ac1083663976d578f54 ] Greetings, GCC 7 introduced the -Wstringop-overflow flag to detect buffer overflows in calls to string handling functions [1][2]. Due to the way ``empty_zero_page'' is declared in arch/sparc/include/setup.h, this causes a warning to trigger at compile time in the function mem_init(), which is subsequently converted to an error. The ensuing patch fixes this issue and aligns the declaration of empty_zero_page to that of other architectures. Thank you. Cheers, Orlando. [1] https://gcc.gnu.org/ml/gcc-patches/2016-10/msg02308.html [2] https://gcc.gnu.org/gcc-7/changes.html Signed-off-by: Orlando Arias -------------------------------------------------------------------------------- Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- arch/sparc/include/asm/pgtable_32.h | 4 ++-- arch/sparc/include/asm/setup.h | 2 +- arch/sparc/mm/init_32.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h index 91b963a887b7..29c3b400f949 100644 --- a/arch/sparc/include/asm/pgtable_32.h +++ b/arch/sparc/include/asm/pgtable_32.h @@ -91,9 +91,9 @@ extern unsigned long pfn_base; * ZERO_PAGE is a global shared page that is always zero: used * for zero-mapped memory areas etc.. */ -extern unsigned long empty_zero_page; +extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; -#define ZERO_PAGE(vaddr) (virt_to_page(&empty_zero_page)) +#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) /* * In general all page table modifications should use the V8 atomic diff --git a/arch/sparc/include/asm/setup.h b/arch/sparc/include/asm/setup.h index 29d64b1758ed..be0cc1beed41 100644 --- a/arch/sparc/include/asm/setup.h +++ b/arch/sparc/include/asm/setup.h @@ -16,7 +16,7 @@ extern char reboot_command[]; */ extern unsigned char boot_cpu_id; -extern unsigned long empty_zero_page; +extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; extern int serial_console; static inline int con_is_present(void) diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c index eb8287155279..3b7092d9ea8f 100644 --- a/arch/sparc/mm/init_32.c +++ b/arch/sparc/mm/init_32.c @@ -301,7 +301,7 @@ void __init mem_init(void) /* Saves us work later. */ - memset((void *)&empty_zero_page, 0, PAGE_SIZE); + memset((void *)empty_zero_page, 0, PAGE_SIZE); i = last_valid_pfn >> ((20 - PAGE_SHIFT) + 5); i += 1; From 5f67a1663c03a73962fb240cf821338f78981a23 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 9 May 2017 06:29:19 -0700 Subject: [PATCH 02/52] dccp/tcp: do not inherit mc_list from parent [ Upstream commit 657831ffc38e30092a2d5f03d385d710eb88b09a ] syzkaller found a way to trigger double frees from ip_mc_drop_socket() It turns out that leave a copy of parent mc_list at accept() time, which is very bad. Very similar to commit 8b485ce69876 ("tcp: do not inherit fastopen_req from parent") Initial report from Pray3r, completed by Andrey one. Thanks a lot to them ! Signed-off-by: Eric Dumazet Reported-by: Pray3r Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/inet_connection_sock.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 64148914803a..45fa2aaa3d3f 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -669,6 +669,8 @@ struct sock *inet_csk_clone_lock(const struct sock *sk, inet_sk(newsk)->inet_sport = htons(inet_rsk(req)->ir_num); newsk->sk_write_space = sk_stream_write_space; + inet_sk(newsk)->mc_list = NULL; + newsk->sk_mark = inet_rsk(req)->ir_mark; atomic64_set(&newsk->sk_cookie, atomic64_read(&inet_rsk(req)->ir_cookie)); From d1428ee5407396185aab56ca62d49e89726455e0 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Tue, 9 May 2017 16:59:54 -0700 Subject: [PATCH 03/52] ipv6/dccp: do not inherit ipv6_mc_list from parent [ Upstream commit 83eaddab4378db256d00d295bda6ca997cd13a52 ] Like commit 657831ffc38e ("dccp/tcp: do not inherit mc_list from parent") we should clear ipv6_mc_list etc. for IPv6 sockets too. Cc: Eric Dumazet Signed-off-by: Cong Wang Acked-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/dccp/ipv6.c | 6 ++++++ net/ipv6/tcp_ipv6.c | 2 ++ 2 files changed, 8 insertions(+) diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 8113ad58fcb4..3470ad1843bb 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -422,6 +422,9 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk, newsk->sk_backlog_rcv = dccp_v4_do_rcv; newnp->pktoptions = NULL; newnp->opt = NULL; + newnp->ipv6_mc_list = NULL; + newnp->ipv6_ac_list = NULL; + newnp->ipv6_fl_list = NULL; newnp->mcast_oif = inet6_iif(skb); newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; @@ -486,6 +489,9 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk, /* Clone RX bits */ newnp->rxopt.all = np->rxopt.all; + newnp->ipv6_mc_list = NULL; + newnp->ipv6_ac_list = NULL; + newnp->ipv6_fl_list = NULL; newnp->pktoptions = NULL; newnp->opt = NULL; newnp->mcast_oif = inet6_iif(skb); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 1a63c4deef26..8e958fde6e4b 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1033,6 +1033,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * newtp->af_specific = &tcp_sock_ipv6_mapped_specific; #endif + newnp->ipv6_mc_list = NULL; newnp->ipv6_ac_list = NULL; newnp->ipv6_fl_list = NULL; newnp->pktoptions = NULL; @@ -1102,6 +1103,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * First: no IPv4 options. */ newinet->inet_opt = NULL; + newnp->ipv6_mc_list = NULL; newnp->ipv6_ac_list = NULL; newnp->ipv6_fl_list = NULL; From 2ac37098ee3db7777ff61cc5a92487cdb6e642d0 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Wed, 10 May 2017 19:07:51 +0200 Subject: [PATCH 04/52] s390/qeth: handle sysfs error during initialization [ Upstream commit 9111e7880ccf419548c7b0887df020b08eadb075 ] When setting up the device from within the layer discipline's probe routine, creating the layer-specific sysfs attributes can fail. Report this error back to the caller, and handle it by releasing the layer discipline. Signed-off-by: Ursula Braun [jwi: updated commit msg, moved an OSN change to a subsequent patch] Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/s390/net/qeth_core_main.c | 4 +++- drivers/s390/net/qeth_core_sys.c | 2 ++ drivers/s390/net/qeth_l2_main.c | 5 ++++- drivers/s390/net/qeth_l3_main.c | 5 ++++- 4 files changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 31ac53fa5cee..2593aa98f493 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -5650,8 +5650,10 @@ static int qeth_core_set_online(struct ccwgroup_device *gdev) if (rc) goto err; rc = card->discipline->setup(card->gdev); - if (rc) + if (rc) { + qeth_core_free_discipline(card); goto err; + } } rc = card->discipline->set_online(gdev); err: diff --git a/drivers/s390/net/qeth_core_sys.c b/drivers/s390/net/qeth_core_sys.c index e6e5b9671bf2..debd41648b64 100644 --- a/drivers/s390/net/qeth_core_sys.c +++ b/drivers/s390/net/qeth_core_sys.c @@ -422,6 +422,8 @@ static ssize_t qeth_dev_layer2_store(struct device *dev, goto out; rc = card->discipline->setup(card->gdev); + if (rc) + qeth_core_free_discipline(card); out: mutex_unlock(&card->discipline_mutex); return rc ? rc : count; diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index df036b872b05..48a5bb5d1484 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -1030,8 +1030,11 @@ static int qeth_l2_stop(struct net_device *dev) static int qeth_l2_probe_device(struct ccwgroup_device *gdev) { struct qeth_card *card = dev_get_drvdata(&gdev->dev); + int rc; - qeth_l2_create_device_attributes(&gdev->dev); + rc = qeth_l2_create_device_attributes(&gdev->dev); + if (rc) + return rc; INIT_LIST_HEAD(&card->vid_list); hash_init(card->mac_htable); card->options.layer2 = 1; diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index cc4d3c3d8cc5..29aefd932092 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -3227,8 +3227,11 @@ static int qeth_l3_setup_netdev(struct qeth_card *card) static int qeth_l3_probe_device(struct ccwgroup_device *gdev) { struct qeth_card *card = dev_get_drvdata(&gdev->dev); + int rc; - qeth_l3_create_device_attributes(&gdev->dev); + rc = qeth_l3_create_device_attributes(&gdev->dev); + if (rc) + return rc; card->options.layer2 = 0; card->info.hwtrap = 0; return 0; From 21b871582375ed711311fbfae37db91f789ac10a Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Wed, 10 May 2017 19:07:52 +0200 Subject: [PATCH 05/52] s390/qeth: unbreak OSM and OSN support [ Upstream commit 2d2ebb3ed0c6acfb014f98e427298673a5d07b82 ] commit b4d72c08b358 ("qeth: bridgeport support - basic control") broke the support for OSM and OSN devices as follows: As OSM and OSN are L2 only, qeth_core_probe_device() does an early setup by loading the l2 discipline and calling qeth_l2_probe_device(). In this context, adding the l2-specific bridgeport sysfs attributes via qeth_l2_create_device_attributes() hits a BUG_ON in fs/sysfs/group.c, since the basic sysfs infrastructure for the device hasn't been established yet. Note that OSN actually has its own unique sysfs attributes (qeth_osn_devtype), so the additional attributes shouldn't be created at all. For OSM, add a new qeth_l2_devtype that contains all the common and l2-specific sysfs attributes. When qeth_core_probe_device() does early setup for OSM or OSN, assign the corresponding devtype so that the ccwgroup probe code creates the full set of sysfs attributes. This allows us to skip qeth_l2_create_device_attributes() in case of an early setup. Any device that can't do early setup will initially have only the generic sysfs attributes, and when it's probed later qeth_l2_probe_device() adds the l2-specific attributes. If an early-setup device is removed (by calling ccwgroup_ungroup()), device_unregister() will - using the devtype - delete the l2-specific attributes before qeth_l2_remove_device() is called. So make sure to not remove them twice. What complicates the issue is that qeth_l2_probe_device() and qeth_l2_remove_device() is also called on a device when its layer2 attribute changes (ie. its layer mode is switched). For early-setup devices this wouldn't work properly - we wouldn't remove the l2-specific attributes when switching to L3. But switching the layer mode doesn't actually make any sense; we already decided that the device can only operate in L2! So just refuse to switch the layer mode on such devices. Note that OSN doesn't have a layer2 attribute, so we only need to special-case OSM. Based on an initial patch by Ursula Braun. Fixes: b4d72c08b358 ("qeth: bridgeport support - basic control") Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/s390/net/qeth_core.h | 4 ++++ drivers/s390/net/qeth_core_main.c | 17 +++++++++-------- drivers/s390/net/qeth_core_sys.c | 22 ++++++++++++++-------- drivers/s390/net/qeth_l2.h | 2 ++ drivers/s390/net/qeth_l2_main.c | 17 +++++++++++++---- drivers/s390/net/qeth_l2_sys.c | 8 ++++++++ drivers/s390/net/qeth_l3_main.c | 1 + 7 files changed, 51 insertions(+), 20 deletions(-) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index 1766a20ebcb1..741f3ee81cfe 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -717,6 +717,7 @@ enum qeth_discipline_id { }; struct qeth_discipline { + const struct device_type *devtype; void (*start_poll)(struct ccw_device *, int, unsigned long); qdio_handler_t *input_handler; qdio_handler_t *output_handler; @@ -881,6 +882,9 @@ extern struct qeth_discipline qeth_l2_discipline; extern struct qeth_discipline qeth_l3_discipline; extern const struct attribute_group *qeth_generic_attr_groups[]; extern const struct attribute_group *qeth_osn_attr_groups[]; +extern const struct attribute_group qeth_device_attr_group; +extern const struct attribute_group qeth_device_blkt_group; +extern const struct device_type qeth_generic_devtype; extern struct workqueue_struct *qeth_wq; int qeth_card_hw_is_reachable(struct qeth_card *); diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 2593aa98f493..d10bf3da8e5f 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -5449,10 +5449,12 @@ void qeth_core_free_discipline(struct qeth_card *card) card->discipline = NULL; } -static const struct device_type qeth_generic_devtype = { +const struct device_type qeth_generic_devtype = { .name = "qeth_generic", .groups = qeth_generic_attr_groups, }; +EXPORT_SYMBOL_GPL(qeth_generic_devtype); + static const struct device_type qeth_osn_devtype = { .name = "qeth_osn", .groups = qeth_osn_attr_groups, @@ -5578,23 +5580,22 @@ static int qeth_core_probe_device(struct ccwgroup_device *gdev) goto err_card; } - if (card->info.type == QETH_CARD_TYPE_OSN) - gdev->dev.type = &qeth_osn_devtype; - else - gdev->dev.type = &qeth_generic_devtype; - switch (card->info.type) { case QETH_CARD_TYPE_OSN: case QETH_CARD_TYPE_OSM: rc = qeth_core_load_discipline(card, QETH_DISCIPLINE_LAYER2); if (rc) goto err_card; + + gdev->dev.type = (card->info.type != QETH_CARD_TYPE_OSN) + ? card->discipline->devtype + : &qeth_osn_devtype; rc = card->discipline->setup(card->gdev); if (rc) goto err_disc; - case QETH_CARD_TYPE_OSD: - case QETH_CARD_TYPE_OSX: + break; default: + gdev->dev.type = &qeth_generic_devtype; break; } diff --git a/drivers/s390/net/qeth_core_sys.c b/drivers/s390/net/qeth_core_sys.c index debd41648b64..fa844b0ff847 100644 --- a/drivers/s390/net/qeth_core_sys.c +++ b/drivers/s390/net/qeth_core_sys.c @@ -409,12 +409,16 @@ static ssize_t qeth_dev_layer2_store(struct device *dev, if (card->options.layer2 == newdis) goto out; - else { - card->info.mac_bits = 0; - if (card->discipline) { - card->discipline->remove(card->gdev); - qeth_core_free_discipline(card); - } + if (card->info.type == QETH_CARD_TYPE_OSM) { + /* fixed layer, can't switch */ + rc = -EOPNOTSUPP; + goto out; + } + + card->info.mac_bits = 0; + if (card->discipline) { + card->discipline->remove(card->gdev); + qeth_core_free_discipline(card); } rc = qeth_core_load_discipline(card, newdis); @@ -701,10 +705,11 @@ static struct attribute *qeth_blkt_device_attrs[] = { &dev_attr_inter_jumbo.attr, NULL, }; -static struct attribute_group qeth_device_blkt_group = { +const struct attribute_group qeth_device_blkt_group = { .name = "blkt", .attrs = qeth_blkt_device_attrs, }; +EXPORT_SYMBOL_GPL(qeth_device_blkt_group); static struct attribute *qeth_device_attrs[] = { &dev_attr_state.attr, @@ -724,9 +729,10 @@ static struct attribute *qeth_device_attrs[] = { &dev_attr_switch_attrs.attr, NULL, }; -static struct attribute_group qeth_device_attr_group = { +const struct attribute_group qeth_device_attr_group = { .attrs = qeth_device_attrs, }; +EXPORT_SYMBOL_GPL(qeth_device_attr_group); const struct attribute_group *qeth_generic_attr_groups[] = { &qeth_device_attr_group, diff --git a/drivers/s390/net/qeth_l2.h b/drivers/s390/net/qeth_l2.h index 0767556404bd..eb87bf97d38a 100644 --- a/drivers/s390/net/qeth_l2.h +++ b/drivers/s390/net/qeth_l2.h @@ -8,6 +8,8 @@ #include "qeth_core.h" +extern const struct attribute_group *qeth_l2_attr_groups[]; + int qeth_l2_create_device_attributes(struct device *); void qeth_l2_remove_device_attributes(struct device *); void qeth_l2_setup_bridgeport_attrs(struct qeth_card *card); diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 48a5bb5d1484..f93a9fcbb8cd 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -1027,14 +1027,21 @@ static int qeth_l2_stop(struct net_device *dev) return 0; } +static const struct device_type qeth_l2_devtype = { + .name = "qeth_layer2", + .groups = qeth_l2_attr_groups, +}; + static int qeth_l2_probe_device(struct ccwgroup_device *gdev) { struct qeth_card *card = dev_get_drvdata(&gdev->dev); int rc; - rc = qeth_l2_create_device_attributes(&gdev->dev); - if (rc) - return rc; + if (gdev->dev.type == &qeth_generic_devtype) { + rc = qeth_l2_create_device_attributes(&gdev->dev); + if (rc) + return rc; + } INIT_LIST_HEAD(&card->vid_list); hash_init(card->mac_htable); card->options.layer2 = 1; @@ -1046,7 +1053,8 @@ static void qeth_l2_remove_device(struct ccwgroup_device *cgdev) { struct qeth_card *card = dev_get_drvdata(&cgdev->dev); - qeth_l2_remove_device_attributes(&cgdev->dev); + if (cgdev->dev.type == &qeth_generic_devtype) + qeth_l2_remove_device_attributes(&cgdev->dev); qeth_set_allowed_threads(card, 0, 1); wait_event(card->wait_q, qeth_threads_running(card, 0xffffffff) == 0); @@ -1432,6 +1440,7 @@ static int qeth_l2_control_event(struct qeth_card *card, } struct qeth_discipline qeth_l2_discipline = { + .devtype = &qeth_l2_devtype, .start_poll = qeth_qdio_start_poll, .input_handler = (qdio_handler_t *) qeth_qdio_input_handler, .output_handler = (qdio_handler_t *) qeth_qdio_output_handler, diff --git a/drivers/s390/net/qeth_l2_sys.c b/drivers/s390/net/qeth_l2_sys.c index 692db49e3d2a..a48ed9e7e168 100644 --- a/drivers/s390/net/qeth_l2_sys.c +++ b/drivers/s390/net/qeth_l2_sys.c @@ -272,3 +272,11 @@ void qeth_l2_setup_bridgeport_attrs(struct qeth_card *card) } else qeth_bridgeport_an_set(card, 0); } + +const struct attribute_group *qeth_l2_attr_groups[] = { + &qeth_device_attr_group, + &qeth_device_blkt_group, + /* l2 specific, see l2_{create,remove}_device_attributes(): */ + &qeth_l2_bridgeport_attr_group, + NULL, +}; diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 29aefd932092..285fe0b2c753 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -3522,6 +3522,7 @@ static int qeth_l3_control_event(struct qeth_card *card, } struct qeth_discipline qeth_l3_discipline = { + .devtype = &qeth_generic_devtype, .start_poll = qeth_qdio_start_poll, .input_handler = (qdio_handler_t *) qeth_qdio_input_handler, .output_handler = (qdio_handler_t *) qeth_qdio_output_handler, From 182abc4e74a1599cc35be6acb3e70fe89823a94b Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Wed, 10 May 2017 19:07:53 +0200 Subject: [PATCH 06/52] s390/qeth: avoid null pointer dereference on OSN [ Upstream commit 25e2c341e7818a394da9abc403716278ee646014 ] Access card->dev only after checking whether's its valid. Signed-off-by: Julian Wiedmann Reviewed-by: Ursula Braun Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/s390/net/qeth_l2_main.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index f93a9fcbb8cd..bf1e0e39334d 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -1112,7 +1112,6 @@ static int qeth_l2_setup_netdev(struct qeth_card *card) case QETH_CARD_TYPE_OSN: card->dev = alloc_netdev(0, "osn%d", NET_NAME_UNKNOWN, ether_setup); - card->dev->flags |= IFF_NOARP; break; default: card->dev = alloc_etherdev(0); @@ -1125,9 +1124,12 @@ static int qeth_l2_setup_netdev(struct qeth_card *card) card->dev->watchdog_timeo = QETH_TX_TIMEOUT; card->dev->mtu = card->info.initial_mtu; card->dev->netdev_ops = &qeth_l2_netdev_ops; - card->dev->ethtool_ops = - (card->info.type != QETH_CARD_TYPE_OSN) ? - &qeth_l2_ethtool_ops : &qeth_l2_osn_ops; + if (card->info.type == QETH_CARD_TYPE_OSN) { + card->dev->ethtool_ops = &qeth_l2_osn_ops; + card->dev->flags |= IFF_NOARP; + } else { + card->dev->ethtool_ops = &qeth_l2_ethtool_ops; + } card->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; if (card->info.type == QETH_CARD_TYPE_OSD && !card->info.guestlan) { card->dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_RXCSUM; From 90e3f8a5587147f088df361bee984b524b7c0ac1 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Wed, 10 May 2017 17:01:27 -0700 Subject: [PATCH 07/52] tcp: avoid fragmenting peculiar skbs in SACK [ Upstream commit b451e5d24ba6687c6f0e7319c727a709a1846c06 ] This patch fixes a bug in splitting an SKB during SACK processing. Specifically if an skb contains multiple packets and is only partially sacked in the higher sequences, tcp_match_sack_to_skb() splits the skb and marks the second fragment as SACKed. The current code further attempts rounding up the first fragment to MSS boundaries. But it misses a boundary condition when the rounded-up fragment size (pkt_len) is exactly skb size. Spliting such an skb is pointless and causses a kernel warning and aborts the SACK processing. This patch universally checks such over-split before calling tcp_fragment to prevent these unnecessary warnings. Fixes: adb92db857ee ("tcp: Make SACK code to split only at mss boundaries") Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Acked-by: Neal Cardwell Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_input.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 818630cec54f..6cb682167c89 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1134,13 +1134,14 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb, */ if (pkt_len > mss) { unsigned int new_len = (pkt_len / mss) * mss; - if (!in_sack && new_len < pkt_len) { + if (!in_sack && new_len < pkt_len) new_len += mss; - if (new_len >= skb->len) - return 0; - } pkt_len = new_len; } + + if (pkt_len >= skb->len && !in_sack) + return 0; + err = tcp_fragment(sk, skb, pkt_len, mss, GFP_ATOMIC); if (err < 0) return err; From 704e6c6b865113c859e5ce3e1f42cec7b1551b79 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 12 May 2017 14:39:52 +0800 Subject: [PATCH 08/52] sctp: fix src address selection if using secondary addresses for ipv6 [ Upstream commit dbc2b5e9a09e9a6664679a667ff81cff6e5f2641 ] Commit 0ca50d12fe46 ("sctp: fix src address selection if using secondary addresses") has fixed a src address selection issue when using secondary addresses for ipv4. Now sctp ipv6 also has the similar issue. When using a secondary address, sctp_v6_get_dst tries to choose the saddr which has the most same bits with the daddr by sctp_v6_addr_match_len. It may make some cases not work as expected. hostA: [1] fd21:356b:459a:cf10::11 (eth1) [2] fd21:356b:459a:cf20::11 (eth2) hostB: [a] fd21:356b:459a:cf30::2 (eth1) [b] fd21:356b:459a:cf40::2 (eth2) route from hostA to hostB: fd21:356b:459a:cf30::/64 dev eth1 metric 1024 mtu 1500 The expected path should be: fd21:356b:459a:cf10::11 <-> fd21:356b:459a:cf30::2 But addr[2] matches addr[a] more bits than addr[1] does, according to sctp_v6_addr_match_len. It causes the path to be: fd21:356b:459a:cf20::11 <-> fd21:356b:459a:cf30::2 This patch is to fix it with the same way as Marcelo's fix for sctp ipv4. As no ip_dev_find for ipv6, this patch is to use ipv6_chk_addr to check if the saddr is in a dev instead. Note that for backwards compatibility, it will still do the addr_match_len check here when no optimal is found. Reported-by: Patrick Talbert Signed-off-by: Xin Long Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/ipv6.c | 46 +++++++++++++++++++++++++++++----------------- 1 file changed, 29 insertions(+), 17 deletions(-) diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index ce46f1c7f133..079df5168fb1 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -239,12 +239,10 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, struct sctp_bind_addr *bp; struct ipv6_pinfo *np = inet6_sk(sk); struct sctp_sockaddr_entry *laddr; - union sctp_addr *baddr = NULL; union sctp_addr *daddr = &t->ipaddr; union sctp_addr dst_saddr; struct in6_addr *final_p, final; __u8 matchlen = 0; - __u8 bmatchlen; sctp_scope_t scope; memset(fl6, 0, sizeof(struct flowi6)); @@ -311,23 +309,37 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, */ rcu_read_lock(); list_for_each_entry_rcu(laddr, &bp->address_list, list) { - if (!laddr->valid) + struct dst_entry *bdst; + __u8 bmatchlen; + + if (!laddr->valid || + laddr->state != SCTP_ADDR_SRC || + laddr->a.sa.sa_family != AF_INET6 || + scope > sctp_scope(&laddr->a)) continue; - if ((laddr->state == SCTP_ADDR_SRC) && - (laddr->a.sa.sa_family == AF_INET6) && - (scope <= sctp_scope(&laddr->a))) { - bmatchlen = sctp_v6_addr_match_len(daddr, &laddr->a); - if (!baddr || (matchlen < bmatchlen)) { - baddr = &laddr->a; - matchlen = bmatchlen; - } - } - } - if (baddr) { - fl6->saddr = baddr->v6.sin6_addr; - fl6->fl6_sport = baddr->v6.sin6_port; + + fl6->saddr = laddr->a.v6.sin6_addr; + fl6->fl6_sport = laddr->a.v6.sin6_port; final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final); - dst = ip6_dst_lookup_flow(sk, fl6, final_p); + bdst = ip6_dst_lookup_flow(sk, fl6, final_p); + + if (!IS_ERR(bdst) && + ipv6_chk_addr(dev_net(bdst->dev), + &laddr->a.v6.sin6_addr, bdst->dev, 1)) { + if (!IS_ERR_OR_NULL(dst)) + dst_release(dst); + dst = bdst; + break; + } + + bmatchlen = sctp_v6_addr_match_len(daddr, &laddr->a); + if (matchlen > bmatchlen) + continue; + + if (!IS_ERR_OR_NULL(dst)) + dst_release(dst); + dst = bdst; + matchlen = bmatchlen; } rcu_read_unlock(); From ffa551def59c9b0e1747955af6a742443ae152fc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 17 May 2017 07:16:40 -0700 Subject: [PATCH 09/52] sctp: do not inherit ipv6_{mc|ac|fl}_list from parent [ Upstream commit fdcee2cbb8438702ea1b328fb6e0ac5e9a40c7f8 ] SCTP needs fixes similar to 83eaddab4378 ("ipv6/dccp: do not inherit ipv6_mc_list from parent"), otherwise bad things can happen. Signed-off-by: Eric Dumazet Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/ipv6.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 079df5168fb1..7527c168e471 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -674,6 +674,9 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk, newnp = inet6_sk(newsk); memcpy(newnp, np, sizeof(struct ipv6_pinfo)); + newnp->ipv6_mc_list = NULL; + newnp->ipv6_ac_list = NULL; + newnp->ipv6_fl_list = NULL; rcu_read_lock(); opt = rcu_dereference(np->opt); From 7ede5c90fcdd6d33b3507203e3b8e57b025f34b2 Mon Sep 17 00:00:00 2001 From: Soheil Hassas Yeganeh Date: Mon, 15 May 2017 17:05:47 -0400 Subject: [PATCH 10/52] tcp: eliminate negative reordering in tcp_clean_rtx_queue [ Upstream commit bafbb9c73241760023d8981191ddd30bb1c6dbac ] tcp_ack() can call tcp_fragment() which may dededuct the value tp->fackets_out when MSS changes. When prior_fackets is larger than tp->fackets_out, tcp_clean_rtx_queue() can invoke tcp_update_reordering() with negative values. This results in absurd tp->reodering values higher than sysctl_tcp_max_reordering. Note that tcp_update_reordering indeeds sets tp->reordering to min(sysctl_tcp_max_reordering, metric), but because the comparison is signed, a negative metric always wins. Fixes: c7caf8d3ed7a ("[TCP]: Fix reord detection due to snd_una covered holes") Reported-by: Rebecca Isaacs Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 6cb682167c89..87791f803627 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3220,7 +3220,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, int delta; /* Non-retransmitted hole got filled? That's reordering */ - if (reord < prior_fackets) + if (reord < prior_fackets && reord <= tp->fackets_out) tcp_update_reordering(sk, tp->fackets_out - reord, 0); delta = tcp_is_fack(tp) ? pkts_acked : From 640bfcf232a93bdd446552f5417c52a0ad300e8b Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 15 May 2017 23:19:17 -0700 Subject: [PATCH 11/52] net: Improve handling of failures on link and route dumps [ Upstream commit f6c5775ff0bfa62b072face6bf1d40f659f194b2 ] In general, rtnetlink dumps do not anticipate failure to dump a single object (e.g., link or route) on a single pass. As both route and link objects have grown via more attributes, that is no longer a given. netlink dumps can handle a failure if the dump function returns an error; specifically, netlink_dump adds the return code to the response if it is <= 0 so userspace is notified of the failure. The missing piece is the rtnetlink dump functions returning the error. Fix route and link dump functions to return the errors if no object is added to an skb (detected by skb->len != 0). IPv6 route dumps (rt6_dump_route) already return the error; this patch updates IPv4 and link dumps. Other dump functions may need to be ajusted as well. Reported-by: Jan Moskyto Matejka Signed-off-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/rtnetlink.c | 36 ++++++++++++++++++++++++------------ net/ipv4/fib_frontend.c | 15 +++++++++++---- net/ipv4/fib_trie.c | 26 ++++++++++++++------------ 3 files changed, 49 insertions(+), 28 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index fe38ef58997c..d43544ce7550 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1458,13 +1458,13 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) cb->nlh->nlmsg_seq, 0, NLM_F_MULTI, ext_filter_mask); - /* If we ran out of room on the first message, - * we're in trouble - */ - WARN_ON((err == -EMSGSIZE) && (skb->len == 0)); - if (err < 0) - goto out; + if (err < 0) { + if (likely(skb->len)) + goto out; + + goto out_err; + } nl_dump_check_consistent(cb, nlmsg_hdr(skb)); cont: @@ -1472,10 +1472,12 @@ cont: } } out: + err = skb->len; +out_err: cb->args[1] = idx; cb->args[0] = h; - return skb->len; + return err; } int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len) @@ -3127,8 +3129,12 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb) err = br_dev->netdev_ops->ndo_bridge_getlink( skb, portid, seq, dev, filter_mask, NLM_F_MULTI); - if (err < 0 && err != -EOPNOTSUPP) - break; + if (err < 0 && err != -EOPNOTSUPP) { + if (likely(skb->len)) + break; + + goto out_err; + } } idx++; } @@ -3139,16 +3145,22 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb) seq, dev, filter_mask, NLM_F_MULTI); - if (err < 0 && err != -EOPNOTSUPP) - break; + if (err < 0 && err != -EOPNOTSUPP) { + if (likely(skb->len)) + break; + + goto out_err; + } } idx++; } } + err = skb->len; +out_err: rcu_read_unlock(); cb->args[0] = idx; - return skb->len; + return err; } static inline size_t bridge_nlmsg_size(void) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 1adba44f8fbc..66dcb529fd9c 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -757,7 +757,7 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) unsigned int e = 0, s_e; struct fib_table *tb; struct hlist_head *head; - int dumped = 0; + int dumped = 0, err; if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) && ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED) @@ -777,20 +777,27 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) if (dumped) memset(&cb->args[2], 0, sizeof(cb->args) - 2 * sizeof(cb->args[0])); - if (fib_table_dump(tb, skb, cb) < 0) - goto out; + err = fib_table_dump(tb, skb, cb); + if (err < 0) { + if (likely(skb->len)) + goto out; + + goto out_err; + } dumped = 1; next: e++; } } out: + err = skb->len; +out_err: rcu_read_unlock(); cb->args[1] = e; cb->args[0] = h; - return skb->len; + return err; } /* Prepare and feed intra-kernel routing request. diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 7c52afb98c42..5c598f99a500 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1906,6 +1906,8 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb, /* rcu_read_lock is hold by caller */ hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) { + int err; + if (i < s_i) { i++; continue; @@ -1916,17 +1918,14 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb, continue; } - if (fib_dump_info(skb, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - RTM_NEWROUTE, - tb->tb_id, - fa->fa_type, - xkey, - KEYLENGTH - fa->fa_slen, - fa->fa_tos, - fa->fa_info, NLM_F_MULTI) < 0) { + err = fib_dump_info(skb, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, RTM_NEWROUTE, + tb->tb_id, fa->fa_type, + xkey, KEYLENGTH - fa->fa_slen, + fa->fa_tos, fa->fa_info, NLM_F_MULTI); + if (err < 0) { cb->args[4] = i; - return -1; + return err; } i++; } @@ -1948,10 +1947,13 @@ int fib_table_dump(struct fib_table *tb, struct sk_buff *skb, t_key key = cb->args[3]; while ((l = leaf_walk_rcu(&tp, key)) != NULL) { - if (fn_trie_dump_leaf(l, tb, skb, cb) < 0) { + int err; + + err = fn_trie_dump_leaf(l, tb, skb, cb); + if (err < 0) { cb->args[3] = key; cb->args[2] = count; - return -1; + return err; } ++count; From 017fabead5c2aacb36df910bbfbfb1e813517ae3 Mon Sep 17 00:00:00 2001 From: Craig Gallek Date: Tue, 16 May 2017 14:36:23 -0400 Subject: [PATCH 12/52] ipv6: Prevent overrun when parsing v6 header options [ Upstream commit 2423496af35d94a87156b063ea5cedffc10a70a1 ] The KASAN warning repoted below was discovered with a syzkaller program. The reproducer is basically: int s = socket(AF_INET6, SOCK_RAW, NEXTHDR_HOP); send(s, &one_byte_of_data, 1, MSG_MORE); send(s, &more_than_mtu_bytes_data, 2000, 0); The socket() call sets the nexthdr field of the v6 header to NEXTHDR_HOP, the first send call primes the payload with a non zero byte of data, and the second send call triggers the fragmentation path. The fragmentation code tries to parse the header options in order to figure out where to insert the fragment option. Since nexthdr points to an invalid option, the calculation of the size of the network header can made to be much larger than the linear section of the skb and data is read outside of it. This fix makes ip6_find_1stfrag return an error if it detects running out-of-bounds. [ 42.361487] ================================================================== [ 42.364412] BUG: KASAN: slab-out-of-bounds in ip6_fragment+0x11c8/0x3730 [ 42.365471] Read of size 840 at addr ffff88000969e798 by task ip6_fragment-oo/3789 [ 42.366469] [ 42.366696] CPU: 1 PID: 3789 Comm: ip6_fragment-oo Not tainted 4.11.0+ #41 [ 42.367628] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.1-1ubuntu1 04/01/2014 [ 42.368824] Call Trace: [ 42.369183] dump_stack+0xb3/0x10b [ 42.369664] print_address_description+0x73/0x290 [ 42.370325] kasan_report+0x252/0x370 [ 42.370839] ? ip6_fragment+0x11c8/0x3730 [ 42.371396] check_memory_region+0x13c/0x1a0 [ 42.371978] memcpy+0x23/0x50 [ 42.372395] ip6_fragment+0x11c8/0x3730 [ 42.372920] ? nf_ct_expect_unregister_notifier+0x110/0x110 [ 42.373681] ? ip6_copy_metadata+0x7f0/0x7f0 [ 42.374263] ? ip6_forward+0x2e30/0x2e30 [ 42.374803] ip6_finish_output+0x584/0x990 [ 42.375350] ip6_output+0x1b7/0x690 [ 42.375836] ? ip6_finish_output+0x990/0x990 [ 42.376411] ? ip6_fragment+0x3730/0x3730 [ 42.376968] ip6_local_out+0x95/0x160 [ 42.377471] ip6_send_skb+0xa1/0x330 [ 42.377969] ip6_push_pending_frames+0xb3/0xe0 [ 42.378589] rawv6_sendmsg+0x2051/0x2db0 [ 42.379129] ? rawv6_bind+0x8b0/0x8b0 [ 42.379633] ? _copy_from_user+0x84/0xe0 [ 42.380193] ? debug_check_no_locks_freed+0x290/0x290 [ 42.380878] ? ___sys_sendmsg+0x162/0x930 [ 42.381427] ? rcu_read_lock_sched_held+0xa3/0x120 [ 42.382074] ? sock_has_perm+0x1f6/0x290 [ 42.382614] ? ___sys_sendmsg+0x167/0x930 [ 42.383173] ? lock_downgrade+0x660/0x660 [ 42.383727] inet_sendmsg+0x123/0x500 [ 42.384226] ? inet_sendmsg+0x123/0x500 [ 42.384748] ? inet_recvmsg+0x540/0x540 [ 42.385263] sock_sendmsg+0xca/0x110 [ 42.385758] SYSC_sendto+0x217/0x380 [ 42.386249] ? SYSC_connect+0x310/0x310 [ 42.386783] ? __might_fault+0x110/0x1d0 [ 42.387324] ? lock_downgrade+0x660/0x660 [ 42.387880] ? __fget_light+0xa1/0x1f0 [ 42.388403] ? __fdget+0x18/0x20 [ 42.388851] ? sock_common_setsockopt+0x95/0xd0 [ 42.389472] ? SyS_setsockopt+0x17f/0x260 [ 42.390021] ? entry_SYSCALL_64_fastpath+0x5/0xbe [ 42.390650] SyS_sendto+0x40/0x50 [ 42.391103] entry_SYSCALL_64_fastpath+0x1f/0xbe [ 42.391731] RIP: 0033:0x7fbbb711e383 [ 42.392217] RSP: 002b:00007ffff4d34f28 EFLAGS: 00000246 ORIG_RAX: 000000000000002c [ 42.393235] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fbbb711e383 [ 42.394195] RDX: 0000000000001000 RSI: 00007ffff4d34f60 RDI: 0000000000000003 [ 42.395145] RBP: 0000000000000046 R08: 00007ffff4d34f40 R09: 0000000000000018 [ 42.396056] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000400aad [ 42.396598] R13: 0000000000000066 R14: 00007ffff4d34ee0 R15: 00007fbbb717af00 [ 42.397257] [ 42.397411] Allocated by task 3789: [ 42.397702] save_stack_trace+0x16/0x20 [ 42.398005] save_stack+0x46/0xd0 [ 42.398267] kasan_kmalloc+0xad/0xe0 [ 42.398548] kasan_slab_alloc+0x12/0x20 [ 42.398848] __kmalloc_node_track_caller+0xcb/0x380 [ 42.399224] __kmalloc_reserve.isra.32+0x41/0xe0 [ 42.399654] __alloc_skb+0xf8/0x580 [ 42.400003] sock_wmalloc+0xab/0xf0 [ 42.400346] __ip6_append_data.isra.41+0x2472/0x33d0 [ 42.400813] ip6_append_data+0x1a8/0x2f0 [ 42.401122] rawv6_sendmsg+0x11ee/0x2db0 [ 42.401505] inet_sendmsg+0x123/0x500 [ 42.401860] sock_sendmsg+0xca/0x110 [ 42.402209] ___sys_sendmsg+0x7cb/0x930 [ 42.402582] __sys_sendmsg+0xd9/0x190 [ 42.402941] SyS_sendmsg+0x2d/0x50 [ 42.403273] entry_SYSCALL_64_fastpath+0x1f/0xbe [ 42.403718] [ 42.403871] Freed by task 1794: [ 42.404146] save_stack_trace+0x16/0x20 [ 42.404515] save_stack+0x46/0xd0 [ 42.404827] kasan_slab_free+0x72/0xc0 [ 42.405167] kfree+0xe8/0x2b0 [ 42.405462] skb_free_head+0x74/0xb0 [ 42.405806] skb_release_data+0x30e/0x3a0 [ 42.406198] skb_release_all+0x4a/0x60 [ 42.406563] consume_skb+0x113/0x2e0 [ 42.406910] skb_free_datagram+0x1a/0xe0 [ 42.407288] netlink_recvmsg+0x60d/0xe40 [ 42.407667] sock_recvmsg+0xd7/0x110 [ 42.408022] ___sys_recvmsg+0x25c/0x580 [ 42.408395] __sys_recvmsg+0xd6/0x190 [ 42.408753] SyS_recvmsg+0x2d/0x50 [ 42.409086] entry_SYSCALL_64_fastpath+0x1f/0xbe [ 42.409513] [ 42.409665] The buggy address belongs to the object at ffff88000969e780 [ 42.409665] which belongs to the cache kmalloc-512 of size 512 [ 42.410846] The buggy address is located 24 bytes inside of [ 42.410846] 512-byte region [ffff88000969e780, ffff88000969e980) [ 42.411941] The buggy address belongs to the page: [ 42.412405] page:ffffea000025a780 count:1 mapcount:0 mapping: (null) index:0x0 compound_mapcount: 0 [ 42.413298] flags: 0x100000000008100(slab|head) [ 42.413729] raw: 0100000000008100 0000000000000000 0000000000000000 00000001800c000c [ 42.414387] raw: ffffea00002a9500 0000000900000007 ffff88000c401280 0000000000000000 [ 42.415074] page dumped because: kasan: bad access detected [ 42.415604] [ 42.415757] Memory state around the buggy address: [ 42.416222] ffff88000969e880: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 42.416904] ffff88000969e900: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 42.417591] >ffff88000969e980: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 42.418273] ^ [ 42.418588] ffff88000969ea00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 42.419273] ffff88000969ea80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 42.419882] ================================================================== Reported-by: Andrey Konovalov Signed-off-by: Craig Gallek Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_offload.c | 2 ++ net/ipv6/ip6_output.c | 4 ++++ net/ipv6/output_core.c | 14 ++++++++------ net/ipv6/udp_offload.c | 2 ++ 4 files changed, 16 insertions(+), 6 deletions(-) diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 225f5f7f26ba..9a2247cfafbf 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -122,6 +122,8 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, if (udpfrag) { unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr); + if (unfrag_ip6hlen < 0) + return ERR_PTR(unfrag_ip6hlen); fptr = (struct frag_hdr *)((u8 *)ipv6h + unfrag_ip6hlen); fptr->frag_off = htons(offset); if (skb->next) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 8004532fa882..7a3a29cc033b 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -572,6 +572,10 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, u8 *prevhdr, nexthdr = 0; hlen = ip6_find_1stfragopt(skb, &prevhdr); + if (hlen < 0) { + err = hlen; + goto fail; + } nexthdr = *prevhdr; mtu = ip6_skb_dst_mtu(skb); diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index 1d184322a7b1..8b56c5240429 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -79,14 +79,13 @@ EXPORT_SYMBOL(ipv6_select_ident); int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) { u16 offset = sizeof(struct ipv6hdr); - struct ipv6_opt_hdr *exthdr = - (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1); unsigned int packet_len = skb_tail_pointer(skb) - skb_network_header(skb); int found_rhdr = 0; *nexthdr = &ipv6_hdr(skb)->nexthdr; - while (offset + 1 <= packet_len) { + while (offset <= packet_len) { + struct ipv6_opt_hdr *exthdr; switch (**nexthdr) { @@ -107,13 +106,16 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) return offset; } - offset += ipv6_optlen(exthdr); - *nexthdr = &exthdr->nexthdr; + if (offset + sizeof(struct ipv6_opt_hdr) > packet_len) + return -EINVAL; + exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) + offset); + offset += ipv6_optlen(exthdr); + *nexthdr = &exthdr->nexthdr; } - return offset; + return -EINVAL; } EXPORT_SYMBOL(ip6_find_1stfragopt); diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 7441e1e63893..04ed91da6cc9 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -98,6 +98,8 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, * bytes to insert fragment header. */ unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr); + if (unfrag_ip6hlen < 0) + return ERR_PTR(unfrag_ip6hlen); nexthdr = *prevhdr; *prevhdr = NEXTHDR_FRAGMENT; unfrag_len = (skb_network_header(skb) - skb_mac_header(skb)) + From f76d54a8882ed61d4686707fff58a84a1664860c Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 17 May 2017 22:54:11 -0400 Subject: [PATCH 13/52] ipv6: Check ip6_find_1stfragopt() return value properly. [ Upstream commit 7dd7eb9513bd02184d45f000ab69d78cb1fa1531 ] Do not use unsigned variables to see if it returns a negative error or not. Fixes: 2423496af35d ("ipv6: Prevent overrun when parsing v6 header options") Reported-by: Julia Lawall Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_offload.c | 9 ++++----- net/ipv6/ip6_output.c | 7 +++---- net/ipv6/udp_offload.c | 8 +++++--- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 9a2247cfafbf..568bc0a52ca1 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -62,7 +62,6 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, const struct net_offload *ops; int proto; struct frag_hdr *fptr; - unsigned int unfrag_ip6hlen; u8 *prevhdr; int offset = 0; bool encap, udpfrag; @@ -121,10 +120,10 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, skb->network_header = (u8 *)ipv6h - skb->head; if (udpfrag) { - unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr); - if (unfrag_ip6hlen < 0) - return ERR_PTR(unfrag_ip6hlen); - fptr = (struct frag_hdr *)((u8 *)ipv6h + unfrag_ip6hlen); + int err = ip6_find_1stfragopt(skb, &prevhdr); + if (err < 0) + return ERR_PTR(err); + fptr = (struct frag_hdr *)((u8 *)ipv6h + err); fptr->frag_off = htons(offset); if (skb->next) fptr->frag_off |= htons(IP6_MF); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 7a3a29cc033b..077e29bba699 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -571,11 +571,10 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, int ptr, offset = 0, err = 0; u8 *prevhdr, nexthdr = 0; - hlen = ip6_find_1stfragopt(skb, &prevhdr); - if (hlen < 0) { - err = hlen; + err = ip6_find_1stfragopt(skb, &prevhdr); + if (err < 0) goto fail; - } + hlen = err; nexthdr = *prevhdr; mtu = ip6_skb_dst_mtu(skb); diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 04ed91da6cc9..01582966ffa0 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -29,6 +29,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, u8 frag_hdr_sz = sizeof(struct frag_hdr); __wsum csum; int tnl_hlen; + int err; mss = skb_shinfo(skb)->gso_size; if (unlikely(skb->len <= mss)) @@ -97,9 +98,10 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, /* Find the unfragmentable header and shift it left by frag_hdr_sz * bytes to insert fragment header. */ - unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr); - if (unfrag_ip6hlen < 0) - return ERR_PTR(unfrag_ip6hlen); + err = ip6_find_1stfragopt(skb, &prevhdr); + if (err < 0) + return ERR_PTR(err); + unfrag_ip6hlen = err; nexthdr = *prevhdr; *prevhdr = NEXTHDR_FRAGMENT; unfrag_len = (skb_network_header(skb) - skb_mac_header(skb)) + From 94c0bf3cbb9969ee79c758e4664be0e2a3ecf937 Mon Sep 17 00:00:00 2001 From: Tobias Jungel Date: Wed, 17 May 2017 09:29:12 +0200 Subject: [PATCH 14/52] bridge: netlink: check vlan_default_pvid range [ Upstream commit a285860211bf257b0e6d522dac6006794be348af ] Currently it is allowed to set the default pvid of a bridge to a value above VLAN_VID_MASK (0xfff). This patch adds a check to br_validate and returns -EINVAL in case the pvid is out of bounds. Reproduce by calling: [root@test ~]# ip l a type bridge [root@test ~]# ip l a type dummy [root@test ~]# ip l s bridge0 type bridge vlan_filtering 1 [root@test ~]# ip l s bridge0 type bridge vlan_default_pvid 9999 [root@test ~]# ip l s dummy0 master bridge0 [root@test ~]# bridge vlan port vlan ids bridge0 9999 PVID Egress Untagged dummy0 9999 PVID Egress Untagged Fixes: 0f963b7592ef ("bridge: netlink: add support for default_pvid") Acked-by: Nikolay Aleksandrov Signed-off-by: Tobias Jungel Acked-by: Sabrina Dubroca Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_netlink.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 413d18e37083..ff8bb41d713f 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -768,6 +768,13 @@ static int br_validate(struct nlattr *tb[], struct nlattr *data[]) return -EPROTONOSUPPORT; } } + + if (data[IFLA_BR_VLAN_DEFAULT_PVID]) { + __u16 defpvid = nla_get_u16(data[IFLA_BR_VLAN_DEFAULT_PVID]); + + if (defpvid >= VLAN_VID_MASK) + return -EINVAL; + } #endif return 0; From b543ccc4f627cea5d9aee6009aca39f3e2d4822e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Wed, 17 May 2017 16:31:41 +0200 Subject: [PATCH 15/52] qmi_wwan: add another Lenovo EM74xx device ID MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 486181bcb3248e2f1977f4e69387a898234a4e1e ] In their infinite wisdom, and never ending quest for end user frustration, Lenovo has decided to use a new USB device ID for the wwan modules in their 2017 laptops. The actual hardware is still the Sierra Wireless EM7455 or EM7430, depending on region. Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/qmi_wwan.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index c6f5d9a6bec6..582d8f0c6266 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -730,6 +730,8 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x1199, 0x9071, 10)}, /* Sierra Wireless MC74xx */ {QMI_FIXED_INTF(0x1199, 0x9079, 8)}, /* Sierra Wireless EM74xx */ {QMI_FIXED_INTF(0x1199, 0x9079, 10)}, /* Sierra Wireless EM74xx */ + {QMI_FIXED_INTF(0x1199, 0x907b, 8)}, /* Sierra Wireless EM74xx */ + {QMI_FIXED_INTF(0x1199, 0x907b, 10)}, /* Sierra Wireless EM74xx */ {QMI_FIXED_INTF(0x1bbb, 0x011e, 4)}, /* Telekom Speedstick LTE II (Alcatel One Touch L100V LTE) */ {QMI_FIXED_INTF(0x1bbb, 0x0203, 2)}, /* Alcatel L800MA */ {QMI_FIXED_INTF(0x2357, 0x0201, 4)}, /* TP-LINK HSUPA Modem MA180 */ From 3a854210f9a555681eef4480ef2400922b31ca2b Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 19 May 2017 22:20:29 +0800 Subject: [PATCH 16/52] bridge: start hello_timer when enabling KERNEL_STP in br_stp_start [ Upstream commit 6d18c732b95c0a9d35e9f978b4438bba15412284 ] Since commit 76b91c32dd86 ("bridge: stp: when using userspace stp stop kernel hello and hold timers"), bridge would not start hello_timer if stp_enabled is not KERNEL_STP when br_dev_open. The problem is even if users set stp_enabled with KERNEL_STP later, the timer will still not be started. It causes that KERNEL_STP can not really work. Users have to re-ifup the bridge to avoid this. This patch is to fix it by starting br->hello_timer when enabling KERNEL_STP in br_stp_start. As an improvement, it's also to start hello_timer again only when br->stp_enabled is KERNEL_STP in br_hello_timer_expired, there is no reason to start the timer again when it's NO_STP. Fixes: 76b91c32dd86 ("bridge: stp: when using userspace stp stop kernel hello and hold timers") Reported-by: Haidong Li Signed-off-by: Xin Long Acked-by: Nikolay Aleksandrov Reviewed-by: Ivan Vecera Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_stp_if.c | 1 + net/bridge/br_stp_timer.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index 8a7ada8bb947..57be733a99bc 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -166,6 +166,7 @@ static void br_stp_start(struct net_bridge *br) br_debug(br, "using kernel STP\n"); /* To start timers on any ports left in blocking */ + mod_timer(&br->hello_timer, jiffies + br->hello_time); br_port_state_selection(br); } diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c index 5f0f5af0ec35..7dbe6a5c31eb 100644 --- a/net/bridge/br_stp_timer.c +++ b/net/bridge/br_stp_timer.c @@ -40,7 +40,7 @@ static void br_hello_timer_expired(unsigned long arg) if (br->dev->flags & IFF_UP) { br_config_bpdu_generation(br); - if (br->stp_enabled != BR_USER_STP) + if (br->stp_enabled == BR_KERNEL_STP) mod_timer(&br->hello_timer, round_jiffies(jiffies + br->hello_time)); } From 38f02f2ce0ca58c45d95567a5d64f7dc90aa9c95 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 19 May 2017 14:17:48 -0700 Subject: [PATCH 17/52] ipv6: fix out of bound writes in __ip6_append_data() [ Upstream commit 232cd35d0804cc241eb887bb8d4d9b3b9881c64a ] Andrey Konovalov and idaifish@gmail.com reported crashes caused by one skb shared_info being overwritten from __ip6_append_data() Andrey program lead to following state : copy -4200 datalen 2000 fraglen 2040 maxfraglen 2040 alloclen 2048 transhdrlen 0 offset 0 fraggap 6200 The skb_copy_and_csum_bits(skb_prev, maxfraglen, data + transhdrlen, fraggap, 0); is overwriting skb->head and skb_shared_info Since we apparently detect this rare condition too late, move the code earlier to even avoid allocating skb and risking crashes. Once again, many thanks to Andrey and syzkaller team. Signed-off-by: Eric Dumazet Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Reported-by: Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_output.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 077e29bba699..1db17efe36c1 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1432,6 +1432,11 @@ alloc_new_skb: */ alloclen += sizeof(struct frag_hdr); + copy = datalen - transhdrlen - fraggap; + if (copy < 0) { + err = -EINVAL; + goto error; + } if (transhdrlen) { skb = sock_alloc_send_skb(sk, alloclen + hh_len, @@ -1481,13 +1486,9 @@ alloc_new_skb: data += fraggap; pskb_trim_unique(skb_prev, maxfraglen); } - copy = datalen - transhdrlen - fraggap; - - if (copy < 0) { - err = -EINVAL; - kfree_skb(skb); - goto error; - } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) { + if (copy > 0 && + getfrag(from, data + transhdrlen, offset, + copy, fraggap, skb) < 0) { err = -EFAULT; kfree_skb(skb); goto error; From 8380f16d0702817d2b5070d82972db03d2462e50 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Tue, 23 May 2017 13:38:42 -0400 Subject: [PATCH 18/52] be2net: Fix offload features for Q-in-Q packets [ Upstream commit cc6e9de62a7f84c9293a2ea41bc412b55bb46e85 ] At least some of the be2net cards do not seem to be capabled of performing checksum offload computions on Q-in-Q packets. In these case, the recevied checksum on the remote is invalid and TCP syn packets are dropped. This patch adds a call to check disbled acceleration features on Q-in-Q tagged traffic. CC: Sathya Perla CC: Ajit Khaparde CC: Sriharsha Basavapatna CC: Somnath Kotur Signed-off-by: Vladislav Yasevich Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/emulex/benet/be_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 8a1d9fffd7d6..26255862d1cf 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -5260,9 +5260,11 @@ static netdev_features_t be_features_check(struct sk_buff *skb, struct be_adapter *adapter = netdev_priv(dev); u8 l4_hdr = 0; - /* The code below restricts offload features for some tunneled packets. + /* The code below restricts offload features for some tunneled and + * Q-in-Q packets. * Offload features for normal (non tunnel) packets are unchanged. */ + features = vlan_features_check(skb, features); if (!skb->encapsulation || !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)) return features; From d7ed7fcecf2082f44ac6b1e7b69055a0e713af03 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Tue, 23 May 2017 13:38:43 -0400 Subject: [PATCH 19/52] virtio-net: enable TSO/checksum offloads for Q-in-Q vlans [ Upstream commit 2836b4f224d4fd7d1a2b23c3eecaf0f0ae199a74 ] Since virtio does not provide it's own ndo_features_check handler, TSO, and now checksum offload, are disabled for stacked vlans. Re-enable the support and let the host take care of it. This restores/improves Guest-to-Guest performance over Q-in-Q vlans. Acked-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: Vladislav Yasevich Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/virtio_net.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 0e2a19e58923..7f7c87762bc6 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1415,6 +1415,7 @@ static const struct net_device_ops virtnet_netdev = { #ifdef CONFIG_NET_RX_BUSY_POLL .ndo_busy_poll = virtnet_busy_poll, #endif + .ndo_features_check = passthru_features_check, }; static void virtnet_config_changed_work(struct work_struct *work) From fe22b6005538095f6e95e5152a99fc59e9ad198c Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Wed, 24 May 2017 09:59:31 -0700 Subject: [PATCH 20/52] tcp: avoid fastopen API to be used on AF_UNSPEC [ Upstream commit ba615f675281d76fd19aa03558777f81fb6b6084 ] Fastopen API should be used to perform fastopen operations on the TCP socket. It does not make sense to use fastopen API to perform disconnect by calling it with AF_UNSPEC. The fastopen data path is also prone to race conditions and bugs when using with AF_UNSPEC. One issue reported and analyzed by Vegard Nossum is as follows: +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Thread A: Thread B: ------------------------------------------------------------------------ sendto() - tcp_sendmsg() - sk_stream_memory_free() = 0 - goto wait_for_sndbuf - sk_stream_wait_memory() - sk_wait_event() // sleep | sendto(flags=MSG_FASTOPEN, dest_addr=AF_UNSPEC) | - tcp_sendmsg() | - tcp_sendmsg_fastopen() | - __inet_stream_connect() | - tcp_disconnect() //because of AF_UNSPEC | - tcp_transmit_skb()// send RST | - return 0; // no reconnect! | - sk_stream_wait_connect() | - sock_error() | - xchg(&sk->sk_err, 0) | - return -ECONNRESET - ... // wake up, see sk->sk_err == 0 - skb_entail() on TCP_CLOSE socket If the connection is reopened then we will send a brand new SYN packet after thread A has already queued a buffer. At this point I think the socket internal state (sequence numbers etc.) becomes messed up. When the new connection is closed, the FIN-ACK is rejected because the sequence number is outside the window. The other side tries to retransmit, but __tcp_retransmit_skb() calls tcp_trim_head() on an empty skb which corrupts the skb data length and hits a BUG() in copy_and_csum_bits(). +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Hence, this patch adds a check for AF_UNSPEC in the fastopen data path and return EOPNOTSUPP to user if such case happens. Fixes: cf60af03ca4e7 ("tcp: Fast Open client - sendmsg(MSG_FASTOPEN)") Reported-by: Vegard Nossum Signed-off-by: Wei Wang Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index e1d51370977b..4bd8678329d6 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1071,9 +1071,12 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *copied, size_t size) { struct tcp_sock *tp = tcp_sk(sk); + struct sockaddr *uaddr = msg->msg_name; int err, flags; - if (!(sysctl_tcp_fastopen & TFO_CLIENT_ENABLE)) + if (!(sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) || + (uaddr && msg->msg_namelen >= sizeof(uaddr->sa_family) && + uaddr->sa_family == AF_UNSPEC)) return -EOPNOTSUPP; if (tp->fastopen_req) return -EALREADY; /* Another Fast Open is in progress */ @@ -1086,7 +1089,7 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, tp->fastopen_req->size = size; flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0; - err = __inet_stream_connect(sk->sk_socket, msg->msg_name, + err = __inet_stream_connect(sk->sk_socket, uaddr, msg->msg_namelen, flags); *copied = tp->fastopen_req->copied; tcp_free_fastopen_req(tp); From 97f54575ff57da3f93f95006802a7892450c1898 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Thu, 25 May 2017 19:14:56 +0200 Subject: [PATCH 21/52] sctp: fix ICMP processing if skb is non-linear [ Upstream commit 804ec7ebe8ea003999ca8d1bfc499edc6a9e07df ] sometimes ICMP replies to INIT chunks are ignored by the client, even if the encapsulated SCTP headers match an open socket. This happens when the ICMP packet is carried by a paged skb: use skb_header_pointer() to read packet contents beyond the SCTP header, so that chunk header and initiate tag are validated correctly. v2: - don't use skb_header_pointer() to read the transport header, since icmp_socket_deliver() already puts these 8 bytes in the linear area. - change commit message to make specific reference to INIT chunks. Signed-off-by: Davide Caratti Acked-by: Marcelo Ricardo Leitner Acked-by: Vlad Yasevich Reviewed-by: Xin Long Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/input.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/net/sctp/input.c b/net/sctp/input.c index b6493b3f11a9..2d7859c03fd2 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -472,15 +472,14 @@ struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *skb, struct sctp_association **app, struct sctp_transport **tpp) { + struct sctp_init_chunk *chunkhdr, _chunkhdr; union sctp_addr saddr; union sctp_addr daddr; struct sctp_af *af; struct sock *sk = NULL; struct sctp_association *asoc; struct sctp_transport *transport = NULL; - struct sctp_init_chunk *chunkhdr; __u32 vtag = ntohl(sctphdr->vtag); - int len = skb->len - ((void *)sctphdr - (void *)skb->data); *app = NULL; *tpp = NULL; @@ -515,13 +514,16 @@ struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *skb, * discard the packet. */ if (vtag == 0) { - chunkhdr = (void *)sctphdr + sizeof(struct sctphdr); - if (len < sizeof(struct sctphdr) + sizeof(sctp_chunkhdr_t) - + sizeof(__be32) || + /* chunk header + first 4 octects of init header */ + chunkhdr = skb_header_pointer(skb, skb_transport_offset(skb) + + sizeof(struct sctphdr), + sizeof(struct sctp_chunkhdr) + + sizeof(__be32), &_chunkhdr); + if (!chunkhdr || chunkhdr->chunk_hdr.type != SCTP_CID_INIT || - ntohl(chunkhdr->init_hdr.init_tag) != asoc->c.my_vtag) { + ntohl(chunkhdr->init_hdr.init_tag) != asoc->c.my_vtag) goto out; - } + } else if (vtag != asoc->c.peer_vtag) { goto out; } From 338f665acb4ba0b2c7656cc2487326497220168f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 25 May 2017 14:27:35 -0700 Subject: [PATCH 22/52] ipv4: add reference counting to metrics [ Upstream commit 3fb07daff8e99243366a081e5129560734de4ada ] Andrey Konovalov reported crashes in ipv4_mtu() I could reproduce the issue with KASAN kernels, between 10.246.7.151 and 10.246.7.152 : 1) 20 concurrent netperf -t TCP_RR -H 10.246.7.152 -l 1000 & 2) At the same time run following loop : while : do ip ro add 10.246.7.152 dev eth0 src 10.246.7.151 mtu 1500 ip ro del 10.246.7.152 dev eth0 src 10.246.7.151 mtu 1500 done Cong Wang attempted to add back rt->fi in commit 82486aa6f1b9 ("ipv4: restore rt->fi for reference counting") but this proved to add some issues that were complex to solve. Instead, I suggested to add a refcount to the metrics themselves, being a standalone object (in particular, no reference to other objects) I tried to make this patch as small as possible to ease its backport, instead of being super clean. Note that we believe that only ipv4 dst need to take care of the metric refcount. But if this is wrong, this patch adds the basic infrastructure to extend this to other families. Many thanks to Julian Anastasov for reviewing this patch, and Cong Wang for his efforts on this problem. Fixes: 2860583fe840 ("ipv4: Kill rt->fi") Signed-off-by: Eric Dumazet Reported-by: Andrey Konovalov Reviewed-by: Julian Anastasov Acked-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/dst.h | 8 +++++++- include/net/ip_fib.h | 10 +++++----- net/core/dst.c | 23 ++++++++++++++--------- net/ipv4/fib_semantics.c | 17 ++++++++++------- net/ipv4/route.c | 10 +++++++++- 5 files changed, 45 insertions(+), 23 deletions(-) diff --git a/include/net/dst.h b/include/net/dst.h index c7329dcd90cc..e4f450617919 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -110,10 +110,16 @@ struct dst_entry { }; }; +struct dst_metrics { + u32 metrics[RTAX_MAX]; + atomic_t refcnt; +}; +extern const struct dst_metrics dst_default_metrics; + u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old); -extern const u32 dst_default_metrics[]; #define DST_METRICS_READ_ONLY 0x1UL +#define DST_METRICS_REFCOUNTED 0x2UL #define DST_METRICS_FLAGS 0x3UL #define __DST_METRICS_PTR(Y) \ ((u32 *)((Y) & ~DST_METRICS_FLAGS)) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 3f98233388fb..bda1721e9622 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -112,11 +112,11 @@ struct fib_info { unsigned char fib_type; __be32 fib_prefsrc; u32 fib_priority; - u32 *fib_metrics; -#define fib_mtu fib_metrics[RTAX_MTU-1] -#define fib_window fib_metrics[RTAX_WINDOW-1] -#define fib_rtt fib_metrics[RTAX_RTT-1] -#define fib_advmss fib_metrics[RTAX_ADVMSS-1] + struct dst_metrics *fib_metrics; +#define fib_mtu fib_metrics->metrics[RTAX_MTU-1] +#define fib_window fib_metrics->metrics[RTAX_WINDOW-1] +#define fib_rtt fib_metrics->metrics[RTAX_RTT-1] +#define fib_advmss fib_metrics->metrics[RTAX_ADVMSS-1] int fib_nhs; #ifdef CONFIG_IP_ROUTE_MULTIPATH int fib_weight; diff --git a/net/core/dst.c b/net/core/dst.c index a1656e3b8d72..d7ad628bf64e 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -151,13 +151,13 @@ int dst_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL(dst_discard_out); -const u32 dst_default_metrics[RTAX_MAX + 1] = { +const struct dst_metrics dst_default_metrics = { /* This initializer is needed to force linker to place this variable * into const section. Otherwise it might end into bss section. * We really want to avoid false sharing on this variable, and catch * any writes on it. */ - [RTAX_MAX] = 0xdeadbeef, + .refcnt = ATOMIC_INIT(1), }; void dst_init(struct dst_entry *dst, struct dst_ops *ops, @@ -169,7 +169,7 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops, if (dev) dev_hold(dev); dst->ops = ops; - dst_init_metrics(dst, dst_default_metrics, true); + dst_init_metrics(dst, dst_default_metrics.metrics, true); dst->expires = 0UL; dst->path = dst; dst->from = NULL; @@ -315,25 +315,30 @@ EXPORT_SYMBOL(dst_release); u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old) { - u32 *p = kmalloc(sizeof(u32) * RTAX_MAX, GFP_ATOMIC); + struct dst_metrics *p = kmalloc(sizeof(*p), GFP_ATOMIC); if (p) { - u32 *old_p = __DST_METRICS_PTR(old); + struct dst_metrics *old_p = (struct dst_metrics *)__DST_METRICS_PTR(old); unsigned long prev, new; - memcpy(p, old_p, sizeof(u32) * RTAX_MAX); + atomic_set(&p->refcnt, 1); + memcpy(p->metrics, old_p->metrics, sizeof(p->metrics)); new = (unsigned long) p; prev = cmpxchg(&dst->_metrics, old, new); if (prev != old) { kfree(p); - p = __DST_METRICS_PTR(prev); + p = (struct dst_metrics *)__DST_METRICS_PTR(prev); if (prev & DST_METRICS_READ_ONLY) p = NULL; + } else if (prev & DST_METRICS_REFCOUNTED) { + if (atomic_dec_and_test(&old_p->refcnt)) + kfree(old_p); } } - return p; + BUILD_BUG_ON(offsetof(struct dst_metrics, metrics) != 0); + return (u32 *)p; } EXPORT_SYMBOL(dst_cow_metrics_generic); @@ -342,7 +347,7 @@ void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old) { unsigned long prev, new; - new = ((unsigned long) dst_default_metrics) | DST_METRICS_READ_ONLY; + new = ((unsigned long) &dst_default_metrics) | DST_METRICS_READ_ONLY; prev = cmpxchg(&dst->_metrics, old, new); if (prev == old) kfree(__DST_METRICS_PTR(old)); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 67d44aa9e09f..b2504712259f 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -204,6 +204,7 @@ static void rt_fibinfo_free_cpus(struct rtable __rcu * __percpu *rtp) static void free_fib_info_rcu(struct rcu_head *head) { struct fib_info *fi = container_of(head, struct fib_info, rcu); + struct dst_metrics *m; change_nexthops(fi) { if (nexthop_nh->nh_dev) @@ -214,8 +215,9 @@ static void free_fib_info_rcu(struct rcu_head *head) rt_fibinfo_free(&nexthop_nh->nh_rth_input); } endfor_nexthops(fi); - if (fi->fib_metrics != (u32 *) dst_default_metrics) - kfree(fi->fib_metrics); + m = fi->fib_metrics; + if (m != &dst_default_metrics && atomic_dec_and_test(&m->refcnt)) + kfree(m); kfree(fi); } @@ -982,11 +984,11 @@ fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg) val = 255; if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK)) return -EINVAL; - fi->fib_metrics[type - 1] = val; + fi->fib_metrics->metrics[type - 1] = val; } if (ecn_ca) - fi->fib_metrics[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA; + fi->fib_metrics->metrics[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA; return 0; } @@ -1044,11 +1046,12 @@ struct fib_info *fib_create_info(struct fib_config *cfg) goto failure; fib_info_cnt++; if (cfg->fc_mx) { - fi->fib_metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); + fi->fib_metrics = kzalloc(sizeof(*fi->fib_metrics), GFP_KERNEL); if (!fi->fib_metrics) goto failure; + atomic_set(&fi->fib_metrics->refcnt, 1); } else - fi->fib_metrics = (u32 *) dst_default_metrics; + fi->fib_metrics = (struct dst_metrics *)&dst_default_metrics; fi->fib_net = net; fi->fib_protocol = cfg->fc_protocol; @@ -1251,7 +1254,7 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, if (fi->fib_priority && nla_put_u32(skb, RTA_PRIORITY, fi->fib_priority)) goto nla_put_failure; - if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0) + if (rtnetlink_put_metrics(skb, fi->fib_metrics->metrics) < 0) goto nla_put_failure; if (fi->fib_prefsrc && diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 375248b900ba..c295d882c6e0 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1356,8 +1356,12 @@ static void rt_add_uncached_list(struct rtable *rt) static void ipv4_dst_destroy(struct dst_entry *dst) { + struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst); struct rtable *rt = (struct rtable *) dst; + if (p != &dst_default_metrics && atomic_dec_and_test(&p->refcnt)) + kfree(p); + if (!list_empty(&rt->rt_uncached)) { struct uncached_list *ul = rt->rt_uncached_list; @@ -1409,7 +1413,11 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, rt->rt_gateway = nh->nh_gw; rt->rt_uses_gateway = 1; } - dst_init_metrics(&rt->dst, fi->fib_metrics, true); + dst_init_metrics(&rt->dst, fi->fib_metrics->metrics, true); + if (fi->fib_metrics != &dst_default_metrics) { + rt->dst._metrics |= DST_METRICS_REFCOUNTED; + atomic_inc(&fi->fib_metrics->refcnt); + } #ifdef CONFIG_IP_ROUTE_CLASSID rt->dst.tclassid = nh->nh_tclassid; #endif From 605b6b2b4d8a8abdafb675abda2e43c11d1b3921 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 11 May 2017 15:24:41 -0700 Subject: [PATCH 23/52] netem: fix skb_orphan_partial() commit f6ba8d33cfbb46df569972e64dbb5bb7e929bfd9 upstream. I should have known that lowering skb->truesize was dangerous :/ In case packets are not leaving the host via a standard Ethernet device, but looped back to local sockets, bad things can happen, as reported by Michael Madsen ( https://bugzilla.kernel.org/show_bug.cgi?id=195713 ) So instead of tweaking skb->truesize, lets change skb->destructor and keep a reference on the owner socket via its sk_refcnt. Fixes: f2f872f9272a ("netem: Introduce skb_orphan_partial() helper") Signed-off-by: Eric Dumazet Reported-by: Michael Madsen Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/sock.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/core/sock.c b/net/core/sock.c index 9c708a5fb751..bd2fad27891e 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1690,17 +1690,17 @@ EXPORT_SYMBOL(skb_set_owner_w); void skb_orphan_partial(struct sk_buff *skb) { - /* TCP stack sets skb->ooo_okay based on sk_wmem_alloc, - * so we do not completely orphan skb, but transfert all - * accounted bytes but one, to avoid unexpected reorders. - */ if (skb->destructor == sock_wfree #ifdef CONFIG_INET || skb->destructor == tcp_wfree #endif ) { - atomic_sub(skb->truesize - 1, &skb->sk->sk_wmem_alloc); - skb->truesize = 1; + struct sock *sk = skb->sk; + + if (atomic_inc_not_zero(&sk->sk_refcnt)) { + atomic_sub(skb->truesize, &sk->sk_wmem_alloc); + skb->destructor = sock_efree; + } } else { skb_orphan(skb); } From e989f9bf2a9dc7064d71d7dcba7eb4bd1040f9a1 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Tue, 23 May 2017 17:49:13 +0200 Subject: [PATCH 24/52] net: phy: marvell: Limit errata to 88m1101 commit f2899788353c13891412b273fdff5f02d49aa40f upstream. The 88m1101 has an errata when configuring autoneg. However, it was being applied to many other Marvell PHYs as well. Limit its scope to just the 88m1101. Fixes: 76884679c644 ("phylib: Add support for Marvell 88e1111S and 88e1145") Reported-by: Daniel Walker Signed-off-by: Andrew Lunn Acked-by: Harini Katakam Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/marvell.c | 66 ++++++++++++++++++++++----------------- 1 file changed, 37 insertions(+), 29 deletions(-) diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index 0240552b50f3..d2701c53ed68 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -203,34 +203,6 @@ static int marvell_config_aneg(struct phy_device *phydev) { int err; - /* The Marvell PHY has an errata which requires - * that certain registers get written in order - * to restart autonegotiation */ - err = phy_write(phydev, MII_BMCR, BMCR_RESET); - - if (err < 0) - return err; - - err = phy_write(phydev, 0x1d, 0x1f); - if (err < 0) - return err; - - err = phy_write(phydev, 0x1e, 0x200c); - if (err < 0) - return err; - - err = phy_write(phydev, 0x1d, 0x5); - if (err < 0) - return err; - - err = phy_write(phydev, 0x1e, 0); - if (err < 0) - return err; - - err = phy_write(phydev, 0x1e, 0x100); - if (err < 0) - return err; - err = marvell_set_polarity(phydev, phydev->mdix); if (err < 0) return err; @@ -264,6 +236,42 @@ static int marvell_config_aneg(struct phy_device *phydev) return 0; } +static int m88e1101_config_aneg(struct phy_device *phydev) +{ + int err; + + /* This Marvell PHY has an errata which requires + * that certain registers get written in order + * to restart autonegotiation + */ + err = phy_write(phydev, MII_BMCR, BMCR_RESET); + + if (err < 0) + return err; + + err = phy_write(phydev, 0x1d, 0x1f); + if (err < 0) + return err; + + err = phy_write(phydev, 0x1e, 0x200c); + if (err < 0) + return err; + + err = phy_write(phydev, 0x1d, 0x5); + if (err < 0) + return err; + + err = phy_write(phydev, 0x1e, 0); + if (err < 0) + return err; + + err = phy_write(phydev, 0x1e, 0x100); + if (err < 0) + return err; + + return marvell_config_aneg(phydev); +} + #ifdef CONFIG_OF_MDIO /* * Set and/or override some configuration registers based on the @@ -993,7 +1001,7 @@ static struct phy_driver marvell_drivers[] = { .name = "Marvell 88E1101", .features = PHY_GBIT_FEATURES, .flags = PHY_HAS_INTERRUPT, - .config_aneg = &marvell_config_aneg, + .config_aneg = &m88e1101_config_aneg, .read_status = &genphy_read_status, .ack_interrupt = &marvell_ack_interrupt, .config_intr = &marvell_config_intr, From 1b5286ba9f13073aa47893a3e28f570b71483c67 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Tue, 23 May 2017 13:38:41 -0400 Subject: [PATCH 25/52] vlan: Fix tcp checksum offloads in Q-in-Q vlans commit 35d2f80b07bbe03fb358afb0bdeff7437a7d67ff upstream. It appears that TCP checksum offloading has been broken for Q-in-Q vlans. The behavior was execerbated by the series commit afb0bc972b52 ("Merge branch 'stacked_vlan_tso'") that that enabled accleleration features on stacked vlans. However, event without that series, it is possible to trigger this issue. It just requires a lot more specialized configuration. The root cause is the interaction between how netdev_intersect_features() works, the features actually set on the vlan devices and HW having the ability to run checksum with longer headers. The issue starts when netdev_interesect_features() replaces NETIF_F_HW_CSUM with a combination of NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM, if the HW advertises IP|IPV6 specific checksums. This happens for tagged and multi-tagged packets. However, HW that enables IP|IPV6 checksum offloading doesn't gurantee that packets with arbitrarily long headers can be checksummed. This patch disables IP|IPV6 checksums on the packet for multi-tagged packets. CC: Toshiaki Makita CC: Michal Kubecek Signed-off-by: Vladislav Yasevich Acked-by: Toshiaki Makita Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/if_vlan.h | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 67ce5bd3b56a..19db03dbbd00 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -616,15 +616,16 @@ static inline bool skb_vlan_tagged_multi(const struct sk_buff *skb) static inline netdev_features_t vlan_features_check(const struct sk_buff *skb, netdev_features_t features) { - if (skb_vlan_tagged_multi(skb)) - features = netdev_intersect_features(features, - NETIF_F_SG | - NETIF_F_HIGHDMA | - NETIF_F_FRAGLIST | - NETIF_F_GEN_CSUM | - NETIF_F_HW_VLAN_CTAG_TX | - NETIF_F_HW_VLAN_STAG_TX); - + if (skb_vlan_tagged_multi(skb)) { + /* In the case of multi-tagged packets, use a direct mask + * instead of using netdev_interesect_features(), to make + * sure that only devices supporting NETIF_F_HW_CSUM will + * have checksum offloading support. + */ + features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_HW_CSUM | + NETIF_F_FRAGLIST | NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_STAG_TX; + } return features; } From 2ca57fc8243655022141ccb89c913abd0470f89a Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Fri, 5 May 2017 11:06:50 +0200 Subject: [PATCH 26/52] i2c: i2c-tiny-usb: fix buffer not being DMA capable commit 5165da5923d6c7df6f2927b0113b2e4d9288661e upstream. Since v4.9 i2c-tiny-usb generates the below call trace and longer works, since it can't communicate with the USB device. The reason is, that since v4.9 the USB stack checks, that the buffer it should transfer is DMA capable. This was a requirement since v2.2 days, but it usually worked nevertheless. [ 17.504959] ------------[ cut here ]------------ [ 17.505488] WARNING: CPU: 0 PID: 93 at drivers/usb/core/hcd.c:1587 usb_hcd_map_urb_for_dma+0x37c/0x570 [ 17.506545] transfer buffer not dma capable [ 17.507022] Modules linked in: [ 17.507370] CPU: 0 PID: 93 Comm: i2cdetect Not tainted 4.11.0-rc8+ #10 [ 17.508103] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014 [ 17.509039] Call Trace: [ 17.509320] ? dump_stack+0x5c/0x78 [ 17.509714] ? __warn+0xbe/0xe0 [ 17.510073] ? warn_slowpath_fmt+0x5a/0x80 [ 17.510532] ? nommu_map_sg+0xb0/0xb0 [ 17.510949] ? usb_hcd_map_urb_for_dma+0x37c/0x570 [ 17.511482] ? usb_hcd_submit_urb+0x336/0xab0 [ 17.511976] ? wait_for_completion_timeout+0x12f/0x1a0 [ 17.512549] ? wait_for_completion_timeout+0x65/0x1a0 [ 17.513125] ? usb_start_wait_urb+0x65/0x160 [ 17.513604] ? usb_control_msg+0xdc/0x130 [ 17.514061] ? usb_xfer+0xa4/0x2a0 [ 17.514445] ? __i2c_transfer+0x108/0x3c0 [ 17.514899] ? i2c_transfer+0x57/0xb0 [ 17.515310] ? i2c_smbus_xfer_emulated+0x12f/0x590 [ 17.515851] ? _raw_spin_unlock_irqrestore+0x11/0x20 [ 17.516408] ? i2c_smbus_xfer+0x125/0x330 [ 17.516876] ? i2c_smbus_xfer+0x125/0x330 [ 17.517329] ? i2cdev_ioctl_smbus+0x1c1/0x2b0 [ 17.517824] ? i2cdev_ioctl+0x75/0x1c0 [ 17.518248] ? do_vfs_ioctl+0x9f/0x600 [ 17.518671] ? vfs_write+0x144/0x190 [ 17.519078] ? SyS_ioctl+0x74/0x80 [ 17.519463] ? entry_SYSCALL_64_fastpath+0x1e/0xad [ 17.519959] ---[ end trace d047c04982f5ac50 ]--- Signed-off-by: Sebastian Reichel Reviewed-by: Greg Kroah-Hartman Acked-by: Till Harbaum Signed-off-by: Wolfram Sang Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-tiny-usb.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/drivers/i2c/busses/i2c-tiny-usb.c b/drivers/i2c/busses/i2c-tiny-usb.c index 0ed77eeff31e..a2e3dd715380 100644 --- a/drivers/i2c/busses/i2c-tiny-usb.c +++ b/drivers/i2c/busses/i2c-tiny-usb.c @@ -178,22 +178,39 @@ static int usb_read(struct i2c_adapter *adapter, int cmd, int value, int index, void *data, int len) { struct i2c_tiny_usb *dev = (struct i2c_tiny_usb *)adapter->algo_data; + void *dmadata = kmalloc(len, GFP_KERNEL); + int ret; + + if (!dmadata) + return -ENOMEM; /* do control transfer */ - return usb_control_msg(dev->usb_dev, usb_rcvctrlpipe(dev->usb_dev, 0), + ret = usb_control_msg(dev->usb_dev, usb_rcvctrlpipe(dev->usb_dev, 0), cmd, USB_TYPE_VENDOR | USB_RECIP_INTERFACE | - USB_DIR_IN, value, index, data, len, 2000); + USB_DIR_IN, value, index, dmadata, len, 2000); + + memcpy(data, dmadata, len); + kfree(dmadata); + return ret; } static int usb_write(struct i2c_adapter *adapter, int cmd, int value, int index, void *data, int len) { struct i2c_tiny_usb *dev = (struct i2c_tiny_usb *)adapter->algo_data; + void *dmadata = kmemdup(data, len, GFP_KERNEL); + int ret; + + if (!dmadata) + return -ENOMEM; /* do control transfer */ - return usb_control_msg(dev->usb_dev, usb_sndctrlpipe(dev->usb_dev, 0), + ret = usb_control_msg(dev->usb_dev, usb_sndctrlpipe(dev->usb_dev, 0), cmd, USB_TYPE_VENDOR | USB_RECIP_INTERFACE, - value, index, data, len, 2000); + value, index, dmadata, len, 2000); + + kfree(dmadata); + return ret; } static void i2c_tiny_usb_free(struct i2c_tiny_usb *dev) From c0fd730b678decdc060782db3736b5467b644434 Mon Sep 17 00:00:00 2001 From: Srinath Mannam Date: Thu, 18 May 2017 22:27:40 +0530 Subject: [PATCH 27/52] mmc: sdhci-iproc: suppress spurious interrupt with Multiblock read commit f5f968f2371ccdebb8a365487649673c9af68d09 upstream. The stingray SDHCI hardware supports ACMD12 and automatically issues after multi block transfer completed. If ACMD12 in SDHCI is disabled, spurious tx done interrupts are seen on multi block read command with below error message: Got data interrupt 0x00000002 even though no data operation was in progress. This patch uses SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12 to enable ACM12 support in SDHCI hardware and suppress spurious interrupt. Signed-off-by: Srinath Mannam Reviewed-by: Ray Jui Reviewed-by: Scott Branden Acked-by: Adrian Hunter Fixes: b580c52d58d9 ("mmc: sdhci-iproc: add IPROC SDHCI driver") Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-iproc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-iproc.c b/drivers/mmc/host/sdhci-iproc.c index 3b423b0ad8e7..f280744578e4 100644 --- a/drivers/mmc/host/sdhci-iproc.c +++ b/drivers/mmc/host/sdhci-iproc.c @@ -156,7 +156,8 @@ static const struct sdhci_ops sdhci_iproc_ops = { }; static const struct sdhci_pltfm_data sdhci_iproc_pltfm_data = { - .quirks = SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK, + .quirks = SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK | + SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12, .quirks2 = SDHCI_QUIRK2_ACMD23_BROKEN, .ops = &sdhci_iproc_ops, }; From 58b7cb10f6e2ed0293401e2a8137e046fc71efa9 Mon Sep 17 00:00:00 2001 From: Jason Gerecke Date: Tue, 25 Apr 2017 11:29:56 -0700 Subject: [PATCH 28/52] HID: wacom: Have wacom_tpc_irq guard against possible NULL dereference commit 2ac97f0f6654da14312d125005c77a6010e0ea38 upstream. The following Smatch complaint was generated in response to commit 2a6cdbd ("HID: wacom: Introduce new 'touch_input' device"): drivers/hid/wacom_wac.c:1586 wacom_tpc_irq() error: we previously assumed 'wacom->touch_input' could be null (see line 1577) The 'touch_input' and 'pen_input' variables point to the 'struct input_dev' used for relaying touch and pen events to userspace, respectively. If a device does not have a touch interface or pen interface, the associated input variable is NULL. The 'wacom_tpc_irq()' function is responsible for forwarding input reports to a more-specific IRQ handler function. An unknown report could theoretically be mistaken as e.g. a touch report on a device which does not have a touch interface. This can be prevented by only calling the pen/touch functions are called when the pen/touch pointers are valid. Fixes: 2a6cdbd ("HID: wacom: Introduce new 'touch_input' device") Signed-off-by: Jason Gerecke Reviewed-by: Ping Cheng Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/wacom_wac.c | 47 +++++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index 35e3fd9fadf6..b62c50d1b1e4 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -1440,37 +1440,38 @@ static int wacom_tpc_irq(struct wacom_wac *wacom, size_t len) { unsigned char *data = wacom->data; - if (wacom->pen_input) + if (wacom->pen_input) { dev_dbg(wacom->pen_input->dev.parent, "%s: received report #%d\n", __func__, data[0]); - else if (wacom->touch_input) + + if (len == WACOM_PKGLEN_PENABLED || + data[0] == WACOM_REPORT_PENABLED) + return wacom_tpc_pen(wacom); + } + else if (wacom->touch_input) { dev_dbg(wacom->touch_input->dev.parent, "%s: received report #%d\n", __func__, data[0]); - switch (len) { - case WACOM_PKGLEN_TPC1FG: - return wacom_tpc_single_touch(wacom, len); - - case WACOM_PKGLEN_TPC2FG: - return wacom_tpc_mt_touch(wacom); - - case WACOM_PKGLEN_PENABLED: - return wacom_tpc_pen(wacom); - - default: - switch (data[0]) { - case WACOM_REPORT_TPC1FG: - case WACOM_REPORT_TPCHID: - case WACOM_REPORT_TPCST: - case WACOM_REPORT_TPC1FGE: + switch (len) { + case WACOM_PKGLEN_TPC1FG: return wacom_tpc_single_touch(wacom, len); - case WACOM_REPORT_TPCMT: - case WACOM_REPORT_TPCMT2: - return wacom_mt_touch(wacom); + case WACOM_PKGLEN_TPC2FG: + return wacom_tpc_mt_touch(wacom); - case WACOM_REPORT_PENABLED: - return wacom_tpc_pen(wacom); + default: + switch (data[0]) { + case WACOM_REPORT_TPC1FG: + case WACOM_REPORT_TPCHID: + case WACOM_REPORT_TPCST: + case WACOM_REPORT_TPC1FGE: + return wacom_tpc_single_touch(wacom, len); + + case WACOM_REPORT_TPCMT: + case WACOM_REPORT_TPCMT2: + return wacom_mt_touch(wacom); + + } } } From 3529600b16018a1f831f54c32a2d7b2baa0ad036 Mon Sep 17 00:00:00 2001 From: Ram Pai Date: Thu, 26 Jan 2017 16:37:01 -0200 Subject: [PATCH 29/52] scsi: mpt3sas: Force request partial completion alignment commit f2e767bb5d6ee0d988cb7d4e54b0b21175802b6b upstream. The firmware or device, possibly under a heavy I/O load, can return on a partial unaligned boundary. Scsi-ml expects these requests to be completed on an alignment boundary. Scsi-ml blindly requeues the I/O without checking the alignment boundary of the I/O request for the remaining bytes. This leads to errors, since devices cannot perform non-aligned read/write operations. This patch fixes the issue in the driver. It aligns unaligned completions of FS requests, by truncating them to the nearest alignment boundary. [mkp: simplified if statement] Reported-by: Mauricio Faria De Oliveira Signed-off-by: Guilherme G. Piccoli Signed-off-by: Ram Pai Acked-by: Sreekanth Reddy Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/mpt3sas/mpt3sas_scsih.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 8a5fbdb45cfd..e333029e4b6c 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -4452,6 +4452,7 @@ _scsih_io_done(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index, u32 reply) struct MPT3SAS_DEVICE *sas_device_priv_data; u32 response_code = 0; unsigned long flags; + unsigned int sector_sz; mpi_reply = mpt3sas_base_get_reply_virt_addr(ioc, reply); scmd = _scsih_scsi_lookup_get_clear(ioc, smid); @@ -4510,6 +4511,20 @@ _scsih_io_done(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index, u32 reply) } xfer_cnt = le32_to_cpu(mpi_reply->TransferCount); + + /* In case of bogus fw or device, we could end up having + * unaligned partial completion. We can force alignment here, + * then scsi-ml does not need to handle this misbehavior. + */ + sector_sz = scmd->device->sector_size; + if (unlikely(scmd->request->cmd_type == REQ_TYPE_FS && sector_sz && + xfer_cnt % sector_sz)) { + sdev_printk(KERN_INFO, scmd->device, + "unaligned partial completion avoided (xfer_cnt=%u, sector_sz=%u)\n", + xfer_cnt, sector_sz); + xfer_cnt = round_down(xfer_cnt, sector_sz); + } + scsi_set_resid(scmd, scsi_bufflen(scmd) - xfer_cnt); if (ioc_status & MPI2_IOCSTATUS_FLAG_LOG_INFO_AVAILABLE) log_info = le32_to_cpu(mpi_reply->IOCLogInfo); From 15de2e4c90b7f038240077b273dfc3377ceabc13 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 11 May 2017 13:14:14 -0400 Subject: [PATCH 30/52] drm/radeon/ci: disable mclk switching for high refresh rates (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 58d7e3e427db1bd68f33025519a9468140280a75 upstream. Even if the vblank period would allow it, it still seems to be problematic on some cards. v2: fix logic inversion (Nils) bug: https://bugs.freedesktop.org/show_bug.cgi?id=96868 Acked-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/ci_dpm.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c index 4a09947be244..3c32f095a873 100644 --- a/drivers/gpu/drm/radeon/ci_dpm.c +++ b/drivers/gpu/drm/radeon/ci_dpm.c @@ -776,6 +776,12 @@ bool ci_dpm_vblank_too_short(struct radeon_device *rdev) u32 vblank_time = r600_dpm_get_vblank_time(rdev); u32 switch_limit = pi->mem_gddr5 ? 450 : 300; + /* disable mclk switching if the refresh is >120Hz, even if the + * blanking period would allow it + */ + if (r600_dpm_get_vrefresh(rdev) > 120) + return true; + if (vblank_time < switch_limit) return true; else From 69877793e23da174ed6ac8ffda83809a60f059ac Mon Sep 17 00:00:00 2001 From: Lyude Date: Thu, 11 May 2017 19:31:12 -0400 Subject: [PATCH 31/52] drm/radeon: Unbreak HPD handling for r600+ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 3d18e33735a02b1a90aecf14410bf3edbfd4d3dc upstream. We end up reading the interrupt register for HPD5, and then writing it to HPD6 which on systems without anything using HPD5 results in permanently disabling hotplug on one of the display outputs after the first time we acknowledge a hotplug interrupt from the GPU. This code is really bad. But for now, let's just fix this. I will hopefully have a large patch series to refactor all of this soon. Reviewed-by: Christian König Signed-off-by: Lyude Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/cik.c | 4 ++-- drivers/gpu/drm/radeon/evergreen.c | 4 ++-- drivers/gpu/drm/radeon/r600.c | 2 +- drivers/gpu/drm/radeon/si.c | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index f81fb2641097..134874cab4c7 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -7762,7 +7762,7 @@ static inline void cik_irq_ack(struct radeon_device *rdev) WREG32(DC_HPD5_INT_CONTROL, tmp); } if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) { - tmp = RREG32(DC_HPD5_INT_CONTROL); + tmp = RREG32(DC_HPD6_INT_CONTROL); tmp |= DC_HPDx_INT_ACK; WREG32(DC_HPD6_INT_CONTROL, tmp); } @@ -7792,7 +7792,7 @@ static inline void cik_irq_ack(struct radeon_device *rdev) WREG32(DC_HPD5_INT_CONTROL, tmp); } if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) { - tmp = RREG32(DC_HPD5_INT_CONTROL); + tmp = RREG32(DC_HPD6_INT_CONTROL); tmp |= DC_HPDx_RX_INT_ACK; WREG32(DC_HPD6_INT_CONTROL, tmp); } diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 32491355a1d4..ba9e6ed4ae54 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -4924,7 +4924,7 @@ static void evergreen_irq_ack(struct radeon_device *rdev) WREG32(DC_HPD5_INT_CONTROL, tmp); } if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) { - tmp = RREG32(DC_HPD5_INT_CONTROL); + tmp = RREG32(DC_HPD6_INT_CONTROL); tmp |= DC_HPDx_INT_ACK; WREG32(DC_HPD6_INT_CONTROL, tmp); } @@ -4955,7 +4955,7 @@ static void evergreen_irq_ack(struct radeon_device *rdev) WREG32(DC_HPD5_INT_CONTROL, tmp); } if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) { - tmp = RREG32(DC_HPD5_INT_CONTROL); + tmp = RREG32(DC_HPD6_INT_CONTROL); tmp |= DC_HPDx_RX_INT_ACK; WREG32(DC_HPD6_INT_CONTROL, tmp); } diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index cc2fdf0be37a..0e20c08f8977 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -3945,7 +3945,7 @@ static void r600_irq_ack(struct radeon_device *rdev) WREG32(DC_HPD5_INT_CONTROL, tmp); } if (rdev->irq.stat_regs.r600.disp_int_cont2 & DC_HPD6_INTERRUPT) { - tmp = RREG32(DC_HPD5_INT_CONTROL); + tmp = RREG32(DC_HPD6_INT_CONTROL); tmp |= DC_HPDx_INT_ACK; WREG32(DC_HPD6_INT_CONTROL, tmp); } diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index f878d6962da5..5cf3a2cbc07e 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -6335,7 +6335,7 @@ static inline void si_irq_ack(struct radeon_device *rdev) WREG32(DC_HPD5_INT_CONTROL, tmp); } if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) { - tmp = RREG32(DC_HPD5_INT_CONTROL); + tmp = RREG32(DC_HPD6_INT_CONTROL); tmp |= DC_HPDx_INT_ACK; WREG32(DC_HPD6_INT_CONTROL, tmp); } @@ -6366,7 +6366,7 @@ static inline void si_irq_ack(struct radeon_device *rdev) WREG32(DC_HPD5_INT_CONTROL, tmp); } if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) { - tmp = RREG32(DC_HPD5_INT_CONTROL); + tmp = RREG32(DC_HPD6_INT_CONTROL); tmp |= DC_HPDx_RX_INT_ACK; WREG32(DC_HPD6_INT_CONTROL, tmp); } From 85ddc41a6c4ad78eab245f9c0d64090621da1392 Mon Sep 17 00:00:00 2001 From: Nicolas Iooss Date: Fri, 2 Jun 2017 14:46:28 -0700 Subject: [PATCH 32/52] pcmcia: remove left-over %Z format commit ff5a20169b98d84ad8d7f99f27c5ebbb008204d6 upstream. Commit 5b5e0928f742 ("lib/vsprintf.c: remove %Z support") removed some usages of format %Z but forgot "%.2Zx". This makes clang 4.0 reports a -Wformat-extra-args warning because it does not know about %Z. Replace %Z with %z. Link: http://lkml.kernel.org/r/20170520090946.22562-1-nicolas.iooss_linux@m4x.org Signed-off-by: Nicolas Iooss Cc: Harald Welte Cc: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/char/pcmcia/cm4040_cs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/char/pcmcia/cm4040_cs.c b/drivers/char/pcmcia/cm4040_cs.c index fc061f7c2bd1..a7de8ae185a5 100644 --- a/drivers/char/pcmcia/cm4040_cs.c +++ b/drivers/char/pcmcia/cm4040_cs.c @@ -374,7 +374,7 @@ static ssize_t cm4040_write(struct file *filp, const char __user *buf, rc = write_sync_reg(SCR_HOST_TO_READER_START, dev); if (rc <= 0) { - DEBUGP(5, dev, "write_sync_reg c=%.2Zx\n", rc); + DEBUGP(5, dev, "write_sync_reg c=%.2zx\n", rc); DEBUGP(2, dev, "<- cm4040_write (failed)\n"); if (rc == -ERESTARTSYS) return rc; @@ -387,7 +387,7 @@ static ssize_t cm4040_write(struct file *filp, const char __user *buf, for (i = 0; i < bytes_to_write; i++) { rc = wait_for_bulk_out_ready(dev); if (rc <= 0) { - DEBUGP(5, dev, "wait_for_bulk_out_ready rc=%.2Zx\n", + DEBUGP(5, dev, "wait_for_bulk_out_ready rc=%.2zx\n", rc); DEBUGP(2, dev, "<- cm4040_write (failed)\n"); if (rc == -ERESTARTSYS) @@ -403,7 +403,7 @@ static ssize_t cm4040_write(struct file *filp, const char __user *buf, rc = write_sync_reg(SCR_HOST_TO_READER_DONE, dev); if (rc <= 0) { - DEBUGP(5, dev, "write_sync_reg c=%.2Zx\n", rc); + DEBUGP(5, dev, "write_sync_reg c=%.2zx\n", rc); DEBUGP(2, dev, "<- cm4040_write (failed)\n"); if (rc == -ERESTARTSYS) return rc; From 023a8b0925bef16bf36d1740f41f6fa74167b0ef Mon Sep 17 00:00:00 2001 From: Alexander Tsoy Date: Mon, 22 May 2017 20:58:11 +0300 Subject: [PATCH 33/52] ALSA: hda - apply STAC_9200_DELL_M22 quirk for Dell Latitude D430 commit 1fc2e41f7af4572b07190f9dec28396b418e9a36 upstream. This model is actually called 92XXM2-8 in Windows driver. But since pin configs for M22 and M28 are identical, just reuse M22 quirk. Fixes external microphone (tested) and probably docking station ports (not tested). Signed-off-by: Alexander Tsoy Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_sigmatel.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index 37b70f8e878f..0abab7926dca 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -1537,6 +1537,8 @@ static const struct snd_pci_quirk stac9200_fixup_tbl[] = { "Dell Inspiron 1501", STAC_9200_DELL_M26), SND_PCI_QUIRK(PCI_VENDOR_ID_DELL, 0x01f6, "unknown Dell", STAC_9200_DELL_M26), + SND_PCI_QUIRK(PCI_VENDOR_ID_DELL, 0x0201, + "Dell Latitude D430", STAC_9200_DELL_M22), /* Panasonic */ SND_PCI_QUIRK(0x10f7, 0x8338, "Panasonic CF-74", STAC_9200_PANASONIC), /* Gateway machines needs EAPD to be set on resume */ From 14bfe118dd7d2ee8a00dcf0b880889922b365563 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 2 Jun 2017 14:46:25 -0700 Subject: [PATCH 34/52] slub/memcg: cure the brainless abuse of sysfs attributes commit 478fe3037b2278d276d4cd9cd0ab06c4cb2e9b32 upstream. memcg_propagate_slab_attrs() abuses the sysfs attribute file functions to propagate settings from the root kmem_cache to a newly created kmem_cache. It does that with: attr->show(root, buf); attr->store(new, buf, strlen(bug); Aside of being a lazy and absurd hackery this is broken because it does not check the return value of the show() function. Some of the show() functions return 0 w/o touching the buffer. That means in such a case the store function is called with the stale content of the previous show(). That causes nonsense like invoking kmem_cache_shrink() on a newly created kmem_cache. In the worst case it would cause handing in an uninitialized buffer. This should be rewritten proper by adding a propagate() callback to those slub_attributes which must be propagated and avoid that insane conversion to and from ASCII, but that's too large for a hot fix. Check at least the return value of the show() function, so calling store() with stale content is prevented. Steven said: "It can cause a deadlock with get_online_cpus() that has been uncovered by recent cpu hotplug and lockdep changes that Thomas and Peter have been doing. Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(cpu_hotplug.lock); lock(slab_mutex); lock(cpu_hotplug.lock); lock(slab_mutex); *** DEADLOCK ***" Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1705201244540.2255@nanos Signed-off-by: Thomas Gleixner Reported-by: Steven Rostedt Acked-by: David Rientjes Cc: Johannes Weiner Cc: Michal Hocko Cc: Peter Zijlstra Cc: Christoph Lameter Cc: Pekka Enberg Cc: Joonsoo Kim Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/slub.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 65d5f92d51d2..4cf3a9c768b1 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -5261,6 +5261,7 @@ static void memcg_propagate_slab_attrs(struct kmem_cache *s) char mbuf[64]; char *buf; struct slab_attribute *attr = to_slab_attr(slab_attrs[i]); + ssize_t len; if (!attr || !attr->store || !attr->show) continue; @@ -5285,8 +5286,9 @@ static void memcg_propagate_slab_attrs(struct kmem_cache *s) buf = buffer; } - attr->show(root_cache, buf); - attr->store(s, buf, strlen(buf)); + len = attr->show(root_cache, buf); + if (len > 0) + attr->store(s, buf, len); } if (buffer) From 4e4b72c0ee3d549e62d92d4ace69ddccd9a7136b Mon Sep 17 00:00:00 2001 From: Patrik Jakobsson Date: Tue, 18 Apr 2017 13:43:32 +0200 Subject: [PATCH 35/52] drm/gma500/psb: Actually use VBT mode when it is found commit 82bc9a42cf854fdf63155759c0aa790bd1f361b0 upstream. With LVDS we were incorrectly picking the pre-programmed mode instead of the prefered mode provided by VBT. Make sure we pick the VBT mode if one is provided. It is likely that the mode read-out code is still wrong but this patch fixes the immediate problem on most machines. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=78562 Signed-off-by: Patrik Jakobsson Link: http://patchwork.freedesktop.org/patch/msgid/20170418114332.12183-1-patrik.r.jakobsson@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/gma500/psb_intel_lvds.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/gma500/psb_intel_lvds.c b/drivers/gpu/drm/gma500/psb_intel_lvds.c index ce0645d0c1e5..61e3a097a478 100644 --- a/drivers/gpu/drm/gma500/psb_intel_lvds.c +++ b/drivers/gpu/drm/gma500/psb_intel_lvds.c @@ -783,20 +783,23 @@ void psb_intel_lvds_init(struct drm_device *dev, if (scan->type & DRM_MODE_TYPE_PREFERRED) { mode_dev->panel_fixed_mode = drm_mode_duplicate(dev, scan); + DRM_DEBUG_KMS("Using mode from DDC\n"); goto out; /* FIXME: check for quirks */ } } /* Failed to get EDID, what about VBT? do we need this? */ - if (mode_dev->vbt_mode) + if (dev_priv->lfp_lvds_vbt_mode) { mode_dev->panel_fixed_mode = - drm_mode_duplicate(dev, mode_dev->vbt_mode); + drm_mode_duplicate(dev, dev_priv->lfp_lvds_vbt_mode); - if (!mode_dev->panel_fixed_mode) - if (dev_priv->lfp_lvds_vbt_mode) - mode_dev->panel_fixed_mode = - drm_mode_duplicate(dev, - dev_priv->lfp_lvds_vbt_mode); + if (mode_dev->panel_fixed_mode) { + mode_dev->panel_fixed_mode->type |= + DRM_MODE_TYPE_PREFERRED; + DRM_DEBUG_KMS("Using mode from VBT\n"); + goto out; + } + } /* * If we didn't get EDID, try checking if the panel is already turned @@ -813,6 +816,7 @@ void psb_intel_lvds_init(struct drm_device *dev, if (mode_dev->panel_fixed_mode) { mode_dev->panel_fixed_mode->type |= DRM_MODE_TYPE_PREFERRED; + DRM_DEBUG_KMS("Using pre-programmed mode\n"); goto out; /* FIXME: check for quirks */ } } From 7e13bab109eafbd2ce239205b41f38011ec77285 Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Fri, 2 Jun 2017 14:46:40 -0700 Subject: [PATCH 36/52] mm/migrate: fix refcount handling when !hugepage_migration_supported() commit 30809f559a0d348c2dfd7ab05e9a451e2384962e upstream. On failing to migrate a page, soft_offline_huge_page() performs the necessary update to the hugepage ref-count. But when !hugepage_migration_supported() , unmap_and_move_hugepage() also decrements the page ref-count for the hugepage. The combined behaviour leaves the ref-count in an inconsistent state. This leads to soft lockups when running the overcommitted hugepage test from mce-tests suite. Soft offlining pfn 0x83ed600 at process virtual address 0x400000000000 soft offline: 0x83ed600: migration failed 1, type 1fffc00000008008 (uptodate|head) INFO: rcu_preempt detected stalls on CPUs/tasks: Tasks blocked on level-0 rcu_node (CPUs 0-7): P2715 (detected by 7, t=5254 jiffies, g=963, c=962, q=321) thugetlb_overco R running task 0 2715 2685 0x00000008 Call trace: dump_backtrace+0x0/0x268 show_stack+0x24/0x30 sched_show_task+0x134/0x180 rcu_print_detail_task_stall_rnp+0x54/0x7c rcu_check_callbacks+0xa74/0xb08 update_process_times+0x34/0x60 tick_sched_handle.isra.7+0x38/0x70 tick_sched_timer+0x4c/0x98 __hrtimer_run_queues+0xc0/0x300 hrtimer_interrupt+0xac/0x228 arch_timer_handler_phys+0x3c/0x50 handle_percpu_devid_irq+0x8c/0x290 generic_handle_irq+0x34/0x50 __handle_domain_irq+0x68/0xc0 gic_handle_irq+0x5c/0xb0 Address this by changing the putback_active_hugepage() in soft_offline_huge_page() to putback_movable_pages(). This only triggers on systems that enable memory failure handling (ARCH_SUPPORTS_MEMORY_FAILURE) but not hugepage migration (!ARCH_ENABLE_HUGEPAGE_MIGRATION). I imagine this wasn't triggered as there aren't many systems running this configuration. [akpm@linux-foundation.org: remove dead comment, per Naoya] Link: http://lkml.kernel.org/r/20170525135146.32011-1-punit.agrawal@arm.com Reported-by: Manoj Iyer Tested-by: Manoj Iyer Suggested-by: Naoya Horiguchi Signed-off-by: Punit Agrawal Cc: Joonsoo Kim Cc: Wanpeng Li Cc: Christoph Lameter Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/memory-failure.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 750b7893ee3a..43aee7ab143e 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1619,12 +1619,8 @@ static int soft_offline_huge_page(struct page *page, int flags) if (ret) { pr_info("soft offline: %#lx: migration failed %d, type %lx\n", pfn, ret, page->flags); - /* - * We know that soft_offline_huge_page() tries to migrate - * only one hugepage pointed to by hpage, so we need not - * run through the pagelist here. - */ - putback_active_hugepage(hpage); + if (!list_empty(&pagelist)) + putback_movable_pages(&pagelist); if (ret > 0) ret = -EIO; } else { From 03489bfc78304a0be057ec827a67c0d87dd97b2e Mon Sep 17 00:00:00 2001 From: Yisheng Xie Date: Fri, 2 Jun 2017 14:46:43 -0700 Subject: [PATCH 37/52] mlock: fix mlock count can not decrease in race condition commit 70feee0e1ef331b22cc51f383d532a0d043fbdcc upstream. Kefeng reported that when running the follow test, the mlock count in meminfo will increase permanently: [1] testcase linux:~ # cat test_mlockal grep Mlocked /proc/meminfo for j in `seq 0 10` do for i in `seq 4 15` do ./p_mlockall >> log & done sleep 0.2 done # wait some time to let mlock counter decrease and 5s may not enough sleep 5 grep Mlocked /proc/meminfo linux:~ # cat p_mlockall.c #include #include #include #define SPACE_LEN 4096 int main(int argc, char ** argv) { int ret; void *adr = malloc(SPACE_LEN); if (!adr) return -1; ret = mlockall(MCL_CURRENT | MCL_FUTURE); printf("mlcokall ret = %d\n", ret); ret = munlockall(); printf("munlcokall ret = %d\n", ret); free(adr); return 0; } In __munlock_pagevec() we should decrement NR_MLOCK for each page where we clear the PageMlocked flag. Commit 1ebb7cc6a583 ("mm: munlock: batch NR_MLOCK zone state updates") has introduced a bug where we don't decrement NR_MLOCK for pages where we clear the flag, but fail to isolate them from the lru list (e.g. when the pages are on some other cpu's percpu pagevec). Since PageMlocked stays cleared, the NR_MLOCK accounting gets permanently disrupted by this. Fix it by counting the number of page whose PageMlock flag is cleared. Fixes: 1ebb7cc6a583 (" mm: munlock: batch NR_MLOCK zone state updates") Link: http://lkml.kernel.org/r/1495678405-54569-1-git-send-email-xieyisheng1@huawei.com Signed-off-by: Yisheng Xie Reported-by: Kefeng Wang Tested-by: Kefeng Wang Cc: Vlastimil Babka Cc: Joern Engel Cc: Mel Gorman Cc: Michel Lespinasse Cc: Hugh Dickins Cc: Rik van Riel Cc: Johannes Weiner Cc: Michal Hocko Cc: Xishi Qiu Cc: zhongjiang Cc: Hanjun Guo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/mlock.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mm/mlock.c b/mm/mlock.c index d6006b146fea..9d2e773f3a95 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -277,7 +277,7 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone) { int i; int nr = pagevec_count(pvec); - int delta_munlocked; + int delta_munlocked = -nr; struct pagevec pvec_putback; int pgrescued = 0; @@ -297,6 +297,8 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone) continue; else __munlock_isolation_failed(page); + } else { + delta_munlocked++; } /* @@ -308,7 +310,6 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone) pagevec_add(&pvec_putback, pvec->pages[i]); pvec->pages[i] = NULL; } - delta_munlocked = -nr + pagevec_count(&pvec_putback); __mod_zone_page_state(zone, NR_MLOCK, delta_munlocked); spin_unlock_irq(&zone->lru_lock); From b9a7816997a38e4c8643f86757cbc4023f285c51 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 18 May 2017 16:36:22 -0700 Subject: [PATCH 38/52] xfs: Fix missed holes in SEEK_HOLE implementation commit 5375023ae1266553a7baa0845e82917d8803f48c upstream. XFS SEEK_HOLE implementation could miss a hole in an unwritten extent as can be seen by the following command: xfs_io -c "falloc 0 256k" -c "pwrite 0 56k" -c "pwrite 128k 8k" -c "seek -h 0" file wrote 57344/57344 bytes at offset 0 56 KiB, 14 ops; 0.0000 sec (49.312 MiB/sec and 12623.9856 ops/sec) wrote 8192/8192 bytes at offset 131072 8 KiB, 2 ops; 0.0000 sec (70.383 MiB/sec and 18018.0180 ops/sec) Whence Result HOLE 139264 Where we can see that hole at offset 56k was just ignored by SEEK_HOLE implementation. The bug is in xfs_find_get_desired_pgoff() which does not properly detect the case when pages are not contiguous. Fix the problem by properly detecting when found page has larger offset than expected. Fixes: d126d43f631f996daeee5006714fed914be32368 Signed-off-by: Jan Kara Reviewed-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_file.c | 29 +++++++++-------------------- 1 file changed, 9 insertions(+), 20 deletions(-) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index f5392ab2def1..999761a914c2 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1235,17 +1235,6 @@ xfs_find_get_desired_pgoff( break; } - /* - * At lease we found one page. If this is the first time we - * step into the loop, and if the first page index offset is - * greater than the given search offset, a hole was found. - */ - if (type == HOLE_OFF && lastoff == startoff && - lastoff < page_offset(pvec.pages[0])) { - found = true; - break; - } - for (i = 0; i < nr_pages; i++) { struct page *page = pvec.pages[i]; loff_t b_offset; @@ -1257,18 +1246,18 @@ xfs_find_get_desired_pgoff( * file mapping. However, page->index will not change * because we have a reference on the page. * - * Searching done if the page index is out of range. - * If the current offset is not reaches the end of - * the specified search range, there should be a hole - * between them. + * If current page offset is beyond where we've ended, + * we've found a hole. */ - if (page->index > end) { - if (type == HOLE_OFF && lastoff < endoff) { - *offset = lastoff; - found = true; - } + if (type == HOLE_OFF && lastoff < endoff && + lastoff < page_offset(pvec.pages[i])) { + found = true; + *offset = lastoff; goto out; } + /* Searching done if the page index is out of range. */ + if (page->index > end) + goto out; lock_page(page); /* From fe705621b9b43dcc1c2acd0f1c3af66ddeb9f617 Mon Sep 17 00:00:00 2001 From: Eryu Guan Date: Tue, 23 May 2017 08:30:46 -0700 Subject: [PATCH 39/52] xfs: fix off-by-one on max nr_pages in xfs_find_get_desired_pgoff() commit 8affebe16d79ebefb1d9d6d56a46dc89716f9453 upstream. xfs_find_get_desired_pgoff() is used to search for offset of hole or data in page range [index, end] (both inclusive), and the max number of pages to search should be at least one, if end == index. Otherwise the only page is missed and no hole or data is found, which is not correct. When block size is smaller than page size, this can be demonstrated by preallocating a file with size smaller than page size and writing data to the last block. E.g. run this xfs_io command on a 1k block size XFS on x86_64 host. # xfs_io -fc "falloc 0 3k" -c "pwrite 2k 1k" \ -c "seek -d 0" /mnt/xfs/testfile wrote 1024/1024 bytes at offset 2048 1 KiB, 1 ops; 0.0000 sec (33.675 MiB/sec and 34482.7586 ops/sec) Whence Result DATA EOF Data at offset 2k was missed, and lseek(2) returned ENXIO. This is uncovered by generic/285 subtest 07 and 08 on ppc64 host, where pagesize is 64k. Because a recent change to generic/285 reduced the preallocated file size to smaller than 64k. Signed-off-by: Eryu Guan Reviewed-by: Jan Kara Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 999761a914c2..ceea444dafb4 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1208,7 +1208,7 @@ xfs_find_get_desired_pgoff( unsigned nr_pages; unsigned int i; - want = min_t(pgoff_t, end - index, PAGEVEC_SIZE); + want = min_t(pgoff_t, end - index, PAGEVEC_SIZE - 1) + 1; nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index, want); /* From 0ace12c11401b813a60a2d8b3b95aee183312dde Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 3 Apr 2017 15:17:57 -0700 Subject: [PATCH 40/52] xfs: fix over-copying of getbmap parameters from userspace commit be6324c00c4d1e0e665f03ed1fc18863a88da119 upstream. In xfs_ioc_getbmap, we should only copy the fields of struct getbmap from userspace, or else we end up copying random stack contents into the kernel. struct getbmap is a strict subset of getbmapx, so a partial structure copy should work fine. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_ioctl.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index d42738deec6d..08479daa6781 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -1379,10 +1379,11 @@ xfs_ioc_getbmap( unsigned int cmd, void __user *arg) { - struct getbmapx bmx; + struct getbmapx bmx = { 0 }; int error; - if (copy_from_user(&bmx, arg, sizeof(struct getbmapx))) + /* struct getbmap is a strict subset of struct getbmapx. */ + if (copy_from_user(&bmx, arg, offsetof(struct getbmapx, bmv_iflags))) return -EFAULT; if (bmx.bmv_count < 2) From 8caa9a54b32b950e82a9cbbeb83e1a73b2828b07 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Thu, 13 Apr 2017 15:15:47 -0700 Subject: [PATCH 41/52] xfs: handle array index overrun in xfs_dir2_leaf_readbuf() commit 023cc840b40fad95c6fe26fff1d380a8c9d45939 upstream. Carlos had a case where "find" seemed to start spinning forever and never return. This was on a filesystem with non-default multi-fsb (8k) directory blocks, and a fragmented directory with extents like this: 0:[0,133646,2,0] 1:[2,195888,1,0] 2:[3,195890,1,0] 3:[4,195892,1,0] 4:[5,195894,1,0] 5:[6,195896,1,0] 6:[7,195898,1,0] 7:[8,195900,1,0] 8:[9,195902,1,0] 9:[10,195908,1,0] 10:[11,195910,1,0] 11:[12,195912,1,0] 12:[13,195914,1,0] ... i.e. the first extent is a contiguous 2-fsb dir block, but after that it is fragmented into 1 block extents. At the top of the readdir path, we allocate a mapping array which (for this filesystem geometry) can hold 10 extents; see the assignment to map_info->map_size. During readdir, we are therefore able to map extents 0 through 9 above into the array for readahead purposes. If we count by 2, we see that the last mapped index (9) is the first block of a 2-fsb directory block. At the end of xfs_dir2_leaf_readbuf() we have 2 loops to fill more readahead; the outer loop assumes one full dir block is processed each loop iteration, and an inner loop that ensures that this is so by advancing to the next extent until a full directory block is mapped. The problem is that this inner loop may step past the last extent in the mapping array as it tries to reach the end of the directory block. This will read garbage for the extent length, and as a result the loop control variable 'j' may become corrupted and never fail the loop conditional. The number of valid mappings we have in our array is stored in map->map_valid, so stop this inner loop based on that limit. There is an ASSERT at the top of the outer loop for this same condition, but we never made it out of the inner loop, so the ASSERT never fired. Huge appreciation for Carlos for debugging and isolating the problem. Debugged-and-analyzed-by: Carlos Maiolino Signed-off-by: Eric Sandeen Tested-by: Carlos Maiolino Reviewed-by: Carlos Maiolino Reviewed-by: Bill O'Donnell Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_dir2_readdir.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c index 642d55d10075..786e35b636f1 100644 --- a/fs/xfs/xfs_dir2_readdir.c +++ b/fs/xfs/xfs_dir2_readdir.c @@ -406,6 +406,7 @@ xfs_dir2_leaf_readbuf( /* * Do we need more readahead? + * Each loop tries to process 1 full dir blk; last may be partial. */ blk_start_plug(&plug); for (mip->ra_index = mip->ra_offset = i = 0; @@ -437,9 +438,14 @@ xfs_dir2_leaf_readbuf( } /* - * Advance offset through the mapping table. + * Advance offset through the mapping table, processing a full + * dir block even if it is fragmented into several extents. + * But stop if we have consumed all valid mappings, even if + * it's not yet a full directory block. */ - for (j = 0; j < geo->fsbcount; j += length ) { + for (j = 0; + j < geo->fsbcount && mip->ra_index < mip->map_valid; + j += length ) { /* * The rest of this extent but not more than a dir * block. From a76647a71c8e7287271bcaa2768e45a1bb107f8d Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Thu, 20 Apr 2017 08:06:47 -0700 Subject: [PATCH 42/52] xfs: prevent multi-fsb dir readahead from reading random blocks commit cb52ee334a45ae6c78a3999e4b473c43ddc528f4 upstream. Directory block readahead uses a complex iteration mechanism to map between high-level directory blocks and underlying physical extents. This mechanism attempts to traverse the higher-level dir blocks in a manner that handles multi-fsb directory blocks and simultaneously maintains a reference to the corresponding physical blocks. This logic doesn't handle certain (discontiguous) physical extent layouts correctly with multi-fsb directory blocks. For example, consider the case of a 4k FSB filesystem with a 2 FSB (8k) directory block size and a directory with the following extent layout: EXT: FILE-OFFSET BLOCK-RANGE AG AG-OFFSET TOTAL 0: [0..7]: 88..95 0 (88..95) 8 1: [8..15]: 80..87 0 (80..87) 8 2: [16..39]: 168..191 0 (168..191) 24 3: [40..63]: 5242952..5242975 1 (72..95) 24 Directory block 0 spans physical extents 0 and 1, dirblk 1 lies entirely within extent 2 and dirblk 2 spans extents 2 and 3. Because extent 2 is larger than the directory block size, the readahead code erroneously assumes the block is contiguous and issues a readahead based on the physical mapping of the first fsb of the dirblk. This results in read verifier failure and a spurious corruption or crc failure, depending on the filesystem format. Further, the subsequent readahead code responsible for walking through the physical table doesn't correctly advance the physical block reference for dirblk 2. Instead of advancing two physical filesystem blocks, the first iteration of the loop advances 1 block (correctly), but the subsequent iteration advances 2 more physical blocks because the next physical extent (extent 3, above) happens to cover more than dirblk 2. At this point, the higher-level directory block walking is completely off the rails of the actual physical layout of the directory for the respective mapping table. Update the contiguous dirblock logic to consider the current offset in the physical extent to avoid issuing directory readahead to unrelated blocks. Also, update the mapping table advancing code to consider the current offset within the current dirblock to avoid advancing the mapping reference too far beyond the dirblock. Signed-off-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_dir2_readdir.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c index 786e35b636f1..2fbf643fa10a 100644 --- a/fs/xfs/xfs_dir2_readdir.c +++ b/fs/xfs/xfs_dir2_readdir.c @@ -417,7 +417,8 @@ xfs_dir2_leaf_readbuf( * Read-ahead a contiguous directory block. */ if (i > mip->ra_current && - map[mip->ra_index].br_blockcount >= geo->fsbcount) { + (map[mip->ra_index].br_blockcount - mip->ra_offset) >= + geo->fsbcount) { xfs_dir3_data_readahead(dp, map[mip->ra_index].br_startoff + mip->ra_offset, XFS_FSB_TO_DADDR(dp->i_mount, @@ -450,7 +451,7 @@ xfs_dir2_leaf_readbuf( * The rest of this extent but not more than a dir * block. */ - length = min_t(int, geo->fsbcount, + length = min_t(int, geo->fsbcount - j, map[mip->ra_index].br_blockcount - mip->ra_offset); mip->ra_offset += length; From cf55c35974e177dd3d9e7084528c47fe0dab2422 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Fri, 21 Apr 2017 12:40:44 -0700 Subject: [PATCH 43/52] xfs: fix up quotacheck buffer list error handling commit 20e8a063786050083fe05b4f45be338c60b49126 upstream. The quotacheck error handling of the delwri buffer list assumes the resident buffers are locked and doesn't clear the _XBF_DELWRI_Q flag on the buffers that are dequeued. This can lead to assert failures on buffer release and possibly other locking problems. Move this code to a delwri queue cancel helper function to encapsulate the logic required to properly release buffers from a delwri queue. Update the helper to clear the delwri queue flag and call it from quotacheck. Signed-off-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_buf.c | 24 ++++++++++++++++++++++++ fs/xfs/xfs_buf.h | 1 + fs/xfs/xfs_qm.c | 7 +------ 3 files changed, 26 insertions(+), 6 deletions(-) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 8146b0cf20ce..dcb70969ff1c 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -979,6 +979,8 @@ void xfs_buf_unlock( struct xfs_buf *bp) { + ASSERT(xfs_buf_islocked(bp)); + XB_CLEAR_OWNER(bp); up(&bp->b_sema); @@ -1712,6 +1714,28 @@ error: return NULL; } +/* + * Cancel a delayed write list. + * + * Remove each buffer from the list, clear the delwri queue flag and drop the + * associated buffer reference. + */ +void +xfs_buf_delwri_cancel( + struct list_head *list) +{ + struct xfs_buf *bp; + + while (!list_empty(list)) { + bp = list_first_entry(list, struct xfs_buf, b_list); + + xfs_buf_lock(bp); + bp->b_flags &= ~_XBF_DELWRI_Q; + list_del_init(&bp->b_list); + xfs_buf_relse(bp); + } +} + /* * Add a buffer to the delayed write list. * diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index c75721acd867..149bbd451731 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -304,6 +304,7 @@ extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *, extern void *xfs_buf_offset(struct xfs_buf *, size_t); /* Delayed Write Buffer Routines */ +extern void xfs_buf_delwri_cancel(struct list_head *); extern bool xfs_buf_delwri_queue(struct xfs_buf *, struct list_head *); extern int xfs_buf_delwri_submit(struct list_head *); extern int xfs_buf_delwri_submit_nowait(struct list_head *); diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 532ab79d38fe..572b64a135b3 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -1355,12 +1355,7 @@ xfs_qm_quotacheck( mp->m_qflags |= flags; error_return: - while (!list_empty(&buffer_list)) { - struct xfs_buf *bp = - list_first_entry(&buffer_list, struct xfs_buf, b_list); - list_del_init(&bp->b_list); - xfs_buf_relse(bp); - } + xfs_buf_delwri_cancel(&buffer_list); if (error) { xfs_warn(mp, From 8e25af0dc5adac8ee297256558c3ca1c06f15578 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Wed, 26 Apr 2017 08:30:39 -0700 Subject: [PATCH 44/52] xfs: support ability to wait on new inodes commit 756baca27fff3ecaeab9dbc7a5ee35a1d7bc0c7f upstream. Inodes that are inserted into the perag tree but still under construction are flagged with the XFS_INEW bit. Most contexts either skip such inodes when they are encountered or have the ability to handle them. The runtime quotaoff sequence introduces a context that must wait for construction of such inodes to correctly ensure that all dquots in the fs are released. In anticipation of this, support the ability to wait on new inodes. Wake the appropriate bit when XFS_INEW is cleared. Signed-off-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_icache.c | 5 ++++- fs/xfs/xfs_inode.h | 4 +++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index d7a490f24ead..88d3933e4a3e 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -210,14 +210,17 @@ xfs_iget_cache_hit( error = inode_init_always(mp->m_super, inode); if (error) { + bool wake; /* * Re-initializing the inode failed, and we are in deep * trouble. Try to re-add it to the reclaim list. */ rcu_read_lock(); spin_lock(&ip->i_flags_lock); - + wake = !!__xfs_iflags_test(ip, XFS_INEW); ip->i_flags &= ~(XFS_INEW | XFS_IRECLAIM); + if (wake) + wake_up_bit(&ip->i_flags, __XFS_INEW_BIT); ASSERT(ip->i_flags & XFS_IRECLAIMABLE); trace_xfs_iget_reclaim_fail(ip); goto out_error; diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index ca9e11989cbd..ae1a49845744 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -208,7 +208,8 @@ xfs_get_initial_prid(struct xfs_inode *dp) #define XFS_IRECLAIM (1 << 0) /* started reclaiming this inode */ #define XFS_ISTALE (1 << 1) /* inode has been staled */ #define XFS_IRECLAIMABLE (1 << 2) /* inode can be reclaimed */ -#define XFS_INEW (1 << 3) /* inode has just been allocated */ +#define __XFS_INEW_BIT 3 /* inode has just been allocated */ +#define XFS_INEW (1 << __XFS_INEW_BIT) #define XFS_ITRUNCATED (1 << 5) /* truncated down so flush-on-close */ #define XFS_IDIRTY_RELEASE (1 << 6) /* dirty release already seen */ #define __XFS_IFLOCK_BIT 7 /* inode is being flushed right now */ @@ -453,6 +454,7 @@ static inline void xfs_finish_inode_setup(struct xfs_inode *ip) xfs_iflags_clear(ip, XFS_INEW); barrier(); unlock_new_inode(VFS_I(ip)); + wake_up_bit(&ip->i_flags, __XFS_INEW_BIT); } static inline void xfs_setup_existing_inode(struct xfs_inode *ip) From 9d97d6a152655bc58bc7833fd45d912eb966f189 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Wed, 26 Apr 2017 08:30:39 -0700 Subject: [PATCH 45/52] xfs: update ag iterator to support wait on new inodes commit ae2c4ac2dd39b23a87ddb14ceddc3f2872c6aef5 upstream. The AG inode iterator currently skips new inodes as such inodes are inserted into the inode radix tree before they are fully constructed. Certain contexts require the ability to wait on the construction of new inodes, however. The fs-wide dquot release from the quotaoff sequence is an example of this. Update the AG inode iterator to support the ability to wait on inodes flagged with XFS_INEW upon request. Create a new xfs_inode_ag_iterator_flags() interface and support a set of iteration flags to modify the iteration behavior. When the XFS_AGITER_INEW_WAIT flag is set, include XFS_INEW flags in the radix tree inode lookup and wait on them before the callback is executed. Signed-off-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_icache.c | 53 ++++++++++++++++++++++++++++++++++++++------- fs/xfs/xfs_icache.h | 8 +++++++ 2 files changed, 53 insertions(+), 8 deletions(-) diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 88d3933e4a3e..adbc1f59969a 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -366,6 +366,22 @@ out_destroy: return error; } +static void +xfs_inew_wait( + struct xfs_inode *ip) +{ + wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_INEW_BIT); + DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_INEW_BIT); + + do { + prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE); + if (!xfs_iflags_test(ip, XFS_INEW)) + break; + schedule(); + } while (true); + finish_wait(wq, &wait.wait); +} + /* * Look up an inode by number in the given file system. * The inode is looked up in the cache held in each AG. @@ -470,9 +486,11 @@ out_error_or_again: STATIC int xfs_inode_ag_walk_grab( - struct xfs_inode *ip) + struct xfs_inode *ip, + int flags) { struct inode *inode = VFS_I(ip); + bool newinos = !!(flags & XFS_AGITER_INEW_WAIT); ASSERT(rcu_read_lock_held()); @@ -490,7 +508,8 @@ xfs_inode_ag_walk_grab( goto out_unlock_noent; /* avoid new or reclaimable inodes. Leave for reclaim code to flush */ - if (__xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM)) + if ((!newinos && __xfs_iflags_test(ip, XFS_INEW)) || + __xfs_iflags_test(ip, XFS_IRECLAIMABLE | XFS_IRECLAIM)) goto out_unlock_noent; spin_unlock(&ip->i_flags_lock); @@ -518,7 +537,8 @@ xfs_inode_ag_walk( void *args), int flags, void *args, - int tag) + int tag, + int iter_flags) { uint32_t first_index; int last_error = 0; @@ -560,7 +580,7 @@ restart: for (i = 0; i < nr_found; i++) { struct xfs_inode *ip = batch[i]; - if (done || xfs_inode_ag_walk_grab(ip)) + if (done || xfs_inode_ag_walk_grab(ip, iter_flags)) batch[i] = NULL; /* @@ -588,6 +608,9 @@ restart: for (i = 0; i < nr_found; i++) { if (!batch[i]) continue; + if ((iter_flags & XFS_AGITER_INEW_WAIT) && + xfs_iflags_test(batch[i], XFS_INEW)) + xfs_inew_wait(batch[i]); error = execute(batch[i], flags, args); IRELE(batch[i]); if (error == -EAGAIN) { @@ -640,12 +663,13 @@ xfs_eofblocks_worker( } int -xfs_inode_ag_iterator( +xfs_inode_ag_iterator_flags( struct xfs_mount *mp, int (*execute)(struct xfs_inode *ip, int flags, void *args), int flags, - void *args) + void *args, + int iter_flags) { struct xfs_perag *pag; int error = 0; @@ -655,7 +679,8 @@ xfs_inode_ag_iterator( ag = 0; while ((pag = xfs_perag_get(mp, ag))) { ag = pag->pag_agno + 1; - error = xfs_inode_ag_walk(mp, pag, execute, flags, args, -1); + error = xfs_inode_ag_walk(mp, pag, execute, flags, args, -1, + iter_flags); xfs_perag_put(pag); if (error) { last_error = error; @@ -666,6 +691,17 @@ xfs_inode_ag_iterator( return last_error; } +int +xfs_inode_ag_iterator( + struct xfs_mount *mp, + int (*execute)(struct xfs_inode *ip, int flags, + void *args), + int flags, + void *args) +{ + return xfs_inode_ag_iterator_flags(mp, execute, flags, args, 0); +} + int xfs_inode_ag_iterator_tag( struct xfs_mount *mp, @@ -683,7 +719,8 @@ xfs_inode_ag_iterator_tag( ag = 0; while ((pag = xfs_perag_get_tag(mp, ag, tag))) { ag = pag->pag_agno + 1; - error = xfs_inode_ag_walk(mp, pag, execute, flags, args, tag); + error = xfs_inode_ag_walk(mp, pag, execute, flags, args, tag, + 0); xfs_perag_put(pag); if (error) { last_error = error; diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h index 62f1f91c32cb..147a79212e63 100644 --- a/fs/xfs/xfs_icache.h +++ b/fs/xfs/xfs_icache.h @@ -48,6 +48,11 @@ struct xfs_eofblocks { #define XFS_IGET_UNTRUSTED 0x2 #define XFS_IGET_DONTCACHE 0x4 +/* + * flags for AG inode iterator + */ +#define XFS_AGITER_INEW_WAIT 0x1 /* wait on new inodes */ + int xfs_iget(struct xfs_mount *mp, struct xfs_trans *tp, xfs_ino_t ino, uint flags, uint lock_flags, xfs_inode_t **ipp); @@ -72,6 +77,9 @@ void xfs_eofblocks_worker(struct work_struct *); int xfs_inode_ag_iterator(struct xfs_mount *mp, int (*execute)(struct xfs_inode *ip, int flags, void *args), int flags, void *args); +int xfs_inode_ag_iterator_flags(struct xfs_mount *mp, + int (*execute)(struct xfs_inode *ip, int flags, void *args), + int flags, void *args, int iter_flags); int xfs_inode_ag_iterator_tag(struct xfs_mount *mp, int (*execute)(struct xfs_inode *ip, int flags, void *args), int flags, void *args, int tag); From 1d41dd5c1fd6dca4f7fa5980de2e2194e4f6d4d3 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Wed, 26 Apr 2017 08:30:40 -0700 Subject: [PATCH 46/52] xfs: wait on new inodes during quotaoff dquot release commit e20c8a517f259cb4d258e10b0cd5d4b30d4167a0 upstream. The quotaoff operation has a race with inode allocation that results in a livelock. An inode allocation that occurs before the quota status flags are updated acquires the appropriate dquots for the inode via xfs_qm_vop_dqalloc(). It then inserts the XFS_INEW inode into the perag radix tree, sometime later attaches the dquots to the inode and finally clears the XFS_INEW flag. Quotaoff expects to release the dquots from all inodes in the filesystem via xfs_qm_dqrele_all_inodes(). This invokes the AG inode iterator, which skips inodes in the XFS_INEW state because they are not fully constructed. If the scan occurs after dquots have been attached to an inode, but before XFS_INEW is cleared, the newly allocated inode will continue to hold a reference to the applicable dquots. When quotaoff invokes xfs_qm_dqpurge_all(), the reference count of those dquot(s) remain elevated and the dqpurge scan spins indefinitely. To address this problem, update the xfs_qm_dqrele_all_inodes() scan to wait on inodes marked on the XFS_INEW state. We wait on the inodes explicitly rather than skip and retry to avoid continuous retry loops due to a parallel inode allocation workload. Since quotaoff updates the quota state flags and uses a synchronous transaction before the dqrele scan, and dquots are attached to inodes after radix tree insertion iff quota is enabled, one INEW waiting pass through the AG guarantees that the scan has processed all inodes that could possibly hold dquot references. Reported-by: Eryu Guan Signed-off-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_qm_syscalls.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c index 3640c6e896af..4d334440bd94 100644 --- a/fs/xfs/xfs_qm_syscalls.c +++ b/fs/xfs/xfs_qm_syscalls.c @@ -764,5 +764,6 @@ xfs_qm_dqrele_all_inodes( uint flags) { ASSERT(mp->m_quotainfo); - xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, NULL); + xfs_inode_ag_iterator_flags(mp, xfs_dqrele_inode, flags, NULL, + XFS_AGITER_INEW_WAIT); } From 3ba13d7f5b2bad97a2df48f9bb6e4d52c244a979 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Fri, 12 May 2017 10:44:08 -0700 Subject: [PATCH 47/52] xfs: fix indlen accounting error on partial delalloc conversion commit 0daaecacb83bc6b656a56393ab77a31c28139bc7 upstream. The delalloc -> real block conversion path uses an incorrect calculation in the case where the middle part of a delalloc extent is being converted. This is documented as a rare situation because XFS generally attempts to maximize contiguity by converting as much of a delalloc extent as possible. If this situation does occur, the indlen reservation for the two new delalloc extents left behind by the conversion of the middle range is calculated and compared with the original reservation. If more blocks are required, the delta is allocated from the global block pool. This delta value can be characterized as the difference between the new total requirement (temp + temp2) and the currently available reservation minus those blocks that have already been allocated (startblockval(PREV.br_startblock) - allocated). The problem is that the current code does not account for previously allocated blocks correctly. It subtracts the current allocation count from the (new - old) delta rather than the old indlen reservation. This means that more indlen blocks than have been allocated end up stashed in the remaining extents and free space accounting is broken as a result. Fix up the calculation to subtract the allocated block count from the original extent indlen and thus correctly allocate the reservation delta based on the difference between the new total requirement and the unused blocks from the original reservation. Also remove a bogus assert that contradicts the fact that the new indlen reservation can be larger than the original indlen reservation. Signed-off-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_bmap.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 119c2422aac7..75884aecf920 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -2179,8 +2179,10 @@ xfs_bmap_add_extent_delay_real( } temp = xfs_bmap_worst_indlen(bma->ip, temp); temp2 = xfs_bmap_worst_indlen(bma->ip, temp2); - diff = (int)(temp + temp2 - startblockval(PREV.br_startblock) - - (bma->cur ? bma->cur->bc_private.b.allocated : 0)); + diff = (int)(temp + temp2 - + (startblockval(PREV.br_startblock) - + (bma->cur ? + bma->cur->bc_private.b.allocated : 0))); if (diff > 0) { error = xfs_mod_fdblocks(bma->ip->i_mount, -((int64_t)diff), false); @@ -2232,7 +2234,6 @@ xfs_bmap_add_extent_delay_real( temp = da_new; if (bma->cur) temp += bma->cur->bc_private.b.allocated; - ASSERT(temp <= da_old); if (temp < da_old) xfs_mod_fdblocks(bma->ip->i_mount, (int64_t)(da_old - temp), false); From 9f7b5da0570fe627badd43941b8d8b50cfda5c8a Mon Sep 17 00:00:00 2001 From: Zorro Lang Date: Mon, 15 May 2017 08:40:02 -0700 Subject: [PATCH 48/52] xfs: bad assertion for delalloc an extent that start at i_size commit 892d2a5f705723b2cb488bfb38bcbdcf83273184 upstream. By run fsstress long enough time enough in RHEL-7, I find an assertion failure (harder to reproduce on linux-4.11, but problem is still there): XFS: Assertion failed: (iflags & BMV_IF_DELALLOC) != 0, file: fs/xfs/xfs_bmap_util.c The assertion is in xfs_getbmap() funciton: if (map[i].br_startblock == DELAYSTARTBLOCK && --> map[i].br_startoff <= XFS_B_TO_FSB(mp, XFS_ISIZE(ip))) ASSERT((iflags & BMV_IF_DELALLOC) != 0); When map[i].br_startoff == XFS_B_TO_FSB(mp, XFS_ISIZE(ip)), the startoff is just at EOF. But we only need to make sure delalloc extents that are within EOF, not include EOF. Signed-off-by: Zorro Lang Reviewed-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_bmap_util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 832764ee035a..863e1bff403b 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -682,7 +682,7 @@ xfs_getbmap( * extents. */ if (map[i].br_startblock == DELAYSTARTBLOCK && - map[i].br_startoff <= XFS_B_TO_FSB(mp, XFS_ISIZE(ip))) + map[i].br_startoff < XFS_B_TO_FSB(mp, XFS_ISIZE(ip))) ASSERT((iflags & BMV_IF_DELALLOC) != 0); if (map[i].br_startblock == HOLESTARTBLOCK && From c56605c69ba66087fafe609bb9f936501bc7162e Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Mon, 22 May 2017 19:54:10 -0700 Subject: [PATCH 49/52] xfs: fix unaligned access in xfs_btree_visit_blocks commit a4d768e702de224cc85e0c8eac9311763403b368 upstream. This structure copy was throwing unaligned access warnings on sparc64: Kernel unaligned access at TPC[1043c088] xfs_btree_visit_blocks+0x88/0xe0 [xfs] xfs_btree_copy_ptrs does a memcpy, which avoids it. Signed-off-by: Eric Sandeen Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_btree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index af1bbee5586e..28bc5e78b110 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -4064,7 +4064,7 @@ xfs_btree_change_owner( xfs_btree_readahead_ptr(cur, ptr, 1); /* save for the next iteration of the loop */ - lptr = *ptr; + xfs_btree_copy_ptrs(cur, &lptr, ptr, 1); } /* for each buffer in the level */ From 1b03d85a4f37af5889d11c8a300423fbad45aea4 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 3 Aug 2016 10:58:53 +1000 Subject: [PATCH 50/52] xfs: in _attrlist_by_handle, copy the cursor back to userspace commit 0facef7fb053be4353c0a48c2f48c9dbee91cb19 upstream. When we're iterating inode xattrs by handle, we have to copy the cursor back to userspace so that a subsequent invocation actually retrieves subsequent contents. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner Cc: Nikolay Borisov Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_ioctl.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 08479daa6781..e4a4f82ea13f 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -403,6 +403,7 @@ xfs_attrlist_by_handle( { int error = -ENOMEM; attrlist_cursor_kern_t *cursor; + struct xfs_fsop_attrlist_handlereq __user *p = arg; xfs_fsop_attrlist_handlereq_t al_hreq; struct dentry *dentry; char *kbuf; @@ -435,6 +436,11 @@ xfs_attrlist_by_handle( if (error) goto out_kfree; + if (copy_to_user(&p->pos, cursor, sizeof(attrlist_cursor_kern_t))) { + error = -EFAULT; + goto out_kfree; + } + if (copy_to_user(al_hreq.buffer, kbuf, al_hreq.buflen)) error = -EFAULT; From 9d65be36a7cc0f69666b71f27874fc058d720417 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Wed, 6 Apr 2016 07:57:18 +1000 Subject: [PATCH 51/52] xfs: only return -errno or success from attr ->put_listent commit 2a6fba6d2311151598abaa1e7c9abd5f8d024a43 upstream. Today, the put_listent formatters return either 1 or 0; if they return 1, some callers treat this as an error and return it up the stack, despite "1" not being a valid (negative) error code. The intent seems to be that if the input buffer is full, we set seen_enough or set count = -1, and return 1; but some callers check the return before checking the seen_enough or count fields of the context. Fix this by only returning non-zero for actual errors encountered, and rely on the caller to first check the return value, then check the values in the context to decide what to do. Signed-off-by: Eric Sandeen Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner Signed-off-by: Nikolay Borisov Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_attr.h | 1 + fs/xfs/xfs_attr_list.c | 8 +++----- fs/xfs/xfs_xattr.c | 15 ++++++++++----- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/fs/xfs/xfs_attr.h b/fs/xfs/xfs_attr.h index dd4824589470..234331227c0c 100644 --- a/fs/xfs/xfs_attr.h +++ b/fs/xfs/xfs_attr.h @@ -112,6 +112,7 @@ typedef struct attrlist_cursor_kern { *========================================================================*/ +/* Return 0 on success, or -errno; other state communicated via *context */ typedef int (*put_listent_func_t)(struct xfs_attr_list_context *, int, unsigned char *, int, int, unsigned char *); diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c index 4fa14820e2e2..c8be331a3196 100644 --- a/fs/xfs/xfs_attr_list.c +++ b/fs/xfs/xfs_attr_list.c @@ -108,16 +108,14 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context) (int)sfe->namelen, (int)sfe->valuelen, &sfe->nameval[sfe->namelen]); - + if (error) + return error; /* * Either search callback finished early or * didn't fit it all in the buffer after all. */ if (context->seen_enough) break; - - if (error) - return error; sfe = XFS_ATTR_SF_NEXTENTRY(sfe); } trace_xfs_attr_list_sf_all(context); @@ -581,7 +579,7 @@ xfs_attr_put_listent( trace_xfs_attr_list_full(context); alist->al_more = 1; context->seen_enough = 1; - return 1; + return 0; } aep = (attrlist_ent_t *)&context->alist[context->firstu]; diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c index 839b35ca21c6..e6dae28dfa1a 100644 --- a/fs/xfs/xfs_xattr.c +++ b/fs/xfs/xfs_xattr.c @@ -180,7 +180,7 @@ xfs_xattr_put_listent( arraytop = context->count + prefix_len + namelen + 1; if (arraytop > context->firstu) { context->count = -1; /* insufficient space */ - return 1; + return 0; } offset = (char *)context->alist + context->count; strncpy(offset, xfs_xattr_prefix(flags), prefix_len); @@ -222,12 +222,15 @@ list_one_attr(const char *name, const size_t len, void *data, } ssize_t -xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size) +xfs_vn_listxattr( + struct dentry *dentry, + char *data, + size_t size) { struct xfs_attr_list_context context; struct attrlist_cursor_kern cursor = { 0 }; - struct inode *inode = d_inode(dentry); - int error; + struct inode *inode = d_inode(dentry); + int error; /* * First read the regular on-disk attributes. @@ -245,7 +248,9 @@ xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size) else context.put_listent = xfs_xattr_put_listent_sizes; - xfs_attr_list_int(&context); + error = xfs_attr_list_int(&context); + if (error) + return error; if (context.count < 0) return -ERANGE; From 4bbbc769640554a216a25b2dbaa3d4d2869bbf79 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 7 Jun 2017 12:06:14 +0200 Subject: [PATCH 52/52] Linux 4.4.71 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index a5ecb29c6ed3..ad91a79aed51 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 4 -SUBLEVEL = 70 +SUBLEVEL = 71 EXTRAVERSION = NAME = Blurry Fish Butt