From 392bd47d7ddca7bffa9efa782a50c26b9c223f61 Mon Sep 17 00:00:00 2001 From: Dwight Engen Date: Thu, 15 Aug 2013 14:08:03 -0400 Subject: [PATCH 01/92] xfs: add capability check to free eofblocks ioctl commit 8c567a7fab6e086a0284eee2db82348521e7120c upstream. Check for CAP_SYS_ADMIN since the caller can truncate preallocated blocks from files they do not own nor have write access to. A more fine grained access check was considered: require the caller to specify their own uid/gid and to use inode_permission to check for write, but this would not catch the case of an inode not reachable via path traversal from the callers mount namespace. Add check for read-only filesystem to free eofblocks ioctl. Reviewed-by: Brian Foster Reviewed-by: Dave Chinner Reviewed-by: Gao feng Signed-off-by: Dwight Engen Signed-off-by: Ben Myers Cc: Kees Cook Signed-off-by: Kamal Mostafa --- fs/xfs/xfs_ioctl.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index c1c3ef88a260..2fe3eb3abf2e 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -1606,6 +1606,12 @@ xfs_file_ioctl( case XFS_IOC_FREE_EOFBLOCKS: { struct xfs_eofblocks eofb; + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (mp->m_flags & XFS_MOUNT_RDONLY) + return -XFS_ERROR(EROFS); + if (copy_from_user(&eofb, arg, sizeof(eofb))) return -XFS_ERROR(EFAULT); From d5ed660fcf235fa72f7d3eb1584cd444983893c7 Mon Sep 17 00:00:00 2001 From: Malcolm Priestley Date: Thu, 7 Nov 2013 21:49:04 +0000 Subject: [PATCH 02/92] staging: vt6656: [BUG] Fix for TX USB resets from vendors driver. commit 9df682927c2e3a92f43803d6b52095992e3b2ab8 upstream. This fixes resets on heavy TX data traffic. Vendor driver VT6656_Linux_src_v1.21.03_x86_11.04.zip http://www.viaembedded.com/servlet/downloadSvl?id=1890&download_file_id=14704 This is GPL-licensed code. original code BBbVT3184Init ... //2007-0725, RobertChang add, Enable Squelch detect reset option(SQ_RST_Opt), USB (register4, bit1) CONTROLnsRequestIn(pDevice, MESSAGE_TYPE_READ, (WORD)0x600+4, // USB's Reg4's bit1 MESSAGE_REQUEST_MEM, 1, (PBYTE) &byData); byData = byData|2 ; CONTROLnsRequestOut(pDevice, MESSAGE_TYPE_WRITE, (WORD)0x600+4, // USB's Reg4's bit1 MESSAGE_REQUEST_MEM, 1, (PBYTE) &byData); return TRUE;//ntStatus; .... A back port patch is needed for kernels less than 3.10. Signed-off-by: Malcolm Priestley Signed-off-by: Greg Kroah-Hartman [ Malcolm Priestley: backport to 3.8-stable ] Signed-off-by: Kamal Mostafa --- drivers/staging/vt6656/baseband.c | 11 +++++++++++ drivers/staging/vt6656/rndis.h | 1 + 2 files changed, 12 insertions(+) diff --git a/drivers/staging/vt6656/baseband.c b/drivers/staging/vt6656/baseband.c index 385501595b4d..65503b9ba4f7 100644 --- a/drivers/staging/vt6656/baseband.c +++ b/drivers/staging/vt6656/baseband.c @@ -976,6 +976,7 @@ BOOL BBbVT3184Init(PSDevice pDevice) PBYTE pbyAgc; WORD wLengthAgc; BYTE abyArray[256]; + u8 data; ntStatus = CONTROLnsRequestIn(pDevice, MESSAGE_TYPE_READ, @@ -1144,6 +1145,16 @@ else { ControlvWriteByte(pDevice,MESSAGE_REQUEST_BBREG,0x0D,0x01); RFbRFTableDownload(pDevice); + + /* Fix for TX USB resets from vendors driver */ + CONTROLnsRequestIn(pDevice, MESSAGE_TYPE_READ, USB_REG4, + MESSAGE_REQUEST_MEM, sizeof(data), &data); + + data |= 0x2; + + CONTROLnsRequestOut(pDevice, MESSAGE_TYPE_WRITE, USB_REG4, + MESSAGE_REQUEST_MEM, sizeof(data), &data); + return TRUE;//ntStatus; } diff --git a/drivers/staging/vt6656/rndis.h b/drivers/staging/vt6656/rndis.h index fccf7e98eb68..dcf7bf54fb3e 100644 --- a/drivers/staging/vt6656/rndis.h +++ b/drivers/staging/vt6656/rndis.h @@ -69,6 +69,7 @@ #define VIAUSB20_PACKET_HEADER 0x04 +#define USB_REG4 0x604 /*--------------------- Export Classes ----------------------------*/ From 5b156be59537bdac42d7e1f33fa97dd87b1f68ad Mon Sep 17 00:00:00 2001 From: Andreas Henriksson Date: Thu, 7 Nov 2013 18:26:38 +0100 Subject: [PATCH 03/92] net: Fix "ip rule delete table 256" [ Upstream commit 13eb2ab2d33c57ebddc57437a7d341995fc9138c ] When trying to delete a table >= 256 using iproute2 the local table will be deleted. The table id is specified as a netlink attribute when it needs more then 8 bits and iproute2 then sets the table field to RT_TABLE_UNSPEC (0). Preconditions to matching the table id in the rule delete code doesn't seem to take the "table id in netlink attribute" into condition so the frh_get_table helper function never gets to do its job when matching against current rule. Use the helper function twice instead of peaking at the table value directly. Originally reported at: http://bugs.debian.org/724783 Reported-by: Nicolas HICHER Signed-off-by: Andreas Henriksson Signed-off-by: David S. Miller Signed-off-by: Kamal Mostafa --- net/core/fib_rules.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 58a4ba27dfe3..8c9b04bb8e96 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -445,7 +445,8 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (frh->action && (frh->action != rule->action)) continue; - if (frh->table && (frh_get_table(frh, tb) != rule->table)) + if (frh_get_table(frh, tb) && + (frh_get_table(frh, tb) != rule->table)) continue; if (tb[FRA_PRIORITY] && From c405a5ac91a8b183c30974ad43d175689ee41f23 Mon Sep 17 00:00:00 2001 From: Duan Jiong Date: Fri, 8 Nov 2013 09:56:53 +0800 Subject: [PATCH 04/92] ipv6: use rt6_get_dflt_router to get default router in rt6_route_rcv [ Upstream commit f104a567e673f382b09542a8dc3500aa689957b4 ] As the rfc 4191 said, the Router Preference and Lifetime values in a ::/0 Route Information Option should override the preference and lifetime values in the Router Advertisement header. But when the kernel deals with a ::/0 Route Information Option, the rt6_get_route_info() always return NULL, that means that overriding will not happen, because those default routers were added without flag RTF_ROUTEINFO in rt6_add_dflt_router(). In order to deal with that condition, we should call rt6_get_dflt_router when the prefix length is 0. Signed-off-by: Duan Jiong Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Kamal Mostafa --- net/ipv6/route.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index c9eb8ebd6653..abf355627dc2 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -699,8 +699,11 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, prefix = &prefix_buf; } - rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr, - dev->ifindex); + if (rinfo->prefix_len == 0) + rt = rt6_get_dflt_router(gwaddr, dev); + else + rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, + gwaddr, dev->ifindex); if (rt && !lifetime) { ip6_del_rt(rt); From f255513706358344ffff84bc28280ff9546e2307 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 11 Nov 2013 12:20:32 +0100 Subject: [PATCH 05/92] random32: fix off-by-one in seeding requirement [ Upstream commit 51c37a70aaa3f95773af560e6db3073520513912 ] For properly initialising the Tausworthe generator [1], we have a strict seeding requirement, that is, s1 > 1, s2 > 7, s3 > 15. Commit 697f8d0348 ("random32: seeding improvement") introduced a __seed() function that imposes boundary checks proposed by the errata paper [2] to properly ensure above conditions. However, we're off by one, as the function is implemented as: "return (x < m) ? x + m : x;", and called with __seed(X, 1), __seed(X, 7), __seed(X, 15). Thus, an unwanted seed of 1, 7, 15 would be possible, whereas the lower boundary should actually be of at least 2, 8, 16, just as GSL does. Fix this, as otherwise an initialization with an unwanted seed could have the effect that Tausworthe's PRNG properties cannot not be ensured. Note that this PRNG is *not* used for cryptography in the kernel. [1] http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme.ps [2] http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme2.ps Joint work with Hannes Frederic Sowa. Fixes: 697f8d0348a6 ("random32: seeding improvement") Cc: Stephen Hemminger Cc: Florian Weimer Cc: Theodore Ts'o Signed-off-by: Daniel Borkmann Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Kamal Mostafa --- include/linux/random.h | 6 +++--- lib/random32.c | 14 +++++++------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/include/linux/random.h b/include/linux/random.h index 606922525114..d024cc6dacae 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -57,9 +57,9 @@ static inline void prandom_seed_state(struct rnd_state *state, u64 seed) { u32 i = (seed >> 32) ^ (seed << 10) ^ seed; - state->s1 = __seed(i, 1); - state->s2 = __seed(i, 7); - state->s3 = __seed(i, 15); + state->s1 = __seed(i, 2); + state->s2 = __seed(i, 8); + state->s3 = __seed(i, 16); } #ifdef CONFIG_ARCH_RANDOM diff --git a/lib/random32.c b/lib/random32.c index 52280d5526be..01e8890d1089 100644 --- a/lib/random32.c +++ b/lib/random32.c @@ -141,7 +141,7 @@ void prandom_seed(u32 entropy) */ for_each_possible_cpu (i) { struct rnd_state *state = &per_cpu(net_rand_state, i); - state->s1 = __seed(state->s1 ^ entropy, 1); + state->s1 = __seed(state->s1 ^ entropy, 2); } } EXPORT_SYMBOL(prandom_seed); @@ -158,9 +158,9 @@ static int __init prandom_init(void) struct rnd_state *state = &per_cpu(net_rand_state,i); #define LCG(x) ((x) * 69069) /* super-duper LCG */ - state->s1 = __seed(LCG(i + jiffies), 1); - state->s2 = __seed(LCG(state->s1), 7); - state->s3 = __seed(LCG(state->s2), 15); + state->s1 = __seed(LCG(i + jiffies), 2); + state->s2 = __seed(LCG(state->s1), 8); + state->s3 = __seed(LCG(state->s2), 16); /* "warm it up" */ prandom_u32_state(state); @@ -187,9 +187,9 @@ static int __init prandom_reseed(void) u32 seeds[3]; get_random_bytes(&seeds, sizeof(seeds)); - state->s1 = __seed(seeds[0], 1); - state->s2 = __seed(seeds[1], 7); - state->s3 = __seed(seeds[2], 15); + state->s1 = __seed(seeds[0], 2); + state->s2 = __seed(seeds[1], 8); + state->s3 = __seed(seeds[2], 16); /* mix it in */ prandom_u32_state(state); From 2175c79a02e7d079c8e83298dceb83046000f4ce Mon Sep 17 00:00:00 2001 From: Veaceslav Falico Date: Tue, 12 Nov 2013 15:37:40 +0100 Subject: [PATCH 06/92] bonding: don't permit to use ARP monitoring in 802.3ad mode [ Upstream commit ec9f1d15db8185f63a2c3143dc1e90ba18541b08 ] Currently the ARP monitoring is not supported with 802.3ad, and it's prohibited to use it via the module params. However we still can set it afterwards via sysfs, cause we only check for *LB modes there. To fix this - add a check for 802.3ad mode in bonding_store_arp_interval. CC: Jay Vosburgh CC: Andy Gospodarek Signed-off-by: Veaceslav Falico Signed-off-by: David S. Miller Signed-off-by: Kamal Mostafa --- drivers/net/bonding/bond_sysfs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c index ea7a388f4843..da7b37961565 100644 --- a/drivers/net/bonding/bond_sysfs.c +++ b/drivers/net/bonding/bond_sysfs.c @@ -533,8 +533,9 @@ static ssize_t bonding_store_arp_interval(struct device *d, goto out; } if (bond->params.mode == BOND_MODE_ALB || - bond->params.mode == BOND_MODE_TLB) { - pr_info("%s: ARP monitoring cannot be used with ALB/TLB. Only MII monitoring is supported on %s.\n", + bond->params.mode == BOND_MODE_TLB || + bond->params.mode == BOND_MODE_8023AD) { + pr_info("%s: ARP monitoring cannot be used with ALB/TLB/802.3ad. Only MII monitoring is supported on %s.\n", bond->dev->name, bond->dev->name); ret = -EINVAL; goto out; From e675cded2dcd6917257e8309706be8e6029a417a Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 12 Nov 2013 16:34:41 +0100 Subject: [PATCH 07/92] usbnet: fix status interrupt urb handling [ Upstream commit 52f48d0d9aaa621ffa5e08d79da99a3f8c93b848 ] Since commit 7b0c5f21f348a66de495868b8df0284e8dfd6bbf "sierra_net: keep status interrupt URB active", sierra_net triggers status interrupt polling before the net_device is opened (in order to properly receive the sync message response). To be able to receive further interrupts, the interrupt urb needs to be re-submitted, so this patch removes the bogus check for netif_running(). Signed-off-by: Felix Fietkau Tested-by: Dan Williams Signed-off-by: David S. Miller Signed-off-by: Kamal Mostafa --- drivers/net/usb/usbnet.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 5e33606c1366..e5812c0b6d00 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -206,9 +206,6 @@ static void intr_complete (struct urb *urb) break; } - if (!netif_running (dev->net)) - return; - status = usb_submit_urb (urb, GFP_ATOMIC); if (status != 0) netif_err(dev, timer, dev->net, From 130a71b4a55028622c517cd1d7c0cd7452066cab Mon Sep 17 00:00:00 2001 From: Jukka Rissanen Date: Wed, 13 Nov 2013 11:03:39 +0200 Subject: [PATCH 08/92] 6lowpan: Uncompression of traffic class field was incorrect [ Upstream commit 1188f05497e7bd2f2614b99c54adfbe7413d5749 ] If priority/traffic class field in IPv6 header is set (seen when using ssh), the uncompression sets the TC and Flow fields incorrectly. Example: This is IPv6 header of a sent packet. Note the priority/TC (=1) in the first byte. 00000000: 61 00 00 00 00 2c 06 40 fe 80 00 00 00 00 00 00 00000010: 02 02 72 ff fe c6 42 10 fe 80 00 00 00 00 00 00 00000020: 02 1e ab ff fe 4c 52 57 This gets compressed like this in the sending side 00000000: 72 31 04 06 02 1e ab ff fe 4c 52 57 ec c2 00 16 00000010: aa 2d fe 92 86 4e be c6 .... In the receiving end, the packet gets uncompressed to this IPv6 header 00000000: 60 06 06 02 00 2a 1e 40 fe 80 00 00 00 00 00 00 00000010: 02 02 72 ff fe c6 42 10 fe 80 00 00 00 00 00 00 00000020: ab ff fe 4c 52 57 ec c2 First four bytes are set incorrectly and we have also lost two bytes from destination address. The fix is to switch the case values in switch statement when checking the TC field. Signed-off-by: Jukka Rissanen Signed-off-by: David S. Miller Signed-off-by: Kamal Mostafa --- net/ieee802154/6lowpan.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c index 76c3d0aabd5e..f22706131af2 100644 --- a/net/ieee802154/6lowpan.c +++ b/net/ieee802154/6lowpan.c @@ -822,7 +822,7 @@ lowpan_process_data(struct sk_buff *skb) * Traffic class carried in-line * ECN + DSCP (1 byte), Flow Label is elided */ - case 1: /* 10b */ + case 2: /* 10b */ if (lowpan_fetch_skb_u8(skb, &tmp)) goto drop; @@ -835,7 +835,7 @@ lowpan_process_data(struct sk_buff *skb) * Flow Label carried in-line * ECN + 2-bit Pad + Flow Label (3 bytes), DSCP is elided */ - case 2: /* 01b */ + case 1: /* 01b */ if (lowpan_fetch_skb_u8(skb, &tmp)) goto drop; From bb1bd822c89f7172347d9fb670d3b7d8221fb455 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 13 Nov 2013 14:00:39 +0800 Subject: [PATCH 09/92] tuntap: limit head length of skb allocated [ Upstream commit 96f8d9ecf227638c89f98ccdcdd50b569891976c ] We currently use hdr_len as a hint of head length which is advertised by guest. But when guest advertise a very big value, it can lead to an 64K+ allocating of kmalloc() which has a very high possibility of failure when host memory is fragmented or under heavy stress. The huge hdr_len also reduce the effect of zerocopy or even disable if a gso skb is linearized in guest. To solves those issues, this patch introduces an upper limit (PAGE_SIZE) of the head, which guarantees an order 0 allocation each time. Cc: Stefan Hajnoczi Cc: Michael S. Tsirkin Signed-off-by: Jason Wang Signed-off-by: David S. Miller Signed-off-by: Kamal Mostafa --- drivers/net/tun.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 66b461374adc..1bff29f22373 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1070,6 +1070,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, struct sk_buff *skb; size_t len = total_len, align = NET_SKB_PAD, linear; struct virtio_net_hdr gso = { 0 }; + int good_linear; int offset = 0; int copylen; bool zerocopy = false; @@ -1110,12 +1111,16 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, return -EINVAL; } + good_linear = SKB_MAX_HEAD(align); + if (msg_control) { /* There are 256 bytes to be copied in skb, so there is * enough room for skb expand head in case it is used. * The rest of the buffer is mapped from userspace. */ copylen = gso.hdr_len ? gso.hdr_len : GOODCOPY_LEN; + if (copylen > good_linear) + copylen = good_linear; linear = copylen; if (iov_pages(iv, offset + copylen, count) <= MAX_SKB_FRAGS) zerocopy = true; @@ -1123,7 +1128,10 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, if (!zerocopy) { copylen = len; - linear = gso.hdr_len; + if (gso.hdr_len > good_linear) + linear = good_linear; + else + linear = gso.hdr_len; } skb = tun_alloc_skb(tfile, align, copylen, linear, noblock); From 874b26c21d1cd55e470719cbd66d5eb1777d028c Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 13 Nov 2013 14:00:40 +0800 Subject: [PATCH 10/92] macvtap: limit head length of skb allocated [ Upstream commit 16a3fa28630331e28208872fa5341ce210b901c7 ] We currently use hdr_len as a hint of head length which is advertised by guest. But when guest advertise a very big value, it can lead to an 64K+ allocating of kmalloc() which has a very high possibility of failure when host memory is fragmented or under heavy stress. The huge hdr_len also reduce the effect of zerocopy or even disable if a gso skb is linearized in guest. To solves those issues, this patch introduces an upper limit (PAGE_SIZE) of the head, which guarantees an order 0 allocation each time. Cc: Stefan Hajnoczi Cc: Michael S. Tsirkin Signed-off-by: Jason Wang Signed-off-by: David S. Miller Signed-off-by: Kamal Mostafa --- drivers/net/macvtap.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 987220279686..15820ab3363e 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -672,6 +672,7 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, const struct iovec *iv, unsigned long total_len, size_t count, int noblock) { + int good_linear = SKB_MAX_HEAD(NET_IP_ALIGN); struct sk_buff *skb; struct macvlan_dev *vlan; unsigned long len = total_len; @@ -714,6 +715,8 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, if (m && m->msg_control && sock_flag(&q->sk, SOCK_ZEROCOPY)) { copylen = vnet_hdr.hdr_len ? vnet_hdr.hdr_len : GOODCOPY_LEN; + if (copylen > good_linear) + copylen = good_linear; linear = copylen; if (iov_pages(iv, vnet_hdr_len + copylen, count) <= MAX_SKB_FRAGS) @@ -722,7 +725,10 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, if (!zerocopy) { copylen = len; - linear = vnet_hdr.hdr_len; + if (vnet_hdr.hdr_len > good_linear) + linear = good_linear; + else + linear = vnet_hdr.hdr_len; } skb = macvtap_alloc_skb(&q->sk, NET_IP_ALIGN, copylen, From dde211380cdff161da2fcba7e61ae71446786631 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 13 Nov 2013 06:32:54 -0800 Subject: [PATCH 11/92] tcp: tsq: restore minimal amount of queueing [ Upstream commit 98e09386c0ef4dfd48af7ba60ff908f0d525cdee ] After commit c9eeec26e32e ("tcp: TSQ can use a dynamic limit"), several users reported throughput regressions, notably on mvneta and wifi adapters. 802.11 AMPDU requires a fair amount of queueing to be effective. This patch partially reverts the change done in tcp_write_xmit() so that the minimal amount is sysctl_tcp_limit_output_bytes. It also remove the use of this sysctl while building skb stored in write queue, as TSO autosizing does the right thing anyway. Users with well behaving NICS and correct qdisc (like sch_fq), can then lower the default sysctl_tcp_limit_output_bytes value from 128KB to 8KB. This new usage of sysctl_tcp_limit_output_bytes permits each driver authors to check how their driver performs when/if the value is set to a minimum of 4KB. Normally, line rate for a single TCP flow should be possible, but some drivers rely on timers to perform TX completion and too long TX completion delays prevent reaching full throughput. Fixes: c9eeec26e32e ("tcp: TSQ can use a dynamic limit") Signed-off-by: Eric Dumazet Reported-by: Sujith Manoharan Reported-by: Arnaud Ebalard Tested-by: Sujith Manoharan Cc: Felix Fietkau Signed-off-by: David S. Miller Signed-off-by: Kamal Mostafa --- Documentation/networking/ip-sysctl.txt | 3 --- net/ipv4/tcp.c | 6 ------ net/ipv4/tcp_output.c | 6 +++++- 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 62b9a6196e43..2c9ddb151826 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -603,9 +603,6 @@ tcp_limit_output_bytes - INTEGER typical pfifo_fast qdiscs. tcp_limit_output_bytes limits the number of bytes on qdisc or device to reduce artificial RTT/cwnd and reduce bufferbloat. - Note: For GSO/TSO enabled flows, we try to have at least two - packets in flight. Reducing tcp_limit_output_bytes might also - reduce the size of individual GSO packet (64KB being the max) Default: 131072 tcp_challenge_ack_limit - INTEGER diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index c2aaeab7ccba..8cfdf2b84781 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -814,12 +814,6 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now, xmit_size_goal = min_t(u32, gso_size, sk->sk_gso_max_size - 1 - hlen); - /* TSQ : try to have at least two segments in flight - * (one in NIC TX ring, another in Qdisc) - */ - xmit_size_goal = min_t(u32, xmit_size_goal, - sysctl_tcp_limit_output_bytes >> 1); - xmit_size_goal = tcp_bound_to_half_wnd(tp, xmit_size_goal); /* We try hard to avoid divides here */ diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 0eaef9ae3905..168a0d05acf7 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2019,8 +2019,12 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, * - better RTT estimation and ACK scheduling * - faster recovery * - high rates + * Alas, some drivers / subsystems require a fair amount + * of queued bytes to ensure line rate. + * One example is wifi aggregation (802.11 AMPDU) */ - limit = max(skb->truesize, sk->sk_pacing_rate >> 10); + limit = max_t(unsigned int, sysctl_tcp_limit_output_bytes, + sk->sk_pacing_rate >> 10); if (atomic_read(&sk->sk_wmem_alloc) > limit) { set_bit(TSQ_THROTTLED, &tp->tsq_flags); From 4864b5965002236092c41cc955c3d2f3303c1196 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 13 Nov 2013 17:07:46 +0100 Subject: [PATCH 12/92] bonding: fix two race conditions in bond_store_updelay/downdelay [ Upstream commit b869ccfab1e324507fa3596e3e1308444fb68227 ] This patch fixes two race conditions between bond_store_updelay/downdelay and bond_store_miimon which could lead to division by zero as miimon can be set to 0 while either updelay/downdelay are being set and thus miss the zero check in the beginning, the zero div happens because updelay/downdelay are stored as new_value / bond->params.miimon. Use rtnl to synchronize with miimon setting. CC: Jay Vosburgh CC: Andy Gospodarek CC: Veaceslav Falico Signed-off-by: Nikolay Aleksandrov Acked-by: Veaceslav Falico Signed-off-by: David S. Miller Signed-off-by: Kamal Mostafa --- drivers/net/bonding/bond_sysfs.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c index da7b37961565..f0a65edef078 100644 --- a/drivers/net/bonding/bond_sysfs.c +++ b/drivers/net/bonding/bond_sysfs.c @@ -693,6 +693,8 @@ static ssize_t bonding_store_downdelay(struct device *d, int new_value, ret = count; struct bonding *bond = to_bond(d); + if (!rtnl_trylock()) + return restart_syscall(); if (!(bond->params.miimon)) { pr_err("%s: Unable to set down delay as MII monitoring is disabled\n", bond->dev->name); @@ -726,6 +728,7 @@ static ssize_t bonding_store_downdelay(struct device *d, } out: + rtnl_unlock(); return ret; } static DEVICE_ATTR(downdelay, S_IRUGO | S_IWUSR, @@ -748,6 +751,8 @@ static ssize_t bonding_store_updelay(struct device *d, int new_value, ret = count; struct bonding *bond = to_bond(d); + if (!rtnl_trylock()) + return restart_syscall(); if (!(bond->params.miimon)) { pr_err("%s: Unable to set up delay as MII monitoring is disabled\n", bond->dev->name); @@ -781,6 +786,7 @@ static ssize_t bonding_store_updelay(struct device *d, } out: + rtnl_unlock(); return ret; } static DEVICE_ATTR(updelay, S_IRUGO | S_IWUSR, From 43f7553c7743286e42930678f5eb29e573044440 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 13 Nov 2013 15:00:46 -0800 Subject: [PATCH 13/92] net-tcp: fix panic in tcp_fastopen_cache_set() [ Upstream commit dccf76ca6b626c0c4a4e09bb221adee3270ab0ef ] We had some reports of crashes using TCP fastopen, and Dave Jones gave a nice stack trace pointing to the error. Issue is that tcp_get_metrics() should not be called with a NULL dst Fixes: 1fe4c481ba637 ("net-tcp: Fast Open client - cookie cache") Signed-off-by: Eric Dumazet Reported-by: Dave Jones Cc: Yuchung Cheng Acked-by: Yuchung Cheng Tested-by: Dave Jones Signed-off-by: David S. Miller Signed-off-by: Kamal Mostafa --- net/ipv4/tcp_metrics.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index f696d7c2e9fa..f19041907a94 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -662,10 +662,13 @@ void tcp_fastopen_cache_get(struct sock *sk, u16 *mss, void tcp_fastopen_cache_set(struct sock *sk, u16 mss, struct tcp_fastopen_cookie *cookie, bool syn_lost) { + struct dst_entry *dst = __sk_dst_get(sk); struct tcp_metrics_block *tm; + if (!dst) + return; rcu_read_lock(); - tm = tcp_get_metrics(sk, __sk_dst_get(sk), true); + tm = tcp_get_metrics(sk, dst, true); if (tm) { struct tcp_fastopen_metrics *tfom = &tm->tcpm_fastopen; From 37d05c1346f2a8ae362db963f52b7d6925646f39 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 14 Nov 2013 11:21:10 +0300 Subject: [PATCH 14/92] isdnloop: use strlcpy() instead of strcpy() [ Upstream commit f9a23c84486ed350cce7bb1b2828abd1f6658796 ] These strings come from a copy_from_user() and there is no way to be sure they are NUL terminated. Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller Signed-off-by: Kamal Mostafa --- drivers/isdn/isdnloop/isdnloop.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/isdn/isdnloop/isdnloop.c b/drivers/isdn/isdnloop/isdnloop.c index baf2686aa8eb..02125e6a9109 100644 --- a/drivers/isdn/isdnloop/isdnloop.c +++ b/drivers/isdn/isdnloop/isdnloop.c @@ -1083,8 +1083,10 @@ isdnloop_start(isdnloop_card *card, isdnloop_sdef *sdefp) spin_unlock_irqrestore(&card->isdnloop_lock, flags); return -ENOMEM; } - for (i = 0; i < 3; i++) - strcpy(card->s0num[i], sdef.num[i]); + for (i = 0; i < 3; i++) { + strlcpy(card->s0num[i], sdef.num[i], + sizeof(card->s0num[0])); + } break; case ISDN_PTYPE_1TR6: if (isdnloop_fake(card, "DRV1.04TC-1TR6-CAPI-CNS-BASIS-29.11.95", @@ -1097,7 +1099,7 @@ isdnloop_start(isdnloop_card *card, isdnloop_sdef *sdefp) spin_unlock_irqrestore(&card->isdnloop_lock, flags); return -ENOMEM; } - strcpy(card->s0num[0], sdef.num[0]); + strlcpy(card->s0num[0], sdef.num[0], sizeof(card->s0num[0])); card->s0num[1][0] = '\0'; card->s0num[2][0] = '\0'; break; From 4b7f5b037343dad86ad0a118be46090fbb8bb15f Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Thu, 14 Nov 2013 12:09:21 -0500 Subject: [PATCH 15/92] connector: improved unaligned access error fix [ Upstream commit 1ca1a4cf59ea343a1a70084fe7cc96f37f3cf5b1 ] In af3e095a1fb4, Erik Jacobsen fixed one type of unaligned access bug for ia64 by converting a 64-bit write to use put_unaligned(). Unfortunately, since gcc will convert a short memset() to a series of appropriately-aligned stores, the problem is now visible again on tilegx, where the memset that zeros out proc_event is converted to three 64-bit stores, causing an unaligned access panic. A better fix for the original problem is to ensure that proc_event is aligned to 8 bytes here. We can do that relatively easily by arranging to start the struct cn_msg aligned to 8 bytes and then offset by 4 bytes. Doing so means that the immediately following proc_event structure is then correctly aligned to 8 bytes. The result is that the memset() stores are now aligned, and as an added benefit, we can remove the put_unaligned() calls in the code. Signed-off-by: Chris Metcalf Signed-off-by: David S. Miller Signed-off-by: Kamal Mostafa --- drivers/connector/cn_proc.c | 66 ++++++++++++++++++++++--------------- 1 file changed, 39 insertions(+), 27 deletions(-) diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c index 1b1d255f0e15..ccc6e8e2d1cc 100644 --- a/drivers/connector/cn_proc.c +++ b/drivers/connector/cn_proc.c @@ -32,11 +32,23 @@ #include #include -#include - #include -#define CN_PROC_MSG_SIZE (sizeof(struct cn_msg) + sizeof(struct proc_event)) +/* + * Size of a cn_msg followed by a proc_event structure. Since the + * sizeof struct cn_msg is a multiple of 4 bytes, but not 8 bytes, we + * add one 4-byte word to the size here, and then start the actual + * cn_msg structure 4 bytes into the stack buffer. The result is that + * the immediately following proc_event structure is aligned to 8 bytes. + */ +#define CN_PROC_MSG_SIZE (sizeof(struct cn_msg) + sizeof(struct proc_event) + 4) + +/* See comment above; we test our assumption about sizeof struct cn_msg here. */ +static inline struct cn_msg *buffer_to_cn_msg(__u8 *buffer) +{ + BUILD_BUG_ON(sizeof(struct cn_msg) != 20); + return (struct cn_msg *)(buffer + 4); +} static atomic_t proc_event_num_listeners = ATOMIC_INIT(0); static struct cb_id cn_proc_event_id = { CN_IDX_PROC, CN_VAL_PROC }; @@ -56,19 +68,19 @@ void proc_fork_connector(struct task_struct *task) { struct cn_msg *msg; struct proc_event *ev; - __u8 buffer[CN_PROC_MSG_SIZE]; + __u8 buffer[CN_PROC_MSG_SIZE] __aligned(8); struct timespec ts; struct task_struct *parent; if (atomic_read(&proc_event_num_listeners) < 1) return; - msg = (struct cn_msg *)buffer; + msg = buffer_to_cn_msg(buffer); ev = (struct proc_event *)msg->data; memset(&ev->event_data, 0, sizeof(ev->event_data)); get_seq(&msg->seq, &ev->cpu); ktime_get_ts(&ts); /* get high res monotonic timestamp */ - put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); + ev->timestamp_ns = timespec_to_ns(&ts); ev->what = PROC_EVENT_FORK; rcu_read_lock(); parent = rcu_dereference(task->real_parent); @@ -91,17 +103,17 @@ void proc_exec_connector(struct task_struct *task) struct cn_msg *msg; struct proc_event *ev; struct timespec ts; - __u8 buffer[CN_PROC_MSG_SIZE]; + __u8 buffer[CN_PROC_MSG_SIZE] __aligned(8); if (atomic_read(&proc_event_num_listeners) < 1) return; - msg = (struct cn_msg *)buffer; + msg = buffer_to_cn_msg(buffer); ev = (struct proc_event *)msg->data; memset(&ev->event_data, 0, sizeof(ev->event_data)); get_seq(&msg->seq, &ev->cpu); ktime_get_ts(&ts); /* get high res monotonic timestamp */ - put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); + ev->timestamp_ns = timespec_to_ns(&ts); ev->what = PROC_EVENT_EXEC; ev->event_data.exec.process_pid = task->pid; ev->event_data.exec.process_tgid = task->tgid; @@ -117,14 +129,14 @@ void proc_id_connector(struct task_struct *task, int which_id) { struct cn_msg *msg; struct proc_event *ev; - __u8 buffer[CN_PROC_MSG_SIZE]; + __u8 buffer[CN_PROC_MSG_SIZE] __aligned(8); struct timespec ts; const struct cred *cred; if (atomic_read(&proc_event_num_listeners) < 1) return; - msg = (struct cn_msg *)buffer; + msg = buffer_to_cn_msg(buffer); ev = (struct proc_event *)msg->data; memset(&ev->event_data, 0, sizeof(ev->event_data)); ev->what = which_id; @@ -145,7 +157,7 @@ void proc_id_connector(struct task_struct *task, int which_id) rcu_read_unlock(); get_seq(&msg->seq, &ev->cpu); ktime_get_ts(&ts); /* get high res monotonic timestamp */ - put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); + ev->timestamp_ns = timespec_to_ns(&ts); memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); msg->ack = 0; /* not used */ @@ -159,17 +171,17 @@ void proc_sid_connector(struct task_struct *task) struct cn_msg *msg; struct proc_event *ev; struct timespec ts; - __u8 buffer[CN_PROC_MSG_SIZE]; + __u8 buffer[CN_PROC_MSG_SIZE] __aligned(8); if (atomic_read(&proc_event_num_listeners) < 1) return; - msg = (struct cn_msg *)buffer; + msg = buffer_to_cn_msg(buffer); ev = (struct proc_event *)msg->data; memset(&ev->event_data, 0, sizeof(ev->event_data)); get_seq(&msg->seq, &ev->cpu); ktime_get_ts(&ts); /* get high res monotonic timestamp */ - put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); + ev->timestamp_ns = timespec_to_ns(&ts); ev->what = PROC_EVENT_SID; ev->event_data.sid.process_pid = task->pid; ev->event_data.sid.process_tgid = task->tgid; @@ -186,17 +198,17 @@ void proc_ptrace_connector(struct task_struct *task, int ptrace_id) struct cn_msg *msg; struct proc_event *ev; struct timespec ts; - __u8 buffer[CN_PROC_MSG_SIZE]; + __u8 buffer[CN_PROC_MSG_SIZE] __aligned(8); if (atomic_read(&proc_event_num_listeners) < 1) return; - msg = (struct cn_msg *)buffer; + msg = buffer_to_cn_msg(buffer); ev = (struct proc_event *)msg->data; memset(&ev->event_data, 0, sizeof(ev->event_data)); get_seq(&msg->seq, &ev->cpu); ktime_get_ts(&ts); /* get high res monotonic timestamp */ - put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); + ev->timestamp_ns = timespec_to_ns(&ts); ev->what = PROC_EVENT_PTRACE; ev->event_data.ptrace.process_pid = task->pid; ev->event_data.ptrace.process_tgid = task->tgid; @@ -221,17 +233,17 @@ void proc_comm_connector(struct task_struct *task) struct cn_msg *msg; struct proc_event *ev; struct timespec ts; - __u8 buffer[CN_PROC_MSG_SIZE]; + __u8 buffer[CN_PROC_MSG_SIZE] __aligned(8); if (atomic_read(&proc_event_num_listeners) < 1) return; - msg = (struct cn_msg *)buffer; + msg = buffer_to_cn_msg(buffer); ev = (struct proc_event *)msg->data; memset(&ev->event_data, 0, sizeof(ev->event_data)); get_seq(&msg->seq, &ev->cpu); ktime_get_ts(&ts); /* get high res monotonic timestamp */ - put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); + ev->timestamp_ns = timespec_to_ns(&ts); ev->what = PROC_EVENT_COMM; ev->event_data.comm.process_pid = task->pid; ev->event_data.comm.process_tgid = task->tgid; @@ -248,18 +260,18 @@ void proc_exit_connector(struct task_struct *task) { struct cn_msg *msg; struct proc_event *ev; - __u8 buffer[CN_PROC_MSG_SIZE]; + __u8 buffer[CN_PROC_MSG_SIZE] __aligned(8); struct timespec ts; if (atomic_read(&proc_event_num_listeners) < 1) return; - msg = (struct cn_msg *)buffer; + msg = buffer_to_cn_msg(buffer); ev = (struct proc_event *)msg->data; memset(&ev->event_data, 0, sizeof(ev->event_data)); get_seq(&msg->seq, &ev->cpu); ktime_get_ts(&ts); /* get high res monotonic timestamp */ - put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); + ev->timestamp_ns = timespec_to_ns(&ts); ev->what = PROC_EVENT_EXIT; ev->event_data.exit.process_pid = task->pid; ev->event_data.exit.process_tgid = task->tgid; @@ -286,18 +298,18 @@ static void cn_proc_ack(int err, int rcvd_seq, int rcvd_ack) { struct cn_msg *msg; struct proc_event *ev; - __u8 buffer[CN_PROC_MSG_SIZE]; + __u8 buffer[CN_PROC_MSG_SIZE] __aligned(8); struct timespec ts; if (atomic_read(&proc_event_num_listeners) < 1) return; - msg = (struct cn_msg *)buffer; + msg = buffer_to_cn_msg(buffer); ev = (struct proc_event *)msg->data; memset(&ev->event_data, 0, sizeof(ev->event_data)); msg->seq = rcvd_seq; ktime_get_ts(&ts); /* get high res monotonic timestamp */ - put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); + ev->timestamp_ns = timespec_to_ns(&ts); ev->cpu = -1; ev->what = PROC_EVENT_NONE; ev->event_data.ack.err = err; From bdd05946880135bc816ad90e817f191a327be0a8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 14 Nov 2013 13:37:54 -0800 Subject: [PATCH 16/92] ipv4: fix possible seqlock deadlock [ Upstream commit c9e9042994d37cbc1ee538c500e9da1bb9d1bcdf ] ip4_datagram_connect() being called from process context, it should use IP_INC_STATS() instead of IP_INC_STATS_BH() otherwise we can deadlock on 32bit arches, or get corruptions of SNMP counters. Fixes: 584bdf8cbdf6 ("[IPV4]: Fix "ipOutNoRoutes" counter error for TCP and UDP") Signed-off-by: Eric Dumazet Reported-by: Dave Jones Signed-off-by: David S. Miller Signed-off-by: Kamal Mostafa --- net/ipv4/datagram.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index b28e863fe0a7..19e36376d2a0 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -57,7 +57,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (IS_ERR(rt)) { err = PTR_ERR(rt); if (err == -ENETUNREACH) - IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); + IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); goto out; } From 53a215ad49061007a21987b76f252ed1d1e77dfb Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Mon, 18 Nov 2013 04:20:45 +0100 Subject: [PATCH 17/92] inet: prevent leakage of uninitialized memory to user in recv syscalls [ Upstream commit bceaa90240b6019ed73b49965eac7d167610be69 ] Only update *addr_len when we actually fill in sockaddr, otherwise we can return uninitialized memory from the stack to the caller in the recvfrom, recvmmsg and recvmsg syscalls. Drop the the (addr_len == NULL) checks because we only get called with a valid addr_len pointer either from sock_common_recvmsg or inet_recvmsg. If a blocking read waits on a socket which is concurrently shut down we now return zero and set msg_msgnamelen to 0. Reported-by: mpb Suggested-by: Eric Dumazet Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller [ kamal: backport to 3.8 (context) ] Signed-off-by: Kamal Mostafa --- net/ipv4/ping.c | 9 ++++----- net/ipv4/raw.c | 4 +--- net/ipv4/udp.c | 7 +------ net/ipv6/raw.c | 4 +--- net/ipv6/udp.c | 5 +---- net/l2tp/l2tp_ip.c | 4 +--- net/phonet/datagram.c | 9 ++++----- 7 files changed, 13 insertions(+), 29 deletions(-) diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index dc454ccc5039..a690121c5e3b 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -627,7 +627,6 @@ static int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len, int noblock, int flags, int *addr_len) { struct inet_sock *isk = inet_sk(sk); - struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; struct sk_buff *skb; int copied, err; @@ -637,9 +636,6 @@ static int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (flags & MSG_OOB) goto out; - if (addr_len) - *addr_len = sizeof(*sin); - if (flags & MSG_ERRQUEUE) return ip_recv_error(sk, msg, len); @@ -661,11 +657,14 @@ static int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, sock_recv_timestamp(msg, sk, skb); /* Copy the address. */ - if (sin) { + if (msg->msg_name) { + struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; + sin->sin_family = AF_INET; sin->sin_port = 0 /* skb->h.uh->source */; sin->sin_addr.s_addr = ip_hdr(skb)->saddr; memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); + *addr_len = sizeof(*sin); } if (isk->cmsg_flags) ip_cmsg_recv(msg, skb); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index a9b4ef3bafbe..5f42d6d351cd 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -694,9 +694,6 @@ static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (flags & MSG_OOB) goto out; - if (addr_len) - *addr_len = sizeof(*sin); - if (flags & MSG_ERRQUEUE) { err = ip_recv_error(sk, msg, len); goto out; @@ -724,6 +721,7 @@ static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, sin->sin_addr.s_addr = ip_hdr(skb)->saddr; sin->sin_port = 0; memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); + *addr_len = sizeof(*sin); } if (inet->cmsg_flags) ip_cmsg_recv(msg, skb); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 21e1eecae675..83e327303694 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1180,12 +1180,6 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, int is_udplite = IS_UDPLITE(sk); bool slow; - /* - * Check any passed addresses - */ - if (addr_len) - *addr_len = sizeof(*sin); - if (flags & MSG_ERRQUEUE) return ip_recv_error(sk, msg, len); @@ -1247,6 +1241,7 @@ try_again: sin->sin_port = udp_hdr(skb)->source; sin->sin_addr.s_addr = ip_hdr(skb)->saddr; memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); + *addr_len = sizeof(*sin); } if (inet->cmsg_flags) ip_cmsg_recv(msg, skb); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 70fa81449997..276faf68424e 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -460,9 +460,6 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk, if (flags & MSG_OOB) return -EOPNOTSUPP; - if (addr_len) - *addr_len=sizeof(*sin6); - if (flags & MSG_ERRQUEUE) return ipv6_recv_error(sk, msg, len); @@ -502,6 +499,7 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk, sin6->sin6_scope_id = 0; if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) sin6->sin6_scope_id = IP6CB(skb)->iif; + *addr_len = sizeof(*sin6); } sock_recv_ts_and_drops(msg, sk, skb); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 4ca80fc21706..f8236a356d15 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -349,9 +349,6 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, int is_udp4; bool slow; - if (addr_len) - *addr_len = sizeof(struct sockaddr_in6); - if (flags & MSG_ERRQUEUE) return ipv6_recv_error(sk, msg, len); @@ -436,7 +433,7 @@ try_again: if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) sin6->sin6_scope_id = IP6CB(skb)->iif; } - + *addr_len = sizeof(*sin6); } if (is_udp4) { if (inet->cmsg_flags) diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 61d8b75d2686..2bc35bc1cc6e 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -510,9 +510,6 @@ static int l2tp_ip_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m if (flags & MSG_OOB) goto out; - if (addr_len) - *addr_len = sizeof(*sin); - skb = skb_recv_datagram(sk, flags, noblock, &err); if (!skb) goto out; @@ -535,6 +532,7 @@ static int l2tp_ip_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m sin->sin_addr.s_addr = ip_hdr(skb)->saddr; sin->sin_port = 0; memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); + *addr_len = sizeof(*sin); } if (inet->cmsg_flags) ip_cmsg_recv(msg, skb); diff --git a/net/phonet/datagram.c b/net/phonet/datagram.c index 12c30f3e643e..38946b26e471 100644 --- a/net/phonet/datagram.c +++ b/net/phonet/datagram.c @@ -139,9 +139,6 @@ static int pn_recvmsg(struct kiocb *iocb, struct sock *sk, MSG_CMSG_COMPAT)) goto out_nofree; - if (addr_len) - *addr_len = sizeof(sa); - skb = skb_recv_datagram(sk, flags, noblock, &rval); if (skb == NULL) goto out_nofree; @@ -162,8 +159,10 @@ static int pn_recvmsg(struct kiocb *iocb, struct sock *sk, rval = (flags & MSG_TRUNC) ? skb->len : copylen; - if (msg->msg_name != NULL) - memcpy(msg->msg_name, &sa, sizeof(struct sockaddr_pn)); + if (msg->msg_name != NULL) { + memcpy(msg->msg_name, &sa, sizeof(sa)); + *addr_len = sizeof(sa); + } out: skb_free_datagram(sk, skb); From acb095d4c5b5ae3d9d62f2057bef477ada8f8b4a Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Thu, 21 Nov 2013 03:14:22 +0100 Subject: [PATCH 18/92] net: rework recvmsg handler msg_name and msg_namelen logic [ Upstream commit f3d3342602f8bcbf37d7c46641cb9bca7618eb1c ] This patch now always passes msg->msg_namelen as 0. recvmsg handlers must set msg_namelen to the proper size <= sizeof(struct sockaddr_storage) to return msg_name to the user. This prevents numerous uninitialized memory leaks we had in the recvmsg handlers and makes it harder for new code to accidentally leak uninitialized memory. Optimize for the case recvfrom is called with NULL as address. We don't need to copy the address at all, so set it to NULL before invoking the recvmsg handler. We can do so, because all the recvmsg handlers must cope with the case a plain read() is called on them. read() also sets msg_name to NULL. Also document these changes in include/linux/net.h as suggested by David Miller. Changes since RFC: Set msg->msg_name = NULL if user specified a NULL in msg_name but had a non-null msg_namelen in verify_iovec/verify_compat_iovec. This doesn't affect sendto as it would bail out earlier while trying to copy-in the address. It also more naturally reflects the logic by the callers of verify_iovec. With this change in place I could remove " if (!uaddr || msg_sys->msg_namelen == 0) msg->msg_name = NULL ". This change does not alter the user visible error logic as we ignore msg_namelen as long as msg_name is NULL. Also remove two unnecessary curly brackets in ___sys_recvmsg and change comments to netdev style. Cc: David Miller Suggested-by: Eric Dumazet Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Kamal Mostafa --- crypto/algif_hash.c | 2 -- crypto/algif_skcipher.c | 1 - drivers/isdn/mISDN/socket.c | 13 ++++--------- drivers/net/ppp/pppoe.c | 2 -- include/linux/net.h | 8 ++++++++ net/appletalk/ddp.c | 16 +++++++--------- net/atm/common.c | 2 -- net/ax25/af_ax25.c | 4 ++-- net/bluetooth/af_bluetooth.c | 4 ---- net/bluetooth/hci_sock.c | 2 -- net/bluetooth/rfcomm/sock.c | 1 - net/bluetooth/sco.c | 1 - net/caif/caif_socket.c | 4 ---- net/compat.c | 3 ++- net/core/iovec.c | 3 ++- net/ipx/af_ipx.c | 3 +-- net/irda/af_irda.c | 4 ---- net/iucv/af_iucv.c | 2 -- net/key/af_key.c | 1 - net/l2tp/l2tp_ppp.c | 2 -- net/llc/af_llc.c | 2 -- net/netlink/af_netlink.c | 2 -- net/netrom/af_netrom.c | 3 +-- net/nfc/llcp/sock.c | 2 -- net/nfc/rawsock.c | 2 -- net/packet/af_packet.c | 32 +++++++++++++++----------------- net/rds/recv.c | 2 -- net/rose/af_rose.c | 8 +++++--- net/rxrpc/ar-recvmsg.c | 9 ++++++--- net/socket.c | 19 +++++++++++-------- net/tipc/socket.c | 6 ------ net/unix/af_unix.c | 5 ----- net/x25/af_x25.c | 3 +-- 33 files changed, 65 insertions(+), 108 deletions(-) diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c index 0262210cad38..ef5356cd280a 100644 --- a/crypto/algif_hash.c +++ b/crypto/algif_hash.c @@ -161,8 +161,6 @@ static int hash_recvmsg(struct kiocb *unused, struct socket *sock, else if (len < ds) msg->msg_flags |= MSG_TRUNC; - msg->msg_namelen = 0; - lock_sock(sk); if (ctx->more) { ctx->more = 0; diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index a1c4f0a55583..6a6dfc062d2a 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -432,7 +432,6 @@ static int skcipher_recvmsg(struct kiocb *unused, struct socket *sock, long copied = 0; lock_sock(sk); - msg->msg_namelen = 0; for (iov = msg->msg_iov, iovlen = msg->msg_iovlen; iovlen > 0; iovlen--, iov++) { unsigned long seglen = iov->iov_len; diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c index abe2d699b6f3..ade1bcfb0e6d 100644 --- a/drivers/isdn/mISDN/socket.c +++ b/drivers/isdn/mISDN/socket.c @@ -117,7 +117,6 @@ mISDN_sock_recvmsg(struct kiocb *iocb, struct socket *sock, { struct sk_buff *skb; struct sock *sk = sock->sk; - struct sockaddr_mISDN *maddr; int copied, err; @@ -135,9 +134,9 @@ mISDN_sock_recvmsg(struct kiocb *iocb, struct socket *sock, if (!skb) return err; - if (msg->msg_namelen >= sizeof(struct sockaddr_mISDN)) { - msg->msg_namelen = sizeof(struct sockaddr_mISDN); - maddr = (struct sockaddr_mISDN *)msg->msg_name; + if (msg->msg_name) { + struct sockaddr_mISDN *maddr = msg->msg_name; + maddr->family = AF_ISDN; maddr->dev = _pms(sk)->dev->id; if ((sk->sk_protocol == ISDN_P_LAPD_TE) || @@ -150,11 +149,7 @@ mISDN_sock_recvmsg(struct kiocb *iocb, struct socket *sock, maddr->sapi = _pms(sk)->ch.addr & 0xFF; maddr->tei = (_pms(sk)->ch.addr >> 8) & 0xFF; } - } else { - if (msg->msg_namelen) - printk(KERN_WARNING "%s: too small namelen %d\n", - __func__, msg->msg_namelen); - msg->msg_namelen = 0; + msg->msg_namelen = sizeof(*maddr); } copied = skb->len + MISDN_HEADER_LEN; diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index 20f31d0d1536..56b52afcd69b 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -979,8 +979,6 @@ static int pppoe_recvmsg(struct kiocb *iocb, struct socket *sock, if (error < 0) goto end; - m->msg_namelen = 0; - if (skb) { total_len = min_t(size_t, total_len, skb->len); error = skb_copy_datagram_iovec(skb, 0, m->msg_iov, total_len); diff --git a/include/linux/net.h b/include/linux/net.h index aa1673160a45..5ea38d9ac969 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -163,6 +163,14 @@ struct proto_ops { #endif int (*sendmsg) (struct kiocb *iocb, struct socket *sock, struct msghdr *m, size_t total_len); + /* Notes for implementing recvmsg: + * =============================== + * msg->msg_namelen should get updated by the recvmsg handlers + * iff msg_name != NULL. It is by default 0 to prevent + * returning uninitialized memory to user space. The recvfrom + * handlers can assume that msg.msg_name is either NULL or has + * a minimum size of sizeof(struct sockaddr_storage). + */ int (*recvmsg) (struct kiocb *iocb, struct socket *sock, struct msghdr *m, size_t total_len, int flags); diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 33475291c9c1..ea1700a35eb4 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1738,7 +1738,6 @@ static int atalk_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr size_t size, int flags) { struct sock *sk = sock->sk; - struct sockaddr_at *sat = (struct sockaddr_at *)msg->msg_name; struct ddpehdr *ddp; int copied = 0; int offset = 0; @@ -1767,14 +1766,13 @@ static int atalk_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr } err = skb_copy_datagram_iovec(skb, offset, msg->msg_iov, copied); - if (!err) { - if (sat) { - sat->sat_family = AF_APPLETALK; - sat->sat_port = ddp->deh_sport; - sat->sat_addr.s_node = ddp->deh_snode; - sat->sat_addr.s_net = ddp->deh_snet; - } - msg->msg_namelen = sizeof(*sat); + if (!err && msg->msg_name) { + struct sockaddr_at *sat = msg->msg_name; + sat->sat_family = AF_APPLETALK; + sat->sat_port = ddp->deh_sport; + sat->sat_addr.s_node = ddp->deh_snode; + sat->sat_addr.s_net = ddp->deh_snet; + msg->msg_namelen = sizeof(*sat); } skb_free_datagram(sk, skb); /* Free the datagram. */ diff --git a/net/atm/common.c b/net/atm/common.c index cf4b7e667a6e..806fc0a40051 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -532,8 +532,6 @@ int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, struct sk_buff *skb; int copied, error = -EINVAL; - msg->msg_namelen = 0; - if (sock->state != SS_CONNECTED) return -ENOTCONN; diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index d53a123e36a0..eb373274e342 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1641,11 +1641,11 @@ static int ax25_recvmsg(struct kiocb *iocb, struct socket *sock, skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); - if (msg->msg_namelen != 0) { - struct sockaddr_ax25 *sax = (struct sockaddr_ax25 *)msg->msg_name; + if (msg->msg_name) { ax25_digi digi; ax25_address src; const unsigned char *mac = skb_mac_header(skb); + struct sockaddr_ax25 *sax = msg->msg_name; memset(sax, 0, sizeof(struct full_sockaddr_ax25)); ax25_addr_parse(mac + 1, skb->data - mac - 1, &src, NULL, diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index b04795e7aec5..4de2557bd6c7 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -230,8 +230,6 @@ int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock, if (flags & (MSG_OOB)) return -EOPNOTSUPP; - msg->msg_namelen = 0; - skb = skb_recv_datagram(sk, flags, noblock, &err); if (!skb) { if (sk->sk_shutdown & RCV_SHUTDOWN) @@ -296,8 +294,6 @@ int bt_sock_stream_recvmsg(struct kiocb *iocb, struct socket *sock, if (flags & MSG_OOB) return -EOPNOTSUPP; - msg->msg_namelen = 0; - BT_DBG("sk %p size %zu", sk, size); lock_sock(sk); diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 07f073935811..616dd836ae70 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -757,8 +757,6 @@ static int hci_sock_recvmsg(struct kiocb *iocb, struct socket *sock, if (!skb) return err; - msg->msg_namelen = 0; - copied = skb->len; if (len < copied) { msg->msg_flags |= MSG_TRUNC; diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 970fc13d8e39..ce3f6658f4b2 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -610,7 +610,6 @@ static int rfcomm_sock_recvmsg(struct kiocb *iocb, struct socket *sock, if (test_and_clear_bit(RFCOMM_DEFER_SETUP, &d->flags)) { rfcomm_dlc_accept(d); - msg->msg_namelen = 0; return 0; } diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index cc16d1bccf54..aaf1957bc4fe 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -667,7 +667,6 @@ static int sco_sock_recvmsg(struct kiocb *iocb, struct socket *sock, test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) { hci_conn_accept(pi->conn->hcon, 0); sk->sk_state = BT_CONFIG; - msg->msg_namelen = 0; release_sock(sk); return 0; diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index ff2ff3ce6965..59d73aaf0a08 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -286,8 +286,6 @@ static int caif_seqpkt_recvmsg(struct kiocb *iocb, struct socket *sock, if (m->msg_flags&MSG_OOB) goto read_error; - m->msg_namelen = 0; - skb = skb_recv_datagram(sk, flags, 0 , &ret); if (!skb) goto read_error; @@ -361,8 +359,6 @@ static int caif_stream_recvmsg(struct kiocb *iocb, struct socket *sock, if (flags&MSG_OOB) goto out; - msg->msg_namelen = 0; - /* * Lock the socket to prevent queue disordering * while sleeps in memcpy_tomsg diff --git a/net/compat.c b/net/compat.c index 89032580bd1d..618c6a8a911b 100644 --- a/net/compat.c +++ b/net/compat.c @@ -93,7 +93,8 @@ int verify_compat_iovec(struct msghdr *kern_msg, struct iovec *kern_iov, if (err < 0) return err; } - kern_msg->msg_name = kern_address; + if (kern_msg->msg_name) + kern_msg->msg_name = kern_address; } else kern_msg->msg_name = NULL; diff --git a/net/core/iovec.c b/net/core/iovec.c index 7e7aeb01de45..7fd34a56aeb3 100644 --- a/net/core/iovec.c +++ b/net/core/iovec.c @@ -48,7 +48,8 @@ int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr_storage *a if (err < 0) return err; } - m->msg_name = address; + if (m->msg_name) + m->msg_name = address; } else { m->msg_name = NULL; } diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index dfd6faaf0ea7..624520defe35 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -1827,8 +1827,6 @@ static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock, if (skb->tstamp.tv64) sk->sk_stamp = skb->tstamp; - msg->msg_namelen = sizeof(*sipx); - if (sipx) { sipx->sipx_family = AF_IPX; sipx->sipx_port = ipx->ipx_source.sock; @@ -1836,6 +1834,7 @@ static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock, sipx->sipx_network = IPX_SKB_CB(skb)->ipx_source_net; sipx->sipx_type = ipx->ipx_type; sipx->sipx_zero = 0; + msg->msg_namelen = sizeof(*sipx); } rc = copied; diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index 3c9bd5949d7a..58eb843a5244 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -1386,8 +1386,6 @@ static int irda_recvmsg_dgram(struct kiocb *iocb, struct socket *sock, IRDA_DEBUG(4, "%s()\n", __func__); - msg->msg_namelen = 0; - skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, flags & MSG_DONTWAIT, &err); if (!skb) @@ -1452,8 +1450,6 @@ static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock, target = sock_rcvlowat(sk, flags & MSG_WAITALL, size); timeo = sock_rcvtimeo(sk, noblock); - msg->msg_namelen = 0; - do { int chunk; struct sk_buff *skb = skb_dequeue(&sk->sk_receive_queue); diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 625bc50391cc..cd6f7a991d80 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1331,8 +1331,6 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct sk_buff *skb, *rskb, *cskb; int err = 0; - msg->msg_namelen = 0; - if ((sk->sk_state == IUCV_DISCONN) && skb_queue_empty(&iucv->backlog_skb_q) && skb_queue_empty(&sk->sk_receive_queue) && diff --git a/net/key/af_key.c b/net/key/af_key.c index 2c698fcd0668..2f5ac7ee4c7b 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -3598,7 +3598,6 @@ static int pfkey_recvmsg(struct kiocb *kiocb, if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT)) goto out; - msg->msg_namelen = 0; skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err); if (skb == NULL) goto out; diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 6920a6fbf25b..ddf43a3a04ba 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -196,8 +196,6 @@ static int pppol2tp_recvmsg(struct kiocb *iocb, struct socket *sock, if (sk->sk_state & PPPOX_BOUND) goto end; - msg->msg_namelen = 0; - err = 0; skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, flags & MSG_DONTWAIT, &err); diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 48aaa89253e0..88709882c464 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -720,8 +720,6 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock, int target; /* Read at least this many bytes */ long timeo; - msg->msg_namelen = 0; - lock_sock(sk); copied = -ENOTCONN; if (unlikely(sk->sk_type == SOCK_STREAM && sk->sk_state == TCP_LISTEN)) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index c0353d55d56f..015dec014760 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1483,8 +1483,6 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, } #endif - msg->msg_namelen = 0; - copied = data_skb->len; if (len < copied) { msg->msg_flags |= MSG_TRUNC; diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index f334fbde50e8..ee2a818c9133 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -1181,10 +1181,9 @@ static int nr_recvmsg(struct kiocb *iocb, struct socket *sock, sax->sax25_family = AF_NETROM; skb_copy_from_linear_data_offset(skb, 7, sax->sax25_call.ax25_call, AX25_ADDR_LEN); + msg->msg_namelen = sizeof(*sax); } - msg->msg_namelen = sizeof(*sax); - skb_free_datagram(sk, skb); release_sock(sk); diff --git a/net/nfc/llcp/sock.c b/net/nfc/llcp/sock.c index 48fb1ded383a..9d24fc1f70e1 100644 --- a/net/nfc/llcp/sock.c +++ b/net/nfc/llcp/sock.c @@ -644,8 +644,6 @@ static int llcp_sock_recvmsg(struct kiocb *iocb, struct socket *sock, pr_debug("%p %zu\n", sk, len); - msg->msg_namelen = 0; - lock_sock(sk); if (sk->sk_state == LLCP_CLOSED && diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c index 313bf1bc848a..5d11f4ac3ecb 100644 --- a/net/nfc/rawsock.c +++ b/net/nfc/rawsock.c @@ -241,8 +241,6 @@ static int rawsock_recvmsg(struct kiocb *iocb, struct socket *sock, if (!skb) return rc; - msg->msg_namelen = 0; - copied = skb->len; if (len < copied) { msg->msg_flags |= MSG_TRUNC; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index dbddeb46c413..517d154a4051 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2612,7 +2612,6 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, struct sock *sk = sock->sk; struct sk_buff *skb; int copied, err; - struct sockaddr_ll *sll; int vnet_hdr_len = 0; err = -EINVAL; @@ -2695,22 +2694,10 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, goto out_free; } - /* - * If the address length field is there to be filled in, we fill - * it in now. + /* You lose any data beyond the buffer you gave. If it worries + * a user program they can ask the device for its MTU + * anyway. */ - - sll = &PACKET_SKB_CB(skb)->sa.ll; - if (sock->type == SOCK_PACKET) - msg->msg_namelen = sizeof(struct sockaddr_pkt); - else - msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr); - - /* - * You lose any data beyond the buffer you gave. If it worries a - * user program they can ask the device for its MTU anyway. - */ - copied = skb->len; if (copied > len) { copied = len; @@ -2723,9 +2710,20 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, sock_recv_ts_and_drops(msg, sk, skb); - if (msg->msg_name) + if (msg->msg_name) { + /* If the address length field is there to be filled + * in, we fill it in now. + */ + if (sock->type == SOCK_PACKET) { + msg->msg_namelen = sizeof(struct sockaddr_pkt); + } else { + struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll; + msg->msg_namelen = sll->sll_halen + + offsetof(struct sockaddr_ll, sll_addr); + } memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, msg->msg_namelen); + } if (pkt_sk(sk)->auxdata) { struct tpacket_auxdata aux; diff --git a/net/rds/recv.c b/net/rds/recv.c index 9f0f17cf6bf9..de339b24ca14 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -410,8 +410,6 @@ int rds_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, rdsdebug("size %zu flags 0x%x timeo %ld\n", size, msg_flags, timeo); - msg->msg_namelen = 0; - if (msg_flags & MSG_OOB) goto out; diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 7f645d115795..ce5f5b934ea1 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -1220,7 +1220,6 @@ static int rose_recvmsg(struct kiocb *iocb, struct socket *sock, { struct sock *sk = sock->sk; struct rose_sock *rose = rose_sk(sk); - struct sockaddr_rose *srose = (struct sockaddr_rose *)msg->msg_name; size_t copied; unsigned char *asmptr; struct sk_buff *skb; @@ -1256,8 +1255,11 @@ static int rose_recvmsg(struct kiocb *iocb, struct socket *sock, skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); - if (srose != NULL) { - memset(srose, 0, msg->msg_namelen); + if (msg->msg_name) { + struct sockaddr_rose *srose; + + memset(msg->msg_name, 0, sizeof(struct full_sockaddr_rose)); + srose = msg->msg_name; srose->srose_family = AF_ROSE; srose->srose_addr = rose->dest_addr; srose->srose_call = rose->dest_call; diff --git a/net/rxrpc/ar-recvmsg.c b/net/rxrpc/ar-recvmsg.c index 4b48687c3890..898492a8d61b 100644 --- a/net/rxrpc/ar-recvmsg.c +++ b/net/rxrpc/ar-recvmsg.c @@ -143,10 +143,13 @@ int rxrpc_recvmsg(struct kiocb *iocb, struct socket *sock, /* copy the peer address and timestamp */ if (!continue_call) { - if (msg->msg_name && msg->msg_namelen > 0) + if (msg->msg_name) { + size_t len = + sizeof(call->conn->trans->peer->srx); memcpy(msg->msg_name, - &call->conn->trans->peer->srx, - sizeof(call->conn->trans->peer->srx)); + &call->conn->trans->peer->srx, len); + msg->msg_namelen = len; + } sock_recv_ts_and_drops(msg, &rx->sk, skb); } diff --git a/net/socket.c b/net/socket.c index 809e94116b74..5b1b9bdf418a 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1856,8 +1856,10 @@ SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size, msg.msg_iov = &iov; iov.iov_len = size; iov.iov_base = ubuf; - msg.msg_name = (struct sockaddr *)&address; - msg.msg_namelen = sizeof(address); + /* Save some cycles and don't copy the address if not needed */ + msg.msg_name = addr ? (struct sockaddr *)&address : NULL; + /* We assume all kernel code knows the size of sockaddr_storage */ + msg.msg_namelen = 0; if (sock->file->f_flags & O_NONBLOCK) flags |= MSG_DONTWAIT; err = sock_recvmsg(sock, &msg, size, flags); @@ -2237,16 +2239,14 @@ static int ___sys_recvmsg(struct socket *sock, struct msghdr __user *msg, goto out; } - /* - * Save the user-mode address (verify_iovec will change the - * kernel msghdr to use the kernel address space) + /* Save the user-mode address (verify_iovec will change the + * kernel msghdr to use the kernel address space) */ - uaddr = (__force void __user *)msg_sys->msg_name; uaddr_len = COMPAT_NAMELEN(msg); - if (MSG_CMSG_COMPAT & flags) { + if (MSG_CMSG_COMPAT & flags) err = verify_compat_iovec(msg_sys, iov, &addr, VERIFY_WRITE); - } else + else err = verify_iovec(msg_sys, iov, &addr, VERIFY_WRITE); if (err < 0) goto out_freeiov; @@ -2255,6 +2255,9 @@ static int ___sys_recvmsg(struct socket *sock, struct msghdr __user *msg, cmsg_ptr = (unsigned long)msg_sys->msg_control; msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT); + /* We assume all kernel code knows the size of sockaddr_storage */ + msg_sys->msg_namelen = 0; + if (sock->file->f_flags & O_NONBLOCK) flags |= MSG_DONTWAIT; err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys, diff --git a/net/tipc/socket.c b/net/tipc/socket.c index f6a8b59a7115..64ade972bb86 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -921,9 +921,6 @@ static int recv_msg(struct kiocb *iocb, struct socket *sock, goto exit; } - /* will be updated in set_orig_addr() if needed */ - m->msg_namelen = 0; - timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); restart: @@ -1033,9 +1030,6 @@ static int recv_stream(struct kiocb *iocb, struct socket *sock, goto exit; } - /* will be updated in set_orig_addr() if needed */ - m->msg_namelen = 0; - target = sock_rcvlowat(sk, flags & MSG_WAITALL, buf_len); timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 4f4095999bac..6cc5803f082e 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1764,7 +1764,6 @@ static void unix_copy_addr(struct msghdr *msg, struct sock *sk) { struct unix_sock *u = unix_sk(sk); - msg->msg_namelen = 0; if (u->addr) { msg->msg_namelen = u->addr->len; memcpy(msg->msg_name, u->addr->name, u->addr->len); @@ -1788,8 +1787,6 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, if (flags&MSG_OOB) goto out; - msg->msg_namelen = 0; - err = mutex_lock_interruptible(&u->readlock); if (err) { err = sock_intr_errno(sock_rcvtimeo(sk, noblock)); @@ -1931,8 +1928,6 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, target = sock_rcvlowat(sk, flags&MSG_WAITALL, size); timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT); - msg->msg_namelen = 0; - /* Lock the socket to prevent queue disordering * while sleeps in memcpy_tomsg */ diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index b943e3e71bd2..92aed9e45c73 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -1343,10 +1343,9 @@ static int x25_recvmsg(struct kiocb *iocb, struct socket *sock, if (sx25) { sx25->sx25_family = AF_X25; sx25->sx25_addr = x25->dest_addr; + msg->msg_namelen = sizeof(*sx25); } - msg->msg_namelen = sizeof(struct sockaddr_x25); - x25_check_rbuf(sk); rc = copied; out_free_dgram: From fd13c5d600c2d47ed8e04a53b1c4092c2dd9d36e Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Thu, 21 Nov 2013 03:14:34 +0100 Subject: [PATCH 19/92] net: add BUG_ON if kernel advertises msg_namelen > sizeof(struct sockaddr_storage) [ Upstream commit 68c6beb373955da0886d8f4f5995b3922ceda4be ] In that case it is probable that kernel code overwrote part of the stack. So we should bail out loudly here. The BUG_ON may be removed in future if we are sure all protocols are conformant. Suggested-by: Eric Dumazet Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Kamal Mostafa --- net/socket.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/socket.c b/net/socket.c index 5b1b9bdf418a..f7129f3bb2a1 100644 --- a/net/socket.c +++ b/net/socket.c @@ -216,12 +216,13 @@ static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen, int err; int len; + BUG_ON(klen > sizeof(struct sockaddr_storage)); err = get_user(len, ulen); if (err) return err; if (len > klen) len = klen; - if (len < 0 || len > sizeof(struct sockaddr_storage)) + if (len < 0) return -EINVAL; if (len) { if (audit_sockaddr(klen, kaddr)) From abcc4a81f1eb3965d42d2b440d0443a55d35fc1b Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Sat, 23 Nov 2013 00:46:12 +0100 Subject: [PATCH 20/92] inet: fix addr_len/msg->msg_namelen assignment in recv_error and rxpmtu functions [ Upstream commit 85fbaa75037d0b6b786ff18658ddf0b4014ce2a4 ] Commit bceaa90240b6019ed73b49965eac7d167610be69 ("inet: prevent leakage of uninitialized memory to user in recv syscalls") conditionally updated addr_len if the msg_name is written to. The recv_error and rxpmtu functions relied on the recvmsg functions to set up addr_len before. As this does not happen any more we have to pass addr_len to those functions as well and set it to the size of the corresponding sockaddr length. This broke traceroute and such. Fixes: bceaa90240b6 ("inet: prevent leakage of uninitialized memory to user in recv syscalls") Reported-by: Brad Spengler Reported-by: Tom Labanowski Cc: mpb Cc: David S. Miller Cc: Eric Dumazet Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller [ kamal: backport to 3.8 ] Signed-off-by: Kamal Mostafa --- include/net/ip.h | 2 +- include/net/ipv6.h | 6 ++++-- net/ipv4/ip_sockglue.c | 3 ++- net/ipv4/ping.c | 2 +- net/ipv4/raw.c | 2 +- net/ipv4/udp.c | 2 +- net/ipv6/datagram.c | 7 +++++-- net/ipv6/raw.c | 4 ++-- net/ipv6/udp.c | 4 ++-- net/l2tp/l2tp_ip6.c | 2 +- 10 files changed, 20 insertions(+), 14 deletions(-) diff --git a/include/net/ip.h b/include/net/ip.h index edfa59174d9a..788f1d8a796f 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -454,7 +454,7 @@ extern int compat_ip_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); extern int ip_ra_control(struct sock *sk, unsigned char on, void (*destructor)(struct sock *)); -extern int ip_recv_error(struct sock *sk, struct msghdr *msg, int len); +extern int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len); extern void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err, __be16 port, u32 info, u8 *payload); extern void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 dport, diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 5a6791905ce2..b09c053b9bd7 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -693,8 +693,10 @@ extern int compat_ipv6_getsockopt(struct sock *sk, extern int ip6_datagram_connect(struct sock *sk, struct sockaddr *addr, int addr_len); -extern int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len); -extern int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len); +extern int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, + int *addr_len); +extern int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len, + int *addr_len); extern void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err, __be16 port, u32 info, u8 *payload); extern void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info); diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index d9c4f113d709..23e6ab0a2dc0 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -368,7 +368,7 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 inf /* * Handle MSG_ERRQUEUE */ -int ip_recv_error(struct sock *sk, struct msghdr *msg, int len) +int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) { struct sock_exterr_skb *serr; struct sk_buff *skb, *skb2; @@ -405,6 +405,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len) serr->addr_offset); sin->sin_port = serr->port; memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); + *addr_len = sizeof(*sin); } memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err)); diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index a690121c5e3b..eeaf5132f490 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -637,7 +637,7 @@ static int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, goto out; if (flags & MSG_ERRQUEUE) - return ip_recv_error(sk, msg, len); + return ip_recv_error(sk, msg, len, addr_len); skb = skb_recv_datagram(sk, flags, noblock, &err); if (!skb) diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 5f42d6d351cd..4af3fc12eaa7 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -695,7 +695,7 @@ static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, goto out; if (flags & MSG_ERRQUEUE) { - err = ip_recv_error(sk, msg, len); + err = ip_recv_error(sk, msg, len, addr_len); goto out; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 83e327303694..c8cf2f185b2c 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1181,7 +1181,7 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, bool slow; if (flags & MSG_ERRQUEUE) - return ip_recv_error(sk, msg, len); + return ip_recv_error(sk, msg, len, addr_len); try_again: skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 7a778b9a7b85..6aaf0b96b4be 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -317,7 +317,7 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu) /* * Handle MSG_ERRQUEUE */ -int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) +int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) { struct ipv6_pinfo *np = inet6_sk(sk); struct sock_exterr_skb *serr; @@ -368,6 +368,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) ipv6_addr_set_v4mapped(*(__be32 *)(nh + serr->addr_offset), &sin->sin6_addr); } + *addr_len = sizeof(*sin); } memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err)); @@ -421,7 +422,8 @@ EXPORT_SYMBOL_GPL(ipv6_recv_error); /* * Handle IPV6_RECVPATHMTU */ -int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len) +int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len, + int *addr_len) { struct ipv6_pinfo *np = inet6_sk(sk); struct sk_buff *skb; @@ -455,6 +457,7 @@ int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len) sin->sin6_port = 0; sin->sin6_scope_id = mtu_info.ip6m_addr.sin6_scope_id; sin->sin6_addr = mtu_info.ip6m_addr.sin6_addr; + *addr_len = sizeof(*sin); } put_cmsg(msg, SOL_IPV6, IPV6_PATHMTU, sizeof(mtu_info), &mtu_info); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 276faf68424e..5ce77eeee12a 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -461,10 +461,10 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk, return -EOPNOTSUPP; if (flags & MSG_ERRQUEUE) - return ipv6_recv_error(sk, msg, len); + return ipv6_recv_error(sk, msg, len, addr_len); if (np->rxpmtu && np->rxopt.bits.rxpmtu) - return ipv6_recv_rxpmtu(sk, msg, len); + return ipv6_recv_rxpmtu(sk, msg, len, addr_len); skb = skb_recv_datagram(sk, flags, noblock, &err); if (!skb) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index f8236a356d15..97cb164fed5b 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -350,10 +350,10 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, bool slow; if (flags & MSG_ERRQUEUE) - return ipv6_recv_error(sk, msg, len); + return ipv6_recv_error(sk, msg, len, addr_len); if (np->rxpmtu && np->rxopt.bits.rxpmtu) - return ipv6_recv_rxpmtu(sk, msg, len); + return ipv6_recv_rxpmtu(sk, msg, len, addr_len); try_again: skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 9e1822e81136..388b4dcf22aa 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -659,7 +659,7 @@ static int l2tp_ip6_recvmsg(struct kiocb *iocb, struct sock *sk, *addr_len = sizeof(*lsa); if (flags & MSG_ERRQUEUE) - return ipv6_recv_error(sk, msg, len); + return ipv6_recv_error(sk, msg, len, addr_len); skb = skb_recv_datagram(sk, flags, noblock, &err); if (!skb) From 3893cff4706b6f55c229363577160ba66a1a7f87 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 27 Nov 2013 15:40:21 +0300 Subject: [PATCH 21/92] net: clamp ->msg_namelen instead of returning an error [ Upstream commit db31c55a6fb245fdbb752a2ca4aefec89afabb06 ] If kmsg->msg_namelen > sizeof(struct sockaddr_storage) then in the original code that would lead to memory corruption in the kernel if you had audit configured. If you didn't have audit configured it was harmless. There are some programs such as beta versions of Ruby which use too large of a buffer and returning an error code breaks them. We should clamp the ->msg_namelen value instead. Fixes: 1661bf364ae9 ("net: heap overflow in __audit_sockaddr()") Reported-by: Eric Wong Signed-off-by: Dan Carpenter Tested-by: Eric Wong Acked-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Kamal Mostafa --- net/compat.c | 2 +- net/socket.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/compat.c b/net/compat.c index 618c6a8a911b..dd32e34c1e2c 100644 --- a/net/compat.c +++ b/net/compat.c @@ -72,7 +72,7 @@ int get_compat_msghdr(struct msghdr *kmsg, struct compat_msghdr __user *umsg) __get_user(kmsg->msg_flags, &umsg->msg_flags)) return -EFAULT; if (kmsg->msg_namelen > sizeof(struct sockaddr_storage)) - return -EINVAL; + kmsg->msg_namelen = sizeof(struct sockaddr_storage); kmsg->msg_name = compat_ptr(tmp1); kmsg->msg_iov = compat_ptr(tmp2); kmsg->msg_control = compat_ptr(tmp3); diff --git a/net/socket.c b/net/socket.c index f7129f3bb2a1..57d7a8570b09 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1989,7 +1989,7 @@ static int copy_msghdr_from_user(struct msghdr *kmsg, if (copy_from_user(kmsg, umsg, sizeof(struct msghdr))) return -EFAULT; if (kmsg->msg_namelen > sizeof(struct sockaddr_storage)) - return -EINVAL; + kmsg->msg_namelen = sizeof(struct sockaddr_storage); return 0; } From 2368f94a9c7cf70af5f2ee6d142758e050e518ae Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Sat, 23 Nov 2013 07:22:33 +0100 Subject: [PATCH 22/92] ipv6: fix leaking uninitialized port number of offender sockaddr [ Upstream commit 1fa4c710b6fe7b0aac9907240291b6fe6aafc3b8 ] Offenders don't have port numbers, so set it to 0. Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Kamal Mostafa --- net/ipv6/datagram.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 6aaf0b96b4be..2877b1699a0d 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -377,6 +377,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) if (serr->ee.ee_origin != SO_EE_ORIGIN_LOCAL) { sin->sin6_family = AF_INET6; sin->sin6_flowinfo = 0; + sin->sin6_port = 0; sin->sin6_scope_id = 0; if (skb->protocol == htons(ETH_P_IPV6)) { sin->sin6_addr = ipv6_hdr(skb)->saddr; From 347f6bc909703e70f41eaa39467efcc615f9afd4 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 6 Nov 2013 17:52:19 +0100 Subject: [PATCH 23/92] ip6_output: fragment outgoing reassembled skb properly [ Upstream commit 9037c3579a277f3a23ba476664629fda8c35f7c4 ] If reassembled packet would fit into outdev MTU, it is not fragmented according the original frag size and it is send as single big packet. The second case is if skb is gso. In that case fragmentation does not happen according to the original frag size. This patch fixes these. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Kamal Mostafa --- net/ipv6/ip6_output.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index f477a89fad24..ac9f072bbae4 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -137,7 +137,8 @@ static int ip6_finish_output2(struct sk_buff *skb) static int ip6_finish_output(struct sk_buff *skb) { if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || - dst_allfrag(skb_dst(skb))) + dst_allfrag(skb_dst(skb)) || + (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size)) return ip6_fragment(skb, ip6_finish_output2); else return ip6_finish_output2(skb); From b200cf6e987c64a57674d002c3f3f2c43eae3d8c Mon Sep 17 00:00:00 2001 From: "fan.du" Date: Tue, 19 Nov 2013 16:53:28 +0800 Subject: [PATCH 24/92] xfrm: Release dst if this dst is improper for vti tunnel [ Upstream commit 236c9f84868534c718b6889aa624de64763281f9 ] After searching rt by the vti tunnel dst/src parameter, if this rt has neither attached to any transformation nor the transformation is not tunnel oriented, this rt should be released back to ip layer. otherwise causing dst memory leakage. Signed-off-by: Fan Du Signed-off-by: David S. Miller Signed-off-by: Kamal Mostafa --- net/ipv4/ip_vti.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index d011525e97fe..331a0750ba40 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -388,6 +388,7 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) if (!rt->dst.xfrm || rt->dst.xfrm->props.mode != XFRM_MODE_TUNNEL) { dev->stats.tx_carrier_errors++; + ip_rt_put(rt); goto tx_error_icmp; } tdev = rt->dst.dev; From 499544db0c9bc4db0a88cf40eb757ad7da5e6d2f Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Tue, 19 Nov 2013 18:09:27 +0800 Subject: [PATCH 25/92] atm: idt77252: fix dev refcnt leak [ Upstream commit b5de4a22f157ca345cdb3575207bf46402414bc1 ] init_card() calls dev_get_by_name() to get a network deceive. But it doesn't decrease network device reference count after the device is used. Signed-off-by: Ying Xue Signed-off-by: David S. Miller Signed-off-by: Kamal Mostafa --- drivers/atm/idt77252.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c index 272f00927761..1bdf104e90bb 100644 --- a/drivers/atm/idt77252.c +++ b/drivers/atm/idt77252.c @@ -3511,7 +3511,7 @@ static int init_card(struct atm_dev *dev) tmp = dev_get_by_name(&init_net, tname); /* jhs: was "tmp = dev_get(tname);" */ if (tmp) { memcpy(card->atmdev->esi, tmp->dev_addr, 6); - + dev_put(tmp); printk("%s: ESI %pM\n", card->name, card->atmdev->esi); } /* From 55d5d7e3da8269bfc69b4949c0981d3b3adacb27 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Tue, 19 Nov 2013 22:10:06 +0400 Subject: [PATCH 26/92] tcp: don't update snd_nxt, when a socket is switched from repair mode [ Upstream commit dbde497966804e63a38fdedc1e3815e77097efc2 ] snd_nxt must be updated synchronously with sk_send_head. Otherwise tp->packets_out may be updated incorrectly, what may bring a kernel panic. Here is a kernel panic from my host. [ 103.043194] BUG: unable to handle kernel NULL pointer dereference at 0000000000000048 [ 103.044025] IP: [] tcp_rearm_rto+0xcf/0x150 ... [ 146.301158] Call Trace: [ 146.301158] [] tcp_ack+0xcc0/0x12c0 Before this panic a tcp socket was restored. This socket had sent and unsent data in the write queue. Sent data was restored in repair mode, then the socket was switched from reapair mode and unsent data was restored. After that the socket was switched back into repair mode. In that moment we had a socket where write queue looks like this: snd_una snd_nxt write_seq |_________|________| | sk_send_head After a second switching from repair mode the state of socket was changed: snd_una snd_nxt, write_seq |_________ ________| | sk_send_head This state is inconsistent, because snd_nxt and sk_send_head are not synchronized. Bellow you can find a call trace, how packets_out can be incremented twice for one skb, if snd_nxt and sk_send_head are not synchronized. In this case packets_out will be always positive, even when sk_write_queue is empty. tcp_write_wakeup skb = tcp_send_head(sk); tcp_fragment if (!before(tp->snd_nxt, TCP_SKB_CB(buff)->end_seq)) tcp_adjust_pcount(sk, skb, diff); tcp_event_new_data_sent tp->packets_out += tcp_skb_pcount(skb); I think update of snd_nxt isn't required, when a socket is switched from repair mode. Because it's initialized in tcp_connect_init. Then when a write queue is restored, snd_nxt is incremented in tcp_event_new_data_sent, so it's always is in consistent state. I have checked, that the bug is not reproduced with this patch and all tests about restoring tcp connections work fine. Cc: Pavel Emelyanov Cc: Eric Dumazet Cc: "David S. Miller" Cc: Alexey Kuznetsov Cc: James Morris Cc: Hideaki YOSHIFUJI Cc: Patrick McHardy Signed-off-by: Andrey Vagin Acked-by: Pavel Emelyanov Acked-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Kamal Mostafa --- net/ipv4/tcp_output.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 168a0d05acf7..aa215b867116 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3178,7 +3178,6 @@ void tcp_send_window_probe(struct sock *sk) { if (sk->sk_state == TCP_ESTABLISHED) { tcp_sk(sk)->snd_wl1 = tcp_sk(sk)->rcv_nxt - 1; - tcp_sk(sk)->snd_nxt = tcp_sk(sk)->write_seq; tcp_xmit_probe_skb(sk, 0); } } From 7036902d8f1cbe1158342d5c1cb190a36c5bfc78 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 19 Nov 2013 19:12:34 -0800 Subject: [PATCH 27/92] ipv4: fix race in concurrent ip_route_input_slow() [ Upstream commit dcdfdf56b4a6c9437fc37dbc9cee94a788f9b0c4 ] CPUs can ask for local route via ip_route_input_noref() concurrently. if nh_rth_input is not cached yet, CPUs will proceed to allocate equivalent DSTs on 'lo' and then will try to cache them in nh_rth_input via rt_cache_route() Most of the time they succeed, but on occasion the following two lines: orig = *p; prev = cmpxchg(p, orig, rt); in rt_cache_route() do race and one of the cpus fails to complete cmpxchg. But ip_route_input_slow() doesn't check the return code of rt_cache_route(), so dst is leaking. dst_destroy() is never called and 'lo' device refcnt doesn't go to zero, which can be seen in the logs as: unregister_netdevice: waiting for lo to become free. Usage count = 1 Adding mdelay() between above two lines makes it easily reproducible. Fix it similar to nh_pcpu_rth_output case. Fixes: d2d68ba9fe8b ("ipv4: Cache input routes in fib_info nexthops.") Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller Signed-off-by: Kamal Mostafa --- net/ipv4/route.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 0bf88e431460..c219f8b43b3f 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1724,8 +1724,12 @@ local_input: rth->dst.error= -err; rth->rt_flags &= ~RTCF_LOCAL; } - if (do_cache) - rt_cache_route(&FIB_RES_NH(res), rth); + if (do_cache) { + if (unlikely(!rt_cache_route(&FIB_RES_NH(res), rth))) { + rth->dst.flags |= DST_NOCACHE; + rt_add_uncached_list(rth); + } + } skb_dst_set(skb, &rth->dst); err = 0; goto out; From 590d24e621a7f1a2ce050dba5a67c687e7b6e328 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Tue, 19 Nov 2013 20:47:15 -0500 Subject: [PATCH 28/92] net: core: Always propagate flag changes to interfaces [ Upstream commit d2615bf450694c1302d86b9cc8a8958edfe4c3a4 ] The following commit: b6c40d68ff6498b7f63ddf97cf0aa818d748dee7 net: only invoke dev->change_rx_flags when device is UP tried to fix a problem with VLAN devices and promiscuouse flag setting. The issue was that VLAN device was setting a flag on an interface that was down, thus resulting in bad promiscuity count. This commit blocked flag propagation to any device that is currently down. A later commit: deede2fabe24e00bd7e246eb81cd5767dc6fcfc7 vlan: Don't propagate flag changes on down interfaces fixed VLAN code to only propagate flags when the VLAN interface is up, thus fixing the same issue as above, only localized to VLAN. The problem we have now is that if we have create a complex stack involving multiple software devices like bridges, bonds, and vlans, then it is possible that the flags would not propagate properly to the physical devices. A simple examle of the scenario is the following: eth0----> bond0 ----> bridge0 ---> vlan50 If bond0 or eth0 happen to be down at the time bond0 is added to the bridge, then eth0 will never have promisc mode set which is currently required for operation as part of the bridge. As a result, packets with vlan50 will be dropped by the interface. The only 2 devices that implement the special flag handling are VLAN and DSA and they both have required code to prevent incorrect flag propagation. As a result we can remove the generic solution introduced in b6c40d68ff6498b7f63ddf97cf0aa818d748dee7 and leave it to the individual devices to decide whether they will block flag propagation or not. Reported-by: Stefan Priebe Suggested-by: Veaceslav Falico Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller Signed-off-by: Kamal Mostafa --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index e55fb9171e7d..4fe768936a12 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4673,7 +4673,7 @@ static void dev_change_rx_flags(struct net_device *dev, int flags) { const struct net_device_ops *ops = dev->netdev_ops; - if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags) + if (ops->ndo_change_rx_flags) ops->ndo_change_rx_flags(dev, flags); } From 6c636e1e15bd922fd3f817b4a6d6c5075421dbc2 Mon Sep 17 00:00:00 2001 From: Ding Tianhong Date: Sat, 7 Dec 2013 22:12:05 +0800 Subject: [PATCH 29/92] bridge: flush br's address entry in fdb when remove the bridge dev [ Upstream commit f873042093c0b418d2351fe142222b625c740149 ] When the following commands are executed: brctl addbr br0 ifconfig br0 hw ether rmmod bridge The calltrace will occur: [ 563.312114] device eth1 left promiscuous mode [ 563.312188] br0: port 1(eth1) entered disabled state [ 563.468190] kmem_cache_destroy bridge_fdb_cache: Slab cache still has objects [ 563.468197] CPU: 6 PID: 6982 Comm: rmmod Tainted: G O 3.12.0-0.7-default+ #9 [ 563.468199] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007 [ 563.468200] 0000000000000880 ffff88010f111e98 ffffffff814d1c92 ffff88010f111eb8 [ 563.468204] ffffffff81148efd ffff88010f111eb8 0000000000000000 ffff88010f111ec8 [ 563.468206] ffffffffa062a270 ffff88010f111ed8 ffffffffa063ac76 ffff88010f111f78 [ 563.468209] Call Trace: [ 563.468218] [] dump_stack+0x6a/0x78 [ 563.468234] [] kmem_cache_destroy+0xfd/0x100 [ 563.468242] [] br_fdb_fini+0x10/0x20 [bridge] [ 563.468247] [] br_deinit+0x4e/0x50 [bridge] [ 563.468254] [] SyS_delete_module+0x199/0x2b0 [ 563.468259] [] system_call_fastpath+0x16/0x1b [ 570.377958] Bridge firewalling registered --------------------------- cut here ------------------------------- The reason is that when the bridge dev's address is changed, the br_fdb_change_mac_address() will add new address in fdb, but when the bridge was removed, the address entry in the fdb did not free, the bridge_fdb_cache still has objects when destroy the cache, Fix this by flushing the bridge address entry when removing the bridge. v2: according to the Toshiaki Makita and Vlad's suggestion, I only delete the vlan0 entry, it still have a leak here if the vlan id is other number, so I need to call fdb_delete_by_port(br, NULL, 1) to flush all entries whose dst is NULL for the bridge. Suggested-by: Toshiaki Makita Suggested-by: Vlad Yasevich Signed-off-by: Ding Tianhong Signed-off-by: David S. Miller Signed-off-by: Kamal Mostafa --- net/bridge/br_if.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 37fe693471a8..d3f4f79b1a6f 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -170,6 +170,8 @@ void br_dev_delete(struct net_device *dev, struct list_head *head) del_nbp(p); } + br_fdb_delete_by_port(br, NULL, 1); + del_timer_sync(&br->gc_timer); br_sysfs_delbr(br->dev); From b11201abd8c218234ff6312d5c7630cf76c4c34c Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 21 Nov 2013 16:50:58 +0100 Subject: [PATCH 30/92] packet: fix use after free race in send path when dev is released [ Upstream commit e40526cb20b5ee53419452e1f03d97092f144418 ] Salam reported a use after free bug in PF_PACKET that occurs when we're sending out frames on a socket bound device and suddenly the net device is being unregistered. It appears that commit 827d9780 introduced a possible race condition between {t,}packet_snd() and packet_notifier(). In the case of a bound socket, packet_notifier() can drop the last reference to the net_device and {t,}packet_snd() might end up suddenly sending a packet over a freed net_device. To avoid reverting 827d9780 and thus introducing a performance regression compared to the current state of things, we decided to hold a cached RCU protected pointer to the net device and maintain it on write side via bind spin_lock protected register_prot_hook() and __unregister_prot_hook() calls. In {t,}packet_snd() path, we access this pointer under rcu_read_lock through packet_cached_dev_get() that holds reference to the device to prevent it from being freed through packet_notifier() while we're in send path. This is okay to do as dev_put()/dev_hold() are per-cpu counters, so this should not be a performance issue. Also, the code simplifies a bit as we don't need need_rls_dev anymore. Fixes: 827d978037d7 ("af-packet: Use existing netdev reference for bound sockets.") Reported-by: Salam Noureddine Signed-off-by: Daniel Borkmann Signed-off-by: Salam Noureddine Cc: Ben Greear Cc: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Kamal Mostafa --- net/packet/af_packet.c | 61 +++++++++++++++++++++++++----------------- net/packet/internal.h | 1 + 2 files changed, 38 insertions(+), 24 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 517d154a4051..2d5b24ab3827 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -236,11 +236,15 @@ static void __fanout_link(struct sock *sk, struct packet_sock *po); static void register_prot_hook(struct sock *sk) { struct packet_sock *po = pkt_sk(sk); + if (!po->running) { - if (po->fanout) + if (po->fanout) { __fanout_link(sk, po); - else + } else { dev_add_pack(&po->prot_hook); + rcu_assign_pointer(po->cached_dev, po->prot_hook.dev); + } + sock_hold(sk); po->running = 1; } @@ -258,10 +262,13 @@ static void __unregister_prot_hook(struct sock *sk, bool sync) struct packet_sock *po = pkt_sk(sk); po->running = 0; - if (po->fanout) + if (po->fanout) { __fanout_unlink(sk, po); - else + } else { __dev_remove_pack(&po->prot_hook); + RCU_INIT_POINTER(po->cached_dev, NULL); + } + __sock_put(sk); if (sync) { @@ -1960,12 +1967,24 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, return tp_len; } +static struct net_device *packet_cached_dev_get(struct packet_sock *po) +{ + struct net_device *dev; + + rcu_read_lock(); + dev = rcu_dereference(po->cached_dev); + if (dev) + dev_hold(dev); + rcu_read_unlock(); + + return dev; +} + static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) { struct sk_buff *skb; struct net_device *dev; __be16 proto; - bool need_rls_dev = false; int err, reserve = 0; void *ph; struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name; @@ -1978,7 +1997,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) mutex_lock(&po->pg_vec_lock); if (saddr == NULL) { - dev = po->prot_hook.dev; + dev = packet_cached_dev_get(po); proto = po->num; addr = NULL; } else { @@ -1992,19 +2011,17 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) proto = saddr->sll_protocol; addr = saddr->sll_addr; dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex); - need_rls_dev = true; } err = -ENXIO; if (unlikely(dev == NULL)) goto out; - - reserve = dev->hard_header_len; - err = -ENETDOWN; if (unlikely(!(dev->flags & IFF_UP))) goto out_put; + reserve = dev->hard_header_len; + size_max = po->tx_ring.frame_size - (po->tp_hdrlen - sizeof(struct sockaddr_ll)); @@ -2081,8 +2098,7 @@ out_status: __packet_set_status(po, ph, status); kfree_skb(skb); out_put: - if (need_rls_dev) - dev_put(dev); + dev_put(dev); out: mutex_unlock(&po->pg_vec_lock); return err; @@ -2120,7 +2136,6 @@ static int packet_snd(struct socket *sock, struct sk_buff *skb; struct net_device *dev; __be16 proto; - bool need_rls_dev = false; unsigned char *addr; int err, reserve = 0; struct virtio_net_hdr vnet_hdr = { 0 }; @@ -2136,7 +2151,7 @@ static int packet_snd(struct socket *sock, */ if (saddr == NULL) { - dev = po->prot_hook.dev; + dev = packet_cached_dev_get(po); proto = po->num; addr = NULL; } else { @@ -2148,19 +2163,17 @@ static int packet_snd(struct socket *sock, proto = saddr->sll_protocol; addr = saddr->sll_addr; dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex); - need_rls_dev = true; } err = -ENXIO; - if (dev == NULL) + if (unlikely(dev == NULL)) goto out_unlock; + err = -ENETDOWN; + if (unlikely(!(dev->flags & IFF_UP))) + goto out_unlock; + if (sock->type == SOCK_RAW) reserve = dev->hard_header_len; - - err = -ENETDOWN; - if (!(dev->flags & IFF_UP)) - goto out_unlock; - if (po->has_vnet_hdr) { vnet_hdr_len = sizeof(vnet_hdr); @@ -2293,15 +2306,14 @@ static int packet_snd(struct socket *sock, if (err > 0 && (err = net_xmit_errno(err)) != 0) goto out_unlock; - if (need_rls_dev) - dev_put(dev); + dev_put(dev); return len; out_free: kfree_skb(skb); out_unlock: - if (dev && need_rls_dev) + if (dev) dev_put(dev); out: return err; @@ -2521,6 +2533,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, po = pkt_sk(sk); sk->sk_family = PF_PACKET; po->num = proto; + RCU_INIT_POINTER(po->cached_dev, NULL); sk->sk_destruct = packet_sock_destruct; sk_refcnt_debug_inc(sk); diff --git a/net/packet/internal.h b/net/packet/internal.h index e84cab8cb7a9..821dfeef23c3 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -111,6 +111,7 @@ struct packet_sock { unsigned int tp_loss:1; unsigned int tp_tx_has_off:1; unsigned int tp_tstamp; + struct net_device __rcu *cached_dev; struct packet_type prot_hook ____cacheline_aligned_in_smp; }; From b82f9b89ec819360eb2cbfa817d337d011ea4ef6 Mon Sep 17 00:00:00 2001 From: Veaceslav Falico Date: Fri, 29 Nov 2013 09:53:23 +0100 Subject: [PATCH 31/92] af_packet: block BH in prb_shutdown_retire_blk_timer() [ Upstream commit ec6f809ff6f19fafba3212f6aff0dda71dfac8e8 ] Currently we're using plain spin_lock() in prb_shutdown_retire_blk_timer(), however the timer might fire right in the middle and thus try to re-aquire the same spinlock, leaving us in a endless loop. To fix that, use the spin_lock_bh() to block it. Fixes: f6fb8f100b80 ("af-packet: TPACKET_V3 flexible buffer implementation.") CC: "David S. Miller" CC: Daniel Borkmann CC: Willem de Bruijn CC: Phil Sutter CC: Eric Dumazet Reported-by: Jan Stancek Tested-by: Jan Stancek Signed-off-by: Veaceslav Falico Acked-by: Daniel Borkmann Signed-off-by: David S. Miller Signed-off-by: Kamal Mostafa --- net/packet/af_packet.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 2d5b24ab3827..90c1a21a5a29 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -390,9 +390,9 @@ static void prb_shutdown_retire_blk_timer(struct packet_sock *po, pkc = tx_ring ? &po->tx_ring.prb_bdqc : &po->rx_ring.prb_bdqc; - spin_lock(&rb_queue->lock); + spin_lock_bh(&rb_queue->lock); pkc->delete_blk_timer = 1; - spin_unlock(&rb_queue->lock); + spin_unlock_bh(&rb_queue->lock); prb_del_retire_blk_timer(pkc); } From 36f4f04992db7fcce3f623f0c39b1811772717da Mon Sep 17 00:00:00 2001 From: David Chang Date: Wed, 27 Nov 2013 15:48:36 +0800 Subject: [PATCH 32/92] r8169: check ALDPS bit and disable it if enabled for the 8168g [ Upstream commit 1bac1072425c86f1ac85bd5967910706677ef8b3 ] Windows driver will enable ALDPS function, but linux driver and firmware do not have any configuration related to ALDPS function for 8168g. So restart system to linux and remove the NIC cable, LAN enter ALDPS, then LAN RX will be disabled. This issue can be easily reproduced on dual boot windows and linux system with RTL_GIGA_MAC_VER_40 chip. Realtek said, ALDPS function can be disabled by configuring to PHY, switch to page 0x0A43, reg0x10 bit2=0. Signed-off-by: David Chang Acked-by: Hayes Wang Signed-off-by: David S. Miller [ David Chang: backport to 3.8 ] Signed-off-by: Kamal Mostafa --- drivers/net/ethernet/realtek/r8169.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 996318b140e7..ef0226a7b245 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -3414,6 +3414,11 @@ static void rtl8168g_1_hw_phy_config(struct rtl8169_private *tp) rtl_w1w0_phy_ocp(tp, 0xa438, 0x8000, 0x0000); rtl_w1w0_phy_ocp(tp, 0xc422, 0x4000, 0x2000); + + /* Check ALDPS bit, disable it if enabled */ + rtl_writephy(tp, 0x1f, 0x0a43); + if (rtl_readphy(tp, 0x10) & 0x0004) + rtl_w1w0_phy(tp, 0x10, 0x0000, 0x0004); } static void rtl8102e_hw_phy_config(struct rtl8169_private *tp) From 3bcb5aa180a31daa0a93acdae8e53fa8ca1cd5b4 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Wed, 27 Nov 2013 14:32:52 +0800 Subject: [PATCH 33/92] net: 8139cp: fix a BUG_ON triggered by wrong bytes_compl [ Upstream commit 7fe0ee099ad5e3dea88d4ee1b6f20246b1ca57c3 ] Using iperf to send packets(GSO mode is on), a bug is triggered: [ 212.672781] kernel BUG at lib/dynamic_queue_limits.c:26! [ 212.673396] invalid opcode: 0000 [#1] SMP [ 212.673882] Modules linked in: 8139cp(O) nls_utf8 edd fuse loop dm_mod ipv6 i2c_piix4 8139too i2c_core intel_agp joydev pcspkr hid_generic intel_gtt floppy sr_mod mii button sg cdrom ext3 jbd mbcache usbhid hid uhci_hcd ehci_hcd usbcore sd_mod usb_common crc_t10dif crct10dif_common processor thermal_sys hwmon scsi_dh_emc scsi_dh_rdac scsi_dh_hp_sw scsi_dh ata_generic ata_piix libata scsi_mod [last unloaded: 8139cp] [ 212.676084] CPU: 0 PID: 4124 Comm: iperf Tainted: G O 3.12.0-0.7-default+ #16 [ 212.676084] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007 [ 212.676084] task: ffff8800d83966c0 ti: ffff8800db4c8000 task.ti: ffff8800db4c8000 [ 212.676084] RIP: 0010:[] [] dql_completed+0x17f/0x190 [ 212.676084] RSP: 0018:ffff880116e03e30 EFLAGS: 00010083 [ 212.676084] RAX: 00000000000005ea RBX: 0000000000000f7c RCX: 0000000000000002 [ 212.676084] RDX: ffff880111dd0dc0 RSI: 0000000000000bd4 RDI: ffff8800db6ffcc0 [ 212.676084] RBP: ffff880116e03e48 R08: 0000000000000992 R09: 0000000000000000 [ 212.676084] R10: ffffffff8181e400 R11: 0000000000000004 R12: 000000000000000f [ 212.676084] R13: ffff8800d94ec840 R14: ffff8800db440c80 R15: 000000000000000e [ 212.676084] FS: 00007f6685a3c700(0000) GS:ffff880116e00000(0000) knlGS:0000000000000000 [ 212.676084] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 212.676084] CR2: 00007f6685ad6460 CR3: 00000000db714000 CR4: 00000000000006f0 [ 212.676084] Stack: [ 212.676084] ffff8800db6ffc00 000000000000000f ffff8800d94ec840 ffff880116e03eb8 [ 212.676084] ffffffffa041509f ffff880116e03e88 0000000f16e03e88 ffff8800d94ec000 [ 212.676084] 00000bd400059858 000000050000000f ffffffff81094c36 ffff880116e03eb8 [ 212.676084] Call Trace: [ 212.676084] [ 212.676084] [] cp_interrupt+0x4ef/0x590 [8139cp] [ 212.676084] [] ? ktime_get+0x56/0xd0 [ 212.676084] [] handle_irq_event_percpu+0x53/0x170 [ 212.676084] [] handle_irq_event+0x3c/0x60 [ 212.676084] [] handle_fasteoi_irq+0x55/0xf0 [ 212.676084] [] handle_irq+0x1f/0x30 [ 212.676084] [] do_IRQ+0x5b/0xe0 [ 212.676084] [] common_interrupt+0x6a/0x6a [ 212.676084] [ 212.676084] [] ? cp_start_xmit+0x621/0x97c [8139cp] [ 212.676084] [] ? cp_start_xmit+0x609/0x97c [8139cp] [ 212.676084] [] dev_hard_start_xmit+0x2c9/0x550 [ 212.676084] [] sch_direct_xmit+0x179/0x1d0 [ 212.676084] [] dev_queue_xmit+0x293/0x440 [ 212.676084] [] ip_finish_output+0x236/0x450 [ 212.676084] [] ? __alloc_pages_nodemask+0x187/0xb10 [ 212.676084] [] ip_output+0x88/0x90 [ 212.676084] [] ip_local_out+0x24/0x30 [ 212.676084] [] ip_queue_xmit+0x14d/0x3e0 [ 212.676084] [] tcp_transmit_skb+0x501/0x840 [ 212.676084] [] tcp_write_xmit+0x1e3/0xb20 [ 212.676084] [] ? skb_page_frag_refill+0x87/0xd0 [ 212.676084] [] tcp_push_one+0x2b/0x40 [ 212.676084] [] tcp_sendmsg+0x926/0xc90 [ 212.676084] [] inet_sendmsg+0x61/0xc0 [ 212.676084] [] sock_aio_write+0x101/0x120 [ 212.676084] [] ? vma_adjust+0x2e1/0x5d0 [ 212.676084] [] ? timerqueue_add+0x60/0xb0 [ 212.676084] [] do_sync_write+0x60/0x90 [ 212.676084] [] ? rw_verify_area+0x54/0xf0 [ 212.676084] [] vfs_write+0x186/0x190 [ 212.676084] [] SyS_write+0x5d/0xa0 [ 212.676084] [] system_call_fastpath+0x16/0x1b [ 212.676084] Code: ca 41 89 dc 41 29 cc 45 31 db 29 c2 41 89 c5 89 d0 45 29 c5 f7 d0 c1 e8 1f e9 43 ff ff ff 66 0f 1f 44 00 00 31 c0 e9 7b ff ff ff <0f> 0b eb fe 66 66 66 66 2e 0f 1f 84 00 00 00 00 00 c7 47 40 00 [ 212.676084] RIP [] dql_completed+0x17f/0x190 ------------[ cut here ]------------ When a skb has frags, bytes_compl plus skb->len nr_frags times in cp_tx(). It's not the correct value(actually, it should plus skb->len once) and it will trigger the BUG_ON(bytes_compl > num_queued - dql->num_completed). So only increase bytes_compl when finish sending all frags. pkts_compl also has a wrong value, fix it too. It's introduced by commit 871f0d4c ("8139cp: enable bql"). Suggested-by: Eric Dumazet Signed-off-by: Yang Yingliang Signed-off-by: David S. Miller Signed-off-by: Kamal Mostafa --- drivers/net/ethernet/realtek/8139cp.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/realtek/8139cp.c b/drivers/net/ethernet/realtek/8139cp.c index 35014cf56240..200b6becc881 100644 --- a/drivers/net/ethernet/realtek/8139cp.c +++ b/drivers/net/ethernet/realtek/8139cp.c @@ -678,9 +678,6 @@ static void cp_tx (struct cp_private *cp) le32_to_cpu(txd->opts1) & 0xffff, PCI_DMA_TODEVICE); - bytes_compl += skb->len; - pkts_compl++; - if (status & LastFrag) { if (status & (TxError | TxFIFOUnder)) { netif_dbg(cp, tx_err, cp->dev, @@ -702,6 +699,8 @@ static void cp_tx (struct cp_private *cp) netif_dbg(cp, tx_done, cp->dev, "tx done, slot %d\n", tx_tail); } + bytes_compl += skb->len; + pkts_compl++; dev_kfree_skb_irq(skb); } From 5cddc093c37fa271f5358ccd91c65bdbf93b895f Mon Sep 17 00:00:00 2001 From: Shawn Landden Date: Sun, 24 Nov 2013 22:36:28 -0800 Subject: [PATCH 34/92] net: update consumers of MSG_MORE to recognize MSG_SENDPAGE_NOTLAST [ Upstream commit d3f7d56a7a4671d395e8af87071068a195257bf6 ] Commit 35f9c09fe (tcp: tcp_sendpages() should call tcp_push() once) added an internal flag MSG_SENDPAGE_NOTLAST, similar to MSG_MORE. algif_hash, algif_skcipher, and udp used MSG_MORE from tcp_sendpages() and need to see the new flag as identical to MSG_MORE. This fixes sendfile() on AF_ALG. v3: also fix udp Cc: Tom Herbert Cc: Eric Dumazet Cc: David S. Miller Reported-and-tested-by: Shawn Landden Original-patch: Richard Weinberger Signed-off-by: Shawn Landden Signed-off-by: David S. Miller Signed-off-by: Kamal Mostafa --- crypto/algif_hash.c | 3 +++ crypto/algif_skcipher.c | 3 +++ net/ipv4/udp.c | 3 +++ 3 files changed, 9 insertions(+) diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c index ef5356cd280a..850246206b12 100644 --- a/crypto/algif_hash.c +++ b/crypto/algif_hash.c @@ -114,6 +114,9 @@ static ssize_t hash_sendpage(struct socket *sock, struct page *page, struct hash_ctx *ctx = ask->private; int err; + if (flags & MSG_SENDPAGE_NOTLAST) + flags |= MSG_MORE; + lock_sock(sk); sg_init_table(ctx->sgl.sg, 1); sg_set_page(ctx->sgl.sg, page, size, offset); diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index 6a6dfc062d2a..a19c027b29bd 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -378,6 +378,9 @@ static ssize_t skcipher_sendpage(struct socket *sock, struct page *page, struct skcipher_sg_list *sgl; int err = -EINVAL; + if (flags & MSG_SENDPAGE_NOTLAST) + flags |= MSG_MORE; + lock_sock(sk); if (!ctx->more && ctx->used) goto unlock; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index c8cf2f185b2c..9de564177860 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1045,6 +1045,9 @@ int udp_sendpage(struct sock *sk, struct page *page, int offset, struct udp_sock *up = udp_sk(sk); int ret; + if (flags & MSG_SENDPAGE_NOTLAST) + flags |= MSG_MORE; + if (!up->pending) { struct msghdr msg = { .msg_flags = flags|MSG_MORE }; From 513a0e2a2c3f222ca3a7714ffb9491687962b736 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 28 Nov 2013 09:51:22 -0800 Subject: [PATCH 35/92] inet: fix possible seqlock deadlocks [ Upstream commit f1d8cba61c3c4b1eb88e507249c4cb8d635d9a76 ] In commit c9e9042994d3 ("ipv4: fix possible seqlock deadlock") I left another places where IP_INC_STATS_BH() were improperly used. udp_sendmsg(), ping_v4_sendmsg() and tcp_v4_connect() are called from process context, not from softirq context. This was detected by lockdep seqlock support. Reported-by: jongman heo Fixes: 584bdf8cbdf6 ("[IPV4]: Fix "ipOutNoRoutes" counter error for TCP and UDP") Fixes: c319b4d76b9e ("net: ipv4: add IPPROTO_ICMP socket kind") Signed-off-by: Eric Dumazet Cc: Hannes Frederic Sowa Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Kamal Mostafa --- net/ipv4/ping.c | 2 +- net/ipv4/tcp_ipv4.c | 2 +- net/ipv4/udp.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index eeaf5132f490..3d8ddd926cac 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -571,7 +571,7 @@ static int ping_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, err = PTR_ERR(rt); rt = NULL; if (err == -ENETUNREACH) - IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); + IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); goto out; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 47b459aa7a26..94b273cecfb6 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -176,7 +176,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (IS_ERR(rt)) { err = PTR_ERR(rt); if (err == -ENETUNREACH) - IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); + IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); return err; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 9de564177860..f9824180f594 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -946,7 +946,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, err = PTR_ERR(rt); rt = NULL; if (err == -ENETUNREACH) - IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); + IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); goto out; } From 2ea67624ab03402ad34ce03e41619de04e687aac Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Fri, 29 Nov 2013 06:39:44 +0100 Subject: [PATCH 36/92] ipv6: fix possible seqlock deadlock in ip6_finish_output2 [ Upstream commit 7f88c6b23afbd31545c676dea77ba9593a1a14bf ] IPv6 stats are 64 bits and thus are protected with a seqlock. By not disabling bottom-half we could deadlock here if we don't disable bh and a softirq reentrantly updates the same mib. Cc: Eric Dumazet Signed-off-by: Hannes Frederic Sowa Acked-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Kamal Mostafa --- net/ipv6/ip6_output.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index ac9f072bbae4..09cf37e42f11 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -128,8 +128,8 @@ static int ip6_finish_output2(struct sk_buff *skb) if (neigh) return dst_neigh_output(dst, neigh, skb); - IP6_INC_STATS_BH(dev_net(dst->dev), - ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); + IP6_INC_STATS(dev_net(dst->dev), + ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); kfree_skb(skb); return -EINVAL; } From 3a5b82bb07592f0c6c81761fbc005c9b0a48f573 Mon Sep 17 00:00:00 2001 From: "fan.du" Date: Sun, 1 Dec 2013 16:28:48 +0800 Subject: [PATCH 37/92] {pktgen, xfrm} Update IPv4 header total len and checksum after tranformation [ Upstream commit 3868204d6b89ea373a273e760609cb08020beb1a ] commit a553e4a6317b2cfc7659542c10fe43184ffe53da ("[PKTGEN]: IPSEC support") tried to support IPsec ESP transport transformation for pktgen, but acctually this doesn't work at all for two reasons(The orignal transformed packet has bad IPv4 checksum value, as well as wrong auth value, reported by wireshark) - After transpormation, IPv4 header total length needs update, because encrypted payload's length is NOT same as that of plain text. - After transformation, IPv4 checksum needs re-caculate because of payload has been changed. With this patch, armmed pktgen with below cofiguration, Wireshark is able to decrypted ESP packet generated by pktgen without any IPv4 checksum error or auth value error. pgset "flag IPSEC" pgset "flows 1" Signed-off-by: Fan Du Signed-off-by: David S. Miller Signed-off-by: Kamal Mostafa --- net/core/pktgen.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/core/pktgen.c b/net/core/pktgen.c index e6e1cbe863f5..3659f1ecbd02 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2503,6 +2503,8 @@ static int process_ipsec(struct pktgen_dev *pkt_dev, if (x) { int ret; __u8 *eth; + struct iphdr *iph; + nhead = x->props.header_len - skb_headroom(skb); if (nhead > 0) { ret = pskb_expand_head(skb, nhead, 0, GFP_ATOMIC); @@ -2524,6 +2526,11 @@ static int process_ipsec(struct pktgen_dev *pkt_dev, eth = (__u8 *) skb_push(skb, ETH_HLEN); memcpy(eth, pkt_dev->hh, 12); *(u16 *) ð[12] = protocol; + + /* Update IPv4 header len as well as checksum value */ + iph = ip_hdr(skb); + iph->tot_len = htons(skb->len - ETH_HLEN); + ip_send_check(iph); } } return 1; From 3870190fd642d368580a6682a2824f332b4c042c Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 5 Nov 2013 19:36:27 +0800 Subject: [PATCH 38/92] crypto: s390 - Fix aes-cbc IV corruption commit f262f0f5cad0c9eca61d1d383e3b67b57dcbe5ea upstream. The cbc-aes-s390 algorithm incorrectly places the IV in the tfm data structure. As the tfm is shared between multiple threads, this introduces a possibility of data corruption. This patch fixes this by moving the parameter block containing the IV and key onto the stack (the block is 48 bytes long). The same bug exists elsewhere in the s390 crypto system and they will be fixed in subsequent patches. Signed-off-by: Herbert Xu Signed-off-by: Kamal Mostafa --- arch/s390/crypto/aes_s390.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index b4dbade8ca24..2e4b5be31a1b 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -35,7 +35,6 @@ static u8 *ctrblk; static char keylen_flag; struct s390_aes_ctx { - u8 iv[AES_BLOCK_SIZE]; u8 key[AES_MAX_KEY_SIZE]; long enc; long dec; @@ -441,30 +440,36 @@ static int cbc_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, return aes_set_key(tfm, in_key, key_len); } -static int cbc_aes_crypt(struct blkcipher_desc *desc, long func, void *param, +static int cbc_aes_crypt(struct blkcipher_desc *desc, long func, struct blkcipher_walk *walk) { + struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); int ret = blkcipher_walk_virt(desc, walk); unsigned int nbytes = walk->nbytes; + struct { + u8 iv[AES_BLOCK_SIZE]; + u8 key[AES_MAX_KEY_SIZE]; + } param; if (!nbytes) goto out; - memcpy(param, walk->iv, AES_BLOCK_SIZE); + memcpy(param.iv, walk->iv, AES_BLOCK_SIZE); + memcpy(param.key, sctx->key, sctx->key_len); do { /* only use complete blocks */ unsigned int n = nbytes & ~(AES_BLOCK_SIZE - 1); u8 *out = walk->dst.virt.addr; u8 *in = walk->src.virt.addr; - ret = crypt_s390_kmc(func, param, out, in, n); + ret = crypt_s390_kmc(func, ¶m, out, in, n); if (ret < 0 || ret != n) return -EIO; nbytes &= AES_BLOCK_SIZE - 1; ret = blkcipher_walk_done(desc, walk, nbytes); } while ((nbytes = walk->nbytes)); - memcpy(walk->iv, param, AES_BLOCK_SIZE); + memcpy(walk->iv, param.iv, AES_BLOCK_SIZE); out: return ret; @@ -481,7 +486,7 @@ static int cbc_aes_encrypt(struct blkcipher_desc *desc, return fallback_blk_enc(desc, dst, src, nbytes); blkcipher_walk_init(&walk, dst, src, nbytes); - return cbc_aes_crypt(desc, sctx->enc, sctx->iv, &walk); + return cbc_aes_crypt(desc, sctx->enc, &walk); } static int cbc_aes_decrypt(struct blkcipher_desc *desc, @@ -495,7 +500,7 @@ static int cbc_aes_decrypt(struct blkcipher_desc *desc, return fallback_blk_dec(desc, dst, src, nbytes); blkcipher_walk_init(&walk, dst, src, nbytes); - return cbc_aes_crypt(desc, sctx->dec, sctx->iv, &walk); + return cbc_aes_crypt(desc, sctx->dec, &walk); } static struct crypto_alg cbc_aes_alg = { From cf339cce481f6eb92c2d71a1ca768ab415efdb91 Mon Sep 17 00:00:00 2001 From: Girish K S Date: Tue, 16 Apr 2013 14:58:02 +0530 Subject: [PATCH 39/92] ahci: sata: add support for exynos5440 sata commit 1e8f5f761ca3636bb6e281d9dd8adfa887b38eea upstream. This patch adds the compatible string of the exynos5440 sata controller compliant with the ahci 1.3 and sata 3.0 specification. changes in v2: changed the compatible string by adding the actual IP owners name instead of the SoC vendor name. Signed-off-by: Girish K S Signed-off-by: Tejun Heo Signed-off-by: Kamal Mostafa --- drivers/ata/ahci_platform.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ata/ahci_platform.c b/drivers/ata/ahci_platform.c index 7a8a2841fe64..2daaee05cab1 100644 --- a/drivers/ata/ahci_platform.c +++ b/drivers/ata/ahci_platform.c @@ -327,6 +327,7 @@ static SIMPLE_DEV_PM_OPS(ahci_pm_ops, ahci_suspend, ahci_resume); static const struct of_device_id ahci_of_match[] = { { .compatible = "snps,spear-ahci", }, + { .compatible = "snps,exynos5440-ahci", }, {}, }; MODULE_DEVICE_TABLE(of, ahci_of_match); From ee89d3ed15ebd49dc23eb3f1552f554a97bb09aa Mon Sep 17 00:00:00 2001 From: Samir Benmendil Date: Sun, 17 Nov 2013 23:56:17 +0100 Subject: [PATCH 40/92] ahci: add Marvell 9230 to the AHCI PCI device list commit 6d5278a68a75891db1df5ae1ecf83d288fc58c65 upstream. Tested with a DAWICONTROL DC-624e on 3.10.10 Signed-off-by: Samir Benmendil Signed-off-by: Tejun Heo Reviewed-by: Levente Kurusa [ kamal: backport to 3.8 ] Signed-off-by: Kamal Mostafa --- drivers/ata/ahci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 2a870e307fcd..6468d4306f99 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -431,6 +431,8 @@ static const struct pci_device_id ahci_pci_tbl[] = { .driver_data = board_ahci_yes_fbs }, /* 88se9172 on some Gigabyte */ { PCI_DEVICE(0x1b4b, 0x91a3), .driver_data = board_ahci_yes_fbs }, + { PCI_DEVICE(0x1b4b, 0x9230), + .driver_data = board_ahci_yes_fbs }, /* Promise */ { PCI_VDEVICE(PROMISE, 0x3f20), board_ahci }, /* PDC42819 */ From 9db608932a12040339bfbceb445e9a6c5aeb53fb Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Mon, 25 Nov 2013 11:12:20 +1100 Subject: [PATCH 41/92] powerpc/signals: Improved mark VSX not saved with small contexts fix commit ec67ad82814bee92251fd963bf01c7a173856555 upstream. In a recent patch: commit c13f20ac48328b05cd3b8c19e31ed6c132b44b42 Author: Michael Neuling powerpc/signals: Mark VSX not saved with small contexts We fixed an issue but an improved solution was later discussed after the patch was merged. Firstly, this patch doesn't handle the 64bit signals case, which could also hit this issue (but has never been reported). Secondly, the original patch isn't clear what MSR VSX should be set to. The new approach below always clears the MSR VSX bit (to indicate no VSX is in the context) and sets it only in the specific case where VSX is available (ie. when VSX has been used and the signal context passed has space to provide the state). This reverts the original patch and replaces it with the improved solution. It also adds a 64 bit version. Signed-off-by: Michael Neuling Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Kamal Mostafa --- arch/powerpc/kernel/signal_32.c | 16 +++++++--------- arch/powerpc/kernel/signal_64.c | 6 ++++++ 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index d344c0362312..2069a28ce106 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -437,6 +437,12 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, #endif /* CONFIG_ALTIVEC */ if (copy_fpr_to_user(&frame->mc_fregs, current)) return 1; + + /* + * Clear the MSR VSX bit to indicate there is no valid state attached + * to this context, except in the specific case below where we set it. + */ + msr &= ~MSR_VSX; #ifdef CONFIG_VSX /* * Copy VSR 0-31 upper half from thread_struct to local @@ -449,15 +455,7 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, if (copy_vsx_to_user(&frame->mc_vsregs, current)) return 1; msr |= MSR_VSX; - } else if (!ctx_has_vsx_region) - /* - * With a small context structure we can't hold the VSX - * registers, hence clear the MSR value to indicate the state - * was not saved. - */ - msr &= ~MSR_VSX; - - + } #endif /* CONFIG_VSX */ #ifdef CONFIG_SPE /* save spe registers */ diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 1ca045d44324..cca30461467d 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -117,6 +117,12 @@ static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, flush_fp_to_thread(current); /* copy fpr regs and fpscr */ err |= copy_fpr_to_user(&sc->fp_regs, current); + + /* + * Clear the MSR VSX bit to indicate there is no valid state attached + * to this context, except in the specific case below where we set it. + */ + msr &= ~MSR_VSX; #ifdef CONFIG_VSX /* * Copy VSX low doubleword to local buffer for formatting, From e103a0fd51da46c164da44c9a8242044cf4dda06 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 7 Nov 2013 10:50:19 +0300 Subject: [PATCH 42/92] gpio: mvebu: make mvchip->irqbase signed for error handling commit d535922691fc026479fcc03e78ac3d931a54e75a upstream. There is a bug in mvebu_gpio_probe() where we do: mvchip->irqbase = irq_alloc_descs(-1, 0, ngpios, -1); if (mvchip->irqbase < 0) { The problem is that mvchip->irqbase is unsigned so the error handling doesn't work. I have changed it to be a regular int. Signed-off-by: Dan Carpenter Signed-off-by: Linus Walleij Signed-off-by: Kamal Mostafa --- drivers/gpio/gpio-mvebu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-mvebu.c b/drivers/gpio/gpio-mvebu.c index 456663c02997..6300159267e4 100644 --- a/drivers/gpio/gpio-mvebu.c +++ b/drivers/gpio/gpio-mvebu.c @@ -78,7 +78,7 @@ struct mvebu_gpio_chip { spinlock_t lock; void __iomem *membase; void __iomem *percpu_membase; - unsigned int irqbase; + int irqbase; struct irq_domain *domain; int soc_variant; }; From f521cabce323d67d6efeb1aafb5013f2272e9e24 Mon Sep 17 00:00:00 2001 From: Olav Haugan Date: Fri, 22 Nov 2013 09:30:41 -0800 Subject: [PATCH 43/92] staging: zsmalloc: Ensure handle is never 0 on success commit 67296874eb1cc80317bf2a8fba22b494e21eb29b upstream. zsmalloc encodes a handle using the pfn and an object index. On hardware platforms with physical memory starting at 0x0 the pfn can be 0. This causes the encoded handle to be 0 and is incorrectly interpreted as an allocation failure. This issue affects all current and future SoCs with physical memory starting at 0x0. All MSM8974 SoCs which includes Google Nexus 5 devices are affected. To prevent this false error we ensure that the encoded handle will not be 0 when allocation succeeds. Signed-off-by: Olav Haugan Signed-off-by: Greg Kroah-Hartman Signed-off-by: Kamal Mostafa --- drivers/staging/zsmalloc/zsmalloc-main.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/staging/zsmalloc/zsmalloc-main.c b/drivers/staging/zsmalloc/zsmalloc-main.c index 851a2fff3705..60f77ff0dcb3 100644 --- a/drivers/staging/zsmalloc/zsmalloc-main.c +++ b/drivers/staging/zsmalloc/zsmalloc-main.c @@ -431,7 +431,12 @@ static struct page *get_next_page(struct page *page) return next; } -/* Encode as a single handle value */ +/* + * Encode as a single handle value. + * On hardware platforms with physical memory starting at 0x0 the pfn + * could be 0 so we ensure that the handle will never be 0 by adjusting the + * encoded obj_idx value before encoding. + */ static void *obj_location_to_handle(struct page *page, unsigned long obj_idx) { unsigned long handle; @@ -442,17 +447,21 @@ static void *obj_location_to_handle(struct page *page, unsigned long obj_idx) } handle = page_to_pfn(page) << OBJ_INDEX_BITS; - handle |= (obj_idx & OBJ_INDEX_MASK); + handle |= ((obj_idx + 1) & OBJ_INDEX_MASK); return (void *)handle; } -/* Decode pair from the given object handle */ +/* + * Decode pair from the given object handle. We adjust the + * decoded obj_idx back to its original value since it was adjusted in + * obj_location_to_handle(). + */ static void obj_handle_to_location(unsigned long handle, struct page **page, unsigned long *obj_idx) { *page = pfn_to_page(handle >> OBJ_INDEX_BITS); - *obj_idx = handle & OBJ_INDEX_MASK; + *obj_idx = (handle & OBJ_INDEX_MASK) - 1; } static unsigned long obj_idx_to_offset(struct page *page, From ca27a91c3c1c1d6aa6d6b1b218fb62345ddb6f2b Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Tue, 26 Nov 2013 15:17:50 +0800 Subject: [PATCH 44/92] ALSA: hda/realtek - Add support of ALC231 codec commit ba4c4d0a9021ab034554d532a98133d668b87599 upstream. It's compatible with ALC269. Signed-off-by: Kailang Yang Signed-off-by: Takashi Iwai Signed-off-by: Kamal Mostafa --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 876948fba563..ff053ddd709d 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -7220,6 +7220,7 @@ static int patch_alc680(struct hda_codec *codec) */ static const struct hda_codec_preset snd_hda_preset_realtek[] = { { .id = 0x10ec0221, .name = "ALC221", .patch = patch_alc269 }, + { .id = 0x10ec0231, .name = "ALC231", .patch = patch_alc269 }, { .id = 0x10ec0233, .name = "ALC233", .patch = patch_alc269 }, { .id = 0x10ec0255, .name = "ALC255", .patch = patch_alc269 }, { .id = 0x10ec0260, .name = "ALC260", .patch = patch_alc260 }, From b97ccbb7b3d34e520f06d215a407f266f4127ef5 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Tue, 26 Nov 2013 15:41:40 +0800 Subject: [PATCH 45/92] ALSA: hda/realtek - Set pcbeep amp for ALC668 commit 9ad54547cf6f4410eba83bb95dfd2a0966718d6d upstream. Set the missing pcbeep default amp for ALC668. Signed-off-by: Kailang Yang Signed-off-by: Takashi Iwai Signed-off-by: Kamal Mostafa --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index ff053ddd709d..62de6f1451dd 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -7163,6 +7163,7 @@ static int patch_alc662(struct hda_codec *codec) case 0x10ec0272: case 0x10ec0663: case 0x10ec0665: + case 0x10ec0668: set_beep_amp(spec, 0x0b, 0x04, HDA_INPUT); break; case 0x10ec0273: From e40b4761fbf4e7acba134b0b0257153ae51da030 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 26 Nov 2013 09:22:54 -0500 Subject: [PATCH 46/92] tracing: Allow events to have NULL strings commit 4e58e54754dc1fec21c3a9e824bc108b05fdf46e upstream. If an TRACE_EVENT() uses __assign_str() or __get_str on a NULL pointer then the following oops will happen: BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] strlen+0x10/0x1a *pde = 00000000 ^M Oops: 0000 [#1] PREEMPT SMP Modules linked in: CPU: 1 PID: 0 Comm: swapper/1 Not tainted 3.13.0-rc1-test+ #2 Hardware name: /DG965MQ, BIOS MQ96510J.86A.0372.2006.0605.1717 06/05/2006^M task: f5cde9f0 ti: f5e5e000 task.ti: f5e5e000 EIP: 0060:[] EFLAGS: 00210046 CPU: 1 EIP is at strlen+0x10/0x1a EAX: 00000000 EBX: c2472da8 ECX: ffffffff EDX: c2472da8 ESI: c1c5e5fc EDI: 00000000 EBP: f5e5fe84 ESP: f5e5fe80 DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 CR0: 8005003b CR2: 00000000 CR3: 01f32000 CR4: 000007d0 Stack: f5f18b90 f5e5feb8 c10687a8 0759004f 00000005 00000005 00000005 00200046 00000002 00000000 c1082a93 f56c7e28 c2472da8 c1082a93 f5e5fee4 c106bc61^M 00000000 c1082a93 00000000 00000000 00000001 00200046 00200082 00000000 Call Trace: [] ftrace_raw_event_lock+0x39/0xc0 [] ? ktime_get+0x29/0x69 [] ? ktime_get+0x29/0x69 [] lock_release+0x57/0x1a5 [] ? ktime_get+0x29/0x69 [] read_seqcount_begin.constprop.7+0x4d/0x75 [] ? ktime_get+0x29/0x69^M [] ktime_get+0x29/0x69 [] __tick_nohz_idle_enter+0x1e/0x426 [] ? lock_release_holdtime.part.19+0x48/0x4d [] ? time_hardirqs_off+0xe/0x28 [] ? trace_hardirqs_off_caller+0x3f/0xaf [] tick_nohz_idle_enter+0x59/0x62 [] cpu_startup_entry+0x64/0x192 [] start_secondary+0x277/0x27c Code: 90 89 c6 89 d0 88 c4 ac 38 e0 74 09 84 c0 75 f7 be 01 00 00 00 89 f0 48 5e 5d c3 55 89 e5 57 66 66 66 66 90 83 c9 ff 89 c7 31 c0 ae f7 d1 8d 41 ff 5f 5d c3 55 89 e5 57 66 66 66 66 90 31 ff EIP: [] strlen+0x10/0x1a SS:ESP 0068:f5e5fe80 CR2: 0000000000000000 ---[ end trace 01bc47bf519ec1b2 ]--- New tracepoints have been added that have allowed for NULL pointers being assigned to strings. To fix this, change the TRACE_EVENT() code to check for NULL and if it is, it will assign "(null)" to it instead (similar to what glibc printf does). Reported-by: Shuah Khan Reported-by: Jovi Zhangwei Link: http://lkml.kernel.org/r/CAGdX0WFeEuy+DtpsJzyzn0343qEEjLX97+o1VREFkUEhndC+5Q@mail.gmail.com Link: http://lkml.kernel.org/r/528D6972.9010702@samsung.com Fixes: 9cbf117662e2 ("tracing/events: provide string with undefined size support") Signed-off-by: Steven Rostedt Signed-off-by: Kamal Mostafa --- include/trace/ftrace.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 40dc5e8fe340..17e6221392a3 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -379,7 +379,8 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ __data_size += (len) * sizeof(type); #undef __string -#define __string(item, src) __dynamic_array(char, item, strlen(src) + 1) +#define __string(item, src) __dynamic_array(char, item, \ + strlen((src) ? (const char *)(src) : "(null)") + 1) #undef DECLARE_EVENT_CLASS #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ @@ -504,7 +505,7 @@ static inline notrace int ftrace_get_offsets_##call( \ #undef __assign_str #define __assign_str(dst, src) \ - strcpy(__get_str(dst), src); + strcpy(__get_str(dst), (src) ? (const char *)(src) : "(null)"); #undef TP_fast_assign #define TP_fast_assign(args...) args From 882ac068d2c7c36e22f63730775ebf749991b543 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 27 Nov 2013 09:32:49 -0800 Subject: [PATCH 47/92] Staging: tidspbridge: disable driver commit 930ba4a374b96560ef9fde2145cdc454a164ddcc upstream. There seems to be no active maintainer for the driver, and there is an unfixed security bug, so disable the driver for now. Hopefully someone steps up to be the maintainer, and works to get this out of staging, otherwise it will be deleted soon. Reported-by: Nico Golde Reported-by: Dan Carpenter Cc: Omar Ramirez Luna Cc: Omar Ramirez Luna Cc: Kanigeri, Hari Cc: Ameya Palande Cc: Guzman Lugo, Fernando Cc: Hebbar, Shivananda Cc: Ramos Falcon, Ernesto Cc: Felipe Contreras Cc: Anna, Suman Cc: Gupta, Ramesh Cc: Gomez Castellanos, Ivan Cc: Andy Shevchenko Cc: Armando Uribe De Leon Cc: Deepak Chitriki Cc: Menon, Nishanth Cc: Phil Carmody Cc: Ohad Ben-Cohen Signed-off-by: Greg Kroah-Hartman [ kamal: backport to 3.8 (context) ] Signed-off-by: Kamal Mostafa --- drivers/staging/tidspbridge/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/tidspbridge/Kconfig b/drivers/staging/tidspbridge/Kconfig index 0dd479f5638d..f67c78e50561 100644 --- a/drivers/staging/tidspbridge/Kconfig +++ b/drivers/staging/tidspbridge/Kconfig @@ -4,7 +4,7 @@ menuconfig TIDSPBRIDGE tristate "DSP Bridge driver" - depends on ARCH_OMAP3 + depends on ARCH_OMAP3 && BROKEN select OMAP_MBOX_FWK help DSP/BIOS Bridge is designed for platforms that contain a GPP and From 47588e0b15773b71883a7237f3be3f0bd9f39394 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 26 Nov 2013 15:03:41 +0100 Subject: [PATCH 48/92] cpuset: Fix memory allocator deadlock commit 0fc0287c9ed1ffd3706f8b4d9b314aa102ef1245 upstream. Juri hit the below lockdep report: [ 4.303391] ====================================================== [ 4.303392] [ INFO: SOFTIRQ-safe -> SOFTIRQ-unsafe lock order detected ] [ 4.303394] 3.12.0-dl-peterz+ #144 Not tainted [ 4.303395] ------------------------------------------------------ [ 4.303397] kworker/u4:3/689 [HC0[0]:SC0[0]:HE0:SE1] is trying to acquire: [ 4.303399] (&p->mems_allowed_seq){+.+...}, at: [] new_slab+0x6c/0x290 [ 4.303417] [ 4.303417] and this task is already holding: [ 4.303418] (&(&q->__queue_lock)->rlock){..-...}, at: [] blk_execute_rq_nowait+0x5b/0x100 [ 4.303431] which would create a new lock dependency: [ 4.303432] (&(&q->__queue_lock)->rlock){..-...} -> (&p->mems_allowed_seq){+.+...} [ 4.303436] [ 4.303898] the dependencies between the lock to be acquired and SOFTIRQ-irq-unsafe lock: [ 4.303918] -> (&p->mems_allowed_seq){+.+...} ops: 2762 { [ 4.303922] HARDIRQ-ON-W at: [ 4.303923] [] __lock_acquire+0x65a/0x1ff0 [ 4.303926] [] lock_acquire+0x93/0x140 [ 4.303929] [] kthreadd+0x86/0x180 [ 4.303931] [] ret_from_fork+0x7c/0xb0 [ 4.303933] SOFTIRQ-ON-W at: [ 4.303933] [] __lock_acquire+0x68c/0x1ff0 [ 4.303935] [] lock_acquire+0x93/0x140 [ 4.303940] [] kthreadd+0x86/0x180 [ 4.303955] [] ret_from_fork+0x7c/0xb0 [ 4.303959] INITIAL USE at: [ 4.303960] [] __lock_acquire+0x344/0x1ff0 [ 4.303963] [] lock_acquire+0x93/0x140 [ 4.303966] [] kthreadd+0x86/0x180 [ 4.303969] [] ret_from_fork+0x7c/0xb0 [ 4.303972] } Which reports that we take mems_allowed_seq with interrupts enabled. A little digging found that this can only be from cpuset_change_task_nodemask(). This is an actual deadlock because an interrupt doing an allocation will hit get_mems_allowed()->...->__read_seqcount_begin(), which will spin forever waiting for the write side to complete. Cc: John Stultz Cc: Mel Gorman Reported-by: Juri Lelli Signed-off-by: Peter Zijlstra Tested-by: Juri Lelli Acked-by: Li Zefan Acked-by: Mel Gorman Signed-off-by: Tejun Heo Signed-off-by: Kamal Mostafa --- kernel/cpuset.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 5bb9bf18438c..aa4ec0185dbc 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -989,8 +989,10 @@ static void cpuset_change_task_nodemask(struct task_struct *tsk, need_loop = task_has_mempolicy(tsk) || !nodes_intersects(*newmems, tsk->mems_allowed); - if (need_loop) + if (need_loop) { + local_irq_disable(); write_seqcount_begin(&tsk->mems_allowed_seq); + } nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems); mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP1); @@ -998,8 +1000,10 @@ static void cpuset_change_task_nodemask(struct task_struct *tsk, mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP2); tsk->mems_allowed = *newmems; - if (need_loop) + if (need_loop) { write_seqcount_end(&tsk->mems_allowed_seq); + local_irq_enable(); + } task_unlock(tsk); } From 81785c71cc0a2c0c571e71f013d3bcc2ddd33a3c Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 28 Nov 2013 15:21:21 +0100 Subject: [PATCH 49/92] ALSA: hda - Initialize missing bass speaker pin for ASUS AIO ET2700 commit 1f0bbf03cb829162ec8e6d03c98aaaed88c6f534 upstream. Add a fixup entry for the missing bass speaker pin 0x16 on ASUS ET2700 AiO desktop. The channel map will be added in the next patch, so that this can be backported easily to stable kernels. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=65961 Signed-off-by: Takashi Iwai Signed-off-by: Kamal Mostafa --- sound/pci/hda/patch_realtek.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 62de6f1451dd..8a4764afa06f 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5080,6 +5080,7 @@ enum { ALC889_FIXUP_IMAC91_VREF, ALC882_FIXUP_INV_DMIC, ALC882_FIXUP_NO_PRIMARY_HP, + ALC887_FIXUP_ASUS_BASS, }; static void alc889_fixup_coef(struct hda_codec *codec, @@ -5402,6 +5403,13 @@ static const struct alc_fixup alc882_fixups[] = { .type = ALC_FIXUP_FUNC, .v.func = alc882_fixup_no_primary_hp, }, + [ALC887_FIXUP_ASUS_BASS] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + {0x16, 0x99130130}, /* bass speaker */ + {} + }, + }, }; static const struct snd_pci_quirk alc882_fixup_tbl[] = { @@ -5435,6 +5443,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1873, "ASUS W90V", ALC882_FIXUP_ASUS_W90V), SND_PCI_QUIRK(0x1043, 0x1971, "Asus W2JC", ALC882_FIXUP_ASUS_W2JC), SND_PCI_QUIRK(0x1043, 0x835f, "Asus Eee 1601", ALC888_FIXUP_EEE1601), + SND_PCI_QUIRK(0x1043, 0x84bc, "ASUS ET2700", ALC887_FIXUP_ASUS_BASS), SND_PCI_QUIRK(0x104d, 0x9047, "Sony Vaio TT", ALC889_FIXUP_VAIO_TT), SND_PCI_QUIRK(0x104d, 0x905a, "Sony Vaio Z", ALC882_FIXUP_NO_PRIMARY_HP), SND_PCI_QUIRK(0x104d, 0x9043, "Sony Vaio VGC-LN51JGB", ALC882_FIXUP_NO_PRIMARY_HP), From 17d94dc78297c6e961550c132095fe524b615afd Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Tue, 10 Dec 2013 09:35:10 -0500 Subject: [PATCH 50/92] NFSv4 wait on recovery for async session errors commit 4a82fd7c4e78a1b7a224f9ae8bb7e1fd95f670e0 upstream. When the state manager is processing the NFS4CLNT_DELEGRETURN flag, session draining is off, but DELEGRETURN can still get a session error. The async handler calls nfs4_schedule_session_recovery returns -EAGAIN, and the DELEGRETURN done then restarts the RPC task in the prepare state. With the state manager still processing the NFS4CLNT_DELEGRETURN flag with session draining off, these DELEGRETURNs will cycle with errors filling up the session slots. This prevents OPEN reclaims (from nfs_delegation_claim_opens) required by the NFS4CLNT_DELEGRETURN state manager processing from completing, hanging the state manager in the __rpc_wait_for_completion_task in nfs4_run_open_task as seen in this kernel thread dump: kernel: 4.12.32.53-ma D 0000000000000000 0 3393 2 0x00000000 kernel: ffff88013995fb60 0000000000000046 ffff880138cc5400 ffff88013a9df140 kernel: ffff8800000265c0 ffffffff8116eef0 ffff88013fc10080 0000000300000001 kernel: ffff88013a4ad058 ffff88013995ffd8 000000000000fbc8 ffff88013a4ad058 kernel: Call Trace: kernel: [] ? cache_alloc_refill+0x1c0/0x240 kernel: [] ? rpc_wait_bit_killable+0x0/0xa0 [sunrpc] kernel: [] rpc_wait_bit_killable+0x42/0xa0 [sunrpc] kernel: [] __wait_on_bit+0x5f/0x90 kernel: [] ? rpc_wait_bit_killable+0x0/0xa0 [sunrpc] kernel: [] out_of_line_wait_on_bit+0x78/0x90 kernel: [] ? wake_bit_function+0x0/0x50 kernel: [] __rpc_wait_for_completion_task+0x2d/0x30 [sunrpc] kernel: [] nfs4_run_open_task+0x11c/0x160 [nfs] kernel: [] nfs4_open_recover_helper+0x87/0x120 [nfs] kernel: [] nfs4_open_recover+0xc6/0x150 [nfs] kernel: [] ? nfs4_open_recoverdata_alloc+0x2f/0x60 [nfs] kernel: [] nfs4_open_delegation_recall+0x6a/0xa0 [nfs] kernel: [] nfs_end_delegation_return+0x120/0x2e0 [nfs] kernel: [] ? queue_work+0x1f/0x30 kernel: [] nfs_client_return_marked_delegations+0xd7/0x110 [nfs] kernel: [] nfs4_run_state_manager+0x548/0x620 [nfs] kernel: [] ? nfs4_run_state_manager+0x0/0x620 [nfs] kernel: [] kthread+0x96/0xa0 kernel: [] child_rip+0xa/0x20 kernel: [] ? kthread+0x0/0xa0 kernel: [] ? child_rip+0x0/0x20 The state manager can not therefore process the DELEGRETURN session errors. Change the async handler to wait for recovery on session errors. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust [ Andy Adamson: backport to <=3.12 ] Signed-off-by: Kamal Mostafa --- fs/nfs/nfs4proc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index ef31b6bf0c58..4568c7a107f8 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3975,8 +3975,7 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, dprintk("%s ERROR %d, Reset session\n", __func__, task->tk_status); nfs4_schedule_session_recovery(clp->cl_session, task->tk_status); - task->tk_status = 0; - return -EAGAIN; + goto wait_on_recovery; #endif /* CONFIG_NFS_V4_1 */ case -NFS4ERR_DELAY: nfs_inc_server_stats(server, NFSIOS_DELAY); From db19c96cf65df6b5faf70fc58f8d12cce5d1e8a1 Mon Sep 17 00:00:00 2001 From: Ben Segall Date: Wed, 16 Oct 2013 11:16:32 -0700 Subject: [PATCH 51/92] sched: Avoid throttle_cfs_rq() racing with period_timer stopping commit f9f9ffc237dd924f048204e8799da74f9ecf40cf upstream. throttle_cfs_rq() doesn't check to make sure that period_timer is running, and while update_curr/assign_cfs_runtime does, a concurrently running period_timer on another cpu could cancel itself between this cpu's update_curr and throttle_cfs_rq(). If there are no other cfs_rqs running in the tg to restart the timer, this causes the cfs_rq to be stranded forever. Fix this by calling __start_cfs_bandwidth() in throttle if the timer is inactive. (Also add some sched_debug lines for cfs_bandwidth.) Tested: make a run/sleep task in a cgroup, loop switching the cgroup between 1ms/100ms quota and unlimited, checking for timer_active=0 and throttled=1 as a failure. With the throttle_cfs_rq() change commented out this fails, with the full patch it passes. Signed-off-by: Ben Segall Signed-off-by: Peter Zijlstra Cc: pjt@google.com Link: http://lkml.kernel.org/r/20131016181632.22647.84174.stgit@sword-of-the-dawn.mtv.corp.google.com Signed-off-by: Ingo Molnar Signed-off-by: Chris J Arges [ Chris J Arges: backport to 3.8 ] Signed-off-by: Kamal Mostafa --- kernel/sched/debug.c | 10 ++++++++++ kernel/sched/fair.c | 2 ++ 2 files changed, 12 insertions(+) diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 7ae4c4c5420e..353902961711 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -231,7 +231,17 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) SEQ_printf(m, " .%-30s: %d\n", "tg->runnable_avg", atomic_read(&cfs_rq->tg->runnable_avg)); #endif +#endif +#ifdef CONFIG_CFS_BANDWIDTH + SEQ_printf(m, " .%-30s: %d\n", "tg->cfs_bandwidth.timer_active", + cfs_rq->tg->cfs_bandwidth.timer_active); + SEQ_printf(m, " .%-30s: %d\n", "throttled", + cfs_rq->throttled); + SEQ_printf(m, " .%-30s: %d\n", "throttle_count", + cfs_rq->throttle_count); +#endif +#ifdef CONFIG_FAIR_GROUP_SCHED print_cfs_group_stats(m, cpu, cfs_rq->tg); #endif } diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index e9ab160eea3a..6c8930e2bfcf 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -2268,6 +2268,8 @@ static void throttle_cfs_rq(struct cfs_rq *cfs_rq) cfs_rq->throttled_clock = rq->clock; raw_spin_lock(&cfs_b->lock); list_add_tail_rcu(&cfs_rq->throttled_list, &cfs_b->throttled_cfs_rq); + if (!cfs_b->timer_active) + __start_cfs_bandwidth(cfs_b); raw_spin_unlock(&cfs_b->lock); } From c2a0107c238b1ef46c8c33be512073fbf3aedf3e Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Fri, 13 Dec 2013 10:06:35 +0100 Subject: [PATCH 52/92] ip6tnl: fix use after free of fb_tnl_dev The upstream commit bb8140947a24 ("ip6tnl: allow to use rtnl ops on fb tunnel") (backported into linux-3.10.y) left a bug which was fixed upstream by commit 1e9f3d6f1c40 ("ip6tnl: fix use after free of fb_tnl_dev"). The problem is a bit different in linux-3.10.y, because there is no x-netns support (upstream commit 0bd8762824e7 ("ip6tnl: add x-netns support")). When ip6_tunnel.ko is unloaded, FB device is deleted by rtnl_link_unregister() and then we try to delete it again in ip6_tnl_destroy_tunnels(). This patch removes the second deletion. Reported-by: Steven Rostedt Suggested-by: Steven Rostedt Signed-off-by: Nicolas Dichtel Cc: David Miller Signed-off-by: Luis Henriques Signed-off-by: Kamal Mostafa --- net/ipv6/ip6_tunnel.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index a953bfbc93de..fa3fe7013487 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1725,8 +1725,6 @@ static void __net_exit ip6_tnl_destroy_tunnels(struct ip6_tnl_net *ip6n) } } - t = rtnl_dereference(ip6n->tnls_wc[0]); - unregister_netdevice_queue(t->dev, &list); unregister_netdevice_many(&list); } From 5fc9d12fb4a7c3a42a82f63f6597090a66538551 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Thu, 12 Dec 2013 11:32:19 +0000 Subject: [PATCH 53/92] staging: comedi: ssv_dnp: use comedi_dio_update_state() commit f6b316bcd8c421acd6fa5a6e18b4c846ecb9d965 upstream. Use comedi_dio_update_state() to handle the boilerplate code to update the subdevice s->state. Also, fix a bug where the state of the channels is returned in data[0]. The comedi core expects it to be returned in data[1]. Signed-off-by: H Hartley Sweeten Reviewed-by: Ian Abbott [ Ian: Applies to all stable kernels from 2.6.32.y to 3.12.y ] Signed-off-by: Luis Henriques Signed-off-by: Kamal Mostafa --- drivers/staging/comedi/drivers/ssv_dnp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/staging/comedi/drivers/ssv_dnp.c b/drivers/staging/comedi/drivers/ssv_dnp.c index afa4016f906a..217d5b68ede0 100644 --- a/drivers/staging/comedi/drivers/ssv_dnp.c +++ b/drivers/staging/comedi/drivers/ssv_dnp.c @@ -87,11 +87,11 @@ static int dnp_dio_insn_bits(struct comedi_device *dev, /* on return, data[1] contains the value of the digital input lines. */ outb(PADR, CSCIR); - data[0] = inb(CSCDR); + data[1] = inb(CSCDR); outb(PBDR, CSCIR); - data[0] += inb(CSCDR) << 8; + data[1] += inb(CSCDR) << 8; outb(PCDR, CSCIR); - data[0] += ((inb(CSCDR) & 0xF0) << 12); + data[1] += ((inb(CSCDR) & 0xF0) << 12); return insn->n; From 88e0c15fdcebb4f6a70c26e9f84abadb29aef888 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Thu, 12 Dec 2013 12:10:32 +0000 Subject: [PATCH 54/92] staging: comedi: amplc_pc263: correct insn_bits result commit 97f4289ad08cffe55de06d4ac4f89ac540450aee upstream. [Split from original patch subject: "staging: comedi: drivers: use comedi_dio_update_state() for simple cases"] Also, fix a bug in the amplc_pc263 and amplc_pci263 drivers where the current state is not returned in data[1]. Signed-off-by: Ian Abbott Signed-off-by: Kamal Mostafa --- drivers/staging/comedi/drivers/amplc_pc263.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/staging/comedi/drivers/amplc_pc263.c b/drivers/staging/comedi/drivers/amplc_pc263.c index dfbff77cd795..53b2ad902485 100644 --- a/drivers/staging/comedi/drivers/amplc_pc263.c +++ b/drivers/staging/comedi/drivers/amplc_pc263.c @@ -192,6 +192,9 @@ static int pc263_do_insn_bits(struct comedi_device *dev, outb(s->state & 0xFF, dev->iobase); outb(s->state >> 8, dev->iobase + 1); } + + data[1] = s->state; + return insn->n; } From d9fd5fca2ff6404d897ef9c4089baf6c07aae9aa Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Thu, 12 Dec 2013 13:20:50 +0000 Subject: [PATCH 55/92] staging: comedi: pcmuio: fix possible NULL deref on detach commit 2fd2bdfccae61efe18f6b92b6a45fbf936d75b48 upstream. pcmuio_detach() is called by the comedi core even if pcmuio_attach() returned an error, so `dev->private` might be `NULL`. Check for that before dereferencing it. Also, as pointed out by Dan Carpenter, there is no need to check the pointer passed to `kfree()` is non-NULL, so remove that check. Signed-off-by: Ian Abbott Signed-off-by: Kamal Mostafa --- drivers/staging/comedi/drivers/pcmuio.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/staging/comedi/drivers/pcmuio.c b/drivers/staging/comedi/drivers/pcmuio.c index 31ea20c2d39e..2c44647cd3d0 100644 --- a/drivers/staging/comedi/drivers/pcmuio.c +++ b/drivers/staging/comedi/drivers/pcmuio.c @@ -908,12 +908,13 @@ static void pcmuio_detach(struct comedi_device *dev) if (dev->iobase) release_region(dev->iobase, ASIC_IOSIZE * board->num_asics); - for (i = 0; i < MAX_ASICS; ++i) { - if (devpriv->asics[i].irq) - free_irq(devpriv->asics[i].irq, dev); - } - if (devpriv && devpriv->sprivs) + if (devpriv) { + for (i = 0; i < MAX_ASICS; ++i) { + if (devpriv->asics[i].irq) + free_irq(devpriv->asics[i].irq, dev); + } kfree(devpriv->sprivs); + } } static const struct pcmuio_board pcmuio_boards[] = { From e94b8b61cbb4f830bc9963f1f52d6c82b2a3c46b Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 4 Oct 2013 11:01:43 -0300 Subject: [PATCH 56/92] [media] cxd2820r_core: fix sparse warnings commit 0db3fa2741ad8371c21b3a6785416a4afc0cc1d4 upstream. drivers/media/dvb-frontends/cxd2820r_core.c:34:32: error: cannot size expression drivers/media/dvb-frontends/cxd2820r_core.c:68:32: error: cannot size expression Signed-off-by: Hans Verkuil Acked-by: Antti Palosaari Reviewed-by: Antti Palosaari Reviewed-by: Michael Krufky Signed-off-by: Mauro Carvalho Chehab Cc: Frederik Himpe Signed-off-by: Luis Henriques Signed-off-by: Kamal Mostafa --- drivers/media/dvb-frontends/cxd2820r_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/media/dvb-frontends/cxd2820r_core.c b/drivers/media/dvb-frontends/cxd2820r_core.c index 9b658c1cf39a..52323decb2bf 100644 --- a/drivers/media/dvb-frontends/cxd2820r_core.c +++ b/drivers/media/dvb-frontends/cxd2820r_core.c @@ -31,7 +31,7 @@ static int cxd2820r_wr_regs_i2c(struct cxd2820r_priv *priv, u8 i2c, u8 reg, { .addr = i2c, .flags = 0, - .len = sizeof(buf), + .len = len + 1, .buf = buf, } }; @@ -65,7 +65,7 @@ static int cxd2820r_rd_regs_i2c(struct cxd2820r_priv *priv, u8 i2c, u8 reg, }, { .addr = i2c, .flags = I2C_M_RD, - .len = sizeof(buf), + .len = len, .buf = buf, } }; From f78a6f780bfa3bdd192f3849380302fb9fc97533 Mon Sep 17 00:00:00 2001 From: Wei Liu Date: Thu, 28 Nov 2013 12:48:09 +0000 Subject: [PATCH 57/92] xen-netback: fix refcnt unbalance for 3.11 and earlier versions With the introduction of "xen-netback: Don't destroy the netdev until the vif is shut down" (upstream commit id 279f438e36), vif disconnect and free are separated. However in the backported verion reference counting code was not correctly modified, and the reset of vif->tx_irq was lost. If frontend goes through vif life cycle more than once the reference counting is skewed. This patch adds back the missing tx_irq reset line. It also moves several lines of the reference counting code to vif_free, so the moved code corresponds to the counterpart in vif_alloc, thus the reference counting is balanced. 3.12 and onward versions are not affected by this bug, because reference counting code was removed due to the introduction of 1:1 model. This pacth should be backported to all stable verions which are lower than 3.12 and have 279f438e36. Reported-and-tested-by: Tomasz Wroblewski Signed-off-by: Wei Liu Cc: Ian Campbell Cc: Konrad Wilk Cc: David Vrabel [ kamal: backport to 3.8 ] Signed-off-by: Kamal Mostafa --- drivers/net/xen-netback/interface.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index c63cbd0f3873..b3cbaa98e434 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -363,17 +363,19 @@ void xenvif_disconnect(struct xenvif *vif) if (netif_carrier_ok(vif->dev)) xenvif_carrier_off(vif); - atomic_dec(&vif->refcnt); - wait_event(vif->waiting_to_free, atomic_read(&vif->refcnt) == 0); - - if (vif->irq) + if (vif->irq) { unbind_from_irqhandler(vif->irq, vif); + vif->irq = 0; + } xen_netbk_unmap_frontend_rings(vif); } void xenvif_free(struct xenvif *vif) { + atomic_dec(&vif->refcnt); + wait_event(vif->waiting_to_free, atomic_read(&vif->refcnt) == 0); + unregister_netdev(vif->dev); free_netdev(vif->dev); From 51c575a14f672b33506776f391078ea47b19a66e Mon Sep 17 00:00:00 2001 From: Jianguo Wu Date: Wed, 18 Dec 2013 17:08:59 -0800 Subject: [PATCH 58/92] mm/hugetlb: check for pte NULL pointer in __page_check_address() commit 98398c32f6687ee1e1f3ae084effb4b75adb0747 upstream. In __page_check_address(), if address's pud is not present, huge_pte_offset() will return NULL, we should check the return value. Signed-off-by: Jianguo Wu Cc: Naoya Horiguchi Cc: Mel Gorman Cc: qiuxishi Cc: Hanjun Guo Acked-by: Kirill A. Shutemov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Kamal Mostafa --- mm/rmap.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mm/rmap.c b/mm/rmap.c index 92b4529ad541..c2088370392e 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -600,7 +600,11 @@ pte_t *__page_check_address(struct page *page, struct mm_struct *mm, spinlock_t *ptl; if (unlikely(PageHuge(page))) { + /* when pud is not present, pte will be NULL */ pte = huge_pte_offset(mm, address); + if (!pte) + return NULL; + ptl = &mm->page_table_lock; goto check; } From ddb6a2c0fbe2a913241336318fda2b4bf5da0502 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 19 Nov 2013 16:34:14 -0500 Subject: [PATCH 59/92] NFSv4: Update list of irrecoverable errors on DELEGRETURN commit c97cf606e43b85a6cf158b810375dd77312024db upstream. If the DELEGRETURN errors out with something like NFS4ERR_BAD_STATEID then there is no recovery possible. Just quit without returning an error. Also, note that the client must not assume that the NFSv4 lease has been renewed when it sees an error on DELEGRETURN. Signed-off-by: Trond Myklebust Signed-off-by: Kamal Mostafa --- fs/nfs/nfs4proc.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 4568c7a107f8..3125a9e9a2fb 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4160,11 +4160,17 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) return; switch (task->tk_status) { - case -NFS4ERR_STALE_STATEID: - case -NFS4ERR_EXPIRED: case 0: renew_lease(data->res.server, data->timestamp); break; + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_DELEG_REVOKED: + case -NFS4ERR_BAD_STATEID: + case -NFS4ERR_OLD_STATEID: + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_EXPIRED: + task->tk_status = 0; + break; default: if (nfs4_async_handle_error(task, data->res.server, NULL) == -EAGAIN) { From 4654a4adc09fed01e37b03d15ed6688bc08ef490 Mon Sep 17 00:00:00 2001 From: Hong Zhiguo Date: Wed, 20 Nov 2013 10:35:05 -0700 Subject: [PATCH 60/92] Update of blkg_stat and blkg_rwstat may happen in bh context. While u64_stats_fetch_retry is only preempt_disable on 32bit UP system. This is not enough to avoid preemption by bh and may read strange 64 bit value. commit 2c575026fae6e63771bd2a4c1d407214a8096a89 upstream. Signed-off-by: Hong Zhiguo Acked-by: Tejun Heo Signed-off-by: Jens Axboe Signed-off-by: Kamal Mostafa --- block/blk-cgroup.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 24597309e23d..6f33c3f5f25e 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -370,9 +370,9 @@ static inline uint64_t blkg_stat_read(struct blkg_stat *stat) uint64_t v; do { - start = u64_stats_fetch_begin(&stat->syncp); + start = u64_stats_fetch_begin_bh(&stat->syncp); v = stat->cnt; - } while (u64_stats_fetch_retry(&stat->syncp, start)); + } while (u64_stats_fetch_retry_bh(&stat->syncp, start)); return v; } @@ -426,9 +426,9 @@ static inline struct blkg_rwstat blkg_rwstat_read(struct blkg_rwstat *rwstat) struct blkg_rwstat tmp; do { - start = u64_stats_fetch_begin(&rwstat->syncp); + start = u64_stats_fetch_begin_bh(&rwstat->syncp); tmp = *rwstat; - } while (u64_stats_fetch_retry(&rwstat->syncp, start)); + } while (u64_stats_fetch_retry_bh(&rwstat->syncp, start)); return tmp; } From 4a6b21cdcab8f4b83f8ca0c000a39920a82d7f3a Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 22 Nov 2013 11:44:51 -0800 Subject: [PATCH 61/92] time: Fix 1ns/tick drift w/ GENERIC_TIME_VSYSCALL_OLD commit 4be77398ac9d948773116b6be4a3c91b3d6ea18c upstream. Since commit 1e75fa8be9f (time: Condense timekeeper.xtime into xtime_sec - merged in v3.6), there has been an problem with the error accounting in the timekeeping code, such that when truncating to nanoseconds, we round up to the next nsec, but the balancing adjustment to the ntp_error value was dropped. This causes 1ns per tick drift forward of the clock. In 3.7, this logic was isolated to only GENERIC_TIME_VSYSCALL_OLD architectures (s390, ia64, powerpc). The fix is simply to balance the accounting and to subtract the added nanosecond from ntp_error. This allows the internal long-term clock steering to keep the clock accurate. While this fix removes the regression added in 1e75fa8be9f, the ideal solution is to move away from GENERIC_TIME_VSYSCALL_OLD and use the new VSYSCALL method, which avoids entirely the nanosecond granular rounding, and the resulting short-term clock adjustment oscillation needed to keep long term accurate time. [ jstultz: Many thanks to Martin for his efforts identifying this subtle bug, and providing the fix. ] Originally-from: Martin Schwidefsky Cc: Tony Luck Cc: Paul Mackerras Cc: Benjamin Herrenschmidt Cc: Andy Lutomirski Cc: Paul Turner Cc: Steven Rostedt Cc: Richard Cochran Cc: Prarit Bhargava Cc: Fenghua Yu Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1385149491-20307-1-git-send-email-john.stultz@linaro.org Signed-off-by: John Stultz Signed-off-by: Thomas Gleixner Signed-off-by: Kamal Mostafa --- kernel/time/timekeeping.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index cbc6acb0db3f..16c444d51488 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1127,7 +1127,7 @@ static inline void old_vsyscall_fixup(struct timekeeper *tk) tk->xtime_nsec -= remainder; tk->xtime_nsec += 1ULL << tk->shift; tk->ntp_error += remainder << tk->ntp_error_shift; - + tk->ntp_error -= (1ULL << tk->shift) << tk->ntp_error_shift; } #else #define old_vsyscall_fixup(tk) From 369b78179b3e4dc70b129b8a04eb9db2a0f118d9 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 22 Nov 2013 14:17:18 +0000 Subject: [PATCH 62/92] ASoC: wm8990: Mark the register map as dirty when powering down commit 2ab2b74277a86afe0dd92976db695a2bb8b93366 upstream. Otherwise we'll skip sync on resume. Signed-off-by: Mark Brown Acked-by: Charles Keepax Signed-off-by: Kamal Mostafa --- sound/soc/codecs/wm8990.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/codecs/wm8990.c b/sound/soc/codecs/wm8990.c index 837978e16e9d..ded9ed854a1f 100644 --- a/sound/soc/codecs/wm8990.c +++ b/sound/soc/codecs/wm8990.c @@ -1264,6 +1264,8 @@ static int wm8990_set_bias_level(struct snd_soc_codec *codec, /* disable POBCTRL, SOFT_ST and BUFDCOPEN */ snd_soc_write(codec, WM8990_ANTIPOP2, 0x0); + + codec->cache_sync = 1; break; } From 5f1c1cbda929884ed99cfccc48998cce13dd1a24 Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT Date: Mon, 25 Nov 2013 17:26:46 +0100 Subject: [PATCH 63/92] ARM: mvebu: use the virtual CPU registers to access coherency registers commit b6dda00cddcc71d2030668bc0cc0fed758c411c2 upstream. The Armada XP provides a mechanism called "virtual CPU registers" or "per-CPU register banking", to access the per-CPU registers of the current CPU, without having to worry about finding on which CPU we're running. CPU0 has its registers at 0x21800, CPU1 at 0x21900, CPU2 at 0x21A00 and CPU3 at 0x21B00. The virtual registers accessing the current CPU registers are at 0x21000. However, in the Device Tree node that provides the register addresses for the coherency unit (which is responsible for ensuring coherency between processors, and I/O coherency between processors and the DMA-capable devices), a mistake was made: the CPU0-specific registers were specified instead of the virtual CPU registers. This means that the coherency barrier needed for I/O coherency was not behaving properly when executed from a CPU different from CPU0. This patch fixes that by using the virtual CPU registers. Signed-off-by: Gregory CLEMENT Signed-off-by: Thomas Petazzoni Fixes: e60304f8cb7bb5 "arm: mvebu: Add hardware I/O Coherency support" Signed-off-by: Jason Cooper [ kamal: backport to 3.8 (context) ] Signed-off-by: Kamal Mostafa --- arch/arm/boot/dts/armada-370-xp.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/armada-370-xp.dtsi b/arch/arm/boot/dts/armada-370-xp.dtsi index 4c0abe85405f..269b6b037865 100644 --- a/arch/arm/boot/dts/armada-370-xp.dtsi +++ b/arch/arm/boot/dts/armada-370-xp.dtsi @@ -39,7 +39,7 @@ coherency-fabric@d0020200 { compatible = "marvell,coherency-fabric"; reg = <0xd0020200 0xb0>, - <0xd0021810 0x1c>; + <0xd0021010 0x1c>; }; soc { From 4fc40d8e76d0c4b2319f4a6ef1baf6a47e8a4f2c Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Thu, 21 Nov 2013 18:03:07 +0100 Subject: [PATCH 64/92] can: sja1000: fix {pre,post}_irq() handling and IRQ handler return value commit 2fea6cd303c0d0cd9067da31d873b6a6d5bd75e7 upstream. This patch fixes the issue that the sja1000_interrupt() function may have returned IRQ_NONE without processing the optional pre_irq() and post_irq() function before. Further the irq processing counter 'n' is moved to the end of the while statement to return correct IRQ_[NONE|HANDLED] values at error conditions. Reported-by: Wolfgang Grandegger Acked-by: Wolfgang Grandegger Signed-off-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde [ kamal: backport to 3.8 (renamed constant) ] Signed-off-by: Kamal Mostafa --- drivers/net/can/sja1000/sja1000.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/net/can/sja1000/sja1000.c b/drivers/net/can/sja1000/sja1000.c index 43921f91411d..8eb5a321d159 100644 --- a/drivers/net/can/sja1000/sja1000.c +++ b/drivers/net/can/sja1000/sja1000.c @@ -490,19 +490,19 @@ irqreturn_t sja1000_interrupt(int irq, void *dev_id) uint8_t isrc, status; int n = 0; + if (priv->pre_irq) + priv->pre_irq(priv); + /* Shared interrupts and IRQ off? */ if (priv->read_reg(priv, REG_IER) == IRQ_OFF) return IRQ_NONE; - if (priv->pre_irq) - priv->pre_irq(priv); - while ((isrc = priv->read_reg(priv, REG_IR)) && (n < SJA1000_MAX_IRQ)) { - n++; + status = priv->read_reg(priv, SJA1000_REG_SR); /* check for absent controller due to hw unplug */ if (status == 0xFF && sja1000_is_absent(priv)) - return IRQ_NONE; + goto out; if (isrc & IRQ_WUI) netdev_warn(dev, "wakeup interrupt\n"); @@ -529,7 +529,7 @@ irqreturn_t sja1000_interrupt(int irq, void *dev_id) status = priv->read_reg(priv, SJA1000_REG_SR); /* check for absent controller */ if (status == 0xFF && sja1000_is_absent(priv)) - return IRQ_NONE; + goto out; } } if (isrc & (IRQ_DOI | IRQ_EI | IRQ_BEI | IRQ_EPI | IRQ_ALI)) { @@ -537,8 +537,9 @@ irqreturn_t sja1000_interrupt(int irq, void *dev_id) if (sja1000_err(dev, isrc, status)) break; } + n++; } - +out: if (priv->post_irq) priv->post_irq(priv); From 87e47ba3e35988dc43d554f4b9f16c83ecb1bacd Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Sun, 24 Nov 2013 23:31:24 +0100 Subject: [PATCH 65/92] can: c_can: don't call pm_runtime_get_sync() from interrupt context commit e35d46adc49b469fd92bdb64fea8af93640e6651 upstream. The c_can driver contians a callpath (c_can_poll -> c_can_state_change -> c_can_get_berr_counter) which may call pm_runtime_get_sync() from the IRQ handler, which is not allowed and results in "BUG: scheduling while atomic". This problem is fixed by introducing __c_can_get_berr_counter, which will not call pm_runtime_get_sync(). Reported-by: Andrew Glen Tested-by: Andrew Glen Signed-off-by: Andrew Glen Signed-off-by: Marc Kleine-Budde Signed-off-by: Kamal Mostafa --- drivers/net/can/c_can/c_can.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c index 915847d19a37..aa92283d9e82 100644 --- a/drivers/net/can/c_can/c_can.c +++ b/drivers/net/can/c_can/c_can.c @@ -709,22 +709,31 @@ static int c_can_set_mode(struct net_device *dev, enum can_mode mode) return 0; } -static int c_can_get_berr_counter(const struct net_device *dev, - struct can_berr_counter *bec) +static int __c_can_get_berr_counter(const struct net_device *dev, + struct can_berr_counter *bec) { unsigned int reg_err_counter; struct c_can_priv *priv = netdev_priv(dev); - c_can_pm_runtime_get_sync(priv); - reg_err_counter = priv->read_reg(priv, C_CAN_ERR_CNT_REG); bec->rxerr = (reg_err_counter & ERR_CNT_REC_MASK) >> ERR_CNT_REC_SHIFT; bec->txerr = reg_err_counter & ERR_CNT_TEC_MASK; + return 0; +} + +static int c_can_get_berr_counter(const struct net_device *dev, + struct can_berr_counter *bec) +{ + struct c_can_priv *priv = netdev_priv(dev); + int err; + + c_can_pm_runtime_get_sync(priv); + err = __c_can_get_berr_counter(dev, bec); c_can_pm_runtime_put_sync(priv); - return 0; + return err; } /* @@ -868,7 +877,7 @@ static int c_can_handle_state_change(struct net_device *dev, if (unlikely(!skb)) return 0; - c_can_get_berr_counter(dev, &bec); + __c_can_get_berr_counter(dev, &bec); reg_err_counter = priv->read_reg(priv, C_CAN_ERR_CNT_REG); rx_err_passive = (reg_err_counter & ERR_CNT_RP_MASK) >> ERR_CNT_RP_SHIFT; From 6068b4981b340b99f3e02eca723a060dba60e8cc Mon Sep 17 00:00:00 2001 From: Laxman Dewangan Date: Mon, 25 Nov 2013 19:39:47 +0530 Subject: [PATCH 66/92] irq: Enable all irqs unconditionally in irq_resume commit ac01810c9d2814238f08a227062e66a35a0e1ea2 upstream. When the system enters suspend, it disables all interrupts in suspend_device_irqs(), including the interrupts marked EARLY_RESUME. On the resume side things are different. The EARLY_RESUME interrupts are reenabled in sys_core_ops->resume and the non EARLY_RESUME interrupts are reenabled in the normal system resume path. When suspend_noirq() failed or suspend is aborted for any other reason, we might omit the resume side call to sys_core_ops->resume() and therefor the interrupts marked EARLY_RESUME are not reenabled and stay disabled forever. To solve this, enable all irqs unconditionally in irq_resume() regardless whether interrupts marked EARLY_RESUMEhave been already enabled or not. This might try to reenable already enabled interrupts in the non failure case, but the only affected platform is XEN and it has been confirmed that it does not cause any side effects. [ tglx: Massaged changelog. ] Signed-off-by: Laxman Dewangan Acked-by-and-tested-by: Konrad Rzeszutek Wilk Acked-by: Heiko Stuebner Reviewed-by: Pavel Machek Cc: Cc: Cc: Cc: Link: http://lkml.kernel.org/r/1385388587-16442-1-git-send-email-ldewangan@nvidia.com Signed-off-by: Thomas Gleixner Signed-off-by: Kamal Mostafa --- kernel/irq/pm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c index cb228bf21760..abcd6ca86cb7 100644 --- a/kernel/irq/pm.c +++ b/kernel/irq/pm.c @@ -50,7 +50,7 @@ static void resume_irqs(bool want_early) bool is_early = desc->action && desc->action->flags & IRQF_EARLY_RESUME; - if (is_early != want_early) + if (!is_early && want_early) continue; raw_spin_lock_irqsave(&desc->lock, flags); From 699f24ad32342f64f51df98521131ab2de25a612 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Mon, 25 Nov 2013 22:15:20 +0100 Subject: [PATCH 67/92] can: flexcan: use correct clock as base for bit rate calculation commit 1a3e5173f5e72cbf7f0c8927b33082e361c16d72 upstream. The flexcan IP core uses the peripheral clock ("per") as basic clock for the bit timing calculation. However the driver uses the the wrong clock ("ipg"). This leads to wrong bit rates if the rates on both clock are different. This patch fixes the problem by using the correct clock for the bit rate calculation. Signed-off-by: Marc Kleine-Budde Signed-off-by: Kamal Mostafa --- drivers/net/can/flexcan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index 4c90a27de6e6..b12fce686bc6 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -1010,7 +1010,6 @@ static int flexcan_probe(struct platform_device *pdev) err = PTR_ERR(clk_ipg); goto failed_clock; } - clock_freq = clk_get_rate(clk_ipg); clk_per = devm_clk_get(&pdev->dev, "per"); if (IS_ERR(clk_per)) { @@ -1018,6 +1017,7 @@ static int flexcan_probe(struct platform_device *pdev) err = PTR_ERR(clk_per); goto failed_clock; } + clock_freq = clk_get_rate(clk_per); } mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); From f8fab7bbd1aa39a684fb212cd8721e8896e779f4 Mon Sep 17 00:00:00 2001 From: Matt Wilson Date: Wed, 20 Nov 2013 12:11:35 -0800 Subject: [PATCH 68/92] xen/gnttab: leave lazy MMU mode in the case of a m2p override failure commit 14883a75ec76b44759385fb12629f4a0f1aef4e3 upstream. Commit f62805f1 introduced a bug where lazy MMU mode isn't exited if a m2p_add/remove_override call fails. Acked-by: Stefano Stabellini Cc: Boris Ostrovsky Reviewed-by: David Vrabel Reviewed-by: Anthony Liguori Cc: xen-devel@lists.xenproject.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Matt Wilson Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Kamal Mostafa --- drivers/xen/grant-table.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 51be226b0f20..b146bfdbdf6f 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -920,9 +920,10 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, ret = m2p_add_override(mfn, pages[i], kmap_ops ? &kmap_ops[i] : NULL); if (ret) - return ret; + goto out; } + out: if (lazy) arch_leave_lazy_mmu_mode(); @@ -953,9 +954,10 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, ret = m2p_remove_override(pages[i], kmap_ops ? &kmap_ops[i] : NULL); if (ret) - return ret; + goto out; } + out: if (lazy) arch_leave_lazy_mmu_mode(); From 832f8486e8087a057c8fd4729d6004ab6b698446 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Tue, 26 Nov 2013 15:03:36 -0800 Subject: [PATCH 69/92] ARM: OMAP2+: Disable POSTED mode for errata i103 and i767 commit 912e663127c5f700da6805ce21635dce703cb2cc upstream. Enabling of Posted mode is seen to cause problems on dmtimer modules on AM33xx (much like other OMAPs). Reference discussions on forums [1] [2]. Earlier patch solving this on other OMAPs [3]. For OMAP SoCs with this errata, the fix has been to not enable Posted mode. However, on some SoCs (atleast AM33xx) which carry this errata, Posted mode is enabled on reset. So we not only need to ignore enabling of the POSTED bit when the timer is requested, but also disable Posted mode if errata is present. [1] http://e2e.ti.com/support/arm/sitara_arm/f/791/t/285744.aspx [2] http://e2e.ti.com/support/arm/sitara_arm/f/791/t/270632.aspx [3] http://www.spinics.net/lists/linux-omap/msg81770.html Reported-by: Russ Dill Cc: Santosh Shilimkar Signed-off-by: Joel Fernandes Signed-off-by: Tony Lindgren Signed-off-by: Kamal Mostafa --- arch/arm/plat-omap/include/plat/dmtimer.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arm/plat-omap/include/plat/dmtimer.h b/arch/arm/plat-omap/include/plat/dmtimer.h index a3fbc48c332e..31e1715d43f3 100644 --- a/arch/arm/plat-omap/include/plat/dmtimer.h +++ b/arch/arm/plat-omap/include/plat/dmtimer.h @@ -335,8 +335,11 @@ static inline void __omap_dm_timer_enable_posted(struct omap_dm_timer *timer) if (timer->posted) return; - if (timer->errata & OMAP_TIMER_ERRATA_I103_I767) + if (timer->errata & OMAP_TIMER_ERRATA_I103_I767) { + timer->posted = OMAP_TIMER_NONPOSTED; + __omap_dm_timer_write(timer, OMAP_TIMER_IF_CTRL_REG, 0, 0); return; + } __omap_dm_timer_write(timer, OMAP_TIMER_IF_CTRL_REG, OMAP_TIMER_CTRL_POSTED, 0); From 4b20734087f9ab5ea442dc48976a70eae9261fab Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 22 Oct 2013 18:35:19 -0700 Subject: [PATCH 70/92] [SCSI] libsas: fix usage of ata_tf_to_fis commit ae5fbae0ccd982dfca0ce363036ed92f5b13f150 upstream. Since commit 110dd8f19df5 "[SCSI] libsas: fix scr_read/write users and update the libata documentation" we have been passing pmp=1 and is_cmd=0 to ata_tf_to_fis(). Praveen reports that eSATA attached drives do not discover correctly. His investigation found that the BIOS was passing pmp=0 while Linux was passing pmp=1 and failing to discover the drives. Update libsas to follow the libata example of pulling the pmp setting from the ata_link and correct is_cmd to be 1 since all tf's submitted through ->qc_issue are commands. Presumably libsas lldds do not care about is_cmd as they have sideband mechanisms to perform link management. http://marc.info/?l=linux-scsi&m=138179681726990 [jejb: checkpatch fix] Signed-off-by: Dan Williams Reported-by: Praveen Murali Tested-by: Praveen Murali Signed-off-by: James Bottomley Signed-off-by: Kamal Mostafa --- drivers/scsi/libsas/sas_ata.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c index bdb81cda8401..0cc7886e262b 100644 --- a/drivers/scsi/libsas/sas_ata.c +++ b/drivers/scsi/libsas/sas_ata.c @@ -211,7 +211,7 @@ static unsigned int sas_ata_qc_issue(struct ata_queued_cmd *qc) qc->tf.nsect = 0; } - ata_tf_to_fis(&qc->tf, 1, 0, (u8*)&task->ata_task.fis); + ata_tf_to_fis(&qc->tf, qc->dev->link->pmp, 1, (u8 *)&task->ata_task.fis); task->uldd_task = qc; if (ata_is_atapi(qc->tf.protocol)) { memcpy(task->ata_task.atapi_packet, qc->cdb, qc->dev->cdb_len); From 5db56f7858a2d41c33ccd118d9dc6c400022c453 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Tue, 12 Nov 2013 11:46:04 -0600 Subject: [PATCH 71/92] crypto: authenc - Find proper IV address in ablkcipher callback commit fc019c7122dfcd69c50142b57a735539aec5da95 upstream. When performing an asynchronous ablkcipher operation the authenc completion callback routine is invoked, but it does not locate and use the proper IV. The callback routine, crypto_authenc_encrypt_done, is updated to use the same method of calculating the address of the IV as is done in crypto_authenc_encrypt function which sets up the callback. Signed-off-by: Tom Lendacky Signed-off-by: Herbert Xu Signed-off-by: Kamal Mostafa --- crypto/authenc.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/crypto/authenc.c b/crypto/authenc.c index d0583a4489e6..a8aaae679f20 100644 --- a/crypto/authenc.c +++ b/crypto/authenc.c @@ -368,9 +368,10 @@ static void crypto_authenc_encrypt_done(struct crypto_async_request *req, if (!err) { struct crypto_aead *authenc = crypto_aead_reqtfm(areq); struct crypto_authenc_ctx *ctx = crypto_aead_ctx(authenc); - struct ablkcipher_request *abreq = aead_request_ctx(areq); - u8 *iv = (u8 *)(abreq + 1) + - crypto_ablkcipher_reqsize(ctx->enc); + struct authenc_request_ctx *areq_ctx = aead_request_ctx(areq); + struct ablkcipher_request *abreq = (void *)(areq_ctx->tail + + ctx->reqoff); + u8 *iv = (u8 *)abreq - crypto_ablkcipher_ivsize(ctx->enc); err = crypto_authenc_genicv(areq, iv, 0); } From ef845ea48f30c4475934b9e71b5ca3186719e458 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Tue, 12 Nov 2013 11:46:10 -0600 Subject: [PATCH 72/92] crypto: scatterwalk - Set the chain pointer indication bit commit 41da8b5adba77e22584f8b45f9641504fa885308 upstream. The scatterwalk_crypto_chain function invokes the scatterwalk_sg_chain function to chain two scatterlists, but the chain pointer indication bit is not set. When the resulting scatterlist is used, for example, by sg_nents to count the number of scatterlist entries, a segfault occurs because sg_nents does not follow the chain pointer to the chained scatterlist. Update scatterwalk_sg_chain to set the chain pointer indication bit as is done by the sg_chain function. Signed-off-by: Tom Lendacky Signed-off-by: Herbert Xu Signed-off-by: Kamal Mostafa --- include/crypto/scatterwalk.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/crypto/scatterwalk.h b/include/crypto/scatterwalk.h index 3744d2a642df..09ef1a0c5499 100644 --- a/include/crypto/scatterwalk.h +++ b/include/crypto/scatterwalk.h @@ -36,6 +36,7 @@ static inline void scatterwalk_sg_chain(struct scatterlist *sg1, int num, { sg_set_page(&sg1[num - 1], (void *)sg2, 0, 0); sg1[num - 1].page_link &= ~0x02; + sg1[num - 1].page_link |= 0x01; } static inline struct scatterlist *scatterwalk_sg_next(struct scatterlist *sg) From d1fdbda26e16fcd32631fd435d15b173e1ed1c95 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Tue, 19 Nov 2013 17:12:47 +0100 Subject: [PATCH 73/92] crypto: s390 - Fix aes-xts parameter corruption commit 9dda2769af4f3f3093434648c409bb351120d9e8 upstream. Some s390 crypto algorithms incorrectly use the crypto_tfm structure to store private data. As the tfm can be shared among multiple threads, this can result in data corruption. This patch fixes aes-xts by moving the xts and pcc parameter blocks from the tfm onto the stack (48 + 96 bytes). Signed-off-by: Gerald Schaefer Signed-off-by: Herbert Xu Signed-off-by: Kamal Mostafa --- arch/s390/crypto/aes_s390.c | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index 2e4b5be31a1b..94e20dd2729f 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -55,8 +55,7 @@ struct pcc_param { struct s390_xts_ctx { u8 key[32]; - u8 xts_param[16]; - struct pcc_param pcc; + u8 pcc_key[32]; long enc; long dec; int key_len; @@ -591,7 +590,7 @@ static int xts_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, xts_ctx->enc = KM_XTS_128_ENCRYPT; xts_ctx->dec = KM_XTS_128_DECRYPT; memcpy(xts_ctx->key + 16, in_key, 16); - memcpy(xts_ctx->pcc.key + 16, in_key + 16, 16); + memcpy(xts_ctx->pcc_key + 16, in_key + 16, 16); break; case 48: xts_ctx->enc = 0; @@ -602,7 +601,7 @@ static int xts_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, xts_ctx->enc = KM_XTS_256_ENCRYPT; xts_ctx->dec = KM_XTS_256_DECRYPT; memcpy(xts_ctx->key, in_key, 32); - memcpy(xts_ctx->pcc.key, in_key + 32, 32); + memcpy(xts_ctx->pcc_key, in_key + 32, 32); break; default: *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; @@ -621,29 +620,33 @@ static int xts_aes_crypt(struct blkcipher_desc *desc, long func, unsigned int nbytes = walk->nbytes; unsigned int n; u8 *in, *out; - void *param; + struct pcc_param pcc_param; + struct { + u8 key[32]; + u8 init[16]; + } xts_param; if (!nbytes) goto out; - memset(xts_ctx->pcc.block, 0, sizeof(xts_ctx->pcc.block)); - memset(xts_ctx->pcc.bit, 0, sizeof(xts_ctx->pcc.bit)); - memset(xts_ctx->pcc.xts, 0, sizeof(xts_ctx->pcc.xts)); - memcpy(xts_ctx->pcc.tweak, walk->iv, sizeof(xts_ctx->pcc.tweak)); - param = xts_ctx->pcc.key + offset; - ret = crypt_s390_pcc(func, param); + memset(pcc_param.block, 0, sizeof(pcc_param.block)); + memset(pcc_param.bit, 0, sizeof(pcc_param.bit)); + memset(pcc_param.xts, 0, sizeof(pcc_param.xts)); + memcpy(pcc_param.tweak, walk->iv, sizeof(pcc_param.tweak)); + memcpy(pcc_param.key, xts_ctx->pcc_key, 32); + ret = crypt_s390_pcc(func, &pcc_param.key[offset]); if (ret < 0) return -EIO; - memcpy(xts_ctx->xts_param, xts_ctx->pcc.xts, 16); - param = xts_ctx->key + offset; + memcpy(xts_param.key, xts_ctx->key, 32); + memcpy(xts_param.init, pcc_param.xts, 16); do { /* only use complete blocks */ n = nbytes & ~(AES_BLOCK_SIZE - 1); out = walk->dst.virt.addr; in = walk->src.virt.addr; - ret = crypt_s390_km(func, param, out, in, n); + ret = crypt_s390_km(func, &xts_param.key[offset], out, in, n); if (ret < 0 || ret != n) return -EIO; From cc1cebf8678e6014b12719152fb16f2182822ced Mon Sep 17 00:00:00 2001 From: Horia Geanta Date: Thu, 28 Nov 2013 15:11:15 +0200 Subject: [PATCH 74/92] crypto: ccm - Fix handling of zero plaintext when computing mac commit 5638cabf3e4883f38dfb246c30980cebf694fbda upstream. There are cases when cryptlen can be zero in crypto_ccm_auth(): -encryptiom: input scatterlist length is zero (no plaintext) -decryption: input scatterlist contains only the mac plus the condition of having different source and destination buffers (or else scatterlist length = max(plaintext_len, ciphertext_len)). These are not handled correctly, leading to crashes like: root@p4080ds:~/crypto# insmod tcrypt.ko mode=45 ------------[ cut here ]------------ kernel BUG at crypto/scatterwalk.c:37! Oops: Exception in kernel mode, sig: 5 [#1] SMP NR_CPUS=8 P4080 DS Modules linked in: tcrypt(+) crc32c xts xcbc vmac pcbc ecb gcm ghash_generic gf128mul ccm ctr seqiv CPU: 3 PID: 1082 Comm: cryptomgr_test Not tainted 3.11.0 #14 task: ee12c5b0 ti: eecd0000 task.ti: eecd0000 NIP: c0204d98 LR: f9225848 CTR: c0204d80 REGS: eecd1b70 TRAP: 0700 Not tainted (3.11.0) MSR: 00029002 CR: 22044022 XER: 20000000 GPR00: f9225c94 eecd1c20 ee12c5b0 eecd1c28 ee879400 ee879400 00000000 ee607464 GPR08: 00000001 00000001 00000000 006b0000 c0204d80 00000000 00000002 c0698e20 GPR16: ee987000 ee895000 fffffff4 ee879500 00000100 eecd1d58 00000001 00000000 GPR24: ee879400 00000020 00000000 00000000 ee5b2800 ee607430 00000004 ee607460 NIP [c0204d98] scatterwalk_start+0x18/0x30 LR [f9225848] get_data_to_compute+0x28/0x2f0 [ccm] Call Trace: [eecd1c20] [f9225974] get_data_to_compute+0x154/0x2f0 [ccm] (unreliable) [eecd1c70] [f9225c94] crypto_ccm_auth+0x184/0x1d0 [ccm] [eecd1cb0] [f9225d40] crypto_ccm_encrypt+0x60/0x2d0 [ccm] [eecd1cf0] [c020d77c] __test_aead+0x3ec/0xe20 [eecd1e20] [c020f35c] test_aead+0x6c/0xe0 [eecd1e40] [c020f420] alg_test_aead+0x50/0xd0 [eecd1e60] [c020e5e4] alg_test+0x114/0x2e0 [eecd1ee0] [c020bd1c] cryptomgr_test+0x4c/0x60 [eecd1ef0] [c0047058] kthread+0xa8/0xb0 [eecd1f40] [c000eb0c] ret_from_kernel_thread+0x5c/0x64 Instruction dump: 0f080000 81290024 552807fe 0f080000 5529003a 4bffffb4 90830000 39400000 39000001 8124000c 2f890000 7d28579e <0f090000> 81240008 91230004 4e800020 ---[ end trace 6d652dfcd1be37bd ]--- Cc: Jussi Kivilinna Signed-off-by: Horia Geanta Signed-off-by: Herbert Xu Signed-off-by: Kamal Mostafa --- crypto/ccm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crypto/ccm.c b/crypto/ccm.c index 32fe1bb5decb..18d64ad0433c 100644 --- a/crypto/ccm.c +++ b/crypto/ccm.c @@ -271,7 +271,8 @@ static int crypto_ccm_auth(struct aead_request *req, struct scatterlist *plain, } /* compute plaintext into mac */ - get_data_to_compute(cipher, pctx, plain, cryptlen); + if (cryptlen) + get_data_to_compute(cipher, pctx, plain, cryptlen); out: return err; From 16d84ab89a00bcd9982b636ebb3653c8edbb95ca Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Wed, 23 Oct 2013 06:25:40 -0400 Subject: [PATCH 75/92] [SCSI] Disable WRITE SAME for RAID and virtual host adapter drivers commit 54b2b50c20a61b51199bedb6e5d2f8ec2568fb43 upstream. Some host adapters do not pass commands through to the target disk directly. Instead they provide an emulated target which may or may not accurately report its capabilities. In some cases the physical device characteristics are reported even when the host adapter is processing commands on the device's behalf. This can lead to adapter firmware hangs or excessive I/O errors. This patch disables WRITE SAME for devices connected to host adapters that provide an emulated target. Driver writers can disable WRITE SAME by setting the no_write_same flag in the host adapter template. [jejb: fix up rejections due to eh_deadline patch] Signed-off-by: Martin K. Petersen Signed-off-by: James Bottomley [ kamal: backport to 3.8 (context: eh_deadline) ] Signed-off-by: Kamal Mostafa --- drivers/ata/libata-scsi.c | 1 + drivers/firewire/sbp2.c | 1 + drivers/scsi/3w-9xxx.c | 3 ++- drivers/scsi/3w-sas.c | 3 ++- drivers/scsi/3w-xxxx.c | 3 ++- drivers/scsi/aacraid/linit.c | 1 + drivers/scsi/arcmsr/arcmsr_hba.c | 1 + drivers/scsi/gdth.c | 1 + drivers/scsi/hosts.c | 1 + drivers/scsi/hpsa.c | 1 + drivers/scsi/ipr.c | 3 ++- drivers/scsi/ips.c | 1 + drivers/scsi/megaraid.c | 1 + drivers/scsi/megaraid/megaraid_mbox.c | 1 + drivers/scsi/megaraid/megaraid_sas_base.c | 1 + drivers/scsi/pmcraid.c | 1 + drivers/scsi/sd.c | 6 ++++++ drivers/scsi/storvsc_drv.c | 1 + include/scsi/scsi_host.h | 6 ++++++ 19 files changed, 33 insertions(+), 4 deletions(-) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 7c337e754dab..f677e94a8907 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -3604,6 +3604,7 @@ int ata_scsi_add_hosts(struct ata_host *host, struct scsi_host_template *sht) shost->max_lun = 1; shost->max_channel = 1; shost->max_cmd_len = 16; + shost->no_write_same = 1; /* Schedule policy is determined by ->qc_defer() * callback and it needs to see every deferred qc. diff --git a/drivers/firewire/sbp2.c b/drivers/firewire/sbp2.c index 1162d6b3bf85..488287313b04 100644 --- a/drivers/firewire/sbp2.c +++ b/drivers/firewire/sbp2.c @@ -1628,6 +1628,7 @@ static struct scsi_host_template scsi_driver_template = { .cmd_per_lun = 1, .can_queue = 1, .sdev_attrs = sbp2_scsi_sysfs_attrs, + .no_write_same = 1, }; MODULE_AUTHOR("Kristian Hoegsberg "); diff --git a/drivers/scsi/3w-9xxx.c b/drivers/scsi/3w-9xxx.c index d1f0120cdb98..fc255e13e0c2 100644 --- a/drivers/scsi/3w-9xxx.c +++ b/drivers/scsi/3w-9xxx.c @@ -2025,7 +2025,8 @@ static struct scsi_host_template driver_template = { .cmd_per_lun = TW_MAX_CMDS_PER_LUN, .use_clustering = ENABLE_CLUSTERING, .shost_attrs = twa_host_attrs, - .emulated = 1 + .emulated = 1, + .no_write_same = 1, }; /* This function will probe and initialize a card */ diff --git a/drivers/scsi/3w-sas.c b/drivers/scsi/3w-sas.c index 52a2f0580d97..683059519a11 100644 --- a/drivers/scsi/3w-sas.c +++ b/drivers/scsi/3w-sas.c @@ -1600,7 +1600,8 @@ static struct scsi_host_template driver_template = { .cmd_per_lun = TW_MAX_CMDS_PER_LUN, .use_clustering = ENABLE_CLUSTERING, .shost_attrs = twl_host_attrs, - .emulated = 1 + .emulated = 1, + .no_write_same = 1, }; /* This function will probe and initialize a card */ diff --git a/drivers/scsi/3w-xxxx.c b/drivers/scsi/3w-xxxx.c index 62071d2fc1ce..5bf49f88ffdd 100644 --- a/drivers/scsi/3w-xxxx.c +++ b/drivers/scsi/3w-xxxx.c @@ -2277,7 +2277,8 @@ static struct scsi_host_template driver_template = { .cmd_per_lun = TW_MAX_CMDS_PER_LUN, .use_clustering = ENABLE_CLUSTERING, .shost_attrs = tw_host_attrs, - .emulated = 1 + .emulated = 1, + .no_write_same = 1, }; /* This function will probe and initialize a card */ diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c index f0d432c139d0..4921ed19a027 100644 --- a/drivers/scsi/aacraid/linit.c +++ b/drivers/scsi/aacraid/linit.c @@ -1081,6 +1081,7 @@ static struct scsi_host_template aac_driver_template = { #endif .use_clustering = ENABLE_CLUSTERING, .emulated = 1, + .no_write_same = 1, }; static void __aac_shutdown(struct aac_dev * aac) diff --git a/drivers/scsi/arcmsr/arcmsr_hba.c b/drivers/scsi/arcmsr/arcmsr_hba.c index 33c52bc2c7b4..278c9fa62067 100644 --- a/drivers/scsi/arcmsr/arcmsr_hba.c +++ b/drivers/scsi/arcmsr/arcmsr_hba.c @@ -137,6 +137,7 @@ static struct scsi_host_template arcmsr_scsi_host_template = { .cmd_per_lun = ARCMSR_MAX_CMD_PERLUN, .use_clustering = ENABLE_CLUSTERING, .shost_attrs = arcmsr_host_attrs, + .no_write_same = 1, }; static struct pci_device_id arcmsr_device_id_table[] = { {PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1110)}, diff --git a/drivers/scsi/gdth.c b/drivers/scsi/gdth.c index 599790e41a98..faa92a2741da 100644 --- a/drivers/scsi/gdth.c +++ b/drivers/scsi/gdth.c @@ -4691,6 +4691,7 @@ static struct scsi_host_template gdth_template = { .cmd_per_lun = GDTH_MAXC_P_L, .unchecked_isa_dma = 1, .use_clustering = ENABLE_CLUSTERING, + .no_write_same = 1, }; #ifdef CONFIG_ISA diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c index 593085a52275..b471b86659c8 100644 --- a/drivers/scsi/hosts.c +++ b/drivers/scsi/hosts.c @@ -388,6 +388,7 @@ struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *sht, int privsize) shost->unchecked_isa_dma = sht->unchecked_isa_dma; shost->use_clustering = sht->use_clustering; shost->ordered_tag = sht->ordered_tag; + shost->no_write_same = sht->no_write_same; if (sht->supported_mode == MODE_UNKNOWN) /* means we didn't set it ... default to INITIATOR */ diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index 4f338061b5c3..cae15d276dae 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -538,6 +538,7 @@ static struct scsi_host_template hpsa_driver_template = { .sdev_attrs = hpsa_sdev_attrs, .shost_attrs = hpsa_shost_attrs, .max_sectors = 8192, + .no_write_same = 1, }; diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index 1d7da3f41ebb..a3b869240ea2 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -6016,7 +6016,8 @@ static struct scsi_host_template driver_template = { .use_clustering = ENABLE_CLUSTERING, .shost_attrs = ipr_ioa_attrs, .sdev_attrs = ipr_dev_attrs, - .proc_name = IPR_NAME + .proc_name = IPR_NAME, + .no_write_same = 1, }; /** diff --git a/drivers/scsi/ips.c b/drivers/scsi/ips.c index 9aa86a315a08..8fc61d73af8a 100644 --- a/drivers/scsi/ips.c +++ b/drivers/scsi/ips.c @@ -374,6 +374,7 @@ static struct scsi_host_template ips_driver_template = { .sg_tablesize = IPS_MAX_SG, .cmd_per_lun = 3, .use_clustering = ENABLE_CLUSTERING, + .no_write_same = 1, }; diff --git a/drivers/scsi/megaraid.c b/drivers/scsi/megaraid.c index 9504ec0ec682..d61eb154c1cb 100644 --- a/drivers/scsi/megaraid.c +++ b/drivers/scsi/megaraid.c @@ -4520,6 +4520,7 @@ static struct scsi_host_template megaraid_template = { .eh_device_reset_handler = megaraid_reset, .eh_bus_reset_handler = megaraid_reset, .eh_host_reset_handler = megaraid_reset, + .no_write_same = 1, }; static int diff --git a/drivers/scsi/megaraid/megaraid_mbox.c b/drivers/scsi/megaraid/megaraid_mbox.c index e6a1e0b38a19..3316d8031e82 100644 --- a/drivers/scsi/megaraid/megaraid_mbox.c +++ b/drivers/scsi/megaraid/megaraid_mbox.c @@ -367,6 +367,7 @@ static struct scsi_host_template megaraid_template_g = { .eh_host_reset_handler = megaraid_reset_handler, .change_queue_depth = megaraid_change_queue_depth, .use_clustering = ENABLE_CLUSTERING, + .no_write_same = 1, .sdev_attrs = megaraid_sdev_attrs, .shost_attrs = megaraid_shost_attrs, }; diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 3433e07c5c0b..39e6f84a0393 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -2099,6 +2099,7 @@ static struct scsi_host_template megasas_template = { .bios_param = megasas_bios_param, .use_clustering = ENABLE_CLUSTERING, .change_queue_depth = megasas_change_queue_depth, + .no_write_same = 1, }; /** diff --git a/drivers/scsi/pmcraid.c b/drivers/scsi/pmcraid.c index b46f5e906837..ad5cbed91491 100644 --- a/drivers/scsi/pmcraid.c +++ b/drivers/scsi/pmcraid.c @@ -4328,6 +4328,7 @@ static struct scsi_host_template pmcraid_host_template = { .this_id = -1, .sg_tablesize = PMCRAID_MAX_IOADLS, .max_sectors = PMCRAID_IOA_MAX_SECTORS, + .no_write_same = 1, .cmd_per_lun = PMCRAID_MAX_CMD_PER_LUN, .use_clustering = ENABLE_CLUSTERING, .shost_attrs = pmcraid_host_attrs, diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index a4dc0673710a..f6dffc38d92f 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -2639,6 +2639,12 @@ static void sd_read_write_same(struct scsi_disk *sdkp, unsigned char *buffer) { struct scsi_device *sdev = sdkp->device; + if (sdev->host->no_write_same) { + sdev->no_write_same = 1; + + return; + } + if (scsi_report_opcode(sdev, buffer, SD_BUF_SIZE, INQUIRY) < 0) { /* too large values might cause issues with arcmsr */ int vpd_buf_len = 64; diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index 9f4e5601b610..0374eee57a3b 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -1402,6 +1402,7 @@ static struct scsi_host_template scsi_driver = { .use_clustering = DISABLE_CLUSTERING, /* Make sure we dont get a sg segment crosses a page boundary */ .dma_boundary = PAGE_SIZE-1, + .no_write_same = 1, }; enum { diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index 49084807eb6b..f79871860cd0 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -473,6 +473,9 @@ struct scsi_host_template { */ unsigned ordered_tag:1; + /* True if the controller does not support WRITE SAME */ + unsigned no_write_same:1; + /* * Countdown for host blocking with no commands outstanding. */ @@ -672,6 +675,9 @@ struct Scsi_Host { /* Don't resume host in EH */ unsigned eh_noresume:1; + /* The controller does not support WRITE SAME */ + unsigned no_write_same:1; + /* * Optional work queue to be utilized by the transport */ From ee2966b0890d212cdd349d46f2bfa99bc9deee6c Mon Sep 17 00:00:00 2001 From: Madper Xie Date: Fri, 29 Nov 2013 15:58:57 +0800 Subject: [PATCH 76/92] efi-pstore: Make efi-pstore return a unique id commit fdeadb43fdf1e7d5698c027b555c389174548e5a upstream. Pstore fs expects that backends provide a unique id which could avoid pstore making entries as duplication or denominating entries the same name. So I combine the timestamp, part and count into id. Signed-off-by: Madper Xie Cc: Seiji Aguchi Signed-off-by: Matt Fleming [ kamal: backport to 3.8 ] Signed-off-by: Kamal Mostafa --- drivers/firmware/efivars.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c index e3e95dd20986..e2d4c7abdc92 100644 --- a/drivers/firmware/efivars.c +++ b/drivers/firmware/efivars.c @@ -1314,6 +1314,12 @@ static int efi_pstore_close(struct pstore_info *psi) return 0; } +static inline u64 generic_id(unsigned long timestamp, + unsigned int part, int count) +{ + return (timestamp * 100 + part) * 1000 + count; +} + static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type, int *count, struct timespec *timespec, char **buf, struct pstore_info *psi) @@ -1334,7 +1340,7 @@ static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type, } if (sscanf(name, "dump-type%u-%u-%d-%lu", type, &part, &cnt, &time) == 4) { - *id = part; + *id = generic_id(time, part, cnt); *count = cnt; timespec->tv_sec = time; timespec->tv_nsec = 0; @@ -1345,7 +1351,7 @@ static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type, * which doesn't support holding * multiple logs, remains. */ - *id = part; + *id = generic_id(time, part, cnt); *count = 0; timespec->tv_sec = time; timespec->tv_nsec = 0; @@ -1436,9 +1442,11 @@ static int efi_pstore_erase(enum pstore_type_id type, u64 id, int count, struct efivars *efivars = psi->data; struct efivar_entry *entry, *found = NULL; int i; + unsigned int part; - sprintf(name, "dump-type%u-%u-%d-%lu", type, (unsigned int)id, count, - time.tv_sec); + do_div(id, 1000); + part = do_div(id, 100); + sprintf(name, "dump-type%u-%u-%d-%lu", type, part, count, time.tv_sec); spin_lock_irq(&efivars->lock); From b4a6ffe4ea199d0647a68071f59f6c3988854416 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 29 Nov 2013 12:47:34 +0100 Subject: [PATCH 77/92] ALSA: hda - Fix silent output on ASUS W7J laptop commit 6ddf0fd1c462a418a3cbb8b0653820dc48ffbd98 upstream. The recent kernels got regressions on ASUS W7J with ALC660 codec where no sound comes out. After a long debugging session, we found out that setting the pin control on the unused NID 0x10 is mandatory for the outputs. And, it was found out that another magic of NID 0x0f that is required for other ASUS laptops isn't needed on this machine. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=66081 Reported-and-tested-by: Andrey Lipaev Signed-off-by: Takashi Iwai Signed-off-by: Kamal Mostafa --- sound/pci/hda/patch_realtek.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 8a4764afa06f..a67e6e244484 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6578,6 +6578,7 @@ enum { ALC861_FIXUP_AMP_VREF_0F, ALC861_FIXUP_NO_JACK_DETECT, ALC861_FIXUP_ASUS_A6RP, + ALC660_FIXUP_ASUS_W7J, }; /* On some laptops, VREF of pin 0x0f is abused for controlling the main amp */ @@ -6628,10 +6629,21 @@ static const struct alc_fixup alc861_fixups[] = { .v.func = alc861_fixup_asus_amp_vref_0f, .chained = true, .chain_id = ALC861_FIXUP_NO_JACK_DETECT, + }, + [ALC660_FIXUP_ASUS_W7J] = { + .type = HDA_FIXUP_VERBS, + .v.verbs = (const struct hda_verb[]) { + /* ASUS W7J needs a magic pin setup on unused NID 0x10 + * for enabling outputs + */ + {0x10, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x24}, + { } + }, } }; static const struct snd_pci_quirk alc861_fixup_tbl[] = { + SND_PCI_QUIRK(0x1043, 0x1253, "ASUS W7J", ALC660_FIXUP_ASUS_W7J), SND_PCI_QUIRK(0x1043, 0x1393, "ASUS A6Rp", ALC861_FIXUP_ASUS_A6RP), SND_PCI_QUIRK_VENDOR(0x1043, "ASUS laptop", ALC861_FIXUP_AMP_VREF_0F), SND_PCI_QUIRK(0x1462, 0x7254, "HP DX2200", ALC861_FIXUP_NO_JACK_DETECT), From 431aab3ee0da5dfcf9af5dfdee79894152667b5b Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 27 Nov 2013 13:33:21 +0100 Subject: [PATCH 78/92] net: smc91: fix crash regression on the versatile commit b268daffdcb9762ad9aa3898096570a9dd92aa9b upstream. After commit e9e4ea74f06635f2ffc1dffe5ef40c854faa0a90 "net: smc91x: dont't use SMC_outw for fixing up halfword-aligned data" The Versatile SMSC LAN91C111 is crashing like this: ------------[ cut here ]------------ kernel BUG at /home/linus/linux/drivers/net/ethernet/smsc/smc91x.c:599! Internal error: Oops - BUG: 0 [#1] ARM Modules linked in: CPU: 0 PID: 43 Comm: udhcpc Not tainted 3.13.0-rc1+ #24 task: c6ccfaa0 ti: c6cd0000 task.ti: c6cd0000 PC is at smc_hardware_send_pkt+0x198/0x22c LR is at smc_hardware_send_pkt+0x24/0x22c pc : [] lr : [] psr: 20000013 sp : c6cd1d08 ip : 00000001 fp : 00000000 r10: c02adb08 r9 : 00000000 r8 : c6ced802 r7 : c786fba0 r6 : 00000146 r5 : c8800000 r4 : c78d6000 r3 : 0000000f r2 : 00000146 r1 : 00000000 r0 : 00000031 Flags: nzCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment user Control: 0005317f Table: 06cf4000 DAC: 00000015 Process udhcpc (pid: 43, stack limit = 0xc6cd01c0) Stack: (0xc6cd1d08 to 0xc6cd2000) 1d00: 00000010 c8800000 c78d6000 c786fba0 c78d6000 c01be868 1d20: c01be7a4 00004000 00000000 c786fba0 c6c12b80 c0208554 000004d0 c780fc60 1d40: 00000220 c01fb734 00000000 00000000 00000000 c6c9a440 c6c12b80 c78d6000 1d60: c786fba0 c6c9a440 00000000 c021d1d8 00000000 00000000 c6c12b80 c78d6000 1d80: c786fba0 00000001 c6c9a440 c02087f8 c6c9a4a0 00080008 00000000 00000000 1da0: c78d6000 c786fba0 c78d6000 00000138 00000000 00000000 00000000 00000000 1dc0: 00000000 c027ba74 00000138 00000138 00000001 00000010 c6cedc00 00000000 1de0: 00000008 c7404400 c6cd1eec c6cd1f14 c067a73c c065c0b8 00000000 c067a740 1e00: 01ffffff 002040d0 00000000 00000000 00000000 00000000 00000000 ffffffff 1e20: 43004400 00110022 c6cdef20 c027ae8c c6ccfaa0 be82d65c 00000014 be82d3cc 1e40: 00000000 00000000 00000000 c01f2870 00000000 00000000 00000000 c6cd1e88 1e60: c6ccfaa0 00000000 00000000 00000000 00000000 00000000 00000000 00000000 1e80: 00000000 00000000 00000031 c7802310 c7802300 00000138 c7404400 c0771da0 1ea0: 00000000 c6cd1eec c7800340 00000138 be82d65c 00000014 be82d3cc c6cd1f08 1ec0: 00000014 00000000 c7404400 c7404400 00000138 c01f4628 c78d6000 00000000 1ee0: 00000000 be82d3cc 00000138 c6cd1f08 00000014 c6cd1ee4 00000001 00000000 1f00: 00000000 00000000 00080011 00000002 06000000 ffffffff 0000ffff 00000002 1f20: 06000000 ffffffff 0000ffff c00928c8 c065c520 c6cd1f58 00000003 c009299c 1f40: 00000003 c065c520 c7404400 00000000 c7404400 c01f2218 c78106b0 c7441cb0 1f60: 00000000 00000006 c06799fc 00000000 00000000 00000006 00000000 c01f3ee0 1f80: 00000000 00000000 be82d678 be82d65c 00000014 00000001 00000122 c00139c8 1fa0: c6cd0000 c0013840 be82d65c 00000014 00000006 be82d3cc 00000138 00000000 1fc0: be82d65c 00000014 00000001 00000122 00000000 00000000 00018cb1 00000000 1fe0: 00003801 be82d3a8 0003a0c7 b6e9af08 60000010 00000006 00000000 00000000 [] (smc_hardware_send_pkt+0x198/0x22c) from [] (smc_hard_start_xmit+0xc4/0x1e8) [] (smc_hard_start_xmit+0xc4/0x1e8) from [] (dev_hard_start_xmit+0x460/0x4cc) [] (dev_hard_start_xmit+0x460/0x4cc) from [] (sch_direct_xmit+0x94/0x18c) [] (sch_direct_xmit+0x94/0x18c) from [] (dev_queue_xmit+0x238/0x42c) [] (dev_queue_xmit+0x238/0x42c) from [] (packet_sendmsg+0xbe8/0xd28) [] (packet_sendmsg+0xbe8/0xd28) from [] (sock_sendmsg+0x84/0xa8) [] (sock_sendmsg+0x84/0xa8) from [] (SyS_sendto+0xb8/0xdc) [] (SyS_sendto+0xb8/0xdc) from [] (ret_fast_syscall+0x0/0x2c) Code: e3130002 1a000001 e3130001 0affffcd (e7f001f2) ---[ end trace 81104fe70e8da7fe ]--- Kernel panic - not syncing: Fatal exception in interrupt This is because the macro operations in smc91x.h defined for Versatile are missing SMC_outsw() as used in this commit. The Versatile needs and uses the same accessors as the other platforms in the first if(...) clause, just switch it to using that and we have one problem less to worry about. Checkpatch complains about spacing, but I have opted to follow the style of this .h-file. Cc: Russell King Cc: Nicolas Pitre Cc: Eric Miao Cc: Jonathan Cameron Cc: Will Deacon Signed-off-by: Linus Walleij Signed-off-by: David S. Miller Signed-off-by: Kamal Mostafa --- drivers/net/ethernet/smsc/smc91x.h | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/smsc/smc91x.h b/drivers/net/ethernet/smsc/smc91x.h index 370e13dde115..97bd44b83dfe 100644 --- a/drivers/net/ethernet/smsc/smc91x.h +++ b/drivers/net/ethernet/smsc/smc91x.h @@ -46,7 +46,8 @@ defined(CONFIG_MACH_LITTLETON) ||\ defined(CONFIG_MACH_ZYLONITE2) ||\ defined(CONFIG_ARCH_VIPER) ||\ - defined(CONFIG_MACH_STARGATE2) + defined(CONFIG_MACH_STARGATE2) ||\ + defined(CONFIG_ARCH_VERSATILE) #include @@ -206,23 +207,6 @@ SMC_outw(u16 val, void __iomem *ioaddr, int reg) #define RPC_LSA_DEFAULT RPC_LED_TX_RX #define RPC_LSB_DEFAULT RPC_LED_100_10 -#elif defined(CONFIG_ARCH_VERSATILE) - -#define SMC_CAN_USE_8BIT 1 -#define SMC_CAN_USE_16BIT 1 -#define SMC_CAN_USE_32BIT 1 -#define SMC_NOWAIT 1 - -#define SMC_inb(a, r) readb((a) + (r)) -#define SMC_inw(a, r) readw((a) + (r)) -#define SMC_inl(a, r) readl((a) + (r)) -#define SMC_outb(v, a, r) writeb(v, (a) + (r)) -#define SMC_outw(v, a, r) writew(v, (a) + (r)) -#define SMC_outl(v, a, r) writel(v, (a) + (r)) -#define SMC_insl(a, r, p, l) readsl((a) + (r), p, l) -#define SMC_outsl(a, r, p, l) writesl((a) + (r), p, l) -#define SMC_IRQ_FLAGS (-1) /* from resource */ - #elif defined(CONFIG_MN10300) /* From a1566353988f33ef36f339f2badaf41b2a549884 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 28 Nov 2013 21:43:40 +0000 Subject: [PATCH 79/92] ARM: fix booting low-vectors machines commit d8aa712c30148ba26fd89a5dc14de95d4c375184 upstream. Commit f6f91b0d9fd9 (ARM: allow kuser helpers to be removed from the vector page) required two pages for the vectors code. Although the code setting up the initial page tables was updated, the code which allocates page tables for new processes wasn't, neither was the code which tears down the mappings. Fix this. Fixes: f6f91b0d9fd9 ("ARM: allow kuser helpers to be removed from the vector page") Signed-off-by: Russell King Signed-off-by: Kamal Mostafa --- arch/arm/include/asm/pgtable.h | 2 +- arch/arm/mm/mmap.c | 2 +- arch/arm/mm/pgd.c | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index 26e9ce4a1367..bb810021120a 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -58,7 +58,7 @@ extern void __pgd_error(const char *file, int line, pgd_t); * mapping to be mapped at. This is particularly important for * non-high vector CPUs. */ -#define FIRST_USER_ADDRESS PAGE_SIZE +#define FIRST_USER_ADDRESS (PAGE_SIZE * 2) /* * Use TASK_SIZE as the ceiling argument for free_pgtables() and diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c index 10062ceadd1c..f0ef2f7d4ad7 100644 --- a/arch/arm/mm/mmap.c +++ b/arch/arm/mm/mmap.c @@ -146,7 +146,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, info.flags = VM_UNMAPPED_AREA_TOPDOWN; info.length = len; - info.low_limit = PAGE_SIZE; + info.low_limit = FIRST_USER_ADDRESS; info.high_limit = mm->mmap_base; info.align_mask = do_align ? (PAGE_MASK & (SHMLBA - 1)) : 0; info.align_offset = pgoff << PAGE_SHIFT; diff --git a/arch/arm/mm/pgd.c b/arch/arm/mm/pgd.c index 0acb089d0f70..1046b373d1ae 100644 --- a/arch/arm/mm/pgd.c +++ b/arch/arm/mm/pgd.c @@ -87,7 +87,8 @@ pgd_t *pgd_alloc(struct mm_struct *mm) init_pud = pud_offset(init_pgd, 0); init_pmd = pmd_offset(init_pud, 0); init_pte = pte_offset_map(init_pmd, 0); - set_pte_ext(new_pte, *init_pte, 0); + set_pte_ext(new_pte + 0, init_pte[0], 0); + set_pte_ext(new_pte + 1, init_pte[1], 0); pte_unmap(init_pte); pte_unmap(new_pte); } From 9a0760ed6e76432acdcb7de19ddc66875fa169d4 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 28 Nov 2013 21:55:41 +0000 Subject: [PATCH 80/92] ARM: footbridge: fix VGA initialisation commit 43659222e7a0113912ed02f6b2231550b3e471ac upstream. It's no good setting vga_base after the VGA console has been initialised, because if we do that we get this: Unable to handle kernel paging request at virtual address 000b8000 pgd = c0004000 [000b8000] *pgd=07ffc831, *pte=00000000, *ppte=00000000 0Internal error: Oops: 5017 [#1] ARM Modules linked in: CPU: 0 PID: 0 Comm: swapper Not tainted 3.12.0+ #49 task: c03e2974 ti: c03d8000 task.ti: c03d8000 PC is at vgacon_startup+0x258/0x39c LR is at request_resource+0x10/0x1c pc : [] lr : [] psr: 60000053 sp : c03d9f68 ip : 000b8000 fp : c03d9f8c r10: 000055aa r9 : 4401a103 r8 : ffffaa55 r7 : c03e357c r6 : c051b460 r5 : 000000ff r4 : 000c0000 r3 : 000b8000 r2 : c03e0514 r1 : 00000000 r0 : c0304971 Flags: nZCv IRQs on FIQs off Mode SVC_32 ISA ARM Segment kernel which is an access to the 0xb8000 without the PCI offset required to make it work. Fixes: cc22b4c18540 ("ARM: set vga memory base at run-time") Signed-off-by: Russell King Signed-off-by: Kamal Mostafa --- arch/arm/mach-footbridge/common.c | 3 +++ arch/arm/mach-footbridge/dc21285.c | 2 -- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-footbridge/common.c b/arch/arm/mach-footbridge/common.c index a42b369bc439..95d0b6e6438b 100644 --- a/arch/arm/mach-footbridge/common.c +++ b/arch/arm/mach-footbridge/common.c @@ -15,6 +15,7 @@ #include #include #include +#include