From ba3e6eaa3d72c9be90c8294e6d1f15cfa8885cc1 Mon Sep 17 00:00:00 2001 From: Antonio Alecrim Jr Date: Mon, 16 Sep 2013 11:04:54 -0300 Subject: [PATCH 001/178] X.509: remove possible code fragility: enumeration values not handled commit eb8948a03704f3dbbfc7e83090e20e93c6c476d2 upstream. Signed-off-by: Antonio Alecrim Jr Signed-off-by: David Howells Signed-off-by: Willy Tarreau --- scripts/asn1_compiler.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/asn1_compiler.c b/scripts/asn1_compiler.c index db0e5cd34c70..91c4117637ae 100644 --- a/scripts/asn1_compiler.c +++ b/scripts/asn1_compiler.c @@ -1353,6 +1353,8 @@ static void render_out_of_line_list(FILE *out) render_opcode(out, "ASN1_OP_END_SET_OF%s,\n", act); render_opcode(out, "_jump_target(%u),\n", entry); break; + default: + break; } if (e->action) render_opcode(out, "_action(ACT_%s),\n", From 2d2bec8f2fc546020e9b15d388ce23d6a00b0b38 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 5 Aug 2013 15:02:45 -0700 Subject: [PATCH 002/178] x86, asmlinkage, apm: Make APM data structure used from assembler visible commit 54c2f3fdb941204cad136024c7b854b7ad112ab6 upstream. Signed-off-by: Andi Kleen Link: http://lkml.kernel.org/r/1375740170-7446-12-git-send-email-andi@firstfloor.org Signed-off-by: H. Peter Anvin Signed-off-by: Willy Tarreau --- arch/x86/kernel/apm_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 53a4e2744846..3ab03430211d 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -392,7 +392,7 @@ static struct cpuidle_device apm_cpuidle_device; /* * Local variables */ -static struct { +__visible struct { unsigned long offset; unsigned short segment; } apm_bios_entry; From 0608074c2c1fd874b10117c07b2751d2477bec14 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 22 Mar 2016 18:02:49 +0100 Subject: [PATCH 003/178] netfilter: x_tables: validate e->target_offset early commit bdf533de6968e9686df777dc178486f600c6e617 upstream. We should check that e->target_offset is sane before mark_source_chains gets called since it will fetch the target entry for loop detection. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Willy Tarreau --- net/ipv4/netfilter/arp_tables.c | 17 ++++++++--------- net/ipv4/netfilter/ip_tables.c | 17 ++++++++--------- net/ipv6/netfilter/ip6_tables.c | 17 ++++++++--------- 3 files changed, 24 insertions(+), 27 deletions(-) diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index c8abe31961ed..269759dd96f4 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -465,14 +465,12 @@ static int mark_source_chains(const struct xt_table_info *newinfo, return 1; } -static inline int check_entry(const struct arpt_entry *e, const char *name) +static inline int check_entry(const struct arpt_entry *e) { const struct xt_entry_target *t; - if (!arp_checkentry(&e->arp)) { - duprintf("arp_tables: arp check failed %p %s.\n", e, name); + if (!arp_checkentry(&e->arp)) return -EINVAL; - } if (e->target_offset + sizeof(struct xt_entry_target) > e->next_offset) return -EINVAL; @@ -513,10 +511,6 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size) struct xt_target *target; int ret; - ret = check_entry(e, name); - if (ret) - return ret; - t = arpt_get_target(e); target = xt_request_find_target(NFPROTO_ARP, t->u.user.name, t->u.user.revision); @@ -561,6 +555,7 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e, unsigned int valid_hooks) { unsigned int h; + int err; if ((unsigned long)e % __alignof__(struct arpt_entry) != 0 || (unsigned char *)e + sizeof(struct arpt_entry) >= limit) { @@ -575,6 +570,10 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e, return -EINVAL; } + err = check_entry(e); + if (err) + return err; + /* Check hooks & underflows */ for (h = 0; h < NF_ARP_NUMHOOKS; h++) { if (!(valid_hooks & (1 << h))) @@ -1232,7 +1231,7 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, } /* For purposes of check_entry casting the compat entry is fine */ - ret = check_entry((struct arpt_entry *)e, name); + ret = check_entry((struct arpt_entry *)e); if (ret) return ret; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 651c10774d58..5ca478f13080 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -560,14 +560,12 @@ static void cleanup_match(struct xt_entry_match *m, struct net *net) } static int -check_entry(const struct ipt_entry *e, const char *name) +check_entry(const struct ipt_entry *e) { const struct xt_entry_target *t; - if (!ip_checkentry(&e->ip)) { - duprintf("ip check failed %p %s.\n", e, name); + if (!ip_checkentry(&e->ip)) return -EINVAL; - } if (e->target_offset + sizeof(struct xt_entry_target) > e->next_offset) @@ -657,10 +655,6 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name, struct xt_mtchk_param mtpar; struct xt_entry_match *ematch; - ret = check_entry(e, name); - if (ret) - return ret; - j = 0; mtpar.net = net; mtpar.table = name; @@ -724,6 +718,7 @@ check_entry_size_and_hooks(struct ipt_entry *e, unsigned int valid_hooks) { unsigned int h; + int err; if ((unsigned long)e % __alignof__(struct ipt_entry) != 0 || (unsigned char *)e + sizeof(struct ipt_entry) >= limit) { @@ -738,6 +733,10 @@ check_entry_size_and_hooks(struct ipt_entry *e, return -EINVAL; } + err = check_entry(e); + if (err) + return err; + /* Check hooks & underflows */ for (h = 0; h < NF_INET_NUMHOOKS; h++) { if (!(valid_hooks & (1 << h))) @@ -1498,7 +1497,7 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, } /* For purposes of check_entry casting the compat entry is fine */ - ret = check_entry((struct ipt_entry *)e, name); + ret = check_entry((struct ipt_entry *)e); if (ret) return ret; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 89a4e4ddd8bb..597f539f3d33 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -570,14 +570,12 @@ static void cleanup_match(struct xt_entry_match *m, struct net *net) } static int -check_entry(const struct ip6t_entry *e, const char *name) +check_entry(const struct ip6t_entry *e) { const struct xt_entry_target *t; - if (!ip6_checkentry(&e->ipv6)) { - duprintf("ip_tables: ip check failed %p %s.\n", e, name); + if (!ip6_checkentry(&e->ipv6)) return -EINVAL; - } if (e->target_offset + sizeof(struct xt_entry_target) > e->next_offset) @@ -668,10 +666,6 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name, struct xt_mtchk_param mtpar; struct xt_entry_match *ematch; - ret = check_entry(e, name); - if (ret) - return ret; - j = 0; mtpar.net = net; mtpar.table = name; @@ -735,6 +729,7 @@ check_entry_size_and_hooks(struct ip6t_entry *e, unsigned int valid_hooks) { unsigned int h; + int err; if ((unsigned long)e % __alignof__(struct ip6t_entry) != 0 || (unsigned char *)e + sizeof(struct ip6t_entry) >= limit) { @@ -749,6 +744,10 @@ check_entry_size_and_hooks(struct ip6t_entry *e, return -EINVAL; } + err = check_entry(e); + if (err) + return err; + /* Check hooks & underflows */ for (h = 0; h < NF_INET_NUMHOOKS; h++) { if (!(valid_hooks & (1 << h))) @@ -1510,7 +1509,7 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, } /* For purposes of check_entry casting the compat entry is fine */ - ret = check_entry((struct ip6t_entry *)e, name); + ret = check_entry((struct ip6t_entry *)e); if (ret) return ret; From 74afad874aaeef7ef5b500366a0e271fb5b265c3 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 22 Mar 2016 18:02:50 +0100 Subject: [PATCH 004/178] netfilter: x_tables: make sure e->next_offset covers remaining blob size commit 6e94e0cfb0887e4013b3b930fa6ab1fe6bb6ba91 upstream. Otherwise this function may read data beyond the ruleset blob. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Willy Tarreau --- net/ipv4/netfilter/arp_tables.c | 6 ++++-- net/ipv4/netfilter/ip_tables.c | 6 ++++-- net/ipv6/netfilter/ip6_tables.c | 6 ++++-- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 269759dd96f4..ee40bd4e766e 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -558,7 +558,8 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e, int err; if ((unsigned long)e % __alignof__(struct arpt_entry) != 0 || - (unsigned char *)e + sizeof(struct arpt_entry) >= limit) { + (unsigned char *)e + sizeof(struct arpt_entry) >= limit || + (unsigned char *)e + e->next_offset > limit) { duprintf("Bad offset %p\n", e); return -EINVAL; } @@ -1218,7 +1219,8 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, duprintf("check_compat_entry_size_and_hooks %p\n", e); if ((unsigned long)e % __alignof__(struct compat_arpt_entry) != 0 || - (unsigned char *)e + sizeof(struct compat_arpt_entry) >= limit) { + (unsigned char *)e + sizeof(struct compat_arpt_entry) >= limit || + (unsigned char *)e + e->next_offset > limit) { duprintf("Bad offset %p, limit = %p\n", e, limit); return -EINVAL; } diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 5ca478f13080..7cfd5663fe24 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -721,7 +721,8 @@ check_entry_size_and_hooks(struct ipt_entry *e, int err; if ((unsigned long)e % __alignof__(struct ipt_entry) != 0 || - (unsigned char *)e + sizeof(struct ipt_entry) >= limit) { + (unsigned char *)e + sizeof(struct ipt_entry) >= limit || + (unsigned char *)e + e->next_offset > limit) { duprintf("Bad offset %p\n", e); return -EINVAL; } @@ -1484,7 +1485,8 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, duprintf("check_compat_entry_size_and_hooks %p\n", e); if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0 || - (unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit) { + (unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit || + (unsigned char *)e + e->next_offset > limit) { duprintf("Bad offset %p, limit = %p\n", e, limit); return -EINVAL; } diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 597f539f3d33..9c6b543472ef 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -732,7 +732,8 @@ check_entry_size_and_hooks(struct ip6t_entry *e, int err; if ((unsigned long)e % __alignof__(struct ip6t_entry) != 0 || - (unsigned char *)e + sizeof(struct ip6t_entry) >= limit) { + (unsigned char *)e + sizeof(struct ip6t_entry) >= limit || + (unsigned char *)e + e->next_offset > limit) { duprintf("Bad offset %p\n", e); return -EINVAL; } @@ -1496,7 +1497,8 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, duprintf("check_compat_entry_size_and_hooks %p\n", e); if ((unsigned long)e % __alignof__(struct compat_ip6t_entry) != 0 || - (unsigned char *)e + sizeof(struct compat_ip6t_entry) >= limit) { + (unsigned char *)e + sizeof(struct compat_ip6t_entry) >= limit || + (unsigned char *)e + e->next_offset > limit) { duprintf("Bad offset %p, limit = %p\n", e, limit); return -EINVAL; } From 1ee858dd54d3c88dc4331cc401524d94989cba61 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 22 Mar 2016 18:02:52 +0100 Subject: [PATCH 005/178] netfilter: x_tables: fix unconditional helper commit 54d83fc74aa9ec72794373cb47432c5f7fb1a309 upstream. Ben Hawkes says: In the mark_source_chains function (net/ipv4/netfilter/ip_tables.c) it is possible for a user-supplied ipt_entry structure to have a large next_offset field. This field is not bounds checked prior to writing a counter value at the supplied offset. Problem is that mark_source_chains should not have been called -- the rule doesn't have a next entry, so its supposed to return an absolute verdict of either ACCEPT or DROP. However, the function conditional() doesn't work as the name implies. It only checks that the rule is using wildcard address matching. However, an unconditional rule must also not be using any matches (no -m args). The underflow validator only checked the addresses, therefore passing the 'unconditional absolute verdict' test, while mark_source_chains also tested for presence of matches, and thus proceeeded to the next (not-existent) rule. Unify this so that all the callers have same idea of 'unconditional rule'. Reported-by: Ben Hawkes Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Willy Tarreau --- net/ipv4/netfilter/arp_tables.c | 18 +++++++++--------- net/ipv4/netfilter/ip_tables.c | 23 +++++++++++------------ net/ipv6/netfilter/ip6_tables.c | 23 +++++++++++------------ 3 files changed, 31 insertions(+), 33 deletions(-) diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index ee40bd4e766e..456fc6efe05d 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -350,11 +350,12 @@ unsigned int arpt_do_table(struct sk_buff *skb, } /* All zeroes == unconditional rule. */ -static inline bool unconditional(const struct arpt_arp *arp) +static inline bool unconditional(const struct arpt_entry *e) { static const struct arpt_arp uncond; - return memcmp(arp, &uncond, sizeof(uncond)) == 0; + return e->target_offset == sizeof(struct arpt_entry) && + memcmp(&e->arp, &uncond, sizeof(uncond)) == 0; } /* Figures out from what hook each rule can be called: returns 0 if @@ -393,11 +394,10 @@ static int mark_source_chains(const struct xt_table_info *newinfo, |= ((1 << hook) | (1 << NF_ARP_NUMHOOKS)); /* Unconditional return/END. */ - if ((e->target_offset == sizeof(struct arpt_entry) && + if ((unconditional(e) && (strcmp(t->target.u.user.name, XT_STANDARD_TARGET) == 0) && - t->verdict < 0 && unconditional(&e->arp)) || - visited) { + t->verdict < 0) || visited) { unsigned int oldpos, size; if ((strcmp(t->target.u.user.name, @@ -536,7 +536,7 @@ static bool check_underflow(const struct arpt_entry *e) const struct xt_entry_target *t; unsigned int verdict; - if (!unconditional(&e->arp)) + if (!unconditional(e)) return false; t = arpt_get_target_c(e); if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) @@ -583,9 +583,9 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e, newinfo->hook_entry[h] = hook_entries[h]; if ((unsigned char *)e - base == underflows[h]) { if (!check_underflow(e)) { - pr_err("Underflows must be unconditional and " - "use the STANDARD target with " - "ACCEPT/DROP\n"); + pr_debug("Underflows must be unconditional and " + "use the STANDARD target with " + "ACCEPT/DROP\n"); return -EINVAL; } newinfo->underflow[h] = underflows[h]; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 7cfd5663fe24..a5bd3c8eee84 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -168,11 +168,12 @@ get_entry(const void *base, unsigned int offset) /* All zeroes == unconditional rule. */ /* Mildly perf critical (only if packet tracing is on) */ -static inline bool unconditional(const struct ipt_ip *ip) +static inline bool unconditional(const struct ipt_entry *e) { static const struct ipt_ip uncond; - return memcmp(ip, &uncond, sizeof(uncond)) == 0; + return e->target_offset == sizeof(struct ipt_entry) && + memcmp(&e->ip, &uncond, sizeof(uncond)) == 0; #undef FWINV } @@ -229,11 +230,10 @@ get_chainname_rulenum(const struct ipt_entry *s, const struct ipt_entry *e, } else if (s == e) { (*rulenum)++; - if (s->target_offset == sizeof(struct ipt_entry) && + if (unconditional(s) && strcmp(t->target.u.kernel.target->name, XT_STANDARD_TARGET) == 0 && - t->verdict < 0 && - unconditional(&s->ip)) { + t->verdict < 0) { /* Tail of chains: STANDARD target (return/policy) */ *comment = *chainname == hookname ? comments[NF_IP_TRACE_COMMENT_POLICY] @@ -467,11 +467,10 @@ mark_source_chains(const struct xt_table_info *newinfo, e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS)); /* Unconditional return/END. */ - if ((e->target_offset == sizeof(struct ipt_entry) && + if ((unconditional(e) && (strcmp(t->target.u.user.name, XT_STANDARD_TARGET) == 0) && - t->verdict < 0 && unconditional(&e->ip)) || - visited) { + t->verdict < 0) || visited) { unsigned int oldpos, size; if ((strcmp(t->target.u.user.name, @@ -698,7 +697,7 @@ static bool check_underflow(const struct ipt_entry *e) const struct xt_entry_target *t; unsigned int verdict; - if (!unconditional(&e->ip)) + if (!unconditional(e)) return false; t = ipt_get_target_c(e); if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) @@ -746,9 +745,9 @@ check_entry_size_and_hooks(struct ipt_entry *e, newinfo->hook_entry[h] = hook_entries[h]; if ((unsigned char *)e - base == underflows[h]) { if (!check_underflow(e)) { - pr_err("Underflows must be unconditional and " - "use the STANDARD target with " - "ACCEPT/DROP\n"); + pr_debug("Underflows must be unconditional and " + "use the STANDARD target with " + "ACCEPT/DROP\n"); return -EINVAL; } newinfo->underflow[h] = underflows[h]; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 9c6b543472ef..fb8a146abed8 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -195,11 +195,12 @@ get_entry(const void *base, unsigned int offset) /* All zeroes == unconditional rule. */ /* Mildly perf critical (only if packet tracing is on) */ -static inline bool unconditional(const struct ip6t_ip6 *ipv6) +static inline bool unconditional(const struct ip6t_entry *e) { static const struct ip6t_ip6 uncond; - return memcmp(ipv6, &uncond, sizeof(uncond)) == 0; + return e->target_offset == sizeof(struct ip6t_entry) && + memcmp(&e->ipv6, &uncond, sizeof(uncond)) == 0; } static inline const struct xt_entry_target * @@ -255,11 +256,10 @@ get_chainname_rulenum(const struct ip6t_entry *s, const struct ip6t_entry *e, } else if (s == e) { (*rulenum)++; - if (s->target_offset == sizeof(struct ip6t_entry) && + if (unconditional(s) && strcmp(t->target.u.kernel.target->name, XT_STANDARD_TARGET) == 0 && - t->verdict < 0 && - unconditional(&s->ipv6)) { + t->verdict < 0) { /* Tail of chains: STANDARD target (return/policy) */ *comment = *chainname == hookname ? comments[NF_IP6_TRACE_COMMENT_POLICY] @@ -477,11 +477,10 @@ mark_source_chains(const struct xt_table_info *newinfo, e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS)); /* Unconditional return/END. */ - if ((e->target_offset == sizeof(struct ip6t_entry) && + if ((unconditional(e) && (strcmp(t->target.u.user.name, XT_STANDARD_TARGET) == 0) && - t->verdict < 0 && - unconditional(&e->ipv6)) || visited) { + t->verdict < 0) || visited) { unsigned int oldpos, size; if ((strcmp(t->target.u.user.name, @@ -709,7 +708,7 @@ static bool check_underflow(const struct ip6t_entry *e) const struct xt_entry_target *t; unsigned int verdict; - if (!unconditional(&e->ipv6)) + if (!unconditional(e)) return false; t = ip6t_get_target_c(e); if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) @@ -757,9 +756,9 @@ check_entry_size_and_hooks(struct ip6t_entry *e, newinfo->hook_entry[h] = hook_entries[h]; if ((unsigned char *)e - base == underflows[h]) { if (!check_underflow(e)) { - pr_err("Underflows must be unconditional and " - "use the STANDARD target with " - "ACCEPT/DROP\n"); + pr_debug("Underflows must be unconditional and " + "use the STANDARD target with " + "ACCEPT/DROP\n"); return -EINVAL; } newinfo->underflow[h] = underflows[h]; From 82e2616ad251a3f72991036d6e8acebbd0aceb80 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 15 Jul 2016 15:08:15 -0400 Subject: [PATCH 006/178] netfilter: x_tables: don't move to non-existent next rule commit f24e230d257af1ad7476c6e81a8dc3127a74204e upstream. Ben Hawkes says: In the mark_source_chains function (net/ipv4/netfilter/ip_tables.c) it is possible for a user-supplied ipt_entry structure to have a large next_offset field. This field is not bounds checked prior to writing a counter value at the supplied offset. Base chains enforce absolute verdict. User defined chains are supposed to end with an unconditional return, xtables userspace adds them automatically. But if such return is missing we will move to non-existent next rule. CVE-2016-3134 Reported-by: Ben Hawkes Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Chas Williams <3chas3@gmail.com> Signed-off-by: Willy Tarreau --- net/ipv4/netfilter/arp_tables.c | 8 +++++--- net/ipv4/netfilter/ip_tables.c | 4 ++++ net/ipv6/netfilter/ip6_tables.c | 4 ++++ 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 456fc6efe05d..7460b7bef3ab 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -430,6 +430,8 @@ static int mark_source_chains(const struct xt_table_info *newinfo, size = e->next_offset; e = (struct arpt_entry *) (entry0 + pos + size); + if (pos + size >= newinfo->size) + return 0; e->counters.pcnt = pos; pos += size; } else { @@ -452,6 +454,8 @@ static int mark_source_chains(const struct xt_table_info *newinfo, } else { /* ... this is a fallthru */ newpos = pos + e->next_offset; + if (newpos >= newinfo->size) + return 0; } e = (struct arpt_entry *) (entry0 + newpos); @@ -675,10 +679,8 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0, } } - if (!mark_source_chains(newinfo, repl->valid_hooks, entry0)) { - duprintf("Looping hook\n"); + if (!mark_source_chains(newinfo, repl->valid_hooks, entry0)) return -ELOOP; - } /* Finally, each sanity check must pass */ i = 0; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index a5bd3c8eee84..8fc22eed9603 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -511,6 +511,8 @@ mark_source_chains(const struct xt_table_info *newinfo, size = e->next_offset; e = (struct ipt_entry *) (entry0 + pos + size); + if (pos + size >= newinfo->size) + return 0; e->counters.pcnt = pos; pos += size; } else { @@ -532,6 +534,8 @@ mark_source_chains(const struct xt_table_info *newinfo, } else { /* ... this is a fallthru */ newpos = pos + e->next_offset; + if (newpos >= newinfo->size) + return 0; } e = (struct ipt_entry *) (entry0 + newpos); diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index fb8a146abed8..63f7876c4f29 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -521,6 +521,8 @@ mark_source_chains(const struct xt_table_info *newinfo, size = e->next_offset; e = (struct ip6t_entry *) (entry0 + pos + size); + if (pos + size >= newinfo->size) + return 0; e->counters.pcnt = pos; pos += size; } else { @@ -542,6 +544,8 @@ mark_source_chains(const struct xt_table_info *newinfo, } else { /* ... this is a fallthru */ newpos = pos + e->next_offset; + if (newpos >= newinfo->size) + return 0; } e = (struct ip6t_entry *) (entry0 + newpos); From ff9be2063c1c029e9ee4951882fb64387fa21ede Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 1 Apr 2016 14:17:23 +0200 Subject: [PATCH 007/178] netfilter: x_tables: add and use xt_check_entry_offsets commit 7d35812c3214afa5b37a675113555259cfd67b98 upstream. Currently arp/ip and ip6tables each implement a short helper to check that the target offset is large enough to hold one xt_entry_target struct and that t->u.target_size fits within the current rule. Unfortunately these checks are not sufficient. To avoid adding new tests to all of ip/ip6/arptables move the current checks into a helper, then extend this helper in followup patches. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Willy Tarreau --- include/linux/netfilter/x_tables.h | 4 ++++ net/ipv4/netfilter/arp_tables.c | 11 +--------- net/ipv4/netfilter/ip_tables.c | 12 +---------- net/ipv6/netfilter/ip6_tables.c | 12 +---------- net/netfilter/x_tables.c | 34 ++++++++++++++++++++++++++++++ 5 files changed, 41 insertions(+), 32 deletions(-) diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index dd49566315c6..6da5c8275e94 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -239,6 +239,10 @@ extern void xt_unregister_match(struct xt_match *target); extern int xt_register_matches(struct xt_match *match, unsigned int n); extern void xt_unregister_matches(struct xt_match *match, unsigned int n); +int xt_check_entry_offsets(const void *base, + unsigned int target_offset, + unsigned int next_offset); + extern int xt_check_match(struct xt_mtchk_param *, unsigned int size, u_int8_t proto, bool inv_proto); extern int xt_check_target(struct xt_tgchk_param *, diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 7460b7bef3ab..ac73ecbc93a6 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -471,19 +471,10 @@ static int mark_source_chains(const struct xt_table_info *newinfo, static inline int check_entry(const struct arpt_entry *e) { - const struct xt_entry_target *t; - if (!arp_checkentry(&e->arp)) return -EINVAL; - if (e->target_offset + sizeof(struct xt_entry_target) > e->next_offset) - return -EINVAL; - - t = arpt_get_target_c(e); - if (e->target_offset + t->u.target_size > e->next_offset) - return -EINVAL; - - return 0; + return xt_check_entry_offsets(e, e->target_offset, e->next_offset); } static inline int check_target(struct arpt_entry *e, const char *name) diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 8fc22eed9603..5cbc01a3ac44 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -565,20 +565,10 @@ static void cleanup_match(struct xt_entry_match *m, struct net *net) static int check_entry(const struct ipt_entry *e) { - const struct xt_entry_target *t; - if (!ip_checkentry(&e->ip)) return -EINVAL; - if (e->target_offset + sizeof(struct xt_entry_target) > - e->next_offset) - return -EINVAL; - - t = ipt_get_target_c(e); - if (e->target_offset + t->u.target_size > e->next_offset) - return -EINVAL; - - return 0; + return xt_check_entry_offsets(e, e->target_offset, e->next_offset); } static int diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 63f7876c4f29..84adc187e5d9 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -575,20 +575,10 @@ static void cleanup_match(struct xt_entry_match *m, struct net *net) static int check_entry(const struct ip6t_entry *e) { - const struct xt_entry_target *t; - if (!ip6_checkentry(&e->ipv6)) return -EINVAL; - if (e->target_offset + sizeof(struct xt_entry_target) > - e->next_offset) - return -EINVAL; - - t = ip6t_get_target_c(e); - if (e->target_offset + t->u.target_size > e->next_offset) - return -EINVAL; - - return 0; + return xt_check_entry_offsets(e, e->target_offset, e->next_offset); } static int check_match(struct xt_entry_match *m, struct xt_mtchk_param *par) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 8b03028cca69..55b1e0ccb0e2 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -560,6 +560,40 @@ int xt_compat_match_to_user(const struct xt_entry_match *m, EXPORT_SYMBOL_GPL(xt_compat_match_to_user); #endif /* CONFIG_COMPAT */ +/** + * xt_check_entry_offsets - validate arp/ip/ip6t_entry + * + * @base: pointer to arp/ip/ip6t_entry + * @target_offset: the arp/ip/ip6_t->target_offset + * @next_offset: the arp/ip/ip6_t->next_offset + * + * validates that target_offset and next_offset are sane. + * + * The arp/ip/ip6t_entry structure @base must have passed following tests: + * - it must point to a valid memory location + * - base to base + next_offset must be accessible, i.e. not exceed allocated + * length. + * + * Return: 0 on success, negative errno on failure. + */ +int xt_check_entry_offsets(const void *base, + unsigned int target_offset, + unsigned int next_offset) +{ + const struct xt_entry_target *t; + const char *e = base; + + if (target_offset + sizeof(*t) > next_offset) + return -EINVAL; + + t = (void *)(e + target_offset); + if (target_offset + t->u.target_size > next_offset) + return -EINVAL; + + return 0; +} +EXPORT_SYMBOL(xt_check_entry_offsets); + int xt_check_target(struct xt_tgchk_param *par, unsigned int size, u_int8_t proto, bool inv_proto) { From ea878a4401096f67851ee631f670fdeb73ae8f98 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 1 Apr 2016 14:17:24 +0200 Subject: [PATCH 008/178] netfilter: x_tables: kill check_entry helper commit aa412ba225dd3bc36d404c28cdc3d674850d80d0 upstream. Once we add more sanity testing to xt_check_entry_offsets it becomes relvant if we're expecting a 32bit 'config_compat' blob or a normal one. Since we already have a lot of similar-named functions (check_entry, compat_check_entry, find_and_check_entry, etc.) and the current incarnation is short just fold its contents into the callers. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Willy Tarreau --- net/ipv4/netfilter/arp_tables.c | 19 ++++++++----------- net/ipv4/netfilter/ip_tables.c | 20 ++++++++------------ net/ipv6/netfilter/ip6_tables.c | 20 ++++++++------------ 3 files changed, 24 insertions(+), 35 deletions(-) diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index ac73ecbc93a6..fdfab3371562 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -469,14 +469,6 @@ static int mark_source_chains(const struct xt_table_info *newinfo, return 1; } -static inline int check_entry(const struct arpt_entry *e) -{ - if (!arp_checkentry(&e->arp)) - return -EINVAL; - - return xt_check_entry_offsets(e, e->target_offset, e->next_offset); -} - static inline int check_target(struct arpt_entry *e, const char *name) { struct xt_entry_target *t = arpt_get_target(e); @@ -566,7 +558,10 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e, return -EINVAL; } - err = check_entry(e); + if (!arp_checkentry(&e->arp)) + return -EINVAL; + + err = xt_check_entry_offsets(e, e->target_offset, e->next_offset); if (err) return err; @@ -1225,8 +1220,10 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, return -EINVAL; } - /* For purposes of check_entry casting the compat entry is fine */ - ret = check_entry((struct arpt_entry *)e); + if (!arp_checkentry(&e->arp)) + return -EINVAL; + + ret = xt_check_entry_offsets(e, e->target_offset, e->next_offset); if (ret) return ret; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 5cbc01a3ac44..a003c753f7aa 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -562,15 +562,6 @@ static void cleanup_match(struct xt_entry_match *m, struct net *net) module_put(par.match->me); } -static int -check_entry(const struct ipt_entry *e) -{ - if (!ip_checkentry(&e->ip)) - return -EINVAL; - - return xt_check_entry_offsets(e, e->target_offset, e->next_offset); -} - static int check_match(struct xt_entry_match *m, struct xt_mtchk_param *par) { @@ -727,7 +718,10 @@ check_entry_size_and_hooks(struct ipt_entry *e, return -EINVAL; } - err = check_entry(e); + if (!ip_checkentry(&e->ip)) + return -EINVAL; + + err = xt_check_entry_offsets(e, e->target_offset, e->next_offset); if (err) return err; @@ -1491,8 +1485,10 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, return -EINVAL; } - /* For purposes of check_entry casting the compat entry is fine */ - ret = check_entry((struct ipt_entry *)e); + if (!ip_checkentry(&e->ip)) + return -EINVAL; + + ret = xt_check_entry_offsets(e, e->target_offset, e->next_offset); if (ret) return ret; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 84adc187e5d9..f5b5cb0fcea3 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -572,15 +572,6 @@ static void cleanup_match(struct xt_entry_match *m, struct net *net) module_put(par.match->me); } -static int -check_entry(const struct ip6t_entry *e) -{ - if (!ip6_checkentry(&e->ipv6)) - return -EINVAL; - - return xt_check_entry_offsets(e, e->target_offset, e->next_offset); -} - static int check_match(struct xt_entry_match *m, struct xt_mtchk_param *par) { const struct ip6t_ip6 *ipv6 = par->entryinfo; @@ -738,7 +729,10 @@ check_entry_size_and_hooks(struct ip6t_entry *e, return -EINVAL; } - err = check_entry(e); + if (!ip6_checkentry(&e->ipv6)) + return -EINVAL; + + err = xt_check_entry_offsets(e, e->target_offset, e->next_offset); if (err) return err; @@ -1503,8 +1497,10 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, return -EINVAL; } - /* For purposes of check_entry casting the compat entry is fine */ - ret = check_entry((struct ip6t_entry *)e); + if (!ip6_checkentry(&e->ipv6)) + return -EINVAL; + + ret = xt_check_entry_offsets(e, e->target_offset, e->next_offset); if (ret) return ret; From 6b9f8b74d347c956239e4538b1c3427cc5ea3ede Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 1 Apr 2016 14:17:25 +0200 Subject: [PATCH 009/178] netfilter: x_tables: assert minimum target size commit a08e4e190b866579896c09af59b3bdca821da2cd upstream. The target size includes the size of the xt_entry_target struct. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Willy Tarreau --- net/netfilter/x_tables.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 55b1e0ccb0e2..cc34bb38c814 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -587,6 +587,9 @@ int xt_check_entry_offsets(const void *base, return -EINVAL; t = (void *)(e + target_offset); + if (t->u.target_size < sizeof(*t)) + return -EINVAL; + if (target_offset + t->u.target_size > next_offset) return -EINVAL; From 305323325e044e78afe63dfd4444a21b21ecb804 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 1 Apr 2016 14:17:26 +0200 Subject: [PATCH 010/178] netfilter: x_tables: add compat version of xt_check_entry_offsets commit fc1221b3a163d1386d1052184202d5dc50d302d1 upstream. 32bit rulesets have different layout and alignment requirements, so once more integrity checks get added to xt_check_entry_offsets it will reject well-formed 32bit rulesets. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Willy Tarreau --- include/linux/netfilter/x_tables.h | 3 +++ net/ipv4/netfilter/arp_tables.c | 3 ++- net/ipv4/netfilter/ip_tables.c | 3 ++- net/ipv6/netfilter/ip6_tables.c | 3 ++- net/netfilter/x_tables.c | 22 ++++++++++++++++++++++ 5 files changed, 31 insertions(+), 3 deletions(-) diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 6da5c8275e94..8dfd3dddd1e0 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -437,6 +437,9 @@ extern void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr, unsigned int *size); extern int xt_compat_target_to_user(const struct xt_entry_target *t, void __user **dstptr, unsigned int *size); +int xt_compat_check_entry_offsets(const void *base, + unsigned int target_offset, + unsigned int next_offset); #endif /* CONFIG_COMPAT */ #endif /* _X_TABLES_H */ diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index fdfab3371562..e4084c00bf29 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1223,7 +1223,8 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, if (!arp_checkentry(&e->arp)) return -EINVAL; - ret = xt_check_entry_offsets(e, e->target_offset, e->next_offset); + ret = xt_compat_check_entry_offsets(e, e->target_offset, + e->next_offset); if (ret) return ret; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index a003c753f7aa..fba8929d1117 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1488,7 +1488,8 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, if (!ip_checkentry(&e->ip)) return -EINVAL; - ret = xt_check_entry_offsets(e, e->target_offset, e->next_offset); + ret = xt_compat_check_entry_offsets(e, + e->target_offset, e->next_offset); if (ret) return ret; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index f5b5cb0fcea3..1949305e2613 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1500,7 +1500,8 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, if (!ip6_checkentry(&e->ipv6)) return -EINVAL; - ret = xt_check_entry_offsets(e, e->target_offset, e->next_offset); + ret = xt_compat_check_entry_offsets(e, + e->target_offset, e->next_offset); if (ret) return ret; diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index cc34bb38c814..eeb4edc750cd 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -558,6 +558,27 @@ int xt_compat_match_to_user(const struct xt_entry_match *m, return 0; } EXPORT_SYMBOL_GPL(xt_compat_match_to_user); + +int xt_compat_check_entry_offsets(const void *base, + unsigned int target_offset, + unsigned int next_offset) +{ + const struct compat_xt_entry_target *t; + const char *e = base; + + if (target_offset + sizeof(*t) > next_offset) + return -EINVAL; + + t = (void *)(e + target_offset); + if (t->u.target_size < sizeof(*t)) + return -EINVAL; + + if (target_offset + t->u.target_size > next_offset) + return -EINVAL; + + return 0; +} +EXPORT_SYMBOL(xt_compat_check_entry_offsets); #endif /* CONFIG_COMPAT */ /** @@ -568,6 +589,7 @@ EXPORT_SYMBOL_GPL(xt_compat_match_to_user); * @next_offset: the arp/ip/ip6_t->next_offset * * validates that target_offset and next_offset are sane. + * Also see xt_compat_check_entry_offsets for CONFIG_COMPAT version. * * The arp/ip/ip6t_entry structure @base must have passed following tests: * - it must point to a valid memory location From 5366deb60a8c3673af8cce77cd137cd96eadf3a6 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 1 Apr 2016 14:17:27 +0200 Subject: [PATCH 011/178] netfilter: x_tables: check standard target size too commit 7ed2abddd20cf8f6bd27f65bd218f26fa5bf7f44 upstream. We have targets and standard targets -- the latter carries a verdict. The ip/ip6tables validation functions will access t->verdict for the standard targets to fetch the jump offset or verdict for chainloop detection, but this happens before the targets get checked/validated. Thus we also need to check for verdict presence here, else t->verdict can point right after a blob. Spotted with UBSAN while testing malformed blobs. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Willy Tarreau --- net/netfilter/x_tables.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index eeb4edc750cd..37f7eda8ad19 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -559,6 +559,13 @@ int xt_compat_match_to_user(const struct xt_entry_match *m, } EXPORT_SYMBOL_GPL(xt_compat_match_to_user); +/* non-compat version may have padding after verdict */ +struct compat_xt_standard_target { + struct compat_xt_entry_target t; + compat_uint_t verdict; +}; + +/* see xt_check_entry_offsets */ int xt_compat_check_entry_offsets(const void *base, unsigned int target_offset, unsigned int next_offset) @@ -576,6 +583,10 @@ int xt_compat_check_entry_offsets(const void *base, if (target_offset + t->u.target_size > next_offset) return -EINVAL; + if (strcmp(t->u.user.name, XT_STANDARD_TARGET) == 0 && + target_offset + sizeof(struct compat_xt_standard_target) != next_offset) + return -EINVAL; + return 0; } EXPORT_SYMBOL(xt_compat_check_entry_offsets); @@ -615,6 +626,10 @@ int xt_check_entry_offsets(const void *base, if (target_offset + t->u.target_size > next_offset) return -EINVAL; + if (strcmp(t->u.user.name, XT_STANDARD_TARGET) == 0 && + target_offset + sizeof(struct xt_standard_target) != next_offset) + return -EINVAL; + return 0; } EXPORT_SYMBOL(xt_check_entry_offsets); From 71f72bbf29981206b98845c7ac8b8d1d7d44fa55 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 1 Apr 2016 14:17:28 +0200 Subject: [PATCH 012/178] netfilter: x_tables: check for bogus target offset commit ce683e5f9d045e5d67d1312a42b359cb2ab2a13c upstream. We're currently asserting that targetoff + targetsize <= nextoff. Extend it to also check that targetoff is >= sizeof(xt_entry). Since this is generic code, add an argument pointing to the start of the match/target, we can then derive the base structure size from the delta. We also need the e->elems pointer in a followup change to validate matches. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Willy Tarreau --- include/linux/netfilter/x_tables.h | 4 ++-- net/ipv4/netfilter/arp_tables.c | 5 +++-- net/ipv4/netfilter/ip_tables.c | 5 +++-- net/ipv6/netfilter/ip6_tables.c | 5 +++-- net/netfilter/x_tables.c | 17 +++++++++++++++-- 5 files changed, 26 insertions(+), 10 deletions(-) diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 8dfd3dddd1e0..8bb770647bd5 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -239,7 +239,7 @@ extern void xt_unregister_match(struct xt_match *target); extern int xt_register_matches(struct xt_match *match, unsigned int n); extern void xt_unregister_matches(struct xt_match *match, unsigned int n); -int xt_check_entry_offsets(const void *base, +int xt_check_entry_offsets(const void *base, const char *elems, unsigned int target_offset, unsigned int next_offset); @@ -437,7 +437,7 @@ extern void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr, unsigned int *size); extern int xt_compat_target_to_user(const struct xt_entry_target *t, void __user **dstptr, unsigned int *size); -int xt_compat_check_entry_offsets(const void *base, +int xt_compat_check_entry_offsets(const void *base, const char *elems, unsigned int target_offset, unsigned int next_offset); diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index e4084c00bf29..efc75466eab6 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -561,7 +561,8 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e, if (!arp_checkentry(&e->arp)) return -EINVAL; - err = xt_check_entry_offsets(e, e->target_offset, e->next_offset); + err = xt_check_entry_offsets(e, e->elems, e->target_offset, + e->next_offset); if (err) return err; @@ -1223,7 +1224,7 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, if (!arp_checkentry(&e->arp)) return -EINVAL; - ret = xt_compat_check_entry_offsets(e, e->target_offset, + ret = xt_compat_check_entry_offsets(e, e->elems, e->target_offset, e->next_offset); if (ret) return ret; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index fba8929d1117..b04a845e0568 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -721,7 +721,8 @@ check_entry_size_and_hooks(struct ipt_entry *e, if (!ip_checkentry(&e->ip)) return -EINVAL; - err = xt_check_entry_offsets(e, e->target_offset, e->next_offset); + err = xt_check_entry_offsets(e, e->elems, e->target_offset, + e->next_offset); if (err) return err; @@ -1488,7 +1489,7 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, if (!ip_checkentry(&e->ip)) return -EINVAL; - ret = xt_compat_check_entry_offsets(e, + ret = xt_compat_check_entry_offsets(e, e->elems, e->target_offset, e->next_offset); if (ret) return ret; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 1949305e2613..ac964d914eca 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -732,7 +732,8 @@ check_entry_size_and_hooks(struct ip6t_entry *e, if (!ip6_checkentry(&e->ipv6)) return -EINVAL; - err = xt_check_entry_offsets(e, e->target_offset, e->next_offset); + err = xt_check_entry_offsets(e, e->elems, e->target_offset, + e->next_offset); if (err) return err; @@ -1500,7 +1501,7 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, if (!ip6_checkentry(&e->ipv6)) return -EINVAL; - ret = xt_compat_check_entry_offsets(e, + ret = xt_compat_check_entry_offsets(e, e->elems, e->target_offset, e->next_offset); if (ret) return ret; diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 37f7eda8ad19..ea147468df9c 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -565,14 +565,17 @@ struct compat_xt_standard_target { compat_uint_t verdict; }; -/* see xt_check_entry_offsets */ -int xt_compat_check_entry_offsets(const void *base, +int xt_compat_check_entry_offsets(const void *base, const char *elems, unsigned int target_offset, unsigned int next_offset) { + long size_of_base_struct = elems - (const char *)base; const struct compat_xt_entry_target *t; const char *e = base; + if (target_offset < size_of_base_struct) + return -EINVAL; + if (target_offset + sizeof(*t) > next_offset) return -EINVAL; @@ -596,12 +599,16 @@ EXPORT_SYMBOL(xt_compat_check_entry_offsets); * xt_check_entry_offsets - validate arp/ip/ip6t_entry * * @base: pointer to arp/ip/ip6t_entry + * @elems: pointer to first xt_entry_match, i.e. ip(6)t_entry->elems * @target_offset: the arp/ip/ip6_t->target_offset * @next_offset: the arp/ip/ip6_t->next_offset * * validates that target_offset and next_offset are sane. * Also see xt_compat_check_entry_offsets for CONFIG_COMPAT version. * + * This function does not validate the targets or matches themselves, it + * only tests that all the offsets and sizes are correct. + * * The arp/ip/ip6t_entry structure @base must have passed following tests: * - it must point to a valid memory location * - base to base + next_offset must be accessible, i.e. not exceed allocated @@ -610,12 +617,18 @@ EXPORT_SYMBOL(xt_compat_check_entry_offsets); * Return: 0 on success, negative errno on failure. */ int xt_check_entry_offsets(const void *base, + const char *elems, unsigned int target_offset, unsigned int next_offset) { + long size_of_base_struct = elems - (const char *)base; const struct xt_entry_target *t; const char *e = base; + /* target start is within the ip/ip6/arpt_entry struct */ + if (target_offset < size_of_base_struct) + return -EINVAL; + if (target_offset + sizeof(*t) > next_offset) return -EINVAL; From 1b507b9259b31b83303a1787bb88473ebee04402 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 1 Apr 2016 14:17:29 +0200 Subject: [PATCH 013/178] netfilter: x_tables: validate all offsets and sizes in a rule commit 13631bfc604161a9d69cd68991dff8603edd66f9 upstream. Validate that all matches (if any) add up to the beginning of the target and that each match covers at least the base structure size. The compat path should be able to safely re-use the function as the structures only differ in alignment; added a BUILD_BUG_ON just in case we have an arch that adds padding as well. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Willy Tarreau --- net/netfilter/x_tables.c | 81 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 76 insertions(+), 5 deletions(-) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index ea147468df9c..c4826337866b 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -435,6 +435,47 @@ int xt_check_match(struct xt_mtchk_param *par, } EXPORT_SYMBOL_GPL(xt_check_match); +/** xt_check_entry_match - check that matches end before start of target + * + * @match: beginning of xt_entry_match + * @target: beginning of this rules target (alleged end of matches) + * @alignment: alignment requirement of match structures + * + * Validates that all matches add up to the beginning of the target, + * and that each match covers at least the base structure size. + * + * Return: 0 on success, negative errno on failure. + */ +static int xt_check_entry_match(const char *match, const char *target, + const size_t alignment) +{ + const struct xt_entry_match *pos; + int length = target - match; + + if (length == 0) /* no matches */ + return 0; + + pos = (struct xt_entry_match *)match; + do { + if ((unsigned long)pos % alignment) + return -EINVAL; + + if (length < (int)sizeof(struct xt_entry_match)) + return -EINVAL; + + if (pos->u.match_size < sizeof(struct xt_entry_match)) + return -EINVAL; + + if (pos->u.match_size > length) + return -EINVAL; + + length -= pos->u.match_size; + pos = ((void *)((char *)(pos) + (pos)->u.match_size)); + } while (length > 0); + + return 0; +} + #ifdef CONFIG_COMPAT int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta) { @@ -590,7 +631,14 @@ int xt_compat_check_entry_offsets(const void *base, const char *elems, target_offset + sizeof(struct compat_xt_standard_target) != next_offset) return -EINVAL; - return 0; + /* compat_xt_entry match has less strict aligment requirements, + * otherwise they are identical. In case of padding differences + * we need to add compat version of xt_check_entry_match. + */ + BUILD_BUG_ON(sizeof(struct compat_xt_entry_match) != sizeof(struct xt_entry_match)); + + return xt_check_entry_match(elems, base + target_offset, + __alignof__(struct compat_xt_entry_match)); } EXPORT_SYMBOL(xt_compat_check_entry_offsets); #endif /* CONFIG_COMPAT */ @@ -603,17 +651,39 @@ EXPORT_SYMBOL(xt_compat_check_entry_offsets); * @target_offset: the arp/ip/ip6_t->target_offset * @next_offset: the arp/ip/ip6_t->next_offset * - * validates that target_offset and next_offset are sane. - * Also see xt_compat_check_entry_offsets for CONFIG_COMPAT version. + * validates that target_offset and next_offset are sane and that all + * match sizes (if any) align with the target offset. * * This function does not validate the targets or matches themselves, it - * only tests that all the offsets and sizes are correct. + * only tests that all the offsets and sizes are correct, that all + * match structures are aligned, and that the last structure ends where + * the target structure begins. + * + * Also see xt_compat_check_entry_offsets for CONFIG_COMPAT version. * * The arp/ip/ip6t_entry structure @base must have passed following tests: * - it must point to a valid memory location * - base to base + next_offset must be accessible, i.e. not exceed allocated * length. * + * A well-formed entry looks like this: + * + * ip(6)t_entry match [mtdata] match [mtdata] target [tgdata] ip(6)t_entry + * e->elems[]-----' | | + * matchsize | | + * matchsize | | + * | | + * target_offset---------------------------------' | + * next_offset---------------------------------------------------' + * + * elems[]: flexible array member at end of ip(6)/arpt_entry struct. + * This is where matches (if any) and the target reside. + * target_offset: beginning of target. + * next_offset: start of the next rule; also: size of this rule. + * Since targets have a minimum size, target_offset + minlen <= next_offset. + * + * Every match stores its size, sum of sizes must not exceed target_offset. + * * Return: 0 on success, negative errno on failure. */ int xt_check_entry_offsets(const void *base, @@ -643,7 +713,8 @@ int xt_check_entry_offsets(const void *base, target_offset + sizeof(struct xt_standard_target) != next_offset) return -EINVAL; - return 0; + return xt_check_entry_match(elems, base + target_offset, + __alignof__(struct xt_entry_match)); } EXPORT_SYMBOL(xt_check_entry_offsets); From 1a10388eed94b55a77b1984f14f265f3f07cf20a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 1 Jun 2016 02:04:44 +0200 Subject: [PATCH 014/178] netfilter: x_tables: don't reject valid target size on some architectures commit 7b7eba0f3515fca3296b8881d583f7c1042f5226 upstream. Quoting John Stultz: In updating a 32bit arm device from 4.6 to Linus' current HEAD, I noticed I was having some trouble with networking, and realized that /proc/net/ip_tables_names was suddenly empty. Digging through the registration process, it seems we're catching on the: if (strcmp(t->u.user.name, XT_STANDARD_TARGET) == 0 && target_offset + sizeof(struct xt_standard_target) != next_offset) return -EINVAL; Where next_offset seems to be 4 bytes larger then the offset + standard_target struct size. next_offset needs to be aligned via XT_ALIGN (so we can access all members of ip(6)t_entry struct). This problem didn't show up on i686 as it only needs 4-byte alignment for u64, but iptables userspace on other 32bit arches does insert extra padding. Reported-by: John Stultz Tested-by: John Stultz Fixes: 7ed2abddd20cf ("netfilter: x_tables: check standard target size too") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Willy Tarreau --- net/netfilter/x_tables.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index c4826337866b..6ac9fb4f42fc 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -628,7 +628,7 @@ int xt_compat_check_entry_offsets(const void *base, const char *elems, return -EINVAL; if (strcmp(t->u.user.name, XT_STANDARD_TARGET) == 0 && - target_offset + sizeof(struct compat_xt_standard_target) != next_offset) + COMPAT_XT_ALIGN(target_offset + sizeof(struct compat_xt_standard_target)) != next_offset) return -EINVAL; /* compat_xt_entry match has less strict aligment requirements, @@ -710,7 +710,7 @@ int xt_check_entry_offsets(const void *base, return -EINVAL; if (strcmp(t->u.user.name, XT_STANDARD_TARGET) == 0 && - target_offset + sizeof(struct xt_standard_target) != next_offset) + XT_ALIGN(target_offset + sizeof(struct xt_standard_target)) != next_offset) return -EINVAL; return xt_check_entry_match(elems, base + target_offset, From b6694fc658ebf550b870fd4528f0b3711457da73 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 1 Apr 2016 14:17:32 +0200 Subject: [PATCH 015/178] netfilter: arp_tables: simplify translate_compat_table args commit 8dddd32756f6fe8e4e82a63361119b7e2384e02f upstream. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Willy Tarreau --- net/ipv4/netfilter/arp_tables.c | 82 +++++++++++++++------------------ 1 file changed, 36 insertions(+), 46 deletions(-) diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index efc75466eab6..bd1a19e40c82 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1183,6 +1183,18 @@ static int do_add_counters(struct net *net, const void __user *user, } #ifdef CONFIG_COMPAT +struct compat_arpt_replace { + char name[XT_TABLE_MAXNAMELEN]; + u32 valid_hooks; + u32 num_entries; + u32 size; + u32 hook_entry[NF_ARP_NUMHOOKS]; + u32 underflow[NF_ARP_NUMHOOKS]; + u32 num_counters; + compat_uptr_t counters; + struct compat_arpt_entry entries[0]; +}; + static inline void compat_release_entry(struct compat_arpt_entry *e) { struct xt_entry_target *t; @@ -1198,8 +1210,7 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, const unsigned char *base, const unsigned char *limit, const unsigned int *hook_entries, - const unsigned int *underflows, - const char *name) + const unsigned int *underflows) { struct xt_entry_target *t; struct xt_target *target; @@ -1270,7 +1281,7 @@ out: static int compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr, - unsigned int *size, const char *name, + unsigned int *size, struct xt_table_info *newinfo, unsigned char *base) { struct xt_entry_target *t; @@ -1303,14 +1314,9 @@ compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr, return ret; } -static int translate_compat_table(const char *name, - unsigned int valid_hooks, - struct xt_table_info **pinfo, +static int translate_compat_table(struct xt_table_info **pinfo, void **pentry0, - unsigned int total_size, - unsigned int number, - unsigned int *hook_entries, - unsigned int *underflows) + const struct compat_arpt_replace *compatr) { unsigned int i, j; struct xt_table_info *newinfo, *info; @@ -1322,8 +1328,8 @@ static int translate_compat_table(const char *name, info = *pinfo; entry0 = *pentry0; - size = total_size; - info->number = number; + size = compatr->size; + info->number = compatr->num_entries; /* Init all hooks to impossible value. */ for (i = 0; i < NF_ARP_NUMHOOKS; i++) { @@ -1334,40 +1340,39 @@ static int translate_compat_table(const char *name, duprintf("translate_compat_table: size %u\n", info->size); j = 0; xt_compat_lock(NFPROTO_ARP); - xt_compat_init_offsets(NFPROTO_ARP, number); + xt_compat_init_offsets(NFPROTO_ARP, compatr->num_entries); /* Walk through entries, checking offsets. */ - xt_entry_foreach(iter0, entry0, total_size) { + xt_entry_foreach(iter0, entry0, compatr->size) { ret = check_compat_entry_size_and_hooks(iter0, info, &size, entry0, - entry0 + total_size, - hook_entries, - underflows, - name); + entry0 + compatr->size, + compatr->hook_entry, + compatr->underflow); if (ret != 0) goto out_unlock; ++j; } ret = -EINVAL; - if (j != number) { + if (j != compatr->num_entries) { duprintf("translate_compat_table: %u not %u entries\n", - j, number); + j, compatr->num_entries); goto out_unlock; } /* Check hooks all assigned */ for (i = 0; i < NF_ARP_NUMHOOKS; i++) { /* Only hooks which are valid */ - if (!(valid_hooks & (1 << i))) + if (!(compatr->valid_hooks & (1 << i))) continue; if (info->hook_entry[i] == 0xFFFFFFFF) { duprintf("Invalid hook entry %u %u\n", - i, hook_entries[i]); + i, info->hook_entry[i]); goto out_unlock; } if (info->underflow[i] == 0xFFFFFFFF) { duprintf("Invalid underflow %u %u\n", - i, underflows[i]); + i, info->underflow[i]); goto out_unlock; } } @@ -1377,17 +1382,17 @@ static int translate_compat_table(const char *name, if (!newinfo) goto out_unlock; - newinfo->number = number; + newinfo->number = compatr->num_entries; for (i = 0; i < NF_ARP_NUMHOOKS; i++) { newinfo->hook_entry[i] = info->hook_entry[i]; newinfo->underflow[i] = info->underflow[i]; } entry1 = newinfo->entries[raw_smp_processor_id()]; pos = entry1; - size = total_size; - xt_entry_foreach(iter0, entry0, total_size) { + size = compatr->size; + xt_entry_foreach(iter0, entry0, compatr->size) { ret = compat_copy_entry_from_user(iter0, &pos, &size, - name, newinfo, entry1); + newinfo, entry1); if (ret != 0) break; } @@ -1397,12 +1402,12 @@ static int translate_compat_table(const char *name, goto free_newinfo; ret = -ELOOP; - if (!mark_source_chains(newinfo, valid_hooks, entry1)) + if (!mark_source_chains(newinfo, compatr->valid_hooks, entry1)) goto free_newinfo; i = 0; xt_entry_foreach(iter1, entry1, newinfo->size) { - ret = check_target(iter1, name); + ret = check_target(iter1, compatr->name); if (ret != 0) break; ++i; @@ -1447,7 +1452,7 @@ static int translate_compat_table(const char *name, free_newinfo: xt_free_table_info(newinfo); out: - xt_entry_foreach(iter0, entry0, total_size) { + xt_entry_foreach(iter0, entry0, compatr->size) { if (j-- == 0) break; compat_release_entry(iter0); @@ -1459,18 +1464,6 @@ out_unlock: goto out; } -struct compat_arpt_replace { - char name[XT_TABLE_MAXNAMELEN]; - u32 valid_hooks; - u32 num_entries; - u32 size; - u32 hook_entry[NF_ARP_NUMHOOKS]; - u32 underflow[NF_ARP_NUMHOOKS]; - u32 num_counters; - compat_uptr_t counters; - struct compat_arpt_entry entries[0]; -}; - static int compat_do_replace(struct net *net, void __user *user, unsigned int len) { @@ -1501,10 +1494,7 @@ static int compat_do_replace(struct net *net, void __user *user, goto free_newinfo; } - ret = translate_compat_table(tmp.name, tmp.valid_hooks, - &newinfo, &loc_cpu_entry, tmp.size, - tmp.num_entries, tmp.hook_entry, - tmp.underflow); + ret = translate_compat_table(&newinfo, &loc_cpu_entry, &tmp); if (ret != 0) goto free_newinfo; From f9e9232546c17d4fc96d87fd1b3e9a69a5603f1b Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 1 Apr 2016 14:17:30 +0200 Subject: [PATCH 016/178] netfilter: ip_tables: simplify translate_compat_table args commit 7d3f843eed29222254c9feab481f55175a1afcc9 upstream. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Willy Tarreau --- net/ipv4/netfilter/ip_tables.c | 61 ++++++++++++++-------------------- 1 file changed, 25 insertions(+), 36 deletions(-) diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index b04a845e0568..dc0572430516 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1423,7 +1423,6 @@ compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr, static int compat_find_calc_match(struct xt_entry_match *m, - const char *name, const struct ipt_ip *ip, unsigned int hookmask, int *size) @@ -1461,8 +1460,7 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, const unsigned char *base, const unsigned char *limit, const unsigned int *hook_entries, - const unsigned int *underflows, - const char *name) + const unsigned int *underflows) { struct xt_entry_match *ematch; struct xt_entry_target *t; @@ -1498,8 +1496,8 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, entry_offset = (void *)e - (void *)base; j = 0; xt_ematch_foreach(ematch, e) { - ret = compat_find_calc_match(ematch, name, - &e->ip, e->comefrom, &off); + ret = compat_find_calc_match(ematch, &e->ip, e->comefrom, + &off); if (ret != 0) goto release_matches; ++j; @@ -1548,7 +1546,7 @@ release_matches: static int compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr, - unsigned int *size, const char *name, + unsigned int *size, struct xt_table_info *newinfo, unsigned char *base) { struct xt_entry_target *t; @@ -1624,14 +1622,9 @@ compat_check_entry(struct ipt_entry *e, struct net *net, const char *name) static int translate_compat_table(struct net *net, - const char *name, - unsigned int valid_hooks, struct xt_table_info **pinfo, void **pentry0, - unsigned int total_size, - unsigned int number, - unsigned int *hook_entries, - unsigned int *underflows) + const struct compat_ipt_replace *compatr) { unsigned int i, j; struct xt_table_info *newinfo, *info; @@ -1643,8 +1636,8 @@ translate_compat_table(struct net *net, info = *pinfo; entry0 = *pentry0; - size = total_size; - info->number = number; + size = compatr->size; + info->number = compatr->num_entries; /* Init all hooks to impossible value. */ for (i = 0; i < NF_INET_NUMHOOKS; i++) { @@ -1655,40 +1648,39 @@ translate_compat_table(struct net *net, duprintf("translate_compat_table: size %u\n", info->size); j = 0; xt_compat_lock(AF_INET); - xt_compat_init_offsets(AF_INET, number); + xt_compat_init_offsets(AF_INET, compatr->num_entries); /* Walk through entries, checking offsets. */ - xt_entry_foreach(iter0, entry0, total_size) { + xt_entry_foreach(iter0, entry0, compatr->size) { ret = check_compat_entry_size_and_hooks(iter0, info, &size, entry0, - entry0 + total_size, - hook_entries, - underflows, - name); + entry0 + compatr->size, + compatr->hook_entry, + compatr->underflow); if (ret != 0) goto out_unlock; ++j; } ret = -EINVAL; - if (j != number) { + if (j != compatr->num_entries) { duprintf("translate_compat_table: %u not %u entries\n", - j, number); + j, compatr->num_entries); goto out_unlock; } /* Check hooks all assigned */ for (i = 0; i < NF_INET_NUMHOOKS; i++) { /* Only hooks which are valid */ - if (!(valid_hooks & (1 << i))) + if (!(compatr->valid_hooks & (1 << i))) continue; if (info->hook_entry[i] == 0xFFFFFFFF) { duprintf("Invalid hook entry %u %u\n", - i, hook_entries[i]); + i, info->hook_entry[i]); goto out_unlock; } if (info->underflow[i] == 0xFFFFFFFF) { duprintf("Invalid underflow %u %u\n", - i, underflows[i]); + i, info->underflow[i]); goto out_unlock; } } @@ -1698,17 +1690,17 @@ translate_compat_table(struct net *net, if (!newinfo) goto out_unlock; - newinfo->number = number; + newinfo->number = compatr->num_entries; for (i = 0; i < NF_INET_NUMHOOKS; i++) { newinfo->hook_entry[i] = info->hook_entry[i]; newinfo->underflow[i] = info->underflow[i]; } entry1 = newinfo->entries[raw_smp_processor_id()]; pos = entry1; - size = total_size; - xt_entry_foreach(iter0, entry0, total_size) { + size = compatr->size; + xt_entry_foreach(iter0, entry0, compatr->size) { ret = compat_copy_entry_from_user(iter0, &pos, &size, - name, newinfo, entry1); + newinfo, entry1); if (ret != 0) break; } @@ -1718,12 +1710,12 @@ translate_compat_table(struct net *net, goto free_newinfo; ret = -ELOOP; - if (!mark_source_chains(newinfo, valid_hooks, entry1)) + if (!mark_source_chains(newinfo, compatr->valid_hooks, entry1)) goto free_newinfo; i = 0; xt_entry_foreach(iter1, entry1, newinfo->size) { - ret = compat_check_entry(iter1, net, name); + ret = compat_check_entry(iter1, net, compatr->name); if (ret != 0) break; ++i; @@ -1768,7 +1760,7 @@ translate_compat_table(struct net *net, free_newinfo: xt_free_table_info(newinfo); out: - xt_entry_foreach(iter0, entry0, total_size) { + xt_entry_foreach(iter0, entry0, compatr->size) { if (j-- == 0) break; compat_release_entry(iter0); @@ -1811,10 +1803,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) goto free_newinfo; } - ret = translate_compat_table(net, tmp.name, tmp.valid_hooks, - &newinfo, &loc_cpu_entry, tmp.size, - tmp.num_entries, tmp.hook_entry, - tmp.underflow); + ret = translate_compat_table(net, &newinfo, &loc_cpu_entry, &tmp); if (ret != 0) goto free_newinfo; From b560137e0fbba392045f8be120e69a0439a84029 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 1 Apr 2016 14:17:31 +0200 Subject: [PATCH 017/178] netfilter: ip6_tables: simplify translate_compat_table args commit 329a0807124f12fe1c8032f95d8a8eb47047fb0e upstream. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Willy Tarreau --- net/ipv6/netfilter/ip6_tables.c | 61 ++++++++++++++------------------- 1 file changed, 25 insertions(+), 36 deletions(-) diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index ac964d914eca..c182a3ff9c3d 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1435,7 +1435,6 @@ compat_copy_entry_to_user(struct ip6t_entry *e, void __user **dstptr, static int compat_find_calc_match(struct xt_entry_match *m, - const char *name, const struct ip6t_ip6 *ipv6, unsigned int hookmask, int *size) @@ -1473,8 +1472,7 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, const unsigned char *base, const unsigned char *limit, const unsigned int *hook_entries, - const unsigned int *underflows, - const char *name) + const unsigned int *underflows) { struct xt_entry_match *ematch; struct xt_entry_target *t; @@ -1510,8 +1508,8 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, entry_offset = (void *)e - (void *)base; j = 0; xt_ematch_foreach(ematch, e) { - ret = compat_find_calc_match(ematch, name, - &e->ipv6, e->comefrom, &off); + ret = compat_find_calc_match(ematch, &e->ipv6, e->comefrom, + &off); if (ret != 0) goto release_matches; ++j; @@ -1560,7 +1558,7 @@ release_matches: static int compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr, - unsigned int *size, const char *name, + unsigned int *size, struct xt_table_info *newinfo, unsigned char *base) { struct xt_entry_target *t; @@ -1634,14 +1632,9 @@ static int compat_check_entry(struct ip6t_entry *e, struct net *net, static int translate_compat_table(struct net *net, - const char *name, - unsigned int valid_hooks, struct xt_table_info **pinfo, void **pentry0, - unsigned int total_size, - unsigned int number, - unsigned int *hook_entries, - unsigned int *underflows) + const struct compat_ip6t_replace *compatr) { unsigned int i, j; struct xt_table_info *newinfo, *info; @@ -1653,8 +1646,8 @@ translate_compat_table(struct net *net, info = *pinfo; entry0 = *pentry0; - size = total_size; - info->number = number; + size = compatr->size; + info->number = compatr->num_entries; /* Init all hooks to impossible value. */ for (i = 0; i < NF_INET_NUMHOOKS; i++) { @@ -1665,40 +1658,39 @@ translate_compat_table(struct net *net, duprintf("translate_compat_table: size %u\n", info->size); j = 0; xt_compat_lock(AF_INET6); - xt_compat_init_offsets(AF_INET6, number); + xt_compat_init_offsets(AF_INET6, compatr->num_entries); /* Walk through entries, checking offsets. */ - xt_entry_foreach(iter0, entry0, total_size) { + xt_entry_foreach(iter0, entry0, compatr->size) { ret = check_compat_entry_size_and_hooks(iter0, info, &size, entry0, - entry0 + total_size, - hook_entries, - underflows, - name); + entry0 + compatr->size, + compatr->hook_entry, + compatr->underflow); if (ret != 0) goto out_unlock; ++j; } ret = -EINVAL; - if (j != number) { + if (j != compatr->num_entries) { duprintf("translate_compat_table: %u not %u entries\n", - j, number); + j, compatr->num_entries); goto out_unlock; } /* Check hooks all assigned */ for (i = 0; i < NF_INET_NUMHOOKS; i++) { /* Only hooks which are valid */ - if (!(valid_hooks & (1 << i))) + if (!(compatr->valid_hooks & (1 << i))) continue; if (info->hook_entry[i] == 0xFFFFFFFF) { duprintf("Invalid hook entry %u %u\n", - i, hook_entries[i]); + i, info->hook_entry[i]); goto out_unlock; } if (info->underflow[i] == 0xFFFFFFFF) { duprintf("Invalid underflow %u %u\n", - i, underflows[i]); + i, info->underflow[i]); goto out_unlock; } } @@ -1708,17 +1700,17 @@ translate_compat_table(struct net *net, if (!newinfo) goto out_unlock; - newinfo->number = number; + newinfo->number = compatr->num_entries; for (i = 0; i < NF_INET_NUMHOOKS; i++) { newinfo->hook_entry[i] = info->hook_entry[i]; newinfo->underflow[i] = info->underflow[i]; } entry1 = newinfo->entries[raw_smp_processor_id()]; pos = entry1; - size = total_size; - xt_entry_foreach(iter0, entry0, total_size) { + size = compatr->size; + xt_entry_foreach(iter0, entry0, compatr->size) { ret = compat_copy_entry_from_user(iter0, &pos, &size, - name, newinfo, entry1); + newinfo, entry1); if (ret != 0) break; } @@ -1728,12 +1720,12 @@ translate_compat_table(struct net *net, goto free_newinfo; ret = -ELOOP; - if (!mark_source_chains(newinfo, valid_hooks, entry1)) + if (!mark_source_chains(newinfo, compatr->valid_hooks, entry1)) goto free_newinfo; i = 0; xt_entry_foreach(iter1, entry1, newinfo->size) { - ret = compat_check_entry(iter1, net, name); + ret = compat_check_entry(iter1, net, compatr->name); if (ret != 0) break; ++i; @@ -1778,7 +1770,7 @@ translate_compat_table(struct net *net, free_newinfo: xt_free_table_info(newinfo); out: - xt_entry_foreach(iter0, entry0, total_size) { + xt_entry_foreach(iter0, entry0, compatr->size) { if (j-- == 0) break; compat_release_entry(iter0); @@ -1821,10 +1813,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) goto free_newinfo; } - ret = translate_compat_table(net, tmp.name, tmp.valid_hooks, - &newinfo, &loc_cpu_entry, tmp.size, - tmp.num_entries, tmp.hook_entry, - tmp.underflow); + ret = translate_compat_table(net, &newinfo, &loc_cpu_entry, &tmp); if (ret != 0) goto free_newinfo; From fbe426f822a42ca6759448185e4933069f46d822 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 1 Apr 2016 14:17:33 +0200 Subject: [PATCH 018/178] netfilter: x_tables: xt_compat_match_from_user doesn't need a retval commit 0188346f21e6546498c2a0f84888797ad4063fc5 upstream. Always returned 0. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Willy Tarreau --- include/linux/netfilter/x_tables.h | 2 +- net/ipv4/netfilter/arp_tables.c | 17 +++++------------ net/ipv4/netfilter/ip_tables.c | 26 +++++++++----------------- net/ipv6/netfilter/ip6_tables.c | 27 +++++++++------------------ net/netfilter/x_tables.c | 5 ++--- 5 files changed, 26 insertions(+), 51 deletions(-) diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 8bb770647bd5..9f0d2b52b976 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -427,7 +427,7 @@ extern void xt_compat_init_offsets(u_int8_t af, unsigned int number); extern int xt_compat_calc_jump(u_int8_t af, unsigned int offset); extern int xt_compat_match_offset(const struct xt_match *match); -extern int xt_compat_match_from_user(struct xt_entry_match *m, +extern void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, unsigned int *size); extern int xt_compat_match_to_user(const struct xt_entry_match *m, void __user **dstptr, unsigned int *size); diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index bd1a19e40c82..ed22b6066d74 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1279,7 +1279,7 @@ out: return ret; } -static int +static void compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr, unsigned int *size, struct xt_table_info *newinfo, unsigned char *base) @@ -1288,9 +1288,8 @@ compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr, struct xt_target *target; struct arpt_entry *de; unsigned int origsize; - int ret, h; + int h; - ret = 0; origsize = *size; de = (struct arpt_entry *)*dstptr; memcpy(de, e, sizeof(struct arpt_entry)); @@ -1311,7 +1310,6 @@ compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr, if ((unsigned char *)de - base < newinfo->underflow[h]) newinfo->underflow[h] -= origsize - *size; } - return ret; } static int translate_compat_table(struct xt_table_info **pinfo, @@ -1390,16 +1388,11 @@ static int translate_compat_table(struct xt_table_info **pinfo, entry1 = newinfo->entries[raw_smp_processor_id()]; pos = entry1; size = compatr->size; - xt_entry_foreach(iter0, entry0, compatr->size) { - ret = compat_copy_entry_from_user(iter0, &pos, &size, - newinfo, entry1); - if (ret != 0) - break; - } + xt_entry_foreach(iter0, entry0, compatr->size) + compat_copy_entry_from_user(iter0, &pos, &size, + newinfo, entry1); xt_compat_flush_offsets(NFPROTO_ARP); xt_compat_unlock(NFPROTO_ARP); - if (ret) - goto free_newinfo; ret = -ELOOP; if (!mark_source_chains(newinfo, compatr->valid_hooks, entry1)) diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index dc0572430516..0f9d1d8458fb 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1544,7 +1544,7 @@ release_matches: return ret; } -static int +static void compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr, unsigned int *size, struct xt_table_info *newinfo, unsigned char *base) @@ -1553,10 +1553,9 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr, struct xt_target *target; struct ipt_entry *de; unsigned int origsize; - int ret, h; + int h; struct xt_entry_match *ematch; - ret = 0; origsize = *size; de = (struct ipt_entry *)*dstptr; memcpy(de, e, sizeof(struct ipt_entry)); @@ -1565,11 +1564,9 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr, *dstptr += sizeof(struct ipt_entry); *size += sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry); - xt_ematch_foreach(ematch, e) { - ret = xt_compat_match_from_user(ematch, dstptr, size); - if (ret != 0) - return ret; - } + xt_ematch_foreach(ematch, e) + xt_compat_match_from_user(ematch, dstptr, size); + de->target_offset = e->target_offset - (origsize - *size); t = compat_ipt_get_target(e); target = t->u.kernel.target; @@ -1582,7 +1579,6 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr, if ((unsigned char *)de - base < newinfo->underflow[h]) newinfo->underflow[h] -= origsize - *size; } - return ret; } static int @@ -1698,16 +1694,12 @@ translate_compat_table(struct net *net, entry1 = newinfo->entries[raw_smp_processor_id()]; pos = entry1; size = compatr->size; - xt_entry_foreach(iter0, entry0, compatr->size) { - ret = compat_copy_entry_from_user(iter0, &pos, &size, - newinfo, entry1); - if (ret != 0) - break; - } + xt_entry_foreach(iter0, entry0, compatr->size) + compat_copy_entry_from_user(iter0, &pos, &size, + newinfo, entry1); + xt_compat_flush_offsets(AF_INET); xt_compat_unlock(AF_INET); - if (ret) - goto free_newinfo; ret = -ELOOP; if (!mark_source_chains(newinfo, compatr->valid_hooks, entry1)) diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index c182a3ff9c3d..441a4f823a48 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1556,7 +1556,7 @@ release_matches: return ret; } -static int +static void compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr, unsigned int *size, struct xt_table_info *newinfo, unsigned char *base) @@ -1564,10 +1564,9 @@ compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr, struct xt_entry_target *t; struct ip6t_entry *de; unsigned int origsize; - int ret, h; + int h; struct xt_entry_match *ematch; - ret = 0; origsize = *size; de = (struct ip6t_entry *)*dstptr; memcpy(de, e, sizeof(struct ip6t_entry)); @@ -1576,11 +1575,9 @@ compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr, *dstptr += sizeof(struct ip6t_entry); *size += sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry); - xt_ematch_foreach(ematch, e) { - ret = xt_compat_match_from_user(ematch, dstptr, size); - if (ret != 0) - return ret; - } + xt_ematch_foreach(ematch, e) + xt_compat_match_from_user(ematch, dstptr, size); + de->target_offset = e->target_offset - (origsize - *size); t = compat_ip6t_get_target(e); xt_compat_target_from_user(t, dstptr, size); @@ -1592,7 +1589,6 @@ compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr, if ((unsigned char *)de - base < newinfo->underflow[h]) newinfo->underflow[h] -= origsize - *size; } - return ret; } static int compat_check_entry(struct ip6t_entry *e, struct net *net, @@ -1707,17 +1703,12 @@ translate_compat_table(struct net *net, } entry1 = newinfo->entries[raw_smp_processor_id()]; pos = entry1; - size = compatr->size; - xt_entry_foreach(iter0, entry0, compatr->size) { - ret = compat_copy_entry_from_user(iter0, &pos, &size, - newinfo, entry1); - if (ret != 0) - break; - } + xt_entry_foreach(iter0, entry0, compatr->size) + compat_copy_entry_from_user(iter0, &pos, &size, + newinfo, entry1); + xt_compat_flush_offsets(AF_INET6); xt_compat_unlock(AF_INET6); - if (ret) - goto free_newinfo; ret = -ELOOP; if (!mark_source_chains(newinfo, compatr->valid_hooks, entry1)) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 6ac9fb4f42fc..9cf3039deac2 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -545,8 +545,8 @@ int xt_compat_match_offset(const struct xt_match *match) } EXPORT_SYMBOL_GPL(xt_compat_match_offset); -int xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, - unsigned int *size) +void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, + unsigned int *size) { const struct xt_match *match = m->u.kernel.match; struct compat_xt_entry_match *cm = (struct compat_xt_entry_match *)m; @@ -568,7 +568,6 @@ int xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, *size += off; *dstptr += msize; - return 0; } EXPORT_SYMBOL_GPL(xt_compat_match_from_user); From bb5605936e4f62e89e314ad49421f69cf23ebff8 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Tue, 19 May 2015 20:55:17 -0400 Subject: [PATCH 019/178] netfilter: ensure number of counters is >0 in do_replace() commit 1086bbe97a074844188c6c988fa0b1a98c3ccbb9 upstream. After improving setsockopt() coverage in trinity, I started triggering vmalloc failures pretty reliably from this code path: warn_alloc_failed+0xe9/0x140 __vmalloc_node_range+0x1be/0x270 vzalloc+0x4b/0x50 __do_replace+0x52/0x260 [ip_tables] do_ipt_set_ctl+0x15d/0x1d0 [ip_tables] nf_setsockopt+0x65/0x90 ip_setsockopt+0x61/0xa0 raw_setsockopt+0x16/0x60 sock_common_setsockopt+0x14/0x20 SyS_setsockopt+0x71/0xd0 It turns out we don't validate that the num_counters field in the struct we pass in from userspace is initialized. The same problem also exists in ebtables, arptables, ipv6, and the compat variants. Signed-off-by: Dave Jones Signed-off-by: Pablo Neira Ayuso Signed-off-by: Willy Tarreau --- net/bridge/netfilter/ebtables.c | 4 ++++ net/ipv4/netfilter/arp_tables.c | 6 ++++++ net/ipv4/netfilter/ip_tables.c | 6 ++++++ net/ipv6/netfilter/ip6_tables.c | 6 ++++++ 4 files changed, 22 insertions(+) diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 6651a7797d46..8d6094836d0a 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1105,6 +1105,8 @@ static int do_replace(struct net *net, const void __user *user, return -ENOMEM; if (tmp.num_counters >= INT_MAX / sizeof(struct ebt_counter)) return -ENOMEM; + if (tmp.num_counters == 0) + return -EINVAL; tmp.name[sizeof(tmp.name) - 1] = 0; @@ -2150,6 +2152,8 @@ static int compat_copy_ebt_replace_from_user(struct ebt_replace *repl, return -ENOMEM; if (tmp.num_counters >= INT_MAX / sizeof(struct ebt_counter)) return -ENOMEM; + if (tmp.num_counters == 0) + return -EINVAL; memcpy(repl, &tmp, offsetof(struct ebt_replace, hook_entry)); diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index ed22b6066d74..a0030d56cac2 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1060,6 +1060,9 @@ static int do_replace(struct net *net, const void __user *user, /* overflow check */ if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) return -ENOMEM; + if (tmp.num_counters == 0) + return -EINVAL; + tmp.name[sizeof(tmp.name)-1] = 0; newinfo = xt_alloc_table_info(tmp.size); @@ -1474,6 +1477,9 @@ static int compat_do_replace(struct net *net, void __user *user, return -ENOMEM; if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) return -ENOMEM; + if (tmp.num_counters == 0) + return -EINVAL; + tmp.name[sizeof(tmp.name)-1] = 0; newinfo = xt_alloc_table_info(tmp.size); diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 0f9d1d8458fb..e189ad45dfdf 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1246,6 +1246,9 @@ do_replace(struct net *net, const void __user *user, unsigned int len) /* overflow check */ if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) return -ENOMEM; + if (tmp.num_counters == 0) + return -EINVAL; + tmp.name[sizeof(tmp.name)-1] = 0; newinfo = xt_alloc_table_info(tmp.size); @@ -1781,6 +1784,9 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) return -ENOMEM; if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) return -ENOMEM; + if (tmp.num_counters == 0) + return -EINVAL; + tmp.name[sizeof(tmp.name)-1] = 0; newinfo = xt_alloc_table_info(tmp.size); diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 441a4f823a48..6685d09cdedc 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1256,6 +1256,9 @@ do_replace(struct net *net, const void __user *user, unsigned int len) /* overflow check */ if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) return -ENOMEM; + if (tmp.num_counters == 0) + return -EINVAL; + tmp.name[sizeof(tmp.name)-1] = 0; newinfo = xt_alloc_table_info(tmp.size); @@ -1790,6 +1793,9 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) return -ENOMEM; if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) return -ENOMEM; + if (tmp.num_counters == 0) + return -EINVAL; + tmp.name[sizeof(tmp.name)-1] = 0; newinfo = xt_alloc_table_info(tmp.size); From 151cc2f5e35f3babc90a1a3cf23d2ac80c7b5803 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 1 Apr 2016 14:17:34 +0200 Subject: [PATCH 020/178] netfilter: x_tables: do compat validation via translate_table commit 09d9686047dbbe1cf4faa558d3ecc4aae2046054 upstream. This looks like refactoring, but its also a bug fix. Problem is that the compat path (32bit iptables, 64bit kernel) lacks a few sanity tests that are done in the normal path. For example, we do not check for underflows and the base chain policies. While its possible to also add such checks to the compat path, its more copy&pastry, for instance we cannot reuse check_underflow() helper as e->target_offset differs in the compat case. Other problem is that it makes auditing for validation errors harder; two places need to be checked and kept in sync. At a high level 32 bit compat works like this: 1- initial pass over blob: validate match/entry offsets, bounds checking lookup all matches and targets do bookkeeping wrt. size delta of 32/64bit structures assign match/target.u.kernel pointer (points at kernel implementation, needed to access ->compatsize etc.) 2- allocate memory according to the total bookkeeping size to contain the translated ruleset 3- second pass over original blob: for each entry, copy the 32bit representation to the newly allocated memory. This also does any special match translations (e.g. adjust 32bit to 64bit longs, etc). 4- check if ruleset is free of loops (chase all jumps) 5-first pass over translated blob: call the checkentry function of all matches and targets. The alternative implemented by this patch is to drop steps 3&4 from the compat process, the translation is changed into an intermediate step rather than a full 1:1 translate_table replacement. In the 2nd pass (step #3), change the 64bit ruleset back to a kernel representation, i.e. put() the kernel pointer and restore ->u.user.name . This gets us a 64bit ruleset that is in the format generated by a 64bit iptables userspace -- we can then use translate_table() to get the 'native' sanity checks. This has two drawbacks: 1. we re-validate all the match and target entry structure sizes even though compat translation is supposed to never generate bogus offsets. 2. we put and then re-lookup each match and target. THe upside is that we get all sanity tests and ruleset validations provided by the normal path and can remove some duplicated compat code. iptables-restore time of autogenerated ruleset with 300k chains of form -A CHAIN0001 -m limit --limit 1/s -j CHAIN0002 -A CHAIN0002 -m limit --limit 1/s -j CHAIN0003 shows no noticeable differences in restore times: old: 0m30.796s new: 0m31.521s 64bit: 0m25.674s Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Willy Tarreau --- net/ipv4/netfilter/arp_tables.c | 113 +++++------------------ net/ipv4/netfilter/ip_tables.c | 155 +++++++------------------------- net/ipv6/netfilter/ip6_tables.c | 149 +++++------------------------- net/netfilter/x_tables.c | 8 ++ 4 files changed, 86 insertions(+), 339 deletions(-) diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index a0030d56cac2..28489d97e9c1 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1206,19 +1206,17 @@ static inline void compat_release_entry(struct compat_arpt_entry *e) module_put(t->u.kernel.target->me); } -static inline int +static int check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, struct xt_table_info *newinfo, unsigned int *size, const unsigned char *base, - const unsigned char *limit, - const unsigned int *hook_entries, - const unsigned int *underflows) + const unsigned char *limit) { struct xt_entry_target *t; struct xt_target *target; unsigned int entry_offset; - int ret, off, h; + int ret, off; duprintf("check_compat_entry_size_and_hooks %p\n", e); if ((unsigned long)e % __alignof__(struct compat_arpt_entry) != 0 || @@ -1263,17 +1261,6 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, if (ret) goto release_target; - /* Check hooks & underflows */ - for (h = 0; h < NF_ARP_NUMHOOKS; h++) { - if ((unsigned char *)e - base == hook_entries[h]) - newinfo->hook_entry[h] = hook_entries[h]; - if ((unsigned char *)e - base == underflows[h]) - newinfo->underflow[h] = underflows[h]; - } - - /* Clear counters and comefrom */ - memset(&e->counters, 0, sizeof(e->counters)); - e->comefrom = 0; return 0; release_target: @@ -1323,7 +1310,7 @@ static int translate_compat_table(struct xt_table_info **pinfo, struct xt_table_info *newinfo, *info; void *pos, *entry0, *entry1; struct compat_arpt_entry *iter0; - struct arpt_entry *iter1; + struct arpt_replace repl; unsigned int size; int ret = 0; @@ -1332,12 +1319,6 @@ static int translate_compat_table(struct xt_table_info **pinfo, size = compatr->size; info->number = compatr->num_entries; - /* Init all hooks to impossible value. */ - for (i = 0; i < NF_ARP_NUMHOOKS; i++) { - info->hook_entry[i] = 0xFFFFFFFF; - info->underflow[i] = 0xFFFFFFFF; - } - duprintf("translate_compat_table: size %u\n", info->size); j = 0; xt_compat_lock(NFPROTO_ARP); @@ -1346,9 +1327,7 @@ static int translate_compat_table(struct xt_table_info **pinfo, xt_entry_foreach(iter0, entry0, compatr->size) { ret = check_compat_entry_size_and_hooks(iter0, info, &size, entry0, - entry0 + compatr->size, - compatr->hook_entry, - compatr->underflow); + entry0 + compatr->size); if (ret != 0) goto out_unlock; ++j; @@ -1361,23 +1340,6 @@ static int translate_compat_table(struct xt_table_info **pinfo, goto out_unlock; } - /* Check hooks all assigned */ - for (i = 0; i < NF_ARP_NUMHOOKS; i++) { - /* Only hooks which are valid */ - if (!(compatr->valid_hooks & (1 << i))) - continue; - if (info->hook_entry[i] == 0xFFFFFFFF) { - duprintf("Invalid hook entry %u %u\n", - i, info->hook_entry[i]); - goto out_unlock; - } - if (info->underflow[i] == 0xFFFFFFFF) { - duprintf("Invalid underflow %u %u\n", - i, info->underflow[i]); - goto out_unlock; - } - } - ret = -ENOMEM; newinfo = xt_alloc_table_info(size); if (!newinfo) @@ -1394,52 +1356,26 @@ static int translate_compat_table(struct xt_table_info **pinfo, xt_entry_foreach(iter0, entry0, compatr->size) compat_copy_entry_from_user(iter0, &pos, &size, newinfo, entry1); + + /* all module references in entry0 are now gone */ + xt_compat_flush_offsets(NFPROTO_ARP); xt_compat_unlock(NFPROTO_ARP); - ret = -ELOOP; - if (!mark_source_chains(newinfo, compatr->valid_hooks, entry1)) + memcpy(&repl, compatr, sizeof(*compatr)); + + for (i = 0; i < NF_ARP_NUMHOOKS; i++) { + repl.hook_entry[i] = newinfo->hook_entry[i]; + repl.underflow[i] = newinfo->underflow[i]; + } + + repl.num_counters = 0; + repl.counters = NULL; + repl.size = newinfo->size; + ret = translate_table(newinfo, entry1, &repl); + if (ret) goto free_newinfo; - i = 0; - xt_entry_foreach(iter1, entry1, newinfo->size) { - ret = check_target(iter1, compatr->name); - if (ret != 0) - break; - ++i; - if (strcmp(arpt_get_target(iter1)->u.user.name, - XT_ERROR_TARGET) == 0) - ++newinfo->stacksize; - } - if (ret) { - /* - * The first i matches need cleanup_entry (calls ->destroy) - * because they had called ->check already. The other j-i - * entries need only release. - */ - int skip = i; - j -= i; - xt_entry_foreach(iter0, entry0, newinfo->size) { - if (skip-- > 0) - continue; - if (j-- == 0) - break; - compat_release_entry(iter0); - } - xt_entry_foreach(iter1, entry1, newinfo->size) { - if (i-- == 0) - break; - cleanup_entry(iter1); - } - xt_free_table_info(newinfo); - return ret; - } - - /* And one copy for every other CPU */ - for_each_possible_cpu(i) - if (newinfo->entries[i] && newinfo->entries[i] != entry1) - memcpy(newinfo->entries[i], entry1, newinfo->size); - *pinfo = newinfo; *pentry0 = entry1; xt_free_table_info(info); @@ -1447,17 +1383,16 @@ static int translate_compat_table(struct xt_table_info **pinfo, free_newinfo: xt_free_table_info(newinfo); -out: + return ret; +out_unlock: + xt_compat_flush_offsets(NFPROTO_ARP); + xt_compat_unlock(NFPROTO_ARP); xt_entry_foreach(iter0, entry0, compatr->size) { if (j-- == 0) break; compat_release_entry(iter0); } return ret; -out_unlock: - xt_compat_flush_offsets(NFPROTO_ARP); - xt_compat_unlock(NFPROTO_ARP); - goto out; } static int compat_do_replace(struct net *net, void __user *user, diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index e189ad45dfdf..e728884e5c50 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1461,16 +1461,14 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, struct xt_table_info *newinfo, unsigned int *size, const unsigned char *base, - const unsigned char *limit, - const unsigned int *hook_entries, - const unsigned int *underflows) + const unsigned char *limit) { struct xt_entry_match *ematch; struct xt_entry_target *t; struct xt_target *target; unsigned int entry_offset; unsigned int j; - int ret, off, h; + int ret, off; duprintf("check_compat_entry_size_and_hooks %p\n", e); if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0 || @@ -1523,17 +1521,6 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, if (ret) goto out; - /* Check hooks & underflows */ - for (h = 0; h < NF_INET_NUMHOOKS; h++) { - if ((unsigned char *)e - base == hook_entries[h]) - newinfo->hook_entry[h] = hook_entries[h]; - if ((unsigned char *)e - base == underflows[h]) - newinfo->underflow[h] = underflows[h]; - } - - /* Clear counters and comefrom */ - memset(&e->counters, 0, sizeof(e->counters)); - e->comefrom = 0; return 0; out: @@ -1576,6 +1563,7 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr, xt_compat_target_from_user(t, dstptr, size); de->next_offset = e->next_offset - (origsize - *size); + for (h = 0; h < NF_INET_NUMHOOKS; h++) { if ((unsigned char *)de - base < newinfo->hook_entry[h]) newinfo->hook_entry[h] -= origsize - *size; @@ -1584,41 +1572,6 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr, } } -static int -compat_check_entry(struct ipt_entry *e, struct net *net, const char *name) -{ - struct xt_entry_match *ematch; - struct xt_mtchk_param mtpar; - unsigned int j; - int ret = 0; - - j = 0; - mtpar.net = net; - mtpar.table = name; - mtpar.entryinfo = &e->ip; - mtpar.hook_mask = e->comefrom; - mtpar.family = NFPROTO_IPV4; - xt_ematch_foreach(ematch, e) { - ret = check_match(ematch, &mtpar); - if (ret != 0) - goto cleanup_matches; - ++j; - } - - ret = check_target(e, net, name); - if (ret) - goto cleanup_matches; - return 0; - - cleanup_matches: - xt_ematch_foreach(ematch, e) { - if (j-- == 0) - break; - cleanup_match(ematch, net); - } - return ret; -} - static int translate_compat_table(struct net *net, struct xt_table_info **pinfo, @@ -1629,7 +1582,7 @@ translate_compat_table(struct net *net, struct xt_table_info *newinfo, *info; void *pos, *entry0, *entry1; struct compat_ipt_entry *iter0; - struct ipt_entry *iter1; + struct ipt_replace repl; unsigned int size; int ret; @@ -1638,12 +1591,6 @@ translate_compat_table(struct net *net, size = compatr->size; info->number = compatr->num_entries; - /* Init all hooks to impossible value. */ - for (i = 0; i < NF_INET_NUMHOOKS; i++) { - info->hook_entry[i] = 0xFFFFFFFF; - info->underflow[i] = 0xFFFFFFFF; - } - duprintf("translate_compat_table: size %u\n", info->size); j = 0; xt_compat_lock(AF_INET); @@ -1652,9 +1599,7 @@ translate_compat_table(struct net *net, xt_entry_foreach(iter0, entry0, compatr->size) { ret = check_compat_entry_size_and_hooks(iter0, info, &size, entry0, - entry0 + compatr->size, - compatr->hook_entry, - compatr->underflow); + entry0 + compatr->size); if (ret != 0) goto out_unlock; ++j; @@ -1667,23 +1612,6 @@ translate_compat_table(struct net *net, goto out_unlock; } - /* Check hooks all assigned */ - for (i = 0; i < NF_INET_NUMHOOKS; i++) { - /* Only hooks which are valid */ - if (!(compatr->valid_hooks & (1 << i))) - continue; - if (info->hook_entry[i] == 0xFFFFFFFF) { - duprintf("Invalid hook entry %u %u\n", - i, info->hook_entry[i]); - goto out_unlock; - } - if (info->underflow[i] == 0xFFFFFFFF) { - duprintf("Invalid underflow %u %u\n", - i, info->underflow[i]); - goto out_unlock; - } - } - ret = -ENOMEM; newinfo = xt_alloc_table_info(size); if (!newinfo) @@ -1691,8 +1619,8 @@ translate_compat_table(struct net *net, newinfo->number = compatr->num_entries; for (i = 0; i < NF_INET_NUMHOOKS; i++) { - newinfo->hook_entry[i] = info->hook_entry[i]; - newinfo->underflow[i] = info->underflow[i]; + newinfo->hook_entry[i] = compatr->hook_entry[i]; + newinfo->underflow[i] = compatr->underflow[i]; } entry1 = newinfo->entries[raw_smp_processor_id()]; pos = entry1; @@ -1701,52 +1629,30 @@ translate_compat_table(struct net *net, compat_copy_entry_from_user(iter0, &pos, &size, newinfo, entry1); + /* all module references in entry0 are now gone. + * entry1/newinfo contains a 64bit ruleset that looks exactly as + * generated by 64bit userspace. + * + * Call standard translate_table() to validate all hook_entrys, + * underflows, check for loops, etc. + */ xt_compat_flush_offsets(AF_INET); xt_compat_unlock(AF_INET); - ret = -ELOOP; - if (!mark_source_chains(newinfo, compatr->valid_hooks, entry1)) + memcpy(&repl, compatr, sizeof(*compatr)); + + for (i = 0; i < NF_INET_NUMHOOKS; i++) { + repl.hook_entry[i] = newinfo->hook_entry[i]; + repl.underflow[i] = newinfo->underflow[i]; + } + + repl.num_counters = 0; + repl.counters = NULL; + repl.size = newinfo->size; + ret = translate_table(net, newinfo, entry1, &repl); + if (ret) goto free_newinfo; - i = 0; - xt_entry_foreach(iter1, entry1, newinfo->size) { - ret = compat_check_entry(iter1, net, compatr->name); - if (ret != 0) - break; - ++i; - if (strcmp(ipt_get_target(iter1)->u.user.name, - XT_ERROR_TARGET) == 0) - ++newinfo->stacksize; - } - if (ret) { - /* - * The first i matches need cleanup_entry (calls ->destroy) - * because they had called ->check already. The other j-i - * entries need only release. - */ - int skip = i; - j -= i; - xt_entry_foreach(iter0, entry0, newinfo->size) { - if (skip-- > 0) - continue; - if (j-- == 0) - break; - compat_release_entry(iter0); - } - xt_entry_foreach(iter1, entry1, newinfo->size) { - if (i-- == 0) - break; - cleanup_entry(iter1, net); - } - xt_free_table_info(newinfo); - return ret; - } - - /* And one copy for every other CPU */ - for_each_possible_cpu(i) - if (newinfo->entries[i] && newinfo->entries[i] != entry1) - memcpy(newinfo->entries[i], entry1, newinfo->size); - *pinfo = newinfo; *pentry0 = entry1; xt_free_table_info(info); @@ -1754,17 +1660,16 @@ translate_compat_table(struct net *net, free_newinfo: xt_free_table_info(newinfo); -out: + return ret; +out_unlock: + xt_compat_flush_offsets(AF_INET); + xt_compat_unlock(AF_INET); xt_entry_foreach(iter0, entry0, compatr->size) { if (j-- == 0) break; compat_release_entry(iter0); } return ret; -out_unlock: - xt_compat_flush_offsets(AF_INET); - xt_compat_unlock(AF_INET); - goto out; } static int diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 6685d09cdedc..efbe986a562a 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1473,16 +1473,14 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, struct xt_table_info *newinfo, unsigned int *size, const unsigned char *base, - const unsigned char *limit, - const unsigned int *hook_entries, - const unsigned int *underflows) + const unsigned char *limit) { struct xt_entry_match *ematch; struct xt_entry_target *t; struct xt_target *target; unsigned int entry_offset; unsigned int j; - int ret, off, h; + int ret, off; duprintf("check_compat_entry_size_and_hooks %p\n", e); if ((unsigned long)e % __alignof__(struct compat_ip6t_entry) != 0 || @@ -1535,17 +1533,6 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, if (ret) goto out; - /* Check hooks & underflows */ - for (h = 0; h < NF_INET_NUMHOOKS; h++) { - if ((unsigned char *)e - base == hook_entries[h]) - newinfo->hook_entry[h] = hook_entries[h]; - if ((unsigned char *)e - base == underflows[h]) - newinfo->underflow[h] = underflows[h]; - } - - /* Clear counters and comefrom */ - memset(&e->counters, 0, sizeof(e->counters)); - e->comefrom = 0; return 0; out: @@ -1594,41 +1581,6 @@ compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr, } } -static int compat_check_entry(struct ip6t_entry *e, struct net *net, - const char *name) -{ - unsigned int j; - int ret = 0; - struct xt_mtchk_param mtpar; - struct xt_entry_match *ematch; - - j = 0; - mtpar.net = net; - mtpar.table = name; - mtpar.entryinfo = &e->ipv6; - mtpar.hook_mask = e->comefrom; - mtpar.family = NFPROTO_IPV6; - xt_ematch_foreach(ematch, e) { - ret = check_match(ematch, &mtpar); - if (ret != 0) - goto cleanup_matches; - ++j; - } - - ret = check_target(e, net, name); - if (ret) - goto cleanup_matches; - return 0; - - cleanup_matches: - xt_ematch_foreach(ematch, e) { - if (j-- == 0) - break; - cleanup_match(ematch, net); - } - return ret; -} - static int translate_compat_table(struct net *net, struct xt_table_info **pinfo, @@ -1639,7 +1591,7 @@ translate_compat_table(struct net *net, struct xt_table_info *newinfo, *info; void *pos, *entry0, *entry1; struct compat_ip6t_entry *iter0; - struct ip6t_entry *iter1; + struct ip6t_replace repl; unsigned int size; int ret = 0; @@ -1648,12 +1600,6 @@ translate_compat_table(struct net *net, size = compatr->size; info->number = compatr->num_entries; - /* Init all hooks to impossible value. */ - for (i = 0; i < NF_INET_NUMHOOKS; i++) { - info->hook_entry[i] = 0xFFFFFFFF; - info->underflow[i] = 0xFFFFFFFF; - } - duprintf("translate_compat_table: size %u\n", info->size); j = 0; xt_compat_lock(AF_INET6); @@ -1662,9 +1608,7 @@ translate_compat_table(struct net *net, xt_entry_foreach(iter0, entry0, compatr->size) { ret = check_compat_entry_size_and_hooks(iter0, info, &size, entry0, - entry0 + compatr->size, - compatr->hook_entry, - compatr->underflow); + entry0 + compatr->size); if (ret != 0) goto out_unlock; ++j; @@ -1677,23 +1621,6 @@ translate_compat_table(struct net *net, goto out_unlock; } - /* Check hooks all assigned */ - for (i = 0; i < NF_INET_NUMHOOKS; i++) { - /* Only hooks which are valid */ - if (!(compatr->valid_hooks & (1 << i))) - continue; - if (info->hook_entry[i] == 0xFFFFFFFF) { - duprintf("Invalid hook entry %u %u\n", - i, info->hook_entry[i]); - goto out_unlock; - } - if (info->underflow[i] == 0xFFFFFFFF) { - duprintf("Invalid underflow %u %u\n", - i, info->underflow[i]); - goto out_unlock; - } - } - ret = -ENOMEM; newinfo = xt_alloc_table_info(size); if (!newinfo) @@ -1701,61 +1628,34 @@ translate_compat_table(struct net *net, newinfo->number = compatr->num_entries; for (i = 0; i < NF_INET_NUMHOOKS; i++) { - newinfo->hook_entry[i] = info->hook_entry[i]; - newinfo->underflow[i] = info->underflow[i]; + newinfo->hook_entry[i] = compatr->hook_entry[i]; + newinfo->underflow[i] = compatr->underflow[i]; } entry1 = newinfo->entries[raw_smp_processor_id()]; pos = entry1; + size = compatr->size; xt_entry_foreach(iter0, entry0, compatr->size) compat_copy_entry_from_user(iter0, &pos, &size, newinfo, entry1); + /* all module references in entry0 are now gone. */ xt_compat_flush_offsets(AF_INET6); xt_compat_unlock(AF_INET6); - ret = -ELOOP; - if (!mark_source_chains(newinfo, compatr->valid_hooks, entry1)) + memcpy(&repl, compatr, sizeof(*compatr)); + + for (i = 0; i < NF_INET_NUMHOOKS; i++) { + repl.hook_entry[i] = newinfo->hook_entry[i]; + repl.underflow[i] = newinfo->underflow[i]; + } + + repl.num_counters = 0; + repl.counters = NULL; + repl.size = newinfo->size; + ret = translate_table(net, newinfo, entry1, &repl); + if (ret) goto free_newinfo; - i = 0; - xt_entry_foreach(iter1, entry1, newinfo->size) { - ret = compat_check_entry(iter1, net, compatr->name); - if (ret != 0) - break; - ++i; - if (strcmp(ip6t_get_target(iter1)->u.user.name, - XT_ERROR_TARGET) == 0) - ++newinfo->stacksize; - } - if (ret) { - /* - * The first i matches need cleanup_entry (calls ->destroy) - * because they had called ->check already. The other j-i - * entries need only release. - */ - int skip = i; - j -= i; - xt_entry_foreach(iter0, entry0, newinfo->size) { - if (skip-- > 0) - continue; - if (j-- == 0) - break; - compat_release_entry(iter0); - } - xt_entry_foreach(iter1, entry1, newinfo->size) { - if (i-- == 0) - break; - cleanup_entry(iter1, net); - } - xt_free_table_info(newinfo); - return ret; - } - - /* And one copy for every other CPU */ - for_each_possible_cpu(i) - if (newinfo->entries[i] && newinfo->entries[i] != entry1) - memcpy(newinfo->entries[i], entry1, newinfo->size); - *pinfo = newinfo; *pentry0 = entry1; xt_free_table_info(info); @@ -1763,17 +1663,16 @@ translate_compat_table(struct net *net, free_newinfo: xt_free_table_info(newinfo); -out: + return ret; +out_unlock: + xt_compat_flush_offsets(AF_INET6); + xt_compat_unlock(AF_INET6); xt_entry_foreach(iter0, entry0, compatr->size) { if (j-- == 0) break; compat_release_entry(iter0); } return ret; -out_unlock: - xt_compat_flush_offsets(AF_INET6); - xt_compat_unlock(AF_INET6); - goto out; } static int diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 9cf3039deac2..da11978eff5d 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -552,6 +552,7 @@ void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, struct compat_xt_entry_match *cm = (struct compat_xt_entry_match *)m; int pad, off = xt_compat_match_offset(match); u_int16_t msize = cm->u.user.match_size; + char name[sizeof(m->u.user.name)]; m = *dstptr; memcpy(m, cm, sizeof(*cm)); @@ -565,6 +566,9 @@ void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, msize += off; m->u.user.match_size = msize; + strlcpy(name, match->name, sizeof(name)); + module_put(match->me); + strncpy(m->u.user.name, name, sizeof(m->u.user.name)); *size += off; *dstptr += msize; @@ -782,6 +786,7 @@ void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr, struct compat_xt_entry_target *ct = (struct compat_xt_entry_target *)t; int pad, off = xt_compat_target_offset(target); u_int16_t tsize = ct->u.user.target_size; + char name[sizeof(t->u.user.name)]; t = *dstptr; memcpy(t, ct, sizeof(*ct)); @@ -795,6 +800,9 @@ void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr, tsize += off; t->u.user.target_size = tsize; + strlcpy(name, target->name, sizeof(name)); + module_put(target->me); + strncpy(t->u.user.name, name, sizeof(t->u.user.name)); *size += off; *dstptr += tsize; From eb4a7d671bb54008039dfd99bc45a39e4f73a516 Mon Sep 17 00:00:00 2001 From: Bernhard Thaler Date: Thu, 28 May 2015 10:26:18 +0200 Subject: [PATCH 021/178] Revert "netfilter: ensure number of counters is >0 in do_replace()" commit d26e2c9ffa385dd1b646f43c1397ba12af9ed431 upstream. This partially reverts commit 1086bbe97a07 ("netfilter: ensure number of counters is >0 in do_replace()") in net/bridge/netfilter/ebtables.c. Setting rules with ebtables does not work any more with 1086bbe97a07 place. There is an error message and no rules set in the end. e.g. ~# ebtables -t nat -A POSTROUTING --src 12:34:56:78:9a:bc -j DROP Unable to update the kernel. Two possible causes: 1. Multiple ebtables programs were executing simultaneously. The ebtables userspace tool doesn't by default support multiple ebtables programs running Reverting the ebtables part of 1086bbe97a07 makes this work again. Signed-off-by: Bernhard Thaler Signed-off-by: Pablo Neira Ayuso Signed-off-by: Willy Tarreau --- net/bridge/netfilter/ebtables.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 8d6094836d0a..6651a7797d46 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1105,8 +1105,6 @@ static int do_replace(struct net *net, const void __user *user, return -ENOMEM; if (tmp.num_counters >= INT_MAX / sizeof(struct ebt_counter)) return -ENOMEM; - if (tmp.num_counters == 0) - return -EINVAL; tmp.name[sizeof(tmp.name) - 1] = 0; @@ -2152,8 +2150,6 @@ static int compat_copy_ebt_replace_from_user(struct ebt_replace *repl, return -ENOMEM; if (tmp.num_counters >= INT_MAX / sizeof(struct ebt_counter)) return -ENOMEM; - if (tmp.num_counters == 0) - return -EINVAL; memcpy(repl, &tmp, offsetof(struct ebt_replace, hook_entry)); From 0a0523382f1a1e95799311648f0267177d752ea4 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 1 Apr 2016 15:37:59 +0200 Subject: [PATCH 022/178] netfilter: x_tables: introduce and use xt_copy_counters_from_user commit 63ecb81aadf1c823c85c70a2bfd1ec9df3341a72 upstream. commit d7591f0c41ce3e67600a982bab6989ef0f07b3ce upstream The three variants use same copy&pasted code, condense this into a helper and use that. Make sure info.name is 0-terminated. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman Signed-off-by: Willy Tarreau --- include/linux/netfilter/x_tables.h | 3 ++ net/ipv4/netfilter/arp_tables.c | 48 ++----------------- net/ipv4/netfilter/ip_tables.c | 48 ++----------------- net/ipv6/netfilter/ip6_tables.c | 49 ++------------------ net/netfilter/x_tables.c | 74 ++++++++++++++++++++++++++++++ 5 files changed, 92 insertions(+), 130 deletions(-) diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 9f0d2b52b976..547a5846e6ac 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -248,6 +248,9 @@ extern int xt_check_match(struct xt_mtchk_param *, extern int xt_check_target(struct xt_tgchk_param *, unsigned int size, u_int8_t proto, bool inv_proto); +void *xt_copy_counters_from_user(const void __user *user, unsigned int len, + struct xt_counters_info *info, bool compat); + extern struct xt_table *xt_register_table(struct net *net, const struct xt_table *table, struct xt_table_info *bootstrap, diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 28489d97e9c1..95a5f261fe8a 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1103,56 +1103,18 @@ static int do_add_counters(struct net *net, const void __user *user, unsigned int i, curcpu; struct xt_counters_info tmp; struct xt_counters *paddc; - unsigned int num_counters; - const char *name; - int size; - void *ptmp; struct xt_table *t; const struct xt_table_info *private; int ret = 0; void *loc_cpu_entry; struct arpt_entry *iter; unsigned int addend; -#ifdef CONFIG_COMPAT - struct compat_xt_counters_info compat_tmp; - if (compat) { - ptmp = &compat_tmp; - size = sizeof(struct compat_xt_counters_info); - } else -#endif - { - ptmp = &tmp; - size = sizeof(struct xt_counters_info); - } + paddc = xt_copy_counters_from_user(user, len, &tmp, compat); + if (IS_ERR(paddc)) + return PTR_ERR(paddc); - if (copy_from_user(ptmp, user, size) != 0) - return -EFAULT; - -#ifdef CONFIG_COMPAT - if (compat) { - num_counters = compat_tmp.num_counters; - name = compat_tmp.name; - } else -#endif - { - num_counters = tmp.num_counters; - name = tmp.name; - } - - if (len != size + num_counters * sizeof(struct xt_counters)) - return -EINVAL; - - paddc = vmalloc(len - size); - if (!paddc) - return -ENOMEM; - - if (copy_from_user(paddc, user + size, len - size) != 0) { - ret = -EFAULT; - goto free; - } - - t = xt_find_table_lock(net, NFPROTO_ARP, name); + t = xt_find_table_lock(net, NFPROTO_ARP, tmp.name); if (IS_ERR_OR_NULL(t)) { ret = t ? PTR_ERR(t) : -ENOENT; goto free; @@ -1160,7 +1122,7 @@ static int do_add_counters(struct net *net, const void __user *user, local_bh_disable(); private = t->private; - if (private->number != num_counters) { + if (private->number != tmp.num_counters) { ret = -EINVAL; goto unlock_up_free; } diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index e728884e5c50..92c8f2727ee9 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1290,56 +1290,18 @@ do_add_counters(struct net *net, const void __user *user, unsigned int i, curcpu; struct xt_counters_info tmp; struct xt_counters *paddc; - unsigned int num_counters; - const char *name; - int size; - void *ptmp; struct xt_table *t; const struct xt_table_info *private; int ret = 0; void *loc_cpu_entry; struct ipt_entry *iter; unsigned int addend; -#ifdef CONFIG_COMPAT - struct compat_xt_counters_info compat_tmp; - if (compat) { - ptmp = &compat_tmp; - size = sizeof(struct compat_xt_counters_info); - } else -#endif - { - ptmp = &tmp; - size = sizeof(struct xt_counters_info); - } + paddc = xt_copy_counters_from_user(user, len, &tmp, compat); + if (IS_ERR(paddc)) + return PTR_ERR(paddc); - if (copy_from_user(ptmp, user, size) != 0) - return -EFAULT; - -#ifdef CONFIG_COMPAT - if (compat) { - num_counters = compat_tmp.num_counters; - name = compat_tmp.name; - } else -#endif - { - num_counters = tmp.num_counters; - name = tmp.name; - } - - if (len != size + num_counters * sizeof(struct xt_counters)) - return -EINVAL; - - paddc = vmalloc(len - size); - if (!paddc) - return -ENOMEM; - - if (copy_from_user(paddc, user + size, len - size) != 0) { - ret = -EFAULT; - goto free; - } - - t = xt_find_table_lock(net, AF_INET, name); + t = xt_find_table_lock(net, AF_INET, tmp.name); if (IS_ERR_OR_NULL(t)) { ret = t ? PTR_ERR(t) : -ENOENT; goto free; @@ -1347,7 +1309,7 @@ do_add_counters(struct net *net, const void __user *user, local_bh_disable(); private = t->private; - if (private->number != num_counters) { + if (private->number != tmp.num_counters) { ret = -EINVAL; goto unlock_up_free; } diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index efbe986a562a..e214222cd06f 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1300,56 +1300,17 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len, unsigned int i, curcpu; struct xt_counters_info tmp; struct xt_counters *paddc; - unsigned int num_counters; - char *name; - int size; - void *ptmp; struct xt_table *t; const struct xt_table_info *private; int ret = 0; const void *loc_cpu_entry; struct ip6t_entry *iter; unsigned int addend; -#ifdef CONFIG_COMPAT - struct compat_xt_counters_info compat_tmp; - if (compat) { - ptmp = &compat_tmp; - size = sizeof(struct compat_xt_counters_info); - } else -#endif - { - ptmp = &tmp; - size = sizeof(struct xt_counters_info); - } - - if (copy_from_user(ptmp, user, size) != 0) - return -EFAULT; - -#ifdef CONFIG_COMPAT - if (compat) { - num_counters = compat_tmp.num_counters; - name = compat_tmp.name; - } else -#endif - { - num_counters = tmp.num_counters; - name = tmp.name; - } - - if (len != size + num_counters * sizeof(struct xt_counters)) - return -EINVAL; - - paddc = vmalloc(len - size); - if (!paddc) - return -ENOMEM; - - if (copy_from_user(paddc, user + size, len - size) != 0) { - ret = -EFAULT; - goto free; - } - - t = xt_find_table_lock(net, AF_INET6, name); + paddc = xt_copy_counters_from_user(user, len, &tmp, compat); + if (IS_ERR(paddc)) + return PTR_ERR(paddc); + t = xt_find_table_lock(net, AF_INET6, tmp.name); if (IS_ERR_OR_NULL(t)) { ret = t ? PTR_ERR(t) : -ENOENT; goto free; @@ -1358,7 +1319,7 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len, local_bh_disable(); private = t->private; - if (private->number != num_counters) { + if (private->number != tmp.num_counters) { ret = -EINVAL; goto unlock_up_free; } diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index da11978eff5d..51c141b09dba 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -771,6 +771,80 @@ int xt_check_target(struct xt_tgchk_param *par, } EXPORT_SYMBOL_GPL(xt_check_target); +/** + * xt_copy_counters_from_user - copy counters and metadata from userspace + * + * @user: src pointer to userspace memory + * @len: alleged size of userspace memory + * @info: where to store the xt_counters_info metadata + * @compat: true if we setsockopt call is done by 32bit task on 64bit kernel + * + * Copies counter meta data from @user and stores it in @info. + * + * vmallocs memory to hold the counters, then copies the counter data + * from @user to the new memory and returns a pointer to it. + * + * If @compat is true, @info gets converted automatically to the 64bit + * representation. + * + * The metadata associated with the counters is stored in @info. + * + * Return: returns pointer that caller has to test via IS_ERR(). + * If IS_ERR is false, caller has to vfree the pointer. + */ +void *xt_copy_counters_from_user(const void __user *user, unsigned int len, + struct xt_counters_info *info, bool compat) +{ + void *mem; + u64 size; + +#ifdef CONFIG_COMPAT + if (compat) { + /* structures only differ in size due to alignment */ + struct compat_xt_counters_info compat_tmp; + + if (len <= sizeof(compat_tmp)) + return ERR_PTR(-EINVAL); + + len -= sizeof(compat_tmp); + if (copy_from_user(&compat_tmp, user, sizeof(compat_tmp)) != 0) + return ERR_PTR(-EFAULT); + + strlcpy(info->name, compat_tmp.name, sizeof(info->name)); + info->num_counters = compat_tmp.num_counters; + user += sizeof(compat_tmp); + } else +#endif + { + if (len <= sizeof(*info)) + return ERR_PTR(-EINVAL); + + len -= sizeof(*info); + if (copy_from_user(info, user, sizeof(*info)) != 0) + return ERR_PTR(-EFAULT); + + info->name[sizeof(info->name) - 1] = '\0'; + user += sizeof(*info); + } + + size = sizeof(struct xt_counters); + size *= info->num_counters; + + if (size != (u64)len) + return ERR_PTR(-EINVAL); + + mem = vmalloc(len); + if (!mem) + return ERR_PTR(-ENOMEM); + + if (copy_from_user(mem, user, len) == 0) + return mem; + + vfree(mem); + return ERR_PTR(-EFAULT); +} +EXPORT_SYMBOL_GPL(xt_copy_counters_from_user); + #ifdef CONFIG_COMPAT int xt_compat_target_offset(const struct xt_target *target) { From ad0fd1a5f8441837568a0e0c0b3e8ed14ce5969f Mon Sep 17 00:00:00 2001 From: "Palik, Imre" Date: Mon, 8 Jun 2015 14:46:49 +0200 Subject: [PATCH 023/178] perf/x86: Honor the architectural performance monitoring version commit 2c33645d366d13b969d936b68b9f4875b1fdddea upstream. Architectural performance monitoring, version 1, doesn't support fixed counters. Currently, even if a hypervisor advertises support for architectural performance monitoring version 1, perf may still try to use the fixed counters, as the constraints are set up based on the CPU model. This patch ensures that perf honors the architectural performance monitoring version returned by CPUID, and it only uses the fixed counters for version 2 and above. (Some of the ideas in this patch came from Peter Zijlstra.) Signed-off-by: Imre Palik Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Andy Lutomirski Cc: Anthony Liguori Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1433767609-1039-1-git-send-email-imrep.amz@gmail.com Signed-off-by: Ingo Molnar [wt: FIXED_EVENT_FLAGS was X86_RAW_EVENT_MASK in 3.10] Cc: Kevin Christopher Signed-off-by: Willy Tarreau --- arch/x86/kernel/cpu/perf_event_intel.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index ac057583282a..bfc6bb5ac654 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -2241,13 +2241,16 @@ __init int intel_pmu_init(void) * counter, so do not extend mask to generic counters */ for_each_event_constraint(c, x86_pmu.event_constraints) { - if (c->cmask != X86_RAW_EVENT_MASK - || c->idxmsk64 == INTEL_PMC_MSK_FIXED_REF_CYCLES) { + if (c->cmask == X86_RAW_EVENT_MASK + && c->idxmsk64 == INTEL_PMC_MSK_FIXED_REF_CYCLES) { + c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1; continue; } - c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1; - c->weight += x86_pmu.num_counters; + c->idxmsk64 &= + ~(~0UL << (INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed)); + c->weight = hweight64(c->idxmsk64); + } } From 55e1f395ceb2d6e26ae8e3699eb756abc394fbbe Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Wed, 11 May 2016 16:51:51 +0300 Subject: [PATCH 024/178] perf/x86: Fix undefined shift on 32-bit kernels commit 6d6f2833bfbf296101f9f085e10488aef2601ba5 upstream. Jim reported: UBSAN: Undefined behaviour in arch/x86/events/intel/core.c:3708:12 shift exponent 35 is too large for 32-bit type 'long unsigned int' The use of 'unsigned long' type obviously is not correct here, make it 'unsigned long long' instead. Reported-by: Jim Cromie Signed-off-by: Andrey Ryabinin Signed-off-by: Peter Zijlstra (Intel) Cc: Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: H. Peter Anvin Cc: Imre Palik Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Fixes: 2c33645d366d ("perf/x86: Honor the architectural performance monitoring version") Link: http://lkml.kernel.org/r/1462974711-10037-1-git-send-email-aryabinin@virtuozzo.com Signed-off-by: Ingo Molnar Cc: Kevin Christopher Signed-off-by: Willy Tarreau --- arch/x86/kernel/cpu/perf_event_intel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index bfc6bb5ac654..a18154454e36 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -2248,7 +2248,7 @@ __init int intel_pmu_init(void) } c->idxmsk64 &= - ~(~0UL << (INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed)); + ~(~0ULL << (INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed)); c->weight = hweight64(c->idxmsk64); } From 15db6bf1002c974b80cfcb274c21efa06d6d6c9f Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Thu, 16 Apr 2015 12:47:35 -0700 Subject: [PATCH 025/178] signal: remove warning about using SI_TKILL in rt_[tg]sigqueueinfo commit 69828dce7af2cb6d08ef5a03de687d422fb7ec1f upstream. Sending SI_TKILL from rt_[tg]sigqueueinfo was deprecated, so now we issue a warning on the first attempt of doing it. We use WARN_ON_ONCE, which is not informative and, what is worse, taints the kernel, making the trinity syscall fuzzer complain false-positively from time to time. It does not look like we need this warning at all, because the behaviour changed quite a long time ago (2.6.39), and if an application relies on the old API, it gets EPERM anyway and can issue a warning by itself. So let us zap the warning in kernel. Signed-off-by: Vladimir Davydov Acked-by: Oleg Nesterov Cc: Richard Weinberger Cc: "Paul E. McKenney" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Cc: Vinson Lee Signed-off-by: Willy Tarreau --- kernel/signal.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/kernel/signal.c b/kernel/signal.c index 4d1f7fa3138d..7b81c53b0097 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -3004,11 +3004,9 @@ static int do_rt_sigqueueinfo(pid_t pid, int sig, siginfo_t *info) * Nor can they impersonate a kill()/tgkill(), which adds source info. */ if ((info->si_code >= 0 || info->si_code == SI_TKILL) && - (task_pid_vnr(current) != pid)) { - /* We used to allow any < 0 si_code */ - WARN_ON_ONCE(info->si_code < 0); + (task_pid_vnr(current) != pid)) return -EPERM; - } + info->si_signo = sig; /* POSIX.1b doesn't mention process groups. */ @@ -3053,12 +3051,10 @@ static int do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, siginfo_t *info) /* Not even root can pretend to send signals from the kernel. * Nor can they impersonate a kill()/tgkill(), which adds source info. */ - if (((info->si_code >= 0 || info->si_code == SI_TKILL)) && - (task_pid_vnr(current) != pid)) { - /* We used to allow any < 0 si_code */ - WARN_ON_ONCE(info->si_code < 0); + if ((info->si_code >= 0 || info->si_code == SI_TKILL) && + (task_pid_vnr(current) != pid)) return -EPERM; - } + info->si_signo = sig; return do_send_specific(tgid, pid, sig, info); From 6aaf5d4c50f75bfb633e1883d69421ef0593b508 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Fri, 15 Jul 2016 14:26:23 -0400 Subject: [PATCH 026/178] PCI/ACPI: Fix _OSC ordering to allow PCIe hotplug use when available commit 3dc48af310709b85d07c8b0d3aa8f1ead02829d3 upstream. This fixes the problem of acpiphp claiming slots that should be managed by pciehp, which may keep ExpressCard slots from working. The acpiphp driver claims PCIe slots unless the BIOS has granted us control of PCIe native hotplug via _OSC. Prior to v3.10, the acpiphp .add method (add_bridge()) was always called *after* we had requested native hotplug control with _OSC. But after 3b63aaa70e ("PCI: acpiphp: Do not use ACPI PCI subdriver mechanism"), which appeared in v3.10, acpiphp initialization is done during the bus scan via the pcibios_add_bus() hook, and this happens *before* we request native hotplug control. Therefore, acpiphp doesn't know yet whether the BIOS will grant control, and it claims slots that we should be handling with native hotplug. This patch requests native hotplug control earlier, so we know whether the BIOS granted it to us before we initialize acpiphp. To avoid reintroducing the ASPM issue fixed by b8178f130e ('Revert "PCI/ACPI: Request _OSC control before scanning PCI root bus"'), we run _OSC earlier but defer the actual ASPM calls until after the bus scan is complete. Tested successfully by myself. [bhelgaas: changelog, mark for stable] Reference: https://bugzilla.kernel.org/show_bug.cgi?id=60736 Signed-off-by: Neil Horman Signed-off-by: Bjorn Helgaas Acked-by: Yinghai Lu CC: stable@vger.kernel.org # v3.10+ CC: Len Brown CC: "Rafael J. Wysocki" [ciwillia@brocade.com: backported to 3.10: adjusted context] Signed-off-by: Charles (Chas) Williams Signed-off-by: Willy Tarreau --- drivers/acpi/pci_root.c | 67 +++++++++++++++++++++++------------------ 1 file changed, 38 insertions(+), 29 deletions(-) diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c index a02a91cd1de4..c5e3dd93865a 100644 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@ -385,6 +385,7 @@ static int acpi_pci_root_add(struct acpi_device *device, int result; struct acpi_pci_root *root; u32 flags, base_flags; + bool no_aspm = false, clear_aspm = false; root = kzalloc(sizeof(struct acpi_pci_root), GFP_KERNEL); if (!root) @@ -445,31 +446,10 @@ static int acpi_pci_root_add(struct acpi_device *device, flags = base_flags = OSC_PCI_SEGMENT_GROUPS_SUPPORT; acpi_pci_osc_support(root, flags); - /* - * TBD: Need PCI interface for enumeration/configuration of roots. - */ - mutex_lock(&acpi_pci_root_lock); list_add_tail(&root->node, &acpi_pci_roots); mutex_unlock(&acpi_pci_root_lock); - /* - * Scan the Root Bridge - * -------------------- - * Must do this prior to any attempt to bind the root device, as the - * PCI namespace does not get created until this call is made (and - * thus the root bridge's pci_dev does not exist). - */ - root->bus = pci_acpi_scan_root(root); - if (!root->bus) { - printk(KERN_ERR PREFIX - "Bus %04x:%02x not present in PCI namespace\n", - root->segment, (unsigned int)root->secondary.start); - result = -ENODEV; - goto out_del_root; - } - - /* Indicate support for various _OSC capabilities. */ if (pci_ext_cfg_avail()) flags |= OSC_EXT_PCI_CONFIG_SUPPORT; if (pcie_aspm_support_enabled()) { @@ -483,7 +463,7 @@ static int acpi_pci_root_add(struct acpi_device *device, if (ACPI_FAILURE(status)) { dev_info(&device->dev, "ACPI _OSC support " "notification failed, disabling PCIe ASPM\n"); - pcie_no_aspm(); + no_aspm = true; flags = base_flags; } } @@ -515,7 +495,7 @@ static int acpi_pci_root_add(struct acpi_device *device, * We have ASPM control, but the FADT indicates * that it's unsupported. Clear it. */ - pcie_clear_aspm(root->bus); + clear_aspm = true; } } else { dev_info(&device->dev, @@ -524,7 +504,14 @@ static int acpi_pci_root_add(struct acpi_device *device, acpi_format_exception(status), flags); pr_info("ACPI _OSC control for PCIe not granted, " "disabling ASPM\n"); - pcie_no_aspm(); + /* + * We want to disable ASPM here, but aspm_disabled + * needs to remain in its state from boot so that we + * properly handle PCIe 1.1 devices. So we set this + * flag here, to defer the action until after the ACPI + * root scan. + */ + no_aspm = true; } } else { dev_info(&device->dev, @@ -532,6 +519,33 @@ static int acpi_pci_root_add(struct acpi_device *device, "(_OSC support mask: 0x%02x)\n", flags); } + /* + * TBD: Need PCI interface for enumeration/configuration of roots. + */ + + /* + * Scan the Root Bridge + * -------------------- + * Must do this prior to any attempt to bind the root device, as the + * PCI namespace does not get created until this call is made (and + * thus the root bridge's pci_dev does not exist). + */ + root->bus = pci_acpi_scan_root(root); + if (!root->bus) { + dev_err(&device->dev, + "Bus %04x:%02x not present in PCI namespace\n", + root->segment, (unsigned int)root->secondary.start); + result = -ENODEV; + goto end; + } + + if (clear_aspm) { + dev_info(&device->dev, "Disabling ASPM (FADT indicates it is unsupported)\n"); + pcie_clear_aspm(root->bus); + } + if (no_aspm) + pcie_no_aspm(); + pci_acpi_add_bus_pm_notifier(device, root->bus); if (device->wakeup.flags.run_wake) device_set_run_wake(root->bus->bridge, true); @@ -548,11 +562,6 @@ static int acpi_pci_root_add(struct acpi_device *device, pci_bus_add_devices(root->bus); return 1; -out_del_root: - mutex_lock(&acpi_pci_root_lock); - list_del(&root->node); - mutex_unlock(&acpi_pci_root_lock); - end: kfree(root); return result; From 98f57e42cab062608cf3dce2b8eecbb2a0780ac4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 15 Jul 2016 14:26:24 -0400 Subject: [PATCH 027/178] udp: properly support MSG_PEEK with truncated buffers commit 197c949e7798fbf28cfadc69d9ca0c2abbf93191 upstream. Backport of this upstream commit into stable kernels : 89c22d8c3b27 ("net: Fix skb csum races when peeking") exposed a bug in udp stack vs MSG_PEEK support, when user provides a buffer smaller than skb payload. In this case, skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov); returns -EFAULT. This bug does not happen in upstream kernels since Al Viro did a great job to replace this into : skb_copy_and_csum_datagram_msg(skb, sizeof(struct udphdr), msg); This variant is safe vs short buffers. For the time being, instead reverting Herbert Xu patch and add back skb->ip_summed invalid changes, simply store the result of udp_lib_checksum_complete() so that we avoid computing the checksum a second time, and avoid the problematic skb_copy_and_csum_datagram_iovec() call. This patch can be applied on recent kernels as it avoids a double checksumming, then backported to stable kernels as a bug fix. Signed-off-by: Eric Dumazet Acked-by: Herbert Xu Signed-off-by: David S. Miller [ luis: backported to 3.16: adjusted context ] Signed-off-by: Luis Henriques Signed-off-by: Charles (Chas) Williams Signed-off-by: Willy Tarreau --- net/ipv4/udp.c | 6 ++++-- net/ipv6/udp.c | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 63b536bbf0b0..68174e4d88c7 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1208,6 +1208,7 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, int peeked, off = 0; int err; int is_udplite = IS_UDPLITE(sk); + bool checksum_valid = false; bool slow; if (flags & MSG_ERRQUEUE) @@ -1233,11 +1234,12 @@ try_again: */ if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) { - if (udp_lib_checksum_complete(skb)) + checksum_valid = !udp_lib_checksum_complete(skb); + if (!checksum_valid) goto csum_copy_err; } - if (skb_csum_unnecessary(skb)) + if (checksum_valid || skb_csum_unnecessary(skb)) err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov, copied); else { diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 3046d0244393..d234e6f80570 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -370,6 +370,7 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, int peeked, off = 0; int err; int is_udplite = IS_UDPLITE(sk); + bool checksum_valid = false; int is_udp4; bool slow; @@ -401,11 +402,12 @@ try_again: */ if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) { - if (udp_lib_checksum_complete(skb)) + checksum_valid = !udp_lib_checksum_complete(skb); + if (!checksum_valid) goto csum_copy_err; } - if (skb_csum_unnecessary(skb)) + if (checksum_valid || skb_csum_unnecessary(skb)) err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov, copied); else { From bbb094201689b833910a5753fad2f46be2c78b67 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 15 Jul 2016 14:26:25 -0400 Subject: [PATCH 028/178] USB: fix invalid memory access in hub_activate() commit e50293ef9775c5f1cf3fcc093037dd6a8c5684ea upstream. Commit 8520f38099cc ("USB: change hub initialization sleeps to delayed_work") changed the hub_activate() routine to make part of it run in a workqueue. However, the commit failed to take a reference to the usb_hub structure or to lock the hub interface while doing so. As a result, if a hub is plugged in and quickly unplugged before the work routine can run, the routine will try to access memory that has been deallocated. Or, if the hub is unplugged while the routine is running, the memory may be deallocated while it is in active use. This patch fixes the problem by taking a reference to the usb_hub at the start of hub_activate() and releasing it at the end (when the work is finished), and by locking the hub interface while the work routine is running. It also adds a check at the start of the routine to see if the hub has already been disconnected, in which nothing should be done. CVE-2015-8816 Signed-off-by: Alan Stern Reported-by: Alexandru Cornea Tested-by: Alexandru Cornea Fixes: 8520f38099cc ("USB: change hub initialization sleeps to delayed_work") Signed-off-by: Greg Kroah-Hartman [ luis: backported to 3.16: - Added forward declaration of hub_release() which mainline had with commit 32a6958998c5 ("usb: hub: convert khubd into workqueue") ] Signed-off-by: Luis Henriques Signed-off-by: Charles (Chas) Williams Signed-off-by: Willy Tarreau --- drivers/usb/core/hub.c | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 8eb2de6beee4..4e5156d212dd 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -113,6 +113,7 @@ EXPORT_SYMBOL_GPL(ehci_cf_port_reset_rwsem); #define HUB_DEBOUNCE_STEP 25 #define HUB_DEBOUNCE_STABLE 100 +static void hub_release(struct kref *kref); static int usb_reset_and_verify_device(struct usb_device *udev); static inline char *portspeed(struct usb_hub *hub, int portstatus) @@ -1024,10 +1025,20 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) unsigned delay; /* Continue a partial initialization */ - if (type == HUB_INIT2) - goto init2; - if (type == HUB_INIT3) + if (type == HUB_INIT2 || type == HUB_INIT3) { + device_lock(hub->intfdev); + + /* Was the hub disconnected while we were waiting? */ + if (hub->disconnected) { + device_unlock(hub->intfdev); + kref_put(&hub->kref, hub_release); + return; + } + if (type == HUB_INIT2) + goto init2; goto init3; + } + kref_get(&hub->kref); /* The superspeed hub except for root hub has to use Hub Depth * value as an offset into the route string to locate the bits @@ -1224,6 +1235,7 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) PREPARE_DELAYED_WORK(&hub->init_work, hub_init_func3); schedule_delayed_work(&hub->init_work, msecs_to_jiffies(delay)); + device_unlock(hub->intfdev); return; /* Continues at init3: below */ } else { msleep(delay); @@ -1244,6 +1256,11 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) /* Allow autosuspend if it was suppressed */ if (type <= HUB_INIT3) usb_autopm_put_interface_async(to_usb_interface(hub->intfdev)); + + if (type == HUB_INIT2 || type == HUB_INIT3) + device_unlock(hub->intfdev); + + kref_put(&hub->kref, hub_release); } /* Implement the continuations for the delays above */ From ba3904ee86cb7072c2435883421b165dc1684bce Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Fri, 15 Jul 2016 15:08:18 -0400 Subject: [PATCH 029/178] USB: usbfs: fix potential infoleak in devio commit 681fef8380eb818c0b845fca5d2ab1dcbab114ee upstream. The stack object "ci" has a total size of 8 bytes. Its last 3 bytes are padding bytes which are not initialized and leaked to userland via "copy_to_user". CVE-2016-4482 Signed-off-by: Kangjie Lu Signed-off-by: Greg Kroah-Hartman [ciwillia@brocade.com: backported to 3.10: adjusted context] Signed-off-by: Charles (Chas) Williams Signed-off-by: Willy Tarreau --- drivers/usb/core/devio.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index 62e532fb82ad..cfce807531f6 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -1106,10 +1106,11 @@ static int proc_getdriver(struct dev_state *ps, void __user *arg) static int proc_connectinfo(struct dev_state *ps, void __user *arg) { - struct usbdevfs_connectinfo ci = { - .devnum = ps->dev->devnum, - .slow = ps->dev->speed == USB_SPEED_LOW - }; + struct usbdevfs_connectinfo ci; + + memset(&ci, 0, sizeof(ci)); + ci.devnum = ps->dev->devnum; + ci.slow = ps->dev->speed == USB_SPEED_LOW; if (copy_to_user(arg, &ci, sizeof(ci))) return -EFAULT; From 903c5a46a861244e7fc5ddf6101963b6a4c9b88f Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Sun, 21 Aug 2016 10:47:12 +0200 Subject: [PATCH 030/178] USB: fix up faulty backports Ben Hutchings reported that two patches were incorrectly backported to 3.10 : - ddbe1fca0bcb ("USB: Add device quirk for ASUS T100 Base Station keyboard") - ad87e03213b5 ("USB: add quirk for devices with broken LPM") These two patches introduce quirks which must be in usb_quirk_list and not in usb_interface_quirk_list. These last one must only contain the Logitech UVC camera. Reported-by: Ben Hutchings Signed-off-by: Willy Tarreau --- drivers/usb/core/quirks.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 94e9cddc05c1..aa27ec1f4813 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -170,14 +170,6 @@ static const struct usb_device_id usb_quirk_list[] = { /* INTEL VALUE SSD */ { USB_DEVICE(0x8086, 0xf1a5), .driver_info = USB_QUIRK_RESET_RESUME }, - { } /* terminating entry must be last */ -}; - -static const struct usb_device_id usb_interface_quirk_list[] = { - /* Logitech UVC Cameras */ - { USB_VENDOR_AND_INTERFACE_INFO(0x046d, USB_CLASS_VIDEO, 1, 0), - .driver_info = USB_QUIRK_RESET_RESUME }, - /* ASUS Base Station(T100) */ { USB_DEVICE(0x0b05, 0x17e0), .driver_info = USB_QUIRK_IGNORE_REMOTE_WAKEUP }, @@ -191,6 +183,14 @@ static const struct usb_device_id usb_interface_quirk_list[] = { { } /* terminating entry must be last */ }; +static const struct usb_device_id usb_interface_quirk_list[] = { + /* Logitech UVC Cameras */ + { USB_VENDOR_AND_INTERFACE_INFO(0x046d, USB_CLASS_VIDEO, 1, 0), + .driver_info = USB_QUIRK_RESET_RESUME }, + + { } /* terminating entry must be last */ +}; + static bool usb_match_any_interface(struct usb_device *udev, const struct usb_device_id *id) { From 122ac0bd1d6385759bd8a8b052f5b4e3afba82af Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 23 Jun 2016 14:54:37 -0400 Subject: [PATCH 031/178] USB: EHCI: declare hostpc register as zero-length array commit 7e8b3dfef16375dbfeb1f36a83eb9f27117c51fd upstream. The HOSTPC extension registers found in some EHCI implementations form a variable-length array, with one element for each port. Therefore the hostpc field in struct ehci_regs should be declared as a zero-length array, not a single-element array. This fixes a problem reported by UBSAN. Signed-off-by: Alan Stern Reported-by: Wilfried Klaebe Tested-by: Wilfried Klaebe CC: Signed-off-by: Greg Kroah-Hartman Signed-off-by: Willy Tarreau --- include/linux/usb/ehci_def.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/usb/ehci_def.h b/include/linux/usb/ehci_def.h index daec99af5d54..1c88b177cb9c 100644 --- a/include/linux/usb/ehci_def.h +++ b/include/linux/usb/ehci_def.h @@ -178,11 +178,11 @@ struct ehci_regs { * PORTSCx */ /* HOSTPC: offset 0x84 */ - u32 hostpc[1]; /* HOSTPC extension */ + u32 hostpc[0]; /* HOSTPC extension */ #define HOSTPC_PHCD (1<<22) /* Phy clock disable */ #define HOSTPC_PSPD (3<<25) /* Port speed detection */ - u32 reserved5[16]; + u32 reserved5[17]; /* USBMODE_EX: offset 0xc8 */ u32 usbmode_ex; /* USB Device mode extension */ From 1883edde34d586fca99033e88f074239041cf7b6 Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Mon, 6 Jun 2016 12:38:17 +0200 Subject: [PATCH 032/178] USB: serial: option: add support for Telit LE910 PID 0x1206 commit 3c0415fa08548e3bc63ef741762664497ab187ed upstream. This patch adds support for 0x1206 PID of Telit LE910. Since the interfaces positions are the same than the ones for 0x1043 PID of Telit LE922, telit_le922_blacklist_usbcfg3 is used. Signed-off-by: Daniele Palmas Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Willy Tarreau --- drivers/usb/serial/option.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index bcb6f5c2bae4..006a2a721edf 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -274,6 +274,7 @@ static void option_instat_callback(struct urb *urb); #define TELIT_PRODUCT_LE922_USBCFG5 0x1045 #define TELIT_PRODUCT_LE920 0x1200 #define TELIT_PRODUCT_LE910 0x1201 +#define TELIT_PRODUCT_LE910_USBCFG4 0x1206 /* ZTE PRODUCTS */ #define ZTE_VENDOR_ID 0x19d2 @@ -1206,6 +1207,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg0 }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910), .driver_info = (kernel_ulong_t)&telit_le910_blacklist }, + { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910_USBCFG4), + .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg3 }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920), .driver_info = (kernel_ulong_t)&telit_le920_blacklist }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_MF622, 0xff, 0xff, 0xff) }, /* ZTE WCDMA products */ From 788d2a8d062f123a1914f51606e4f2f9be149985 Mon Sep 17 00:00:00 2001 From: Andrew Goodbody Date: Tue, 31 May 2016 10:05:27 -0500 Subject: [PATCH 033/178] usb: musb: Stop bulk endpoint while queue is rotated commit 7b2c17f829545df27a910e8d82e133c21c9a8c9c upstream. Ensure that the endpoint is stopped by clearing REQPKT before clearing DATAERR_NAKTIMEOUT before rotating the queue on the dedicated bulk endpoint. This addresses an issue where a race could result in the endpoint receiving data before it was reprogrammed resulting in a warning about such data from musb_rx_reinit before it was thrown away. The data thrown away was a valid packet that had been correctly ACKed which meant the host and device got out of sync. Signed-off-by: Andrew Goodbody Cc: stable@vger.kernel.org Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman Signed-off-by: Willy Tarreau --- drivers/usb/musb/musb_host.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c index 9d3044bdebe5..cdadbe686a6c 100644 --- a/drivers/usb/musb/musb_host.c +++ b/drivers/usb/musb/musb_host.c @@ -948,9 +948,15 @@ static void musb_bulk_nak_timeout(struct musb *musb, struct musb_hw_ep *ep, if (is_in) { dma = is_dma_capable() ? ep->rx_channel : NULL; - /* clear nak timeout bit */ + /* + * Need to stop the transaction by clearing REQPKT first + * then the NAK Timeout bit ref MUSBMHDRC USB 2.0 HIGH-SPEED + * DUAL-ROLE CONTROLLER Programmer's Guide, section 9.2.2 + */ rx_csr = musb_readw(epio, MUSB_RXCSR); rx_csr |= MUSB_RXCSR_H_WZC_BITS; + rx_csr &= ~MUSB_RXCSR_H_REQPKT; + musb_writew(epio, MUSB_RXCSR, rx_csr); rx_csr &= ~MUSB_RXCSR_DATAERROR; musb_writew(epio, MUSB_RXCSR, rx_csr); From 664133ce7f5392e75d59c81da1c4b4069a68d5c5 Mon Sep 17 00:00:00 2001 From: Andrew Goodbody Date: Tue, 31 May 2016 10:05:26 -0500 Subject: [PATCH 034/178] usb: musb: Ensure rx reinit occurs for shared_fifo endpoints commit f3eec0cf784e0d6c47822ca6b66df3d5812af7e6 upstream. shared_fifo endpoints would only get a previous tx state cleared out, the rx state was only cleared for non shared_fifo endpoints Change this so that the rx state is cleared for all endpoints. This addresses an issue that resulted in rx packets being dropped silently. Signed-off-by: Andrew Goodbody Cc: stable@vger.kernel.org Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman Signed-off-by: Willy Tarreau --- drivers/usb/musb/musb_host.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c index cdadbe686a6c..c6cc5201665a 100644 --- a/drivers/usb/musb/musb_host.c +++ b/drivers/usb/musb/musb_host.c @@ -581,14 +581,13 @@ musb_rx_reinit(struct musb *musb, struct musb_qh *qh, struct musb_hw_ep *ep) musb_writew(ep->regs, MUSB_TXCSR, 0); /* scrub all previous state, clearing toggle */ - } else { - csr = musb_readw(ep->regs, MUSB_RXCSR); - if (csr & MUSB_RXCSR_RXPKTRDY) - WARNING("rx%d, packet/%d ready?\n", ep->epnum, - musb_readw(ep->regs, MUSB_RXCOUNT)); - - musb_h_flush_rxfifo(ep, MUSB_RXCSR_CLRDATATOG); } + csr = musb_readw(ep->regs, MUSB_RXCSR); + if (csr & MUSB_RXCSR_RXPKTRDY) + WARNING("rx%d, packet/%d ready?\n", ep->epnum, + musb_readw(ep->regs, MUSB_RXCOUNT)); + + musb_h_flush_rxfifo(ep, MUSB_RXCSR_CLRDATATOG); /* target addr and (for multipoint) hub addr/port */ if (musb->is_multipoint) { From a2fe08508d196b95e72576beda3017cfbb135176 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Wed, 8 Jun 2016 16:32:50 +0900 Subject: [PATCH 035/178] usb: renesas_usbhs: protect the CFIFOSEL setting in usbhsg_ep_enable() commit 15e4292a2d21e9997fdb2b8c014cc461b3f268f0 upstream. This patch fixes an issue that the CFIFOSEL register value is possible to be changed by usbhsg_ep_enable() wrongly. And then, a data transfer using CFIFO may not work correctly. For example: # modprobe g_multi file=usb-storage.bin # ifconfig usb0 192.168.1.1 up (During the USB host is sending file to the mass storage) # ifconfig usb0 down In this case, since the u_ether.c may call usb_ep_enable() in eth_stop(), if the renesas_usbhs driver is also using CFIFO for mass storage, the mass storage may not work correctly. So, this patch adds usbhs_lock() and usbhs_unlock() calling in usbhsg_ep_enable() to protect CFIFOSEL register. This is because: - CFIFOSEL.CURPIPE = 0 is also needed for the pipe configuration - The CFIFOSEL (fifo->sel) is already protected by usbhs_lock() Fixes: 97664a207bc2 ("usb: renesas_usbhs: shrink spin lock area") Cc: # v3.1+ Signed-off-by: Yoshihiro Shimoda Signed-off-by: Felipe Balbi Signed-off-by: Willy Tarreau --- drivers/usb/renesas_usbhs/mod_gadget.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/usb/renesas_usbhs/mod_gadget.c b/drivers/usb/renesas_usbhs/mod_gadget.c index ed4949faa70d..64223a923932 100644 --- a/drivers/usb/renesas_usbhs/mod_gadget.c +++ b/drivers/usb/renesas_usbhs/mod_gadget.c @@ -558,6 +558,9 @@ static int usbhsg_ep_enable(struct usb_ep *ep, struct usbhs_priv *priv = usbhsg_gpriv_to_priv(gpriv); struct usbhs_pipe *pipe; int ret = -EIO; + unsigned long flags; + + usbhs_lock(priv, flags); /* * if it already have pipe, @@ -566,7 +569,8 @@ static int usbhsg_ep_enable(struct usb_ep *ep, if (uep->pipe) { usbhs_pipe_clear(uep->pipe); usbhs_pipe_sequence_data0(uep->pipe); - return 0; + ret = 0; + goto usbhsg_ep_enable_end; } pipe = usbhs_pipe_malloc(priv, @@ -594,6 +598,9 @@ static int usbhsg_ep_enable(struct usb_ep *ep, ret = 0; } +usbhsg_ep_enable_end: + usbhs_unlock(priv, flags); + return ret; } From b1b4becf713431393006d95bbba9b6b3586255f0 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 15 Jul 2016 14:26:26 -0400 Subject: [PATCH 036/178] x86/mm: Add barriers and document switch_mm()-vs-flush synchronization commit 71b3c126e61177eb693423f2e18a1914205b165e upstream. When switch_mm() activates a new PGD, it also sets a bit that tells other CPUs that the PGD is in use so that TLB flush IPIs will be sent. In order for that to work correctly, the bit needs to be visible prior to loading the PGD and therefore starting to fill the local TLB. Document all the barriers that make this work correctly and add a couple that were missing. CVE-2016-2069 Signed-off-by: Andy Lutomirski Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: linux-mm@kvack.org Signed-off-by: Ingo Molnar [ luis: backported to 3.16: - dropped N/A comment in flush_tlb_mm_range() - adjusted context ] Signed-off-by: Luis Henriques [ciwillia@brocade.com: backported to 3.10: adjusted context] Signed-off-by: Charles (Chas) Williams Signed-off-by: Willy Tarreau --- arch/x86/include/asm/mmu_context.h | 32 +++++++++++++++++++++++++++++- arch/x86/mm/tlb.c | 24 +++++++++++++++++++--- 2 files changed, 52 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index be12c534fd59..c0d2f6b668ec 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -42,7 +42,32 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, #endif cpumask_set_cpu(cpu, mm_cpumask(next)); - /* Re-load page tables */ + /* + * Re-load page tables. + * + * This logic has an ordering constraint: + * + * CPU 0: Write to a PTE for 'next' + * CPU 0: load bit 1 in mm_cpumask. if nonzero, send IPI. + * CPU 1: set bit 1 in next's mm_cpumask + * CPU 1: load from the PTE that CPU 0 writes (implicit) + * + * We need to prevent an outcome in which CPU 1 observes + * the new PTE value and CPU 0 observes bit 1 clear in + * mm_cpumask. (If that occurs, then the IPI will never + * be sent, and CPU 0's TLB will contain a stale entry.) + * + * The bad outcome can occur if either CPU's load is + * reordered before that CPU's store, so both CPUs much + * execute full barriers to prevent this from happening. + * + * Thus, switch_mm needs a full barrier between the + * store to mm_cpumask and any operation that could load + * from next->pgd. This barrier synchronizes with + * remote TLB flushers. Fortunately, load_cr3 is + * serializing and thus acts as a full barrier. + * + */ load_cr3(next->pgd); /* Stop flush ipis for the previous mm */ @@ -65,10 +90,15 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, * schedule, protecting us from simultaneous changes. */ cpumask_set_cpu(cpu, mm_cpumask(next)); + /* * We were in lazy tlb mode and leave_mm disabled * tlb flush IPI delivery. We must reload CR3 * to make sure to use no freed page tables. + * + * As above, this is a barrier that forces + * TLB repopulation to be ordered after the + * store to mm_cpumask. */ load_cr3(next->pgd); load_LDT_nolock(&next->context); diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 282375f13c7e..c26b610a604d 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -149,7 +149,9 @@ void flush_tlb_current_task(void) preempt_disable(); + /* This is an implicit full barrier that synchronizes with switch_mm. */ local_flush_tlb(); + if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL); preempt_enable(); @@ -188,11 +190,19 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, unsigned act_entries, tlb_entries = 0; preempt_disable(); - if (current->active_mm != mm) + if (current->active_mm != mm) { + /* Synchronize with switch_mm. */ + smp_mb(); + goto flush_all; + } if (!current->mm) { leave_mm(smp_processor_id()); + + /* Synchronize with switch_mm. */ + smp_mb(); + goto flush_all; } @@ -242,10 +252,18 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long start) preempt_disable(); if (current->active_mm == mm) { - if (current->mm) + if (current->mm) { + /* + * Implicit full barrier (INVLPG) that synchronizes + * with switch_mm. + */ __flush_tlb_one(start); - else + } else { leave_mm(smp_processor_id()); + + /* Synchronize with switch_mm. */ + smp_mb(); + } } if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) From f474c525b4f412522cd092b6c8bffb6a0fd9a4de Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Fri, 15 Jul 2016 14:26:27 -0400 Subject: [PATCH 037/178] pipe: limit the per-user amount of pages allocated in pipes commit 759c01142a5d0f364a462346168a56de28a80f52 upstream. On no-so-small systems, it is possible for a single process to cause an OOM condition by filling large pipes with data that are never read. A typical process filling 4000 pipes with 1 MB of data will use 4 GB of memory. On small systems it may be tricky to set the pipe max size to prevent this from happening. This patch makes it possible to enforce a per-user soft limit above which new pipes will be limited to a single page, effectively limiting them to 4 kB each, as well as a hard limit above which no new pipes may be created for this user. This has the effect of protecting the system against memory abuse without hurting other users, and still allowing pipes to work correctly though with less data at once. The limit are controlled by two new sysctls : pipe-user-pages-soft, and pipe-user-pages-hard. Both may be disabled by setting them to zero. The default soft limit allows the default number of FDs per process (1024) to create pipes of the default size (64kB), thus reaching a limit of 64MB before starting to create only smaller pipes. With 256 processes limited to 1024 FDs each, this results in 1024*64kB + (256*1024 - 1024) * 4kB = 1084 MB of memory allocated for a user. The hard limit is disabled by default to avoid breaking existing applications that make intensive use of pipes (eg: for splicing). CVE-2016-2847 Reported-by: socketpair@gmail.com Reported-by: Tetsuo Handa Mitigates: CVE-2013-4312 (Linux 2.0+) Suggested-by: Linus Torvalds Signed-off-by: Willy Tarreau Signed-off-by: Al Viro Signed-off-by: Luis Henriques Signed-off-by: Chas Williams <3chas3@gmail.com> --- Documentation/sysctl/fs.txt | 23 ++++++++++++++++++ fs/pipe.c | 47 +++++++++++++++++++++++++++++++++++-- include/linux/pipe_fs_i.h | 4 ++++ include/linux/sched.h | 1 + kernel/sysctl.c | 14 +++++++++++ 5 files changed, 87 insertions(+), 2 deletions(-) diff --git a/Documentation/sysctl/fs.txt b/Documentation/sysctl/fs.txt index 88152f214f48..302b5ed616a6 100644 --- a/Documentation/sysctl/fs.txt +++ b/Documentation/sysctl/fs.txt @@ -32,6 +32,8 @@ Currently, these files are in /proc/sys/fs: - nr_open - overflowuid - overflowgid +- pipe-user-pages-hard +- pipe-user-pages-soft - protected_hardlinks - protected_symlinks - suid_dumpable @@ -159,6 +161,27 @@ The default is 65534. ============================================================== +pipe-user-pages-hard: + +Maximum total number of pages a non-privileged user may allocate for pipes. +Once this limit is reached, no new pipes may be allocated until usage goes +below the limit again. When set to 0, no limit is applied, which is the default +setting. + +============================================================== + +pipe-user-pages-soft: + +Maximum total number of pages a non-privileged user may allocate for pipes +before the pipe size gets limited to a single page. Once this limit is reached, +new pipes will be limited to a single page in size for this user in order to +limit total memory usage, and trying to increase them using fcntl() will be +denied until usage goes below the limit again. The default value allows to +allocate up to 1024 pipes at their default size. When set to 0, no limit is +applied. + +============================================================== + protected_hardlinks: A long-standing class of security issues is the hardlink-based diff --git a/fs/pipe.c b/fs/pipe.c index 50267e6ba688..c281867c453e 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -39,6 +39,12 @@ unsigned int pipe_max_size = 1048576; */ unsigned int pipe_min_size = PAGE_SIZE; +/* Maximum allocatable pages per user. Hard limit is unset by default, soft + * matches default values. + */ +unsigned long pipe_user_pages_hard; +unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR; + /* * We use a start+len construction, which provides full use of the * allocated memory. @@ -794,20 +800,49 @@ pipe_fasync(int fd, struct file *filp, int on) return retval; } +static void account_pipe_buffers(struct pipe_inode_info *pipe, + unsigned long old, unsigned long new) +{ + atomic_long_add(new - old, &pipe->user->pipe_bufs); +} + +static bool too_many_pipe_buffers_soft(struct user_struct *user) +{ + return pipe_user_pages_soft && + atomic_long_read(&user->pipe_bufs) >= pipe_user_pages_soft; +} + +static bool too_many_pipe_buffers_hard(struct user_struct *user) +{ + return pipe_user_pages_hard && + atomic_long_read(&user->pipe_bufs) >= pipe_user_pages_hard; +} + struct pipe_inode_info *alloc_pipe_info(void) { struct pipe_inode_info *pipe; pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL); if (pipe) { - pipe->bufs = kzalloc(sizeof(struct pipe_buffer) * PIPE_DEF_BUFFERS, GFP_KERNEL); + unsigned long pipe_bufs = PIPE_DEF_BUFFERS; + struct user_struct *user = get_current_user(); + + if (!too_many_pipe_buffers_hard(user)) { + if (too_many_pipe_buffers_soft(user)) + pipe_bufs = 1; + pipe->bufs = kzalloc(sizeof(struct pipe_buffer) * pipe_bufs, GFP_KERNEL); + } + if (pipe->bufs) { init_waitqueue_head(&pipe->wait); pipe->r_counter = pipe->w_counter = 1; - pipe->buffers = PIPE_DEF_BUFFERS; + pipe->buffers = pipe_bufs; + pipe->user = user; + account_pipe_buffers(pipe, 0, pipe_bufs); mutex_init(&pipe->mutex); return pipe; } + free_uid(user); kfree(pipe); } @@ -818,6 +853,8 @@ void free_pipe_info(struct pipe_inode_info *pipe) { int i; + account_pipe_buffers(pipe, pipe->buffers, 0); + free_uid(pipe->user); for (i = 0; i < pipe->buffers; i++) { struct pipe_buffer *buf = pipe->bufs + i; if (buf->ops) @@ -1208,6 +1245,7 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long nr_pages) memcpy(bufs + head, pipe->bufs, tail * sizeof(struct pipe_buffer)); } + account_pipe_buffers(pipe, pipe->buffers, nr_pages); pipe->curbuf = 0; kfree(pipe->bufs); pipe->bufs = bufs; @@ -1279,6 +1317,11 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg) if (!capable(CAP_SYS_RESOURCE) && size > pipe_max_size) { ret = -EPERM; goto out; + } else if ((too_many_pipe_buffers_hard(pipe->user) || + too_many_pipe_buffers_soft(pipe->user)) && + !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN)) { + ret = -EPERM; + goto out; } ret = pipe_set_size(pipe, nr_pages); break; diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index ab5752692113..b3374f63bc36 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -42,6 +42,7 @@ struct pipe_buffer { * @fasync_readers: reader side fasync * @fasync_writers: writer side fasync * @bufs: the circular array of pipe buffers + * @user: the user who created this pipe **/ struct pipe_inode_info { struct mutex mutex; @@ -57,6 +58,7 @@ struct pipe_inode_info { struct fasync_struct *fasync_readers; struct fasync_struct *fasync_writers; struct pipe_buffer *bufs; + struct user_struct *user; }; /* @@ -140,6 +142,8 @@ void pipe_unlock(struct pipe_inode_info *); void pipe_double_lock(struct pipe_inode_info *, struct pipe_inode_info *); extern unsigned int pipe_max_size, pipe_min_size; +extern unsigned long pipe_user_pages_hard; +extern unsigned long pipe_user_pages_soft; int pipe_proc_fn(struct ctl_table *, int, void __user *, size_t *, loff_t *); diff --git a/include/linux/sched.h b/include/linux/sched.h index 4781332f2e11..7728941e7ddc 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -671,6 +671,7 @@ struct user_struct { #endif unsigned long locked_shm; /* How many pages of mlocked shm ? */ unsigned long unix_inflight; /* How many files in flight in unix sockets */ + atomic_long_t pipe_bufs; /* how many pages are allocated in pipe buffers */ #ifdef CONFIG_KEYS struct key *uid_keyring; /* UID specific keyring */ diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 9469f4c61a30..4fd49fe1046d 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1632,6 +1632,20 @@ static struct ctl_table fs_table[] = { .proc_handler = &pipe_proc_fn, .extra1 = &pipe_min_size, }, + { + .procname = "pipe-user-pages-hard", + .data = &pipe_user_pages_hard, + .maxlen = sizeof(pipe_user_pages_hard), + .mode = 0644, + .proc_handler = proc_doulongvec_minmax, + }, + { + .procname = "pipe-user-pages-soft", + .data = &pipe_user_pages_soft, + .maxlen = sizeof(pipe_user_pages_soft), + .mode = 0644, + .proc_handler = proc_doulongvec_minmax, + }, { } }; From 82365cf607366c401a29bd19a4b0fb30783b1691 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=83=C2=B8rn=20Mork?= Date: Fri, 15 Jul 2016 15:08:16 -0400 Subject: [PATCH 038/178] cdc_ncm: do not call usbnet_link_change from cdc_ncm_bind MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 4d06dd537f95683aba3651098ae288b7cbff8274 upstream. usbnet_link_change will call schedule_work and should be avoided if bind is failing. Otherwise we will end up with scheduled work referring to a netdev which has gone away. Instead of making the call conditional, we can just defer it to usbnet_probe, using the driver_info flag made for this purpose. CVE-2016-3951 Fixes: 8a34b0ae8778 ("usbnet: cdc_ncm: apply usbnet_link_change") Reported-by: Andrey Konovalov Suggested-by: Linus Torvalds Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller [ciwillia@brocade.com: backported to 3.10: adjusted context] Signed-off-by: Charles (Chas) Williams Signed-off-by: Willy Tarreau --- drivers/net/usb/cdc_ncm.c | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 74581cbcafa7..6ee9665e20b2 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -598,24 +598,13 @@ EXPORT_SYMBOL_GPL(cdc_ncm_select_altsetting); static int cdc_ncm_bind(struct usbnet *dev, struct usb_interface *intf) { - int ret; - /* MBIM backwards compatible function? */ cdc_ncm_select_altsetting(dev, intf); if (cdc_ncm_comm_intf_is_mbim(intf->cur_altsetting)) return -ENODEV; /* NCM data altsetting is always 1 */ - ret = cdc_ncm_bind_common(dev, intf, 1); - - /* - * We should get an event when network connection is "connected" or - * "disconnected". Set network connection in "disconnected" state - * (carrier is OFF) during attach, so the IP network stack does not - * start IPv6 negotiation and more. - */ - usbnet_link_change(dev, 0, 0); - return ret; + return cdc_ncm_bind_common(dev, intf, 1); } static void cdc_ncm_align_tail(struct sk_buff *skb, size_t modulus, size_t remainder, size_t max) @@ -1161,7 +1150,8 @@ static void cdc_ncm_disconnect(struct usb_interface *intf) static const struct driver_info cdc_ncm_info = { .description = "CDC NCM", - .flags = FLAG_POINTTOPOINT | FLAG_NO_SETINT | FLAG_MULTI_PACKET, + .flags = FLAG_POINTTOPOINT | FLAG_NO_SETINT | FLAG_MULTI_PACKET + | FLAG_LINK_INTR, .bind = cdc_ncm_bind, .unbind = cdc_ncm_unbind, .check_connect = cdc_ncm_check_connect, @@ -1175,7 +1165,7 @@ static const struct driver_info cdc_ncm_info = { static const struct driver_info wwan_info = { .description = "Mobile Broadband Network Device", .flags = FLAG_POINTTOPOINT | FLAG_NO_SETINT | FLAG_MULTI_PACKET - | FLAG_WWAN, + | FLAG_LINK_INTR | FLAG_WWAN, .bind = cdc_ncm_bind, .unbind = cdc_ncm_unbind, .check_connect = cdc_ncm_check_connect, @@ -1189,7 +1179,7 @@ static const struct driver_info wwan_info = { static const struct driver_info wwan_noarp_info = { .description = "Mobile Broadband Network Device (NO ARP)", .flags = FLAG_POINTTOPOINT | FLAG_NO_SETINT | FLAG_MULTI_PACKET - | FLAG_WWAN | FLAG_NOARP, + | FLAG_LINK_INTR | FLAG_WWAN | FLAG_NOARP, .bind = cdc_ncm_bind, .unbind = cdc_ncm_unbind, .check_connect = cdc_ncm_check_connect, From 273e07f96b37bb9ee3594ce2dfe07b2a9608f941 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 15 Jul 2016 15:08:17 -0400 Subject: [PATCH 039/178] KEYS: potential uninitialized variable commit 38327424b40bcebe2de92d07312c89360ac9229a upstream. If __key_link_begin() failed then "edit" would be uninitialized. I've added a check to fix that. This allows a random user to crash the kernel, though it's quite difficult to achieve. There are three ways it can be done as the user would have to cause an error to occur in __key_link(): (1) Cause the kernel to run out of memory. In practice, this is difficult to achieve without ENOMEM cropping up elsewhere and aborting the attempt. (2) Revoke the destination keyring between the keyring ID being looked up and it being tested for revocation. In practice, this is difficult to time correctly because the KEYCTL_REJECT function can only be used from the request-key upcall process. Further, users can only make use of what's in /sbin/request-key.conf, though this does including a rejection debugging test - which means that the destination keyring has to be the caller's session keyring in practice. (3) Have just enough key quota available to create a key, a new session keyring for the upcall and a link in the session keyring, but not then sufficient quota to create a link in the nominated destination keyring so that it fails with EDQUOT. The bug can be triggered using option (3) above using something like the following: echo 80 >/proc/sys/kernel/keys/root_maxbytes keyctl request2 user debug:fred negate @t The above sets the quota to something much lower (80) to make the bug easier to trigger, but this is dependent on the system. Note also that the name of the keyring created contains a random number that may be between 1 and 10 characters in size, so may throw the test off by changing the amount of quota used. Assuming the failure occurs, something like the following will be seen: kfree_debugcheck: out of range ptr 6b6b6b6b6b6b6b68h ------------[ cut here ]------------ kernel BUG at ../mm/slab.c:2821! ... RIP: 0010:[] kfree_debugcheck+0x20/0x25 RSP: 0018:ffff8804014a7de8 EFLAGS: 00010092 RAX: 0000000000000034 RBX: 6b6b6b6b6b6b6b68 RCX: 0000000000000000 RDX: 0000000000040001 RSI: 00000000000000f6 RDI: 0000000000000300 RBP: ffff8804014a7df0 R08: 0000000000000001 R09: 0000000000000000 R10: ffff8804014a7e68 R11: 0000000000000054 R12: 0000000000000202 R13: ffffffff81318a66 R14: 0000000000000000 R15: 0000000000000001 ... Call Trace: kfree+0xde/0x1bc assoc_array_cancel_edit+0x1f/0x36 __key_link_end+0x55/0x63 key_reject_and_link+0x124/0x155 keyctl_reject_key+0xb6/0xe0 keyctl_negate_key+0x10/0x12 SyS_keyctl+0x9f/0xe7 do_syscall_64+0x63/0x13a entry_SYSCALL64_slow_path+0x25/0x25 CVE-2016-4470 Fixes: f70e2e06196a ('KEYS: Do preallocation for __key_link()') Signed-off-by: Dan Carpenter Signed-off-by: David Howells cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds [ciwillia@brocade.com: backported to 3.10: adjusted context] Signed-off-by: Charles (Chas) Williams Signed-off-by: Willy Tarreau --- security/keys/key.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/keys/key.c b/security/keys/key.c index 8fb7c7bd4657..6595b2dd89fe 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -580,7 +580,7 @@ int key_reject_and_link(struct key *key, mutex_unlock(&key_construction_mutex); - if (keyring) + if (keyring && link_ret == 0) __key_link_end(keyring, key->type, prealloc); /* wake up anyone waiting for a key to be constructed */ From af110cc4b24250faafd4f3b9879cf51e350d7799 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Fri, 15 Jul 2016 15:08:19 -0400 Subject: [PATCH 040/178] mm: migrate dirty page without clear_page_dirty_for_io etc commit 42cb14b110a5698ccf26ce59c4441722605a3743 upstream. clear_page_dirty_for_io() has accumulated writeback and memcg subtleties since v2.6.16 first introduced page migration; and the set_page_dirty() which completed its migration of PageDirty, later had to be moderated to __set_page_dirty_nobuffers(); then PageSwapBacked had to skip that too. No actual problems seen with this procedure recently, but if you look into what the clear_page_dirty_for_io(page)+set_page_dirty(newpage) is actually achieving, it turns out to be nothing more than moving the PageDirty flag, and its NR_FILE_DIRTY stat from one zone to another. It would be good to avoid a pile of irrelevant decrementations and incrementations, and improper event counting, and unnecessary descent of the radix_tree under tree_lock (to set the PAGECACHE_TAG_DIRTY which radix_tree_replace_slot() left in place anyway). Do the NR_FILE_DIRTY movement, like the other stats movements, while interrupts still disabled in migrate_page_move_mapping(); and don't even bother if the zone is the same. Do the PageDirty movement there under tree_lock too, where old page is frozen and newpage not yet visible: bearing in mind that as soon as newpage becomes visible in radix_tree, an un-page-locked set_page_dirty() might interfere (or perhaps that's just not possible: anything doing so should already hold an additional reference to the old page, preventing its migration; but play safe). But we do still need to transfer PageDirty in migrate_page_copy(), for those who don't go the mapping route through migrate_page_move_mapping(). CVE-2016-3070 Signed-off-by: Hugh Dickins Cc: Christoph Lameter Cc: "Kirill A. Shutemov" Cc: Rik van Riel Cc: Vlastimil Babka Cc: Davidlohr Bueso Cc: Oleg Nesterov Cc: Sasha Levin Cc: Dmitry Vyukov Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds [ciwillia@brocade.com: backported to 3.10: adjusted context] Signed-off-by: Charles (Chas) Williams Signed-off-by: Willy Tarreau --- mm/migrate.c | 51 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 31 insertions(+), 20 deletions(-) diff --git a/mm/migrate.c b/mm/migrate.c index a88c12f2235d..a61500f2671f 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -311,6 +312,8 @@ static int migrate_page_move_mapping(struct address_space *mapping, struct page *newpage, struct page *page, struct buffer_head *head, enum migrate_mode mode) { + struct zone *oldzone, *newzone; + int dirty; int expected_count = 0; void **pslot; @@ -321,6 +324,9 @@ static int migrate_page_move_mapping(struct address_space *mapping, return MIGRATEPAGE_SUCCESS; } + oldzone = page_zone(page); + newzone = page_zone(newpage); + spin_lock_irq(&mapping->tree_lock); pslot = radix_tree_lookup_slot(&mapping->page_tree, @@ -361,6 +367,13 @@ static int migrate_page_move_mapping(struct address_space *mapping, set_page_private(newpage, page_private(page)); } + /* Move dirty while page refs frozen and newpage not yet exposed */ + dirty = PageDirty(page); + if (dirty) { + ClearPageDirty(page); + SetPageDirty(newpage); + } + radix_tree_replace_slot(pslot, newpage); /* @@ -370,6 +383,9 @@ static int migrate_page_move_mapping(struct address_space *mapping, */ page_unfreeze_refs(page, expected_count - 1); + spin_unlock(&mapping->tree_lock); + /* Leave irq disabled to prevent preemption while updating stats */ + /* * If moved to a different zone then also account * the page for that zone. Other VM counters will be @@ -380,13 +396,19 @@ static int migrate_page_move_mapping(struct address_space *mapping, * via NR_FILE_PAGES and NR_ANON_PAGES if they * are mapped to swap space. */ - __dec_zone_page_state(page, NR_FILE_PAGES); - __inc_zone_page_state(newpage, NR_FILE_PAGES); - if (!PageSwapCache(page) && PageSwapBacked(page)) { - __dec_zone_page_state(page, NR_SHMEM); - __inc_zone_page_state(newpage, NR_SHMEM); + if (newzone != oldzone) { + __dec_zone_state(oldzone, NR_FILE_PAGES); + __inc_zone_state(newzone, NR_FILE_PAGES); + if (PageSwapBacked(page) && !PageSwapCache(page)) { + __dec_zone_state(oldzone, NR_SHMEM); + __inc_zone_state(newzone, NR_SHMEM); + } + if (dirty && mapping_cap_account_dirty(mapping)) { + __dec_zone_state(oldzone, NR_FILE_DIRTY); + __inc_zone_state(newzone, NR_FILE_DIRTY); + } } - spin_unlock_irq(&mapping->tree_lock); + local_irq_enable(); return MIGRATEPAGE_SUCCESS; } @@ -460,20 +482,9 @@ void migrate_page_copy(struct page *newpage, struct page *page) if (PageMappedToDisk(page)) SetPageMappedToDisk(newpage); - if (PageDirty(page)) { - clear_page_dirty_for_io(page); - /* - * Want to mark the page and the radix tree as dirty, and - * redo the accounting that clear_page_dirty_for_io undid, - * but we can't use set_page_dirty because that function - * is actually a signal that all of the page has become dirty. - * Whereas only part of our page may be dirty. - */ - if (PageSwapBacked(page)) - SetPageDirty(newpage); - else - __set_page_dirty_nobuffers(newpage); - } + /* Move dirty on pages not done by migrate_page_move_mapping() */ + if (PageDirty(page)) + SetPageDirty(newpage); mlock_migrate_page(newpage, page); ksm_migrate_page(newpage, page); From 0ba4f4bb1aaf7e36cd8efb820691c9bceb487859 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 15 Jul 2016 15:08:20 -0400 Subject: [PATCH 041/178] printk: do cond_resched() between lines while outputting to consoles commit 8d91f8b15361dfb438ab6eb3b319e2ded43458ff upstream. @console_may_schedule tracks whether console_sem was acquired through lock or trylock. If the former, we're inside a sleepable context and console_conditional_schedule() performs cond_resched(). This allows console drivers which use console_lock for synchronization to yield while performing time-consuming operations such as scrolling. However, the actual console outputting is performed while holding irq-safe logbuf_lock, so console_unlock() clears @console_may_schedule before starting outputting lines. Also, only a few drivers call console_conditional_schedule() to begin with. This means that when a lot of lines need to be output by console_unlock(), for example on a console registration, the task doing console_unlock() may not yield for a long time on a non-preemptible kernel. If this happens with a slow console devices, for example a serial console, the outputting task may occupy the cpu for a very long time. Long enough to trigger softlockup and/or RCU stall warnings, which in turn pile more messages, sometimes enough to trigger the next cycle of warnings incapacitating the system. Fix it by making console_unlock() insert cond_resched() between lines if @console_may_schedule. Signed-off-by: Tejun Heo Reported-by: Calvin Owens Acked-by: Jan Kara Cc: Dave Jones Cc: Kyle McMartin Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds [ciwillia@brocade.com: adjust context for 3.10.y] Signed-off-by: Chas Williams Signed-off-by: Willy Tarreau --- include/linux/console.h | 1 + kernel/panic.c | 3 +++ kernel/printk.c | 35 ++++++++++++++++++++++++++++++++++- 3 files changed, 38 insertions(+), 1 deletion(-) diff --git a/include/linux/console.h b/include/linux/console.h index 73bab0f58af5..6877ffc97d8c 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -153,6 +153,7 @@ extern int console_trylock(void); extern void console_unlock(void); extern void console_conditional_schedule(void); extern void console_unblank(void); +extern void console_flush_on_panic(void); extern struct tty_driver *console_device(int *); extern void console_stop(struct console *); extern void console_start(struct console *); diff --git a/kernel/panic.c b/kernel/panic.c index 167ec097ce8b..d3d74c4e2258 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -22,6 +22,7 @@ #include #include #include +#include #define PANIC_TIMER_STEP 100 #define PANIC_BLINK_SPD 18 @@ -128,6 +129,8 @@ void panic(const char *fmt, ...) bust_spinlocks(0); + console_flush_on_panic(); + if (!panic_blink) panic_blink = no_blink; diff --git a/kernel/printk.c b/kernel/printk.c index fd0154a57d6e..ee8f6be7d8a9 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -2033,13 +2033,24 @@ void console_unlock(void) static u64 seen_seq; unsigned long flags; bool wake_klogd = false; - bool retry; + bool do_cond_resched, retry; if (console_suspended) { up(&console_sem); return; } + /* + * Console drivers are called under logbuf_lock, so + * @console_may_schedule should be cleared before; however, we may + * end up dumping a lot of lines, for example, if called from + * console registration path, and should invoke cond_resched() + * between lines if allowable. Not doing so can cause a very long + * scheduling stall on a slow console leading to RCU stall and + * softlockup warnings which exacerbate the issue with more + * messages practically incapacitating the system. + */ + do_cond_resched = console_may_schedule; console_may_schedule = 0; /* flush buffered message fragment immediately to console */ @@ -2096,6 +2107,9 @@ skip: call_console_drivers(level, text, len); start_critical_timings(); local_irq_restore(flags); + + if (do_cond_resched) + cond_resched(); } console_locked = 0; mutex_release(&console_lock_dep_map, 1, _RET_IP_); @@ -2164,6 +2178,25 @@ void console_unblank(void) console_unlock(); } +/** + * console_flush_on_panic - flush console content on panic + * + * Immediately output all pending messages no matter what. + */ +void console_flush_on_panic(void) +{ + /* + * If someone else is holding the console lock, trylock will fail + * and may_schedule may be set. Ignore and proceed to unlock so + * that messages are flushed out. As this can be called from any + * context and we don't want to get preempted while flushing, + * ensure may_schedule is cleared. + */ + console_trylock(); + console_may_schedule = 0; + console_unlock(); +} + /* * Return the console tty driver structure and its associated index */ From ba293572f4a824d995216914d2d0ce655a604dbc Mon Sep 17 00:00:00 2001 From: Scott Bauer Date: Fri, 15 Jul 2016 15:08:21 -0400 Subject: [PATCH 042/178] HID: hiddev: validate num_values for HIDIOCGUSAGES, HIDIOCSUSAGES commands commit 93a2001bdfd5376c3dc2158653034c20392d15c5 upstream. This patch validates the num_values parameter from userland during the HIDIOCGUSAGES and HIDIOCSUSAGES commands. Previously, if the report id was set to HID_REPORT_ID_UNKNOWN, we would fail to validate the num_values parameter leading to a heap overflow. CVE-2016-5829 Cc: stable@vger.kernel.org Signed-off-by: Scott Bauer Signed-off-by: Jiri Kosina Signed-off-by: Chas Williams Signed-off-by: Willy Tarreau --- drivers/hid/usbhid/hiddev.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/hid/usbhid/hiddev.c b/drivers/hid/usbhid/hiddev.c index 2f1ddca6f2e0..700145b15088 100644 --- a/drivers/hid/usbhid/hiddev.c +++ b/drivers/hid/usbhid/hiddev.c @@ -516,13 +516,13 @@ static noinline int hiddev_ioctl_usage(struct hiddev *hiddev, unsigned int cmd, goto inval; } else if (uref->usage_index >= field->report_count) goto inval; - - else if ((cmd == HIDIOCGUSAGES || cmd == HIDIOCSUSAGES) && - (uref_multi->num_values > HID_MAX_MULTI_USAGES || - uref->usage_index + uref_multi->num_values > field->report_count)) - goto inval; } + if ((cmd == HIDIOCGUSAGES || cmd == HIDIOCSUSAGES) && + (uref_multi->num_values > HID_MAX_MULTI_USAGES || + uref->usage_index + uref_multi->num_values > field->report_count)) + goto inval; + switch (cmd) { case HIDIOCGUSAGE: uref->value = field->value[uref->usage_index]; From 1196c36fd53c3b1615eb02f986cb727b1dfc1047 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Sun, 24 Jul 2016 18:32:16 +0200 Subject: [PATCH 043/178] libceph: apply new_state before new_up_client on incrementals commit 930c532869774ebf8af9efe9484c597f896a7d46 upstream. Currently, osd_weight and osd_state fields are updated in the encoding order. This is wrong, because an incremental map may look like e.g. new_up_client: { osd=6, addr=... } # set osd_state and addr new_state: { osd=6, xorstate=EXISTS } # clear osd_state Suppose osd6's current osd_state is EXISTS (i.e. osd6 is down). After applying new_up_client, osd_state is changed to EXISTS | UP. Carrying on with the new_state update, we flip EXISTS and leave osd6 in a weird "!EXISTS but UP" state. A non-existent OSD is considered down by the mapping code 2087 for (i = 0; i < pg->pg_temp.len; i++) { 2088 if (ceph_osd_is_down(osdmap, pg->pg_temp.osds[i])) { 2089 if (ceph_can_shift_osds(pi)) 2090 continue; 2091 2092 temp->osds[temp->size++] = CRUSH_ITEM_NONE; and so requests get directed to the second OSD in the set instead of the first, resulting in OSD-side errors like: [WRN] : client.4239 192.168.122.21:0/2444980242 misdirected client.4239.1:2827 pg 2.5df899f2 to osd.4 not [1,4,6] in e680/680 and hung rbds on the client: [ 493.566367] rbd: rbd0: write 400000 at 11cc00000 (0) [ 493.566805] rbd: rbd0: result -6 xferred 400000 [ 493.567011] blk_update_request: I/O error, dev rbd0, sector 9330688 The fix is to decouple application from the decoding and: - apply new_weight first - apply new_state before new_up_client - twiddle osd_state flags if marking in - clear out some of the state if osd is destroyed Fixes: http://tracker.ceph.com/issues/14901 Signed-off-by: Ilya Dryomov Reviewed-by: Josh Durgin [idryomov@gmail.com: backport to 3.10-3.14: strip primary-affinity] Signed-off-by: Willy Tarreau --- net/ceph/osdmap.c | 152 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 108 insertions(+), 44 deletions(-) diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 7ec4e0522215..c1de8d404c47 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -797,6 +797,110 @@ bad: return ERR_PTR(err); } +/* + * Encoding order is (new_up_client, new_state, new_weight). Need to + * apply in the (new_weight, new_state, new_up_client) order, because + * an incremental map may look like e.g. + * + * new_up_client: { osd=6, addr=... } # set osd_state and addr + * new_state: { osd=6, xorstate=EXISTS } # clear osd_state + */ +static int decode_new_up_state_weight(void **p, void *end, + struct ceph_osdmap *map) +{ + void *new_up_client; + void *new_state; + void *new_weight_end; + u32 len; + + new_up_client = *p; + ceph_decode_32_safe(p, end, len, e_inval); + len *= sizeof(u32) + sizeof(struct ceph_entity_addr); + ceph_decode_need(p, end, len, e_inval); + *p += len; + + new_state = *p; + ceph_decode_32_safe(p, end, len, e_inval); + len *= sizeof(u32) + sizeof(u8); + ceph_decode_need(p, end, len, e_inval); + *p += len; + + /* new_weight */ + ceph_decode_32_safe(p, end, len, e_inval); + while (len--) { + s32 osd; + u32 w; + + ceph_decode_need(p, end, 2*sizeof(u32), e_inval); + osd = ceph_decode_32(p); + w = ceph_decode_32(p); + BUG_ON(osd >= map->max_osd); + pr_info("osd%d weight 0x%x %s\n", osd, w, + w == CEPH_OSD_IN ? "(in)" : + (w == CEPH_OSD_OUT ? "(out)" : "")); + map->osd_weight[osd] = w; + + /* + * If we are marking in, set the EXISTS, and clear the + * AUTOOUT and NEW bits. + */ + if (w) { + map->osd_state[osd] |= CEPH_OSD_EXISTS; + map->osd_state[osd] &= ~(CEPH_OSD_AUTOOUT | + CEPH_OSD_NEW); + } + } + new_weight_end = *p; + + /* new_state (up/down) */ + *p = new_state; + len = ceph_decode_32(p); + while (len--) { + s32 osd; + u8 xorstate; + + osd = ceph_decode_32(p); + xorstate = ceph_decode_8(p); + if (xorstate == 0) + xorstate = CEPH_OSD_UP; + BUG_ON(osd >= map->max_osd); + if ((map->osd_state[osd] & CEPH_OSD_UP) && + (xorstate & CEPH_OSD_UP)) + pr_info("osd%d down\n", osd); + if ((map->osd_state[osd] & CEPH_OSD_EXISTS) && + (xorstate & CEPH_OSD_EXISTS)) { + pr_info("osd%d does not exist\n", osd); + map->osd_weight[osd] = CEPH_OSD_IN; + memset(map->osd_addr + osd, 0, sizeof(*map->osd_addr)); + map->osd_state[osd] = 0; + } else { + map->osd_state[osd] ^= xorstate; + } + } + + /* new_up_client */ + *p = new_up_client; + len = ceph_decode_32(p); + while (len--) { + s32 osd; + struct ceph_entity_addr addr; + + osd = ceph_decode_32(p); + ceph_decode_copy(p, &addr, sizeof(addr)); + ceph_decode_addr(&addr); + BUG_ON(osd >= map->max_osd); + pr_info("osd%d up\n", osd); + map->osd_state[osd] |= CEPH_OSD_EXISTS | CEPH_OSD_UP; + map->osd_addr[osd] = addr; + } + + *p = new_weight_end; + return 0; + +e_inval: + return -EINVAL; +} + /* * decode and apply an incremental map update. */ @@ -912,50 +1016,10 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, __remove_pg_pool(&map->pg_pools, pi); } - /* new_up */ - err = -EINVAL; - ceph_decode_32_safe(p, end, len, bad); - while (len--) { - u32 osd; - struct ceph_entity_addr addr; - ceph_decode_32_safe(p, end, osd, bad); - ceph_decode_copy_safe(p, end, &addr, sizeof(addr), bad); - ceph_decode_addr(&addr); - pr_info("osd%d up\n", osd); - BUG_ON(osd >= map->max_osd); - map->osd_state[osd] |= CEPH_OSD_UP; - map->osd_addr[osd] = addr; - } - - /* new_state */ - ceph_decode_32_safe(p, end, len, bad); - while (len--) { - u32 osd; - u8 xorstate; - ceph_decode_32_safe(p, end, osd, bad); - xorstate = **(u8 **)p; - (*p)++; /* clean flag */ - if (xorstate == 0) - xorstate = CEPH_OSD_UP; - if (xorstate & CEPH_OSD_UP) - pr_info("osd%d down\n", osd); - if (osd < map->max_osd) - map->osd_state[osd] ^= xorstate; - } - - /* new_weight */ - ceph_decode_32_safe(p, end, len, bad); - while (len--) { - u32 osd, off; - ceph_decode_need(p, end, sizeof(u32)*2, bad); - osd = ceph_decode_32(p); - off = ceph_decode_32(p); - pr_info("osd%d weight 0x%x %s\n", osd, off, - off == CEPH_OSD_IN ? "(in)" : - (off == CEPH_OSD_OUT ? "(out)" : "")); - if (osd < map->max_osd) - map->osd_weight[osd] = off; - } + /* new_up_client, new_state, new_weight */ + err = decode_new_up_state_weight(p, end, map); + if (err) + goto bad; /* new_pg_temp */ ceph_decode_32_safe(p, end, len, bad); From 6d2eb0f1168c709a7c8fd62f2c90f6643e39609c Mon Sep 17 00:00:00 2001 From: Anthony Romano Date: Fri, 24 Jun 2016 14:48:43 -0700 Subject: [PATCH 044/178] tmpfs: don't undo fallocate past its last page commit b9b4bb26af017dbe930cd4df7f9b2fc3a0497bfe upstream. When fallocate is interrupted it will undo a range that extends one byte past its range of allocated pages. This can corrupt an in-use page by zeroing out its first byte. Instead, undo using the inclusive byte range. Fixes: 1635f6a74152f1d ("tmpfs: undo fallocation on failure") Link: http://lkml.kernel.org/r/1462713387-16724-1-git-send-email-anthony.romano@coreos.com Signed-off-by: Anthony Romano Cc: Vlastimil Babka Cc: Hugh Dickins Cc: Brandon Philips Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Willy Tarreau --- mm/shmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/shmem.c b/mm/shmem.c index 4e4a7349c5cd..e163e40b0bb5 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1950,7 +1950,7 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset, /* Remove the !PageUptodate pages we added */ shmem_undo_range(inode, (loff_t)start << PAGE_CACHE_SHIFT, - (loff_t)index << PAGE_CACHE_SHIFT, true); + ((loff_t)index << PAGE_CACHE_SHIFT) - 1, true); goto undone; } From 24fc11a9879ee6f6ed244fc7dd1507d4a5db64bc Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sun, 10 Jul 2016 16:46:32 -0700 Subject: [PATCH 045/178] tmpfs: fix regression hang in fallocate undo commit 7f556567036cb7f89aabe2f0954b08566b4efb53 upstream. The well-spotted fallocate undo fix is good in most cases, but not when fallocate failed on the very first page. index 0 then passes lend -1 to shmem_undo_range(), and that has two bad effects: (a) that it will undo every fallocation throughout the file, unrestricted by the current range; but more importantly (b) it can cause the undo to hang, because lend -1 is treated as truncation, which makes it keep on retrying until every page has gone, but those already fully instantiated will never go away. Big thank you to xfstests generic/269 which demonstrates this. Fixes: b9b4bb26af01 ("tmpfs: don't undo fallocate past its last page") Signed-off-by: Hugh Dickins Signed-off-by: Linus Torvalds Signed-off-by: Willy Tarreau --- mm/shmem.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index e163e40b0bb5..cc02b6c6eec4 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1948,9 +1948,11 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset, NULL); if (error) { /* Remove the !PageUptodate pages we added */ - shmem_undo_range(inode, - (loff_t)start << PAGE_CACHE_SHIFT, - ((loff_t)index << PAGE_CACHE_SHIFT) - 1, true); + if (index > start) { + shmem_undo_range(inode, + (loff_t)start << PAGE_CACHE_SHIFT, + ((loff_t)index << PAGE_CACHE_SHIFT) - 1, true); + } goto undone; } From b1f32b94ac6760f04d548b4d15e0c1f3b3c52720 Mon Sep 17 00:00:00 2001 From: "Charles (Chas) Williams" Date: Tue, 16 Aug 2016 16:50:11 -0400 Subject: [PATCH 046/178] tcp: make challenge acks less predictable commit 75ff39ccc1bd5d3c455b6822ab09e533c551f758 upstream. From: Eric Dumazet Yue Cao claims that current host rate limiting of challenge ACKS (RFC 5961) could leak enough information to allow a patient attacker to hijack TCP sessions. He will soon provide details in an academic paper. This patch increases the default limit from 100 to 1000, and adds some randomization so that the attacker can no longer hijack sessions without spending a considerable amount of probes. Based on initial analysis and patch from Linus. Note that we also have per socket rate limiting, so it is tempting to remove the host limit in the future. v2: randomize the count of challenge acks per second, not the period. Fixes: 282f23c6ee34 ("tcp: implement RFC 5961 3.2") Reported-by: Yue Cao Signed-off-by: Eric Dumazet Suggested-by: Linus Torvalds Cc: Yuchung Cheng Cc: Neal Cardwell Acked-by: Neal Cardwell Acked-by: Yuchung Cheng Signed-off-by: David S. Miller [ ciwillia: backport to 3.10-stable ] Signed-off-by: Chas Williams Signed-off-by: Willy Tarreau --- net/ipv4/tcp_input.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index f89087c3cfc8..f3b15bb7fbec 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -68,6 +68,7 @@ #include #include #include +#include #include #include #include @@ -87,7 +88,7 @@ int sysctl_tcp_adv_win_scale __read_mostly = 1; EXPORT_SYMBOL(sysctl_tcp_adv_win_scale); /* rfc5961 challenge ack rate limiting */ -int sysctl_tcp_challenge_ack_limit = 100; +int sysctl_tcp_challenge_ack_limit = 1000; int sysctl_tcp_stdurg __read_mostly; int sysctl_tcp_rfc1337 __read_mostly; @@ -3288,12 +3289,19 @@ static void tcp_send_challenge_ack(struct sock *sk) static u32 challenge_timestamp; static unsigned int challenge_count; u32 now = jiffies / HZ; + u32 count; if (now != challenge_timestamp) { + u32 half = (sysctl_tcp_challenge_ack_limit + 1) >> 1; + challenge_timestamp = now; - challenge_count = 0; + ACCESS_ONCE(challenge_count) = half + + reciprocal_divide(prandom_u32(), + sysctl_tcp_challenge_ack_limit); } - if (++challenge_count <= sysctl_tcp_challenge_ack_limit) { + count = ACCESS_ONCE(challenge_count); + if (count > 0) { + ACCESS_ONCE(challenge_count) = count - 1; NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK); tcp_send_ack(sk); } From 4f6b16928f80440995960c646de25da69a801ed0 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Mon, 6 Jun 2016 15:07:18 -0700 Subject: [PATCH 047/178] tcp: record TLP and ER timer stats in v6 stats commit ce3cf4ec0305919fc69a972f6c2b2efd35d36abc upstream. The v6 tcp stats scan do not provide TLP and ER timer information correctly like the v4 version . This patch fixes that. Fixes: 6ba8a3b19e76 ("tcp: Tail loss probe (TLP)") Fixes: eed530b6c676 ("tcp: early retransmit") Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller Signed-off-by: Willy Tarreau --- net/ipv6/tcp_ipv6.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 4659b8ab55d9..41c026f11edc 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1767,7 +1767,9 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) destp = ntohs(inet->inet_dport); srcp = ntohs(inet->inet_sport); - if (icsk->icsk_pending == ICSK_TIME_RETRANS) { + if (icsk->icsk_pending == ICSK_TIME_RETRANS || + icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || + icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { timer_active = 1; timer_expires = icsk->icsk_timeout; } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { From aadf1c479d9a894293298086be0ef73db8cda2c3 Mon Sep 17 00:00:00 2001 From: Soheil Hassas Yeganeh Date: Fri, 29 Jul 2016 09:34:02 -0400 Subject: [PATCH 048/178] tcp: consider recv buf for the initial window scale commit f626300a3e776ccc9671b0dd94698fb3aa315966 upstream. tcp_select_initial_window() intends to advertise a window scaling for the maximum possible window size. To do so, it considers the maximum of net.ipv4.tcp_rmem[2] and net.core.rmem_max as the only possible upper-bounds. However, users with CAP_NET_ADMIN can use SO_RCVBUFFORCE to set the socket's receive buffer size to values larger than net.ipv4.tcp_rmem[2] and net.core.rmem_max. Thus, SO_RCVBUFFORCE is effectively ignored by tcp_select_initial_window(). To fix this, consider the maximum of net.ipv4.tcp_rmem[2], net.core.rmem_max and socket's initial buffer space. Fixes: b0573dea1fb3 ("[NET]: Introduce SO_{SND,RCV}BUFFORCE socket options") Signed-off-by: Soheil Hassas Yeganeh Suggested-by: Neal Cardwell Acked-by: Neal Cardwell Signed-off-by: David S. Miller Signed-off-by: Willy Tarreau --- net/ipv4/tcp_output.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 76c80b59e80f..276b28301a6b 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -222,7 +222,8 @@ void tcp_select_initial_window(int __space, __u32 mss, /* Set window scaling on max possible window * See RFC1323 for an explanation of the limit to 14 */ - space = max_t(u32, sysctl_tcp_rmem[2], sysctl_rmem_max); + space = max_t(u32, space, sysctl_tcp_rmem[2]); + space = max_t(u32, space, sysctl_rmem_max); space = min_t(u32, space, *window_clamp); while (space > 65535 && (*rcv_wscale) < 14) { space >>= 1; From 828e4e161f96b244c82d6a0b5909e798c8424549 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Thu, 18 Aug 2016 10:22:52 +0100 Subject: [PATCH 049/178] MIPS: KVM: Fix mapped fault broken commpage handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit c604cffa93478f8888bec62b23d6073dad03d43a upstream. kvm_mips_handle_mapped_seg_tlb_fault() appears to map the guest page at virtual address 0 to PFN 0 if the guest has created its own mapping there. The intention is unclear, but it may have been an attempt to protect the zero page from being mapped to anything but the comm page in code paths you wouldn't expect from genuine commpage accesses (guest kernel mode cache instructions on that address, hitting trapping instructions when executing from that address with a coincidental TLB eviction during the KVM handling, and guest user mode accesses to that address). Fix this to check for mappings exactly at KVM_GUEST_COMMPAGE_ADDR (it may not be at address 0 since commit 42aa12e74e91 ("MIPS: KVM: Move commpage so 0x0 is unmapped")), and set the corresponding EntryLo to be interpreted as 0 (invalid). Fixes: 858dd5d45733 ("KVM/MIPS32: MMU/TLB operations for the Guest.") Signed-off-by: James Hogan Cc: Paolo Bonzini Cc: "Radim Krčmář" Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Cc: kvm@vger.kernel.org Signed-off-by: Radim Krčmář [james.hogan@imgtec.com: Backport to v3.10.y - v3.15.y] Signed-off-by: James Hogan Signed-off-by: Willy Tarreau --- arch/mips/kvm/kvm_tlb.c | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/arch/mips/kvm/kvm_tlb.c b/arch/mips/kvm/kvm_tlb.c index c777dd36d4a8..1e6b1f124377 100644 --- a/arch/mips/kvm/kvm_tlb.c +++ b/arch/mips/kvm/kvm_tlb.c @@ -397,21 +397,27 @@ kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu, unsigned long entryhi = 0, entrylo0 = 0, entrylo1 = 0; struct kvm *kvm = vcpu->kvm; pfn_t pfn0, pfn1; + long tlb_lo[2]; + tlb_lo[0] = tlb->tlb_lo0; + tlb_lo[1] = tlb->tlb_lo1; - if ((tlb->tlb_hi & VPN2_MASK) == 0) { - pfn0 = 0; - pfn1 = 0; - } else { - if (kvm_mips_map_page(kvm, mips3_tlbpfn_to_paddr(tlb->tlb_lo0) >> PAGE_SHIFT) < 0) - return -1; + /* + * The commpage address must not be mapped to anything else if the guest + * TLB contains entries nearby, or commpage accesses will break. + */ + if (!((tlb->tlb_hi ^ KVM_GUEST_COMMPAGE_ADDR) & + VPN2_MASK & (PAGE_MASK << 1))) + tlb_lo[(KVM_GUEST_COMMPAGE_ADDR >> PAGE_SHIFT) & 1] = 0; - if (kvm_mips_map_page(kvm, mips3_tlbpfn_to_paddr(tlb->tlb_lo1) >> PAGE_SHIFT) < 0) - return -1; + if (kvm_mips_map_page(kvm, mips3_tlbpfn_to_paddr(tlb_lo[0]) >> PAGE_SHIFT) < 0) + return -1; - pfn0 = kvm->arch.guest_pmap[mips3_tlbpfn_to_paddr(tlb->tlb_lo0) >> PAGE_SHIFT]; - pfn1 = kvm->arch.guest_pmap[mips3_tlbpfn_to_paddr(tlb->tlb_lo1) >> PAGE_SHIFT]; - } + if (kvm_mips_map_page(kvm, mips3_tlbpfn_to_paddr(tlb_lo[1]) >> PAGE_SHIFT) < 0) + return -1; + + pfn0 = kvm->arch.guest_pmap[mips3_tlbpfn_to_paddr(tlb_lo[0]) >> PAGE_SHIFT]; + pfn1 = kvm->arch.guest_pmap[mips3_tlbpfn_to_paddr(tlb_lo[1]) >> PAGE_SHIFT]; if (hpa0) *hpa0 = pfn0 << PAGE_SHIFT; @@ -423,9 +429,9 @@ kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu, entryhi = (tlb->tlb_hi & VPN2_MASK) | (KVM_GUEST_KERNEL_MODE(vcpu) ? kvm_mips_get_kernel_asid(vcpu) : kvm_mips_get_user_asid(vcpu)); entrylo0 = mips3_paddr_to_tlbpfn(pfn0 << PAGE_SHIFT) | (0x3 << 3) | - (tlb->tlb_lo0 & MIPS3_PG_D) | (tlb->tlb_lo0 & MIPS3_PG_V); + (tlb_lo[0] & MIPS3_PG_D) | (tlb_lo[0] & MIPS3_PG_V); entrylo1 = mips3_paddr_to_tlbpfn(pfn1 << PAGE_SHIFT) | (0x3 << 3) | - (tlb->tlb_lo1 & MIPS3_PG_D) | (tlb->tlb_lo1 & MIPS3_PG_V); + (tlb_lo[1] & MIPS3_PG_D) | (tlb_lo[1] & MIPS3_PG_V); #ifdef DEBUG kvm_debug("@ %#lx tlb_lo0: 0x%08lx tlb_lo1: 0x%08lx\n", vcpu->arch.pc, From 2336de8d713a4b5986b809906e172f35e466d048 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Thu, 18 Aug 2016 10:22:53 +0100 Subject: [PATCH 050/178] MIPS: KVM: Add missing gfn range check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 8985d50382359e5bf118fdbefc859d0dbf6cebc7 upstream. kvm_mips_handle_mapped_seg_tlb_fault() calculates the guest frame number based on the guest TLB EntryLo values, however it is not range checked to ensure it lies within the guest_pmap. If the physical memory the guest refers to is out of range then dump the guest TLB and emit an internal error. Fixes: 858dd5d45733 ("KVM/MIPS32: MMU/TLB operations for the Guest.") Signed-off-by: James Hogan Cc: Paolo Bonzini Cc: "Radim Krčmář" Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Cc: kvm@vger.kernel.org Signed-off-by: Radim Krčmář [james.hogan@imgtec.com: Backport to v3.10.y - v3.15.y] Signed-off-by: James Hogan Signed-off-by: Willy Tarreau --- arch/mips/kvm/kvm_tlb.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/arch/mips/kvm/kvm_tlb.c b/arch/mips/kvm/kvm_tlb.c index 1e6b1f124377..8aba2e54f90f 100644 --- a/arch/mips/kvm/kvm_tlb.c +++ b/arch/mips/kvm/kvm_tlb.c @@ -397,6 +397,7 @@ kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu, unsigned long entryhi = 0, entrylo0 = 0, entrylo1 = 0; struct kvm *kvm = vcpu->kvm; pfn_t pfn0, pfn1; + gfn_t gfn0, gfn1; long tlb_lo[2]; tlb_lo[0] = tlb->tlb_lo0; @@ -410,14 +411,24 @@ kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu, VPN2_MASK & (PAGE_MASK << 1))) tlb_lo[(KVM_GUEST_COMMPAGE_ADDR >> PAGE_SHIFT) & 1] = 0; - if (kvm_mips_map_page(kvm, mips3_tlbpfn_to_paddr(tlb_lo[0]) >> PAGE_SHIFT) < 0) + gfn0 = mips3_tlbpfn_to_paddr(tlb_lo[0]) >> PAGE_SHIFT; + gfn1 = mips3_tlbpfn_to_paddr(tlb_lo[1]) >> PAGE_SHIFT; + if (gfn0 >= kvm->arch.guest_pmap_npages || + gfn1 >= kvm->arch.guest_pmap_npages) { + kvm_err("%s: Invalid gfn: [%#llx, %#llx], EHi: %#lx\n", + __func__, gfn0, gfn1, tlb->tlb_hi); + kvm_mips_dump_guest_tlbs(vcpu); + return -1; + } + + if (kvm_mips_map_page(kvm, gfn0) < 0) return -1; - if (kvm_mips_map_page(kvm, mips3_tlbpfn_to_paddr(tlb_lo[1]) >> PAGE_SHIFT) < 0) + if (kvm_mips_map_page(kvm, gfn1) < 0) return -1; - pfn0 = kvm->arch.guest_pmap[mips3_tlbpfn_to_paddr(tlb_lo[0]) >> PAGE_SHIFT]; - pfn1 = kvm->arch.guest_pmap[mips3_tlbpfn_to_paddr(tlb_lo[1]) >> PAGE_SHIFT]; + pfn0 = kvm->arch.guest_pmap[gfn0]; + pfn1 = kvm->arch.guest_pmap[gfn1]; if (hpa0) *hpa0 = pfn0 << PAGE_SHIFT; From 8cee00e93cb1b7bedd6e4bf56e964ac315437013 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Thu, 18 Aug 2016 10:22:54 +0100 Subject: [PATCH 051/178] MIPS: KVM: Fix gfn range check in kseg0 tlb faults MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 0741f52d1b980dbeb290afe67d88fc2928edd8ab upstream. Two consecutive gfns are loaded into host TLB, so ensure the range check isn't off by one if guest_pmap_npages is odd. Fixes: 858dd5d45733 ("KVM/MIPS32: MMU/TLB operations for the Guest.") Signed-off-by: James Hogan Cc: Paolo Bonzini Cc: "Radim Krčmář" Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Cc: kvm@vger.kernel.org Signed-off-by: Radim Krčmář [james.hogan@imgtec.com: Backport to v3.10.y - v3.15.y] Signed-off-by: James Hogan Signed-off-by: Willy Tarreau --- arch/mips/kvm/kvm_tlb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/kvm/kvm_tlb.c b/arch/mips/kvm/kvm_tlb.c index 8aba2e54f90f..5a3c3731214f 100644 --- a/arch/mips/kvm/kvm_tlb.c +++ b/arch/mips/kvm/kvm_tlb.c @@ -312,7 +312,7 @@ int kvm_mips_handle_kseg0_tlb_fault(unsigned long badvaddr, } gfn = (KVM_GUEST_CPHYSADDR(badvaddr) >> PAGE_SHIFT); - if (gfn >= kvm->arch.guest_pmap_npages) { + if ((gfn | 1) >= kvm->arch.guest_pmap_npages) { kvm_err("%s: Invalid gfn: %#llx, BadVaddr: %#lx\n", __func__, gfn, badvaddr); kvm_mips_dump_host_tlbs(); From 5928125348d6d78867bc5a5171cc13a958eb6e01 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Thu, 18 Aug 2016 10:22:55 +0100 Subject: [PATCH 052/178] MIPS: KVM: Propagate kseg0/mapped tlb fault errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 9b731bcfdec4c159ad2e4312e25d69221709b96a upstream. Propagate errors from kvm_mips_handle_kseg0_tlb_fault() and kvm_mips_handle_mapped_seg_tlb_fault(), usually triggering an internal error since they normally indicate the guest accessed bad physical memory or the commpage in an unexpected way. Fixes: 858dd5d45733 ("KVM/MIPS32: MMU/TLB operations for the Guest.") Fixes: e685c689f3a8 ("KVM/MIPS32: Privileged instruction/target branch emulation.") Signed-off-by: James Hogan Cc: Paolo Bonzini Cc: "Radim Krčmář" Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Cc: kvm@vger.kernel.org Signed-off-by: Radim Krčmář [james.hogan@imgtec.com: Backport to v3.10.y - v3.15.y] Signed-off-by: James Hogan Signed-off-by: Willy Tarreau --- arch/mips/kvm/kvm_mips_emul.c | 33 ++++++++++++++++++++++++--------- arch/mips/kvm/kvm_tlb.c | 14 ++++++++++---- 2 files changed, 34 insertions(+), 13 deletions(-) diff --git a/arch/mips/kvm/kvm_mips_emul.c b/arch/mips/kvm/kvm_mips_emul.c index 33085819cd89..9f7643874fba 100644 --- a/arch/mips/kvm/kvm_mips_emul.c +++ b/arch/mips/kvm/kvm_mips_emul.c @@ -972,8 +972,13 @@ kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc, uint32_t cause, preempt_disable(); if (KVM_GUEST_KSEGX(va) == KVM_GUEST_KSEG0) { - if (kvm_mips_host_tlb_lookup(vcpu, va) < 0) { - kvm_mips_handle_kseg0_tlb_fault(va, vcpu); + if (kvm_mips_host_tlb_lookup(vcpu, va) < 0 && + kvm_mips_handle_kseg0_tlb_fault(va, vcpu)) { + kvm_err("%s: handling mapped kseg0 tlb fault for %lx, vcpu: %p, ASID: %#lx\n", + __func__, va, vcpu, read_c0_entryhi()); + er = EMULATE_FAIL; + preempt_enable(); + goto done; } } else if ((KVM_GUEST_KSEGX(va) < KVM_GUEST_KSEG0) || KVM_GUEST_KSEGX(va) == KVM_GUEST_KSEG23) { @@ -1006,11 +1011,16 @@ kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc, uint32_t cause, run, vcpu); preempt_enable(); goto dont_update_pc; - } else { - /* We fault an entry from the guest tlb to the shadow host TLB */ - kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb, - NULL, - NULL); + } + /* We fault an entry from the guest tlb to the shadow host TLB */ + if (kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb, + NULL, NULL)) { + kvm_err("%s: handling mapped seg tlb fault for %lx, index: %u, vcpu: %p, ASID: %#lx\n", + __func__, va, index, vcpu, + read_c0_entryhi()); + er = EMULATE_FAIL; + preempt_enable(); + goto done; } } } else { @@ -1821,8 +1831,13 @@ kvm_mips_handle_tlbmiss(unsigned long cause, uint32_t *opc, tlb->tlb_hi, tlb->tlb_lo0, tlb->tlb_lo1); #endif /* OK we have a Guest TLB entry, now inject it into the shadow host TLB */ - kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb, NULL, - NULL); + if (kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb, + NULL, NULL)) { + kvm_err("%s: handling mapped seg tlb fault for %lx, index: %u, vcpu: %p, ASID: %#lx\n", + __func__, va, index, vcpu, + read_c0_entryhi()); + er = EMULATE_FAIL; + } } } diff --git a/arch/mips/kvm/kvm_tlb.c b/arch/mips/kvm/kvm_tlb.c index 5a3c3731214f..4bee4397dca8 100644 --- a/arch/mips/kvm/kvm_tlb.c +++ b/arch/mips/kvm/kvm_tlb.c @@ -926,10 +926,16 @@ uint32_t kvm_get_inst(uint32_t *opc, struct kvm_vcpu *vcpu) local_irq_restore(flags); return KVM_INVALID_INST; } - kvm_mips_handle_mapped_seg_tlb_fault(vcpu, - &vcpu->arch. - guest_tlb[index], - NULL, NULL); + if (kvm_mips_handle_mapped_seg_tlb_fault(vcpu, + &vcpu->arch.guest_tlb[index], + NULL, NULL)) { + kvm_err("%s: handling mapped seg tlb fault failed for %p, index: %u, vcpu: %p, ASID: %#lx\n", + __func__, opc, index, vcpu, + read_c0_entryhi()); + kvm_mips_dump_guest_tlbs(vcpu); + local_irq_restore(flags); + return KVM_INVALID_INST; + } inst = *(opc); } local_irq_restore(flags); From 2c767daebacb87a6b308f86f2112fc7b4793cbf0 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Thu, 21 Apr 2016 14:04:55 +0100 Subject: [PATCH 053/178] MIPS: math-emu: Fix jalr emulation when rd == $0 commit ab4a92e66741b35ca12f8497896bafbe579c28a1 upstream. When emulating a jalr instruction with rd == $0, the code in isBranchInstr was incorrectly writing to GPR $0 which should actually always remain zeroed. This would lead to any further instructions emulated which use $0 operating on a bogus value until the task is next context switched, at which point the value of $0 in the task context would be restored to the correct zero by a store in SAVE_SOME. Fix this by not writing to rd if it is $0. Fixes: 102cedc32a6e ("MIPS: microMIPS: Floating point support.") Signed-off-by: Paul Burton Cc: Maciej W. Rozycki Cc: James Hogan Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/13160/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman Signed-off-by: Willy Tarreau --- arch/mips/math-emu/cp1emu.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c index f03771900813..3d492a823a55 100644 --- a/arch/mips/math-emu/cp1emu.c +++ b/arch/mips/math-emu/cp1emu.c @@ -684,9 +684,11 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, case spec_op: switch (insn.r_format.func) { case jalr_op: - regs->regs[insn.r_format.rd] = - regs->cp0_epc + dec_insn.pc_inc + - dec_insn.next_pc_inc; + if (insn.r_format.rd != 0) { + regs->regs[insn.r_format.rd] = + regs->cp0_epc + dec_insn.pc_inc + + dec_insn.next_pc_inc; + } /* Fall through */ case jr_op: *contpc = regs->regs[insn.r_format.rs]; From f8141132c564c36806b128e865d9b918ef2e791e Mon Sep 17 00:00:00 2001 From: James Hogan Date: Mon, 8 Feb 2016 18:43:49 +0000 Subject: [PATCH 054/178] MIPS: Fix siginfo.h to use strict posix types commit 5daebc477da4dfeb31ae193d83084def58fd2697 upstream. Commit 85efde6f4e0d ("make exported headers use strict posix types") changed the asm-generic siginfo.h to use the __kernel_* types, and commit 3a471cbc081b ("remove __KERNEL_STRICT_NAMES") make the internal types accessible only to the kernel, but the MIPS implementation hasn't been updated to match. Switch to proper types now so that the exported asm/siginfo.h won't produce quite so many compiler errors when included alone by a user program. Signed-off-by: James Hogan Cc: Christopher Ferris Cc: linux-mips@linux-mips.org Cc: # 2.6.30- Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/12477/ Signed-off-by: Ralf Baechle Signed-off-by: Willy Tarreau --- arch/mips/include/uapi/asm/siginfo.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/mips/include/uapi/asm/siginfo.h b/arch/mips/include/uapi/asm/siginfo.h index 6a8714193fb9..b5f77f76c899 100644 --- a/arch/mips/include/uapi/asm/siginfo.h +++ b/arch/mips/include/uapi/asm/siginfo.h @@ -45,13 +45,13 @@ typedef struct siginfo { /* kill() */ struct { - pid_t _pid; /* sender's pid */ + __kernel_pid_t _pid; /* sender's pid */ __ARCH_SI_UID_T _uid; /* sender's uid */ } _kill; /* POSIX.1b timers */ struct { - timer_t _tid; /* timer id */ + __kernel_timer_t _tid; /* timer id */ int _overrun; /* overrun count */ char _pad[sizeof( __ARCH_SI_UID_T) - sizeof(int)]; sigval_t _sigval; /* same as below */ @@ -60,26 +60,26 @@ typedef struct siginfo { /* POSIX.1b signals */ struct { - pid_t _pid; /* sender's pid */ + __kernel_pid_t _pid; /* sender's pid */ __ARCH_SI_UID_T _uid; /* sender's uid */ sigval_t _sigval; } _rt; /* SIGCHLD */ struct { - pid_t _pid; /* which child */ + __kernel_pid_t _pid; /* which child */ __ARCH_SI_UID_T _uid; /* sender's uid */ int _status; /* exit code */ - clock_t _utime; - clock_t _stime; + __kernel_clock_t _utime; + __kernel_clock_t _stime; } _sigchld; /* IRIX SIGCHLD */ struct { - pid_t _pid; /* which child */ - clock_t _utime; + __kernel_pid_t _pid; /* which child */ + __kernel_clock_t _utime; int _status; /* exit code */ - clock_t _stime; + __kernel_clock_t _stime; } _irix_sigchld; /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */ From 13f004c0ac440bf6b6272648f679759dd4ca89af Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Thu, 24 Mar 2016 16:02:52 +0100 Subject: [PATCH 055/178] MIPS: ath79: make bootconsole wait for both THRE and TEMT commit f5b556c94c8490d42fea79d7b4ae0ecbc291e69d upstream. This makes the ath79 bootconsole behave the same way as the generic 8250 bootconsole. Also waiting for TEMT (transmit buffer is empty) instead of just THRE (transmit buffer is not full) ensures that all characters have been transmitted before the real serial driver starts reconfiguring the serial controller (which would sometimes result in garbage being transmitted.) This change does not cause a visible performance loss. In addition, this seems to fix a hang observed in certain configurations on many AR7xxx/AR9xxx SoCs during autoconfig of the real serial driver. A more complete follow-up patch will disable 8250 autoconfig for ath79 altogether (the serial controller is detected as a 16550A, which is not fully compatible with the ath79 serial, and the autoconfig may lead to undefined behavior on ath79.) Cc: Signed-off-by: Matthias Schiffer Signed-off-by: Greg Kroah-Hartman Signed-off-by: Willy Tarreau --- arch/mips/ath79/early_printk.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/mips/ath79/early_printk.c b/arch/mips/ath79/early_printk.c index b955fafc58ba..d1adc59af5bf 100644 --- a/arch/mips/ath79/early_printk.c +++ b/arch/mips/ath79/early_printk.c @@ -31,13 +31,15 @@ static inline void prom_putchar_wait(void __iomem *reg, u32 mask, u32 val) } while (1); } +#define BOTH_EMPTY (UART_LSR_TEMT | UART_LSR_THRE) + static void prom_putchar_ar71xx(unsigned char ch) { void __iomem *base = (void __iomem *)(KSEG1ADDR(AR71XX_UART_BASE)); - prom_putchar_wait(base + UART_LSR * 4, UART_LSR_THRE, UART_LSR_THRE); + prom_putchar_wait(base + UART_LSR * 4, BOTH_EMPTY, BOTH_EMPTY); __raw_writel(ch, base + UART_TX * 4); - prom_putchar_wait(base + UART_LSR * 4, UART_LSR_THRE, UART_LSR_THRE); + prom_putchar_wait(base + UART_LSR * 4, BOTH_EMPTY, BOTH_EMPTY); } static void prom_putchar_ar933x(unsigned char ch) From ce7222fcf118bb835ed6f0b1699fe14e47f2b461 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Thu, 4 Feb 2016 01:24:40 +0100 Subject: [PATCH 056/178] MIPS: Fix 64k page support for 32 bit kernels. commit d7de413475f443957a0c1d256e405d19b3a2cb22 upstream. TASK_SIZE was defined as 0x7fff8000UL which for 64k pages is not a multiple of the page size. Somewhere further down the math fails such that executing an ELF binary fails. Signed-off-by: Ralf Baechle Tested-by: Joshua Henderson Cc: James Hogan Signed-off-by: Greg Kroah-Hartman Signed-off-by: Willy Tarreau --- arch/mips/include/asm/processor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h index 1470b7b68b0e..a7e71744fe89 100644 --- a/arch/mips/include/asm/processor.h +++ b/arch/mips/include/asm/processor.h @@ -51,7 +51,7 @@ extern unsigned int vced_count, vcei_count; * User space process size: 2GB. This is hardcoded into a few places, * so don't change it unless you know what you are doing. */ -#define TASK_SIZE 0x7fff8000UL +#define TASK_SIZE 0x80000000UL #endif #ifdef __KERNEL__ From 9338ba336a8a48c7644cac5502fa99faaad0e003 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Thu, 9 Jun 2016 10:50:43 +0100 Subject: [PATCH 057/178] MIPS: KVM: Fix modular KVM under QEMU MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 797179bc4fe06c89e47a9f36f886f68640b423f8 upstream. Copy __kvm_mips_vcpu_run() into unmapped memory, so that we can never get a TLB refill exception in it when KVM is built as a module. This was observed to happen with the host MIPS kernel running under QEMU, due to a not entirely transparent optimisation in the QEMU TLB handling where TLB entries replaced with TLBWR are copied to a separate part of the TLB array. Code in those pages continue to be executable, but those mappings persist only until the next ASID switch, even if they are marked global. An ASID switch happens in __kvm_mips_vcpu_run() at exception level after switching to the guest exception base. Subsequent TLB mapped kernel instructions just prior to switching to the guest trigger a TLB refill exception, which enters the guest exception handlers without updating EPC. This appears as a guest triggered TLB refill on a host kernel mapped (host KSeg2) address, which is not handled correctly as user (guest) mode accesses to kernel (host) segments always generate address error exceptions. Signed-off-by: James Hogan Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Ralf Baechle Cc: kvm@vger.kernel.org Cc: linux-mips@linux-mips.org Cc: # 3.10.x- Signed-off-by: Paolo Bonzini [james.hogan@imgtec.com: backported for stable 3.14] Signed-off-by: James Hogan Signed-off-by: Greg Kroah-Hartman Signed-off-by: Willy Tarreau --- arch/mips/include/asm/kvm_host.h | 1 + arch/mips/kvm/kvm_locore.S | 1 + arch/mips/kvm/kvm_mips.c | 11 ++++++++++- arch/mips/kvm/kvm_mips_int.h | 2 ++ 4 files changed, 14 insertions(+), 1 deletion(-) diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 4d6fa0bf1305..883a162083af 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -349,6 +349,7 @@ struct kvm_mips_tlb { #define KVM_MIPS_GUEST_TLB_SIZE 64 struct kvm_vcpu_arch { void *host_ebase, *guest_ebase; + int (*vcpu_run)(struct kvm_run *run, struct kvm_vcpu *vcpu); unsigned long host_stack; unsigned long host_gp; diff --git a/arch/mips/kvm/kvm_locore.S b/arch/mips/kvm/kvm_locore.S index 34c35f0e3290..73553cd98070 100644 --- a/arch/mips/kvm/kvm_locore.S +++ b/arch/mips/kvm/kvm_locore.S @@ -227,6 +227,7 @@ FEXPORT(__kvm_mips_load_k0k1) /* Jump to guest */ eret .set pop +EXPORT(__kvm_mips_vcpu_run_end) VECTOR(MIPSX(exception), unknown) /* diff --git a/arch/mips/kvm/kvm_mips.c b/arch/mips/kvm/kvm_mips.c index 8aa5f30d8579..97a181a44e53 100644 --- a/arch/mips/kvm/kvm_mips.c +++ b/arch/mips/kvm/kvm_mips.c @@ -343,6 +343,15 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) memcpy(gebase + offset, mips32_GuestException, mips32_GuestExceptionEnd - mips32_GuestException); +#ifdef MODULE + offset += mips32_GuestExceptionEnd - mips32_GuestException; + memcpy(gebase + offset, (char *)__kvm_mips_vcpu_run, + __kvm_mips_vcpu_run_end - (char *)__kvm_mips_vcpu_run); + vcpu->arch.vcpu_run = gebase + offset; +#else + vcpu->arch.vcpu_run = __kvm_mips_vcpu_run; +#endif + /* Invalidate the icache for these ranges */ mips32_SyncICache((unsigned long) gebase, ALIGN(size, PAGE_SIZE)); @@ -426,7 +435,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) kvm_guest_enter(); - r = __kvm_mips_vcpu_run(run, vcpu); + r = vcpu->arch.vcpu_run(run, vcpu); kvm_guest_exit(); local_irq_enable(); diff --git a/arch/mips/kvm/kvm_mips_int.h b/arch/mips/kvm/kvm_mips_int.h index 20da7d29eede..bf41ea36210e 100644 --- a/arch/mips/kvm/kvm_mips_int.h +++ b/arch/mips/kvm/kvm_mips_int.h @@ -27,6 +27,8 @@ #define MIPS_EXC_MAX 12 /* XXXSL More to follow */ +extern char __kvm_mips_vcpu_run_end[]; + #define C_TI (_ULCAST_(1) << 30) #define KVM_MIPS_IRQ_DELIVER_ALL_AT_ONCE (0) From b82f3ee1d0ae76034452183ad26384a53519a33d Mon Sep 17 00:00:00 2001 From: Ricky Liang Date: Fri, 20 May 2016 10:58:59 -0700 Subject: [PATCH 058/178] Input: uinput - handle compat ioctl for UI_SET_PHYS commit affa80bd97f7ca282d1faa91667b3ee9e4c590e6 upstream. When running a 32-bit userspace on a 64-bit kernel, the UI_SET_PHYS ioctl needs to be treated with special care, as it has the pointer size encoded in the command. Signed-off-by: Ricky Liang Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Willy Tarreau --- drivers/input/misc/uinput.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/input/misc/uinput.c b/drivers/input/misc/uinput.c index a0a4bbaef02c..3f2f3ac96a55 100644 --- a/drivers/input/misc/uinput.c +++ b/drivers/input/misc/uinput.c @@ -835,9 +835,15 @@ static long uinput_ioctl(struct file *file, unsigned int cmd, unsigned long arg) } #ifdef CONFIG_COMPAT + +#define UI_SET_PHYS_COMPAT _IOW(UINPUT_IOCTL_BASE, 108, compat_uptr_t) + static long uinput_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { + if (cmd == UI_SET_PHYS_COMPAT) + cmd = UI_SET_PHYS; + return uinput_ioctl_handler(file, cmd, arg, compat_ptr(arg)); } #endif From 2ae02f03721136e1b8ceef84a3926fc006ff2c8b Mon Sep 17 00:00:00 2001 From: Ping Cheng Date: Thu, 23 Jun 2016 10:54:17 -0700 Subject: [PATCH 059/178] Input: wacom_w8001 - w8001_MAX_LENGTH should be 13 commit 12afb34400eb2b301f06b2aa3535497d14faee59 upstream. Somehow the patch that added two-finger touch support forgot to update W8001_MAX_LENGTH from 11 to 13. Signed-off-by: Ping Cheng Reviewed-by: Peter Hutterer Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Willy Tarreau --- drivers/input/touchscreen/wacom_w8001.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/touchscreen/wacom_w8001.c b/drivers/input/touchscreen/wacom_w8001.c index 9a83be6b6584..abba11220f29 100644 --- a/drivers/input/touchscreen/wacom_w8001.c +++ b/drivers/input/touchscreen/wacom_w8001.c @@ -28,7 +28,7 @@ MODULE_AUTHOR("Jaya Kumar "); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL"); -#define W8001_MAX_LENGTH 11 +#define W8001_MAX_LENGTH 13 #define W8001_LEAD_MASK 0x80 #define W8001_LEAD_BYTE 0x80 #define W8001_TAB_MASK 0x40 From 0378ecf174b4e7f9d1a263bd07c38fbf2c627e6b Mon Sep 17 00:00:00 2001 From: Cameron Gutman Date: Wed, 29 Jun 2016 09:51:35 -0700 Subject: [PATCH 060/178] Input: xpad - validate USB endpoint count during probe commit caca925fca4fb30c67be88cacbe908eec6721e43 upstream. This prevents a malicious USB device from causing an oops. Signed-off-by: Cameron Gutman Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman Signed-off-by: Willy Tarreau --- drivers/input/joystick/xpad.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 856c1b03e22d..685e125d6366 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -843,6 +843,9 @@ static int xpad_probe(struct usb_interface *intf, const struct usb_device_id *id struct usb_endpoint_descriptor *ep_irq_in; int i, error; + if (intf->cur_altsetting->desc.bNumEndpoints != 2) + return -ENODEV; + for (i = 0; xpad_device[i].idVendor; i++) { if ((le16_to_cpu(udev->descriptor.idVendor) == xpad_device[i].idVendor) && (le16_to_cpu(udev->descriptor.idProduct) == xpad_device[i].idProduct)) From 5b26ade36bce58a17e2411082e8e45db213bdd28 Mon Sep 17 00:00:00 2001 From: Joseph Salisbury Date: Mon, 14 Mar 2016 14:51:48 -0400 Subject: [PATCH 061/178] ath5k: Change led pin configuration for compaq c700 laptop commit 7b9bc799a445aea95f64f15e0083cb19b5789abe upstream. BugLink: http://bugs.launchpad.net/bugs/972604 Commit 09c9bae26b0d3c9472cb6ae45010460a2cee8b8d ("ath5k: add led pin configuration for compaq c700 laptop") added a pin configuration for the Compaq c700 laptop. However, the polarity of the led pin is reversed. It should be red for wifi off and blue for wifi on, but it is the opposite. This bug was reported in the following bug report: http://pad.lv/972604 Fixes: 09c9bae26b0d3c9472cb6ae45010460a2cee8b8d ("ath5k: add led pin configuration for compaq c700 laptop") Signed-off-by: Joseph Salisbury Cc: stable@vger.kernel.org Signed-off-by: Kalle Valo Signed-off-by: Willy Tarreau --- drivers/net/wireless/ath/ath5k/led.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath5k/led.c b/drivers/net/wireless/ath/ath5k/led.c index f77ef36acf87..61879b1f7083 100644 --- a/drivers/net/wireless/ath/ath5k/led.c +++ b/drivers/net/wireless/ath/ath5k/led.c @@ -77,7 +77,7 @@ static DEFINE_PCI_DEVICE_TABLE(ath5k_led_devices) = { /* HP Compaq CQ60-206US (ddreggors@jumptv.com) */ { ATH_SDEVICE(PCI_VENDOR_ID_HP, 0x0137a), ATH_LED(3, 1) }, /* HP Compaq C700 (nitrousnrg@gmail.com) */ - { ATH_SDEVICE(PCI_VENDOR_ID_HP, 0x0137b), ATH_LED(3, 1) }, + { ATH_SDEVICE(PCI_VENDOR_ID_HP, 0x0137b), ATH_LED(3, 0) }, /* LiteOn AR5BXB63 (magooz@salug.it) */ { ATH_SDEVICE(PCI_VENDOR_ID_ATHEROS, 0x3067), ATH_LED(3, 0) }, /* IBM-specific AR5212 (all others) */ From 4f362654ca19157b9191143823061d963a0318d7 Mon Sep 17 00:00:00 2001 From: Raghava Aditya Renukunta Date: Mon, 25 Apr 2016 23:31:26 -0700 Subject: [PATCH 062/178] aacraid: Relinquish CPU during timeout wait commit 07beca2be24cc710461c0b131832524c9ee08910 upstream. aac_fib_send has a special function case for initial commands during driver initialization using wait < 0(pseudo sync mode). In this case, the command does not sleep but rather spins checking for timeout.This loop is calls cpu_relax() in an attempt to allow other processes/threads to use the CPU, but this function does not relinquish the CPU and so the command will hog the processor. This was observed in a KDUMP "crashkernel" and that prevented the "command thread" (which is responsible for completing the command from being timed out) from starting because it could not get the CPU. Fixed by replacing "cpu_relax()" call with "schedule()" Cc: stable@vger.kernel.org Signed-off-by: Raghava Aditya Renukunta Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen Signed-off-by: Willy Tarreau --- drivers/scsi/aacraid/commsup.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index 6a0d362e2596..addcd584d82d 100644 --- a/drivers/scsi/aacraid/commsup.c +++ b/drivers/scsi/aacraid/commsup.c @@ -590,10 +590,10 @@ int aac_fib_send(u16 command, struct fib *fibptr, unsigned long size, } return -EFAULT; } - /* We used to udelay() here but that absorbed - * a CPU when a timeout occured. Not very - * useful. */ - cpu_relax(); + /* + * Allow other processes / CPUS to use core + */ + schedule(); } } else if (down_interruptible(&fibptr->event_wait)) { /* Do nothing ... satisfy From d67794f3e8a417e0bde31ed56f47a10146aabab9 Mon Sep 17 00:00:00 2001 From: Raghava Aditya Renukunta Date: Mon, 25 Apr 2016 23:31:57 -0700 Subject: [PATCH 063/178] aacraid: Fix for aac_command_thread hang commit fc4bf75ea300a5e62a2419f89dd0e22189dd7ab7 upstream. Typically under error conditions, it is possible for aac_command_thread() to miss the wakeup from kthread_stop() and go back to sleep, causing it to hang aac_shutdown. In the observed scenario, the adapter is not functioning correctly and so aac_fib_send() never completes (or time-outs depending on how it was called). Shortly after aac_command_thread() starts it performs aac_fib_send(SendHostTime) which hangs. When aac_probe_one /aac_get_adapter_info send time outs, kthread_stop is called which breaks the command thread out of it's hang. The code will still go back to sleep in schedule_timeout() without checking kthread_should_stop() so it causes aac_probe_one to hang until the schedule_timeout() which is 30 minutes. Fixed by: Adding another kthread_should_stop() before schedule_timeout() Cc: stable@vger.kernel.org Signed-off-by: Raghava Aditya Renukunta Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen Signed-off-by: Willy Tarreau --- drivers/scsi/aacraid/commsup.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index addcd584d82d..284efac5f202 100644 --- a/drivers/scsi/aacraid/commsup.c +++ b/drivers/scsi/aacraid/commsup.c @@ -1920,6 +1920,10 @@ int aac_command_thread(void *data) if (difference <= 0) difference = 1; set_current_state(TASK_INTERRUPTIBLE); + + if (kthread_should_stop()) + break; + schedule_timeout(difference); if (kthread_should_stop()) From f1fa6f726649b39f26449bb34e592c53f26e11ab Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Wed, 11 May 2016 12:27:16 -0400 Subject: [PATCH 064/178] PCI: Disable all BAR sizing for devices with non-compliant BARs commit ad67b437f187ea818b2860524d10f878fadfdd99 upstream. b84106b4e229 ("PCI: Disable IO/MEM decoding for devices with non-compliant BARs") disabled BAR sizing for BARs 0-5 of devices that don't comply with the PCI spec. But it didn't do anything for expansion ROM BARs, so we still try to size them, resulting in warnings like this on Broadwell-EP: pci 0000:ff:12.0: BAR 6: failed to assign [mem size 0x00000001 pref] Move the non-compliant BAR check from __pci_read_base() up to pci_read_bases() so it applies to the expansion ROM BAR as well as to BARs 0-5. Note that direct callers of __pci_read_base(), like sriov_init(), will now bypass this check. We haven't had reports of devices with broken SR-IOV BARs yet. [bhelgaas: changelog] Fixes: b84106b4e229 ("PCI: Disable IO/MEM decoding for devices with non-compliant BARs") Signed-off-by: Prarit Bhargava Signed-off-by: Bjorn Helgaas CC: Thomas Gleixner CC: Ingo Molnar CC: "H. Peter Anvin" CC: Andi Kleen Signed-off-by: Greg Kroah-Hartman Signed-off-by: Willy Tarreau --- drivers/pci/probe.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index d332d55885f8..2d7cd0c080d3 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -173,9 +173,6 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, struct pci_bus_region region; bool bar_too_big = false, bar_disabled = false; - if (dev->non_compliant_bars) - return 0; - mask = type ? PCI_ROM_ADDRESS_MASK : ~0; /* No printks while decoding is disabled! */ @@ -295,6 +292,9 @@ static void pci_read_bases(struct pci_dev *dev, unsigned int howmany, int rom) { unsigned int pos, reg; + if (dev->non_compliant_bars) + return; + for (pos = 0; pos < howmany; pos++) { struct resource *res = &dev->resource[pos]; reg = PCI_BASE_ADDRESS_0 + (pos << 2); From 3ba97d7ea6afb1427ddc635550c75b20b7dfd1cd Mon Sep 17 00:00:00 2001 From: wang yanqing Date: Tue, 3 May 2016 00:38:36 +0800 Subject: [PATCH 065/178] rtlwifi: Fix logic error in enter/exit power-save mode commit 873ffe154ae074c46ed2d72dbd9a2a99f06f55b4 upstream. In commit a269913c52ad ("rtlwifi: Rework rtl_lps_leave() and rtl_lps_enter() to use work queue"), the tests for enter/exit power-save mode were inverted. With this change applied, the wifi connection becomes much more stable. Fixes: a269913c52ad ("rtlwifi: Rework rtl_lps_leave() and rtl_lps_enter() to use work queue") Signed-off-by: Wang YanQing Acked-by: Larry Finger Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman Signed-off-by: Willy Tarreau --- drivers/net/wireless/rtlwifi/base.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/rtlwifi/base.c b/drivers/net/wireless/rtlwifi/base.c index 6fc0853fd7f9..d066f74f743a 100644 --- a/drivers/net/wireless/rtlwifi/base.c +++ b/drivers/net/wireless/rtlwifi/base.c @@ -1392,9 +1392,9 @@ void rtl_watchdog_wq_callback(void *data) if (((rtlpriv->link_info.num_rx_inperiod + rtlpriv->link_info.num_tx_inperiod) > 8) || (rtlpriv->link_info.num_rx_inperiod > 2)) - rtlpriv->enter_ps = true; - else rtlpriv->enter_ps = false; + else + rtlpriv->enter_ps = true; /* LeisurePS only work in infra mode. */ schedule_work(&rtlpriv->works.lps_change_work); From be925e7ad222afd656ce8a2f9249939fa94b8699 Mon Sep 17 00:00:00 2001 From: Hari Bathini Date: Fri, 15 Apr 2016 22:48:02 +1000 Subject: [PATCH 066/178] powerpc/book3s64: Fix branching to OOL handlers in relocatable kernel commit 8ed8ab40047a570fdd8043a40c104a57248dd3fd upstream. Some of the interrupt vectors on 64-bit POWER server processors are only 32 bytes long (8 instructions), which is not enough for the full first-level interrupt handler. For these we need to branch to an out-of-line (OOL) handler. But when we are running a relocatable kernel, interrupt vectors till __end_interrupts marker are copied down to real address 0x100. So, branching to labels (ie. OOL handlers) outside this section must be handled differently (see LOAD_HANDLER()), considering relocatable kernel, which would need at least 4 instructions. However, branching from interrupt vector means that we corrupt the CFAR (come-from address register) on POWER7 and later processors as mentioned in commit 1707dd16. So, EXCEPTION_PROLOG_0 (6 instructions) that contains the part up to the point where the CFAR is saved in the PACA should be part of the short interrupt vectors before we branch out to OOL handlers. But as mentioned already, there are interrupt vectors on 64-bit POWER server processors that are only 32 bytes long (like vectors 0x4f00, 0x4f20, etc.), which cannot accomodate the above two cases at the same time owing to space constraint. Currently, in these interrupt vectors, we simply branch out to OOL handlers, without using LOAD_HANDLER(), which leaves us vulnerable when running a relocatable kernel (eg. kdump case). While this has been the case for sometime now and kdump is used widely, we were fortunate not to see any problems so far, for three reasons: 1. In almost all cases, production kernel (relocatable) is used for kdump as well, which would mean that crashed kernel's OOL handler would be at the same place where we end up branching to, from short interrupt vector of kdump kernel. 2. Also, OOL handler was unlikely the reason for crash in almost all the kdump scenarios, which meant we had a sane OOL handler from crashed kernel that we branched to. 3. On most 64-bit POWER server processors, page size is large enough that marking interrupt vector code as executable (see commit 429d2e83) leads to marking OOL handler code from crashed kernel, that sits right below interrupt vector code from kdump kernel, as executable as well. Let us fix this by moving the __end_interrupts marker down past OOL handlers to make sure that we also copy OOL handlers to real address 0x100 when running a relocatable kernel. This fix has been tested successfully in kdump scenario, on an LPAR with 4K page size by using different default/production kernel and kdump kernel. Also tested by manually corrupting the OOL handlers in the first kernel and then kdump'ing, and then causing the OOL handlers to fire - mpe. Fixes: c1fb6816fb1b ("powerpc: Add relocation on exception vector handlers") Signed-off-by: Hari Bathini Signed-off-by: Mahesh Salgaonkar Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman Signed-off-by: Willy Tarreau --- arch/powerpc/kernel/exceptions-64s.S | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 902ca3c6b4b6..3ac1d3a90551 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -857,11 +857,6 @@ hv_facility_unavailable_relon_trampoline: #endif STD_RELON_EXCEPTION_PSERIES(0x5700, 0x1700, altivec_assist) - /* Other future vectors */ - .align 7 - .globl __end_interrupts -__end_interrupts: - .align 7 system_call_entry_direct: #if defined(CONFIG_RELOCATABLE) @@ -1191,6 +1186,17 @@ __end_handlers: STD_RELON_EXCEPTION_PSERIES_OOL(0xf60, facility_unavailable) STD_RELON_EXCEPTION_HV_OOL(0xf80, hv_facility_unavailable) + /* + * The __end_interrupts marker must be past the out-of-line (OOL) + * handlers, so that they are copied to real address 0x100 when running + * a relocatable kernel. This ensures they can be reached from the short + * trampoline handlers (like 0x4f00, 0x4f20, etc.) which branch + * directly, without using LOAD_HANDLER(). + */ + .align 7 + .globl __end_interrupts +__end_interrupts: + #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) /* * Data area reserved for FWNMI option. From aeadb93a16d400f837d0c19e68cf22a884b182a6 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Thu, 12 May 2016 13:26:44 +0200 Subject: [PATCH 067/178] powerpc: Fix definition of SIAR and SDAR registers commit d23fac2b27d94aeb7b65536a50d32bfdc21fe01e upstream. The SIAR and SDAR registers are available twice, one time as SPRs 780 / 781 (unprivileged, but read-only), and one time as the SPRs 796 / 797 (privileged, but read and write). The Linux kernel code currently uses the unprivileged SPRs - while this is OK for reading, writing to that register of course does not work. Since the KVM code tries to write to this register, too (see the mtspr in book3s_hv_rmhandlers.S), the contents of this register sometimes get lost for the guests, e.g. during migration of a VM. To fix this issue, simply switch to the privileged SPR numbers instead. Signed-off-by: Thomas Huth Acked-by: Paul Mackerras Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman Signed-off-by: Willy Tarreau --- arch/powerpc/include/asm/reg.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 60c31698f7d5..d2a66519bd86 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -677,13 +677,13 @@ #define SPRN_PMC6 792 #define SPRN_PMC7 793 #define SPRN_PMC8 794 -#define SPRN_SIAR 780 -#define SPRN_SDAR 781 #define SPRN_SIER 784 #define SIER_SIPR 0x2000000 /* Sampled MSR_PR */ #define SIER_SIHV 0x1000000 /* Sampled MSR_HV */ #define SIER_SIAR_VALID 0x0400000 /* SIAR contents valid */ #define SIER_SDAR_VALID 0x0200000 /* SDAR contents valid */ +#define SPRN_SIAR 796 +#define SPRN_SDAR 797 #define SPRN_PA6T_MMCR0 795 #define PA6T_MMCR0_EN0 0x0000000000000001UL From 0d330892ab7d7e1c0f1969250659deb11fe967b7 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Thu, 12 May 2016 13:29:11 +0200 Subject: [PATCH 068/178] powerpc: Use privileged SPR number for MMCR2 commit 8dd75ccb571f3c92c48014b3dabd3d51a115ab41 upstream. We are already using the privileged versions of MMCR0, MMCR1 and MMCRA in the kernel, so for MMCR2, we should better use the privileged versions, too, to be consistent. Fixes: 240686c13687 ("powerpc: Initialise PMU related regs on Power8") Suggested-by: Paul Mackerras Signed-off-by: Thomas Huth Acked-by: Paul Mackerras Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman Signed-off-by: Willy Tarreau --- arch/powerpc/include/asm/reg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index d2a66519bd86..469d7715d6aa 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -643,7 +643,7 @@ #define MMCR0_FCWAIT 0x00000002UL /* freeze counter in WAIT state */ #define MMCR0_FCHV 0x00000001UL /* freeze conditions in hypervisor mode */ #define SPRN_MMCR1 798 -#define SPRN_MMCR2 769 +#define SPRN_MMCR2 785 #define SPRN_MMCRA 0x312 #define MMCRA_SDSYNC 0x80000000UL /* SDAR synced with SIAR */ #define MMCRA_SDAR_DCACHE_MISS 0x40000000UL From 628cb1780bca2d899d76b29fd6884fa73d243c7c Mon Sep 17 00:00:00 2001 From: Russell Currey Date: Thu, 7 Apr 2016 16:28:26 +1000 Subject: [PATCH 069/178] powerpc/pseries/eeh: Handle RTAS delay requests in configure_bridge commit 871e178e0f2c4fa788f694721a10b4758d494ce1 upstream. In the "ibm,configure-pe" and "ibm,configure-bridge" RTAS calls, the spec states that values of 9900-9905 can be returned, indicating that software should delay for 10^x (where x is the last digit, i.e. 990x) milliseconds and attempt the call again. Currently, the kernel doesn't know about this, and respecting it fixes some PCI failures when the hypervisor is busy. The delay is capped at 0.2 seconds. Cc: # 3.10+ Signed-off-by: Russell Currey Acked-by: Gavin Shan Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman Signed-off-by: Willy Tarreau --- arch/powerpc/platforms/pseries/eeh_pseries.c | 51 ++++++++++++++------ 1 file changed, 36 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index 68f97d5a4679..dc0278e7fd91 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -551,29 +551,50 @@ static int pseries_eeh_configure_bridge(struct eeh_pe *pe) { int config_addr; int ret; + /* Waiting 0.2s maximum before skipping configuration */ + int max_wait = 200; /* Figure out the PE address */ config_addr = pe->config_addr; if (pe->addr) config_addr = pe->addr; - /* Use new configure-pe function, if supported */ - if (ibm_configure_pe != RTAS_UNKNOWN_SERVICE) { - ret = rtas_call(ibm_configure_pe, 3, 1, NULL, - config_addr, BUID_HI(pe->phb->buid), - BUID_LO(pe->phb->buid)); - } else if (ibm_configure_bridge != RTAS_UNKNOWN_SERVICE) { - ret = rtas_call(ibm_configure_bridge, 3, 1, NULL, - config_addr, BUID_HI(pe->phb->buid), - BUID_LO(pe->phb->buid)); - } else { - return -EFAULT; + while (max_wait > 0) { + /* Use new configure-pe function, if supported */ + if (ibm_configure_pe != RTAS_UNKNOWN_SERVICE) { + ret = rtas_call(ibm_configure_pe, 3, 1, NULL, + config_addr, BUID_HI(pe->phb->buid), + BUID_LO(pe->phb->buid)); + } else if (ibm_configure_bridge != RTAS_UNKNOWN_SERVICE) { + ret = rtas_call(ibm_configure_bridge, 3, 1, NULL, + config_addr, BUID_HI(pe->phb->buid), + BUID_LO(pe->phb->buid)); + } else { + return -EFAULT; + } + + if (!ret) + return ret; + + /* + * If RTAS returns a delay value that's above 100ms, cut it + * down to 100ms in case firmware made a mistake. For more + * on how these delay values work see rtas_busy_delay_time + */ + if (ret > RTAS_EXTENDED_DELAY_MIN+2 && + ret <= RTAS_EXTENDED_DELAY_MAX) + ret = RTAS_EXTENDED_DELAY_MIN+2; + + max_wait -= rtas_busy_delay_time(ret); + + if (max_wait < 0) + break; + + rtas_busy_delay(ret); } - if (ret) - pr_warning("%s: Unable to configure bridge PHB#%d-PE#%x (%d)\n", - __func__, pe->phb->global_number, pe->addr, ret); - + pr_warn("%s: Unable to configure bridge PHB#%d-PE#%x (%d)\n", + __func__, pe->phb->global_number, pe->addr, ret); return ret; } From b738ed81a859e1185d2c9b54ef15456c8117f306 Mon Sep 17 00:00:00 2001 From: "Guilherme G. Piccoli" Date: Mon, 11 Apr 2016 16:17:23 -0300 Subject: [PATCH 070/178] powerpc/iommu: Remove the dependency on EEH struct in DDW mechanism commit 8445a87f7092bc8336ea1305be9306f26b846d93 upstream. Commit 39baadbf36ce ("powerpc/eeh: Remove eeh information from pci_dn") changed the pci_dn struct by removing its EEH-related members. As part of this clean-up, DDW mechanism was modified to read the device configuration address from eeh_dev struct. As a consequence, now if we disable EEH mechanism on kernel command-line for example, the DDW mechanism will fail, generating a kernel oops by dereferencing a NULL pointer (which turns to be the eeh_dev pointer). This patch just changes the configuration address calculation on DDW functions to a manual calculation based on pci_dn members instead of using eeh_dev-based address. No functional changes were made. This was tested on pSeries, both in PHyp and qemu guest. Fixes: 39baadbf36ce ("powerpc/eeh: Remove eeh information from pci_dn") Cc: stable@vger.kernel.org # v3.4+ Reviewed-by: Gavin Shan Signed-off-by: Guilherme G. Piccoli Signed-off-by: Michael Ellerman Signed-off-by: Willy Tarreau --- arch/powerpc/platforms/pseries/iommu.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 86ae364900d6..1d2fc2f2b34c 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -858,7 +858,8 @@ machine_arch_initcall(pseries, find_existing_ddw_windows); static int query_ddw(struct pci_dev *dev, const u32 *ddw_avail, struct ddw_query_response *query) { - struct eeh_dev *edev; + struct device_node *dn; + struct pci_dn *pdn; u32 cfg_addr; u64 buid; int ret; @@ -869,11 +870,10 @@ static int query_ddw(struct pci_dev *dev, const u32 *ddw_avail, * Retrieve them from the pci device, not the node with the * dma-window property */ - edev = pci_dev_to_eeh_dev(dev); - cfg_addr = edev->config_addr; - if (edev->pe_config_addr) - cfg_addr = edev->pe_config_addr; - buid = edev->phb->buid; + dn = pci_device_to_OF_node(dev); + pdn = PCI_DN(dn); + buid = pdn->phb->buid; + cfg_addr = (pdn->busno << 8) | pdn->devfn; ret = rtas_call(ddw_avail[0], 3, 5, (u32 *)query, cfg_addr, BUID_HI(buid), BUID_LO(buid)); @@ -887,7 +887,8 @@ static int create_ddw(struct pci_dev *dev, const u32 *ddw_avail, struct ddw_create_response *create, int page_shift, int window_shift) { - struct eeh_dev *edev; + struct device_node *dn; + struct pci_dn *pdn; u32 cfg_addr; u64 buid; int ret; @@ -898,11 +899,10 @@ static int create_ddw(struct pci_dev *dev, const u32 *ddw_avail, * Retrieve them from the pci device, not the node with the * dma-window property */ - edev = pci_dev_to_eeh_dev(dev); - cfg_addr = edev->config_addr; - if (edev->pe_config_addr) - cfg_addr = edev->pe_config_addr; - buid = edev->phb->buid; + dn = pci_device_to_OF_node(dev); + pdn = PCI_DN(dn); + buid = pdn->phb->buid; + cfg_addr = (pdn->busno << 8) | pdn->devfn; do { /* extra outputs are LIOBN and dma-addr (hi, lo) */ From cc80e5c914b0534f6dcaf8d45031c73a6ea2ae7b Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 26 May 2016 09:56:07 +1000 Subject: [PATCH 071/178] powerpc/pseries: Fix PCI config address for DDW commit 8a934efe94347eee843aeea65bdec8077a79e259 upstream. In commit 8445a87f7092 "powerpc/iommu: Remove the dependency on EEH struct in DDW mechanism", the PE address was replaced with the PCI config address in order to remove dependency on EEH. According to PAPR spec, firmware (pHyp or QEMU) should accept "xxBBSSxx" format PCI config address, not "xxxxBBSS" provided by the patch. Note that "BB" is PCI bus number and "SS" is the combination of slot and function number. This fixes the PCI address passed to DDW RTAS calls. Fixes: 8445a87f7092 ("powerpc/iommu: Remove the dependency on EEH struct in DDW mechanism") Cc: stable@vger.kernel.org # v3.4+ Reported-by: Guilherme G. Piccoli Signed-off-by: Gavin Shan Tested-by: Guilherme G. Piccoli Signed-off-by: Michael Ellerman Signed-off-by: Willy Tarreau --- arch/powerpc/platforms/pseries/iommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 1d2fc2f2b34c..401369134ba3 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -873,7 +873,7 @@ static int query_ddw(struct pci_dev *dev, const u32 *ddw_avail, dn = pci_device_to_OF_node(dev); pdn = PCI_DN(dn); buid = pdn->phb->buid; - cfg_addr = (pdn->busno << 8) | pdn->devfn; + cfg_addr = ((pdn->busno << 16) | (pdn->devfn << 8)); ret = rtas_call(ddw_avail[0], 3, 5, (u32 *)query, cfg_addr, BUID_HI(buid), BUID_LO(buid)); @@ -902,7 +902,7 @@ static int create_ddw(struct pci_dev *dev, const u32 *ddw_avail, dn = pci_device_to_OF_node(dev); pdn = PCI_DN(dn); buid = pdn->phb->buid; - cfg_addr = (pdn->busno << 8) | pdn->devfn; + cfg_addr = ((pdn->busno << 16) | (pdn->devfn << 8)); do { /* extra outputs are LIOBN and dma-addr (hi, lo) */ From 8110080dc53335d5dd99b123144a6174f19ffc65 Mon Sep 17 00:00:00 2001 From: Cyril Bur Date: Fri, 17 Jun 2016 14:58:34 +1000 Subject: [PATCH 072/178] powerpc/tm: Always reclaim in start_thread() for exec() class syscalls commit 8e96a87c5431c256feb65bcfc5aec92d9f7839b6 upstream. Userspace can quite legitimately perform an exec() syscall with a suspended transaction. exec() does not return to the old process, rather it load a new one and starts that, the expectation therefore is that the new process starts not in a transaction. Currently exec() is not treated any differently to any other syscall which creates problems. Firstly it could allow a new process to start with a suspended transaction for a binary that no longer exists. This means that the checkpointed state won't be valid and if the suspended transaction were ever to be resumed and subsequently aborted (a possibility which is exceedingly likely as exec()ing will likely doom the transaction) the new process will jump to invalid state. Secondly the incorrect attempt to keep the transactional state while still zeroing state for the new process creates at least two TM Bad Things. The first triggers on the rfid to return to userspace as start_thread() has given the new process a 'clean' MSR but the suspend will still be set in the hardware MSR. The second TM Bad Thing triggers in __switch_to() as the processor is still transactionally suspended but __switch_to() wants to zero the TM sprs for the new process. This is an example of the outcome of calling exec() with a suspended transaction. Note the first 700 is likely the first TM bad thing decsribed earlier only the kernel can't report it as we've loaded userspace registers. c000000000009980 is the rfid in fast_exception_return() Bad kernel stack pointer 3fffcfa1a370 at c000000000009980 Oops: Bad kernel stack pointer, sig: 6 [#1] CPU: 0 PID: 2006 Comm: tm-execed Not tainted NIP: c000000000009980 LR: 0000000000000000 CTR: 0000000000000000 REGS: c00000003ffefd40 TRAP: 0700 Not tainted MSR: 8000000300201031 CR: 00000000 XER: 00000000 CFAR: c0000000000098b4 SOFTE: 0 PACATMSCRATCH: b00000010000d033 GPR00: 0000000000000000 00003fffcfa1a370 0000000000000000 0000000000000000 GPR04: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR08: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR12: 00003fff966611c0 0000000000000000 0000000000000000 0000000000000000 NIP [c000000000009980] fast_exception_return+0xb0/0xb8 LR [0000000000000000] (null) Call Trace: Instruction dump: f84d0278 e9a100d8 7c7b03a6 e84101a0 7c4ff120 e8410170 7c5a03a6 e8010070 e8410080 e8610088 e8810090 e8210078 <4c000024> 48000000 e8610178 88ed023b Kernel BUG at c000000000043e80 [verbose debug info unavailable] Unexpected TM Bad Thing exception at c000000000043e80 (msr 0x201033) Oops: Unrecoverable exception, sig: 6 [#2] CPU: 0 PID: 2006 Comm: tm-execed Tainted: G D task: c0000000fbea6d80 ti: c00000003ffec000 task.ti: c0000000fb7ec000 NIP: c000000000043e80 LR: c000000000015a24 CTR: 0000000000000000 REGS: c00000003ffef7e0 TRAP: 0700 Tainted: G D MSR: 8000000300201033 CR: 28002828 XER: 00000000 CFAR: c000000000015a20 SOFTE: 0 PACATMSCRATCH: b00000010000d033 GPR00: 0000000000000000 c00000003ffefa60 c000000000db5500 c0000000fbead000 GPR04: 8000000300001033 2222222222222222 2222222222222222 00000000ff160000 GPR08: 0000000000000000 800000010000d033 c0000000fb7e3ea0 c00000000fe00004 GPR12: 0000000000002200 c00000000fe00000 0000000000000000 0000000000000000 GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR20: 0000000000000000 0000000000000000 c0000000fbea7410 00000000ff160000 GPR24: c0000000ffe1f600 c0000000fbea8700 c0000000fbea8700 c0000000fbead000 GPR28: c000000000e20198 c0000000fbea6d80 c0000000fbeab680 c0000000fbea6d80 NIP [c000000000043e80] tm_restore_sprs+0xc/0x1c LR [c000000000015a24] __switch_to+0x1f4/0x420 Call Trace: Instruction dump: 7c800164 4e800020 7c0022a6 f80304a8 7c0222a6 f80304b0 7c0122a6 f80304b8 4e800020 e80304a8 7c0023a6 e80304b0 <7c0223a6> e80304b8 7c0123a6 4e800020 This fixes CVE-2016-5828. Fixes: bc2a9408fa65 ("powerpc: Hook in new transactional memory code") Cc: stable@vger.kernel.org # v3.9+ Signed-off-by: Cyril Bur Signed-off-by: Michael Ellerman Signed-off-by: Willy Tarreau --- arch/powerpc/kernel/process.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index d55357ee9028..a5e339806589 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1088,6 +1088,16 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) current->thread.regs = regs - 1; } +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + /* + * Clear any transactional state, we're exec()ing. The cause is + * not important as there will never be a recheckpoint so it's not + * user visible. + */ + if (MSR_TM_SUSPENDED(mfmsr())) + tm_reclaim_current(0); +#endif + memset(regs->gpr, 0, sizeof(regs->gpr)); regs->ctr = 0; regs->link = 0; From b3feb52602655d533109f4aadfc1090fa5759a7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=83=C2=A1=C3=85=C2=A1=20Trnka?= Date: Fri, 20 May 2016 16:41:10 +0200 Subject: [PATCH 073/178] sunrpc: fix stripping of padded MIC tokens MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit c0cb8bf3a8e4bd82e640862cdd8891400405cb89 upstream. The length of the GSS MIC token need not be a multiple of four bytes. It is then padded by XDR to a multiple of 4 B, but unwrap_integ_data() would previously only trim mic.len + 4 B. The remaining up to three bytes would then trigger a check in nfs4svc_decode_compoundargs(), leading to a "garbage args" error and mount failure: nfs4svc_decode_compoundargs: compound not properly padded! nfsd: failed to decode arguments! This would prevent older clients using the pre-RFC 4121 MIC format (37-byte MIC including a 9-byte OID) from mounting exports from v3.9+ servers using krb5i. The trimming was introduced by commit 4c190e2f913f ("sunrpc: trim off trailing checksum before returning decrypted or integrity authenticated buffer"). Fixes: 4c190e2f913f "unrpc: trim off trailing checksum..." Signed-off-by: Tomáš Trnka Cc: stable@vger.kernel.org Acked-by: Jeff Layton Signed-off-by: J. Bruce Fields Signed-off-by: Willy Tarreau --- net/sunrpc/auth_gss/svcauth_gss.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 29b4ba93ab3c..62663a08ffbd 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -859,8 +859,8 @@ unwrap_integ_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct g goto out; if (svc_getnl(&buf->head[0]) != seq) goto out; - /* trim off the mic at the end before returning */ - xdr_buf_trim(buf, mic.len + 4); + /* trim off the mic and padding at the end before returning */ + xdr_buf_trim(buf, round_up_to_quad(mic.len) + 4); stat = 0; out: kfree(mic.data); From 184f1f0125bfd71d5deb86f58f72270f4b9661c7 Mon Sep 17 00:00:00 2001 From: Itai Handler Date: Tue, 3 Nov 2015 00:20:56 +0200 Subject: [PATCH 074/178] drm/gma500: Fix possible out of bounds read commit 7ccca1d5bf69fdd1d3c5fcf84faf1659a6e0ad11 upstream. Fix possible out of bounds read, by adding missing comma. The code may read pass the end of the dsi_errors array when the most significant bit (bit #31) in the intr_stat register is set. This bug has been detected using CppCheck (static analysis tool). Cc: stable@vger.kernel.org Signed-off-by: Itai Handler Signed-off-by: Patrik Jakobsson Signed-off-by: Willy Tarreau --- drivers/gpu/drm/gma500/mdfld_dsi_pkg_sender.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/gma500/mdfld_dsi_pkg_sender.c b/drivers/gpu/drm/gma500/mdfld_dsi_pkg_sender.c index 489ffd2c66e5..a3d37e4a84ae 100644 --- a/drivers/gpu/drm/gma500/mdfld_dsi_pkg_sender.c +++ b/drivers/gpu/drm/gma500/mdfld_dsi_pkg_sender.c @@ -85,7 +85,7 @@ static const char *const dsi_errors[] = { "RX Prot Violation", "HS Generic Write FIFO Full", "LP Generic Write FIFO Full", - "Generic Read Data Avail" + "Generic Read Data Avail", "Special Packet Sent", "Tearing Effect", }; From 30e00cbd5f24d3f9724cee5ce7221d9baaa67391 Mon Sep 17 00:00:00 2001 From: Lyude Date: Thu, 12 May 2016 10:56:59 -0400 Subject: [PATCH 075/178] drm/fb_helper: Fix references to dev->mode_config.num_connector commit 255f0e7c418ad95a4baeda017ae6182ba9b3c423 upstream. During boot, MST hotplugs are generally expected (even if no physical hotplugging occurs) and result in DRM's connector topology changing. This means that using num_connector from the current mode configuration can lead to the number of connectors changing under us. This can lead to some nasty scenarios in fbcon: - We allocate an array to the size of dev->mode_config.num_connectors. - MST hotplug occurs, dev->mode_config.num_connectors gets incremented. - We try to loop through each element in the array using the new value of dev->mode_config.num_connectors, and end up going out of bounds since dev->mode_config.num_connectors is now larger then the array we allocated. fb_helper->connector_count however, will always remain consistent while we do a modeset in fb_helper. Note: This is just polish for 4.7, Dave Airlie's drm_connector refcounting fixed these bugs for real. But it's good enough duct-tape for stable kernel backporting, since backporting the refcounting changes is way too invasive. Signed-off-by: Lyude [danvet: Clarify why we need this. Also remove the now unused "dev" local variable to appease gcc.] Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1463065021-18280-3-git-send-email-cpaul@redhat.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Willy Tarreau --- drivers/gpu/drm/drm_fb_helper.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index b78cbe74dadf..93b74107d20d 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -1313,7 +1313,6 @@ static int drm_pick_crtcs(struct drm_fb_helper *fb_helper, int n, int width, int height) { int c, o; - struct drm_device *dev = fb_helper->dev; struct drm_connector *connector; struct drm_connector_helper_funcs *connector_funcs; struct drm_encoder *encoder; @@ -1334,7 +1333,7 @@ static int drm_pick_crtcs(struct drm_fb_helper *fb_helper, if (modes[n] == NULL) return best_score; - crtcs = kzalloc(dev->mode_config.num_connector * + crtcs = kzalloc(fb_helper->connector_count * sizeof(struct drm_fb_helper_crtc *), GFP_KERNEL); if (!crtcs) return best_score; @@ -1381,7 +1380,7 @@ static int drm_pick_crtcs(struct drm_fb_helper *fb_helper, best_crtc = crtc; best_score = score; memcpy(best_crtcs, crtcs, - dev->mode_config.num_connector * + fb_helper->connector_count * sizeof(struct drm_fb_helper_crtc *)); } } From fee154e5b3b1a6eb6e92b4dd75cc41fbe4181fe5 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 13 Jun 2016 15:37:34 -0400 Subject: [PATCH 076/178] drm/radeon: fix asic initialization for virtualized environments commit 05082b8bbd1a0ffc74235449c4b8930a8c240f85 upstream. When executing in a PCI passthrough based virtuzliation environment, the hypervisor will usually attempt to send a PCIe bus reset signal to the ASIC when the VM reboots. In this scenario, the card is not correctly initialized, but we still consider it to be posted. Therefore, in a passthrough based environemnt we should always post the card to guarantee it is in a good state for driver initialization. Ported from amdgpu commit: amdgpu: fix asic initialization for virtualized environments Cc: Andres Rodriguez Cc: Alex Williamson Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Willy Tarreau --- drivers/gpu/drm/radeon/radeon_device.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 8df1525f71d2..e9db3f8125ed 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -449,6 +449,23 @@ void radeon_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc) /* * GPU helpers function. */ + +/** + * radeon_device_is_virtual - check if we are running is a virtual environment + * + * Check if the asic has been passed through to a VM (all asics). + * Used at driver startup. + * Returns true if virtual or false if not. + */ +static bool radeon_device_is_virtual(void) +{ +#ifdef CONFIG_X86 + return boot_cpu_has(X86_FEATURE_HYPERVISOR); +#else + return false; +#endif +} + /** * radeon_card_posted - check if the hw has already been initialized * @@ -462,6 +479,10 @@ bool radeon_card_posted(struct radeon_device *rdev) { uint32_t reg; + /* for pass through, always force asic_init */ + if (radeon_device_is_virtual()) + return false; + /* required for EFI mode on macbook2,1 which uses an r5xx asic */ if (efi_enabled(EFI_BOOT) && (rdev->pdev->subsystem_vendor == PCI_VENDOR_ID_APPLE) && From 6f78f4c51cea9f7667484dc21f52078e3f6175f6 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 1 Jun 2016 12:58:36 -0400 Subject: [PATCH 077/178] drm/radeon: add a delay after ATPX dGPU power off MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d814b24fb74cb9797d70cb8053961447c5879a5c upstream. ATPX dGPU power control requires a 200ms delay between power off and on. This should fix dGPU failures on resume from power off. Reviewed-by: Hawking Zhang Acked-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Willy Tarreau --- drivers/gpu/drm/radeon/radeon_atpx_handler.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon_atpx_handler.c b/drivers/gpu/drm/radeon/radeon_atpx_handler.c index 8c44ef57864b..a7e1893de838 100644 --- a/drivers/gpu/drm/radeon/radeon_atpx_handler.c +++ b/drivers/gpu/drm/radeon/radeon_atpx_handler.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "radeon_acpi.h" @@ -252,6 +253,10 @@ static int radeon_atpx_set_discrete_state(struct radeon_atpx *atpx, u8 state) if (!info) return -EIO; kfree(info); + + /* 200ms delay is required after off */ + if (state == 0) + msleep(200); } return 0; } From 14a039bc47b8958d1517204cba4a339794b49990 Mon Sep 17 00:00:00 2001 From: Lyude Date: Fri, 24 Jun 2016 17:54:31 -0400 Subject: [PATCH 078/178] drm/radeon: Poll for both connect/disconnect on analog connectors commit 14ff8d48f2235295dfb3117693008e367b49cdb5 upstream. DRM_CONNECTOR_POLL_CONNECT only enables polling for connections, not disconnections. Because of this, we end up losing hotplug polling for analog connectors once they get connected. Easy way to reproduce: - Grab a machine with a radeon GPU and a VGA port - Plug a monitor into the VGA port, wait for it to update the connector from disconnected to connected - Disconnect the monitor on VGA, a hotplug event is never sent for the removal of the connector. Originally, only using DRM_CONNECTOR_POLL_CONNECT might have been a good idea since doing VGA polling can sometimes result in having to mess with the DAC voltages to figure out whether or not there's actually something there since VGA doesn't have HPD. Doing this would have the potential of showing visible artifacts on the screen every time we ran a poll while a VGA display was connected. Luckily, radeon_vga_detect() only resorts to this sort of polling if the poll is forced, and DRM's polling helper doesn't force it's polls. Additionally, this removes some assignments to connector->polled that weren't actually doing anything. Cc: stable@vger.kernel.org Signed-off-by: Lyude Signed-off-by: Alex Deucher Signed-off-by: Willy Tarreau --- drivers/gpu/drm/radeon/radeon_connectors.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index 1fbd38b371d4..ea62810aeda6 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -1691,7 +1691,6 @@ radeon_add_atom_connector(struct drm_device *dev, 1); /* no HPD on analog connectors */ radeon_connector->hpd.hpd = RADEON_HPD_NONE; - connector->polled = DRM_CONNECTOR_POLL_CONNECT; connector->interlace_allowed = true; connector->doublescan_allowed = true; break; @@ -1889,8 +1888,10 @@ radeon_add_atom_connector(struct drm_device *dev, } if (radeon_connector->hpd.hpd == RADEON_HPD_NONE) { - if (i2c_bus->valid) - connector->polled = DRM_CONNECTOR_POLL_CONNECT; + if (i2c_bus->valid) { + connector->polled = DRM_CONNECTOR_POLL_CONNECT | + DRM_CONNECTOR_POLL_DISCONNECT; + } } else connector->polled = DRM_CONNECTOR_POLL_HPD; @@ -1962,7 +1963,6 @@ radeon_add_legacy_connector(struct drm_device *dev, 1); /* no HPD on analog connectors */ radeon_connector->hpd.hpd = RADEON_HPD_NONE; - connector->polled = DRM_CONNECTOR_POLL_CONNECT; connector->interlace_allowed = true; connector->doublescan_allowed = true; break; @@ -2047,10 +2047,13 @@ radeon_add_legacy_connector(struct drm_device *dev, } if (radeon_connector->hpd.hpd == RADEON_HPD_NONE) { - if (i2c_bus->valid) - connector->polled = DRM_CONNECTOR_POLL_CONNECT; + if (i2c_bus->valid) { + connector->polled = DRM_CONNECTOR_POLL_CONNECT | + DRM_CONNECTOR_POLL_DISCONNECT; + } } else connector->polled = DRM_CONNECTOR_POLL_HPD; + connector->display_info.subpixel_order = subpixel_order; drm_sysfs_connector_add(connector); } From 015378466167e49a2370c95e4e15607ff27f6e7a Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 27 Jul 2016 15:28:56 -0400 Subject: [PATCH 079/178] drm/radeon: fix firmware info version checks commit 3edc38a0facef45ee22af8afdce3737f421f36ab upstream. Some of the checks didn't handle frev 2 tables properly. Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Willy Tarreau --- drivers/gpu/drm/radeon/radeon_atombios.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c index f3cce23f4a62..f4b9b1c0cae8 100644 --- a/drivers/gpu/drm/radeon/radeon_atombios.c +++ b/drivers/gpu/drm/radeon/radeon_atombios.c @@ -1144,7 +1144,7 @@ bool radeon_atom_get_clock_info(struct drm_device *dev) le16_to_cpu(firmware_info->info.usReferenceClock); p1pll->reference_div = 0; - if (crev < 2) + if ((frev < 2) && (crev < 2)) p1pll->pll_out_min = le16_to_cpu(firmware_info->info.usMinPixelClockPLL_Output); else @@ -1153,7 +1153,7 @@ bool radeon_atom_get_clock_info(struct drm_device *dev) p1pll->pll_out_max = le32_to_cpu(firmware_info->info.ulMaxPixelClockPLL_Output); - if (crev >= 4) { + if (((frev < 2) && (crev >= 4)) || (frev >= 2)) { p1pll->lcd_pll_out_min = le16_to_cpu(firmware_info->info_14.usLcdMinPixelClockPLL_Output) * 100; if (p1pll->lcd_pll_out_min == 0) From 3d1658bb0a72683b0e22f04420eae260295d8237 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 30 Apr 2016 00:48:54 -0400 Subject: [PATCH 080/178] ext4: fix hang when processing corrupted orphaned inode list commit c9eb13a9105e2e418f72e46a2b6da3f49e696902 upstream. If the orphaned inode list contains inode #5, ext4_iget() returns a bad inode (since the bootloader inode should never be referenced directly). Because of the bad inode, we end up processing the inode repeatedly and this hangs the machine. This can be reproduced via: mke2fs -t ext4 /tmp/foo.img 100 debugfs -w -R "ssv last_orphan 5" /tmp/foo.img mount -o loop /tmp/foo.img /mnt (But don't do this if you are using an unpatched kernel if you care about the system staying functional. :-) This bug was found by the port of American Fuzzy Lop into the kernel to find file system problems[1]. (Since it *only* happens if inode #5 shows up on the orphan list --- 3, 7, 8, etc. won't do it, it's not surprising that AFL needed two hours before it found it.) [1] http://events.linuxfoundation.org/sites/events/files/slides/AFL%20filesystem%20fuzzing%2C%20Vault%202016_0.pdf Cc: stable@vger.kernel.org Reported by: Vegard Nossum Signed-off-by: Theodore Ts'o Signed-off-by: Willy Tarreau --- fs/ext4/ialloc.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 4d4718cf25ab..00cbc648e1dc 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -1027,11 +1027,13 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino) goto iget_failed; /* - * If the orphans has i_nlinks > 0 then it should be able to be - * truncated, otherwise it won't be removed from the orphan list - * during processing and an infinite loop will result. + * If the orphans has i_nlinks > 0 then it should be able to + * be truncated, otherwise it won't be removed from the orphan + * list during processing and an infinite loop will result. + * Similarly, it must not be a bad inode. */ - if (inode->i_nlink && !ext4_can_truncate(inode)) + if ((inode->i_nlink && !ext4_can_truncate(inode)) || + is_bad_inode(inode)) goto bad_orphan; if (NEXT_ORPHAN(inode) > max_ino) From 58e32a8d4e8eb5e034b9e092430e9ee138d8f0dc Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Thu, 5 May 2016 17:38:03 -0400 Subject: [PATCH 081/178] ext4: address UBSAN warning in mb_find_order_for_block() commit b5cb316cdf3a3f5f6125412b0f6065185240cfdc upstream. Currently, in mb_find_order_for_block(), there's a loop like the following: while (order <= e4b->bd_blkbits + 1) { ... bb += 1 << (e4b->bd_blkbits - order); } Note that the updated bb is used in the loop's next iteration only. However, at the last iteration, that is at order == e4b->bd_blkbits + 1, the shift count becomes negative (c.f. C99 6.5.7(3)) and UBSAN reports UBSAN: Undefined behaviour in fs/ext4/mballoc.c:1281:11 shift exponent -1 is negative [...] Call Trace: [] dump_stack+0xbc/0x117 [] ? _atomic_dec_and_lock+0x169/0x169 [] ubsan_epilogue+0xd/0x4e [] __ubsan_handle_shift_out_of_bounds+0x1fb/0x254 [] ? __ubsan_handle_load_invalid_value+0x158/0x158 [] ? ext4_mb_generate_from_pa+0x590/0x590 [] ? ext4_read_block_bitmap_nowait+0x598/0xe80 [] mb_find_order_for_block+0x1ce/0x240 [...] Unless compilers start to do some fancy transformations (which at least GCC 6.0.0 doesn't currently do), the issue is of cosmetic nature only: the such calculated value of bb is never used again. Silence UBSAN by introducing another variable, bb_incr, holding the next increment to apply to bb and adjust that one by right shifting it by one position per loop iteration. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=114701 Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=112161 Cc: stable@vger.kernel.org Signed-off-by: Nicolai Stange Signed-off-by: Theodore Ts'o Signed-off-by: Willy Tarreau --- fs/ext4/mballoc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 61ee01603940..b2e99401189b 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -1232,6 +1232,7 @@ static void ext4_mb_unload_buddy(struct ext4_buddy *e4b) static int mb_find_order_for_block(struct ext4_buddy *e4b, int block) { int order = 1; + int bb_incr = 1 << (e4b->bd_blkbits - 1); void *bb; BUG_ON(e4b->bd_bitmap == e4b->bd_buddy); @@ -1244,7 +1245,8 @@ static int mb_find_order_for_block(struct ext4_buddy *e4b, int block) /* this block is part of buddy of order 'order' */ return order; } - bb += 1 << (e4b->bd_blkbits - order); + bb += bb_incr; + bb_incr >>= 1; order++; } return 0; From cb0b53ed47f7e7a893b58f9becf4ea48e6ca7d43 Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Thu, 5 May 2016 19:46:19 -0400 Subject: [PATCH 082/178] ext4: silence UBSAN in ext4_mb_init() commit 935244cd54b86ca46e69bc6604d2adfb1aec2d42 upstream. Currently, in ext4_mb_init(), there's a loop like the following: do { ... offset += 1 << (sb->s_blocksize_bits - i); i++; } while (i <= sb->s_blocksize_bits + 1); Note that the updated offset is used in the loop's next iteration only. However, at the last iteration, that is at i == sb->s_blocksize_bits + 1, the shift count becomes equal to (unsigned)-1 > 31 (c.f. C99 6.5.7(3)) and UBSAN reports UBSAN: Undefined behaviour in fs/ext4/mballoc.c:2621:15 shift exponent 4294967295 is too large for 32-bit type 'int' [...] Call Trace: [] dump_stack+0xbc/0x117 [] ? _atomic_dec_and_lock+0x169/0x169 [] ubsan_epilogue+0xd/0x4e [] __ubsan_handle_shift_out_of_bounds+0x1fb/0x254 [] ? __ubsan_handle_load_invalid_value+0x158/0x158 [] ? kmem_cache_alloc+0x101/0x390 [] ? ext4_mb_init+0x13b/0xfd0 [] ? create_cache+0x57/0x1f0 [] ? create_cache+0x11a/0x1f0 [] ? mutex_lock+0x38/0x60 [] ? mutex_unlock+0x1b/0x50 [] ? put_online_mems+0x5b/0xc0 [] ? kmem_cache_create+0x117/0x2c0 [] ext4_mb_init+0xc49/0xfd0 [...] Observe that the mentioned shift exponent, 4294967295, equals (unsigned)-1. Unless compilers start to do some fancy transformations (which at least GCC 6.0.0 doesn't currently do), the issue is of cosmetic nature only: the such calculated value of offset is never used again. Silence UBSAN by introducing another variable, offset_incr, holding the next increment to apply to offset and adjust that one by right shifting it by one position per loop iteration. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=114701 Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=112161 Cc: stable@vger.kernel.org Signed-off-by: Nicolai Stange Signed-off-by: Theodore Ts'o Signed-off-by: Willy Tarreau --- fs/ext4/mballoc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index b2e99401189b..4d739463831e 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2516,7 +2516,7 @@ int ext4_mb_init(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); unsigned i, j; - unsigned offset; + unsigned offset, offset_incr; unsigned max; int ret; @@ -2545,11 +2545,13 @@ int ext4_mb_init(struct super_block *sb) i = 1; offset = 0; + offset_incr = 1 << (sb->s_blocksize_bits - 1); max = sb->s_blocksize << 2; do { sbi->s_mb_offsets[i] = offset; sbi->s_mb_maxs[i] = max; - offset += 1 << (sb->s_blocksize_bits - i); + offset += offset_incr; + offset_incr = offset_incr >> 1; max = max >> 1; i++; } while (i <= sb->s_blocksize_bits + 1); From ea8f8498b39a4e9ef9c55f5b3eb25efea85a6ffb Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Fri, 15 Jul 2016 00:22:07 -0400 Subject: [PATCH 083/178] ext4: verify extent header depth commit 7bc9491645118c9461bd21099c31755ff6783593 upstream. Although the extent tree depth of 5 should enough be for the worst case of 2*32 extents of length 1, the extent tree code does not currently to merge nodes which are less than half-full with a sibling node, or to shrink the tree depth if possible. So it's possible, at least in theory, for the tree depth to be greater than 5. However, even in the worst case, a tree depth of 32 is highly unlikely, and if the file system is maliciously corrupted, an insanely large eh_depth can cause memory allocation failures that will trigger kernel warnings (here, eh_depth = 65280): JBD2: ext4.exe wants too many credits credits:195849 rsv_credits:0 max:256 ------------[ cut here ]------------ WARNING: CPU: 0 PID: 50 at fs/jbd2/transaction.c:293 start_this_handle+0x569/0x580 CPU: 0 PID: 50 Comm: ext4.exe Not tainted 4.7.0-rc5+ #508 Stack: 604a8947 625badd8 0002fd09 00000000 60078643 00000000 62623910 601bf9bc 62623970 6002fc84 626239b0 900000125 Call Trace: [<6001c2dc>] show_stack+0xdc/0x1a0 [<601bf9bc>] dump_stack+0x2a/0x2e [<6002fc84>] __warn+0x114/0x140 [<6002fdff>] warn_slowpath_null+0x1f/0x30 [<60165829>] start_this_handle+0x569/0x580 [<60165d4e>] jbd2__journal_start+0x11e/0x220 [<60146690>] __ext4_journal_start_sb+0x60/0xa0 [<60120a81>] ext4_truncate+0x131/0x3a0 [<60123677>] ext4_setattr+0x757/0x840 [<600d5d0f>] notify_change+0x16f/0x2a0 [<600b2b16>] do_truncate+0x76/0xc0 [<600c3e56>] path_openat+0x806/0x1300 [<600c55c9>] do_filp_open+0x89/0xf0 [<600b4074>] do_sys_open+0x134/0x1e0 [<600b4140>] SyS_open+0x20/0x30 [<6001ea68>] handle_syscall+0x88/0x90 [<600295fd>] userspace+0x3fd/0x500 [<6001ac55>] fork_handler+0x85/0x90 ---[ end trace 08b0b88b6387a244 ]--- [ Commit message modified and the extent tree depath check changed from 5 to 32 -- tytso ] Cc: Darrick J. Wong Signed-off-by: Vegard Nossum Signed-off-by: Theodore Ts'o Signed-off-by: Willy Tarreau --- fs/ext4/extents.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index df633bb25909..f9fe3593d926 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -454,6 +454,10 @@ static int __ext4_ext_check(const char *function, unsigned int line, error_msg = "invalid extent entries"; goto corrupted; } + if (unlikely(depth > 32)) { + error_msg = "too large eh_depth"; + goto corrupted; + } /* Verify checksum on non-root extent tree nodes */ if (ext_depth(inode) != depth && !ext4_extent_block_csum_verify(inode, eh)) { From 6dc68acbb65e2336d6f8b35c2f0c9a4219d3adf2 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Thu, 30 Jun 2016 11:53:46 -0400 Subject: [PATCH 084/178] ext4: check for extents that wrap around commit f70749ca42943faa4d4dcce46dfdcaadb1d0c4b6 upstream. An extent with lblock = 4294967295 and len = 1 will pass the ext4_valid_extent() test: ext4_lblk_t last = lblock + len - 1; if (len == 0 || lblock > last) return 0; since last = 4294967295 + 1 - 1 = 4294967295. This would later trigger the BUG_ON(es->es_lblk + es->es_len < es->es_lblk) in ext4_es_end(). We can simplify it by removing the - 1 altogether and changing the test to use lblock + len <= lblock, since now if len = 0, then lblock + 0 == lblock and it fails, and if len > 0 then lblock + len > lblock in order to pass (i.e. it doesn't overflow). Fixes: 5946d0893 ("ext4: check for overlapping extents in ext4_valid_extent_entries()") Fixes: 2f974865f ("ext4: check for zero length extent explicitly") Cc: Eryu Guan Cc: stable@vger.kernel.org Signed-off-by: Phil Turnbull Signed-off-by: Vegard Nossum Signed-off-by: Theodore Ts'o Signed-off-by: Willy Tarreau --- fs/ext4/extents.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index f9fe3593d926..7eea76168d33 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -361,9 +361,13 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext) ext4_fsblk_t block = ext4_ext_pblock(ext); int len = ext4_ext_get_actual_len(ext); ext4_lblk_t lblock = le32_to_cpu(ext->ee_block); - ext4_lblk_t last = lblock + len - 1; - if (len == 0 || lblock > last) + /* + * We allow neither: + * - zero length + * - overflow/wrap-around + */ + if (lblock + len <= lblock) return 0; return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len); } From 8b6ab35cbc581ab0eb74425d92e35e9522382121 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Mon, 4 Jul 2016 11:03:00 -0400 Subject: [PATCH 085/178] ext4: don't call ext4_should_journal_data() on the journal inode commit 6a7fd522a7c94cdef0a3b08acf8e6702056e635c upstream. If ext4_fill_super() fails early, it's possible for ext4_evict_inode() to call ext4_should_journal_data() before superblock options and flags are fully set up. In that case, the iput() on the journal inode can end up causing a BUG(). Work around this problem by reordering the tests so we only call ext4_should_journal_data() after we know it's not the journal inode. Fixes: 2d859db3e4 ("ext4: fix data corruption in inodes with journalled data") Fixes: 2b405bfa84 ("ext4: fix data=journal fast mount/umount hang") Cc: Jan Kara Cc: stable@vger.kernel.org Signed-off-by: Vegard Nossum Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara Signed-off-by: Willy Tarreau --- fs/ext4/inode.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index fb7e576df25c..221b58298847 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -206,9 +206,9 @@ void ext4_evict_inode(struct inode *inode) * Note that directories do not have this problem because they * don't use page cache. */ - if (ext4_should_journal_data(inode) && - (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode)) && - inode->i_ino != EXT4_JOURNAL_INO) { + if (inode->i_ino != EXT4_JOURNAL_INO && + ext4_should_journal_data(inode) && + (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) { journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; tid_t commit_tid = EXT4_I(inode)->i_datasync_tid; From bf86199bfadda49f1e6d2880dc58a891a47890c6 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Thu, 14 Jul 2016 23:21:35 -0400 Subject: [PATCH 086/178] ext4: short-cut orphan cleanup on error commit c65d5c6c81a1f27dec5f627f67840726fcd146de upstream. If we encounter a filesystem error during orphan cleanup, we should stop. Otherwise, we may end up in an infinite loop where the same inode is processed again and again. EXT4-fs (loop0): warning: checktime reached, running e2fsck is recommended EXT4-fs error (device loop0): ext4_mb_generate_buddy:758: group 2, block bitmap and bg descriptor inconsistent: 6117 vs 0 free clusters Aborting journal on device loop0-8. EXT4-fs (loop0): Remounting filesystem read-only EXT4-fs error (device loop0) in ext4_free_blocks:4895: Journal has aborted EXT4-fs error (device loop0) in ext4_do_update_inode:4893: Journal has aborted EXT4-fs error (device loop0) in ext4_do_update_inode:4893: Journal has aborted EXT4-fs error (device loop0) in ext4_ext_remove_space:3068: IO failure EXT4-fs error (device loop0) in ext4_ext_truncate:4667: Journal has aborted EXT4-fs error (device loop0) in ext4_orphan_del:2927: Journal has aborted EXT4-fs error (device loop0) in ext4_do_update_inode:4893: Journal has aborted EXT4-fs (loop0): Inode 16 (00000000618192a0): orphan list check failed! [...] EXT4-fs (loop0): Inode 16 (0000000061819748): orphan list check failed! [...] EXT4-fs (loop0): Inode 16 (0000000061819bf0): orphan list check failed! [...] See-also: c9eb13a9105 ("ext4: fix hang when processing corrupted orphaned inode list") Cc: Jan Kara Signed-off-by: Vegard Nossum Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org Signed-off-by: Willy Tarreau --- fs/ext4/super.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 063eb5094a63..15a81897df4e 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2153,6 +2153,16 @@ static void ext4_orphan_cleanup(struct super_block *sb, while (es->s_last_orphan) { struct inode *inode; + /* + * We may have encountered an error during cleanup; if + * so, skip the rest. + */ + if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { + jbd_debug(1, "Skipping orphan recovery on fs with errors.\n"); + es->s_last_orphan = 0; + break; + } + inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan)); if (IS_ERR(inode)) { es->s_last_orphan = 0; From e7dcdba7d680b021538ecd93cee1954291558399 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Thu, 14 Jul 2016 23:02:47 -0400 Subject: [PATCH 087/178] ext4: fix reference counting bug on block allocation error commit 554a5ccc4e4a20c5f3ec859de0842db4b4b9c77e upstream. If we hit this error when mounted with errors=continue or errors=remount-ro: EXT4-fs error (device loop0): ext4_mb_mark_diskspace_used:2940: comm ext4.exe: Allocating blocks 5090-6081 which overlap fs metadata then ext4_mb_new_blocks() will call ext4_mb_release_context() and try to continue. However, ext4_mb_release_context() is the wrong thing to call here since we are still actually using the allocation context. Instead, just error out. We could retry the allocation, but there is a possibility of getting stuck in an infinite loop instead, so this seems safer. [ Fixed up so we don't return EAGAIN to userspace. --tytso ] Fixes: 8556e8f3b6 ("ext4: Don't allow new groups to be added during block allocation") Signed-off-by: Vegard Nossum Signed-off-by: Theodore Ts'o Cc: Aneesh Kumar K.V Cc: stable@vger.kernel.org [wt: 3.10 doesn't have EFSCORRUPTED, but XFS uses EUCLEAN as does 3.14 on this patch so use this instead] Signed-off-by: Willy Tarreau --- fs/ext4/mballoc.c | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 4d739463831e..08b4495c1b12 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2876,7 +2876,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, ext4_error(sb, "Allocating blocks %llu-%llu which overlap " "fs metadata", block, block+len); /* File system mounted not to panic on error - * Fix the bitmap and repeat the block allocation + * Fix the bitmap and return EUCLEAN * We leak some of the blocks here. */ ext4_lock_group(sb, ac->ac_b_ex.fe_group); @@ -2885,7 +2885,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, ext4_unlock_group(sb, ac->ac_b_ex.fe_group); err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); if (!err) - err = -EAGAIN; + err = -EUCLEAN; goto out_err; } @@ -4452,18 +4452,7 @@ repeat: } if (likely(ac->ac_status == AC_STATUS_FOUND)) { *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_clstrs); - if (*errp == -EAGAIN) { - /* - * drop the reference that we took - * in ext4_mb_use_best_found - */ - ext4_mb_release_context(ac); - ac->ac_b_ex.fe_group = 0; - ac->ac_b_ex.fe_start = 0; - ac->ac_b_ex.fe_len = 0; - ac->ac_status = AC_STATUS_CONTINUE; - goto repeat; - } else if (*errp) { + if (*errp) { ext4_discard_allocated_blocks(ac); goto errout; } else { From 0ed4547f527c1410ff2a35af766ce5c3a29405c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 26 May 2016 15:16:25 -0700 Subject: [PATCH 088/178] dma-debug: avoid spinlock recursion when disabling dma-debug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 3017cd63f26fc655d56875aaf497153ba60e9edf upstream. With netconsole (at least) the pr_err("... disablingn") call can recurse back into the dma-debug code, where it'll try to grab free_entries_lock again. Avoid the problem by doing the printk after dropping the lock. Link: http://lkml.kernel.org/r/1463678421-18683-1-git-send-email-ville.syrjala@linux.intel.com Signed-off-by: Ville Syrjälä Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Willy Tarreau --- lib/dma-debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/dma-debug.c b/lib/dma-debug.c index eb43517bf261..c32437f6be61 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -445,9 +445,9 @@ static struct dma_debug_entry *dma_entry_alloc(void) spin_lock_irqsave(&free_entries_lock, flags); if (list_empty(&free_entries)) { - pr_err("DMA-API: debugging out of memory - disabling\n"); global_disable = true; spin_unlock_irqrestore(&free_entries_lock, flags); + pr_err("DMA-API: debugging out of memory - disabling\n"); return NULL; } From 01ee4801e26c15e5bf3d0a9b563d125bbbd2ed66 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 18 May 2016 13:53:42 +1000 Subject: [PATCH 089/178] xfs: xfs_iflush_cluster fails to abort on error commit b1438f477934f5a4d5a44df26f3079a7575d5946 upstream. When a failure due to an inode buffer occurs, the error handling fails to abort the inode writeback correctly. This can result in the inode being reclaimed whilst still in the AIL, leading to use-after-free situations as well as filesystems that cannot be unmounted as the inode log items left in the AIL never get removed. Fix this by ensuring fatal errors from xfs_imap_to_bp() result in the inode flush being aborted correctly. Reported-by: Shyam Kaushik Diagnosed-by: Shyam Kaushik Tested-by: Shyam Kaushik Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner Signed-off-by: Greg Kroah-Hartman [wt: in kernels < 3.17, the error sign is positive, not negative] Signed-off-by: Willy Tarreau --- fs/xfs/xfs_inode.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index f010ab4594f1..e27645569802 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2726,7 +2726,7 @@ xfs_iflush( struct xfs_buf **bpp) { struct xfs_mount *mp = ip->i_mount; - struct xfs_buf *bp; + struct xfs_buf *bp = NULL; struct xfs_dinode *dip; int error; @@ -2768,14 +2768,22 @@ xfs_iflush( } /* - * Get the buffer containing the on-disk inode. + * Get the buffer containing the on-disk inode. We are doing a try-lock + * operation here, so we may get an EAGAIN error. In that case, we + * simply want to return with the inode still dirty. + * + * If we get any other error, we effectively have a corruption situation + * and we cannot flush the inode, so we treat it the same as failing + * xfs_iflush_int(). */ error = xfs_imap_to_bp(mp, NULL, &ip->i_imap, &dip, &bp, XBF_TRYLOCK, 0); - if (error || !bp) { + if (error == EAGAIN) { xfs_ifunlock(ip); return error; } + if (error) + goto corrupt_out; /* * First flush out the inode that xfs_iflush was called with. @@ -2803,7 +2811,8 @@ xfs_iflush( return 0; corrupt_out: - xfs_buf_relse(bp); + if (bp) + xfs_buf_relse(bp); xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); cluster_corrupt_out: error = XFS_ERROR(EFSCORRUPTED); From 360914d619705b62f974492658e96f3d111232a8 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 18 May 2016 13:54:22 +1000 Subject: [PATCH 090/178] xfs: fix inode validity check in xfs_iflush_cluster commit 51b07f30a71c27405259a0248206ed4e22adbee2 upstream. Some careless idiot(*) wrote crap code in commit 1a3e8f3 ("xfs: convert inode cache lookups to use RCU locking") back in late 2010, and so xfs_iflush_cluster checks the wrong inode for whether it is still valid under RCU protection. Fix it to lock and check the correct inode. (*) Careless-idiot: Dave Chinner cc: # 3.10.x- Discovered-by: Brain Foster Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner Signed-off-by: Willy Tarreau --- fs/xfs/xfs_inode.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index e27645569802..bb0d8ccebb26 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2604,13 +2604,13 @@ xfs_iflush_cluster( * We need to check under the i_flags_lock for a valid inode * here. Skip it if it is not valid or the wrong inode. */ - spin_lock(&ip->i_flags_lock); - if (!ip->i_ino || + spin_lock(&iq->i_flags_lock); + if (!iq->i_ino || (XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index) { - spin_unlock(&ip->i_flags_lock); + spin_unlock(&iq->i_flags_lock); continue; } - spin_unlock(&ip->i_flags_lock); + spin_unlock(&iq->i_flags_lock); /* * Do an un-protected check to see if the inode is dirty and From 9eccedc413f809916ea82c7e0d11c0fd31e00383 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 18 May 2016 13:54:23 +1000 Subject: [PATCH 091/178] xfs: skip stale inodes in xfs_iflush_cluster commit 7d3aa7fe970791f1a674b14572a411accf2f4d4e upstream. We don't write back stale inodes so we should skip them in xfs_iflush_cluster, too. cc: # 3.10.x- Signed-off-by: Dave Chinner Reviewed-by: Brian Foster Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner Signed-off-by: Willy Tarreau --- fs/xfs/xfs_inode.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index bb0d8ccebb26..06dec557d247 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2606,6 +2606,7 @@ xfs_iflush_cluster( */ spin_lock(&iq->i_flags_lock); if (!iq->i_ino || + __xfs_iflags_test(iq, XFS_ISTALE) || (XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index) { spin_unlock(&iq->i_flags_lock); continue; From 42bc57b6a75b5242f2103b368be0693fecf292d2 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 1 Jun 2016 14:09:23 +0200 Subject: [PATCH 092/178] KVM: x86: fix OOPS after invalid KVM_SET_DEBUGREGS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d14bdb553f9196169f003058ae1cdabe514470e6 upstream. MOV to DR6 or DR7 causes a #GP if an attempt is made to write a 1 to any of bits 63:32. However, this is not detected at KVM_SET_DEBUGREGS time, and the next KVM_RUN oopses: general protection fault: 0000 [#1] SMP CPU: 2 PID: 14987 Comm: a.out Not tainted 4.4.9-300.fc23.x86_64 #1 Hardware name: LENOVO 2325F51/2325F51, BIOS G2ET32WW (1.12 ) 05/30/2012 [...] Call Trace: [] kvm_arch_vcpu_ioctl_run+0x141d/0x14e0 [kvm] [] kvm_vcpu_ioctl+0x33d/0x620 [kvm] [] do_vfs_ioctl+0x298/0x480 [] SyS_ioctl+0x79/0x90 [] entry_SYSCALL_64_fastpath+0x12/0x71 Code: 55 83 ff 07 48 89 e5 77 27 89 ff ff 24 fd 90 87 80 81 0f 23 fe 5d c3 0f 23 c6 5d c3 0f 23 ce 5d c3 0f 23 d6 5d c3 0f 23 de 5d c3 <0f> 23 f6 5d c3 0f 0b 66 66 66 66 66 2e 0f 1f 84 00 00 00 00 00 RIP [] native_set_debugreg+0x2b/0x40 RSP Testcase (beautified/reduced from syzkaller output): #include #include #include #include #include #include #include long r[8]; int main() { struct kvm_debugregs dr = { 0 }; r[2] = open("/dev/kvm", O_RDONLY); r[3] = ioctl(r[2], KVM_CREATE_VM, 0); r[4] = ioctl(r[3], KVM_CREATE_VCPU, 7); memcpy(&dr, "\x5d\x6a\x6b\xe8\x57\x3b\x4b\x7e\xcf\x0d\xa1\x72" "\xa3\x4a\x29\x0c\xfc\x6d\x44\x00\xa7\x52\xc7\xd8" "\x00\xdb\x89\x9d\x78\xb5\x54\x6b\x6b\x13\x1c\xe9" "\x5e\xd3\x0e\x40\x6f\xb4\x66\xf7\x5b\xe3\x36\xcb", 48); r[7] = ioctl(r[4], KVM_SET_DEBUGREGS, &dr); r[6] = ioctl(r[4], KVM_RUN, 0); } Reported-by: Dmitry Vyukov Signed-off-by: Paolo Bonzini Signed-off-by: Radim Krčmář Signed-off-by: Greg Kroah-Hartman Signed-off-by: Willy Tarreau --- arch/x86/kvm/x86.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3c0b085b4336..8e57771d4bfd 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2966,6 +2966,11 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, if (dbgregs->flags) return -EINVAL; + if (dbgregs->dr6 & ~0xffffffffull) + return -EINVAL; + if (dbgregs->dr7 & ~0xffffffffull) + return -EINVAL; + memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db)); vcpu->arch.dr6 = dbgregs->dr6; vcpu->arch.dr7 = dbgregs->dr7; From 951b391d266e93a56559d28817663a10128e0159 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 30 May 2016 23:14:56 +0100 Subject: [PATCH 093/178] ARM: fix PTRACE_SETVFPREGS on SMP systems commit e2dfb4b880146bfd4b6aa8e138c0205407cebbaf upstream. PTRACE_SETVFPREGS fails to properly mark the VFP register set to be reloaded, because it undoes one of the effects of vfp_flush_hwstate(). Specifically vfp_flush_hwstate() sets thread->vfpstate.hard.cpu to an invalid CPU number, but vfp_set() overwrites this with the original CPU number, thereby rendering the hardware state as apparently "valid", even though the software state is more recent. Fix this by reverting the previous change. Cc: Fixes: 8130b9d7b9d8 ("ARM: 7308/1: vfp: flush thread hwstate before copying ptrace registers") Acked-by: Will Deacon Tested-by: Simon Marchi Signed-off-by: Russell King Signed-off-by: Willy Tarreau --- arch/arm/kernel/ptrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index 03deeffd9f6d..4e2110d48c41 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -733,8 +733,8 @@ static int vfp_set(struct task_struct *target, if (ret) return ret; - vfp_flush_hwstate(thread); thread->vfpstate.hard = new_vfp; + vfp_flush_hwstate(thread); return 0; } From d948109df11c8485e972b4cc0eb4820d4b754615 Mon Sep 17 00:00:00 2001 From: Dave Weinstein Date: Thu, 28 Jul 2016 11:55:41 -0700 Subject: [PATCH 094/178] arm: oabi compat: add missing access checks commit 7de249964f5578e67b99699c5f0b405738d820a2 upstream. Add access checks to sys_oabi_epoll_wait() and sys_oabi_semtimedop(). This fixes CVE-2016-3857, a local privilege escalation under CONFIG_OABI_COMPAT. Cc: stable@vger.kernel.org Reported-by: Chiachih Wu Reviewed-by: Kees Cook Reviewed-by: Nicolas Pitre Signed-off-by: Dave Weinstein Signed-off-by: Linus Torvalds Signed-off-by: Willy Tarreau --- arch/arm/kernel/sys_oabi-compat.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/arm/kernel/sys_oabi-compat.c b/arch/arm/kernel/sys_oabi-compat.c index 3e94811690ce..a0aee80b608d 100644 --- a/arch/arm/kernel/sys_oabi-compat.c +++ b/arch/arm/kernel/sys_oabi-compat.c @@ -275,8 +275,12 @@ asmlinkage long sys_oabi_epoll_wait(int epfd, mm_segment_t fs; long ret, err, i; - if (maxevents <= 0 || maxevents > (INT_MAX/sizeof(struct epoll_event))) + if (maxevents <= 0 || + maxevents > (INT_MAX/sizeof(*kbuf)) || + maxevents > (INT_MAX/sizeof(*events))) return -EINVAL; + if (!access_ok(VERIFY_WRITE, events, sizeof(*events) * maxevents)) + return -EFAULT; kbuf = kmalloc(sizeof(*kbuf) * maxevents, GFP_KERNEL); if (!kbuf) return -ENOMEM; @@ -313,6 +317,8 @@ asmlinkage long sys_oabi_semtimedop(int semid, if (nsops < 1 || nsops > SEMOPM) return -EINVAL; + if (!access_ok(VERIFY_READ, tsops, sizeof(*tsops) * nsops)) + return -EFAULT; sops = kmalloc(sizeof(*sops) * nsops, GFP_KERNEL); if (!sops) return -ENOMEM; From fcf55035f9cf155d2d63a07af15522bfafb54951 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 4 Jun 2016 17:21:33 +0200 Subject: [PATCH 095/178] parisc: Fix pagefault crash in unaligned __get_user() call commit 8b78f260887df532da529f225c49195d18fef36b upstream. One of the debian buildd servers had this crash in the syslog without any other information: Unaligned handler failed, ret = -2 clock_adjtime (pid 22578): Unaligned data reference (code 28) CPU: 1 PID: 22578 Comm: clock_adjtime Tainted: G E 4.5.0-2-parisc64-smp #1 Debian 4.5.4-1 task: 000000007d9960f8 ti: 00000001bde7c000 task.ti: 00000001bde7c000 YZrvWESTHLNXBCVMcbcbcbcbOGFRQPDI PSW: 00001000000001001111100000001111 Tainted: G E r00-03 000000ff0804f80f 00000001bde7c2b0 00000000402d2be8 00000001bde7c2b0 r04-07 00000000409e1fd0 00000000fa6f7fff 00000001bde7c148 00000000fa6f7fff r08-11 0000000000000000 00000000ffffffff 00000000fac9bb7b 000000000002b4d4 r12-15 000000000015241c 000000000015242c 000000000000002d 00000000fac9bb7b r16-19 0000000000028800 0000000000000001 0000000000000070 00000001bde7c218 r20-23 0000000000000000 00000001bde7c210 0000000000000002 0000000000000000 r24-27 0000000000000000 0000000000000000 00000001bde7c148 00000000409e1fd0 r28-31 0000000000000001 00000001bde7c320 00000001bde7c350 00000001bde7c218 sr00-03 0000000001200000 0000000001200000 0000000000000000 0000000001200000 sr04-07 0000000000000000 0000000000000000 0000000000000000 0000000000000000 IASQ: 0000000000000000 0000000000000000 IAOQ: 00000000402d2e84 00000000402d2e88 IIR: 0ca0d089 ISR: 0000000001200000 IOR: 00000000fa6f7fff CPU: 1 CR30: 00000001bde7c000 CR31: ffffffffffffffff ORIG_R28: 00000002369fe628 IAOQ[0]: compat_get_timex+0x2dc/0x3c0 IAOQ[1]: compat_get_timex+0x2e0/0x3c0 RP(r2): compat_get_timex+0x40/0x3c0 Backtrace: [<00000000402d4608>] compat_SyS_clock_adjtime+0x40/0xc0 [<0000000040205024>] syscall_exit+0x0/0x14 This means the userspace program clock_adjtime called the clock_adjtime() syscall and then crashed inside the compat_get_timex() function. Syscalls should never crash programs, but instead return EFAULT. The IIR register contains the executed instruction, which disassebles into "ldw 0(sr3,r5),r9". This load-word instruction is part of __get_user() which tried to read the word at %r5/IOR (0xfa6f7fff). This means the unaligned handler jumped in. The unaligned handler is able to emulate all ldw instructions, but it fails if it fails to read the source e.g. because of page fault. The following program reproduces the problem: #define _GNU_SOURCE #include #include #include int main(void) { /* allocate 8k */ char *ptr = mmap(NULL, 2*4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); /* free second half (upper 4k) and make it invalid. */ munmap(ptr+4096, 4096); /* syscall where first int is unaligned and clobbers into invalid memory region */ /* syscall should return EFAULT */ return syscall(__NR_clock_adjtime, 0, ptr+4095); } To fix this issue we simply need to check if the faulting instruction address is in the exception fixup table when the unaligned handler failed. If it is, call the fixup routine instead of crashing. While looking at the unaligned handler I found another issue as well: The target register should not be modified if the handler was unsuccessful. Signed-off-by: Helge Deller Cc: stable@vger.kernel.org Signed-off-by: Willy Tarreau --- arch/parisc/kernel/unaligned.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c index d7c0acb35ec2..8d49614d600d 100644 --- a/arch/parisc/kernel/unaligned.c +++ b/arch/parisc/kernel/unaligned.c @@ -666,7 +666,7 @@ void handle_unaligned(struct pt_regs *regs) break; } - if (modify && R1(regs->iir)) + if (ret == 0 && modify && R1(regs->iir)) regs->gr[R1(regs->iir)] = newbase; @@ -677,6 +677,14 @@ void handle_unaligned(struct pt_regs *regs) if (ret) { + /* + * The unaligned handler failed. + * If we were called by __get_user() or __put_user() jump + * to it's exception fixup handler instead of crashing. + */ + if (!user_mode(regs) && fixup_exception(regs)) + return; + printk(KERN_CRIT "Unaligned handler failed, ret = %d\n", ret); die_if_kernel("Unaligned data reference", regs, 28); From 88ac3837703b3dbe8fdb8b8b8f41143202867aa6 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Wed, 1 Jun 2016 11:55:06 +0200 Subject: [PATCH 096/178] ecryptfs: forbid opening files without mmap handler commit 2f36db71009304b3f0b95afacd8eba1f9f046b87 upstream. This prevents users from triggering a stack overflow through a recursive invocation of pagefault handling that involves mapping procfs files into virtual memory. Signed-off-by: Jann Horn Acked-by: Tyler Hicks Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman Signed-off-by: Willy Tarreau --- fs/ecryptfs/kthread.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/fs/ecryptfs/kthread.c b/fs/ecryptfs/kthread.c index f1ea610362c6..9b661a4ccee7 100644 --- a/fs/ecryptfs/kthread.c +++ b/fs/ecryptfs/kthread.c @@ -25,6 +25,7 @@ #include #include #include +#include #include "ecryptfs_kernel.h" struct ecryptfs_open_req { @@ -147,7 +148,7 @@ int ecryptfs_privileged_open(struct file **lower_file, flags |= IS_RDONLY(lower_dentry->d_inode) ? O_RDONLY : O_RDWR; (*lower_file) = dentry_open(&req.path, flags, cred); if (!IS_ERR(*lower_file)) - goto out; + goto have_file; if ((flags & O_ACCMODE) == O_RDONLY) { rc = PTR_ERR((*lower_file)); goto out; @@ -165,8 +166,16 @@ int ecryptfs_privileged_open(struct file **lower_file, mutex_unlock(&ecryptfs_kthread_ctl.mux); wake_up(&ecryptfs_kthread_ctl.wait); wait_for_completion(&req.done); - if (IS_ERR(*lower_file)) + if (IS_ERR(*lower_file)) { rc = PTR_ERR(*lower_file); + goto out; + } +have_file: + if ((*lower_file)->f_op->mmap == NULL) { + fput(*lower_file); + *lower_file = NULL; + rc = -EMEDIUMTYPE; + } out: return rc; } From 742d555a529c3dbbf7d326620082f7cbb510ac7e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 7 Jun 2016 21:26:55 -0400 Subject: [PATCH 097/178] fix d_walk()/non-delayed __d_free() race commit 3d56c25e3bb0726a5c5e16fc2d9e38f8ed763085 upstream. Ascend-to-parent logics in d_walk() depends on all encountered child dentries not getting freed without an RCU delay. Unfortunately, in quite a few cases it is not true, with hard-to-hit oopsable race as the result. Fortunately, the fix is simiple; right now the rule is "if it ever been hashed, freeing must be delayed" and changing it to "if it ever had a parent, freeing must be delayed" closes that hole and covers all cases the old rule used to cover. Moreover, pipes and sockets remain _not_ covered, so we do not introduce RCU delay in the cases which are the reason for having that delay conditional in the first place. Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman [wt: add the required change to __d_materialise_dentry() for kernels older than v3.17] Signed-off-by: Willy Tarreau --- fs/dcache.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 17222fa5bdc6..2d0b9d2f3c43 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1311,7 +1311,7 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) struct dentry *dentry = __d_alloc(parent->d_sb, name); if (!dentry) return NULL; - + dentry->d_flags |= DCACHE_RCUACCESS; spin_lock(&parent->d_lock); /* * don't need child lock because it is not subject @@ -2101,7 +2101,6 @@ static void __d_rehash(struct dentry * entry, struct hlist_bl_head *b) { BUG_ON(!d_unhashed(entry)); hlist_bl_lock(b); - entry->d_flags |= DCACHE_RCUACCESS; hlist_bl_add_head_rcu(&entry->d_hash, b); hlist_bl_unlock(b); } @@ -2285,6 +2284,7 @@ static void __d_move(struct dentry * dentry, struct dentry * target) /* ... and switch the parents */ if (IS_ROOT(dentry)) { + dentry->d_flags |= DCACHE_RCUACCESS; dentry->d_parent = target->d_parent; target->d_parent = target; INIT_LIST_HEAD(&target->d_child); @@ -2401,6 +2401,7 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon) switch_names(dentry, anon); swap(dentry->d_name.hash, anon->d_name.hash); + dentry->d_flags |= DCACHE_RCUACCESS; dentry->d_parent = dentry; list_del_init(&dentry->d_child); anon->d_parent = dparent; From 7e2e2115ed370d58c3835136f4a7914906964925 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 8 Jun 2016 14:56:39 +0200 Subject: [PATCH 098/178] crypto: ux500 - memmove the right size commit 19ced623db2fe91604d69f7d86b03144c5107739 upstream. The hash buffer is really HASH_BLOCK_SIZE bytes, someone must have thought that memmove takes n*u32 words by mistake. Tests work as good/bad as before after this patch. Cc: Joakim Bech Cc: stable@vger.kernel.org Reported-by: David Binderman Signed-off-by: Linus Walleij Signed-off-by: Herbert Xu Signed-off-by: Willy Tarreau --- drivers/crypto/ux500/hash/hash_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/ux500/hash/hash_core.c b/drivers/crypto/ux500/hash/hash_core.c index 6789c1653913..cde4a6e0fab0 100644 --- a/drivers/crypto/ux500/hash/hash_core.c +++ b/drivers/crypto/ux500/hash/hash_core.c @@ -806,7 +806,7 @@ int hash_process_data( &device_data->state); memmove(req_ctx->state.buffer, device_data->state.buffer, - HASH_BLOCK_SIZE / sizeof(u32)); + HASH_BLOCK_SIZE); if (ret) { dev_err(device_data->dev, "[%s] " "hash_resume_state()" @@ -858,7 +858,7 @@ int hash_process_data( memmove(device_data->state.buffer, req_ctx->state.buffer, - HASH_BLOCK_SIZE / sizeof(u32)); + HASH_BLOCK_SIZE); if (ret) { dev_err(device_data->dev, "[%s] " "hash_save_state()" From ef15fd6c8acbadcf4be663b7a504aa6b191d8182 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 15 Jun 2016 22:27:05 +0800 Subject: [PATCH 099/178] crypto: gcm - Filter out async ghash if necessary commit b30bdfa86431afbafe15284a3ad5ac19b49b88e3 upstream. As it is if you ask for a sync gcm you may actually end up with an async one because it does not filter out async implementations of ghash. This patch fixes this by adding the necessary filter when looking for ghash. Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman Signed-off-by: Willy Tarreau --- crypto/gcm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/crypto/gcm.c b/crypto/gcm.c index cd97cdd8cabe..451e420ce56c 100644 --- a/crypto/gcm.c +++ b/crypto/gcm.c @@ -716,7 +716,9 @@ static struct crypto_instance *crypto_gcm_alloc_common(struct rtattr **tb, ghash_alg = crypto_find_alg(ghash_name, &crypto_ahash_type, CRYPTO_ALG_TYPE_HASH, - CRYPTO_ALG_TYPE_AHASH_MASK); + CRYPTO_ALG_TYPE_AHASH_MASK | + crypto_requires_sync(algt->type, + algt->mask)); if (IS_ERR(ghash_alg)) return ERR_CAST(ghash_alg); From 801b100b137e407e036c282be0191db9f9887e3f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 12 Jul 2016 13:17:57 +0800 Subject: [PATCH 100/178] crypto: scatterwalk - Fix test in scatterwalk_done commit 5f070e81bee35f1b7bd1477bb223a873ff657803 upstream. When there is more data to be processed, the current test in scatterwalk_done may prevent us from calling pagedone even when we should. In particular, if we're on an SG entry spanning multiple pages where the last page is not a full page, we will incorrectly skip calling pagedone on the second last page. This patch fixes this by adding a separate test for whether we've reached the end of a page. Cc: stable@vger.kernel.org Signed-off-by: Herbert Xu Signed-off-by: Willy Tarreau --- crypto/scatterwalk.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crypto/scatterwalk.c b/crypto/scatterwalk.c index 7281b8a93ad3..79cbbbfffffc 100644 --- a/crypto/scatterwalk.c +++ b/crypto/scatterwalk.c @@ -68,7 +68,8 @@ static void scatterwalk_pagedone(struct scatter_walk *walk, int out, void scatterwalk_done(struct scatter_walk *walk, int out, int more) { - if (!(scatterwalk_pagelen(walk) & (PAGE_SIZE - 1)) || !more) + if (!more || walk->offset >= walk->sg->offset + walk->sg->length || + !(walk->offset & (PAGE_SIZE - 1))) scatterwalk_pagedone(walk, out, more); } EXPORT_SYMBOL_GPL(scatterwalk_done); From d09f7f9a7b70ce6cf3439a5b6c953e057c157dd7 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Thu, 16 Jun 2016 17:06:19 +0900 Subject: [PATCH 101/178] sit: correct IP protocol used in ipip6_err commit d5d8760b78d0cfafe292f965f599988138b06a70 upstream. Since 32b8a8e59c9c ("sit: add IPv4 over IPv4 support") ipip6_err() may be called for packets whose IP protocol is IPPROTO_IPIP as well as those whose IP protocol is IPPROTO_IPV6. In the case of IPPROTO_IPIP packets the correct protocol value is not passed to ipv4_update_pmtu() or ipv4_redirect(). This patch resolves this problem by using the IP protocol of the packet rather than a hard-coded value. This appears to be consistent with the usage of the protocol of a packet by icmp_socket_deliver() the caller of ipip6_err(). I was able to exercise the redirect case by using a setup where an ICMP redirect was received for the destination of the encapsulated packet. However, it appears that although incorrect the protocol field is not used in this case and thus no problem manifests. On inspection it does not appear that a problem will manifest in the fragmentation needed/update pmtu case either. In short I believe this is a cosmetic fix. None the less, the use of IPPROTO_IPV6 seems wrong and confusing. Reviewed-by: Dinan Gunawardena Signed-off-by: Simon Horman Acked-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller Signed-off-by: Willy Tarreau --- net/ipv6/sit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 4ddf67c6355b..d9535bb8fe2e 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -530,13 +530,13 @@ static int ipip6_err(struct sk_buff *skb, u32 info) if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { ipv4_update_pmtu(skb, dev_net(skb->dev), info, - t->parms.link, 0, IPPROTO_IPV6, 0); + t->parms.link, 0, iph->protocol, 0); err = 0; goto out; } if (type == ICMP_REDIRECT) { ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0, - IPPROTO_IPV6, 0); + iph->protocol, 0); err = 0; goto out; } From 23f2a4ce4fc03e4f37f6fe341bbb1baace1b097e Mon Sep 17 00:00:00 2001 From: Tom Goff Date: Thu, 23 Jun 2016 16:11:57 -0400 Subject: [PATCH 102/178] ipmr/ip6mr: Initialize the last assert time of mfc entries. commit 70a0dec45174c976c64b4c8c1d0898581f759948 upstream. This fixes wrong-interface signaling on 32-bit platforms for entries created when jiffies > 2^31 + MFC_ASSERT_THRESH. Signed-off-by: Tom Goff Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman Signed-off-by: Willy Tarreau --- net/ipv4/ipmr.c | 4 +++- net/ipv6/ip6mr.c | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index b31553d385bb..89570f070e0e 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -881,8 +881,10 @@ static struct mfc_cache *ipmr_cache_alloc(void) { struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); - if (c) + if (c) { + c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1; c->mfc_un.res.minvif = MAXVIFS; + } return c; } diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 8d69df16f6a8..107f75283b1b 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1077,6 +1077,7 @@ static struct mfc6_cache *ip6mr_cache_alloc(void) struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); if (c == NULL) return NULL; + c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1; c->mfc_un.res.minvif = MAXMIFS; return c; } From 5bccf7715a1d5c2a9f6d1b9301d6619429a34319 Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Fri, 24 Jun 2016 15:26:05 +0800 Subject: [PATCH 103/178] net: alx: Work around the DMA RX overflow issue commit 881d0327db37ad917a367c77aff1afa1ee41e0a9 upstream. Note: This is a verified backported patch for stable 4.4 kernel, and it could also be applied to 4.3/4.2/4.1/3.18/3.16 There is a problem with alx devices, that the network link will be lost in 1-5 minutes after the device is up. >From debugging without datasheet, we found the error always happen when the DMA RX address is set to 0x....fc0, which is very likely to be a HW/silicon problem. This patch will apply rx skb with 64 bytes longer space, and if the allocated skb has a 0x...fc0 address, it will use skb_resever(skb, 64) to advance the address, so that the RX overflow can be avoided. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=70761 Signed-off-by: Feng Tang Suggested-by: Eric Dumazet Tested-by: Ole Lukoie Signed-off-by: Greg Kroah-Hartman Signed-off-by: Willy Tarreau --- drivers/net/ethernet/atheros/alx/main.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c index a85a9c2f1385..7357e54f1de9 100644 --- a/drivers/net/ethernet/atheros/alx/main.c +++ b/drivers/net/ethernet/atheros/alx/main.c @@ -86,9 +86,14 @@ static int alx_refill_rx_ring(struct alx_priv *alx, gfp_t gfp) while (!cur_buf->skb && next != rxq->read_idx) { struct alx_rfd *rfd = &rxq->rfd[cur]; - skb = __netdev_alloc_skb(alx->dev, alx->rxbuf_size, gfp); + skb = __netdev_alloc_skb(alx->dev, alx->rxbuf_size + 64, gfp); if (!skb) break; + + /* Workround for the HW RX DMA overflow issue */ + if (((unsigned long)skb->data & 0xfff) == 0xfc0) + skb_reserve(skb, 64); + dma = dma_map_single(&alx->hw.pdev->dev, skb->data, alx->rxbuf_size, DMA_FROM_DEVICE); From 11d714992e62a6ae6c415757239cde6aad47f358 Mon Sep 17 00:00:00 2001 From: Bob Copeland Date: Sun, 15 May 2016 13:19:16 -0400 Subject: [PATCH 104/178] mac80211: mesh: flush mesh paths unconditionally commit fe7a7c57629e8dcbc0e297363a9b2366d67a6dc5 upstream. Currently, the mesh paths associated with a nexthop station are cleaned up in the following code path: __sta_info_destroy_part1 synchronize_net() __sta_info_destroy_part2 -> cleanup_single_sta -> mesh_sta_cleanup -> mesh_plink_deactivate -> mesh_path_flush_by_nexthop However, there are a couple of problems here: 1) the paths aren't flushed at all if the MPM is running in userspace (e.g. when using wpa_supplicant or authsae) 2) there is no synchronize_rcu between removing the path and readers accessing the nexthop, which means the following race is possible: CPU0 CPU1 ~~~~ ~~~~ sta_info_destroy_part1() synchronize_net() rcu_read_lock() mesh_nexthop_resolve() mpath = mesh_path_lookup() [...] -> mesh_path_flush_by_nexthop() sta = rcu_dereference( mpath->next_hop) kfree(sta) access sta <-- CRASH Fix both of these by unconditionally flushing paths before destroying the sta, and by adding a synchronize_net() after path flush to ensure no active readers can still dereference the sta. Fixes this crash: [ 348.529295] BUG: unable to handle kernel paging request at 00020040 [ 348.530014] IP: [] ieee80211_mps_set_frame_flags+0x40/0xaa [mac80211] [ 348.530014] *pde = 00000000 [ 348.530014] Oops: 0000 [#1] PREEMPT [ 348.530014] Modules linked in: drbg ansi_cprng ctr ccm ppp_generic slhc ipt_MASQUERADE nf_nat_masquerade_ipv4 8021q ] [ 348.530014] CPU: 0 PID: 20597 Comm: wget Tainted: G O 4.6.0-rc5-wt=V1 #1 [ 348.530014] Hardware name: To Be Filled By O.E.M./To be filled by O.E.M., BIOS 080016 11/07/2014 [ 348.530014] task: f64fa280 ti: f4f9c000 task.ti: f4f9c000 [ 348.530014] EIP: 0060:[] EFLAGS: 00010246 CPU: 0 [ 348.530014] EIP is at ieee80211_mps_set_frame_flags+0x40/0xaa [mac80211] [ 348.530014] EAX: f4ce63e0 EBX: 00000088 ECX: f3788416 EDX: 00020008 [ 348.530014] ESI: 00000000 EDI: 00000088 EBP: f6409a4c ESP: f6409a40 [ 348.530014] DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 0068 [ 348.530014] CR0: 80050033 CR2: 00020040 CR3: 33190000 CR4: 00000690 [ 348.530014] Stack: [ 348.530014] 00000000 f4ce63e0 f5f9bd80 f6409a64 f9291d80 0000ce67 f5d51e00 f4ce63e0 [ 348.530014] f3788416 f6409a80 f9291dc1 f4ce8320 f4ce63e0 f5d51e00 f4ce63e0 f4ce8320 [ 348.530014] f6409a98 f9277f6f 00000000 00000000 0000007c 00000000 f6409b2c f9278dd1 [ 348.530014] Call Trace: [ 348.530014] [] mesh_nexthop_lookup+0xbb/0xc8 [mac80211] [ 348.530014] [] mesh_nexthop_resolve+0x34/0xd8 [mac80211] [ 348.530014] [] ieee80211_xmit+0x92/0xc1 [mac80211] [ 348.530014] [] __ieee80211_subif_start_xmit+0x807/0x83c [mac80211] [ 348.530014] [] ? sch_direct_xmit+0xd7/0x1b3 [ 348.530014] [] ? __local_bh_enable_ip+0x5d/0x7b [ 348.530014] [] ? nf_nat_ipv4_out+0x4c/0xd0 [nf_nat_ipv4] [ 348.530014] [] ? iptable_nat_ipv4_fn+0xf/0xf [iptable_nat] [ 348.530014] [] ? netif_skb_features+0x14d/0x30a [ 348.530014] [] ieee80211_subif_start_xmit+0xa/0xe [mac80211] [ 348.530014] [] dev_hard_start_xmit+0x1f8/0x267 [ 348.530014] [] ? validate_xmit_skb.isra.120.part.121+0x10/0x253 [ 348.530014] [] sch_direct_xmit+0x8b/0x1b3 [ 348.530014] [] __dev_queue_xmit+0x2c8/0x513 [ 348.530014] [] dev_queue_xmit+0xa/0xc [ 348.530014] [] batadv_send_skb_packet+0xd6/0xec [batman_adv] [ 348.530014] [] batadv_send_unicast_skb+0x15/0x4a [batman_adv] [ 348.530014] [] batadv_dat_send_data+0x27e/0x310 [batman_adv] [ 348.530014] [] ? batadv_tt_global_hash_find.isra.11+0x8/0xa [batman_adv] [ 348.530014] [] batadv_dat_snoop_outgoing_arp_request+0x208/0x23d [batman_adv] [ 348.530014] [] batadv_interface_tx+0x206/0x385 [batman_adv] [ 348.530014] [] dev_hard_start_xmit+0x1f8/0x267 [ 348.530014] [] ? validate_xmit_skb.isra.120.part.121+0x10/0x253 [ 348.530014] [] sch_direct_xmit+0x8b/0x1b3 [ 348.530014] [] __dev_queue_xmit+0x2c8/0x513 [ 348.530014] [] ? igb_xmit_frame+0x57/0x72 [igb] [ 348.530014] [] dev_queue_xmit+0xa/0xc [ 348.530014] [] br_dev_queue_push_xmit+0xeb/0xfb [bridge] [ 348.530014] [] br_forward_finish+0x29/0x74 [bridge] [ 348.530014] [] ? deliver_clone+0x3b/0x3b [bridge] [ 348.530014] [] __br_forward+0x89/0xe7 [bridge] [ 348.530014] [] ? br_dev_queue_push_xmit+0xfb/0xfb [bridge] [ 348.530014] [] deliver_clone+0x34/0x3b [bridge] [ 348.530014] [] ? br_flood+0x95/0x95 [bridge] [ 348.530014] [] br_flood+0x77/0x95 [bridge] [ 348.530014] [] br_flood_forward+0x13/0x1a [bridge] [ 348.530014] [] ? br_flood+0x95/0x95 [bridge] [ 348.530014] [] br_handle_frame_finish+0x392/0x3db [bridge] [ 348.530014] [] ? nf_iterate+0x2b/0x6b [ 348.530014] [] br_handle_frame+0x1e6/0x240 [bridge] [ 348.530014] [] ? br_handle_local_finish+0x6a/0x6a [bridge] [ 348.530014] [] __netif_receive_skb_core+0x43a/0x66b [ 348.530014] [] ? br_handle_frame_finish+0x3db/0x3db [bridge] [ 348.530014] [] ? resched_curr+0x19/0x37 [ 348.530014] [] ? check_preempt_wakeup+0xbf/0xfe [ 348.530014] [] ? ktime_get_with_offset+0x5c/0xfc [ 348.530014] [] __netif_receive_skb+0x47/0x55 [ 348.530014] [] netif_receive_skb_internal+0x40/0x5a [ 348.530014] [] napi_gro_receive+0x3a/0x94 [ 348.530014] [] igb_poll+0x6fd/0x9ad [igb] [ 348.530014] [] ? swake_up_locked+0x14/0x26 [ 348.530014] [] net_rx_action+0xde/0x250 [ 348.530014] [] __do_softirq+0x8a/0x163 [ 348.530014] [] ? __hrtimer_tasklet_trampoline+0x19/0x19 [ 348.530014] [] do_softirq_own_stack+0x26/0x2c [ 348.530014] [ 348.530014] [] irq_exit+0x31/0x6f [ 348.530014] [] do_IRQ+0x8d/0xa0 [ 348.530014] [] common_interrupt+0x2c/0x40 [ 348.530014] Code: e7 8c 00 66 81 ff 88 00 75 12 85 d2 75 0e b2 c3 b8 83 e9 29 f9 e8 a7 5f f9 c6 eb 74 66 81 e3 8c 005 [ 348.530014] EIP: [] ieee80211_mps_set_frame_flags+0x40/0xaa [mac80211] SS:ESP 0068:f6409a40 [ 348.530014] CR2: 0000000000020040 [ 348.530014] ---[ end trace 48556ac26779732e ]--- [ 348.530014] Kernel panic - not syncing: Fatal exception in interrupt [ 348.530014] Kernel Offset: disabled Reported-by: Fred Veldini Tested-by: Fred Veldini Signed-off-by: Bob Copeland Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman Signed-off-by: Willy Tarreau --- net/mac80211/mesh.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 6952760881c8..f8765cc84e47 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -161,6 +161,10 @@ void mesh_sta_cleanup(struct sta_info *sta) del_timer_sync(&sta->plink_timer); } + /* make sure no readers can access nexthop sta from here on */ + mesh_path_flush_by_nexthop(sta); + synchronize_net(); + if (changed) ieee80211_mbss_info_change_notify(sdata, changed); } From 62cfca041862f31200f61ef28077ff07cd9c48b5 Mon Sep 17 00:00:00 2001 From: Martin Willi Date: Fri, 13 May 2016 12:41:48 +0200 Subject: [PATCH 105/178] mac80211_hwsim: Add missing check for HWSIM_ATTR_SIGNAL commit 62397da50bb20a6b812c949ef465d7e69fe54bb6 upstream. A wmediumd that does not send this attribute causes a NULL pointer dereference, as the attribute is accessed even if it does not exist. The attribute was required but never checked ever since userspace frame forwarding has been introduced. The issue gets more problematic once we allow wmediumd registration from user namespaces. Fixes: 7882513bacb1 ("mac80211_hwsim driver support userspace frame tx/rx") Signed-off-by: Martin Willi Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman Signed-off-by: Willy Tarreau --- drivers/net/wireless/mac80211_hwsim.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index cb34c7895f2a..735c26620387 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -1931,6 +1931,7 @@ static int hwsim_tx_info_frame_received_nl(struct sk_buff *skb_2, if (!info->attrs[HWSIM_ATTR_ADDR_TRANSMITTER] || !info->attrs[HWSIM_ATTR_FLAGS] || !info->attrs[HWSIM_ATTR_COOKIE] || + !info->attrs[HWSIM_ATTR_SIGNAL] || !info->attrs[HWSIM_ATTR_TX_INFO]) goto out; From f40b2f17ede436c060c596799a7660cbb7462ed8 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 8 Jun 2016 17:28:29 -0600 Subject: [PATCH 106/178] IB/mlx4: Properly initialize GRH TClass and FlowLabel in AHs commit 8c5122e45a10a9262f872b53f151a592e870f905 upstream. When this code was reworked for IBoE support the order of assignments for the sl_tclass_flowlabel got flipped around resulting in TClass & FlowLabel being permanently set to 0 in the packet headers. This breaks IB routers that rely on these headers, but only affects kernel users - libmlx4 does this properly for user space. Cc: stable@vger.kernel.org Fixes: fa417f7b520e ("IB/mlx4: Add support for IBoE") Signed-off-by: Jason Gunthorpe Signed-off-by: Doug Ledford Signed-off-by: Willy Tarreau --- drivers/infiniband/hw/mlx4/ah.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c index 890c23b3d714..f55d69500a5f 100644 --- a/drivers/infiniband/hw/mlx4/ah.c +++ b/drivers/infiniband/hw/mlx4/ah.c @@ -65,6 +65,7 @@ static struct ib_ah *create_ib_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, ah->av.ib.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24)); ah->av.ib.g_slid = ah_attr->src_path_bits; + ah->av.ib.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28); if (ah_attr->ah_flags & IB_AH_GRH) { ah->av.ib.g_slid |= 0x80; ah->av.ib.gid_index = ah_attr->grh.sgid_index; @@ -82,7 +83,6 @@ static struct ib_ah *create_ib_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, !(1 << ah->av.ib.stat_rate & dev->caps.stat_rate_support)) --ah->av.ib.stat_rate; } - ah->av.ib.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28); return &ah->ibah; } From 55ac348c6481882218cc2ba6713bc8fb972dcf3d Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sun, 10 Apr 2016 19:13:13 -0600 Subject: [PATCH 107/178] IB/security: Restrict use of the write() interface commit e6bd18f57aad1a2d1ef40e646d03ed0f2515c9e3 upstream. The drivers/infiniband stack uses write() as a replacement for bi-directional ioctl(). This is not safe. There are ways to trigger write calls that result in the return structure that is normally written to user space being shunted off to user specified kernel memory instead. For the immediate repair, detect and deny suspicious accesses to the write API. For long term, update the user space libraries and the kernel API to something that doesn't present the same security vulnerabilities (likely a structured ioctl() interface). The impacted uAPI interfaces are generally only available if hardware from drivers/infiniband is installed in the system. Reported-by: Jann Horn Signed-off-by: Linus Torvalds Signed-off-by: Jason Gunthorpe [ Expanded check to all known write() entry points ] Cc: stable@vger.kernel.org Signed-off-by: Doug Ledford [wt: no hfi1 subdir in 3.10. A minimal rdma/ib.h had to be created from 3.11 sources to keep the code similar to mainline] Signed-off-by: Willy Tarreau --- drivers/infiniband/core/ucm.c | 4 ++ drivers/infiniband/core/ucma.c | 4 ++ drivers/infiniband/core/uverbs_main.c | 5 +++ drivers/infiniband/hw/qib/qib_file_ops.c | 5 +++ include/rdma/ib.h | 54 ++++++++++++++++++++++++ 5 files changed, 72 insertions(+) create mode 100644 include/rdma/ib.h diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index f2f63933e8a9..5befec118a18 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -48,6 +48,7 @@ #include +#include #include #include #include @@ -1104,6 +1105,9 @@ static ssize_t ib_ucm_write(struct file *filp, const char __user *buf, struct ib_ucm_cmd_hdr hdr; ssize_t result; + if (WARN_ON_ONCE(!ib_safe_file_access(filp))) + return -EACCES; + if (len < sizeof(hdr)) return -EINVAL; diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 5ca44cd9b00c..99f1c170770f 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -43,6 +43,7 @@ #include #include +#include #include #include #include @@ -1249,6 +1250,9 @@ static ssize_t ucma_write(struct file *filp, const char __user *buf, struct rdma_ucm_cmd_hdr hdr; ssize_t ret; + if (WARN_ON_ONCE(!ib_safe_file_access(filp))) + return -EACCES; + if (len < sizeof(hdr)) return -EINVAL; diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index b6062b9236a2..f50623d07a75 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -48,6 +48,8 @@ #include +#include + #include "uverbs.h" MODULE_AUTHOR("Roland Dreier"); @@ -588,6 +590,9 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, struct ib_uverbs_file *file = filp->private_data; struct ib_uverbs_cmd_hdr hdr; + if (WARN_ON_ONCE(!ib_safe_file_access(filp))) + return -EACCES; + if (count < sizeof hdr) return -EINVAL; diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c index b56c9428f3c5..8cb29b36c82a 100644 --- a/drivers/infiniband/hw/qib/qib_file_ops.c +++ b/drivers/infiniband/hw/qib/qib_file_ops.c @@ -45,6 +45,8 @@ #include #include +#include + #include "qib.h" #include "qib_common.h" #include "qib_user_sdma.h" @@ -1977,6 +1979,9 @@ static ssize_t qib_write(struct file *fp, const char __user *data, ssize_t ret = 0; void *dest; + if (WARN_ON_ONCE(!ib_safe_file_access(fp))) + return -EACCES; + if (count < sizeof(cmd.type)) { ret = -EINVAL; goto bail; diff --git a/include/rdma/ib.h b/include/rdma/ib.h new file mode 100644 index 000000000000..f09331ad0aba --- /dev/null +++ b/include/rdma/ib.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2010 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#if !defined(_RDMA_IB_H) +#define _RDMA_IB_H + +#include +#include + +/* + * The IB interfaces that use write() as bi-directional ioctl() are + * fundamentally unsafe, since there are lots of ways to trigger "write()" + * calls from various contexts with elevated privileges. That includes the + * traditional suid executable error message writes, but also various kernel + * interfaces that can write to file descriptors. + * + * This function provides protection for the legacy API by restricting the + * calling context. + */ +static inline bool ib_safe_file_access(struct file *filp) +{ + return filp->f_cred == current_cred() && segment_eq(get_fs(), USER_DS); +} + +#endif /* _RDMA_IB_H */ From 0c3c7f4ca967fffa78c53a3ba154aed20fb60048 Mon Sep 17 00:00:00 2001 From: Erez Shitrit Date: Sat, 4 Jun 2016 15:15:19 +0300 Subject: [PATCH 108/178] IB/IPoIB: Don't update neigh validity for unresolved entries commit 61c78eea9516a921799c17b4c20558e2aa780fd3 upstream. ipoib_neigh_get unconditionally updates the "alive" variable member on any packet send. This prevents the neighbor garbage collection from cleaning out a dead neighbor entry if we are still queueing packets for it. If the queue for this neighbor is full, then don't update the alive timestamp. That way the neighbor can time out even if packets are still being queued as long as none of them are being sent. Fixes: b63b70d87741 ("IPoIB: Use a private hash table for path lookup in xmit path") Signed-off-by: Erez Shitrit Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford Signed-off-by: Willy Tarreau --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index b6e049a3c7a8..a481094af85f 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -887,7 +887,9 @@ struct ipoib_neigh *ipoib_neigh_get(struct net_device *dev, u8 *daddr) neigh = NULL; goto out_unlock; } - neigh->alive = jiffies; + + if (likely(skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE)) + neigh->alive = jiffies; goto out_unlock; } } From 9a7edde8c09d96b1f79b128a8a7c182afc674cc1 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Wed, 22 Jun 2016 17:27:28 +0300 Subject: [PATCH 109/178] IB/mlx4: Fix the SQ size of an RC QP commit f2940e2c76bb554a7fbdd28ca5b90904117a9e96 upstream. When calculating the required size of an RC QP send queue, leave enough space for masked atomic operations, which require more space than "regular" atomic operation. Fixes: 6fa8f719844b ("IB/mlx4: Add support for masked atomic operations") Signed-off-by: Yishai Hadas Reviewed-by: Jack Morgenstein Reviewed-by: Eran Ben Elisha Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford Signed-off-by: Willy Tarreau --- drivers/infiniband/hw/mlx4/qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 262a18437ceb..1fe3bdb0da14 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -346,7 +346,7 @@ static int send_wqe_overhead(enum mlx4_ib_qp_type type, u32 flags) sizeof (struct mlx4_wqe_raddr_seg); case MLX4_IB_QPT_RC: return sizeof (struct mlx4_wqe_ctrl_seg) + - sizeof (struct mlx4_wqe_atomic_seg) + + sizeof (struct mlx4_wqe_masked_atomic_seg) + sizeof (struct mlx4_wqe_raddr_seg); case MLX4_IB_QPT_SMI: case MLX4_IB_QPT_GSI: From 970e17c2f8384cf7330deecfe85d555a0c3bff7a Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 5 Apr 2016 17:01:33 -0700 Subject: [PATCH 110/178] x86, build: copy ldlinux.c32 to image.iso commit 9c77679cadb118c0aa99e6f88533d91765a131ba upstream. For newer versions of Syslinux, we need ldlinux.c32 in addition to isolinux.bin to reside on the boot disk, so if the latter is found, copy it, too, to the isoimage tree. Signed-off-by: H. Peter Anvin Cc: Linux Stable Tree Signed-off-by: Willy Tarreau --- arch/x86/boot/Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index 6cf0111783d3..368f3582c93e 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -168,6 +168,9 @@ isoimage: $(obj)/bzImage for i in lib lib64 share end ; do \ if [ -f /usr/$$i/syslinux/isolinux.bin ] ; then \ cp /usr/$$i/syslinux/isolinux.bin $(obj)/isoimage ; \ + if [ -f /usr/$$i/syslinux/ldlinux.c32 ]; then \ + cp /usr/$$i/syslinux/ldlinux.c32 $(obj)/isoimage ; \ + fi ; \ break ; \ fi ; \ if [ $$i = end ] ; then exit 1 ; fi ; \ From b92e992e592b68fa7a7b3e3ba19c473d3d396292 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 11 Jun 2016 23:06:53 +0900 Subject: [PATCH 111/178] kprobes/x86: Clear TF bit in fault on single-stepping commit dcfc47248d3f7d28df6f531e6426b933de94370d upstream. Fix kprobe_fault_handler() to clear the TF (trap flag) bit of the flags register in the case of a fault fixup on single-stepping. If we put a kprobe on the instruction which caused a page fault (e.g. actual mov instructions in copy_user_*), that fault happens on the single-stepping buffer. In this case, kprobes resets running instance so that the CPU can retry execution on the original ip address. However, current code forgets to reset the TF bit. Since this fault happens with TF bit set for enabling single-stepping, when it retries, it causes a debug exception and kprobes can not handle it because it already reset itself. On the most of x86-64 platform, it can be easily reproduced by using kprobe tracer. E.g. # cd /sys/kernel/debug/tracing # echo p copy_user_enhanced_fast_string+5 > kprobe_events # echo 1 > events/kprobes/enable And you'll see a kernel panic on do_debug(), since the debug trap is not handled by kprobes. To fix this problem, we just need to clear the TF bit when resetting running kprobe. Signed-off-by: Masami Hiramatsu Reviewed-by: Ananth N Mavinakayanahalli Acked-by: Steven Rostedt Cc: Alexander Shishkin Cc: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: systemtap@sourceware.org Cc: stable@vger.kernel.org # All the way back to ancient kernels Link: http://lkml.kernel.org/r/20160611140648.25885.37482.stgit@devbox [ Updated the comments. ] Signed-off-by: Ingo Molnar Signed-off-by: Willy Tarreau --- arch/x86/kernel/kprobes/core.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 0c6c07cea3f7..766aa3bf1798 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -908,7 +908,19 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) * normal page fault. */ regs->ip = (unsigned long)cur->addr; + /* + * Trap flag (TF) has been set here because this fault + * happened where the single stepping will be done. + * So clear it by resetting the current kprobe: + */ + regs->flags &= ~X86_EFLAGS_TF; + + /* + * If the TF flag was set before the kprobe hit, + * don't touch it: + */ regs->flags |= kcb->kprobe_old_flags; + if (kcb->kprobe_status == KPROBE_REENTER) restore_previous_kprobe(kcb); else From 583e3ee15bf87946667d30f238bcd78585b208da Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 16 Jun 2016 19:13:49 +0200 Subject: [PATCH 112/178] x86/amd_nb: Fix boot crash on non-AMD systems commit 1ead852dd88779eda12cb09cc894a03d9abfe1ec upstream. Fix boot crash that triggers if this driver is built into a kernel and run on non-AMD systems. AMD northbridges users call amd_cache_northbridges() and it returns a negative value to signal that we weren't able to cache/detect any northbridges on the system. At least, it should do so as all its callers expect it to do so. But it does return a negative value only when kmalloc() fails. Fix it to return -ENODEV if there are no NBs cached as otherwise, amd_nb users like amd64_edac, for example, which relies on it to know whether it should load or not, gets loaded on systems like Intel Xeons where it shouldn't. Reported-and-tested-by: Tony Battersby Signed-off-by: Borislav Petkov Cc: Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1466097230-5333-2-git-send-email-bp@alien8.de Link: https://lkml.kernel.org/r/5761BEB0.9000807@cybernetics.com Signed-off-by: Ingo Molnar Signed-off-by: Willy Tarreau --- arch/x86/kernel/amd_nb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 59554dca96ec..e6a3b1e35fae 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -67,8 +67,8 @@ int amd_cache_northbridges(void) while ((misc = next_northbridge(misc, amd_nb_misc_ids)) != NULL) i++; - if (i == 0) - return 0; + if (!i) + return -ENODEV; nb = kzalloc(i * sizeof(struct amd_northbridge), GFP_KERNEL); if (!nb) From 1e766c1750165734c3030f9639a52230d4ea94e3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 25 Jun 2016 19:19:28 -0400 Subject: [PATCH 113/178] NFS: Fix another OPEN_DOWNGRADE bug commit e547f2628327fec6afd2e03b46f113f614cca05b upstream. Olga Kornievskaia reports that the following test fails to trigger an OPEN_DOWNGRADE on the wire, and only triggers the final CLOSE. fd0 = open(foo, RDRW) -- should be open on the wire for "both" fd1 = open(foo, RDONLY) -- should be open on the wire for "read" close(fd0) -- should trigger an open_downgrade read(fd1) close(fd1) The issue is that we're missing a check for whether or not the current state transitioned from an O_RDWR state as opposed to having transitioned from a combination of O_RDONLY and O_WRONLY. Reported-by: Olga Kornievskaia Fixes: cd9288ffaea4 ("NFSv4: Fix another bug in the close/open_downgrade code") Cc: stable@vger.kernel.org # 2.6.33+ Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker Signed-off-by: Willy Tarreau --- fs/nfs/nfs4proc.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d8ac734a1e44..c2b89a1a403b 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2332,12 +2332,11 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) call_close |= is_wronly; else if (is_wronly) calldata->arg.fmode |= FMODE_WRITE; + if (calldata->arg.fmode != (FMODE_READ|FMODE_WRITE)) + call_close |= is_rdwr; } else if (is_rdwr) calldata->arg.fmode |= FMODE_READ|FMODE_WRITE; - if (calldata->arg.fmode == 0) - call_close |= is_rdwr; - if (!nfs4_valid_open_stateid(state)) call_close = 0; spin_unlock(&state->owner->so_lock); From 31f476515df016bc1c9c119a7c7ebc1d274f04ad Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Thu, 16 Jun 2016 23:26:14 +0200 Subject: [PATCH 114/178] mm: Export migrate_page_move_mapping and migrate_page_copy commit 1118dce773d84f39ebd51a9fe7261f9169cb056e upstream. Export these symbols such that UBIFS can implement ->migratepage. Signed-off-by: Richard Weinberger Acked-by: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman [wt: also add the prototype to include/linux/migrate.h] Signed-off-by: Willy Tarreau --- include/linux/migrate.h | 3 +++ mm/migrate.c | 2 ++ 2 files changed, 5 insertions(+) diff --git a/include/linux/migrate.h b/include/linux/migrate.h index a405d3dc0f61..e98692748066 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -55,6 +55,9 @@ extern int migrate_vmas(struct mm_struct *mm, extern void migrate_page_copy(struct page *newpage, struct page *page); extern int migrate_huge_page_move_mapping(struct address_space *mapping, struct page *newpage, struct page *page); +extern int migrate_page_move_mapping(struct address_space *mapping, + struct page *newpage, struct page *page, + struct buffer_head *head, enum migrate_mode mode); #else static inline void putback_lru_pages(struct list_head *l) {} diff --git a/mm/migrate.c b/mm/migrate.c index a61500f2671f..2ee28c2be6a5 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -412,6 +412,7 @@ static int migrate_page_move_mapping(struct address_space *mapping, return MIGRATEPAGE_SUCCESS; } +EXPORT_SYMBOL(migrate_page_move_mapping); /* * The expected number of remaining references is the same as that @@ -503,6 +504,7 @@ void migrate_page_copy(struct page *newpage, struct page *page) if (PageWriteback(newpage)) end_page_writeback(newpage); } +EXPORT_SYMBOL(migrate_page_copy); /************************************************************ * Migration functions From eed1a4028c96cabb79747ee01e17b1057b01027c Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 16 Jun 2016 23:26:15 +0200 Subject: [PATCH 115/178] UBIFS: Implement ->migratepage() commit 4ac1c17b2044a1b4b2fbed74451947e905fc2992 upstream. During page migrations UBIFS might get confused and the following assert triggers: [ 213.480000] UBIFS assert failed in ubifs_set_page_dirty at 1451 (pid 436) [ 213.490000] CPU: 0 PID: 436 Comm: drm-stress-test Not tainted 4.4.4-00176-geaa802524636-dirty #1008 [ 213.490000] Hardware name: Allwinner sun4i/sun5i Families [ 213.490000] [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [ 213.490000] [] (show_stack) from [] (dump_stack+0x8c/0xa0) [ 213.490000] [] (dump_stack) from [] (ubifs_set_page_dirty+0x44/0x50) [ 213.490000] [] (ubifs_set_page_dirty) from [] (try_to_unmap_one+0x10c/0x3a8) [ 213.490000] [] (try_to_unmap_one) from [] (rmap_walk+0xb4/0x290) [ 213.490000] [] (rmap_walk) from [] (try_to_unmap+0x64/0x80) [ 213.490000] [] (try_to_unmap) from [] (migrate_pages+0x328/0x7a0) [ 213.490000] [] (migrate_pages) from [] (alloc_contig_range+0x168/0x2f4) [ 213.490000] [] (alloc_contig_range) from [] (cma_alloc+0x170/0x2c0) [ 213.490000] [] (cma_alloc) from [] (__alloc_from_contiguous+0x38/0xd8) [ 213.490000] [] (__alloc_from_contiguous) from [] (__dma_alloc+0x23c/0x274) [ 213.490000] [] (__dma_alloc) from [] (arm_dma_alloc+0x54/0x5c) [ 213.490000] [] (arm_dma_alloc) from [] (drm_gem_cma_create+0xb8/0xf0) [ 213.490000] [] (drm_gem_cma_create) from [] (drm_gem_cma_create_with_handle+0x1c/0xe8) [ 213.490000] [] (drm_gem_cma_create_with_handle) from [] (drm_gem_cma_dumb_create+0x3c/0x48) [ 213.490000] [] (drm_gem_cma_dumb_create) from [] (drm_ioctl+0x12c/0x444) [ 213.490000] [] (drm_ioctl) from [] (do_vfs_ioctl+0x3f4/0x614) [ 213.490000] [] (do_vfs_ioctl) from [] (SyS_ioctl+0x34/0x5c) [ 213.490000] [] (SyS_ioctl) from [] (ret_fast_syscall+0x0/0x34) UBIFS is using PagePrivate() which can have different meanings across filesystems. Therefore the generic page migration code cannot handle this case correctly. We have to implement our own migration function which basically does a plain copy but also duplicates the page private flag. UBIFS is not a block device filesystem and cannot use buffer_migrate_page(). Signed-off-by: Kirill A. Shutemov [rw: Massaged changelog, build fixes, etc...] Signed-off-by: Richard Weinberger Acked-by: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman Signed-off-by: Willy Tarreau --- fs/ubifs/file.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 881324c08430..a335e4e6aba1 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -54,6 +54,7 @@ #include #include #include +#include static int read_block(struct inode *inode, void *addr, unsigned int block, struct ubifs_data_node *dn) @@ -1422,6 +1423,26 @@ static int ubifs_set_page_dirty(struct page *page) return ret; } +#ifdef CONFIG_MIGRATION +static int ubifs_migrate_page(struct address_space *mapping, + struct page *newpage, struct page *page, enum migrate_mode mode) +{ + int rc; + + rc = migrate_page_move_mapping(mapping, newpage, page, NULL, mode); + if (rc != MIGRATEPAGE_SUCCESS) + return rc; + + if (PagePrivate(page)) { + ClearPagePrivate(page); + SetPagePrivate(newpage); + } + + migrate_page_copy(newpage, page); + return MIGRATEPAGE_SUCCESS; +} +#endif + static int ubifs_releasepage(struct page *page, gfp_t unused_gfp_flags) { /* @@ -1558,6 +1579,9 @@ const struct address_space_operations ubifs_file_address_operations = { .write_end = ubifs_write_end, .invalidatepage = ubifs_invalidatepage, .set_page_dirty = ubifs_set_page_dirty, +#ifdef CONFIG_MIGRATION + .migratepage = ubifs_migrate_page, +#endif .releasepage = ubifs_releasepage, }; From 21e5755c75ed7354866b35c1cc100ea323e15602 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Sun, 3 Jul 2016 22:24:50 +0200 Subject: [PATCH 116/178] cdc_ncm: workaround for EM7455 "silent" data interface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit c086e7096170390594c425114d98172bc9aceb8a upstream. Several Lenovo users have reported problems with their Sierra Wireless EM7455 modem. The driver has loaded successfully and the MBIM management channel has appeared to work, including establishing a connection to the mobile network. But no frames have been received over the data interface. The problem affects all EM7455 and MC7455, and is assumed to affect other modems based on the same Qualcomm chipset and baseband firmware. Testing narrowed the problem down to what seems to be a firmware timing bug during initialization. Adding a short sleep while probing is sufficient to make the problem disappear. Experiments have shown that 1-2 ms is too little to have any effect, while 10-20 ms is enough to reliably succeed. Reported-by: Stefan Armbruster Reported-by: Ralph Plawetzki Reported-by: Andreas Fett Reported-by: Rasmus Lerdorf Reported-by: Samo Ratnik Reported-and-tested-by: Aleksander Morgado Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman Signed-off-by: Willy Tarreau --- drivers/net/usb/cdc_ncm.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 6ee9665e20b2..a5802419381f 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -477,6 +477,13 @@ advance: if (cdc_ncm_setup(ctx)) goto error2; + /* Some firmwares need a pause here or they will silently fail + * to set up the interface properly. This value was decided + * empirically on a Sierra Wireless MC7455 running 02.08.02.00 + * firmware. + */ + usleep_range(10000, 20000); + /* configure data interface */ temp = usb_set_interface(dev->udev, iface_no, data_altsetting); if (temp) From 01fcef2cad3cbc8e5ce6b3d0a6b05bce95c73475 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Wed, 15 Jun 2016 18:00:33 +0800 Subject: [PATCH 117/178] kvm: Fix irq route entries exceeding KVM_MAX_IRQ_ROUTES commit caf1ff26e1aa178133df68ac3d40815fed2187d9 upstream. These days, we experienced one guest crash with 8 cores and 3 disks, with qemu error logs as bellow: qemu-system-x86_64: /build/qemu-2.0.0/kvm-all.c:984: kvm_irqchip_commit_routes: Assertion `ret == 0' failed. And then we found one patch(bdf026317d) in qemu tree, which said could fix this bug. Execute the following script will reproduce the BUG quickly: irq_affinity.sh ======================================================================== vda_irq_num=25 vdb_irq_num=27 while [ 1 ] do for irq in {1,2,4,8,10,20,40,80} do echo $irq > /proc/irq/$vda_irq_num/smp_affinity echo $irq > /proc/irq/$vdb_irq_num/smp_affinity dd if=/dev/vda of=/dev/zero bs=4K count=100 iflag=direct dd if=/dev/vdb of=/dev/zero bs=4K count=100 iflag=direct done done ======================================================================== The following qemu log is added in the qemu code and is displayed when this bug reproduced: kvm_irqchip_commit_routes: max gsi: 1008, nr_allocated_irq_routes: 1024, irq_routes->nr: 1024, gsi_count: 1024. That's to say when irq_routes->nr == 1024, there are 1024 routing entries, but in the kernel code when routes->nr >= 1024, will just return -EINVAL; The nr is the number of the routing entries which is in of [1 ~ KVM_MAX_IRQ_ROUTES], not the index in [0 ~ KVM_MAX_IRQ_ROUTES - 1]. This patch fix the BUG above. Cc: stable@vger.kernel.org Signed-off-by: Xiubo Li Signed-off-by: Wei Tang Signed-off-by: Zhang Zhuoyu Signed-off-by: Paolo Bonzini Signed-off-by: Willy Tarreau --- virt/kvm/kvm_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 4f865e122c21..f71c4ad425c6 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2447,7 +2447,7 @@ static long kvm_vm_ioctl(struct file *filp, if (copy_from_user(&routing, argp, sizeof(routing))) goto out; r = -EINVAL; - if (routing.nr >= KVM_MAX_IRQ_ROUTES) + if (routing.nr > KVM_MAX_IRQ_ROUTES) goto out; if (routing.flags) goto out; From a836c42ecad68bb4ee74010201fbcef7a77f0f68 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Fri, 17 Jun 2016 16:10:42 -0400 Subject: [PATCH 118/178] tracing: Handle NULL formats in hold_module_trace_bprintk_format() commit 70c8217acd4383e069fe1898bbad36ea4fcdbdcc upstream. If a task uses a non constant string for the format parameter in trace_printk(), then the trace_printk_fmt variable is set to NULL. This variable is then saved in the __trace_printk_fmt section. The function hold_module_trace_bprintk_format() checks to see if duplicate formats are used by modules, and reuses them if so (saves them to the list if it is new). But this function calls lookup_format() that does a strcmp() to the value (which is now NULL) and can cause a kernel oops. This wasn't an issue till 3debb0a9ddb ("tracing: Fix trace_printk() to print when not using bprintk()") which added "__used" to the trace_printk_fmt variable, and before that, the kernel simply optimized it out (no NULL value was saved). The fix is simply to handle the NULL pointer in lookup_format() and have the caller ignore the value if it was NULL. Link: http://lkml.kernel.org/r/1464769870-18344-1-git-send-email-zhengjun.xing@intel.com Reported-by: xingzhen Acked-by: Namhyung Kim Fixes: 3debb0a9ddb ("tracing: Fix trace_printk() to print when not using bprintk()") Cc: stable@vger.kernel.org # v3.5+ Signed-off-by: Steven Rostedt Signed-off-by: Willy Tarreau --- kernel/trace/trace_printk.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c index fdb23e84b011..7be4d67cecbd 100644 --- a/kernel/trace/trace_printk.c +++ b/kernel/trace/trace_printk.c @@ -38,6 +38,10 @@ struct trace_bprintk_fmt { static inline struct trace_bprintk_fmt *lookup_format(const char *fmt) { struct trace_bprintk_fmt *pos; + + if (!fmt) + return ERR_PTR(-EINVAL); + list_for_each_entry(pos, &trace_bprintk_fmt_list, list) { if (!strcmp(pos->fmt, fmt)) return pos; @@ -59,7 +63,8 @@ void hold_module_trace_bprintk_format(const char **start, const char **end) for (iter = start; iter < end; iter++) { struct trace_bprintk_fmt *tb_fmt = lookup_format(*iter); if (tb_fmt) { - *iter = tb_fmt->fmt; + if (!IS_ERR(tb_fmt)) + *iter = tb_fmt->fmt; continue; } From 19a98b0bd8783be5bfabddb8dec11b1fe42a28ef Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Fri, 10 Jun 2016 10:54:32 +0200 Subject: [PATCH 119/178] base: make module_create_drivers_dir race-free commit 7e1b1fc4dabd6ec8e28baa0708866e13fa93c9b3 upstream. Modules which register drivers via standard path (driver_register) in parallel can cause a warning: WARNING: CPU: 2 PID: 3492 at ../fs/sysfs/dir.c:31 sysfs_warn_dup+0x62/0x80 sysfs: cannot create duplicate filename '/module/saa7146/drivers' Modules linked in: hexium_gemini(+) mxb(+) ... ... Call Trace: ... [] sysfs_warn_dup+0x62/0x80 [] sysfs_create_dir_ns+0x77/0x90 [] kobject_add_internal+0xb4/0x340 [] kobject_add+0x68/0xb0 [] kobject_create_and_add+0x31/0x70 [] module_add_driver+0xc3/0xd0 [] bus_add_driver+0x154/0x280 [] driver_register+0x60/0xe0 [] __pci_register_driver+0x60/0x70 [] saa7146_register_extension+0x64/0x90 [saa7146] [] hexium_init_module+0x11/0x1000 [hexium_gemini] ... As can be (mostly) seen, driver_register causes this call sequence: -> bus_add_driver -> module_add_driver -> module_create_drivers_dir The last one creates "drivers" directory in /sys/module/<...>. When this is done in parallel, the directory is attempted to be created twice at the same time. This can be easily reproduced by loading mxb and hexium_gemini in parallel: while :; do modprobe mxb & modprobe hexium_gemini wait rmmod mxb hexium_gemini saa7146_vv saa7146 done saa7146 calls pci_register_driver for both mxb and hexium_gemini, which means /sys/module/saa7146/drivers is to be created for both of them. Fix this by a new mutex in module_create_drivers_dir which makes the test-and-create "drivers" dir atomic. I inverted the condition and removed 'return' to avoid multiple unlocks or a goto. Signed-off-by: Jiri Slaby Fixes: fe480a2675ed (Modules: only add drivers/ direcory if needed) Cc: v2.6.21+ Signed-off-by: Greg Kroah-Hartman Signed-off-by: Willy Tarreau --- drivers/base/module.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/base/module.c b/drivers/base/module.c index db930d3ee312..2a215780eda2 100644 --- a/drivers/base/module.c +++ b/drivers/base/module.c @@ -24,10 +24,12 @@ static char *make_driver_name(struct device_driver *drv) static void module_create_drivers_dir(struct module_kobject *mk) { - if (!mk || mk->drivers_dir) - return; + static DEFINE_MUTEX(drivers_dir_mutex); - mk->drivers_dir = kobject_create_and_add("drivers", &mk->kobj); + mutex_lock(&drivers_dir_mutex); + if (mk && !mk->drivers_dir) + mk->drivers_dir = kobject_create_and_add("drivers", &mk->kobj); + mutex_unlock(&drivers_dir_mutex); } void module_add_driver(struct module *mod, struct device_driver *drv) From c24113843be9eb9ac27e8b2d9bbce6d2c8d01e6c Mon Sep 17 00:00:00 2001 From: Crestez Dan Leonard Date: Tue, 3 May 2016 15:27:09 +0300 Subject: [PATCH 120/178] iio: Fix error handling in iio_trigger_attach_poll_func commit 99543823357966ac938d9a310947e731b67338e6 upstream. When attaching a pollfunc iio_trigger_attach_poll_func will allocate a virtual irq and call the driver's set_trigger_state function. Fix error handling to undo previous steps if any fails. In particular this fixes handling errors from a driver's set_trigger_state function. When using triggered buffers a failure to enable the trigger used to make the buffer unusable. Signed-off-by: Crestez Dan Leonard Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Willy Tarreau --- drivers/iio/industrialio-trigger.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/drivers/iio/industrialio-trigger.c b/drivers/iio/industrialio-trigger.c index 4d6c7d84e155..301becccf5ed 100644 --- a/drivers/iio/industrialio-trigger.c +++ b/drivers/iio/industrialio-trigger.c @@ -203,22 +203,35 @@ static int iio_trigger_attach_poll_func(struct iio_trigger *trig, /* Prevent the module from being removed whilst attached to a trigger */ __module_get(pf->indio_dev->info->driver_module); + + /* Get irq number */ pf->irq = iio_trigger_get_irq(trig); + if (pf->irq < 0) + goto out_put_module; + + /* Request irq */ ret = request_threaded_irq(pf->irq, pf->h, pf->thread, pf->type, pf->name, pf); - if (ret < 0) { - module_put(pf->indio_dev->info->driver_module); - return ret; - } + if (ret < 0) + goto out_put_irq; + /* Enable trigger in driver */ if (trig->ops && trig->ops->set_trigger_state && notinuse) { ret = trig->ops->set_trigger_state(trig, true); if (ret < 0) - module_put(pf->indio_dev->info->driver_module); + goto out_free_irq; } return ret; + +out_free_irq: + free_irq(pf->irq, pf); +out_put_irq: + iio_trigger_put_irq(trig, pf->irq); +out_put_module: + module_put(pf->indio_dev->info->driver_module); + return ret; } static int iio_trigger_detach_poll_func(struct iio_trigger *trig, From 56d7f3422d209a1b56650d4d61305bec21c6dcd6 Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Wed, 22 Jun 2016 20:43:30 +0100 Subject: [PATCH 121/178] staging: iio: accel: fix error check commit ef3149eb3ddb7f9125e11c90f8330e371b55cffd upstream. sca3000_read_ctrl_reg() returns a negative number on failure, check for this instead of zero. Signed-off-by: Luis de Bethencourt Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Willy Tarreau --- drivers/staging/iio/accel/sca3000_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/iio/accel/sca3000_core.c b/drivers/staging/iio/accel/sca3000_core.c index 32950ad94857..b30c41b3e0cc 100644 --- a/drivers/staging/iio/accel/sca3000_core.c +++ b/drivers/staging/iio/accel/sca3000_core.c @@ -588,7 +588,7 @@ static ssize_t sca3000_read_frequency(struct device *dev, goto error_ret_mut; ret = sca3000_read_ctrl_reg(st, SCA3000_REG_CTRL_SEL_OUT_CTRL); mutex_unlock(&st->lock); - if (ret) + if (ret < 0) goto error_ret; val = ret; if (base_freq > 0) From bb29eeb462f11a922405aedb0a6ca9f5a1f86d44 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 17 Jun 2016 15:22:24 +0200 Subject: [PATCH 122/178] iio: accel: kxsd9: fix the usage of spi_w8r8() commit 0c1f91b98552da49d9d8eed32b3132a58d2f4598 upstream. These two spi_w8r8() calls return a value with is used by the code following the error check. The dubious use was caused by a cleanup patch. Fixes: d34dbee8ac8e ("staging:iio:accel:kxsd9 cleanup and conversion to iio_chan_spec.") Signed-off-by: Linus Walleij Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Willy Tarreau --- drivers/iio/accel/kxsd9.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/accel/kxsd9.c b/drivers/iio/accel/kxsd9.c index 7c9a1d97dc68..a22c427454db 100644 --- a/drivers/iio/accel/kxsd9.c +++ b/drivers/iio/accel/kxsd9.c @@ -81,7 +81,7 @@ static int kxsd9_write_scale(struct iio_dev *indio_dev, int micro) mutex_lock(&st->buf_lock); ret = spi_w8r8(st->us, KXSD9_READ(KXSD9_REG_CTRL_C)); - if (ret) + if (ret < 0) goto error_ret; st->tx[0] = KXSD9_WRITE(KXSD9_REG_CTRL_C); st->tx[1] = (ret & ~KXSD9_FS_MASK) | i; @@ -163,7 +163,7 @@ static int kxsd9_read_raw(struct iio_dev *indio_dev, break; case IIO_CHAN_INFO_SCALE: ret = spi_w8r8(st->us, KXSD9_READ(KXSD9_REG_CTRL_C)); - if (ret) + if (ret < 0) goto error_ret; *val2 = kxsd9_micro_scales[ret & KXSD9_FS_MASK]; ret = IIO_VAL_INT_PLUS_MICRO; From 0ff80acdf9a8179dcaf361c1ff5375b50c3dd6d6 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 20 Jun 2016 13:53:32 +0100 Subject: [PATCH 123/178] iio:ad7266: Fix broken regulator error handling commit 6b7f4e25f3309f106a5c7ff42c8231494cf285d3 upstream. All regulator_get() variants return either a pointer to a regulator or an ERR_PTR() so testing for NULL makes no sense and may lead to bugs if we use NULL as a valid regulator. Fix this by using IS_ERR() as expected. Signed-off-by: Mark Brown Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Willy Tarreau --- drivers/iio/adc/ad7266.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/ad7266.c b/drivers/iio/adc/ad7266.c index c2744a75c3b0..d08f4175938c 100644 --- a/drivers/iio/adc/ad7266.c +++ b/drivers/iio/adc/ad7266.c @@ -406,7 +406,7 @@ static int ad7266_probe(struct spi_device *spi) st = iio_priv(indio_dev); st->reg = regulator_get(&spi->dev, "vref"); - if (!IS_ERR_OR_NULL(st->reg)) { + if (!IS_ERR(st->reg)) { ret = regulator_enable(st->reg); if (ret) goto error_put_reg; From 585ec8b505e68995bd54e9812e1ea68d196a4a2f Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 20 Jun 2016 13:53:34 +0100 Subject: [PATCH 124/178] iio:ad7266: Fix probe deferral for vref commit 68b356eb3d9f5e38910fb62e22a78e2a18d544ae upstream. Currently the ad7266 driver treats any failure to get vref as though the regulator were not present but this means that if probe deferral is triggered the driver will act as though the regulator were not present. Instead only use the internal reference if we explicitly got -ENODEV which is what is returned for absent regulators. Signed-off-by: Mark Brown Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Willy Tarreau --- drivers/iio/adc/ad7266.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/iio/adc/ad7266.c b/drivers/iio/adc/ad7266.c index d08f4175938c..6569a4e2a436 100644 --- a/drivers/iio/adc/ad7266.c +++ b/drivers/iio/adc/ad7266.c @@ -417,6 +417,10 @@ static int ad7266_probe(struct spi_device *spi) st->vref_uv = ret; } else { + /* Any other error indicates that the regulator does exist */ + if (PTR_ERR(st->reg) != -ENODEV) + return PTR_ERR(st->reg); + /* Use internal reference */ st->vref_uv = 2500000; } From 074ed3fd042d2a8bebef38147ce4de3ac6933da3 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 27 Jun 2016 14:12:34 -0700 Subject: [PATCH 125/178] tty/vt/keyboard: fix OOB access in do_compute_shiftstate() commit 510cccb5b0c8868a2b302a0ab524da7912da648b upstream. The size of individual keymap in drivers/tty/vt/keyboard.c is NR_KEYS, which is currently 256, whereas number of keys/buttons in input device (and therefor in key_down) is much larger - KEY_CNT - 768, and that can cause out-of-bound access when we do sym = U(key_maps[0][k]); with large 'k'. To fix it we should not attempt iterating beyond smaller of NR_KEYS and KEY_CNT. Also while at it let's switch to for_each_set_bit() instead of open-coding it. Reported-by: Sasha Levin Reviewed-by: Guenter Roeck Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Willy Tarreau --- drivers/tty/vt/keyboard.c | 30 +++++++++--------------------- 1 file changed, 9 insertions(+), 21 deletions(-) diff --git a/drivers/tty/vt/keyboard.c b/drivers/tty/vt/keyboard.c index a9af1b9ae160..1f6e09649e5a 100644 --- a/drivers/tty/vt/keyboard.c +++ b/drivers/tty/vt/keyboard.c @@ -371,34 +371,22 @@ static void to_utf8(struct vc_data *vc, uint c) static void do_compute_shiftstate(void) { - unsigned int i, j, k, sym, val; + unsigned int k, sym, val; shift_state = 0; memset(shift_down, 0, sizeof(shift_down)); - for (i = 0; i < ARRAY_SIZE(key_down); i++) { - - if (!key_down[i]) + for_each_set_bit(k, key_down, min(NR_KEYS, KEY_CNT)) { + sym = U(key_maps[0][k]); + if (KTYP(sym) != KT_SHIFT && KTYP(sym) != KT_SLOCK) continue; - k = i * BITS_PER_LONG; + val = KVAL(sym); + if (val == KVAL(K_CAPSSHIFT)) + val = KVAL(K_SHIFT); - for (j = 0; j < BITS_PER_LONG; j++, k++) { - - if (!test_bit(k, key_down)) - continue; - - sym = U(key_maps[0][k]); - if (KTYP(sym) != KT_SHIFT && KTYP(sym) != KT_SLOCK) - continue; - - val = KVAL(sym); - if (val == KVAL(K_CAPSSHIFT)) - val = KVAL(K_SHIFT); - - shift_down[val]++; - shift_state |= (1 << val); - } + shift_down[val]++; + shift_state |= BIT(val); } } From c3fdd7d30a36cb393b0a2c09bb693dfa6a3b04a5 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 24 Jun 2016 15:15:26 +0200 Subject: [PATCH 126/178] ALSA: dummy: Fix a use-after-free at closing commit d5dbbe6569481bf12dcbe3e12cff72c5f78d272c upstream. syzkaller fuzzer spotted a potential use-after-free case in snd-dummy driver when hrtimer is used as backend: > ================================================================== > BUG: KASAN: use-after-free in rb_erase+0x1b17/0x2010 at addr ffff88005e5b6f68 > Read of size 8 by task syz-executor/8984 > ============================================================================= > BUG kmalloc-192 (Not tainted): kasan: bad access detected > ----------------------------------------------------------------------------- > > Disabling lock debugging due to kernel taint > INFO: Allocated in 0xbbbbbbbbbbbbbbbb age=18446705582212484632 > .... > [< none >] dummy_hrtimer_create+0x49/0x1a0 sound/drivers/dummy.c:464 > .... > INFO: Freed in 0xfffd8e09 age=18446705496313138713 cpu=2164287125 pid=-1 > [< none >] dummy_hrtimer_free+0x68/0x80 sound/drivers/dummy.c:481 > .... > Call Trace: > [] __asan_report_load8_noabort+0x3e/0x40 mm/kasan/report.c:333 > [< inline >] rb_set_parent include/linux/rbtree_augmented.h:111 > [< inline >] __rb_erase_augmented include/linux/rbtree_augmented.h:218 > [] rb_erase+0x1b17/0x2010 lib/rbtree.c:427 > [] timerqueue_del+0x78/0x170 lib/timerqueue.c:86 > [] __remove_hrtimer+0x90/0x220 kernel/time/hrtimer.c:903 > [< inline >] remove_hrtimer kernel/time/hrtimer.c:945 > [] hrtimer_try_to_cancel+0x22a/0x570 kernel/time/hrtimer.c:1046 > [] hrtimer_cancel+0x22/0x40 kernel/time/hrtimer.c:1066 > [] dummy_hrtimer_stop+0x91/0xb0 sound/drivers/dummy.c:417 > [] dummy_pcm_trigger+0x17f/0x1e0 sound/drivers/dummy.c:507 > [] snd_pcm_do_stop+0x160/0x1b0 sound/core/pcm_native.c:1106 > [] snd_pcm_action_single+0x76/0x120 sound/core/pcm_native.c:956 > [] snd_pcm_action+0x231/0x290 sound/core/pcm_native.c:974 > [< inline >] snd_pcm_stop sound/core/pcm_native.c:1139 > [] snd_pcm_drop+0x12d/0x1d0 sound/core/pcm_native.c:1784 > [] snd_pcm_common_ioctl1+0xfae/0x2150 sound/core/pcm_native.c:2805 > [] snd_pcm_capture_ioctl1+0x2a1/0x5e0 sound/core/pcm_native.c:2976 > [] snd_pcm_kernel_ioctl+0x11c/0x160 sound/core/pcm_native.c:3020 > [] snd_pcm_oss_sync+0x3a4/0xa30 sound/core/oss/pcm_oss.c:1693 > [] snd_pcm_oss_release+0x1ad/0x280 sound/core/oss/pcm_oss.c:2483 > ..... A workaround is to call hrtimer_cancel() in dummy_hrtimer_sync() which is called certainly before other blocking ops. Reported-by: Dmitry Vyukov Tested-by: Dmitry Vyukov Cc: Signed-off-by: Takashi Iwai Signed-off-by: Willy Tarreau --- sound/drivers/dummy.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/drivers/dummy.c b/sound/drivers/dummy.c index 982a2c2faf24..7f400a1d42e4 100644 --- a/sound/drivers/dummy.c +++ b/sound/drivers/dummy.c @@ -422,6 +422,7 @@ static int dummy_hrtimer_stop(struct snd_pcm_substream *substream) static inline void dummy_hrtimer_sync(struct dummy_hrtimer_pcm *dpcm) { + hrtimer_cancel(&dpcm->timer); tasklet_kill(&dpcm->tasklet); } From b753d79cc246a290c7b64d38091d47acfc577e0c Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 29 Jun 2016 15:23:08 +0200 Subject: [PATCH 127/178] ALSA: au88x0: Fix calculation in vortex_wtdma_bufshift() commit 62db7152c924e4c060e42b34a69cd39658e8a0dc upstream. vortex_wtdma_bufshift() function does calculate the page index wrongly, first masking then shift, which always results in zero. The proper computation is to first shift, then mask. Reported-by: Dan Carpenter Cc: Signed-off-by: Takashi Iwai Signed-off-by: Willy Tarreau --- sound/pci/au88x0/au88x0_core.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sound/pci/au88x0/au88x0_core.c b/sound/pci/au88x0/au88x0_core.c index ae59dbaa53d9..42d4b13f1fa7 100644 --- a/sound/pci/au88x0/au88x0_core.c +++ b/sound/pci/au88x0/au88x0_core.c @@ -1442,9 +1442,8 @@ static int vortex_wtdma_bufshift(vortex_t * vortex, int wtdma) int page, p, pp, delta, i; page = - (hwread(vortex->mmio, VORTEX_WTDMA_STAT + (wtdma << 2)) & - WT_SUBBUF_MASK) - >> WT_SUBBUF_SHIFT; + (hwread(vortex->mmio, VORTEX_WTDMA_STAT + (wtdma << 2)) + >> WT_SUBBUF_SHIFT) & WT_SUBBUF_MASK; if (dma->nr_periods >= 4) delta = (page - dma->period_real) & 3; else { From 34abd998cd244b5f53cfe2e9f26ab3c6289a362b Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 8 Jul 2016 08:05:19 +0200 Subject: [PATCH 128/178] ALSA: ctl: Stop notification after disconnection commit f388cdcdd160687c6650833f286b9c89c50960ff upstream. snd_ctl_remove() has a notification for the removal event. It's superfluous when done during the device got disconnected. Although the notification itself is mostly harmless, it may potentially be harmful, and should be suppressed. Actually some components PCM may free ctl elements during the disconnect or free callbacks, thus it's no theoretical issue. This patch adds the check of card->shutdown flag for avoiding unnecessary notifications after (or during) the disconnect. Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman Signed-off-by: Willy Tarreau --- sound/core/control.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/core/control.c b/sound/core/control.c index 3fcead61f0ef..251bc575f5c3 100644 --- a/sound/core/control.c +++ b/sound/core/control.c @@ -150,6 +150,8 @@ void snd_ctl_notify(struct snd_card *card, unsigned int mask, if (snd_BUG_ON(!card || !id)) return; + if (card->shutdown) + return; read_lock(&card->ctl_files_rwlock); #if defined(CONFIG_SND_MIXER_OSS) || defined(CONFIG_SND_MIXER_OSS_MODULE) card->mixer_oss_change_count++; From 77a13dd0d162a0f569d16c9210b585120f870a45 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Tue, 3 May 2016 16:44:07 -0400 Subject: [PATCH 129/178] ALSA: timer: Fix leak in SNDRV_TIMER_IOCTL_PARAMS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit cec8f96e49d9be372fdb0c3836dcf31ec71e457e upstream. The stack object “tread” has a total size of 32 bytes. Its field “event” and “val” both contain 4 bytes padding. These 8 bytes padding bytes are sent to user without being initialized. Signed-off-by: Kangjie Lu Signed-off-by: Takashi Iwai Signed-off-by: Willy Tarreau --- sound/core/timer.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/core/timer.c b/sound/core/timer.c index 38742e826900..54ff806126a3 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -1707,6 +1707,7 @@ static int snd_timer_user_params(struct file *file, if (tu->timeri->flags & SNDRV_TIMER_IFLG_EARLY_EVENT) { if (tu->tread) { struct snd_timer_tread tread; + memset(&tread, 0, sizeof(tread)); tread.event = SNDRV_TIMER_EVENT_EARLY; tread.tstamp.tv_sec = 0; tread.tstamp.tv_nsec = 0; From 9bad15dba640ca854d97aec78e050f70a33d0508 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Tue, 3 May 2016 16:44:20 -0400 Subject: [PATCH 130/178] ALSA: timer: Fix leak in events via snd_timer_user_ccallback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 9a47e9cff994f37f7f0dbd9ae23740d0f64f9fe6 upstream. The stack object “r1” has a total size of 32 bytes. Its field “event” and “val” both contain 4 bytes padding. These 8 bytes padding bytes are sent to user without being initialized. Signed-off-by: Kangjie Lu Signed-off-by: Takashi Iwai Signed-off-by: Willy Tarreau --- sound/core/timer.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/core/timer.c b/sound/core/timer.c index 54ff806126a3..7ba0709726b5 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -1208,6 +1208,7 @@ static void snd_timer_user_ccallback(struct snd_timer_instance *timeri, tu->tstamp = *tstamp; if ((tu->filter & (1 << event)) == 0 || !tu->tread) return; + memset(&r1, 0, sizeof(r1)); r1.event = event; r1.tstamp = *tstamp; r1.val = resolution; From ed82c388d45df9dbc456864ff31da433c4566316 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Tue, 3 May 2016 16:44:32 -0400 Subject: [PATCH 131/178] ALSA: timer: Fix leak in events via snd_timer_user_tinterrupt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit e4ec8cc8039a7063e24204299b462bd1383184a5 upstream. The stack object “r1” has a total size of 32 bytes. Its field “event” and “val” both contain 4 bytes padding. These 8 bytes padding bytes are sent to user without being initialized. Signed-off-by: Kangjie Lu Signed-off-by: Takashi Iwai Signed-off-by: Willy Tarreau --- sound/core/timer.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/core/timer.c b/sound/core/timer.c index 7ba0709726b5..3476895ee1fb 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -1243,6 +1243,7 @@ static void snd_timer_user_tinterrupt(struct snd_timer_instance *timeri, } if ((tu->filter & (1 << SNDRV_TIMER_EVENT_RESOLUTION)) && tu->last_resolution != resolution) { + memset(&r1, 0, sizeof(r1)); r1.event = SNDRV_TIMER_EVENT_RESOLUTION; r1.tstamp = tstamp; r1.val = resolution; From ec032b74164fab3dabeceb37362f2a58227c51b8 Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Tue, 7 Jun 2016 14:53:56 +0800 Subject: [PATCH 132/178] scsi: fix race between simultaneous decrements of ->host_failed commit 72d8c36ec364c82bf1bf0c64dfa1041cfaf139f7 upstream. sas_ata_strategy_handler() adds the works of the ata error handler to system_unbound_wq. This workqueue asynchronously runs work items, so the ata error handler will be performed concurrently on different CPUs. In this case, ->host_failed will be decreased simultaneously in scsi_eh_finish_cmd() on different CPUs, and become abnormal. It will lead to permanently inequality between ->host_failed and ->host_busy, and scsi error handler thread won't start running. IO errors after that won't be handled. Since all scmds must have been handled in the strategy handler, just remove the decrement in scsi_eh_finish_cmd() and zero ->host_busy after the strategy handler to fix this race. Fixes: 50824d6c5657 ("[SCSI] libsas: async ata-eh") Cc: stable@vger.kernel.org Signed-off-by: Wei Fang Reviewed-by: James Bottomley Signed-off-by: Martin K. Petersen Signed-off-by: Willy Tarreau --- Documentation/scsi/scsi_eh.txt | 8 ++++++-- drivers/ata/libata-eh.c | 2 +- drivers/scsi/scsi_error.c | 4 +++- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/Documentation/scsi/scsi_eh.txt b/Documentation/scsi/scsi_eh.txt index 6ff16b620d84..c08b62d63afa 100644 --- a/Documentation/scsi/scsi_eh.txt +++ b/Documentation/scsi/scsi_eh.txt @@ -255,19 +255,23 @@ scmd->allowed. 3. scmd recovered ACTION: scsi_eh_finish_cmd() is invoked to EH-finish scmd - - shost->host_failed-- - clear scmd->eh_eflags - scsi_setup_cmd_retry() - move from local eh_work_q to local eh_done_q LOCKING: none + CONCURRENCY: at most one thread per separate eh_work_q to + keep queue manipulation lockless 4. EH completes ACTION: scsi_eh_flush_done_q() retries scmds or notifies upper - layer of failure. + layer of failure. May be called concurrently but must have + a no more than one thread per separate eh_work_q to + manipulate the queue locklessly - scmd is removed from eh_done_q and scmd->eh_entry is cleared - if retry is necessary, scmd is requeued using scsi_queue_insert() - otherwise, scsi_finish_command() is invoked for scmd + - zero shost->host_failed LOCKING: queue or finish function performs appropriate locking diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 063036d876b0..126eb86f239f 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -604,7 +604,7 @@ void ata_scsi_error(struct Scsi_Host *host) ata_scsi_port_error_handler(host, ap); /* finish or retry handled scmd's and clean up */ - WARN_ON(host->host_failed || !list_empty(&eh_work_q)); + WARN_ON(!list_empty(&eh_work_q)); DPRINTK("EXIT\n"); } diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 9acbc885239b..5ba69ea8eb92 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -898,7 +898,6 @@ static int scsi_request_sense(struct scsi_cmnd *scmd) */ void scsi_eh_finish_cmd(struct scsi_cmnd *scmd, struct list_head *done_q) { - scmd->device->host->host_failed--; scmd->eh_eflags = 0; list_move_tail(&scmd->eh_entry, done_q); } @@ -1892,6 +1891,9 @@ int scsi_error_handler(void *data) else scsi_unjam_host(shost); + /* All scmds have been handled */ + shost->host_failed = 0; + /* * Note - if the above fails completely, the action is to take * individual devices offline and flush the queue of any From 75b319d68655ff8790f9c03f294c31d81de3b08c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 1 May 2014 16:51:03 +0200 Subject: [PATCH 133/178] scsi: remove scsi_end_request commit bc85dc500f9df9b2eec15077e5046672c46adeaa upstream. By folding scsi_end_request into its only caller we can significantly clean up the completion logic. We can use simple goto labels now to only have a single place to finish or requeue command there instead of the previous convoluted logic. Signed-off-by: Christoph Hellwig Reviewed-by: Nicholas Bellinger Reviewed-by: Mike Christie Reviewed-by: Hannes Reinecke [jwang: backport to 3.12] Signed-off-by: Jack Wang Signed-off-by: Greg Kroah-Hartman Signed-off-by: Willy Tarreau --- drivers/scsi/scsi_lib.c | 113 +++++++++++----------------------------- 1 file changed, 31 insertions(+), 82 deletions(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 9f3168e8e5a8..dc969055d2c5 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -546,66 +546,6 @@ void scsi_run_host_queues(struct Scsi_Host *shost) static void __scsi_release_buffers(struct scsi_cmnd *, int); -/* - * Function: scsi_end_request() - * - * Purpose: Post-processing of completed commands (usually invoked at end - * of upper level post-processing and scsi_io_completion). - * - * Arguments: cmd - command that is complete. - * error - 0 if I/O indicates success, < 0 for I/O error. - * bytes - number of bytes of completed I/O - * requeue - indicates whether we should requeue leftovers. - * - * Lock status: Assumed that lock is not held upon entry. - * - * Returns: cmd if requeue required, NULL otherwise. - * - * Notes: This is called for block device requests in order to - * mark some number of sectors as complete. - * - * We are guaranteeing that the request queue will be goosed - * at some point during this call. - * Notes: If cmd was requeued, upon return it will be a stale pointer. - */ -static struct scsi_cmnd *scsi_end_request(struct scsi_cmnd *cmd, int error, - int bytes, int requeue) -{ - struct request_queue *q = cmd->device->request_queue; - struct request *req = cmd->request; - - /* - * If there are blocks left over at the end, set up the command - * to queue the remainder of them. - */ - if (blk_end_request(req, error, bytes)) { - /* kill remainder if no retrys */ - if (error && scsi_noretry_cmd(cmd)) - blk_end_request_all(req, error); - else { - if (requeue) { - /* - * Bleah. Leftovers again. Stick the - * leftovers in the front of the - * queue, and goose the queue again. - */ - scsi_release_buffers(cmd); - scsi_requeue_command(q, cmd); - cmd = NULL; - } - return cmd; - } - } - - /* - * This will goose the queue request function at the end, so we don't - * need to worry about launching another command. - */ - __scsi_release_buffers(cmd, 0); - scsi_next_command(cmd); - return NULL; -} - static inline unsigned int scsi_sgtable_index(unsigned short nents) { unsigned int index; @@ -735,16 +675,9 @@ static int __scsi_error_from_host_byte(struct scsi_cmnd *cmd, int result) * * Returns: Nothing * - * Notes: This function is matched in terms of capabilities to - * the function that created the scatter-gather list. - * In other words, if there are no bounce buffers - * (the normal case for most drivers), we don't need - * the logic to deal with cleaning up afterwards. - * - * We must call scsi_end_request(). This will finish off - * the specified number of sectors. If we are done, the - * command block will be released and the queue function - * will be goosed. If we are not done then we have to + * Notes: We will finish off the specified number of sectors. If we + * are done, the command block will be released and the queue + * function will be goosed. If we are not done then we have to * figure out what to do next: * * a) We can call scsi_requeue_command(). The request @@ -753,7 +686,7 @@ static int __scsi_error_from_host_byte(struct scsi_cmnd *cmd, int result) * be used if we made forward progress, or if we want * to switch from READ(10) to READ(6) for example. * - * b) We can call scsi_queue_insert(). The request will + * b) We can call __scsi_queue_insert(). The request will * be put back on the queue and retried using the same * command as before, possibly after a delay. * @@ -857,12 +790,25 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) } /* - * A number of bytes were successfully read. If there - * are leftovers and there is some kind of error - * (result != 0), retry the rest. + * If we finished all bytes in the request we are done now. */ - if (scsi_end_request(cmd, error, good_bytes, result == 0) == NULL) - return; + if (!blk_end_request(req, error, good_bytes)) + goto next_command; + + /* + * Kill remainder if no retrys. + */ + if (error && scsi_noretry_cmd(cmd)) { + blk_end_request_all(req, error); + goto next_command; + } + + /* + * If there had been no error, but we have leftover bytes in the + * requeues just queue the command up again. + */ + if (result == 0) + goto requeue; error = __scsi_error_from_host_byte(cmd, result); @@ -984,7 +930,6 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) switch (action) { case ACTION_FAIL: /* Give up and fail the remainder of the request */ - scsi_release_buffers(cmd); if (!(req->cmd_flags & REQ_QUIET)) { if (description) scmd_printk(KERN_INFO, cmd, "%s\n", @@ -994,12 +939,11 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) scsi_print_sense("", cmd); scsi_print_command(cmd); } - if (blk_end_request_err(req, error)) - scsi_requeue_command(q, cmd); - else - scsi_next_command(cmd); - break; + if (!blk_end_request_err(req, error)) + goto next_command; + /*FALLTHRU*/ case ACTION_REPREP: + requeue: /* Unprep the request and put it back at the head of the queue. * A new command will be prepared and issued. */ @@ -1015,6 +959,11 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) __scsi_queue_insert(cmd, SCSI_MLQUEUE_DEVICE_BUSY, 0); break; } + return; + +next_command: + __scsi_release_buffers(cmd, 0); + scsi_next_command(cmd); } static int scsi_init_sgtable(struct request *req, struct scsi_data_buffer *sdb, From 655e0c067f0e02ece03fd0591dabe3db2ae27552 Mon Sep 17 00:00:00 2001 From: Steve French Date: Wed, 22 Jun 2016 20:12:05 -0500 Subject: [PATCH 134/178] Fix reconnect to not defer smb3 session reconnect long after socket reconnect commit 4fcd1813e6404dd4420c7d12fb483f9320f0bf93 upstream. Azure server blocks clients that open a socket and don't do anything on it. In our reconnect scenarios, we can reconnect the tcp session and detect the socket is available but we defer the negprot and SMB3 session setup and tree connect reconnection until the next i/o is requested, but this looks suspicous to some servers who expect SMB3 negprog and session setup soon after a socket is created. In the echo thread, reconnect SMB3 sessions and tree connections that are disconnected. A later patch will replay persistent (and resilient) handle opens. Signed-off-by: Steve French Acked-by: Pavel Shilovsky Signed-off-by: Greg Kroah-Hartman Signed-off-by: Willy Tarreau --- fs/cifs/connect.c | 4 +++- fs/cifs/smb2pdu.c | 27 +++++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index d05a30072023..7c33afd7d5d3 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -408,7 +408,9 @@ cifs_echo_request(struct work_struct *work) * server->ops->need_neg() == true. Also, no need to ping if * we got a response recently. */ - if (!server->ops->need_neg || server->ops->need_neg(server) || + + if (server->tcpStatus == CifsNeedReconnect || + server->tcpStatus == CifsExiting || server->tcpStatus == CifsNew || (server->ops->can_echo && !server->ops->can_echo(server)) || time_before(jiffies, server->lstrp + SMB_ECHO_INTERVAL - HZ)) goto requeue_echo; diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index eb0de4c3ca76..9dd8c968d94e 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1250,6 +1250,33 @@ SMB2_echo(struct TCP_Server_Info *server) cifs_dbg(FYI, "In echo request\n"); + if (server->tcpStatus == CifsNeedNegotiate) { + struct list_head *tmp, *tmp2; + struct cifs_ses *ses; + struct cifs_tcon *tcon; + + cifs_dbg(FYI, "Need negotiate, reconnecting tcons\n"); + spin_lock(&cifs_tcp_ses_lock); + list_for_each(tmp, &server->smb_ses_list) { + ses = list_entry(tmp, struct cifs_ses, smb_ses_list); + list_for_each(tmp2, &ses->tcon_list) { + tcon = list_entry(tmp2, struct cifs_tcon, + tcon_list); + /* add check for persistent handle reconnect */ + if (tcon && tcon->need_reconnect) { + spin_unlock(&cifs_tcp_ses_lock); + rc = smb2_reconnect(SMB2_ECHO, tcon); + spin_lock(&cifs_tcp_ses_lock); + } + } + } + spin_unlock(&cifs_tcp_ses_lock); + } + + /* if no session, renegotiate failed above */ + if (server->tcpStatus == CifsNeedNegotiate) + return -EIO; + rc = small_smb2_init(SMB2_ECHO, NULL, (void **)&req); if (rc) return rc; From 12f63d9dba9e36a1a0176df42e53ae6167cdae09 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 8 Jul 2016 06:15:07 -0600 Subject: [PATCH 135/178] xen/acpi: allow xen-acpi-processor driver to load on Xen 4.7 commit 6f2d9d99213514360034c6d52d2c3919290b3504 upstream. As of Xen 4.7 PV CPUID doesn't expose either of CPUID[1].ECX[7] and CPUID[0x80000007].EDX[7] anymore, causing the driver to fail to load on both Intel and AMD systems. Doing any kind of hardware capability checks in the driver as a prerequisite was wrong anyway: With the hypervisor being in charge, all such checking should be done by it. If ACPI data gets uploaded despite some missing capability, the hypervisor is free to ignore part or all of that data. Ditch the entire check_prereq() function, and do the only valid check (xen_initial_domain()) in the caller in its place. Signed-off-by: Jan Beulich Cc: Signed-off-by: David Vrabel Signed-off-by: Willy Tarreau --- drivers/xen/xen-acpi-processor.c | 35 +++----------------------------- 1 file changed, 3 insertions(+), 32 deletions(-) diff --git a/drivers/xen/xen-acpi-processor.c b/drivers/xen/xen-acpi-processor.c index 8abd7d579037..2e4517277e80 100644 --- a/drivers/xen/xen-acpi-processor.c +++ b/drivers/xen/xen-acpi-processor.c @@ -426,36 +426,7 @@ upload: return 0; } -static int __init check_prereq(void) -{ - struct cpuinfo_x86 *c = &cpu_data(0); - if (!xen_initial_domain()) - return -ENODEV; - - if (!acpi_gbl_FADT.smi_command) - return -ENODEV; - - if (c->x86_vendor == X86_VENDOR_INTEL) { - if (!cpu_has(c, X86_FEATURE_EST)) - return -ENODEV; - - return 0; - } - if (c->x86_vendor == X86_VENDOR_AMD) { - /* Copied from powernow-k8.h, can't include ../cpufreq/powernow - * as we get compile warnings for the static functions. - */ -#define CPUID_FREQ_VOLT_CAPABILITIES 0x80000007 -#define USE_HW_PSTATE 0x00000080 - u32 eax, ebx, ecx, edx; - cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx); - if ((edx & USE_HW_PSTATE) != USE_HW_PSTATE) - return -ENODEV; - return 0; - } - return -ENODEV; -} /* acpi_perf_data is a pointer to percpu data. */ static struct acpi_processor_performance __percpu *acpi_perf_data; @@ -511,10 +482,10 @@ static struct syscore_ops xap_syscore_ops = { static int __init xen_acpi_processor_init(void) { unsigned int i; - int rc = check_prereq(); + int rc; - if (rc) - return rc; + if (!xen_initial_domain()) + return -ENODEV; nr_acpi_bits = get_max_acpi_id() + 1; acpi_ids_done = kcalloc(BITS_TO_LONGS(nr_acpi_bits), sizeof(unsigned long), GFP_KERNEL); From 884c6beca31ef639a7002f461efbe036c22006de Mon Sep 17 00:00:00 2001 From: Jan Willeke Date: Tue, 22 Jul 2014 16:50:57 +0200 Subject: [PATCH 136/178] s390/seccomp: fix error return for filtered system calls commit dc295880c6752076f8b94ba3885d0bfff09e3e82 upstream. The syscall_set_return_value function of s390 negates the error argument before storing the value to the return register gpr2. This is incorrect, the seccomp code already passes the negative error value. Store the unmodified error value to gpr2. Signed-off-by: Jan Willeke Signed-off-by: Martin Schwidefsky Signed-off-by: Willy Tarreau --- arch/s390/include/asm/syscall.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h index cd29d2f4e4f3..749313b452ae 100644 --- a/arch/s390/include/asm/syscall.h +++ b/arch/s390/include/asm/syscall.h @@ -54,7 +54,7 @@ static inline void syscall_set_return_value(struct task_struct *task, struct pt_regs *regs, int error, long val) { - regs->gprs[2] = error ? -error : val; + regs->gprs[2] = error ? error : val; } static inline void syscall_get_arguments(struct task_struct *task, From e1e78c062e8786e93e9d0e07bf42c618d9fd09c8 Mon Sep 17 00:00:00 2001 From: Torsten Hilbrich Date: Fri, 24 Jun 2016 14:50:18 -0700 Subject: [PATCH 137/178] fs/nilfs2: fix potential underflow in call to crc32_le commit 63d2f95d63396059200c391ca87161897b99e74a upstream. The value `bytes' comes from the filesystem which is about to be mounted. We cannot trust that the value is always in the range we expect it to be. Check its value before using it to calculate the length for the crc32_le call. It value must be larger (or equal) sumoff + 4. This fixes a kernel bug when accidentially mounting an image file which had the nilfs2 magic value 0x3434 at the right offset 0x406 by chance. The bytes 0x01 0x00 were stored at 0x408 and were interpreted as a s_bytes value of 1. This caused an underflow when substracting sumoff + 4 (20) in the call to crc32_le. BUG: unable to handle kernel paging request at ffff88021e600000 IP: crc32_le+0x36/0x100 ... Call Trace: nilfs_valid_sb.part.5+0x52/0x60 [nilfs2] nilfs_load_super_block+0x142/0x300 [nilfs2] init_nilfs+0x60/0x390 [nilfs2] nilfs_mount+0x302/0x520 [nilfs2] mount_fs+0x38/0x160 vfs_kern_mount+0x67/0x110 do_mount+0x269/0xe00 SyS_mount+0x9f/0x100 entry_SYSCALL_64_fastpath+0x16/0x71 Link: http://lkml.kernel.org/r/1466778587-5184-2-git-send-email-konishi.ryusuke@lab.ntt.co.jp Signed-off-by: Torsten Hilbrich Tested-by: Torsten Hilbrich Signed-off-by: Ryusuke Konishi Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Willy Tarreau --- fs/nilfs2/the_nilfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index 41e6a04a561f..0f9a5b4ad53b 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -431,7 +431,7 @@ static int nilfs_valid_sb(struct nilfs_super_block *sbp) if (!sbp || le16_to_cpu(sbp->s_magic) != NILFS_SUPER_MAGIC) return 0; bytes = le16_to_cpu(sbp->s_bytes); - if (bytes > BLOCK_SIZE) + if (bytes < sumoff + 4 || bytes > BLOCK_SIZE) return 0; crc = crc32_le(le32_to_cpu(sbp->s_crc_seed), (unsigned char *)sbp, sumoff); From 6b03918b1d1df7d831a4a546d2b044258e641aa6 Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Thu, 23 Jun 2016 11:00:39 +0300 Subject: [PATCH 138/178] arc: unwind: warn only once if DW2_UNWIND is disabled commit 9bd54517ee86cb164c734f72ea95aeba4804f10b upstream. If CONFIG_ARC_DW2_UNWIND is disabled every time arc_unwind_core() gets called following message gets printed in debug console: ----------------->8--------------- CONFIG_ARC_DW2_UNWIND needs to be enabled ----------------->8--------------- That message makes sense if user indeed wants to see a backtrace or get nice function call-graphs in perf but what if user disabled unwinder for the purpose? Why pollute his debug console? So instead we'll warn user about possibly missing feature once and let him decide if that was what he or she really wanted. Signed-off-by: Alexey Brodkin Cc: stable@vger.kernel.org Signed-off-by: Vineet Gupta Signed-off-by: Willy Tarreau --- arch/arc/kernel/stacktrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arc/kernel/stacktrace.c b/arch/arc/kernel/stacktrace.c index ca0207b9d5b6..06997ad70725 100644 --- a/arch/arc/kernel/stacktrace.c +++ b/arch/arc/kernel/stacktrace.c @@ -131,7 +131,7 @@ arc_unwind_core(struct task_struct *tsk, struct pt_regs *regs, * prelogue is setup (callee regs saved and then fp set and not other * way around */ - pr_warn("CONFIG_ARC_DW2_UNWIND needs to be enabled\n"); + pr_warn_once("CONFIG_ARC_DW2_UNWIND needs to be enabled\n"); return 0; #endif From 655a7f19458bbdaa0143aace9e3f878295850b83 Mon Sep 17 00:00:00 2001 From: Andrey Grodzovsky Date: Tue, 21 Jun 2016 14:26:36 -0400 Subject: [PATCH 139/178] xen/pciback: Fix conf_space read/write overlap check. commit 02ef871ecac290919ea0c783d05da7eedeffc10e upstream. Current overlap check is evaluating to false a case where a filter field is fully contained (proper subset) of a r/w request. This change applies classical overlap check instead to include all the scenarios. More specifically, for (Hilscher GmbH CIFX 50E-DP(M/S)) device driver the logic is such that the entire confspace is read and written in 4 byte chunks. In this case as an example, CACHE_LINE_SIZE, LATENCY_TIMER and PCI_BIST are arriving together in one call to xen_pcibk_config_write() with offset == 0xc and size == 4. With the exsisting overlap check the LATENCY_TIMER field (offset == 0xd, length == 1) is fully contained in the write request and hence is excluded from write, which is incorrect. Signed-off-by: Andrey Grodzovsky Reviewed-by: Boris Ostrovsky Reviewed-by: Jan Beulich Cc: Signed-off-by: David Vrabel Signed-off-by: Willy Tarreau --- drivers/xen/xen-pciback/conf_space.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/xen/xen-pciback/conf_space.c b/drivers/xen/xen-pciback/conf_space.c index 75fe3d466515..ba3fac8318bb 100644 --- a/drivers/xen/xen-pciback/conf_space.c +++ b/drivers/xen/xen-pciback/conf_space.c @@ -183,8 +183,7 @@ int xen_pcibk_config_read(struct pci_dev *dev, int offset, int size, field_start = OFFSET(cfg_entry); field_end = OFFSET(cfg_entry) + field->size; - if ((req_start >= field_start && req_start < field_end) - || (req_end > field_start && req_end <= field_end)) { + if (req_end > field_start && field_end > req_start) { err = conf_space_read(dev, cfg_entry, field_start, &tmp_val); if (err) @@ -230,8 +229,7 @@ int xen_pcibk_config_write(struct pci_dev *dev, int offset, int size, u32 value) field_start = OFFSET(cfg_entry); field_end = OFFSET(cfg_entry) + field->size; - if ((req_start >= field_start && req_start < field_end) - || (req_end > field_start && req_end <= field_end)) { + if (req_end > field_start && field_end > req_start) { tmp_val = 0; err = xen_pcibk_config_read(dev, field_start, From 6e47981c293625dae096c6cccbd49e2c4071139d Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Tue, 5 Jul 2016 17:32:29 -0400 Subject: [PATCH 140/178] Revert "ecryptfs: forbid opening files without mmap handler" commit 78c4e172412de5d0456dc00d2b34050aa0b683b5 upstream. This reverts commit 2f36db71009304b3f0b95afacd8eba1f9f046b87. It fixed a local root exploit but also introduced a dependency on the lower file system implementing an mmap operation just to open a file, which is a bit of a heavy hammer. The right fix is to have mmap depend on the existence of the mmap handler instead. Signed-off-by: Jeff Mahoney Signed-off-by: Tyler Hicks Signed-off-by: Greg Kroah-Hartman Signed-off-by: Willy Tarreau --- fs/ecryptfs/kthread.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/fs/ecryptfs/kthread.c b/fs/ecryptfs/kthread.c index 9b661a4ccee7..f1ea610362c6 100644 --- a/fs/ecryptfs/kthread.c +++ b/fs/ecryptfs/kthread.c @@ -25,7 +25,6 @@ #include #include #include -#include #include "ecryptfs_kernel.h" struct ecryptfs_open_req { @@ -148,7 +147,7 @@ int ecryptfs_privileged_open(struct file **lower_file, flags |= IS_RDONLY(lower_dentry->d_inode) ? O_RDONLY : O_RDWR; (*lower_file) = dentry_open(&req.path, flags, cred); if (!IS_ERR(*lower_file)) - goto have_file; + goto out; if ((flags & O_ACCMODE) == O_RDONLY) { rc = PTR_ERR((*lower_file)); goto out; @@ -166,16 +165,8 @@ int ecryptfs_privileged_open(struct file **lower_file, mutex_unlock(&ecryptfs_kthread_ctl.mux); wake_up(&ecryptfs_kthread_ctl.wait); wait_for_completion(&req.done); - if (IS_ERR(*lower_file)) { + if (IS_ERR(*lower_file)) rc = PTR_ERR(*lower_file); - goto out; - } -have_file: - if ((*lower_file)->f_op->mmap == NULL) { - fput(*lower_file); - *lower_file = NULL; - rc = -EMEDIUMTYPE; - } out: return rc; } From e5201134028bd04d5f0fb020a2579605b61a3078 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Tue, 5 Jul 2016 17:32:30 -0400 Subject: [PATCH 141/178] ecryptfs: don't allow mmap when the lower fs doesn't support it commit f0fe970df3838c202ef6c07a4c2b36838ef0a88b upstream. There are legitimate reasons to disallow mmap on certain files, notably in sysfs or procfs. We shouldn't emulate mmap support on file systems that don't offer support natively. CVE-2016-1583 Signed-off-by: Jeff Mahoney [tyhicks: clean up f_op check by using ecryptfs_file_to_lower()] Signed-off-by: Tyler Hicks Signed-off-by: Greg Kroah-Hartman Signed-off-by: Willy Tarreau --- fs/ecryptfs/file.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index 9ff3664bb3ea..d4644cc938ba 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -183,6 +183,19 @@ out: return rc; } +static int ecryptfs_mmap(struct file *file, struct vm_area_struct *vma) +{ + struct file *lower_file = ecryptfs_file_to_lower(file); + /* + * Don't allow mmap on top of file systems that don't support it + * natively. If FILESYSTEM_MAX_STACK_DEPTH > 2 or ecryptfs + * allows recursive mounting, this will need to be extended. + */ + if (!lower_file->f_op->mmap) + return -ENODEV; + return generic_file_mmap(file, vma); +} + /** * ecryptfs_open * @inode: inode speciying file to open @@ -358,7 +371,7 @@ const struct file_operations ecryptfs_main_fops = { #ifdef CONFIG_COMPAT .compat_ioctl = ecryptfs_compat_ioctl, #endif - .mmap = generic_file_mmap, + .mmap = ecryptfs_mmap, .open = ecryptfs_open, .flush = ecryptfs_flush, .release = ecryptfs_release, From 46d597e212135b8cd6d4752babcfad8f1669dbc1 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 5 Nov 2015 09:13:31 +0530 Subject: [PATCH 142/178] ARC: use ASL assembler mnemonic commit a6416f57ce57fb390b6ee30b12c01c29032a26af upstream. ARCompact and ARCv2 only have ASL, while binutils used to support LSL as a alias mnemonic. Newer binutils (upstream) don't want to do that so replace it. Signed-off-by: Vineet Gupta Signed-off-by: Willy Tarreau --- arch/arc/mm/tlbex.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S index 3357d26ffe54..74691e652a3a 100644 --- a/arch/arc/mm/tlbex.S +++ b/arch/arc/mm/tlbex.S @@ -219,7 +219,7 @@ ex_saved_reg1: #ifdef CONFIG_SMP sr r0, [ARC_REG_SCRATCH_DATA0] ; freeup r0 to code with GET_CPU_ID r0 ; get to per cpu scratch mem, - lsl r0, r0, L1_CACHE_SHIFT ; cache line wide per cpu + asl r0, r0, L1_CACHE_SHIFT ; cache line wide per cpu add r0, @ex_saved_reg1, r0 #else st r0, [@ex_saved_reg1] @@ -239,7 +239,7 @@ ex_saved_reg1: .macro TLBMISS_RESTORE_REGS #ifdef CONFIG_SMP GET_CPU_ID r0 ; get to per cpu scratch mem - lsl r0, r0, L1_CACHE_SHIFT ; each is cache line wide + asl r0, r0, L1_CACHE_SHIFT ; each is cache line wide add r0, @ex_saved_reg1, r0 ld_s r3, [r0,12] ld_s r2, [r0, 8] From 80e90a31236a6e4748a25bb6b6e8812c424f408c Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Mon, 4 Jul 2016 14:07:16 +0200 Subject: [PATCH 143/178] qeth: delete napi struct when removing a qeth device commit 7831b4ff0d926e0deeaabef9db8800ed069a2757 upstream. A qeth_card contains a napi_struct linked to the net_device during device probing. This struct must be deleted when removing the qeth device, otherwise Panic on oops can occur when qeth devices are repeatedly removed and added. Fixes: a1c3ed4c9ca ("qeth: NAPI support for l2 and l3 discipline") Cc: stable@vger.kernel.org # v2.6.37+ Signed-off-by: Ursula Braun Tested-by: Alexander Klein Signed-off-by: David S. Miller Signed-off-by: Willy Tarreau --- drivers/s390/net/qeth_l2_main.c | 1 + drivers/s390/net/qeth_l3_main.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index ec8ccdae7aba..0090de46aa5e 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -898,6 +898,7 @@ static void qeth_l2_remove_device(struct ccwgroup_device *cgdev) qeth_l2_set_offline(cgdev); if (card->dev) { + netif_napi_del(&card->napi); unregister_netdev(card->dev); card->dev = NULL; } diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index c1b0b2761f8d..7366bef742de 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -3333,6 +3333,7 @@ static void qeth_l3_remove_device(struct ccwgroup_device *cgdev) qeth_l3_set_offline(cgdev); if (card->dev) { + netif_napi_del(&card->napi); unregister_netdev(card->dev); card->dev = NULL; } From 389ca69d3d92b48340928b592b4d80948e284b4c Mon Sep 17 00:00:00 2001 From: Taras Kondratiuk Date: Wed, 13 Jul 2016 22:05:38 +0000 Subject: [PATCH 144/178] mmc: block: fix packed command header endianness commit f68381a70bb2b26c31b13fdaf67c778f92fd32b4 upstream. The code that fills packed command header assumes that CPU runs in little-endian mode. Hence the header is malformed in big-endian mode and causes MMC data transfer errors: [ 563.200828] mmcblk0: error -110 transferring data, sector 2048, nr 8, cmd response 0x900, card status 0xc40 [ 563.219647] mmcblk0: packed cmd failed, nr 2, sectors 16, failure index: -1 Convert header data to LE. Signed-off-by: Taras Kondratiuk Fixes: ce39f9d17c14 ("mmc: support packed write command for eMMC4.5 devices") Cc: Signed-off-by: Ulf Hansson Signed-off-by: Willy Tarreau --- drivers/mmc/card/block.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index c6bf23599eb9..a2863b7b9e21 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -1582,8 +1582,8 @@ static void mmc_blk_packed_hdr_wrq_prep(struct mmc_queue_req *mqrq, packed_cmd_hdr = packed->cmd_hdr; memset(packed_cmd_hdr, 0, sizeof(packed->cmd_hdr)); - packed_cmd_hdr[0] = (packed->nr_entries << 16) | - (PACKED_CMD_WR << 8) | PACKED_CMD_VER; + packed_cmd_hdr[0] = cpu_to_le32((packed->nr_entries << 16) | + (PACKED_CMD_WR << 8) | PACKED_CMD_VER); hdr_blocks = mmc_large_sector(card) ? 8 : 1; /* @@ -1597,14 +1597,14 @@ static void mmc_blk_packed_hdr_wrq_prep(struct mmc_queue_req *mqrq, ((brq->data.blocks * brq->data.blksz) >= card->ext_csd.data_tag_unit_size); /* Argument of CMD23 */ - packed_cmd_hdr[(i * 2)] = + packed_cmd_hdr[(i * 2)] = cpu_to_le32( (do_rel_wr ? MMC_CMD23_ARG_REL_WR : 0) | (do_data_tag ? MMC_CMD23_ARG_TAG_REQ : 0) | - blk_rq_sectors(prq); + blk_rq_sectors(prq)); /* Argument of CMD18 or CMD25 */ - packed_cmd_hdr[((i * 2)) + 1] = + packed_cmd_hdr[((i * 2)) + 1] = cpu_to_le32( mmc_card_blockaddr(card) ? - blk_rq_pos(prq) : blk_rq_pos(prq) << 9; + blk_rq_pos(prq) : blk_rq_pos(prq) << 9); packed->blocks += blk_rq_sectors(prq); i++; } From 56a5db827f79ab8ed1161d1b2d6d14d2a4d60624 Mon Sep 17 00:00:00 2001 From: Wolfgang Grandegger Date: Mon, 13 Jun 2016 15:44:19 +0200 Subject: [PATCH 145/178] can: at91_can: RX queue could get stuck at high bus load commit 43200a4480cbbe660309621817f54cbb93907108 upstream. At high bus load it could happen that "at91_poll()" enters with all RX message boxes filled up. If then at the end the "quota" is exceeded as well, "rx_next" will not be reset to the first RX mailbox and hence the interrupts remain disabled. Signed-off-by: Wolfgang Grandegger Tested-by: Amr Bekhit Cc: Signed-off-by: Marc Kleine-Budde Signed-off-by: Willy Tarreau --- drivers/net/can/at91_can.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/at91_can.c b/drivers/net/can/at91_can.c index 535d5dd8d816..024078c5fb16 100644 --- a/drivers/net/can/at91_can.c +++ b/drivers/net/can/at91_can.c @@ -731,9 +731,10 @@ static int at91_poll_rx(struct net_device *dev, int quota) /* upper group completed, look again in lower */ if (priv->rx_next > get_mb_rx_low_last(priv) && - quota > 0 && mb > get_mb_rx_last(priv)) { + mb > get_mb_rx_last(priv)) { priv->rx_next = get_mb_rx_first(priv); - goto again; + if (quota > 0) + goto again; } return received; From e5a30a7a2b6017130559f78a8765b8ac179b4310 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Tue, 21 Jun 2016 15:45:47 +0200 Subject: [PATCH 146/178] can: fix oops caused by wrong rtnl dellink usage commit 25e1ed6e64f52a692ba3191c4fde650aab3ecc07 upstream. For 'real' hardware CAN devices the netlink interface is used to set CAN specific communication parameters. Real CAN hardware can not be created nor removed with the ip tool ... This patch adds a private dellink function for the CAN device driver interface that does just nothing. It's a follow up to commit 993e6f2fd ("can: fix oops caused by wrong rtnl newlink usage") but for dellink. Reported-by: ajneu Signed-off-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman Signed-off-by: Willy Tarreau --- drivers/net/can/dev.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index f66aeb79abdf..464e5f66b66d 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -772,6 +772,11 @@ static int can_newlink(struct net *src_net, struct net_device *dev, return -EOPNOTSUPP; } +static void can_dellink(struct net_device *dev, struct list_head *head) +{ + return; +} + static struct rtnl_link_ops can_link_ops __read_mostly = { .kind = "can", .maxtype = IFLA_CAN_MAX, @@ -779,6 +784,7 @@ static struct rtnl_link_ops can_link_ops __read_mostly = { .setup = can_setup, .newlink = can_newlink, .changelink = can_changelink, + .dellink = can_dellink, .get_size = can_get_size, .fill_info = can_fill_info, .get_xstats_size = can_get_xstats_size, From fbcf1c564cbeeaa584e7b871c4a1b8c65da78f30 Mon Sep 17 00:00:00 2001 From: Brian King Date: Mon, 27 Jun 2016 09:09:40 -0500 Subject: [PATCH 147/178] ipr: Clear interrupt on croc/crocodile when running with LSI commit 54e430bbd490e18ab116afa4cd90dcc45787b3df upstream. If we fall back to using LSI on the Croc or Crocodile chip we need to clear the interrupt so we don't hang the system. Cc: Tested-by: Benjamin Herrenschmidt Signed-off-by: Brian King Signed-off-by: Martin K. Petersen Signed-off-by: Willy Tarreau --- drivers/scsi/ipr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index 25ac2c00f8b3..2891faa8e384 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -9607,6 +9607,7 @@ static int ipr_probe_ioa(struct pci_dev *pdev, ioa_cfg->intr_flag = IPR_USE_MSI; else { ioa_cfg->intr_flag = IPR_USE_LSI; + ioa_cfg->clear_isr = 1; ioa_cfg->nvectors = 1; dev_info(&pdev->dev, "Cannot enable MSI.\n"); } From 72d4c3078653e81a9cc8419be6175f42f35bc463 Mon Sep 17 00:00:00 2001 From: Dmitri Epshtein Date: Wed, 6 Jul 2016 04:18:58 +0200 Subject: [PATCH 148/178] net: mvneta: set real interrupt per packet for tx_done commit 06708f81528725148473c0869d6af5f809c6824b upstream. Commit aebea2ba0f74 ("net: mvneta: fix Tx interrupt delay") intended to set coalescing threshold to a value guaranteeing interrupt generation per each sent packet, so that buffers can be released with no delay. In fact setting threshold to '1' was wrong, because it causes interrupt every two packets. According to the documentation a reason behind it is following - interrupt occurs once sent buffers counter reaches a value, which is higher than one specified in MVNETA_TXQ_SIZE_REG(q). This behavior was confirmed during tests. Also when testing the SoC working as a NAS device, better performance was observed with int-per-packet, as it strongly depends on the fact that all transmitted packets are released immediately. This commit enables NETA controller work in interrupt per sent packet mode by setting coalescing threshold to 0. Signed-off-by: Dmitri Epshtein Signed-off-by: Marcin Wojtas Cc: # v3.10+ Fixes aebea2ba0f74 ("net: mvneta: fix Tx interrupt delay") Acked-by: Willy Tarreau Signed-off-by: David S. Miller Signed-off-by: Willy Tarreau --- drivers/net/ethernet/marvell/mvneta.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index d5643c143bb8..df3af299a7d2 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -210,7 +210,7 @@ /* Various constants */ /* Coalescing */ -#define MVNETA_TXDONE_COAL_PKTS 1 +#define MVNETA_TXDONE_COAL_PKTS 0 /* interrupt per packet */ #define MVNETA_RX_COAL_PKTS 32 #define MVNETA_RX_COAL_USEC 100 From 1cf7c76efb3c0f5439d2c9461502f33817469371 Mon Sep 17 00:00:00 2001 From: Karl Heiss Date: Thu, 24 Sep 2015 12:15:07 -0400 Subject: [PATCH 149/178] sctp: Prevent soft lockup when sctp_accept() is called during a timeout event commit 635682a14427d241bab7bbdeebb48a7d7b91638e upstream. A case can occur when sctp_accept() is called by the user during a heartbeat timeout event after the 4-way handshake. Since sctp_assoc_migrate() changes both assoc->base.sk and assoc->ep, the bh_sock_lock in sctp_generate_heartbeat_event() will be taken with the listening socket but released with the new association socket. The result is a deadlock on any future attempts to take the listening socket lock. Note that this race can occur with other SCTP timeouts that take the bh_lock_sock() in the event sctp_accept() is called. BUG: soft lockup - CPU#9 stuck for 67s! [swapper:0] ... RIP: 0010:[] [] _spin_lock+0x1e/0x30 RSP: 0018:ffff880028323b20 EFLAGS: 00000206 RAX: 0000000000000002 RBX: ffff880028323b20 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffff880028323be0 RDI: ffff8804632c4b48 RBP: ffffffff8100bb93 R08: 0000000000000000 R09: 0000000000000000 R10: ffff880610662280 R11: 0000000000000100 R12: ffff880028323aa0 R13: ffff8804383c3880 R14: ffff880028323a90 R15: ffffffff81534225 FS: 0000000000000000(0000) GS:ffff880028320000(0000) knlGS:0000000000000000 CS: 0010 DS: 0018 ES: 0018 CR0: 000000008005003b CR2: 00000000006df528 CR3: 0000000001a85000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process swapper (pid: 0, threadinfo ffff880616b70000, task ffff880616b6cab0) Stack: ffff880028323c40 ffffffffa01c2582 ffff880614cfb020 0000000000000000 0100000000000000 00000014383a6c44 ffff8804383c3880 ffff880614e93c00 ffff880614e93c00 0000000000000000 ffff8804632c4b00 ffff8804383c38b8 Call Trace: [] ? sctp_rcv+0x492/0xa10 [sctp] [] ? nf_iterate+0x69/0xb0 [] ? ip_local_deliver_finish+0x0/0x2d0 [] ? nf_hook_slow+0x76/0x120 [] ? ip_local_deliver_finish+0x0/0x2d0 [] ? ip_local_deliver_finish+0xdd/0x2d0 [] ? ip_local_deliver+0x98/0xa0 [] ? ip_rcv_finish+0x12d/0x440 [] ? ip_rcv+0x275/0x350 [] ? __netif_receive_skb+0x4ab/0x750 ... With lockdep debugging: ===================================== [ BUG: bad unlock balance detected! ] ------------------------------------- CslRx/12087 is trying to release lock (slock-AF_INET) at: [] sctp_generate_timeout_event+0x40/0xe0 [sctp] but there are no more locks to release! other info that might help us debug this: 2 locks held by CslRx/12087: #0: (&asoc->timers[i]){+.-...}, at: [] run_timer_softirq+0x16f/0x3e0 #1: (slock-AF_INET){+.-...}, at: [] sctp_generate_timeout_event+0x23/0xe0 [sctp] Ensure the socket taken is also the same one that is released by saving a copy of the socket before entering the timeout event critical section. Signed-off-by: Karl Heiss Signed-off-by: David S. Miller [wt: adjusted, 3.10 uses sctp_bh_unlock_sock() instead of bh_lock_sock()] Signed-off-by: Willy Tarreau --- net/sctp/sm_sideeffect.c | 42 ++++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 8aab894aeabe..730914cdb7a1 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -251,12 +251,13 @@ void sctp_generate_t3_rtx_event(unsigned long peer) int error; struct sctp_transport *transport = (struct sctp_transport *) peer; struct sctp_association *asoc = transport->asoc; - struct net *net = sock_net(asoc->base.sk); + struct sock *sk = asoc->base.sk; + struct net *net = sock_net(sk); /* Check whether a task is in the sock. */ - sctp_bh_lock_sock(asoc->base.sk); - if (sock_owned_by_user(asoc->base.sk)) { + sctp_bh_lock_sock(sk); + if (sock_owned_by_user(sk)) { SCTP_DEBUG_PRINTK("%s:Sock is busy.\n", __func__); /* Try again later. */ @@ -279,10 +280,10 @@ void sctp_generate_t3_rtx_event(unsigned long peer) transport, GFP_ATOMIC); if (error) - asoc->base.sk->sk_err = -error; + sk->sk_err = -error; out_unlock: - sctp_bh_unlock_sock(asoc->base.sk); + sctp_bh_unlock_sock(sk); sctp_transport_put(transport); } @@ -292,11 +293,12 @@ out_unlock: static void sctp_generate_timeout_event(struct sctp_association *asoc, sctp_event_timeout_t timeout_type) { - struct net *net = sock_net(asoc->base.sk); + struct sock *sk = asoc->base.sk; + struct net *net = sock_net(sk); int error = 0; - sctp_bh_lock_sock(asoc->base.sk); - if (sock_owned_by_user(asoc->base.sk)) { + sctp_bh_lock_sock(sk); + if (sock_owned_by_user(sk)) { SCTP_DEBUG_PRINTK("%s:Sock is busy: timer %d\n", __func__, timeout_type); @@ -320,10 +322,10 @@ static void sctp_generate_timeout_event(struct sctp_association *asoc, (void *)timeout_type, GFP_ATOMIC); if (error) - asoc->base.sk->sk_err = -error; + sk->sk_err = -error; out_unlock: - sctp_bh_unlock_sock(asoc->base.sk); + sctp_bh_unlock_sock(sk); sctp_association_put(asoc); } @@ -373,10 +375,11 @@ void sctp_generate_heartbeat_event(unsigned long data) int error = 0; struct sctp_transport *transport = (struct sctp_transport *) data; struct sctp_association *asoc = transport->asoc; - struct net *net = sock_net(asoc->base.sk); + struct sock *sk = asoc->base.sk; + struct net *net = sock_net(sk); - sctp_bh_lock_sock(asoc->base.sk); - if (sock_owned_by_user(asoc->base.sk)) { + sctp_bh_lock_sock(sk); + if (sock_owned_by_user(sk)) { SCTP_DEBUG_PRINTK("%s:Sock is busy.\n", __func__); /* Try again later. */ @@ -397,10 +400,10 @@ void sctp_generate_heartbeat_event(unsigned long data) transport, GFP_ATOMIC); if (error) - asoc->base.sk->sk_err = -error; + sk->sk_err = -error; out_unlock: - sctp_bh_unlock_sock(asoc->base.sk); + sctp_bh_unlock_sock(sk); sctp_transport_put(transport); } @@ -411,10 +414,11 @@ void sctp_generate_proto_unreach_event(unsigned long data) { struct sctp_transport *transport = (struct sctp_transport *) data; struct sctp_association *asoc = transport->asoc; - struct net *net = sock_net(asoc->base.sk); + struct sock *sk = asoc->base.sk; + struct net *net = sock_net(sk); - sctp_bh_lock_sock(asoc->base.sk); - if (sock_owned_by_user(asoc->base.sk)) { + sctp_bh_lock_sock(sk); + if (sock_owned_by_user(sk)) { SCTP_DEBUG_PRINTK("%s:Sock is busy.\n", __func__); /* Try again later. */ @@ -435,7 +439,7 @@ void sctp_generate_proto_unreach_event(unsigned long data) asoc->state, asoc->ep, asoc, transport, GFP_ATOMIC); out_unlock: - sctp_bh_unlock_sock(asoc->base.sk); + sctp_bh_unlock_sock(sk); sctp_association_put(asoc); } From d5a5d0bcb399e6b275a65226c3b35c2659ecd64c Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 12 Jan 2016 12:47:40 -0800 Subject: [PATCH 150/178] x86/mm: Improve switch_mm() barrier comments commit 4eaffdd5a5fe6ff9f95e1ab4de1ac904d5e0fa8b upstream. My previous comments were still a bit confusing and there was a typo. Fix it up. Reported-by: Peter Zijlstra Signed-off-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Rik van Riel Cc: Thomas Gleixner Cc: stable@vger.kernel.org Fixes: 71b3c126e611 ("x86/mm: Add barriers and document switch_mm()-vs-flush synchronization") Link: http://lkml.kernel.org/r/0a0b43cdcdd241c5faaaecfbcc91a155ddedc9a1.1452631609.git.luto@kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman Signed-off-by: Willy Tarreau --- arch/x86/include/asm/mmu_context.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index c0d2f6b668ec..29a3d1b00ca9 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -58,14 +58,16 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, * be sent, and CPU 0's TLB will contain a stale entry.) * * The bad outcome can occur if either CPU's load is - * reordered before that CPU's store, so both CPUs much + * reordered before that CPU's store, so both CPUs must * execute full barriers to prevent this from happening. * * Thus, switch_mm needs a full barrier between the * store to mm_cpumask and any operation that could load - * from next->pgd. This barrier synchronizes with - * remote TLB flushers. Fortunately, load_cr3 is - * serializing and thus acts as a full barrier. + * from next->pgd. TLB fills are special and can happen + * due to instruction fetches or for no reason at all, + * and neither LOCK nor MFENCE orders them. + * Fortunately, load_cr3() is serializing and gives the + * ordering guarantee we need. * */ load_cr3(next->pgd); @@ -96,9 +98,8 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, * tlb flush IPI delivery. We must reload CR3 * to make sure to use no freed page tables. * - * As above, this is a barrier that forces - * TLB repopulation to be ordered after the - * store to mm_cpumask. + * As above, load_cr3() is serializing and orders TLB + * fills with respect to the mm_cpumask write. */ load_cr3(next->pgd); load_LDT_nolock(&next->context); From f7f15c543ba50623bd064ebdfc11de127a58d59e Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 27 Jul 2016 11:43:37 +0100 Subject: [PATCH 151/178] KEYS: 64-bit MIPS needs to use compat_sys_keyctl for 32-bit userspace commit 20f06ed9f61a185c6dabd662c310bed6189470df upstream. MIPS64 needs to use compat_sys_keyctl for 32-bit userspace rather than calling sys_keyctl. The latter will work in a lot of cases, thereby hiding the issue. Reported-by: Stephan Mueller Signed-off-by: David Howells cc: stable@vger.kernel.org Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Cc: linux-security-module@vger.kernel.org Cc: keyrings@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/13832/ Signed-off-by: Ralf Baechle Signed-off-by: Willy Tarreau --- arch/mips/kernel/scall64-n32.S | 2 +- arch/mips/kernel/scall64-o32.S | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index cab150789c8d..b657fbefc466 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -349,7 +349,7 @@ EXPORT(sysn32_call_table) PTR sys_ni_syscall /* available, was setaltroot */ PTR sys_add_key PTR sys_request_key - PTR sys_keyctl /* 6245 */ + PTR compat_sys_keyctl /* 6245 */ PTR sys_set_thread_area PTR sys_inotify_init PTR sys_inotify_add_watch diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index 37605dc8eef7..bf56d7e271dd 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -474,7 +474,7 @@ sys_call_table: PTR sys_ni_syscall /* available, was setaltroot */ PTR sys_add_key /* 4280 */ PTR sys_request_key - PTR sys_keyctl + PTR compat_sys_keyctl PTR sys_set_thread_area PTR sys_inotify_init PTR sys_inotify_add_watch /* 4285 */ From f8f3d2705f0b7aedb888f56520b121cc809205d7 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Fri, 13 May 2016 12:04:06 -0700 Subject: [PATCH 152/178] scsi_lib: correctly retry failed zero length REQ_TYPE_FS commands commit a621bac3044ed6f7ec5fa0326491b2d4838bfa93 upstream. When SCSI was written, all commands coming from the filesystem (REQ_TYPE_FS commands) had data. This meant that our signal for needing to complete the command was the number of bytes completed being equal to the number of bytes in the request. Unfortunately, with the advent of flush barriers, we can now get zero length REQ_TYPE_FS commands, which confuse this logic because they satisfy the condition every time. This means they never get retried even for retryable conditions, like UNIT ATTENTION because we complete them early assuming they're done. Fix this by special casing the early completion condition to recognise zero length commands with errors and let them drop through to the retry code. Reported-by: Sebastian Parschauer Signed-off-by: James E.J. Bottomley Tested-by: Jack Wang Signed-off-by: Martin K. Petersen [ jwang: backport from upstream 4.7 to fix scsi resize issue ] Signed-off-by: Jack Wang Signed-off-by: Greg Kroah-Hartman Signed-off-by: Willy Tarreau --- drivers/scsi/scsi_lib.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index dc969055d2c5..60031e15d562 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -790,9 +790,12 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) } /* - * If we finished all bytes in the request we are done now. + * special case: failed zero length commands always need to + * drop down into the retry code. Otherwise, if we finished + * all bytes in the request we are done now. */ - if (!blk_end_request(req, error, good_bytes)) + if (!(blk_rq_bytes(req) == 0 && error) && + !blk_end_request(req, error, good_bytes)) goto next_command; /* From 23cf0b7eeda4777d0bac40f05b3ce3c62e34c957 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Fri, 29 Jul 2016 10:40:31 +0200 Subject: [PATCH 153/178] block: fix use-after-free in seq file commit 77da160530dd1dc94f6ae15a981f24e5f0021e84 upstream. I got a KASAN report of use-after-free: ================================================================== BUG: KASAN: use-after-free in klist_iter_exit+0x61/0x70 at addr ffff8800b6581508 Read of size 8 by task trinity-c1/315 ============================================================================= BUG kmalloc-32 (Not tainted): kasan: bad access detected ----------------------------------------------------------------------------- Disabling lock debugging due to kernel taint INFO: Allocated in disk_seqf_start+0x66/0x110 age=144 cpu=1 pid=315 ___slab_alloc+0x4f1/0x520 __slab_alloc.isra.58+0x56/0x80 kmem_cache_alloc_trace+0x260/0x2a0 disk_seqf_start+0x66/0x110 traverse+0x176/0x860 seq_read+0x7e3/0x11a0 proc_reg_read+0xbc/0x180 do_loop_readv_writev+0x134/0x210 do_readv_writev+0x565/0x660 vfs_readv+0x67/0xa0 do_preadv+0x126/0x170 SyS_preadv+0xc/0x10 do_syscall_64+0x1a1/0x460 return_from_SYSCALL_64+0x0/0x6a INFO: Freed in disk_seqf_stop+0x42/0x50 age=160 cpu=1 pid=315 __slab_free+0x17a/0x2c0 kfree+0x20a/0x220 disk_seqf_stop+0x42/0x50 traverse+0x3b5/0x860 seq_read+0x7e3/0x11a0 proc_reg_read+0xbc/0x180 do_loop_readv_writev+0x134/0x210 do_readv_writev+0x565/0x660 vfs_readv+0x67/0xa0 do_preadv+0x126/0x170 SyS_preadv+0xc/0x10 do_syscall_64+0x1a1/0x460 return_from_SYSCALL_64+0x0/0x6a CPU: 1 PID: 315 Comm: trinity-c1 Tainted: G B 4.7.0+ #62 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 ffffea0002d96000 ffff880119b9f918 ffffffff81d6ce81 ffff88011a804480 ffff8800b6581500 ffff880119b9f948 ffffffff8146c7bd ffff88011a804480 ffffea0002d96000 ffff8800b6581500 fffffffffffffff4 ffff880119b9f970 Call Trace: [] dump_stack+0x65/0x84 [] print_trailer+0x10d/0x1a0 [] object_err+0x2f/0x40 [] kasan_report_error+0x221/0x520 [] __asan_report_load8_noabort+0x3e/0x40 [] klist_iter_exit+0x61/0x70 [] class_dev_iter_exit+0x9/0x10 [] disk_seqf_stop+0x3a/0x50 [] seq_read+0x4b2/0x11a0 [] proc_reg_read+0xbc/0x180 [] do_loop_readv_writev+0x134/0x210 [] do_readv_writev+0x565/0x660 [] vfs_readv+0x67/0xa0 [] do_preadv+0x126/0x170 [] SyS_preadv+0xc/0x10 This problem can occur in the following situation: open() - pread() - .seq_start() - iter = kmalloc() // succeeds - seqf->private = iter - .seq_stop() - kfree(seqf->private) - pread() - .seq_start() - iter = kmalloc() // fails - .seq_stop() - class_dev_iter_exit(seqf->private) // boom! old pointer As the comment in disk_seqf_stop() says, stop is called even if start failed, so we need to reinitialise the private pointer to NULL when seq iteration stops. An alternative would be to set the private pointer to NULL when the kmalloc() in disk_seqf_start() fails. Cc: stable@vger.kernel.org Signed-off-by: Vegard Nossum Acked-by: Tejun Heo Signed-off-by: Jens Axboe Signed-off-by: Willy Tarreau --- block/genhd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/block/genhd.c b/block/genhd.c index b09f5fc94dee..7af2f6a18d9b 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -829,6 +829,7 @@ static void disk_seqf_stop(struct seq_file *seqf, void *v) if (iter) { class_dev_iter_exit(iter); kfree(iter); + seqf->private = NULL; } } From 7f92e56587a6e45f5941351b4f54787066408f46 Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Mon, 25 Jul 2016 21:17:04 +0800 Subject: [PATCH 154/178] fuse: fix wrong assignment of ->flags in fuse_send_init() commit 9446385f05c9af25fed53dbed3cc75763730be52 upstream. FUSE_HAS_IOCTL_DIR should be assigned to ->flags, it may be a typo. Signed-off-by: Wei Fang Signed-off-by: Miklos Szeredi Fixes: 69fe05c90ed5 ("fuse: add missing INIT flags") Cc: Signed-off-by: Willy Tarreau --- fs/fuse/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 4d371f3b9a45..efe802e5bb3d 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -913,7 +913,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC | FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK | FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ | - FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA | + FUSE_FLOCK_LOCKS | FUSE_HAS_IOCTL_DIR | FUSE_AUTO_INVAL_DATA | FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO; req->in.h.opcode = FUSE_INIT; req->in.numargs = 1; From 3787fb664c0ce85a6205e2e852dcc6a270ae23f9 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Sat, 23 Jul 2016 07:43:50 +0200 Subject: [PATCH 155/178] net/irda: fix NULL pointer dereference on memory allocation failure commit d3e6952cfb7ba5f4bfa29d4803ba91f96ce1204d upstream. I ran into this: kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] PREEMPT SMP KASAN CPU: 2 PID: 2012 Comm: trinity-c3 Not tainted 4.7.0-rc7+ #19 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 task: ffff8800b745f2c0 ti: ffff880111740000 task.ti: ffff880111740000 RIP: 0010:[] [] irttp_connect_request+0x36/0x710 RSP: 0018:ffff880111747bb8 EFLAGS: 00010286 RAX: dffffc0000000000 RBX: 0000000000000000 RCX: 0000000069dd8358 RDX: 0000000000000009 RSI: 0000000000000027 RDI: 0000000000000048 RBP: ffff880111747c00 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000069dd8358 R11: 1ffffffff0759723 R12: 0000000000000000 R13: ffff88011a7e4780 R14: 0000000000000027 R15: 0000000000000000 FS: 00007fc738404700(0000) GS:ffff88011af00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fc737fdfb10 CR3: 0000000118087000 CR4: 00000000000006e0 Stack: 0000000000000200 ffff880111747bd8 ffffffff810ee611 ffff880119f1f220 ffff880119f1f4f8 ffff880119f1f4f0 ffff88011a7e4780 ffff880119f1f232 ffff880119f1f220 ffff880111747d58 ffffffff82bca542 0000000000000000 Call Trace: [] irda_connect+0x562/0x1190 [] SYSC_connect+0x202/0x2a0 [] SyS_connect+0x9/0x10 [] do_syscall_64+0x19c/0x410 [] entry_SYSCALL64_slow_path+0x25/0x25 Code: 41 89 ca 48 89 e5 41 57 41 56 41 55 41 54 41 89 d7 53 48 89 fb 48 83 c7 48 48 89 fa 41 89 f6 48 c1 ea 03 48 83 ec 20 4c 8b 65 10 <0f> b6 04 02 84 c0 74 08 84 c0 0f 8e 4c 04 00 00 80 7b 48 00 74 RIP [] irttp_connect_request+0x36/0x710 RSP ---[ end trace 4cda2588bc055b30 ]--- The problem is that irda_open_tsap() can fail and leave self->tsap = NULL, and then irttp_connect_request() almost immediately dereferences it. Cc: stable@vger.kernel.org Signed-off-by: Vegard Nossum Signed-off-by: David S. Miller Signed-off-by: Willy Tarreau --- net/irda/af_irda.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index f8133ff5b081..c95bafa65f5b 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -1039,8 +1039,11 @@ static int irda_connect(struct socket *sock, struct sockaddr *uaddr, } /* Check if we have opened a local TSAP */ - if (!self->tsap) - irda_open_tsap(self, LSAP_ANY, addr->sir_name); + if (!self->tsap) { + err = irda_open_tsap(self, LSAP_ANY, addr->sir_name); + if (err) + goto out; + } /* Move to connecting socket, start sending Connect Requests */ sock->state = SS_CONNECTING; From 68a444857c7965327f3a4ad79c0b0ddee4de2915 Mon Sep 17 00:00:00 2001 From: Vignesh R Date: Thu, 9 Jun 2016 11:02:04 +0530 Subject: [PATCH 156/178] gpio: pca953x: Fix NBANK calculation for PCA9536 commit a246b8198f776a16d1d3a3bbfc2d437bad766b29 upstream. NBANK() macro assumes that ngpios is a multiple of 8(BANK_SZ) and hence results in 0 banks for PCA9536 which has just 4 gpios. This is wrong as PCA9356 has 1 bank with 4 gpios. This results in uninitialized PCA953X_INVERT register. Fix this by using DIV_ROUND_UP macro in NBANK(). Cc: stable@vger.kernel.org Signed-off-by: Vignesh R Signed-off-by: Linus Walleij Signed-off-by: Willy Tarreau --- drivers/gpio/gpio-pca953x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index 426c51dd420c..ac11e455aea5 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -75,7 +75,7 @@ MODULE_DEVICE_TABLE(i2c, pca953x_id); #define MAX_BANK 5 #define BANK_SZ 8 -#define NBANK(chip) (chip->gpio_chip.ngpio / BANK_SZ) +#define NBANK(chip) DIV_ROUND_UP(chip->gpio_chip.ngpio, BANK_SZ) struct pca953x_chip { unsigned gpio_start; From 21fc94d75e5e5241ceff5ec8182db1d40d5ccc05 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Mon, 13 Jun 2016 19:44:00 +0800 Subject: [PATCH 157/178] hp-wmi: Fix wifi cannot be hard-unblocked commit fc8a601e1175ae351f662506030f9939cb7fdbfe upstream. Several users reported wifi cannot be unblocked as discussed in [1]. This patch removes the use of the 2009 flag by BIOS but uses the actual WMI function calls - it will be skipped if WMI reports unsupported. [1] https://bugzilla.kernel.org/show_bug.cgi?id=69131 Signed-off-by: Alex Hung Tested-by: Evgenii Shatokhin Signed-off-by: Darren Hart Signed-off-by: Greg Kroah-Hartman Signed-off-by: Willy Tarreau --- drivers/platform/x86/hp-wmi.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c index d111c8687f9b..46497c6cbcc1 100644 --- a/drivers/platform/x86/hp-wmi.c +++ b/drivers/platform/x86/hp-wmi.c @@ -640,6 +640,11 @@ static int hp_wmi_rfkill_setup(struct platform_device *device) if (err) return err; + err = hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 1, &wireless, + sizeof(wireless), 0); + if (err) + return err; + if (wireless & 0x1) { wifi_rfkill = rfkill_alloc("hp-wifi", &device->dev, RFKILL_TYPE_WLAN, From a249c24517c783f1541e1b213a4c5879cb222bf6 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Tue, 3 May 2016 16:27:16 -0400 Subject: [PATCH 158/178] s5p-mfc: Set device name for reserved memory region devs commit 29debab0a94035a390801d1f177d171d014b7765 upstream. The devices don't have a name set, so makes dev_name() returns NULL which makes harder to identify the devices that are causing issues, for example: WARNING: CPU: 2 PID: 616 at drivers/base/core.c:251 device_release+0x8c/0x90 Device '(null)' does not have a release() function, it is broken and must be fixed. And after setting the device name: WARNING: CPU: 0 PID: 591 at drivers/base/core.c:251 device_release+0x8c/0x90 Device 's5p-mfc-l' does not have a release() function, it is broken and must be fixed. Cc: Fixes: 6e83e6e25eb4 ("[media] s5p-mfc: Fix kernel warning on memory init") Signed-off-by: Javier Martinez Canillas Tested-by: Marek Szyprowski Signed-off-by: Sylwester Nawrocki Signed-off-by: Willy Tarreau --- drivers/media/platform/s5p-mfc/s5p_mfc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/media/platform/s5p-mfc/s5p_mfc.c b/drivers/media/platform/s5p-mfc/s5p_mfc.c index 961d7ff75427..fe9162378753 100644 --- a/drivers/media/platform/s5p-mfc/s5p_mfc.c +++ b/drivers/media/platform/s5p-mfc/s5p_mfc.c @@ -1012,6 +1012,8 @@ static int s5p_mfc_alloc_memdevs(struct s5p_mfc_dev *dev) mfc_err("Not enough memory\n"); return -ENOMEM; } + + dev_set_name(dev->mem_dev_l, "%s", "s5p-mfc-l"); device_initialize(dev->mem_dev_l); of_property_read_u32_array(dev->plat_dev->dev.of_node, "samsung,mfc-l", mem_info, 2); @@ -1029,6 +1031,8 @@ static int s5p_mfc_alloc_memdevs(struct s5p_mfc_dev *dev) mfc_err("Not enough memory\n"); return -ENOMEM; } + + dev_set_name(dev->mem_dev_r, "%s", "s5p-mfc-r"); device_initialize(dev->mem_dev_r); of_property_read_u32_array(dev->plat_dev->dev.of_node, "samsung,mfc-r", mem_info, 2); From e092e4ff8df9e2b5ffe575c29fbb025f363d4925 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Tue, 3 May 2016 16:27:17 -0400 Subject: [PATCH 159/178] s5p-mfc: Add release callback for memory region devs commit 6311f1261f59ce5e51fbe5cc3b5e7737197316ac upstream. When s5p_mfc_remove() calls put_device() for the reserved memory region devs, the driver core warns that the dev doesn't have a release callback: WARNING: CPU: 0 PID: 591 at drivers/base/core.c:251 device_release+0x8c/0x90 Device 's5p-mfc-l' does not have a release() function, it is broken and must be fixed. Also, the declared DMA memory using dma_declare_coherent_memory() isn't relased so add a dev .release that calls dma_release_declared_memory(). Cc: Fixes: 6e83e6e25eb4 ("[media] s5p-mfc: Fix kernel warning on memory init") Signed-off-by: Javier Martinez Canillas Tested-by: Marek Szyprowski Signed-off-by: Sylwester Nawrocki Signed-off-by: Willy Tarreau --- drivers/media/platform/s5p-mfc/s5p_mfc.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/media/platform/s5p-mfc/s5p_mfc.c b/drivers/media/platform/s5p-mfc/s5p_mfc.c index fe9162378753..eb92027cef92 100644 --- a/drivers/media/platform/s5p-mfc/s5p_mfc.c +++ b/drivers/media/platform/s5p-mfc/s5p_mfc.c @@ -1000,6 +1000,11 @@ static int match_child(struct device *dev, void *data) return !strcmp(dev_name(dev), (char *)data); } +static void s5p_mfc_memdev_release(struct device *dev) +{ + dma_release_declared_memory(dev); +} + static void *mfc_get_drv_data(struct platform_device *pdev); static int s5p_mfc_alloc_memdevs(struct s5p_mfc_dev *dev) @@ -1014,6 +1019,7 @@ static int s5p_mfc_alloc_memdevs(struct s5p_mfc_dev *dev) } dev_set_name(dev->mem_dev_l, "%s", "s5p-mfc-l"); + dev->mem_dev_l->release = s5p_mfc_memdev_release; device_initialize(dev->mem_dev_l); of_property_read_u32_array(dev->plat_dev->dev.of_node, "samsung,mfc-l", mem_info, 2); @@ -1033,6 +1039,7 @@ static int s5p_mfc_alloc_memdevs(struct s5p_mfc_dev *dev) } dev_set_name(dev->mem_dev_r, "%s", "s5p-mfc-r"); + dev->mem_dev_r->release = s5p_mfc_memdev_release; device_initialize(dev->mem_dev_r); of_property_read_u32_array(dev->plat_dev->dev.of_node, "samsung,mfc-r", mem_info, 2); From 9e10c1166f492b7979a6cf1b71e2ffa19c18e21c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Amadeusz=20S=C3=85=C2=82awi=C3=85=C2=84ski?= Date: Thu, 14 Jul 2016 10:50:23 +0200 Subject: [PATCH 160/178] Bluetooth: Fix l2cap_sock_setsockopt() with optname BT_RCVMTU MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 23bc6ab0a0912146fd674a0becc758c3162baabc upstream. When we retrieve imtu value from userspace we should use 16 bit pointer cast instead of 32 as it's defined that way in headers. Fixes setsockopt calls on big-endian platforms. Signed-off-by: Amadeusz Sławiński Signed-off-by: Marcel Holtmann Cc: stable@vger.kernel.org Signed-off-by: Willy Tarreau --- net/bluetooth/l2cap_sock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 5f36f70ce44d..4b966c6c0145 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -725,7 +725,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, break; } - if (get_user(opt, (u32 __user *) optval)) { + if (get_user(opt, (u16 __user *) optval)) { err = -EFAULT; break; } From 60a8744f15d8bdcab6dc4f21728d8e9cb5e8880f Mon Sep 17 00:00:00 2001 From: Sachin Prabhu Date: Thu, 7 Jul 2016 21:28:27 +0100 Subject: [PATCH 161/178] cifs: Check for existing directory when opening file with O_CREAT commit 8d9535b6efd86e6c07da59f97e68f44efb7fe080 upstream. When opening a file with O_CREAT flag, check to see if the file opened is an existing directory. This prevents the directory from being opened which subsequently causes a crash when the close function for directories cifs_closedir() is called which frees up the file->private_data memory while the file is still listed on the open file list for the tcon. Signed-off-by: Sachin Prabhu Signed-off-by: Steve French CC: Stable Reported-by: Xiaoli Feng Signed-off-by: Willy Tarreau --- fs/cifs/dir.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 0c2425b21974..a998c929286f 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -227,6 +227,13 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid, goto cifs_create_get_file_info; } + if (S_ISDIR(newinode->i_mode)) { + CIFSSMBClose(xid, tcon, fid->netfid); + iput(newinode); + rc = -EISDIR; + goto out; + } + if (!S_ISREG(newinode->i_mode)) { /* * The server may allow us to open things like @@ -391,10 +398,14 @@ cifs_create_set_dentry: if (rc != 0) { cifs_dbg(FYI, "Create worked, get_inode_info failed rc = %d\n", rc); - if (server->ops->close) - server->ops->close(xid, tcon, fid); - goto out; + goto out_err; } + + if (S_ISDIR(newinode->i_mode)) { + rc = -EISDIR; + goto out_err; + } + d_drop(direntry); d_add(direntry, newinode); @@ -402,6 +413,13 @@ out: kfree(buf); kfree(full_path); return rc; + +out_err: + if (server->ops->close) + server->ops->close(xid, tcon, fid); + if (newinode) + iput(newinode); + goto out; } int From fb85c52515a65aa9763c0c480b9bbbb9de6e08ca Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Mon, 6 Jun 2016 15:17:20 -0400 Subject: [PATCH 162/178] netlabel: add address family checks to netlbl_{sock,req}_delattr() commit 0e0e36774081534783aa8eeb9f6fbddf98d3c061 upstream. It seems risky to always rely on the caller to ensure the socket's address family is correct before passing it to the NetLabel kAPI, especially since we see at least one LSM which didn't. Add address family checks to the *_delattr() functions to help prevent future problems. Cc: Reported-by: Maninder Singh Signed-off-by: Paul Moore Signed-off-by: Willy Tarreau --- net/netlabel/netlabel_kapi.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index 7c94aedd0912..5b1fbe45ff0b 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -700,7 +700,11 @@ socket_setattr_return: */ void netlbl_sock_delattr(struct sock *sk) { - cipso_v4_sock_delattr(sk); + switch (sk->sk_family) { + case AF_INET: + cipso_v4_sock_delattr(sk); + break; + } } /** @@ -879,7 +883,11 @@ req_setattr_return: */ void netlbl_req_delattr(struct request_sock *req) { - cipso_v4_req_delattr(req); + switch (req->rsk_ops->family) { + case AF_INET: + cipso_v4_req_delattr(req); + break; + } } /** From 1c72390ed82894cc1c90f4ef581be6c7642f7df2 Mon Sep 17 00:00:00 2001 From: Konstantin Neumoin Date: Mon, 11 Jul 2016 15:28:59 +0300 Subject: [PATCH 163/178] balloon: check the number of available pages in leak balloon commit 37cf99e08c6fb4dcea0f9ad2b13b6daa8c76a711 upstream. The balloon has a special mechanism that is subscribed to the oom notification which leads to deflation for a fixed number of pages. The number is always fixed even when the balloon is fully deflated. But leak_balloon did not expect that the pages to deflate will be more than taken, and raise a "BUG" in balloon_page_dequeue when page list will be empty. So, the simplest solution would be to check that the number of releases pages is less or equal to the number taken pages. Cc: stable@vger.kernel.org Signed-off-by: Konstantin Neumoin Signed-off-by: Denis V. Lunev CC: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Signed-off-by: Willy Tarreau --- drivers/virtio/virtio_balloon.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 7d7add5ceba4..148e8ea1bc96 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -177,6 +177,8 @@ static void leak_balloon(struct virtio_balloon *vb, size_t num) num = min(num, ARRAY_SIZE(vb->pfns)); mutex_lock(&vb->balloon_lock); + /* We can't release more pages than taken */ + num = min(num, (size_t)vb->num_pages); for (vb->num_pfns = 0; vb->num_pfns < num; vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE) { page = balloon_page_dequeue(vb_dev_info); From 3e6afa42b576122df172ad1acbd545b85a7d0c7c Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Fri, 8 Jul 2016 12:18:50 -0700 Subject: [PATCH 164/178] ftrace/recordmcount: Work around for addition of metag magic but not relocations commit b2e1c26f0b62531636509fbcb6dab65617ed8331 upstream. glibc recently did a sync up (94e73c95d9b5 "elf.h: Sync with the gabi webpage") that added a #define for EM_METAG but did not add relocations This triggers build errors: scripts/recordmcount.c: In function 'do_file': scripts/recordmcount.c:466:28: error: 'R_METAG_ADDR32' undeclared (first use in this function) case EM_METAG: reltype = R_METAG_ADDR32; ^~~~~~~~~~~~~~ scripts/recordmcount.c:466:28: note: each undeclared identifier is reported only once for each function it appears in scripts/recordmcount.c:468:20: error: 'R_METAG_NONE' undeclared (first use in this function) rel_type_nop = R_METAG_NONE; ^~~~~~~~~~~~ Work around this change with some more #ifdefery for the relocations. Fedora Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1354034 Link: http://lkml.kernel.org/r/1468005530-14757-1-git-send-email-labbott@redhat.com Cc: stable@vger.kernel.org # v3.9+ Cc: James Hogan Fixes: 00512bdd4573 ("metag: ftrace support") Reported-by: Ross Burton Signed-off-by: Laura Abbott Signed-off-by: Steven Rostedt Signed-off-by: Willy Tarreau --- scripts/recordmcount.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c index ee625e3a56ba..4f7d13da04a5 100644 --- a/scripts/recordmcount.c +++ b/scripts/recordmcount.c @@ -33,10 +33,17 @@ #include #include +/* + * glibc synced up and added the metag number but didn't add the relocations. + * Work around this in a crude manner for now. + */ #ifndef EM_METAG -/* Remove this when these make it to the standard system elf.h. */ #define EM_METAG 174 +#endif +#ifndef R_METAG_ADDR32 #define R_METAG_ADDR32 2 +#endif +#ifndef R_METAG_NONE #define R_METAG_NONE 3 #endif From 256dc4c87ab4727a0c9ef3461858ff8571883a6b Mon Sep 17 00:00:00 2001 From: James Hogan Date: Thu, 4 Aug 2016 17:36:08 +0100 Subject: [PATCH 165/178] metag: Fix __cmpxchg_u32 asm constraint for CMP commit 6154c187b97ee7513046bb4eb317a89f738f13ef upstream. The LNKGET based atomic sequence in __cmpxchg_u32 has slightly incorrect constraints for the return value which under certain circumstances can allow an address unit register to be used as the first operand of a CMP instruction. This isn't a valid instruction however as the encodings only allow a data unit to be specified. This would result in an assembler error like the following: Error: failed to assemble instruction: "CMP A0.2,D0Ar6" Fix by changing the constraint from "=&da" (assigned, early clobbered, data or address unit register) to "=&d" (data unit register only). The constraint for the second operand, "bd" (an op2 register where op1 is a data unit register and the instruction supports O2R) is already correct assuming the first operand is a data unit register. Other cases of CMP in inline asm have had their constraints checked, and appear to all be fine. Fixes: 6006c0d8ce94 ("metag: Atomics, locks and bitops") Signed-off-by: James Hogan Cc: linux-metag@vger.kernel.org Cc: # 3.9.x- Signed-off-by: Willy Tarreau --- arch/metag/include/asm/cmpxchg_lnkget.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/metag/include/asm/cmpxchg_lnkget.h b/arch/metag/include/asm/cmpxchg_lnkget.h index 0154e2807ebb..2369ad394876 100644 --- a/arch/metag/include/asm/cmpxchg_lnkget.h +++ b/arch/metag/include/asm/cmpxchg_lnkget.h @@ -73,7 +73,7 @@ static inline unsigned long __cmpxchg_u32(volatile int *m, unsigned long old, " DCACHE [%2], %0\n" #endif "2:\n" - : "=&d" (temp), "=&da" (retval) + : "=&d" (temp), "=&d" (retval) : "da" (m), "bd" (old), "da" (new) : "cc" ); From 265a87bbbb06d815afdbda6c7f60d03c93393dd4 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Thu, 23 Jun 2016 19:30:38 +0200 Subject: [PATCH 166/178] ubi: Make volume resize power cut aware commit 4946784bd3924b1374f05eebff2fd68660bae866 upstream. When the volume resize operation shrinks a volume, LEBs will be unmapped. Since unmapping will not erase these LEBs immediately we have to wait for that operation to finish. Otherwise in case of a power cut right after writing the new volume table the UBI attach process can find more LEBs than the volume table knows. This will render the UBI image unattachable. Fix this issue by waiting for erase to complete and write the new volume table afterward. Cc: Reported-by: Boris Brezillon Reviewed-by: Boris Brezillon Signed-off-by: Richard Weinberger Signed-off-by: Willy Tarreau --- drivers/mtd/ubi/vmt.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/drivers/mtd/ubi/vmt.c b/drivers/mtd/ubi/vmt.c index 8330703c098f..96131eb34c9f 100644 --- a/drivers/mtd/ubi/vmt.c +++ b/drivers/mtd/ubi/vmt.c @@ -534,13 +534,6 @@ int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs) spin_unlock(&ubi->volumes_lock); } - /* Change volume table record */ - vtbl_rec = ubi->vtbl[vol_id]; - vtbl_rec.reserved_pebs = cpu_to_be32(reserved_pebs); - err = ubi_change_vtbl_record(ubi, vol_id, &vtbl_rec); - if (err) - goto out_acc; - if (pebs < 0) { for (i = 0; i < -pebs; i++) { err = ubi_eba_unmap_leb(ubi, vol, reserved_pebs + i); @@ -558,6 +551,24 @@ int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs) spin_unlock(&ubi->volumes_lock); } + /* + * When we shrink a volume we have to flush all pending (erase) work. + * Otherwise it can happen that upon next attach UBI finds a LEB with + * lnum > highest_lnum and refuses to attach. + */ + if (pebs < 0) { + err = ubi_wl_flush(ubi, vol_id, UBI_ALL); + if (err) + goto out_acc; + } + + /* Change volume table record */ + vtbl_rec = ubi->vtbl[vol_id]; + vtbl_rec.reserved_pebs = cpu_to_be32(reserved_pebs); + err = ubi_change_vtbl_record(ubi, vol_id, &vtbl_rec); + if (err) + goto out_acc; + vol->reserved_pebs = reserved_pebs; if (vol->vol_type == UBI_DYNAMIC_VOLUME) { vol->used_ebs = reserved_pebs; From 12f567db822241090b90c5645ea9146f6cf8fa42 Mon Sep 17 00:00:00 2001 From: Iosif Harutyunov Date: Fri, 22 Jul 2016 23:22:42 +0000 Subject: [PATCH 167/178] ubi: Fix race condition between ubi device creation and udev commit 714fb87e8bc05ff78255afc0dca981e8c5242785 upstream. Install the UBI device object before we arm sysfs. Otherwise udev tries to read sysfs attributes before UBI is ready and udev rules will not match. Cc: Signed-off-by: Iosif Harutyunov [rw: massaged commit message] Signed-off-by: Richard Weinberger Signed-off-by: Willy Tarreau --- drivers/mtd/ubi/build.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c index a56133585e92..03331c173bd0 100644 --- a/drivers/mtd/ubi/build.c +++ b/drivers/mtd/ubi/build.c @@ -997,6 +997,9 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, goto out_detach; } + /* Make device "available" before it becomes accessible via sysfs */ + ubi_devices[ubi_num] = ubi; + err = uif_init(ubi, &ref); if (err) goto out_detach; @@ -1041,7 +1044,6 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, wake_up_process(ubi->bgt_thread); spin_unlock(&ubi->wl_lock); - ubi_devices[ubi_num] = ubi; ubi_notify_all(ubi, UBI_VOLUME_ADDED, NULL); return ubi_num; @@ -1052,6 +1054,7 @@ out_uif: ubi_assert(ref); uif_close(ubi); out_detach: + ubi_devices[ubi_num] = NULL; ubi_wl_close(ubi); ubi_free_internal_volumes(ubi); vfree(ubi->vtbl); From 8de0e6a7186bb35a757db8936200715b6b98372e Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 29 Jul 2016 13:19:55 -0400 Subject: [PATCH 168/178] dm flakey: error READ bios during the down_interval commit 99f3c90d0d85708e7401a81ce3314e50bf7f2819 upstream. When the corrupt_bio_byte feature was introduced it caused READ bios to no longer be errored with -EIO during the down_interval. This had to do with the complexity of needing to submit READs if the corrupt_bio_byte feature was used. Fix it so READ bios are properly errored with -EIO; doing so early in flakey_map() as long as there isn't a match for the corrupt_bio_byte feature. Fixes: a3998799fb4df ("dm flakey: add corrupt_bio_byte feature") Reported-by: Akira Hayakawa Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org Signed-off-by: Willy Tarreau --- drivers/md/dm-flakey.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index 7fcf21cb4ff8..a9a47cd029d5 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -286,10 +286,16 @@ static int flakey_map(struct dm_target *ti, struct bio *bio) pb->bio_submitted = true; /* - * Map reads as normal. + * Map reads as normal only if corrupt_bio_byte set. */ - if (bio_data_dir(bio) == READ) - goto map_bio; + if (bio_data_dir(bio) == READ) { + /* If flags were specified, only corrupt those that match. */ + if (fc->corrupt_bio_byte && (fc->corrupt_bio_rw == READ) && + all_corrupt_bio_flags_match(bio, fc)) + goto map_bio; + else + return -EIO; + } /* * Drop writes? @@ -327,12 +333,13 @@ static int flakey_end_io(struct dm_target *ti, struct bio *bio, int error) /* * Corrupt successful READs while in down state. - * If flags were specified, only corrupt those that match. */ - if (fc->corrupt_bio_byte && !error && pb->bio_submitted && - (bio_data_dir(bio) == READ) && (fc->corrupt_bio_rw == READ) && - all_corrupt_bio_flags_match(bio, fc)) - corrupt_bio_data(bio, fc); + if (!error && pb->bio_submitted && (bio_data_dir(bio) == READ)) { + if (fc->corrupt_bio_byte) + corrupt_bio_data(bio, fc); + else + return -EIO; + } return error; } From fa20cb40aa25df36c25f2b8b77c199d720b4a85d Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 28 Apr 2016 09:24:01 +0930 Subject: [PATCH 169/178] module: Invalidate signatures on force-loaded modules commit bca014caaa6130e57f69b5bf527967aa8ee70fdd upstream. Signing a module should only make it trusted by the specific kernel it was built for, not anything else. Loading a signed module meant for a kernel with a different ABI could have interesting effects. Therefore, treat all signatures as invalid when a module is force-loaded. Signed-off-by: Ben Hutchings Signed-off-by: Rusty Russell Signed-off-by: Greg Kroah-Hartman Signed-off-by: Willy Tarreau --- kernel/module.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/kernel/module.c b/kernel/module.c index f8a4f48b48a9..2c87e521032b 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2475,13 +2475,18 @@ static inline void kmemleak_load_module(const struct module *mod, #endif #ifdef CONFIG_MODULE_SIG -static int module_sig_check(struct load_info *info) +static int module_sig_check(struct load_info *info, int flags) { int err = -ENOKEY; const unsigned long markerlen = sizeof(MODULE_SIG_STRING) - 1; const void *mod = info->hdr; - if (info->len > markerlen && + /* + * Require flags == 0, as a module with version information + * removed is no longer the module that was signed + */ + if (flags == 0 && + info->len > markerlen && memcmp(mod + info->len - markerlen, MODULE_SIG_STRING, markerlen) == 0) { /* We truncate the module to discard the signature */ info->len -= markerlen; @@ -2503,7 +2508,7 @@ static int module_sig_check(struct load_info *info) return err; } #else /* !CONFIG_MODULE_SIG */ -static int module_sig_check(struct load_info *info) +static int module_sig_check(struct load_info *info, int flags) { return 0; } @@ -3228,7 +3233,7 @@ static int load_module(struct load_info *info, const char __user *uargs, struct module *mod; long err; - err = module_sig_check(info); + err = module_sig_check(info, flags); if (err) goto free_copy; From de3cae1cb2404b224945ba432c7f5cd6c580593d Mon Sep 17 00:00:00 2001 From: Tim Gardner Date: Fri, 30 Oct 2015 12:22:58 -0600 Subject: [PATCH 170/178] be2iscsi: Fix bogus WARN_ON length check commit dd29dae00d39186890a5eaa2fe4ad8768bfd41a9 upstream. drivers/scsi/be2iscsi/be_main.c: In function 'be_sgl_create_contiguous': drivers/scsi/be2iscsi/be_main.c:3187:18: warning: logical not is only applied to the left hand side of comparison [-Wlogical-not-parentheses] WARN_ON(!length > 0); gcc version 5.2.1 Signed-off-by: Tim Gardner Cc: Jayamohan Kallickal Cc: Minh Tran Cc: John Soni Jose Cc: "James E.J. Bottomley" Reported-by: Joel Stanley Reviewed-by: Manoj Kumar Signed-off-by: Martin K. Petersen Signed-off-by: Willy Tarreau --- drivers/scsi/be2iscsi/be_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/be2iscsi/be_main.c b/drivers/scsi/be2iscsi/be_main.c index a683a831527b..02278130826b 100644 --- a/drivers/scsi/be2iscsi/be_main.c +++ b/drivers/scsi/be2iscsi/be_main.c @@ -2978,7 +2978,7 @@ be_sgl_create_contiguous(void *virtual_address, { WARN_ON(!virtual_address); WARN_ON(!physical_address); - WARN_ON(!length > 0); + WARN_ON(!length); WARN_ON(!sgl); sgl->va = virtual_address; From f7ed93fef079f4e81e87893686b0f3159dd7e507 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Sun, 21 Aug 2016 15:55:52 +0200 Subject: [PATCH 171/178] squash mm: Export migrate_page_... : also make it non-static commit ce16887b69e94a8c0305e88c918989f8bc1bd6b7 upstream. Signed-off-by: Willy Tarreau --- mm/migrate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/migrate.c b/mm/migrate.c index 2ee28c2be6a5..808f8abb1b8f 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -308,7 +308,7 @@ static inline bool buffer_migrate_lock_buffers(struct buffer_head *head, * 2 for pages with a mapping * 3 for pages with a mapping and PagePrivate/PagePrivate2 set. */ -static int migrate_page_move_mapping(struct address_space *mapping, +int migrate_page_move_mapping(struct address_space *mapping, struct page *newpage, struct page *page, struct buffer_head *head, enum migrate_mode mode) { From 2fa2c236c0b448ffe60b141c014654ef18e27dae Mon Sep 17 00:00:00 2001 From: James C Boyd Date: Wed, 27 May 2015 17:09:06 -0500 Subject: [PATCH 172/178] HID: hid-input: Add parentheses to quell gcc warning commit 09a5c34e8d6b05663ec4c3d22b1fbd9fec89aaf9 upstream. GCC reports a -Wlogical-not-parentheses warning here; therefore add parentheses to shut it up and to express our intent more. Signed-off-by: James C Boyd Signed-off-by: Jiri Kosina Signed-off-by: Willy Tarreau --- drivers/hid/hid-input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index a3915d12e746..eb5700e40e1a 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -1084,7 +1084,7 @@ void hidinput_hid_event(struct hid_device *hid, struct hid_field *field, struct return; /* report the usage code as scancode if the key status has changed */ - if (usage->type == EV_KEY && !!test_bit(usage->code, input->key) != value) + if (usage->type == EV_KEY && (!!test_bit(usage->code, input->key)) != value) input_event(input, EV_MSC, MSC_SCAN, usage->hid); input_event(input, usage->type, usage->code, value); From c5a8b0061e44ebdb48e082d498a635e31c1a4c01 Mon Sep 17 00:00:00 2001 From: Tomer Barletz Date: Sun, 2 Aug 2015 02:08:57 -0700 Subject: [PATCH 173/178] ALSA: oxygen: Fix logical-not-parentheses warning commit 8ec7cfce3762299ae289c384e281b2f4010ae231 upstream. This fixes the following warning, that is seen with gcc 5.1: warning: logical not is only applied to the left hand side of comparison [-Wlogical-not-parentheses]. Signed-off-by: Tomer Barletz Signed-off-by: Takashi Iwai Signed-off-by: Willy Tarreau --- sound/pci/oxygen/oxygen_mixer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/oxygen/oxygen_mixer.c b/sound/pci/oxygen/oxygen_mixer.c index c0dbb52d45be..1e4bcb900fc6 100644 --- a/sound/pci/oxygen/oxygen_mixer.c +++ b/sound/pci/oxygen/oxygen_mixer.c @@ -88,7 +88,7 @@ static int dac_mute_put(struct snd_kcontrol *ctl, int changed; mutex_lock(&chip->mutex); - changed = !value->value.integer.value[0] != chip->dac_mute; + changed = (!value->value.integer.value[0]) != chip->dac_mute; if (changed) { chip->dac_mute = !value->value.integer.value[0]; chip->model.update_dac_mute(chip); From 9771330fd5decfd1ff4e38fb45ea05bcce1592e5 Mon Sep 17 00:00:00 2001 From: Luis Henriques Date: Wed, 14 Aug 2013 23:10:06 +0100 Subject: [PATCH 174/178] net: rfkill: Do not ignore errors from regulator_enable() commit dee08ab83d0378d922b67e7cf10bbec3e4ea343b upstream. Function regulator_enable() may return an error that has to be checked. This patch changes function rfkill_regulator_set_block() so that it checks for the return code. Also, rfkill_data->reg_enabled is set to 'true' only if there is no error. This fixes the following compilation warning: net/rfkill/rfkill-regulator.c:43:20: warning: ignoring return value of 'regulator_enable', declared with attribute warn_unused_result [-Wunused-result] Signed-off-by: Luis Henriques Signed-off-by: Johannes Berg Signed-off-by: Willy Tarreau --- net/rfkill/rfkill-regulator.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/rfkill/rfkill-regulator.c b/net/rfkill/rfkill-regulator.c index d11ac79246e4..cf5b145902e5 100644 --- a/net/rfkill/rfkill-regulator.c +++ b/net/rfkill/rfkill-regulator.c @@ -30,6 +30,7 @@ struct rfkill_regulator_data { static int rfkill_regulator_set_block(void *data, bool blocked) { struct rfkill_regulator_data *rfkill_data = data; + int ret = 0; pr_debug("%s: blocked: %d\n", __func__, blocked); @@ -40,15 +41,16 @@ static int rfkill_regulator_set_block(void *data, bool blocked) } } else { if (!rfkill_data->reg_enabled) { - regulator_enable(rfkill_data->vcc); - rfkill_data->reg_enabled = true; + ret = regulator_enable(rfkill_data->vcc); + if (!ret) + rfkill_data->reg_enabled = true; } } pr_debug("%s: regulator_is_enabled after set_block: %d\n", __func__, regulator_is_enabled(rfkill_data->vcc)); - return 0; + return ret; } static struct rfkill_ops rfkill_regulator_ops = { From d55534bf7e5796555a9359fd82872a07ba4ac87c Mon Sep 17 00:00:00 2001 From: Antonio Alecrim Jr Date: Sat, 14 Sep 2013 14:20:40 -0300 Subject: [PATCH 175/178] isdn: hfcpci_softirq: get func return to suppress compiler warning commit d6d6d1bc44362112e10a48d434e5b3c716152003 upstream. Signed-off-by: Antonio Alecrim Jr Signed-off-by: David S. Miller Signed-off-by: Willy Tarreau --- drivers/isdn/hardware/mISDN/hfcpci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/isdn/hardware/mISDN/hfcpci.c b/drivers/isdn/hardware/mISDN/hfcpci.c index a7e4939787c9..eab9167937e2 100644 --- a/drivers/isdn/hardware/mISDN/hfcpci.c +++ b/drivers/isdn/hardware/mISDN/hfcpci.c @@ -2295,8 +2295,8 @@ _hfcpci_softirq(struct device *dev, void *arg) static void hfcpci_softirq(void *arg) { - (void) driver_for_each_device(&hfc_driver.driver, NULL, arg, - _hfcpci_softirq); + WARN_ON_ONCE(driver_for_each_device(&hfc_driver.driver, NULL, arg, + _hfcpci_softirq) != 0); /* if next event would be in the past ... */ if ((s32)(hfc_jiffies + tics - jiffies) <= 0) From 78308a4a651d1b6c4851f84d09ba0edb9803df97 Mon Sep 17 00:00:00 2001 From: Alexander Shiyan Date: Tue, 25 Feb 2014 23:41:14 -0300 Subject: [PATCH 176/178] stb6100: fix buffer length check in stb6100_write_reg_range() commit 7e6bd12fb77b0067df13fb3ba3fadbdff2945396 upstream. We are checking sizeof() the wrong variable! Signed-off-by: Alexander Shiyan Signed-off-by: Michael Krufky Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Willy Tarreau --- drivers/media/dvb-frontends/stb6100.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/dvb-frontends/stb6100.c b/drivers/media/dvb-frontends/stb6100.c index cea175d19890..4ef8a5c7003e 100644 --- a/drivers/media/dvb-frontends/stb6100.c +++ b/drivers/media/dvb-frontends/stb6100.c @@ -193,7 +193,7 @@ static int stb6100_write_reg_range(struct stb6100_state *state, u8 buf[], int st .len = len + 1 }; - if (1 + len > sizeof(buf)) { + if (1 + len > sizeof(cmdbuf)) { printk(KERN_WARNING "%s: i2c wr: len=%d is too big!\n", KBUILD_MODNAME, len); From 51ace2618a8647a2031981b088a91635dc0d3e04 Mon Sep 17 00:00:00 2001 From: "dan.carpenter@oracle.com" Date: Sun, 9 Jun 2013 16:07:28 +0300 Subject: [PATCH 177/178] spi: spi-xilinx: cleanup a check in xilinx_spi_txrx_bufs() commit e33d085d11e54bc9fb07b2555cd104d8e7b3089b upstream. '!' has higher precedence than comparisons so the original condition is equivalent to "if (xspi->remaining_bytes == 0)". This makes the static checkers complain. xspi->remaining_bytes is signed and from looking at the code briefly, I think it might be able to go negative. I suspect that going negative may cause a bug, but I don't have the hardware and can't test. Signed-off-by: Dan Carpenter Signed-off-by: Mark Brown Signed-off-by: Willy Tarreau --- drivers/spi/spi-xilinx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-xilinx.c b/drivers/spi/spi-xilinx.c index 34d18dcfa0db..109a535b639c 100644 --- a/drivers/spi/spi-xilinx.c +++ b/drivers/spi/spi-xilinx.c @@ -315,7 +315,7 @@ static int xilinx_spi_txrx_bufs(struct spi_device *spi, struct spi_transfer *t) } /* See if there is more data to send */ - if (!xspi->remaining_bytes > 0) + if (xspi->remaining_bytes <= 0) break; } From 2ecaf1d025af0f481d00b3701ffbcc600dcab076 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Sun, 28 Aug 2016 12:19:20 +0200 Subject: [PATCH 178/178] Linux 3.10.103 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 868093c16ae0..d3cb458b295a 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 3 PATCHLEVEL = 10 -SUBLEVEL = 102 +SUBLEVEL = 103 EXTRAVERSION = NAME = TOSSUG Baby Fish